From 9cedb8bb69b89b0f0c529937247a6a80cabdbaec Mon Sep 17 00:00:00 2001
From: dim
Date: Fri, 21 Mar 2014 17:53:59 +0000
Subject: MFC 261991: Upgrade our copy of llvm/clang to 3.4 release.

This version supports all of the features in the current working draft
of the upcoming C++ standard, provisionally named C++1y.

The code generator's performance is greatly increased, and the loop
auto-vectorizer is now enabled at -Os and -O2 in addition to -O3.  The
PowerPC backend has made several major improvements to code generation
quality and compile time, and the X86, SPARC, ARM32, AArch64 and
SystemZ backends have all seen major feature work.

Release notes for llvm and clang can be found here:

MFC 262121 (by emaste):

Update lldb for clang/llvm 3.4 import

This commit largely restores the lldb source to the upstream r196259
snapshot with the addition of threaded inferior support and a few bug
fixes.

Specific upstream lldb revisions restored include:

  SVN      git
  181387   779e6ac
  181703   7bef4e2
  182099   b31044e
  182650   f2dcf35
  182683   0d91b80
  183862   15c1774
  183929   99447a6
  184177   0b2934b
  184948   4dc3761
  184954   007e7bc
  186990   eebd175

Sponsored by:   DARPA, AFRL

MFC 262186 (by emaste):

Fix mismerge in r262121

A break statement was lost in the merge.  The error had no functional
impact, but restore it to reduce the diff against upstream.

MFC 262303:

Pull in r197521 from upstream clang trunk (by rdivacky):

  Use the integrated assembler by default on FreeBSD/ppc and ppc64.

Requested by:   jhibbits

MFC 262611:

Pull in r196874 from upstream llvm trunk:

  Fix a crash that occurs when PWD is invalid.

  MCJIT needs to be able to run in hostile environments, even when PWD
  is invalid.  There's no need to crash MCJIT in this case.

  The obvious fix is to simply leave MCContext's CompilationDir empty
  when PWD can't be determined.  This way, MCJIT clients, and other
  clients that link with LLVM don't need a valid working directory.

  If we do want to guarantee valid CompilationDir, that should be done
  only for clients of getCompilationDir().  This is as simple as
  checking for an empty string.

  The only current use of getCompilationDir is EmitGenDwarfInfo, which
  won't conceivably run with an invalid working dir.  However, in the
  purely hypothetical and untestable case that this happens, the
  AT_comp_dir will be omitted from the compilation_unit DIE.

This should help fix assertions occurring with ports-mgmt/tinderbox
when it uses jails, which sometimes invalidates clang's current
working directory.

Reported by:    decke

MFC 262809:

Pull in r203007 from upstream clang trunk:

  Don't produce an alias between destructors with different calling
  conventions.  Fixes pr19007.

(Please note that this is an LLVM PR identifier, not a FreeBSD one.)

This should fix Firefox and/or libxul crashes (due to problems with
regparm/stdcall calling conventions) on i386.

Reported by:    multiple users on freebsd-current
PR:             bin/187103

MFC 263048:

Repair recognition of "CC" as an alias for the C++ compiler, since it
was silently broken by upstream for a Windows-specific use case.
Apparently some versions of CMake still rely on this archaic feature...

Reported by:    rakuco

MFC 263049:

Garbage collect the old way of adding the libstdc++ include directories
in clang's InitHeaderSearch.cpp.  This has been superseded by David
Chisnall's commit in r255321.  Moreover, if libc++ is used, the
libstdc++ include directories should not be in the search path at all.
These directories are now only used if you pass -stdlib=libstdc++.
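To make the vectorizer change above concrete, here is the kind of loop
the auto-vectorizer now transforms at -O2 and -Os (a minimal sketch of
my own, not code from this import; the file and function names are made
up):

    // vec.cpp -- independent, unit-stride iterations make this loop a
    // candidate for the loop auto-vectorizer, which clang 3.4 runs at
    // -Os and -O2 as well as -O3, e.g.:  clang++ -O2 -c vec.cpp
    void saxpy(float *__restrict y, const float *__restrict x,
               float a, int n) {
      for (int i = 0; i < n; ++i)
        y[i] = a * x[i] + y[i];  // emitted as SIMD where the target has it
    }

If the new default is unwanted, -fno-vectorize should disable the pass
again (assuming the flag spelling from clang's documentation).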
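The shape of the MCJIT fix merged in MFC 262611, as a paraphrased
sketch rather than the verbatim r196874 patch (the helper name is
mine):

    // Mirrors the idea of r196874: if the current directory cannot be
    // determined (invalid PWD), record an empty CompilationDir instead
    // of crashing; consumers then test for the empty string and simply
    // omit DW_AT_comp_dir from the compile unit DIE.
    #include <string>
    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/FileSystem.h"

    static std::string compilationDirOrEmpty() {
      llvm::SmallString<128> Dir;
      if (llvm::sys::fs::current_path(Dir))  // nonzero error_code: bad PWD
        Dir.clear();                         // leave the directory empty
      return Dir.str().str();
    }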
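A hypothetical i386 reduction of the pattern behind LLVM pr19007
(MFC 262809); this is not the actual reproducer, only an illustration
of why such an alias is invalid:

    // ~Derived() adds nothing beyond ~Base(), so clang may try to emit
    // it as a plain symbol alias of ~Base().  That is only sound when
    // both destructors use the same calling convention; here they
    // differ, so a real definition must be emitted instead.
    struct Base {
      __attribute__((stdcall)) ~Base();  // non-default convention (i386)
    };
    struct Derived : Base {
      ~Derived();                        // default convention: no alias
    };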
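What MFC 263048 restores, as a simplified sketch of the driver-name
heuristic (a hypothetical helper, not clang's actual code):

    #include <string>

    // The driver picks C++ mode from the name it was invoked under.
    // "CC" is the traditional Unix alias for the C++ compiler and must
    // be matched case-sensitively; the Windows-oriented upstream change
    // lowercased the name first, which turned "CC" into the C driver
    // "cc" and silently broke the alias.
    static bool invokedAsCXX(const std::string &prog) {
      auto endsWith = [&](const std::string &s) {
        return prog.size() >= s.size() &&
               prog.compare(prog.size() - s.size(), s.size(), s) == 0;
      };
      return endsWith("++") || endsWith("CC");  // no lowercasing first
    }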
---
 contrib/llvm/lib/Analysis/AliasAnalysis.cpp | 47 +-
 contrib/llvm/lib/Analysis/AliasSetTracker.cpp | 2 -
 contrib/llvm/lib/Analysis/Analysis.cpp | 11 +-
 contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp | 122 +-
 contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp | 118 +-
 .../llvm/lib/Analysis/BranchProbabilityInfo.cpp | 115 +-
 contrib/llvm/lib/Analysis/CFG.cpp | 245 +
 contrib/llvm/lib/Analysis/CaptureTracking.cpp | 8 +-
 contrib/llvm/lib/Analysis/ConstantFolding.cpp | 228 +-
 contrib/llvm/lib/Analysis/CostModel.cpp | 289 +-
 contrib/llvm/lib/Analysis/Delinearization.cpp | 133 +
 contrib/llvm/lib/Analysis/DependenceAnalysis.cpp | 85 +-
 contrib/llvm/lib/Analysis/IPA/CallGraph.cpp | 230 +-
 contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp | 2 +-
 contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp | 2 +-
 contrib/llvm/lib/Analysis/IPA/IPA.cpp | 3 +-
 contrib/llvm/lib/Analysis/IPA/InlineCost.cpp | 159 +-
 contrib/llvm/lib/Analysis/InstructionSimplify.cpp | 46 +-
 contrib/llvm/lib/Analysis/LazyValueInfo.cpp | 8 +-
 contrib/llvm/lib/Analysis/Lint.cpp | 42 +-
 contrib/llvm/lib/Analysis/LoopInfo.cpp | 97 +-
 contrib/llvm/lib/Analysis/LoopPass.cpp | 4 +
 contrib/llvm/lib/Analysis/MemoryBuiltins.cpp | 114 +-
 .../llvm/lib/Analysis/MemoryDependenceAnalysis.cpp | 15 +-
 contrib/llvm/lib/Analysis/PathNumbering.cpp | 521 --
 contrib/llvm/lib/Analysis/PathProfileInfo.cpp | 433 -
 contrib/llvm/lib/Analysis/PathProfileVerifier.cpp | 206 -
 contrib/llvm/lib/Analysis/ProfileDataLoader.cpp | 155 -
 .../llvm/lib/Analysis/ProfileDataLoaderPass.cpp | 188 -
 contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp | 426 -
 contrib/llvm/lib/Analysis/ProfileInfo.cpp | 1079 ---
 contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp | 155 -
 .../llvm/lib/Analysis/ProfileInfoLoaderPass.cpp | 267 -
 contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp | 383 -
 contrib/llvm/lib/Analysis/RegionInfo.cpp | 6 +-
 contrib/llvm/lib/Analysis/ScalarEvolution.cpp | 1112 ++-
 .../llvm/lib/Analysis/ScalarEvolutionExpander.cpp | 145 +-
 .../lib/Analysis/ScalarEvolutionNormalization.cpp | 18 +-
 contrib/llvm/lib/Analysis/TargetTransformInfo.cpp | 76 +-
 .../llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp | 116 +-
 contrib/llvm/lib/Analysis/ValueTracking.cpp | 88 +-
 contrib/llvm/lib/Archive/Archive.cpp | 262 -
 contrib/llvm/lib/Archive/ArchiveInternals.h | 88 -
 contrib/llvm/lib/Archive/ArchiveReader.cpp | 633 --
 contrib/llvm/lib/Archive/ArchiveWriter.cpp | 489 --
 contrib/llvm/lib/AsmParser/LLLexer.cpp | 9 +-
 contrib/llvm/lib/AsmParser/LLParser.cpp | 87 +-
 contrib/llvm/lib/AsmParser/LLParser.h | 4 +-
 contrib/llvm/lib/AsmParser/LLToken.h | 9 +-
 contrib/llvm/lib/AsmParser/Parser.cpp | 2 +-
 contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp | 928 ++-
 contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h | 104 +-
 .../llvm/lib/Bitcode/Reader/BitstreamReader.cpp | 13 +-
 contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp | 2 +-
 contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp | 226 +-
 .../llvm/lib/Bitcode/Writer/ValueEnumerator.cpp | 5 +
 contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h | 2 +-
 .../llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp | 6 +-
 contrib/llvm/lib/CodeGen/Analysis.cpp | 475 +-
 .../llvm/lib/CodeGen/AsmPrinter/ARMException.cpp | 18 +-
 contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 418 +-
 .../lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 49 +-
 .../lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 4 +-
 contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp | 137 +-
 contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h | 174 +-
 contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp | 507 ++
 contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h | 147 +
 .../lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 87 +-
 .../llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 124 +-
 .../lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 2 +-
 .../lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 1456 ++--
 .../llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 337 +-
 contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1668 ++--
 contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h | 268 +-
 .../llvm/lib/CodeGen/AsmPrinter/DwarfException.h | 4 +-
 .../llvm/lib/CodeGen/BasicTargetTransformInfo.cpp | 90 +-
 contrib/llvm/lib/CodeGen/BranchFolding.cpp | 19 +-
 contrib/llvm/lib/CodeGen/BranchFolding.h | 6 +-
 contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp | 41 +-
 contrib/llvm/lib/CodeGen/CallingConvLower.cpp | 2 +-
 contrib/llvm/lib/CodeGen/CodeGen.cpp | 2 -
 .../llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp | 8 +-
 contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h | 2 +-
 contrib/llvm/lib/CodeGen/DFAPacketizer.cpp | 3 +-
 .../lib/CodeGen/DeadMachineInstructionElim.cpp | 4 +-
 contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp | 16 +-
 contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp | 127 +-
 contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp | 2 +-
 contrib/llvm/lib/CodeGen/IfConversion.cpp | 229 +-
 contrib/llvm/lib/CodeGen/InlineSpiller.cpp | 222 +-
 contrib/llvm/lib/CodeGen/InterferenceCache.cpp | 8 +-
 contrib/llvm/lib/CodeGen/InterferenceCache.h | 2 +-
 contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp | 5 +-
 contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp | 50 +-
 contrib/llvm/lib/CodeGen/LexicalScopes.cpp | 10 +-
 contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp | 92 +-
 contrib/llvm/lib/CodeGen/LiveDebugVariables.h | 4 +-
 contrib/llvm/lib/CodeGen/LiveInterval.cpp | 401 +-
 contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp | 283 +-
 contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp | 56 +-
 contrib/llvm/lib/CodeGen/LiveRangeCalc.h | 38 +-
 contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp | 246 +-
 contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp | 6 +-
 contrib/llvm/lib/CodeGen/LiveRegUnits.cpp | 111 +
 contrib/llvm/lib/CodeGen/LiveVariables.cpp | 37 +-
 contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp | 8 +-
 .../llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp | 5 -
 contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp | 26 +-
 contrib/llvm/lib/CodeGen/MachineCSE.cpp | 8 +-
 .../llvm/lib/CodeGen/MachineCopyPropagation.cpp | 5 +-
 contrib/llvm/lib/CodeGen/MachineFunction.cpp | 57 +-
 contrib/llvm/lib/CodeGen/MachineInstr.cpp | 65 +-
 contrib/llvm/lib/CodeGen/MachineLICM.cpp | 20 +-
 contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp | 7 +-
 contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp | 56 +-
 contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp | 2 +-
 contrib/llvm/lib/CodeGen/MachineScheduler.cpp | 1456 ++--
 contrib/llvm/lib/CodeGen/MachineSink.cpp | 38 +-
 contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp | 10 +-
 contrib/llvm/lib/CodeGen/MachineVerifier.cpp | 418 +-
 contrib/llvm/lib/CodeGen/PHIElimination.cpp | 30 +-
 contrib/llvm/lib/CodeGen/PHIEliminationUtils.h | 2 +-
 contrib/llvm/lib/CodeGen/Passes.cpp | 31 +-
 contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp | 250 +-
 contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp | 47 +-
 contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp | 2 +-
 contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp | 430 +-
 contrib/llvm/lib/CodeGen/PrologEpilogInserter.h | 103 +-
 contrib/llvm/lib/CodeGen/RegAllocBase.cpp | 16 +-
 contrib/llvm/lib/CodeGen/RegAllocBase.h | 7 +-
 contrib/llvm/lib/CodeGen/RegAllocBasic.cpp | 18 +-
 contrib/llvm/lib/CodeGen/RegAllocFast.cpp | 60 +-
 contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp | 199 +-
 contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp | 64 +-
 contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp | 32 +
 contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp | 249 +-
 contrib/llvm/lib/CodeGen/RegisterPressure.cpp | 496 +-
 contrib/llvm/lib/CodeGen/RegisterScavenging.cpp | 19 +-
 contrib/llvm/lib/CodeGen/ScheduleDAG.cpp | 10 +-
 contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp | 139 +-
 .../llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 2537 ++++--
 contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp | 85 +-
 .../CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 46 +-
 .../llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 48 +-
 .../llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h | 2 +-
 .../llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 581 +-
 .../CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 225 +-
 .../CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 399 +-
 .../lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 80 +-
 .../llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h | 39 +-
 .../CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 123 +-
 .../lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 78 +-
 .../CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 532 +-
 .../CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 21 +-
 .../llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h | 56 -
 .../lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 12 +-
 .../lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 33 +-
 .../CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 39 +-
 .../llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 859 +-
 .../CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 1801 ++--
 .../lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 272 +-
 .../CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 19 +-
 .../lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 347 +-
 .../lib/CodeGen/SelectionDAG/TargetLowering.cpp | 156 +-
 contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp | 1152 ---
 contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp | 218 +-
 contrib/llvm/lib/CodeGen/SpillPlacement.cpp | 159 +-
 contrib/llvm/lib/CodeGen/SpillPlacement.h | 7 +-
 contrib/llvm/lib/CodeGen/Spiller.cpp | 33 +-
 contrib/llvm/lib/CodeGen/SplitKit.cpp | 63 +-
 contrib/llvm/lib/CodeGen/SplitKit.h | 4 +-
 contrib/llvm/lib/CodeGen/StackColoring.cpp | 40 +-
 contrib/llvm/lib/CodeGen/StackMaps.cpp | 314 +
 contrib/llvm/lib/CodeGen/StackProtector.cpp | 485 +-
 contrib/llvm/lib/CodeGen/StackSlotColoring.cpp | 100 +-
 contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp | 825 --
 contrib/llvm/lib/CodeGen/TailDuplication.cpp | 38 +-
 contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp | 80 +-
 contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp | 67 +-
 .../lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 48 +-
 contrib/llvm/lib/CodeGen/TargetOptionsImpl.cpp | 31 +-
 contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp | 18 +-
 contrib/llvm/lib/CodeGen/TargetSchedule.cpp | 73 +-
 .../llvm/lib/CodeGen/TwoAddressInstructionPass.cpp | 6 +-
 contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp | 4 -
 contrib/llvm/lib/CodeGen/VirtRegMap.cpp | 46 +-
 .../lib/DebugInfo/DWARFAbbreviationDeclaration.cpp | 76 +-
 .../lib/DebugInfo/DWARFAbbreviationDeclaration.h | 29 +-
 contrib/llvm/lib/DebugInfo/DWARFAttribute.h | 30 -
 contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp | 258 +-
 contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h | 125 +-
 contrib/llvm/lib/DebugInfo/DWARFContext.cpp | 327 +-
 contrib/llvm/lib/DebugInfo/DWARFContext.h | 93 +-
 contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp | 49 -
 contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h | 5 -
 contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp | 206 +-
 contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h | 92 +-
 contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp | 311 +-
 contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h | 90 +-
 contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp | 2 +-
 contrib/llvm/lib/DebugInfo/DWARFDebugLoc.cpp | 74 +
 contrib/llvm/lib/DebugInfo/DWARFDebugLoc.h | 60 +
 contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp | 310 +-
 contrib/llvm/lib/DebugInfo/DWARFTypeUnit.cpp | 39 +
 contrib/llvm/lib/DebugInfo/DWARFTypeUnit.h | 35 +
 contrib/llvm/lib/DebugInfo/DWARFUnit.cpp | 365 +
 contrib/llvm/lib/DebugInfo/DWARFUnit.h | 168 +
 .../llvm/lib/ExecutionEngine/ExecutionEngine.cpp | 63 +-
 .../ExecutionEngine/ExecutionEngineBindings.cpp | 111 +-
 .../IntelJITEvents/IntelJITEventsWrapper.h | 2 +-
 .../lib/ExecutionEngine/Interpreter/Execution.cpp | 746 +-
 .../Interpreter/ExternalFunctions.cpp | 31 +-
 .../lib/ExecutionEngine/Interpreter/Interpreter.h | 6 +
 contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp | 156 -
 .../lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 596 --
 .../llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 77 -
 .../llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp | 52 +-
 .../lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 44 +-
 contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp | 296 +-
 contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h | 244 +-
 .../ExecutionEngine/MCJIT/SectionMemoryManager.cpp | 112 +-
 .../OProfileJIT/OProfileJITEventListener.cpp | 66 +
 .../OProfileJIT/OProfileWrapper.cpp | 17 +-
 .../lib/ExecutionEngine/RTDyldMemoryManager.cpp | 282 +
 .../lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h | 1 +
 .../RuntimeDyld/ObjectImageCommon.h | 1 +
 .../ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 220 +-
 .../ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 462 +-
 .../ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h | 55 +-
 .../ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 104 +-
 .../RuntimeDyld/RuntimeDyldMachO.cpp | 172 +-
 .../ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 32 +-
 contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp | 8 +
 contrib/llvm/lib/IR/AsmWriter.cpp | 137 +-
 contrib/llvm/lib/IR/AsmWriter.h | 118 +
 contrib/llvm/lib/IR/AttributeImpl.h | 220 +-
 contrib/llvm/lib/IR/Attributes.cpp | 153 +-
 contrib/llvm/lib/IR/AutoUpgrade.cpp | 118 +-
 contrib/llvm/lib/IR/ConstantFold.cpp | 93 +-
 contrib/llvm/lib/IR/Constants.cpp | 66 +-
 contrib/llvm/lib/IR/Core.cpp | 118 +-
 contrib/llvm/lib/IR/DIBuilder.cpp | 589 +-
 contrib/llvm/lib/IR/DataLayout.cpp | 26 +-
 contrib/llvm/lib/IR/DebugInfo.cpp | 803 +-
 contrib/llvm/lib/IR/Function.cpp | 56 +-
 contrib/llvm/lib/IR/GCOV.cpp | 297 +-
 contrib/llvm/lib/IR/Globals.cpp | 16 +-
 contrib/llvm/lib/IR/Instruction.cpp | 25 +-
 contrib/llvm/lib/IR/Instructions.cpp | 369 +-
 contrib/llvm/lib/IR/LLVMContextImpl.h | 5 +
 contrib/llvm/lib/IR/LegacyPassManager.cpp | 1920 +++++
 contrib/llvm/lib/IR/Metadata.cpp | 8 +-
 contrib/llvm/lib/IR/Module.cpp | 75 +-
 contrib/llvm/lib/IR/PassManager.cpp | 1985 +----
 contrib/llvm/lib/IR/PassRegistry.cpp | 21 +-
 contrib/llvm/lib/IR/Type.cpp | 6 +-
 contrib/llvm/lib/IR/TypeFinder.cpp | 31 +-
 contrib/llvm/lib/IR/Value.cpp | 74 +-
 contrib/llvm/lib/IR/ValueTypes.cpp | 10 +
 contrib/llvm/lib/IR/Verifier.cpp | 439 +-
 contrib/llvm/lib/IRReader/IRReader.cpp | 40 +-
 contrib/llvm/lib/LTO/LTOCodeGenerator.cpp | 521 ++
 contrib/llvm/lib/LTO/LTOModule.cpp | 794 ++
 contrib/llvm/lib/Linker/LinkModules.cpp | 168 +-
 contrib/llvm/lib/MC/ELFObjectWriter.cpp | 76 +-
 contrib/llvm/lib/MC/MCAsmBackend.cpp | 4 +-
 contrib/llvm/lib/MC/MCAsmInfo.cpp | 15 +-
 contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp | 1 -
 contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp | 2 -
 contrib/llvm/lib/MC/MCAsmInfoELF.cpp | 23 +
 contrib/llvm/lib/MC/MCAsmStreamer.cpp | 310 +-
 contrib/llvm/lib/MC/MCAssembler.cpp | 17 +-
 contrib/llvm/lib/MC/MCAtom.cpp | 123 +-
 contrib/llvm/lib/MC/MCContext.cpp | 84 +-
 contrib/llvm/lib/MC/MCDisassembler.cpp | 42 +
 .../llvm/lib/MC/MCDisassembler/Disassembler.cpp | 171 +-
 contrib/llvm/lib/MC/MCDisassembler/Disassembler.h | 9 +
 contrib/llvm/lib/MC/MCDwarf.cpp | 210 +-
 contrib/llvm/lib/MC/MCELF.cpp | 7 +-
 contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp | 18 +-
 contrib/llvm/lib/MC/MCELFStreamer.cpp | 122 +-
 contrib/llvm/lib/MC/MCExpr.cpp | 205 +-
 contrib/llvm/lib/MC/MCExternalSymbolizer.cpp | 181 +
 contrib/llvm/lib/MC/MCFunction.cpp | 81 +
 contrib/llvm/lib/MC/MCInstPrinter.cpp | 10 +-
 contrib/llvm/lib/MC/MCInstrAnalysis.cpp | 9 +-
 contrib/llvm/lib/MC/MCMachOStreamer.cpp | 36 +-
 contrib/llvm/lib/MC/MCModule.cpp | 125 +-
 contrib/llvm/lib/MC/MCModuleYAML.cpp | 461 ++
 contrib/llvm/lib/MC/MCNullStreamer.cpp | 20 +-
 contrib/llvm/lib/MC/MCObjectDisassembler.cpp | 584 ++
 contrib/llvm/lib/MC/MCObjectFileInfo.cpp | 41 +-
 contrib/llvm/lib/MC/MCObjectStreamer.cpp | 48 +-
 contrib/llvm/lib/MC/MCObjectSymbolizer.cpp | 310 +
 contrib/llvm/lib/MC/MCParser/AsmLexer.cpp | 58 +-
 contrib/llvm/lib/MC/MCParser/AsmParser.cpp | 1658 ++--
 contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp | 284 +-
 contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp | 43 +-
 contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp | 121 +-
 contrib/llvm/lib/MC/MCPureStreamer.cpp | 20 +-
 contrib/llvm/lib/MC/MCRegisterInfo.cpp | 12 +
 contrib/llvm/lib/MC/MCRelocationInfo.cpp | 39 +
 contrib/llvm/lib/MC/MCSectionCOFF.cpp | 24 +-
 contrib/llvm/lib/MC/MCSectionELF.cpp | 57 +-
 contrib/llvm/lib/MC/MCStreamer.cpp | 149 +-
 contrib/llvm/lib/MC/MCSubtargetInfo.cpp | 9 +-
 contrib/llvm/lib/MC/MCSymbol.cpp | 17 +-
 contrib/llvm/lib/MC/MCSymbolizer.cpp | 20 +
 contrib/llvm/lib/MC/MCWin64EH.cpp | 50 +-
 contrib/llvm/lib/MC/MachObjectWriter.cpp | 117 +-
 contrib/llvm/lib/MC/SubtargetFeature.cpp | 8 +-
 contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp | 106 +-
 contrib/llvm/lib/MC/WinCOFFStreamer.cpp | 27 +-
 contrib/llvm/lib/Object/Archive.cpp | 279 +-
 contrib/llvm/lib/Object/Binary.cpp | 57 +-
 contrib/llvm/lib/Object/COFFObjectFile.cpp | 438 +-
 contrib/llvm/lib/Object/COFFYAML.cpp | 281 +
 contrib/llvm/lib/Object/ELF.cpp | 714 ++
 contrib/llvm/lib/Object/ELFObjectFile.cpp | 5 +-
 contrib/llvm/lib/Object/ELFYAML.cpp | 338 +
 contrib/llvm/lib/Object/Error.cpp | 10 +-
 contrib/llvm/lib/Object/MachOObjectFile.cpp | 897 +-
 contrib/llvm/lib/Object/MachOUniversal.cpp | 139 +
 contrib/llvm/lib/Object/Object.cpp | 5 +-
 contrib/llvm/lib/Object/ObjectFile.cpp | 66 +-
 contrib/llvm/lib/Object/YAML.cpp | 68 +
 contrib/llvm/lib/Option/ArgList.cpp | 15 +
 contrib/llvm/lib/Option/OptTable.cpp | 111 +-
 contrib/llvm/lib/Option/Option.cpp | 53 +-
 contrib/llvm/lib/Support/APFloat.cpp | 696 +-
 contrib/llvm/lib/Support/APInt.cpp | 62 +-
 contrib/llvm/lib/Support/Allocator.cpp | 7 +-
 contrib/llvm/lib/Support/BlockFrequency.cpp | 150 +-
 contrib/llvm/lib/Support/CommandLine.cpp | 352 +-
 contrib/llvm/lib/Support/Compression.cpp | 7 +
 contrib/llvm/lib/Support/ConstantRange.cpp | 21 +-
 contrib/llvm/lib/Support/ConvertUTFWrapper.cpp | 55 +
 contrib/llvm/lib/Support/CrashRecoveryContext.cpp | 44 +-
 contrib/llvm/lib/Support/DataStream.cpp | 15 +-
 contrib/llvm/lib/Support/Disassembler.cpp | 10 +-
 contrib/llvm/lib/Support/Dwarf.cpp | 55 +-
 contrib/llvm/lib/Support/DynamicLibrary.cpp | 41 +-
 contrib/llvm/lib/Support/Errno.cpp | 33 +-
 contrib/llvm/lib/Support/ErrorHandling.cpp | 22 +
 contrib/llvm/lib/Support/FileOutputBuffer.cpp | 29 +-
 contrib/llvm/lib/Support/FileUtilities.cpp | 33 +-
 contrib/llvm/lib/Support/FormattedStream.cpp | 45 +-
 contrib/llvm/lib/Support/GraphWriter.cpp | 71 +-
 contrib/llvm/lib/Support/Host.cpp | 217 +-
 contrib/llvm/lib/Support/Locale.cpp | 33 +-
 contrib/llvm/lib/Support/LocaleGeneric.inc | 17 -
 contrib/llvm/lib/Support/LocaleWindows.inc | 15 -
 contrib/llvm/lib/Support/LocaleXlocale.inc | 61 -
 contrib/llvm/lib/Support/LockFileManager.cpp | 31 +-
 contrib/llvm/lib/Support/MD5.cpp | 286 +
 contrib/llvm/lib/Support/MemoryBuffer.cpp | 132 +-
 contrib/llvm/lib/Support/MemoryObject.cpp | 6 +-
 contrib/llvm/lib/Support/Path.cpp | 1132 ++-
 contrib/llvm/lib/Support/PathV2.cpp | 949 ---
 contrib/llvm/lib/Support/PrettyStackTrace.cpp | 58 +-
 contrib/llvm/lib/Support/Process.cpp | 18 +
 contrib/llvm/lib/Support/Program.cpp | 51 +-
 contrib/llvm/lib/Support/Regex.cpp | 9 +-
 contrib/llvm/lib/Support/SmallPtrSet.cpp | 9 +-
 contrib/llvm/lib/Support/SourceMgr.cpp | 21 +-
 .../llvm/lib/Support/StreamableMemoryObject.cpp | 11 +-
 contrib/llvm/lib/Support/StringRef.cpp | 31 +-
 contrib/llvm/lib/Support/StringRefMemoryObject.cpp | 29 +
 contrib/llvm/lib/Support/SystemUtils.cpp | 22 -
 contrib/llvm/lib/Support/TargetRegistry.cpp | 6 +-
 contrib/llvm/lib/Support/ThreadLocal.cpp | 2 +-
 contrib/llvm/lib/Support/Timer.cpp | 4 +-
 contrib/llvm/lib/Support/ToolOutputFile.cpp | 20 +-
 contrib/llvm/lib/Support/Triple.cpp | 27 +-
 contrib/llvm/lib/Support/Unicode.cpp | 367 +
 contrib/llvm/lib/Support/Unix/Memory.inc | 7 +
 contrib/llvm/lib/Support/Unix/Path.inc | 1197 ++-
 contrib/llvm/lib/Support/Unix/PathV2.inc | 693 --
 contrib/llvm/lib/Support/Unix/Process.inc | 119 +-
 contrib/llvm/lib/Support/Unix/Program.inc | 217 +-
 contrib/llvm/lib/Support/Unix/Signals.inc | 18 +-
 contrib/llvm/lib/Support/Unix/ThreadLocal.inc | 2 +-
 contrib/llvm/lib/Support/Unix/TimeValue.inc | 19 +-
 contrib/llvm/lib/Support/Unix/Unix.h | 14 +-
 .../llvm/lib/Support/Windows/DynamicLibrary.inc | 15 +-
 contrib/llvm/lib/Support/Windows/Memory.inc | 4 +-
 contrib/llvm/lib/Support/Windows/Path.inc | 1712 ++--
 contrib/llvm/lib/Support/Windows/PathV2.inc | 1022 ---
 contrib/llvm/lib/Support/Windows/Process.inc | 116 +-
 contrib/llvm/lib/Support/Windows/Program.inc | 288 +-
 contrib/llvm/lib/Support/Windows/RWMutex.inc | 4 +-
 contrib/llvm/lib/Support/Windows/Signals.inc | 56 +-
 contrib/llvm/lib/Support/Windows/TimeValue.inc | 38 +-
 contrib/llvm/lib/Support/Windows/Windows.h | 30 +-
 contrib/llvm/lib/Support/YAMLParser.cpp | 111 +-
 contrib/llvm/lib/Support/YAMLTraits.cpp | 100 +-
 contrib/llvm/lib/Support/raw_ostream.cpp | 48 +-
 contrib/llvm/lib/TableGen/Main.cpp | 2 +-
 contrib/llvm/lib/TableGen/Record.cpp | 31 +-
 contrib/llvm/lib/TableGen/TGParser.cpp | 12 +-
 contrib/llvm/lib/Target/AArch64/AArch64.td | 7 +-
 .../llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp | 172 +-
 .../llvm/lib/Target/AArch64/AArch64AsmPrinter.h | 4 -
 .../lib/Target/AArch64/AArch64BranchFixupPass.cpp | 2 +-
 .../llvm/lib/Target/AArch64/AArch64CallingConv.td | 9 +-
 .../lib/Target/AArch64/AArch64FrameLowering.cpp | 35 +-
 .../llvm/lib/Target/AArch64/AArch64FrameLowering.h | 2 +-
 .../lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 1065 ++-
 .../lib/Target/AArch64/AArch64ISelLowering.cpp | 1814 +++-
 .../llvm/lib/Target/AArch64/AArch64ISelLowering.h | 147 +-
 .../llvm/lib/Target/AArch64/AArch64InstrFormats.td | 528 ++
 .../llvm/lib/Target/AArch64/AArch64InstrInfo.cpp | 105 +-
 contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h | 4 -
 .../llvm/lib/Target/AArch64/AArch64InstrInfo.td | 87 +-
 .../llvm/lib/Target/AArch64/AArch64InstrNEON.td | 8671 ++++++++++++++++++++
 .../llvm/lib/Target/AArch64/AArch64MCInstLower.cpp | 7 +-
 .../lib/Target/AArch64/AArch64RegisterInfo.cpp | 7 +-
 .../llvm/lib/Target/AArch64/AArch64RegisterInfo.h | 7 +-
 .../llvm/lib/Target/AArch64/AArch64RegisterInfo.td | 176 +-
 .../llvm/lib/Target/AArch64/AArch64Subtarget.cpp | 28 +-
 contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h | 20 +-
 .../lib/Target/AArch64/AArch64TargetMachine.cpp | 1 +
 .../Target/AArch64/AsmParser/AArch64AsmParser.cpp | 521 +-
 .../AArch64/Disassembler/AArch64Disassembler.cpp | 795 +-
 .../AArch64/InstPrinter/AArch64InstPrinter.cpp | 131 +
 .../AArch64/InstPrinter/AArch64InstPrinter.h | 14 +-
 .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 4 +-
 .../AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 20 +-
 .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 5 +-
 .../Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 10 +-
 .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 106 +-
 .../AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 21 +-
 .../AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 5 +-
 .../AArch64/TargetInfo/AArch64TargetInfo.cpp | 2 +-
 .../lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 68 +-
 .../lib/Target/AArch64/Utils/AArch64BaseInfo.h | 65 +-
 contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp | 13 +-
 contrib/llvm/lib/Target/ARM/ARM.td | 94 +-
 contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp | 605 +-
 contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h | 8 +-
 contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp | 470 +-
 contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h | 28 +-
 .../llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp | 99 +-
 contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h | 27 +-
 contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h | 71 +-
 contrib/llvm/lib/Target/ARM/ARMCallingConv.td | 26 +-
 contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp | 6 +-
 .../llvm/lib/Target/ARM/ARMConstantIslandPass.cpp | 3 +-
 .../llvm/lib/Target/ARM/ARMConstantPoolValue.cpp | 50 +-
 contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h | 33 +
 .../llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp | 75 +-
 contrib/llvm/lib/Target/ARM/ARMFPUName.def | 32 +
 contrib/llvm/lib/Target/ARM/ARMFPUName.h | 26 +
 contrib/llvm/lib/Target/ARM/ARMFastISel.cpp | 375 +-
 contrib/llvm/lib/Target/ARM/ARMFeatures.h | 93 +
 contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp | 156 +-
 .../llvm/lib/Target/ARM/ARMHazardRecognizer.cpp | 10 +-
 contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h | 13 +-
 contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp | 562 +-
 contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp | 1722 ++--
 contrib/llvm/lib/Target/ARM/ARMISelLowering.h | 72 +-
 contrib/llvm/lib/Target/ARM/ARMInstrFormats.td | 290 +-
 contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp | 34 +-
 contrib/llvm/lib/Target/ARM/ARMInstrInfo.td | 885 +-
 contrib/llvm/lib/Target/ARM/ARMInstrNEON.td | 451 +-
 contrib/llvm/lib/Target/ARM/ARMInstrThumb.td | 263 +-
 contrib/llvm/lib/Target/ARM/ARMInstrThumb2.td | 849 +-
 contrib/llvm/lib/Target/ARM/ARMInstrVFP.td | 348 +-
 .../llvm/lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 115 +-
 contrib/llvm/lib/Target/ARM/ARMMCInstLower.cpp | 2 +-
 .../llvm/lib/Target/ARM/ARMMachineFunctionInfo.h | 78 +-
 contrib/llvm/lib/Target/ARM/ARMRegisterInfo.cpp | 5 +-
 contrib/llvm/lib/Target/ARM/ARMRegisterInfo.h | 6 +-
 contrib/llvm/lib/Target/ARM/ARMRegisterInfo.td | 84 +-
 contrib/llvm/lib/Target/ARM/ARMSchedule.td | 18 +
 contrib/llvm/lib/Target/ARM/ARMScheduleA9.td | 196 +-
 contrib/llvm/lib/Target/ARM/ARMScheduleSwift.td | 944 ++-
 .../llvm/lib/Target/ARM/ARMSelectionDAGInfo.cpp | 4 +-
 contrib/llvm/lib/Target/ARM/ARMSelectionDAGInfo.h | 4 +-
 contrib/llvm/lib/Target/ARM/ARMSubtarget.cpp | 120 +-
 contrib/llvm/lib/Target/ARM/ARMSubtarget.h | 79 +-
 contrib/llvm/lib/Target/ARM/ARMTargetMachine.cpp | 15 +-
 .../llvm/lib/Target/ARM/ARMTargetObjectFile.cpp | 2 +-
 .../llvm/lib/Target/ARM/ARMTargetTransformInfo.cpp | 113 +-
 .../llvm/lib/Target/ARM/AsmParser/ARMAsmParser.cpp | 1429 ++--
 .../Target/ARM/Disassembler/ARMDisassembler.cpp | 817 +-
 .../lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 284 +-
 .../lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 4 +
 .../Target/ARM/MCTargetDesc/ARMAddressingModes.h | 12 +-
 .../lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 53 +-
 .../llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h | 61 +-
 .../lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 735 +-
 .../lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h | 27 -
 .../lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.cpp | 2 -
 .../lib/Target/ARM/MCTargetDesc/ARMMCAsmInfo.h | 3 +-
 .../Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 102 +-
 .../Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 93 +-
 .../lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.h | 16 +-
 .../ARM/MCTargetDesc/ARMMachORelocationInfo.cpp | 43 +
 .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 129 +-
 .../lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.cpp | 157 +-
 .../lib/Target/ARM/MCTargetDesc/ARMUnwindOpAsm.h | 73 +-
 .../llvm/lib/Target/ARM/Thumb1FrameLowering.cpp | 36 +-
 contrib/llvm/lib/Target/ARM/Thumb1InstrInfo.cpp | 2 +-
 contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.cpp | 20 +-
 contrib/llvm/lib/Target/ARM/Thumb1RegisterInfo.h | 2 +-
 contrib/llvm/lib/Target/ARM/Thumb2ITBlockPass.cpp | 71 +-
 contrib/llvm/lib/Target/ARM/Thumb2InstrInfo.cpp | 25 +-
 contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.cpp | 6 +-
 contrib/llvm/lib/Target/ARM/Thumb2RegisterInfo.h | 6 +-
 contrib/llvm/lib/Target/CppBackend/CPPBackend.cpp | 27 +-
 contrib/llvm/lib/Target/Hexagon/Hexagon.h | 6 +-
 contrib/llvm/lib/Target/Hexagon/Hexagon.td | 30 +-
 .../llvm/lib/Target/Hexagon/HexagonAsmPrinter.cpp | 4 +-
 .../lib/Target/Hexagon/HexagonCallingConvLower.cpp | 8 +-
 .../lib/Target/Hexagon/HexagonCallingConvLower.h | 5 +-
 .../lib/Target/Hexagon/HexagonCopyToCombine.cpp | 677 ++
 .../lib/Target/Hexagon/HexagonFrameLowering.cpp | 80 +-
 .../lib/Target/Hexagon/HexagonHardwareLoops.cpp | 10 +-
 .../lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 72 +-
 .../lib/Target/Hexagon/HexagonISelLowering.cpp | 104 +-
 .../llvm/lib/Target/Hexagon/HexagonISelLowering.h | 17 +-
 .../llvm/lib/Target/Hexagon/HexagonInstrFormats.td | 5 +-
 .../llvm/lib/Target/Hexagon/HexagonInstrInfo.cpp | 857 +-
 contrib/llvm/lib/Target/Hexagon/HexagonInstrInfo.h | 15 +-
 .../llvm/lib/Target/Hexagon/HexagonInstrInfo.td | 105 +-
 .../llvm/lib/Target/Hexagon/HexagonInstrInfoV4.td | 265 +-
 .../llvm/lib/Target/Hexagon/HexagonInstrInfoV5.td | 23 +-
 .../llvm/lib/Target/Hexagon/HexagonMCInstLower.cpp | 2 +-
 .../Target/Hexagon/HexagonMachineFunctionInfo.cpp | 16 +
 .../Target/Hexagon/HexagonMachineFunctionInfo.h | 6 +-
 .../lib/Target/Hexagon/HexagonMachineScheduler.cpp | 21 +-
 .../lib/Target/Hexagon/HexagonMachineScheduler.h | 5 +-
 .../lib/Target/Hexagon/HexagonNewValueJump.cpp | 1 +
 .../llvm/lib/Target/Hexagon/HexagonPeephole.cpp | 2 +-
 .../lib/Target/Hexagon/HexagonRegisterInfo.cpp | 26 +-
 .../llvm/lib/Target/Hexagon/HexagonRegisterInfo.h | 8 +-
 .../llvm/lib/Target/Hexagon/HexagonRegisterInfo.td | 4 +-
 .../lib/Target/Hexagon/HexagonSelectionDAGInfo.cpp | 2 +-
 .../lib/Target/Hexagon/HexagonSelectionDAGInfo.h | 2 +-
 .../Hexagon/HexagonSplitConst32AndConst64.cpp | 174 +
 .../llvm/lib/Target/Hexagon/HexagonSubtarget.cpp | 2 +
 contrib/llvm/lib/Target/Hexagon/HexagonSubtarget.h | 2 +-
 .../lib/Target/Hexagon/HexagonTargetMachine.cpp | 31 +-
 .../lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 8 +-
 .../lib/Target/Hexagon/HexagonTargetObjectFile.h | 1 +
 .../lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 1833 +----
 .../Hexagon/InstPrinter/HexagonInstPrinter.cpp | 15 +-
 .../Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 5 +-
 .../Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 6 +-
 .../Target/Hexagon/MCTargetDesc/HexagonMCAsmInfo.h | 9 +-
 .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 11 +-
 .../Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 572 --
 .../MBlaze/Disassembler/MBlazeDisassembler.cpp | 719 --
 .../MBlaze/Disassembler/MBlazeDisassembler.h | 49 -
 .../MBlaze/InstPrinter/MBlazeInstPrinter.cpp | 71 -
 .../Target/MBlaze/InstPrinter/MBlazeInstPrinter.h | 43 -
 contrib/llvm/lib/Target/MBlaze/MBlaze.h | 32 -
 contrib/llvm/lib/Target/MBlaze/MBlaze.td | 73 -
 .../llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 326 -
 .../llvm/lib/Target/MBlaze/MBlazeCallingConv.td | 24 -
 .../lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 254 -
 .../llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp | 488 --
 .../llvm/lib/Target/MBlaze/MBlazeFrameLowering.h | 56 -
 .../llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp | 277 -
 .../llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp | 1154 ---
 .../llvm/lib/Target/MBlaze/MBlazeISelLowering.h | 179 -
 contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td | 219 -
 contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td | 229 -
 .../llvm/lib/Target/MBlaze/MBlazeInstrFormats.td | 228 -
 contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp | 297 -
 contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h | 240 -
 contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td | 1051 ---
 .../llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 112 -
 .../llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h | 33 -
 contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td | 131 -
 .../llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp | 167 -
 contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h | 47 -
 .../lib/Target/MBlaze/MBlazeMachineFunction.cpp | 14 -
 .../llvm/lib/Target/MBlaze/MBlazeMachineFunction.h | 169 -
 .../llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 145 -
 .../llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h | 71 -
 .../llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td | 148 -
 contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h | 47 -
 contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td | 50 -
 contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td | 236 -
 contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td | 267 -
 .../lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp | 23 -
 .../lib/Target/MBlaze/MBlazeSelectionDAGInfo.h | 31 -
 contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp | 56 -
 contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h | 75 -
 .../llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp | 81 -
 .../llvm/lib/Target/MBlaze/MBlazeTargetMachine.h | 80 -
 .../lib/Target/MBlaze/MBlazeTargetObjectFile.cpp | 90 -
 .../lib/Target/MBlaze/MBlazeTargetObjectFile.h | 40 -
 .../MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp | 171 -
 .../Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h | 237 -
 .../MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp | 77 -
 .../Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp | 26 -
 .../Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h | 30 -
 .../MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp | 222 -
 .../MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp | 141 -
 .../MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h | 56 -
 .../Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp | 19 -
 .../Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp | 5 +-
 .../Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.h | 7 +-
 .../llvm/lib/Target/MSP430/MSP430AsmPrinter.cpp | 2 +-
 .../llvm/lib/Target/MSP430/MSP430CallingConv.td | 7 +-
 .../llvm/lib/Target/MSP430/MSP430FrameLowering.h | 4 +-
 .../llvm/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 9 +-
 .../llvm/lib/Target/MSP430/MSP430ISelLowering.cpp | 191 +-
 .../llvm/lib/Target/MSP430/MSP430ISelLowering.h | 13 +-
 contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.cpp | 7 +-
 contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.h | 1 +
 contrib/llvm/lib/Target/MSP430/MSP430InstrInfo.td | 4 +-
 .../llvm/lib/Target/MSP430/MSP430MCInstLower.cpp | 2 +-
 .../llvm/lib/Target/MSP430/MSP430RegisterInfo.cpp | 6 +-
 .../llvm/lib/Target/MSP430/MSP430RegisterInfo.h | 3 +-
 .../llvm/lib/Target/MSP430/MSP430RegisterInfo.td | 2 +-
 .../llvm/lib/Target/MSP430/MSP430TargetMachine.cpp | 4 +-
 contrib/llvm/lib/Target/Mangler.cpp | 149 +-
 .../lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 1572 ++--
 .../Target/Mips/Disassembler/MipsDisassembler.cpp | 446 +-
 .../Target/Mips/InstPrinter/MipsInstPrinter.cpp | 89 +-
 .../lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 9 +-
 .../Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 43 +-
 .../Mips/MCTargetDesc/MipsDirectObjLower.cpp | 81 -
 .../Target/Mips/MCTargetDesc/MipsDirectObjLower.h | 28 -
 .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 39 +
 .../Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 89 -
 .../lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 43 -
 .../lib/Target/Mips/MCTargetDesc/MipsFixupKinds.h | 39 +
 .../lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 3 +-
 .../lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.h | 7 +-
 .../Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 210 +-
 .../Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 49 +-
 .../Target/Mips/MCTargetDesc/MipsMCTargetDesc.h | 16 +-
 .../Mips/MCTargetDesc/MipsTargetStreamer.cpp | 67 +
 contrib/llvm/lib/Target/Mips/MSA.txt | 78 +
 .../llvm/lib/Target/Mips/MicroMipsInstrFormats.td | 196 +-
 contrib/llvm/lib/Target/Mips/MicroMipsInstrInfo.td | 228 +-
 contrib/llvm/lib/Target/Mips/Mips.h | 1 -
 contrib/llvm/lib/Target/Mips/Mips.td | 3 +
 .../llvm/lib/Target/Mips/Mips16FrameLowering.cpp | 26 +-
 contrib/llvm/lib/Target/Mips/Mips16FrameLowering.h | 2 +-
 contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp | 517 ++
 contrib/llvm/lib/Target/Mips/Mips16HardFloat.h | 54 +
 .../llvm/lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 21 +-
 contrib/llvm/lib/Target/Mips/Mips16ISelDAGToDAG.h | 2 +-
 .../llvm/lib/Target/Mips/Mips16ISelLowering.cpp | 239 +-
 contrib/llvm/lib/Target/Mips/Mips16ISelLowering.h | 4 +-
 contrib/llvm/lib/Target/Mips/Mips16InstrFormats.td | 18 +-
 contrib/llvm/lib/Target/Mips/Mips16InstrInfo.cpp | 203 +-
 contrib/llvm/lib/Target/Mips/Mips16InstrInfo.h | 15 +-
 contrib/llvm/lib/Target/Mips/Mips16InstrInfo.td | 188 +-
 .../llvm/lib/Target/Mips/Mips16RegisterInfo.cpp | 17 +-
 contrib/llvm/lib/Target/Mips/Mips16RegisterInfo.h | 4 +-
 contrib/llvm/lib/Target/Mips/Mips64InstrInfo.td | 440 +-
 .../llvm/lib/Target/Mips/MipsAnalyzeImmediate.cpp | 2 +-
 .../llvm/lib/Target/Mips/MipsAnalyzeImmediate.h | 10 +-
 contrib/llvm/lib/Target/Mips/MipsAsmPrinter.cpp | 141 +-
 contrib/llvm/lib/Target/Mips/MipsAsmPrinter.h | 25 +-
 contrib/llvm/lib/Target/Mips/MipsCallingConv.td | 30 +-
 contrib/llvm/lib/Target/Mips/MipsCodeEmitter.cpp | 40 +-
 contrib/llvm/lib/Target/Mips/MipsCondMov.td | 210 +-
 .../lib/Target/Mips/MipsConstantIslandPass.cpp | 1470 +++-
 contrib/llvm/lib/Target/Mips/MipsDSPInstrInfo.td | 468 +-
 .../llvm/lib/Target/Mips/MipsDelaySlotFiller.cpp | 38 +-
 contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.cpp | 80 +-
 contrib/llvm/lib/Target/Mips/MipsISelDAGToDAG.h | 38 +
 contrib/llvm/lib/Target/Mips/MipsISelLowering.cpp | 824 +-
 contrib/llvm/lib/Target/Mips/MipsISelLowering.h | 184 +-
 contrib/llvm/lib/Target/Mips/MipsInstrFPU.td | 456 +-
 contrib/llvm/lib/Target/Mips/MipsInstrFormats.td | 143 +-
 contrib/llvm/lib/Target/Mips/MipsInstrInfo.cpp | 43 +-
 contrib/llvm/lib/Target/Mips/MipsInstrInfo.h | 16 +-
 contrib/llvm/lib/Target/Mips/MipsInstrInfo.td | 983 ++-
 contrib/llvm/lib/Target/Mips/MipsJITInfo.cpp | 6 +-
 contrib/llvm/lib/Target/Mips/MipsLongBranch.cpp | 25 +-
 contrib/llvm/lib/Target/Mips/MipsMCInstLower.cpp | 5 +-
 contrib/llvm/lib/Target/Mips/MipsMCInstLower.h | 4 +-
 .../llvm/lib/Target/Mips/MipsMSAInstrFormats.td | 406 +
 contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td | 3694 +++++++++
 .../llvm/lib/Target/Mips/MipsMachineFunction.cpp | 72 +-
 contrib/llvm/lib/Target/Mips/MipsMachineFunction.h | 103 +-
 contrib/llvm/lib/Target/Mips/MipsOs16.cpp | 45 +-
 contrib/llvm/lib/Target/Mips/MipsRegisterInfo.cpp | 79 +-
 contrib/llvm/lib/Target/Mips/MipsRegisterInfo.h | 8 +-
 contrib/llvm/lib/Target/Mips/MipsRegisterInfo.td | 411 +-
 .../llvm/lib/Target/Mips/MipsSEFrameLowering.cpp | 184 +-
 contrib/llvm/lib/Target/Mips/MipsSEFrameLowering.h | 2 +-
 .../llvm/lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 443 +-
 contrib/llvm/lib/Target/Mips/MipsSEISelDAGToDAG.h | 48 +-
 .../llvm/lib/Target/Mips/MipsSEISelLowering.cpp | 2329 +++++-
 contrib/llvm/lib/Target/Mips/MipsSEISelLowering.h | 48 +-
 contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.cpp | 310 +-
 contrib/llvm/lib/Target/Mips/MipsSEInstrInfo.h | 40 +-
 .../llvm/lib/Target/Mips/MipsSERegisterInfo.cpp | 85 +-
 contrib/llvm/lib/Target/Mips/MipsSERegisterInfo.h | 5 +-
 contrib/llvm/lib/Target/Mips/MipsSchedule.td | 15 +-
 contrib/llvm/lib/Target/Mips/MipsSubtarget.cpp | 34 +-
 contrib/llvm/lib/Target/Mips/MipsSubtarget.h | 31 +-
 contrib/llvm/lib/Target/Mips/MipsTargetMachine.cpp | 21 +-
 contrib/llvm/lib/Target/Mips/MipsTargetMachine.h | 6 +
 contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h | 44 +
 .../Target/NVPTX/InstPrinter/NVPTXInstPrinter.cpp | 290 +-
 .../Target/NVPTX/InstPrinter/NVPTXInstPrinter.h | 53 +
 .../lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 1 -
 .../Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 12 +-
 .../lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.h | 2 +-
 .../NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 17 +
 contrib/llvm/lib/Target/NVPTX/ManagedStringPool.h | 2 +-
 contrib/llvm/lib/Target/NVPTX/NVPTX.h | 55 +-
 contrib/llvm/lib/Target/NVPTX/NVPTX.td | 6 +
 .../llvm/lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 2 +-
 contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 595 +-
 contrib/llvm/lib/Target/NVPTX/NVPTXAsmPrinter.h | 27 +-
 .../llvm/lib/Target/NVPTX/NVPTXFrameLowering.cpp | 41 +-
 .../llvm/lib/Target/NVPTX/NVPTXGenericToNVVM.cpp | 4 +-
 .../llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 1045 ++-
 contrib/llvm/lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 31 +-
 .../llvm/lib/Target/NVPTX/NVPTXISelLowering.cpp | 1627 ++--
 contrib/llvm/lib/Target/NVPTX/NVPTXISelLowering.h | 39 +-
 contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.cpp | 51 +-
 contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.h | 1 +
 contrib/llvm/lib/Target/NVPTX/NVPTXInstrInfo.td | 2114 +++--
 contrib/llvm/lib/Target/NVPTX/NVPTXIntrinsics.td | 632 +-
 contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp | 46 +
 contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h | 83 +
 contrib/llvm/lib/Target/NVPTX/NVPTXNumRegisters.h | 16 -
 .../lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp | 225 +
 .../llvm/lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 13 +-
 contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.h | 2 +-
 contrib/llvm/lib/Target/NVPTX/NVPTXRegisterInfo.td | 26 +-
 contrib/llvm/lib/Target/NVPTX/NVPTXSection.h | 4 +-
 .../llvm/lib/Target/NVPTX/NVPTXSplitBBatBar.cpp | 2 +-
 contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.cpp | 20 +-
 contrib/llvm/lib/Target/NVPTX/NVPTXSubtarget.h | 2 +-
 .../llvm/lib/Target/NVPTX/NVPTXTargetMachine.cpp | 64 +-
 .../llvm/lib/Target/NVPTX/NVPTXTargetObjectFile.h | 44 +-
 contrib/llvm/lib/Target/NVPTX/NVVMReflect.cpp | 4 +-
 .../lib/Target/PowerPC/AsmParser/PPCAsmParser.cpp | 738 +-
 .../Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 189 +-
 .../Target/PowerPC/InstPrinter/PPCInstPrinter.h | 18 +-
 .../Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 101 +-
 .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 340 +-
 .../Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 28 +-
 .../Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 9 +-
 .../lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.h | 3 +-
 .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 102 +-
 .../lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp | 155 +
 .../lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h | 96 +
 .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 84 +-
 .../Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 8 +-
 .../PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp | 389 +
 .../Target/PowerPC/MCTargetDesc/PPCPredicates.cpp | 32 +
 .../Target/PowerPC/MCTargetDesc/PPCPredicates.h | 32 +-
 contrib/llvm/lib/Target/PowerPC/PPC.h | 24 +-
 contrib/llvm/lib/Target/PowerPC/PPC.td | 66 +-
 contrib/llvm/lib/Target/PowerPC/PPCAsmPrinter.cpp | 218 +-
 contrib/llvm/lib/Target/PowerPC/PPCCTRLoops.cpp | 1164 ++-
 contrib/llvm/lib/Target/PowerPC/PPCCallingConv.td | 70 +-
 contrib/llvm/lib/Target/PowerPC/PPCCodeEmitter.cpp | 51 +-
 contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp | 2236 +++++
 .../llvm/lib/Target/PowerPC/PPCFrameLowering.cpp | 635 +-
 contrib/llvm/lib/Target/PowerPC/PPCFrameLowering.h | 10 +
 .../lib/Target/PowerPC/PPCHazardRecognizers.cpp | 6 +-
 .../llvm/lib/Target/PowerPC/PPCHazardRecognizers.h | 4 +-
 .../llvm/lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 111 +-
 .../llvm/lib/Target/PowerPC/PPCISelLowering.cpp | 1056 ++-
 contrib/llvm/lib/Target/PowerPC/PPCISelLowering.h | 143 +-
 contrib/llvm/lib/Target/PowerPC/PPCInstr64Bit.td | 165 +-
 contrib/llvm/lib/Target/PowerPC/PPCInstrAltivec.td | 52 +-
 contrib/llvm/lib/Target/PowerPC/PPCInstrFormats.td | 71 +-
 contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.cpp | 65 +-
 contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.h | 7 +-
 contrib/llvm/lib/Target/PowerPC/PPCInstrInfo.td | 805 +-
 contrib/llvm/lib/Target/PowerPC/PPCJITInfo.cpp | 40 +-
 contrib/llvm/lib/Target/PowerPC/PPCMCInstLower.cpp | 62 +-
 .../lib/Target/PowerPC/PPCMachineFunctionInfo.h | 9 +-
 .../llvm/lib/Target/PowerPC/PPCRegisterInfo.cpp | 264 +-
 contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.h | 11 +-
 contrib/llvm/lib/Target/PowerPC/PPCRegisterInfo.td | 20 +-
 contrib/llvm/lib/Target/PowerPC/PPCSchedule.td | 8 +
 contrib/llvm/lib/Target/PowerPC/PPCScheduleA2.td | 841 +-
 .../llvm/lib/Target/PowerPC/PPCScheduleE500mc.td | 2 +
 .../llvm/lib/Target/PowerPC/PPCScheduleE5500.td | 1 +
 contrib/llvm/lib/Target/PowerPC/PPCSubtarget.cpp | 154 +-
 contrib/llvm/lib/Target/PowerPC/PPCSubtarget.h | 28 +-
 .../llvm/lib/Target/PowerPC/PPCTargetMachine.cpp | 16 +-
 .../lib/Target/PowerPC/PPCTargetObjectFile.cpp | 67 +
 .../llvm/lib/Target/PowerPC/PPCTargetObjectFile.h | 35 +
 .../llvm/lib/Target/PowerPC/PPCTargetStreamer.h | 23 +
 .../lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 9 +
 .../PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 5 +-
 contrib/llvm/lib/Target/R600/AMDGPU.h | 66 +-
 contrib/llvm/lib/Target/R600/AMDGPU.td | 85 +
 contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.cpp | 86 +-
 contrib/llvm/lib/Target/R600/AMDGPUAsmPrinter.h | 12 +-
 contrib/llvm/lib/Target/R600/AMDGPUCallingConv.td | 46 +-
 .../llvm/lib/Target/R600/AMDGPUFrameLowering.cpp | 23 +-
 .../llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp | 585 ++
 .../llvm/lib/Target/R600/AMDGPUISelLowering.cpp | 445 +-
 contrib/llvm/lib/Target/R600/AMDGPUISelLowering.h | 56 +-
 .../lib/Target/R600/AMDGPUIndirectAddressing.cpp | 343 -
 contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.cpp | 140 +-
 contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.h | 50 +-
 contrib/llvm/lib/Target/R600/AMDGPUInstrInfo.td | 22 +-
 contrib/llvm/lib/Target/R600/AMDGPUInstructions.td | 268 +-
 contrib/llvm/lib/Target/R600/AMDGPUIntrinsics.td | 2 +
 contrib/llvm/lib/Target/R600/AMDGPUMCInstLower.cpp | 43 +-
 .../llvm/lib/Target/R600/AMDGPUMachineFunction.cpp | 9 +-
 .../llvm/lib/Target/R600/AMDGPUMachineFunction.h | 9 +-
 .../llvm/lib/Target/R600/AMDGPURegisterInfo.cpp | 38 +-
 contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.h | 11 +-
 contrib/llvm/lib/Target/R600/AMDGPURegisterInfo.td | 3 +-
 .../llvm/lib/Target/R600/AMDGPUStructurizeCFG.cpp | 896 --
 contrib/llvm/lib/Target/R600/AMDGPUSubtarget.cpp | 78 +-
 contrib/llvm/lib/Target/R600/AMDGPUSubtarget.h | 33 +-
 .../llvm/lib/Target/R600/AMDGPUTargetMachine.cpp | 81 +-
 contrib/llvm/lib/Target/R600/AMDGPUTargetMachine.h | 61 +-
 .../lib/Target/R600/AMDGPUTargetTransformInfo.cpp | 90 +
 contrib/llvm/lib/Target/R600/AMDIL.h | 121 -
 contrib/llvm/lib/Target/R600/AMDIL7XXDevice.cpp | 115 -
 contrib/llvm/lib/Target/R600/AMDIL7XXDevice.h | 72 -
 contrib/llvm/lib/Target/R600/AMDILBase.td | 64 -
 .../llvm/lib/Target/R600/AMDILCFGStructurizer.cpp | 4155 ++++------
 contrib/llvm/lib/Target/R600/AMDILDevice.cpp | 132 -
 contrib/llvm/lib/Target/R600/AMDILDevice.h | 117 -
 contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp | 97 -
 contrib/llvm/lib/Target/R600/AMDILDeviceInfo.h | 88 -
 contrib/llvm/lib/Target/R600/AMDILDevices.h | 19 -
 .../llvm/lib/Target/R600/AMDILEvergreenDevice.cpp | 169 -
 .../llvm/lib/Target/R600/AMDILEvergreenDevice.h | 93 -
 contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp | 667 --
 contrib/llvm/lib/Target/R600/AMDILISelLowering.cpp | 61 +-
 contrib/llvm/lib/Target/R600/AMDILInstrInfo.td | 67 +-
 .../llvm/lib/Target/R600/AMDILIntrinsicInfo.cpp | 4 +-
 contrib/llvm/lib/Target/R600/AMDILNIDevice.cpp | 65 -
 contrib/llvm/lib/Target/R600/AMDILNIDevice.h | 57 -
 contrib/llvm/lib/Target/R600/AMDILSIDevice.cpp | 48 -
 contrib/llvm/lib/Target/R600/AMDILSIDevice.h | 39 -
 .../Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 137 +-
 .../Target/R600/InstPrinter/AMDGPUInstPrinter.h | 4 +
 .../Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 8 +-
 .../Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 13 +-
 .../lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 4 +-
 .../R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp | 21 +
 .../Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 1 +
 .../R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 2 +-
 .../Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 4 +-
 .../Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 102 +-
 contrib/llvm/lib/Target/R600/Processors.td | 48 +-
 .../llvm/lib/Target/R600/R600ClauseMergePass.cpp | 204 +
 .../lib/Target/R600/R600ControlFlowFinalizer.cpp | 152 +-
 contrib/llvm/lib/Target/R600/R600Defines.h | 122 +-
 .../llvm/lib/Target/R600/R600EmitClauseMarkers.cpp | 164 +-
 .../lib/Target/R600/R600ExpandSpecialInstrs.cpp | 90 +-
 contrib/llvm/lib/Target/R600/R600ISelLowering.cpp | 1299 ++-
 contrib/llvm/lib/Target/R600/R600ISelLowering.h | 14 +-
 contrib/llvm/lib/Target/R600/R600InstrFormats.td | 492 ++
 contrib/llvm/lib/Target/R600/R600InstrInfo.cpp | 825 +-
 contrib/llvm/lib/Target/R600/R600InstrInfo.h | 121 +-
 contrib/llvm/lib/Target/R600/R600Instructions.td | 1785 ++--
 contrib/llvm/lib/Target/R600/R600Intrinsics.td | 44 +
 .../lib/Target/R600/R600MachineFunctionInfo.cpp | 6 +-
 .../llvm/lib/Target/R600/R600MachineFunctionInfo.h | 3 +-
 .../llvm/lib/Target/R600/R600MachineScheduler.cpp | 300 +-
 .../llvm/lib/Target/R600/R600MachineScheduler.h | 44 +-
 .../Target/R600/R600OptimizeVectorRegisters.cpp | 380 +
 contrib/llvm/lib/Target/R600/R600Packetizer.cpp | 311 +-
 contrib/llvm/lib/Target/R600/R600RegisterInfo.cpp | 53 +-
 contrib/llvm/lib/Target/R600/R600RegisterInfo.h | 13 +-
 contrib/llvm/lib/Target/R600/R600RegisterInfo.td | 79 +-
 contrib/llvm/lib/Target/R600/R600Schedule.td | 6 +-
 .../Target/R600/R600TextureIntrinsicsReplacer.cpp | 303 +
 .../llvm/lib/Target/R600/SIAnnotateControlFlow.cpp | 16 +-
 contrib/llvm/lib/Target/R600/SIDefines.h | 16 +
 contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp | 263 +
 contrib/llvm/lib/Target/R600/SIISelLowering.cpp | 756 +-
 contrib/llvm/lib/Target/R600/SIISelLowering.h | 31 +-
 contrib/llvm/lib/Target/R600/SIInsertWaits.cpp | 34 +-
 contrib/llvm/lib/Target/R600/SIInstrFormats.td | 116 +-
 contrib/llvm/lib/Target/R600/SIInstrInfo.cpp | 524 +-
 contrib/llvm/lib/Target/R600/SIInstrInfo.h | 80 +-
 contrib/llvm/lib/Target/R600/SIInstrInfo.td | 286 +-
 contrib/llvm/lib/Target/R600/SIInstructions.td | 798 +-
 contrib/llvm/lib/Target/R600/SIIntrinsics.td | 26 +-
 .../llvm/lib/Target/R600/SILowerControlFlow.cpp | 38 +-
 .../llvm/lib/Target/R600/SIMachineFunctionInfo.cpp | 4 +
 .../llvm/lib/Target/R600/SIMachineFunctionInfo.h | 1 +
 contrib/llvm/lib/Target/R600/SIRegisterInfo.cpp | 88 +-
 contrib/llvm/lib/Target/R600/SIRegisterInfo.h | 26 +-
 contrib/llvm/lib/Target/R600/SIRegisterInfo.td | 18 +-
 contrib/llvm/lib/Target/R600/SITypeRewriter.cpp | 162 +
 .../Target/R600/TargetInfo/AMDGPUTargetInfo.cpp | 2 +-
 contrib/llvm/lib/Target/Sparc/DelaySlotFiller.cpp | 267 +-
 contrib/llvm/lib/Target/Sparc/FPMover.cpp | 141 -
 .../lib/Target/Sparc/MCTargetDesc/SparcBaseInfo.h | 22 +-
 .../Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 15 +-
 .../lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.h | 7 +-
 contrib/llvm/lib/Target/Sparc/Sparc.h | 24 +-
 contrib/llvm/lib/Target/Sparc/Sparc.td | 6 +-
 contrib/llvm/lib/Target/Sparc/SparcAsmPrinter.cpp | 112 +-
 contrib/llvm/lib/Target/Sparc/SparcCallingConv.td | 13 +-
 contrib/llvm/lib/Target/Sparc/SparcCodeEmitter.cpp | 245 +
 .../llvm/lib/Target/Sparc/SparcFrameLowering.cpp | 228 +-
 contrib/llvm/lib/Target/Sparc/SparcFrameLowering.h | 20 +-
 .../llvm/lib/Target/Sparc/SparcISelDAGToDAG.cpp | 20 +-
 .../llvm/lib/Target/Sparc/SparcISelLowering.cpp | 1169 ++-
 contrib/llvm/lib/Target/Sparc/SparcISelLowering.h | 50 +-
 contrib/llvm/lib/Target/Sparc/SparcInstr64Bit.td | 103 +-
 contrib/llvm/lib/Target/Sparc/SparcInstrFormats.td | 92 +-
 contrib/llvm/lib/Target/Sparc/SparcInstrInfo.cpp | 177 +-
 contrib/llvm/lib/Target/Sparc/SparcInstrInfo.h | 15 +-
 contrib/llvm/lib/Target/Sparc/SparcInstrInfo.td | 528 +-
 contrib/llvm/lib/Target/Sparc/SparcJITInfo.cpp | 165 +
 contrib/llvm/lib/Target/Sparc/SparcJITInfo.h | 67 +
 .../lib/Target/Sparc/SparcMachineFunctionInfo.h | 12 +-
 .../llvm/lib/Target/Sparc/SparcRegisterInfo.cpp | 166 +-
 contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.h | 10 +-
 contrib/llvm/lib/Target/Sparc/SparcRegisterInfo.td | 124 +-
 contrib/llvm/lib/Target/Sparc/SparcRelocations.h | 41 +
 contrib/llvm/lib/Target/Sparc/SparcSubtarget.cpp | 33 +-
 contrib/llvm/lib/Target/Sparc/SparcSubtarget.h | 16 +-
 .../llvm/lib/Target/Sparc/SparcTargetMachine.cpp | 9 +-
 contrib/llvm/lib/Target/Sparc/SparcTargetMachine.h | 6 +
 .../Target/Sparc/TargetInfo/SparcTargetInfo.cpp | 8 +-
 .../Target/SystemZ/AsmParser/SystemZAsmParser.cpp | 475 +-
 .../SystemZ/Disassembler/SystemZDisassembler.cpp | 323 +
 .../SystemZ/InstPrinter/SystemZInstPrinter.cpp | 23 +-
 .../SystemZ/InstPrinter/SystemZInstPrinter.h | 3 +-
 .../SystemZ/MCTargetDesc/SystemZMCAsmBackend.cpp | 49 +-
 .../SystemZ/MCTargetDesc/SystemZMCAsmInfo.cpp | 4 +-
 .../Target/SystemZ/MCTargetDesc/SystemZMCAsmInfo.h | 7 +-
 .../SystemZ/MCTargetDesc/SystemZMCCodeEmitter.cpp | 114 +-
 .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.cpp | 81 +-
 .../SystemZ/MCTargetDesc/SystemZMCTargetDesc.h | 38 +-
 contrib/llvm/lib/Target/SystemZ/README.txt | 65 +-
 contrib/llvm/lib/Target/SystemZ/SystemZ.h | 42 +-
 contrib/llvm/lib/Target/SystemZ/SystemZ.td | 7 +-
 .../llvm/lib/Target/SystemZ/SystemZAsmPrinter.cpp | 136 +-
 .../llvm/lib/Target/SystemZ/SystemZCallingConv.td | 4 +-
 .../Target/SystemZ/SystemZConstantPoolValue.cpp | 2 +-
 .../llvm/lib/Target/SystemZ/SystemZElimCompare.cpp | 471 ++
 .../lib/Target/SystemZ/SystemZFrameLowering.cpp | 108 +-
 .../llvm/lib/Target/SystemZ/SystemZFrameLowering.h | 27 +-
 .../lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 603 +-
 .../lib/Target/SystemZ/SystemZISelLowering.cpp | 1523 +++-
 .../llvm/lib/Target/SystemZ/SystemZISelLowering.h | 139 +-
 contrib/llvm/lib/Target/SystemZ/SystemZInstrFP.td | 320 +-
 .../llvm/lib/Target/SystemZ/SystemZInstrFormats.td | 1242 ++-
 .../llvm/lib/Target/SystemZ/SystemZInstrInfo.cpp | 907 +-
 contrib/llvm/lib/Target/SystemZ/SystemZInstrInfo.h | 150 +-
 .../llvm/lib/Target/SystemZ/SystemZInstrInfo.td | 1226 ++-
 .../llvm/lib/Target/SystemZ/SystemZLongBranch.cpp | 462 ++
 .../llvm/lib/Target/SystemZ/SystemZMCInstLower.cpp | 116 +-
 .../llvm/lib/Target/SystemZ/SystemZMCInstLower.h | 15 +-
 .../Target/SystemZ/SystemZMachineFunctionInfo.cpp | 17 +
 .../Target/SystemZ/SystemZMachineFunctionInfo.h | 12 +-
 contrib/llvm/lib/Target/SystemZ/SystemZOperands.td | 153 +-
 .../llvm/lib/Target/SystemZ/SystemZOperators.td | 208 +-
 contrib/llvm/lib/Target/SystemZ/SystemZPatterns.td | 99 +-
 .../llvm/lib/Target/SystemZ/SystemZProcessors.td | 46 +
 .../lib/Target/SystemZ/SystemZRegisterInfo.cpp | 37 +-
 .../llvm/lib/Target/SystemZ/SystemZRegisterInfo.h | 16 +-
 .../llvm/lib/Target/SystemZ/SystemZRegisterInfo.td | 71 +-
 .../lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp | 293 +
 .../lib/Target/SystemZ/SystemZSelectionDAGInfo.h | 80 +
 .../llvm/lib/Target/SystemZ/SystemZShortenInst.cpp | 163 +
 .../llvm/lib/Target/SystemZ/SystemZSubtarget.cpp | 15 +-
 contrib/llvm/lib/Target/SystemZ/SystemZSubtarget.h | 22 +
 .../lib/Target/SystemZ/SystemZTargetMachine.cpp | 50 +-
 .../llvm/lib/Target/SystemZ/SystemZTargetMachine.h | 4 +-
 contrib/llvm/lib/Target/Target.cpp | 8 +
 contrib/llvm/lib/Target/TargetLibraryInfo.cpp | 44 +-
 .../llvm/lib/Target/TargetLoweringObjectFile.cpp | 20 +-
 contrib/llvm/lib/Target/TargetMachine.cpp | 6 +-
 contrib/llvm/lib/Target/TargetMachineC.cpp | 56 +-
 contrib/llvm/lib/Target/TargetSubtargetInfo.cpp | 19 +
 .../llvm/lib/Target/X86/AsmParser/X86AsmParser.cpp | 412 +-
 .../Target/X86/Disassembler/X86Disassembler.cpp | 145 +-
 .../X86/Disassembler/X86DisassemblerDecoder.c | 267 +-
 .../X86/Disassembler/X86DisassemblerDecoder.h | 106 +-
 .../Disassembler/X86DisassemblerDecoderCommon.h | 165 +-
 .../Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 34 +-
 .../lib/Target/X86/InstPrinter/X86ATTInstPrinter.h | 22 +-
 .../Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 23 +-
 .../Target/X86/InstPrinter/X86IntelInstPrinter.h | 52 +-
 .../lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 403 +-
 .../llvm/lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 82 +-
 .../Target/X86/MCTargetDesc/X86ELFObjectWriter.cpp | 22 +-
 .../X86/MCTargetDesc/X86ELFRelocationInfo.cpp | 135 +
 .../lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 6 +-
 .../lib/Target/X86/MCTargetDesc/X86MCAsmInfo.h | 3 +-
 .../Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 335 +-
 .../Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 35 +-
 .../lib/Target/X86/MCTargetDesc/X86MCTargetDesc.h | 13 +-
 .../X86/MCTargetDesc/X86MachORelocationInfo.cpp | 116 +
 .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 177 +-
 .../X86/MCTargetDesc/X86WinCOFFObjectWriter.cpp | 2 +-
 contrib/llvm/lib/Target/X86/X86.td | 71 +-
 contrib/llvm/lib/Target/X86/X86AsmPrinter.cpp | 86 +-
 contrib/llvm/lib/Target/X86/X86AsmPrinter.h | 20 +-
 contrib/llvm/lib/Target/X86/X86CallingConv.h | 35 +
 contrib/llvm/lib/Target/X86/X86CallingConv.td | 95 +-
 contrib/llvm/lib/Target/X86/X86CodeEmitter.cpp | 47 +-
 contrib/llvm/lib/Target/X86/X86FastISel.cpp | 463 +-
 contrib/llvm/lib/Target/X86/X86FixupLEAs.cpp | 2 +-
 contrib/llvm/lib/Target/X86/X86FloatingPoint.cpp | 18 +-
 contrib/llvm/lib/Target/X86/X86FrameLowering.cpp | 337 +-
 contrib/llvm/lib/Target/X86/X86FrameLowering.h | 27 -
 contrib/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp | 225 +-
 contrib/llvm/lib/Target/X86/X86ISelLowering.cpp | 3106 +++++--
 contrib/llvm/lib/Target/X86/X86ISelLowering.h | 136 +-
 contrib/llvm/lib/Target/X86/X86InstrAVX512.td | 3526 ++++++++
 contrib/llvm/lib/Target/X86/X86InstrArithmetic.td | 250 +-
 contrib/llvm/lib/Target/X86/X86InstrCompiler.td | 130 +-
 contrib/llvm/lib/Target/X86/X86InstrControl.td | 5 +-
 contrib/llvm/lib/Target/X86/X86InstrExtension.td | 68 +-
 contrib/llvm/lib/Target/X86/X86InstrFMA.td | 77 +-
 contrib/llvm/lib/Target/X86/X86InstrFPStack.td | 30 +-
 contrib/llvm/lib/Target/X86/X86InstrFormats.td | 181 +-
 .../llvm/lib/Target/X86/X86InstrFragmentsSIMD.td | 141 +-
 contrib/llvm/lib/Target/X86/X86InstrInfo.cpp | 996 ++-
 contrib/llvm/lib/Target/X86/X86InstrInfo.h | 26 +-
 contrib/llvm/lib/Target/X86/X86InstrInfo.td | 601 +-
 contrib/llvm/lib/Target/X86/X86InstrMMX.td | 78 +-
 contrib/llvm/lib/Target/X86/X86InstrSSE.td | 1833 +++--
 contrib/llvm/lib/Target/X86/X86InstrSVM.td | 18 +-
 contrib/llvm/lib/Target/X86/X86InstrShiftRotate.td | 136 +-
 contrib/llvm/lib/Target/X86/X86InstrSystem.td | 76 +-
 contrib/llvm/lib/Target/X86/X86InstrTSX.td | 7 +
 contrib/llvm/lib/Target/X86/X86InstrXOP.td | 146 +-
 contrib/llvm/lib/Target/X86/X86JITInfo.cpp | 3 +-
 contrib/llvm/lib/Target/X86/X86MCInstLower.cpp | 270 +-
 contrib/llvm/lib/Target/X86/X86RegisterInfo.cpp | 89 +-
 contrib/llvm/lib/Target/X86/X86RegisterInfo.h | 10 +-
 contrib/llvm/lib/Target/X86/X86RegisterInfo.td | 92 +-
 contrib/llvm/lib/Target/X86/X86SchedHaswell.td | 14 +-
 contrib/llvm/lib/Target/X86/X86SchedSandyBridge.td | 13 +-
 contrib/llvm/lib/Target/X86/X86Schedule.td | 73 +-
 contrib/llvm/lib/Target/X86/X86ScheduleAtom.td | 41 +-
 contrib/llvm/lib/Target/X86/X86ScheduleSLM.td | 668 ++
 .../llvm/lib/Target/X86/X86SelectionDAGInfo.cpp | 6 +-
 contrib/llvm/lib/Target/X86/X86SelectionDAGInfo.h | 4 +-
 contrib/llvm/lib/Target/X86/X86Subtarget.cpp | 60 +-
 contrib/llvm/lib/Target/X86/X86Subtarget.h | 43 +-
 contrib/llvm/lib/Target/X86/X86TargetMachine.cpp | 10 +-
 .../llvm/lib/Target/X86/X86TargetObjectFile.cpp | 10 +-
 contrib/llvm/lib/Target/X86/X86TargetObjectFile.h | 3 +
 .../llvm/lib/Target/X86/X86TargetTransformInfo.cpp | 234 +-
 contrib/llvm/lib/Target/X86/X86VZeroUpper.cpp | 45 +-
 .../XCore/Disassembler/XCoreDisassembler.cpp | 8 +-
 .../Target/XCore/MCTargetDesc/XCoreMCAsmInfo.cpp | 10 +-
 .../lib/Target/XCore/MCTargetDesc/XCoreMCAsmInfo.h | 6 +-
 .../XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 10 +-
 contrib/llvm/lib/Target/XCore/XCore.h | 2 +
 contrib/llvm/lib/Target/XCore/XCoreAsmPrinter.cpp | 80 +-
 .../llvm/lib/Target/XCore/XCoreFrameLowering.cpp | 107 +-
 .../llvm/lib/Target/XCore/XCoreISelDAGToDAG.cpp | 14 +-
 .../llvm/lib/Target/XCore/XCoreISelLowering.cpp | 200 +-
 contrib/llvm/lib/Target/XCore/XCoreISelLowering.h | 24 +-
 contrib/llvm/lib/Target/XCore/XCoreInstrInfo.cpp | 17 +-
 contrib/llvm/lib/Target/XCore/XCoreInstrInfo.h | 7 +-
 contrib/llvm/lib/Target/XCore/XCoreInstrInfo.td | 26 +-
 .../lib/Target/XCore/XCoreLowerThreadLocal.cpp | 114 +-
 contrib/llvm/lib/Target/XCore/XCoreMCInstLower.cpp | 2 +-
 .../llvm/lib/Target/XCore/XCoreRegisterInfo.cpp | 6 +-
 contrib/llvm/lib/Target/XCore/XCoreRegisterInfo.h | 4 +-
 .../llvm/lib/Target/XCore/XCoreTargetMachine.cpp | 9 +
 contrib/llvm/lib/Target/XCore/XCoreTargetMachine.h | 2 +
 .../lib/Target/XCore/XCoreTargetTransformInfo.cpp | 83 +
 .../llvm/lib/Transforms/IPO/ArgumentPromotion.cpp | 31 +-
 contrib/llvm/lib/Transforms/IPO/ConstantMerge.cpp | 11 +-
 .../lib/Transforms/IPO/DeadArgumentElimination.cpp | 138 +-
 contrib/llvm/lib/Transforms/IPO/ExtractGV.cpp | 54 +-
 contrib/llvm/lib/Transforms/IPO/FunctionAttrs.cpp | 437 +-
 contrib/llvm/lib/Transforms/IPO/GlobalDCE.cpp | 3 +
 contrib/llvm/lib/Transforms/IPO/GlobalOpt.cpp | 514 +-
 contrib/llvm/lib/Transforms/IPO/InlineAlways.cpp | 2 +-
 contrib/llvm/lib/Transforms/IPO/InlineSimple.cpp | 4 +-
 contrib/llvm/lib/Transforms/IPO/Inliner.cpp | 27 +-
 contrib/llvm/lib/Transforms/IPO/Internalize.cpp | 136 +-
 contrib/llvm/lib/Transforms/IPO/MergeFunctions.cpp | 77 +-
 .../llvm/lib/Transforms/IPO/PassManagerBuilder.cpp | 62 +-
 contrib/llvm/lib/Transforms/IPO/PruneEH.cpp | 14 +-
 contrib/llvm/lib/Transforms/IPO/StripSymbols.cpp | 231 +-
 .../llvm/lib/Transforms/InstCombine/InstCombine.h | 16 +-
 .../Transforms/InstCombine/InstCombineAddSub.cpp | 49 +-
 .../Transforms/InstCombine/InstCombineAndOrXor.cpp | 263 +-
 .../Transforms/InstCombine/InstCombineCalls.cpp | 46 +-
 .../Transforms/InstCombine/InstCombineCasts.cpp | 168 +-
 .../Transforms/InstCombine/InstCombineCompares.cpp | 392 +-
 .../InstCombine/InstCombineLoadStoreAlloca.cpp | 30 +-
 .../InstCombine/InstCombineMulDivRem.cpp | 385 +-
 .../lib/Transforms/InstCombine/InstCombinePHI.cpp | 14 +-
 .../Transforms/InstCombine/InstCombineSelect.cpp | 15 +-
 .../InstCombine/InstCombineSimplifyDemanded.cpp | 3 +-
 .../InstCombine/InstCombineVectorOps.cpp | 321 +-
 .../Transforms/InstCombine/InstCombineWorklist.h | 9 +-
 .../InstCombine/InstructionCombining.cpp | 114 +-
 .../Instrumentation/AddressSanitizer.cpp | 366 +-
 .../lib/Transforms/Instrumentation/BlackList.cpp | 126 -
 .../Transforms/Instrumentation/BoundsChecking.cpp | 6 +-
 .../Instrumentation/DataFlowSanitizer.cpp | 1397 ++++
 .../lib/Transforms/Instrumentation/DebugIR.cpp | 618 ++
 .../llvm/lib/Transforms/Instrumentation/DebugIR.h | 99 +
 .../Transforms/Instrumentation/EdgeProfiling.cpp | 117 -
 .../Transforms/Instrumentation/GCOVProfiling.cpp | 93 +-
 .../Transforms/Instrumentation/Instrumentation.cpp | 4 +-
 .../Transforms/Instrumentation/MemorySanitizer.cpp | 537 +-
 .../Instrumentation/OptimalEdgeProfiling.cpp | 225 -
 .../Transforms/Instrumentation/PathProfiling.cpp | 1424 ----
 .../Transforms/Instrumentation/ProfilingUtils.cpp | 169 -
 .../Transforms/Instrumentation/ProfilingUtils.h | 36 -
 .../Transforms/Instrumentation/ThreadSanitizer.cpp | 18 +-
 .../lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h | 186 +
 .../lib/Transforms/ObjCARC/DependencyAnalysis.h | 2 +-
 contrib/llvm/lib/Transforms/ObjCARC/ObjCARC.h | 6 +-
 .../Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 2 +-
 .../lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 2 +-
 .../lib/Transforms/ObjCARC/ObjCARCContract.cpp | 110 +-
 .../llvm/lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 760 +-
 .../llvm/lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 4 +-
 .../lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 2 +-
 contrib/llvm/lib/Transforms/Scalar/ADCE.cpp | 2 +-
 .../lib/Transforms/Scalar/BasicBlockPlacement.cpp | 152 -
 .../llvm/lib/Transforms/Scalar/CodeGenPrepare.cpp | 105 +-
 contrib/llvm/lib/Transforms/Scalar/EarlyCSE.cpp | 10 -
 .../llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp | 79 +
 contrib/llvm/lib/Transforms/Scalar/GVN.cpp | 252 +-
 contrib/llvm/lib/Transforms/Scalar/GlobalMerge.cpp | 18 +-
 .../llvm/lib/Transforms/Scalar/IndVarSimplify.cpp | 91 +-
 .../llvm/lib/Transforms/Scalar/JumpThreading.cpp | 89 +-
 .../llvm/lib/Transforms/Scalar/LoopDeletion.cpp | 10 +-
 .../lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 74 +-
 .../llvm/lib/Transforms/Scalar/LoopRerollPass.cpp | 1184 +++
 .../lib/Transforms/Scalar/LoopStrengthReduce.cpp | 136 +-
 .../llvm/lib/Transforms/Scalar/LoopUnrollPass.cpp | 42 +-
 .../llvm/lib/Transforms/Scalar/LoopUnswitch.cpp | 229 +-
 .../llvm/lib/Transforms/Scalar/MemCpyOptimizer.cpp | 23 +-
 .../Transforms/Scalar/PartiallyInlineLibCalls.cpp | 156 +
 contrib/llvm/lib/Transforms/Scalar/Reassociate.cpp | 41 +-
 contrib/llvm/lib/Transforms/Scalar/SCCP.cpp | 14 +-
 contrib/llvm/lib/Transforms/Scalar/SROA.cpp | 2802 +++----
 .../llvm/lib/Transforms/Scalar/SampleProfile.cpp | 479 ++
 contrib/llvm/lib/Transforms/Scalar/Scalar.cpp | 16 +-
 .../lib/Transforms/Scalar/ScalarReplAggregates.cpp | 51 +-
 .../llvm/lib/Transforms/Scalar/SimplifyCFGPass.cpp | 194 +-
 .../lib/Transforms/Scalar/SimplifyLibCalls.cpp | 247 -
 .../llvm/lib/Transforms/Scalar/StructurizeCFG.cpp | 906 ++
 .../Transforms/Scalar/TailRecursionElimination.cpp | 170 +-
 .../llvm/lib/Transforms/Utils/BasicBlockUtils.cpp | 175 +-
 .../lib/Transforms/Utils/BreakCriticalEdges.cpp | 43 +-
 .../llvm/lib/Transforms/Utils/CloneFunction.cpp | 5 +-
 .../llvm/lib/Transforms/Utils/CodeExtractor.cpp | 7 +-
 .../llvm/lib/Transforms/Utils/DemoteRegToStack.cpp | 1 +
 contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp | 486 ++
 contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp | 183 +
 .../llvm/lib/Transforms/Utils/InlineFunction.cpp | 3 +-
 contrib/llvm/lib/Transforms/Utils/LCSSA.cpp | 15 +-
 contrib/llvm/lib/Transforms/Utils/Local.cpp | 473 +-
 contrib/llvm/lib/Transforms/Utils/LoopSimplify.cpp | 23 +-
 contrib/llvm/lib/Transforms/Utils/LoopUnroll.cpp | 8 +-
 .../lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 2 +-
 contrib/llvm/lib/Transforms/Utils/LowerInvoke.cpp | 21 +-
 contrib/llvm/lib/Transforms/Utils/LowerSwitch.cpp | 62 +-
 contrib/llvm/lib/Transforms/Utils/MetaRenamer.cpp | 2 +-
 contrib/llvm/lib/Transforms/Utils/ModuleUtils.cpp | 18 +
 .../Transforms/Utils/PromoteMemoryToRegister.cpp | 1082 ++-
 contrib/llvm/lib/Transforms/Utils/SSAUpdater.cpp | 50 +-
 contrib/llvm/lib/Transforms/Utils/SimplifyCFG.cpp | 311 +-
 .../llvm/lib/Transforms/Utils/SimplifyIndVar.cpp | 2 +-
 .../llvm/lib/Transforms/Utils/SimplifyLibCalls.cpp | 323 +-
 .../llvm/lib/Transforms/Utils/SpecialCaseList.cpp | 222 +
 contrib/llvm/lib/Transforms/Utils/ValueMapper.cpp | 29 +-
 .../llvm/lib/Transforms/Vectorize/BBVectorize.cpp | 90 +-
 .../lib/Transforms/Vectorize/LoopVectorize.cpp | 2795 +++--
 .../lib/Transforms/Vectorize/SLPVectorizer.cpp | 2529 +++-
 contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp | 730 --
 contrib/llvm/lib/Transforms/Vectorize/VecUtils.h | 164 -
 1182 files changed, 148508 insertions(+), 82745 deletions(-)
 create mode 100644 contrib/llvm/lib/Analysis/CFG.cpp
 create mode 100644 contrib/llvm/lib/Analysis/Delinearization.cpp
 delete mode 100644 contrib/llvm/lib/Analysis/PathNumbering.cpp
 delete mode 100644 contrib/llvm/lib/Analysis/PathProfileInfo.cpp
 delete mode 100644 contrib/llvm/lib/Analysis/PathProfileVerifier.cpp
 delete mode 100644 contrib/llvm/lib/Analysis/ProfileDataLoader.cpp
 delete mode 100644 contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp
 delete mode 100644 contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp
 delete mode 100644 contrib/llvm/lib/Analysis/ProfileInfo.cpp
 delete mode 100644 contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp
 delete mode 100644 contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
 delete mode 100644 contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp
 delete mode 100644 contrib/llvm/lib/Archive/Archive.cpp
 delete mode 100644 contrib/llvm/lib/Archive/ArchiveInternals.h
 delete mode 100644 contrib/llvm/lib/Archive/ArchiveReader.cpp
 delete mode 100644 contrib/llvm/lib/Archive/ArchiveWriter.cpp
 create mode 100644 contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
 create mode 100644 contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
 create mode 100644 contrib/llvm/lib/CodeGen/LiveRegUnits.cpp
 delete mode 100644 contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h
 delete mode 100644 contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
 create mode 100644 contrib/llvm/lib/CodeGen/StackMaps.cpp
 delete mode 100644 contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp
 delete mode 100644 contrib/llvm/lib/DebugInfo/DWARFAttribute.h
 create mode 100644 contrib/llvm/lib/DebugInfo/DWARFDebugLoc.cpp
 create mode 100644 contrib/llvm/lib/DebugInfo/DWARFDebugLoc.h
 create mode 100644 contrib/llvm/lib/DebugInfo/DWARFTypeUnit.cpp
 create mode 100644 contrib/llvm/lib/DebugInfo/DWARFTypeUnit.h
 create mode 100644 contrib/llvm/lib/DebugInfo/DWARFUnit.cpp
 create mode 100644 contrib/llvm/lib/DebugInfo/DWARFUnit.h
 delete mode 100644 contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
 delete mode 100644 contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h
 create mode 100644 contrib/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp
 create mode 100644 contrib/llvm/lib/IR/AsmWriter.h
 create mode 100644 contrib/llvm/lib/IR/LegacyPassManager.cpp
 create mode 100644 contrib/llvm/lib/LTO/LTOCodeGenerator.cpp
 create mode 100644 contrib/llvm/lib/LTO/LTOModule.cpp
 create mode 100644 contrib/llvm/lib/MC/MCAsmInfoELF.cpp
 create mode 100644 contrib/llvm/lib/MC/MCExternalSymbolizer.cpp
 create mode 100644 contrib/llvm/lib/MC/MCFunction.cpp
 create mode 100644 contrib/llvm/lib/MC/MCModuleYAML.cpp
 create mode 100644 contrib/llvm/lib/MC/MCObjectDisassembler.cpp
 create mode 100644 contrib/llvm/lib/MC/MCObjectSymbolizer.cpp
 create mode 100644 contrib/llvm/lib/MC/MCRelocationInfo.cpp
 create mode 100644 contrib/llvm/lib/MC/MCSymbolizer.cpp
 create mode 100644 contrib/llvm/lib/Object/COFFYAML.cpp
 create mode 100644 contrib/llvm/lib/Object/ELF.cpp
 create mode 100644 contrib/llvm/lib/Object/ELFYAML.cpp
 create mode 100644 contrib/llvm/lib/Object/MachOUniversal.cpp
 create mode 100644 contrib/llvm/lib/Object/YAML.cpp
 delete mode 100644 contrib/llvm/lib/Support/LocaleGeneric.inc
 delete mode 100644 contrib/llvm/lib/Support/LocaleWindows.inc
 delete mode 100644 contrib/llvm/lib/Support/LocaleXlocale.inc
 create mode 100644 contrib/llvm/lib/Support/MD5.cpp
 delete mode 100644 contrib/llvm/lib/Support/PathV2.cpp
 create mode 100644 contrib/llvm/lib/Support/StringRefMemoryObject.cpp
 create mode 100644 contrib/llvm/lib/Support/Unicode.cpp
 delete mode 100644 contrib/llvm/lib/Support/Unix/PathV2.inc
 delete mode 100644 contrib/llvm/lib/Support/Windows/PathV2.inc
 create mode 100644 contrib/llvm/lib/Target/AArch64/AArch64InstrNEON.td
 create mode 100644 contrib/llvm/lib/Target/ARM/ARMFPUName.def
 create mode 100644 contrib/llvm/lib/Target/ARM/ARMFPUName.h
 create mode 100644 contrib/llvm/lib/Target/ARM/ARMFeatures.h
 delete mode 100644 contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h
 create mode 100644 contrib/llvm/lib/Target/ARM/MCTargetDesc/ARMMachORelocationInfo.cpp
 create mode 100644 contrib/llvm/lib/Target/Hexagon/HexagonCopyToCombine.cpp
 create mode 100644 contrib/llvm/lib/Target/Hexagon/HexagonMachineFunctionInfo.cpp
 create mode 100644 contrib/llvm/lib/Target/Hexagon/HexagonSplitConst32AndConst64.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/Disassembler/MBlazeDisassembler.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/InstPrinter/MBlazeInstPrinter.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlaze.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlaze.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeAsmPrinter.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeCallingConv.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeFrameLowering.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeISelLowering.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeInstrFPU.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeInstrFSL.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeInstrFormats.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeInstrInfo.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeIntrinsics.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeMCInstLower.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeMachineFunction.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeRegisterInfo.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeRelocations.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeSchedule.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeSchedule3.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeSchedule5.td
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeSelectionDAGInfo.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeSubtarget.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeTargetMachine.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MBlazeTargetObjectFile.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeBaseInfo.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeELFObjectWriter.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCAsmInfo.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/MCTargetDesc/MBlazeMCTargetDesc.h
 delete mode 100644 contrib/llvm/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
 delete mode 100644 contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.cpp
 delete mode 100644 contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsDirectObjLower.h
 delete mode 100644 contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp
 delete mode 100644 contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h
 create mode 100644 contrib/llvm/lib/Target/Mips/MCTargetDesc/MipsTargetStreamer.cpp
 create mode 100644 contrib/llvm/lib/Target/Mips/MSA.txt
 create mode 100644 contrib/llvm/lib/Target/Mips/Mips16HardFloat.cpp
 create mode 100644 contrib/llvm/lib/Target/Mips/Mips16HardFloat.h
 create mode 100644 contrib/llvm/lib/Target/Mips/MipsMSAInstrFormats.td
 create mode 100644 contrib/llvm/lib/Target/Mips/MipsMSAInstrInfo.td
 create mode 100644 contrib/llvm/lib/Target/Mips/MipsTargetStreamer.h
 create mode 100644 contrib/llvm/lib/Target/NVPTX/InstPrinter/NVPTXInstPrinter.h
 create mode 100644 contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.cpp
 create mode 100644 contrib/llvm/lib/Target/NVPTX/NVPTXMCExpr.h
 delete mode 100644 contrib/llvm/lib/Target/NVPTX/NVPTXNumRegisters.h
 create mode 100644 contrib/llvm/lib/Target/NVPTX/NVPTXPrologEpilogPass.cpp
 create mode 100644 contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.cpp
 create mode 100644 contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMCExpr.h
 create mode 100644 contrib/llvm/lib/Target/PowerPC/MCTargetDesc/PPCMachObjectWriter.cpp
 create mode 100644 contrib/llvm/lib/Target/PowerPC/PPCFastISel.cpp
 create mode 100644 contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.cpp
 create mode 100644 contrib/llvm/lib/Target/PowerPC/PPCTargetObjectFile.h
 create mode 100644 contrib/llvm/lib/Target/PowerPC/PPCTargetStreamer.h
 create mode 100644 contrib/llvm/lib/Target/R600/AMDGPUISelDAGToDAG.cpp
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDGPUIndirectAddressing.cpp
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDGPUStructurizeCFG.cpp
 create mode 100644 contrib/llvm/lib/Target/R600/AMDGPUTargetTransformInfo.cpp
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDIL.h
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDIL7XXDevice.cpp
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDIL7XXDevice.h
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILDevice.cpp
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILDevice.h
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILDeviceInfo.cpp
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILDeviceInfo.h
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILDevices.h
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILEvergreenDevice.cpp
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILEvergreenDevice.h
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILISelDAGToDAG.cpp
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILNIDevice.cpp
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILNIDevice.h
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILSIDevice.cpp
 delete mode 100644 contrib/llvm/lib/Target/R600/AMDILSIDevice.h
 create mode 100644 contrib/llvm/lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.cpp
 create mode 100644 contrib/llvm/lib/Target/R600/R600ClauseMergePass.cpp
 create mode 100644 contrib/llvm/lib/Target/R600/R600InstrFormats.td
 create mode 100644 contrib/llvm/lib/Target/R600/R600OptimizeVectorRegisters.cpp
 create mode 100644 contrib/llvm/lib/Target/R600/R600TextureIntrinsicsReplacer.cpp
 create mode 100644 contrib/llvm/lib/Target/R600/SIFixSGPRCopies.cpp
 create mode 100644 contrib/llvm/lib/Target/R600/SITypeRewriter.cpp
 delete mode 100644 contrib/llvm/lib/Target/Sparc/FPMover.cpp
 create mode 100644 contrib/llvm/lib/Target/Sparc/SparcCodeEmitter.cpp
 create mode 100644 contrib/llvm/lib/Target/Sparc/SparcJITInfo.cpp
 create mode 100644 contrib/llvm/lib/Target/Sparc/SparcJITInfo.h
 create mode 100644 contrib/llvm/lib/Target/Sparc/SparcRelocations.h
 create mode 100644 contrib/llvm/lib/Target/SystemZ/Disassembler/SystemZDisassembler.cpp
 create mode 100644 contrib/llvm/lib/Target/SystemZ/SystemZElimCompare.cpp
 create mode 100644 contrib/llvm/lib/Target/SystemZ/SystemZLongBranch.cpp
 create mode 100644 contrib/llvm/lib/Target/SystemZ/SystemZMachineFunctionInfo.cpp
 create mode 100644 contrib/llvm/lib/Target/SystemZ/SystemZProcessors.td
 create mode 100644 contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.cpp
 create mode 100644 contrib/llvm/lib/Target/SystemZ/SystemZSelectionDAGInfo.h
 create mode 100644 contrib/llvm/lib/Target/SystemZ/SystemZShortenInst.cpp
 create mode 100644 contrib/llvm/lib/Target/X86/MCTargetDesc/X86ELFRelocationInfo.cpp
 create mode 100644 contrib/llvm/lib/Target/X86/MCTargetDesc/X86MachORelocationInfo.cpp
 create mode 100644 contrib/llvm/lib/Target/X86/X86CallingConv.h
 create mode 100644 contrib/llvm/lib/Target/X86/X86InstrAVX512.td
 create mode 100644 contrib/llvm/lib/Target/X86/X86ScheduleSLM.td
 create mode 100644 contrib/llvm/lib/Target/XCore/XCoreTargetTransformInfo.cpp
 delete mode 100644 contrib/llvm/lib/Transforms/Instrumentation/BlackList.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Instrumentation/DataFlowSanitizer.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Instrumentation/DebugIR.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Instrumentation/DebugIR.h
 delete mode 100644 contrib/llvm/lib/Transforms/Instrumentation/EdgeProfiling.cpp
 delete mode 100644 contrib/llvm/lib/Transforms/Instrumentation/OptimalEdgeProfiling.cpp
 delete mode 100644 contrib/llvm/lib/Transforms/Instrumentation/PathProfiling.cpp
 delete mode 100644 contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.cpp
 delete mode 100644 contrib/llvm/lib/Transforms/Instrumentation/ProfilingUtils.h
 create mode 100644 contrib/llvm/lib/Transforms/ObjCARC/ARCRuntimeEntryPoints.h
 delete mode 100644 contrib/llvm/lib/Transforms/Scalar/BasicBlockPlacement.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Scalar/FlattenCFGPass.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Scalar/LoopRerollPass.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Scalar/PartiallyInlineLibCalls.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Scalar/SampleProfile.cpp
 delete mode 100644 contrib/llvm/lib/Transforms/Scalar/SimplifyLibCalls.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Scalar/StructurizeCFG.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Utils/FlattenCFG.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Utils/GlobalStatus.cpp
 create mode 100644 contrib/llvm/lib/Transforms/Utils/SpecialCaseList.cpp
 delete mode 100644 contrib/llvm/lib/Transforms/Vectorize/VecUtils.cpp
 delete mode 100644 contrib/llvm/lib/Transforms/Vectorize/VecUtils.h
(limited to 'contrib/llvm/lib')

diff --git a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
index 210b80a..b8b6d37 100644
--- a/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/AliasAnalysis.cpp
@@ -26,6 +26,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/Dominators.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/BasicBlock.h"
@@ -361,24 +362,6 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) {
 }
 
 namespace {
-  // Conservatively return true. Return false, if there is a single path
-  // starting from "From" and the path does not reach "To".
-  static bool hasPath(const BasicBlock *From, const BasicBlock *To) {
-    const unsigned MaxCheck = 5;
-    const BasicBlock *Current = From;
-    for (unsigned I = 0; I < MaxCheck; I++) {
-      unsigned NumSuccs = Current->getTerminator()->getNumSuccessors();
-      if (NumSuccs > 1)
-        return true;
-      if (NumSuccs == 0)
-        return false;
-      Current = Current->getTerminator()->getSuccessor(0);
-      if (Current == To)
-        return true;
-    }
-    return true;
-  }
-
   /// Only find pointer captures which happen before the given instruction. Uses
   /// the dominator tree to determine whether one instruction is before another.
   /// Only support the case where the Value is defined in the same basic block
@@ -400,7 +383,7 @@ namespace {
       // there is no need to explore the use if BeforeHere dominates use.
       // Check whether there is a path from I to BeforeHere.
      if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
-          !hasPath(BB, BeforeHere->getParent()))
+          !isPotentiallyReachable(I, BeforeHere, DT))
         return false;
       return true;
     }
@@ -412,7 +395,7 @@ namespace {
       if (BeforeHere != I && !DT->isReachableFromEntry(BB))
         return false;
       if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
-          !hasPath(BB, BeforeHere->getParent()))
+          !isPotentiallyReachable(I, BeforeHere, DT))
         return false;
       Captured = true;
       return true;
@@ -450,6 +433,7 @@ AliasAnalysis::callCapturesBefore(const Instruction *I,
     return AliasAnalysis::ModRef;
 
   unsigned ArgNo = 0;
+  AliasAnalysis::ModRefResult R = AliasAnalysis::NoModRef;
   for (ImmutableCallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
        CI != CE; ++CI, ++ArgNo) {
     // Only look at the no-capture or byval pointer arguments.  If this
@@ -463,12 +447,18 @@ AliasAnalysis::callCapturesBefore(const Instruction *I,
     // is impossible to alias the pointer we're checking.  If not, we have to
     // assume that the call could touch the pointer, even though it doesn't
     // escape.
-    if (!isNoAlias(AliasAnalysis::Location(*CI),
-                   AliasAnalysis::Location(Object))) {
-      return AliasAnalysis::ModRef;
+    if (isNoAlias(AliasAnalysis::Location(*CI),
+                  AliasAnalysis::Location(Object)))
+      continue;
+    if (CS.doesNotAccessMemory(ArgNo))
+      continue;
+    if (CS.onlyReadsMemory(ArgNo)) {
+      R = AliasAnalysis::Ref;
+      continue;
     }
+    return AliasAnalysis::ModRef;
   }
-  return AliasAnalysis::NoModRef;
+  return R;
 }
 
 // AliasAnalysis destructor: DO NOT move this to the header file for
@@ -537,6 +527,15 @@ bool llvm::isNoAliasCall(const Value *V) {
   return false;
 }
 
+/// isNoAliasArgument - Return true if this is an argument with the noalias
+/// attribute.
+bool llvm::isNoAliasArgument(const Value *V)
+{
+  if (const Argument *A = dyn_cast<Argument>(V))
+    return A->hasNoAliasAttr();
+  return false;
+}
+
 /// isIdentifiedObject - Return true if this pointer refers to a distinct and
 /// identifiable object.  This returns true for:
 ///    Global Variables and Functions (but not Global Aliases)
diff --git a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
index 5910526..2289c12 100644
--- a/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
+++ b/contrib/llvm/lib/Analysis/AliasSetTracker.cpp
@@ -299,7 +299,6 @@ bool AliasSetTracker::add(Value *Ptr, uint64_t Size, const MDNode *TBAAInfo) {
 bool AliasSetTracker::add(LoadInst *LI) {
   if (LI->getOrdering() > Monotonic) return addUnknown(LI);
   AliasSet::AccessType ATy = AliasSet::Refs;
-  if (!LI->isUnordered()) ATy = AliasSet::ModRef;
   bool NewPtr;
   AliasSet &AS = addPointer(LI->getOperand(0),
                             AA.getTypeStoreSize(LI->getType()),
@@ -312,7 +311,6 @@ bool AliasSetTracker::add(LoadInst *LI) {
 bool AliasSetTracker::add(StoreInst *SI) {
   if (SI->getOrdering() > Monotonic) return addUnknown(SI);
   AliasSet::AccessType ATy = AliasSet::Mods;
-  if (!SI->isUnordered()) ATy = AliasSet::ModRef;
   bool NewPtr;
   Value *Val = SI->getOperand(0);
   AliasSet &AS = addPointer(SI->getOperand(1),
diff --git a/contrib/llvm/lib/Analysis/Analysis.cpp b/contrib/llvm/lib/Analysis/Analysis.cpp
index 349c417..98f2a55 100644
--- a/contrib/llvm/lib/Analysis/Analysis.cpp
+++ b/contrib/llvm/lib/Analysis/Analysis.cpp
@@ -34,6 +34,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeCFGOnlyViewerPass(Registry);
   initializeCFGOnlyPrinterPass(Registry);
   initializeDependenceAnalysisPass(Registry);
+  initializeDelinearizationPass(Registry);
   initializeDominanceFrontierPass(Registry);
   initializeDomViewerPass(Registry);
   initializeDomPrinterPass(Registry);
@@ -54,16 +55,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) {
   initializeMemoryDependenceAnalysisPass(Registry);
   initializeModuleDebugInfoPrinterPass(Registry);
   initializePostDominatorTreePass(Registry);
-  initializeProfileEstimatorPassPass(Registry);
-  initializeNoProfileInfoPass(Registry);
-  initializeNoPathProfileInfoPass(Registry);
-  initializeProfileInfoAnalysisGroup(Registry);
-  initializePathProfileInfoAnalysisGroup(Registry);
-  initializeLoaderPassPass(Registry);
-  initializePathProfileLoaderPassPass(Registry);
-  initializeProfileVerifierPassPass(Registry);
-  initializePathProfileVerifierPass(Registry);
-  initializeProfileMetadataLoaderPassPass(Registry);
   initializeRegionInfoPass(Registry);
   initializeRegionViewerPass(Registry);
   initializeRegionPrinterPass(Registry);
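A note on the callCapturesBefore() hunks above: the old code bailed out with ModRef as soon as any no-capture argument could alias the queried pointer, while the new code keeps scanning and only reports Ref when every aliasing argument is read-only. The following is a minimal standalone sketch of that classification logic, not the LLVM sources; ArgSummary and classifyCall are invented stand-ins for the ImmutableCallSite queries and AliasAnalysis::ModRefResult used in the patch.

```cpp
#include <vector>

enum ModRefResult { NoModRef, Ref, ModRef };

// Hypothetical per-argument summary standing in for the isNoAlias(),
// CS.doesNotAccessMemory() and CS.onlyReadsMemory() queries in the patch.
struct ArgSummary {
  bool MayAliasPtr;    // argument may alias the queried pointer
  bool AccessesMemory; // the call may access memory through this argument
  bool ReadOnly;       // the call only reads through this argument
};

ModRefResult classifyCall(const std::vector<ArgSummary> &Args) {
  ModRefResult R = NoModRef;
  for (std::vector<ArgSummary>::const_iterator I = Args.begin(),
                                               E = Args.end(); I != E; ++I) {
    if (!I->MayAliasPtr || !I->AccessesMemory)
      continue;        // this argument cannot touch the pointer
    if (I->ReadOnly) {
      R = Ref;         // remember the read, but keep scanning
      continue;
    }
    return ModRef;     // a writable aliasing argument: give up early
  }
  return R;            // NoModRef, or Ref if only reads were seen
}
```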
diff --git a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
index f8509dd..b2c2011 100644
--- a/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/contrib/llvm/lib/Analysis/BasicAliasAnalysis.cpp
@@ -122,7 +122,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
   //     question (in this case rewind to p), or
   //   - just give up. It is up to caller to make sure the pointer is pointing
   //     to the base address the object.
-  //
+  //
   // We go for 2nd option for simplicity.
   if (!isIdentifiedObject(V))
     return false;
@@ -130,7 +130,7 @@ static bool isObjectSmallerThan(const Value *V, uint64_t Size,
   // This function needs to use the aligned object size because we allow
   // reads a bit past the end given sufficient alignment.
   uint64_t ObjectSize = getObjectSize(V, TD, TLI, /*RoundToAlign*/true);
-
+
   return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize < Size;
 }
 
@@ -142,6 +142,17 @@ static bool isObjectSize(const Value *V, uint64_t Size,
   return ObjectSize != AliasAnalysis::UnknownSize && ObjectSize == Size;
 }
 
+/// isIdentifiedFunctionLocal - Return true if V is umabigously identified
+/// at the function-level. Different IdentifiedFunctionLocals can't alias.
+/// Further, an IdentifiedFunctionLocal can not alias with any function
+/// arguments other than itself, which is not neccessarily true for
+/// IdentifiedObjects.
+static bool isIdentifiedFunctionLocal(const Value *V)
+{
+  return isa<AllocaInst>(V) || isNoAliasCall(V) || isNoAliasArgument(V);
+}
+
+
 //===----------------------------------------------------------------------===//
 // GetElementPtr Instruction Decomposition and Analysis
 //===----------------------------------------------------------------------===//
@@ -152,7 +163,7 @@ namespace {
     EK_SignExt,
     EK_ZeroExt
   };
-
+
   struct VariableGEPIndex {
     const Value *V;
     ExtensionKind Extension;
@@ -189,7 +200,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
     Offset = 0;
     return V;
   }
-
+
   if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(V)) {
     if (ConstantInt *RHSC = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
       switch (BOp->getOpcode()) {
@@ -220,7 +231,7 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
       }
     }
   }
-
+
   // Since GEP indices are sign extended anyway, we don't care about the high
   // bits of a sign or zero extended value - just scales and offsets.  The
   // extensions have to be consistent though.
@@ -237,10 +248,10 @@ static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
                                         TD, Depth+1);
     Scale = Scale.zext(OldWidth);
     Offset = Offset.zext(OldWidth);
-
+
     return Result;
   }
-
+
   Scale = 1;
   Offset = 0;
   return V;
@@ -265,7 +276,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
                        const DataLayout *TD) {
   // Limit recursion depth to limit compile time in crazy cases.
   unsigned MaxLookup = 6;
-
+
   BaseOffs = 0;
   do {
     // See if this is a bitcast or GEP.
@@ -280,7 +291,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       }
       return V;
     }
-
+
     if (Op->getOpcode() == Instruction::BitCast) {
       V = Op->getOperand(0);
       continue;
@@ -297,15 +308,14 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
         V = Simplified;
         continue;
       }
-
+
       return V;
     }
-
+
     // Don't attempt to analyze GEPs over unsized objects.
-    if (!cast<PointerType>(GEPOp->getOperand(0)->getType())
-          ->getElementType()->isSized())
+    if (!GEPOp->getOperand(0)->getType()->getPointerElementType()->isSized())
       return V;
-
+
     // If we are lacking DataLayout information, we can't compute the offets of
     // elements computed by GEPs.  However, we can handle bitcast equivalent
     // GEPs.
@@ -315,7 +325,8 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
       V = GEPOp->getOperand(0);
       continue;
     }
-
+
+    unsigned AS = GEPOp->getPointerAddressSpace();
     // Walk the indices of the GEP, accumulating them into BaseOff/VarIndices.
     gep_type_iterator GTI = gep_type_begin(GEPOp);
     for (User::const_op_iterator I = GEPOp->op_begin()+1,
@@ -326,38 +337,37 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
         // For a struct, add the member offset.
         unsigned FieldNo = cast<ConstantInt>(Index)->getZExtValue();
         if (FieldNo == 0) continue;
-
+
         BaseOffs += TD->getStructLayout(STy)->getElementOffset(FieldNo);
         continue;
       }
-
+
       // For an array/pointer, add the element offset, explicitly scaled.
       if (ConstantInt *CIdx = dyn_cast<ConstantInt>(Index)) {
         if (CIdx->isZero()) continue;
         BaseOffs += TD->getTypeAllocSize(*GTI)*CIdx->getSExtValue();
         continue;
       }
-
+
       uint64_t Scale = TD->getTypeAllocSize(*GTI);
       ExtensionKind Extension = EK_NotExtended;
-
+
       // If the integer type is smaller than the pointer size, it is implicitly
       // sign extended to pointer size.
-      unsigned Width = cast<IntegerType>(Index->getType())->getBitWidth();
-      if (TD->getPointerSizeInBits() > Width)
+      unsigned Width = Index->getType()->getIntegerBitWidth();
+      if (TD->getPointerSizeInBits(AS) > Width)
         Extension = EK_SignExt;
-
+
       // Use GetLinearExpression to decompose the index into a C1*V+C2 form.
       APInt IndexScale(Width, 0), IndexOffset(Width, 0);
       Index = GetLinearExpression(Index, IndexScale, IndexOffset, Extension,
                                   *TD, 0);
-
+
       // The GEP index scale ("Scale") scales C1*V+C2, yielding (C1*V+C2)*Scale.
       // This gives us an aggregate computation of (C1*Scale)*V + C2*Scale.
       BaseOffs += IndexOffset.getSExtValue()*Scale;
       Scale *= IndexScale.getSExtValue();
-
-
+
       // If we already had an occurrence of this index variable, merge this
       // scale into it.  For example, we want to handle:
       //   A[x][x] -> x*16 + x*4 -> x*20
@@ -370,25 +380,25 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
           break;
         }
       }
-
+
       // Make sure that we have a scale that makes sense for this target's
       // pointer size.
-      if (unsigned ShiftBits = 64-TD->getPointerSizeInBits()) {
+      if (unsigned ShiftBits = 64 - TD->getPointerSizeInBits(AS)) {
         Scale <<= ShiftBits;
         Scale = (int64_t)Scale >> ShiftBits;
       }
-
+
       if (Scale) {
         VariableGEPIndex Entry = {Index, Extension,
                                   static_cast<int64_t>(Scale)};
         VarIndices.push_back(Entry);
       }
     }
-
+
     // Analyze the base pointer next.
     V = GEPOp->getOperand(0);
   } while (--MaxLookup);
-
+
   // If the chain of expressions is too deep, just return early.
   return V;
 }
@@ -396,7 +406,7 @@ DecomposeGEPExpression(const Value *V, int64_t &BaseOffs,
 /// GetIndexDifference - Dest and Src are the variable indices from two
 /// decomposed GetElementPtr instructions GEP1 and GEP2 which have common base
 /// pointers.  Subtract the GEP2 indices from GEP1 to find the symbolic
-/// difference between the two pointers.
+/// difference between the two pointers.
 static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
                                const SmallVectorImpl<VariableGEPIndex> &Src) {
   if (Src.empty()) return;
@@ -405,12 +415,12 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
     const Value *V = Src[i].V;
     ExtensionKind Extension = Src[i].Extension;
     int64_t Scale = Src[i].Scale;
-
+
     // Find V in Dest.  This is N^2, but pointer indices almost never have more
     // than a few variable indexes.
     for (unsigned j = 0, e = Dest.size(); j != e; ++j) {
       if (Dest[j].V != V || Dest[j].Extension != Extension) continue;
-
+
       // If we found it, subtract off Scale V's from the entry in Dest.  If it
       // goes to zero, remove the entry.
       if (Dest[j].Scale != Scale)
@@ -420,7 +430,7 @@ static void GetIndexDifference(SmallVectorImpl<VariableGEPIndex> &Dest,
       Scale = 0;
       break;
     }
-
+
     // If we didn't consume this entry, add it to the end of the Dest list.
     if (Scale) {
       VariableGEPIndex Entry = { V, Extension, -Scale };
@@ -515,7 +525,7 @@ namespace {
         return (AliasAnalysis*)this;
       return this;
     }
-
+
   private:
     // AliasCache - Track alias queries to guard against recursion.
     typedef std::pair<Location, Location> LocPair;
@@ -685,7 +695,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
          "AliasAnalysis query involving multiple functions!");
 
   const Value *Object = GetUnderlyingObject(Loc.Ptr, TD);
-
+
   // If this is a tail call and Loc.Ptr points to a stack location, we know that
   // the tail call cannot access or modify the local stack.
   // We cannot exclude byval arguments here; these belong to the caller of
@@ -695,7 +705,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
     if (const CallInst *CI = dyn_cast<CallInst>(CS.getInstruction()))
       if (CI->isTailCall())
         return NoModRef;
-
+
   // If the pointer is to a locally allocated object that does not escape,
   // then the call can not mod/ref the pointer unless the call takes the pointer
   // as an argument, and itself doesn't capture it.
@@ -711,7 +721,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
       if (!(*CI)->getType()->isPointerTy() ||
           (!CS.doesNotCapture(ArgNo) && !CS.isByValArgument(ArgNo)))
         continue;
-
+
       // If this is a no-capture pointer argument, see if we can tell that it
       // is impossible to alias the pointer we're checking.  If not, we have to
       // assume that the call could touch the pointer, even though it doesn't
@@ -721,7 +731,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
         break;
       }
     }
-
+
     if (!PassedAsArg)
       return NoModRef;
   }
@@ -810,7 +820,7 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   }
 
   // We can bound the aliasing properties of memset_pattern16 just as we can
-  // for memcpy/memset.  This is particularly important because the
+  // for memcpy/memset.  This is particularly important because the
   // LoopIdiomRecognizer likes to turn loops into calls to memset_pattern16
   // whenever possible.
   else if (TLI.has(LibFunc::memset_pattern16) &&
@@ -846,8 +856,8 @@ BasicAliasAnalysis::getModRefInfo(ImmutableCallSite CS,
   return ModRefResult(AliasAnalysis::getModRefInfo(CS, Loc) & Min);
 }
 
-static bool areVarIndicesEqual(SmallVector<VariableGEPIndex, 4> &Indices1,
-                               SmallVector<VariableGEPIndex, 4> &Indices2) {
+static bool areVarIndicesEqual(SmallVectorImpl<VariableGEPIndex> &Indices1,
+                               SmallVectorImpl<VariableGEPIndex> &Indices2) {
   unsigned Size1 = Indices1.size();
   unsigned Size2 = Indices2.size();
@@ -914,22 +924,22 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
         GEP1VariableIndices.clear();
       }
     }
-
+
     // If we get a No or May, then return it immediately, no amount of analysis
     // will improve this situation.
     if (BaseAlias != MustAlias) return BaseAlias;
-
+
     // Otherwise, we have a MustAlias.  Since the base pointers alias each other
     // exactly, see if the computed offset from the common pointer tells us
     // about the relation of the resulting pointer.
     const Value *GEP1BasePtr =
       DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
-
+
     int64_t GEP2BaseOffset;
     SmallVector<VariableGEPIndex, 4> GEP2VariableIndices;
     const Value *GEP2BasePtr =
       DecomposeGEPExpression(GEP2, GEP2BaseOffset, GEP2VariableIndices, TD);
-
+
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1 || GEP2BasePtr != UnderlyingV2) {
@@ -937,12 +947,12 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
              "DecomposeGEPExpression and GetUnderlyingObject disagree!");
       return MayAlias;
     }
-
+
     // Subtract the GEP2 pointer from the GEP1 pointer to find out their
     // symbolic difference.
     GEP1BaseOffset -= GEP2BaseOffset;
     GetIndexDifference(GEP1VariableIndices, GEP2VariableIndices);
-
+
   } else {
     // Check to see if these two pointers are related by the getelementptr
     // instruction.  If one pointer is a GEP with a non-zero index of the other
@@ -964,7 +974,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
     const Value *GEP1BasePtr =
       DecomposeGEPExpression(GEP1, GEP1BaseOffset, GEP1VariableIndices, TD);
-
+
     // DecomposeGEPExpression and GetUnderlyingObject should return the
     // same result except when DecomposeGEPExpression has no DataLayout.
     if (GEP1BasePtr != UnderlyingV1) {
@@ -973,7 +983,7 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size,
       return MayAlias;
     }
   }
-
+
   // In the two GEP Case, if there is no difference in the offsets of the
   // computed pointers, the resultant pointers are a must alias.  This
   // hapens when we have two lexically identical GEP's (for example).
@@ -1205,17 +1215,17 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
       (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
     return NoAlias;
 
-  // Arguments can't alias with local allocations or noalias calls
-  // in the same function.
-  if (((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
-       (isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1)))))
+  // Function arguments can't alias with things that are known to be
+  // unambigously identified at the function level.
+  if ((isa<Argument>(O1) && isIdentifiedFunctionLocal(O2)) ||
+      (isa<Argument>(O2) && isIdentifiedFunctionLocal(O1)))
     return NoAlias;
 
   // Most objects can't alias null.
   if ((isa<ConstantPointerNull>(O2) && isKnownNonNull(O1)) ||
       (isa<ConstantPointerNull>(O1) && isKnownNonNull(O2)))
     return NoAlias;
-
+
   // If one pointer is the result of a call/invoke or load and the other is a
   // non-escaping local object within the same function, then we know the
   // object couldn't escape to a point where the call could return it.
@@ -1237,7 +1247,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, uint64_t V1Size,
   if ((V1Size != UnknownSize && isObjectSmallerThan(O2, V1Size, *TD, *TLI)) ||
       (V2Size != UnknownSize && isObjectSmallerThan(O1, V2Size, *TD, *TLI)))
     return NoAlias;
-
+
   // Check the cache before climbing up use-def chains. This also terminates
   // otherwise infinitely recursive queries.
   LocPair Locs(Location(V1, V1Size, V1TBAAInfo),
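The aliasCheck() hunk above widens an existing NoAlias rule: a function argument can now be disambiguated against anything "identified at the function level", meaning an alloca, a noalias call result, or another noalias argument. The sketch below models that predicate in plain C++; the Value struct and its flags are invented stand-ins for llvm::Value and the isa<>/isNoAliasCall queries, not LLVM code.

```cpp
// Hypothetical summary of the properties the patch tests on a pointer value.
struct Value {
  bool IsAlloca;            // stack allocation in this function
  bool IsNoAliasCallResult; // returned by a call marked noalias (e.g. malloc)
  bool IsNoAliasArgument;   // argument carrying the noalias attribute
  bool IsArgument;          // any function argument
};

static bool isNoAliasArgument(const Value &V) { return V.IsNoAliasArgument; }

// Mirrors the new isIdentifiedFunctionLocal() in the patch.
static bool isIdentifiedFunctionLocal(const Value &V) {
  return V.IsAlloca || V.IsNoAliasCallResult || isNoAliasArgument(V);
}

// The widened rule: previously only allocas and noalias calls were paired
// against arguments; noalias arguments now qualify too.
static bool argumentsCannotAlias(const Value &O1, const Value &O2) {
  return (O1.IsArgument && isIdentifiedFunctionLocal(O2)) ||
         (O2.IsArgument && isIdentifiedFunctionLocal(O1));
}
```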
diff --git a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
index 100e5c8..62f3ab1 100644
--- a/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BlockFrequencyInfo.cpp
@@ -1,4 +1,4 @@
-//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------=======//
+//=======-------- BlockFrequencyInfo.cpp - Block Frequency Analysis -------===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -17,14 +17,97 @@
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/Passes.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/GraphWriter.h"
 
 using namespace llvm;
 
-INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
-                      true, true)
+#ifndef NDEBUG
+enum GVDAGType {
+  GVDT_None,
+  GVDT_Fraction,
+  GVDT_Integer
+};
+
+static cl::opt<GVDAGType>
+ViewBlockFreqPropagationDAG("view-block-freq-propagation-dags", cl::Hidden,
+          cl::desc("Pop up a window to show a dag displaying how block "
+                   "frequencies propagation through the CFG."),
+          cl::values(
+            clEnumValN(GVDT_None, "none",
+                       "do not display graphs."),
+            clEnumValN(GVDT_Fraction, "fraction", "display a graph using the "
+                       "fractional block frequency representation."),
+            clEnumValN(GVDT_Integer, "integer", "display a graph using the raw "
+                       "integer fractional block frequency representation."),
+            clEnumValEnd));
+
+namespace llvm {
+
+template <>
+struct GraphTraits<BlockFrequencyInfo *> {
+  typedef const BasicBlock NodeType;
+  typedef succ_const_iterator ChildIteratorType;
+  typedef Function::const_iterator nodes_iterator;
+
+  static inline const NodeType *getEntryNode(const BlockFrequencyInfo *G) {
+    return G->getFunction()->begin();
+  }
+  static ChildIteratorType child_begin(const NodeType *N) {
+    return succ_begin(N);
+  }
+  static ChildIteratorType child_end(const NodeType *N) {
+    return succ_end(N);
+  }
+  static nodes_iterator nodes_begin(const BlockFrequencyInfo *G) {
+    return G->getFunction()->begin();
+  }
+  static nodes_iterator nodes_end(const BlockFrequencyInfo *G) {
+    return G->getFunction()->end();
+  }
+};
+
+template<>
+struct DOTGraphTraits<BlockFrequencyInfo*> : public DefaultDOTGraphTraits {
+  explicit DOTGraphTraits(bool isSimple=false) :
+    DefaultDOTGraphTraits(isSimple) {}
+
+  static std::string getGraphName(const BlockFrequencyInfo *G) {
+    return G->getFunction()->getName();
+  }
+
+  std::string getNodeLabel(const BasicBlock *Node,
+                           const BlockFrequencyInfo *Graph) {
+    std::string Result;
+    raw_string_ostream OS(Result);
+
+    OS << Node->getName().str() << ":";
+    switch (ViewBlockFreqPropagationDAG) {
+    case GVDT_Fraction:
+      Graph->getBlockFreq(Node).print(OS);
+      break;
+    case GVDT_Integer:
+      OS << Graph->getBlockFreq(Node).getFrequency();
+      break;
+    case GVDT_None:
+      llvm_unreachable("If we are not supposed to render a graph we should "
+                       "never reach this point.");
+    }
+
+    return Result;
+  }
+};
+
+} // end namespace llvm
+#endif
+
+INITIALIZE_PASS_BEGIN(BlockFrequencyInfo, "block-freq",
+                      "Block Frequency Analysis", true, true)
 INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfo)
-INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq", "Block Frequency Analysis",
-                    true, true)
+INITIALIZE_PASS_END(BlockFrequencyInfo, "block-freq",
+                    "Block Frequency Analysis", true, true)
 
 char BlockFrequencyInfo::ID = 0;
@@ -46,6 +129,10 @@ void BlockFrequencyInfo::getAnalysisUsage(AnalysisUsage &AU) const {
 bool BlockFrequencyInfo::runOnFunction(Function &F) {
   BranchProbabilityInfo &BPI = getAnalysis<BranchProbabilityInfo>();
   BFI->doFunction(&F, &BPI);
+#ifndef NDEBUG
+  if (ViewBlockFreqPropagationDAG != GVDT_None)
+    view();
+#endif
   return false;
 }
 
@@ -53,11 +140,22 @@ void BlockFrequencyInfo::print(raw_ostream &O, const Module *) const {
   if (BFI) BFI->print(O);
 }
 
-/// getblockFreq - Return block frequency. Return 0 if we don't have the
-/// information. Please note that initial frequency is equal to 1024. It means
-/// that we should not rely on the value itself, but only on the comparison to
-/// the other block frequencies. We do this to avoid using of floating points.
-///
 BlockFrequency BlockFrequencyInfo::getBlockFreq(const BasicBlock *BB) const {
   return BFI->getBlockFreq(BB);
 }
+
+/// Pop up a ghostview window with the current block frequency propagation
+/// rendered using dot.
+void BlockFrequencyInfo::view() const {
+// This code is only for debugging.
+#ifndef NDEBUG
+  ViewGraph(const_cast<BlockFrequencyInfo *>(this), "BlockFrequencyDAGs");
+#else
+  errs() << "BlockFrequencyInfo::view is only available in debug builds on "
+            "systems with Graphviz or gv!\n";
+#endif // NDEBUG
+}
+
+const Function *BlockFrequencyInfo::getFunction() const {
+  return BFI->Fn;
+}
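The DOTGraphTraits specialization above labels each basic block "name:frequency" when the new -view-block-freq-propagation-dags option is set to "integer". As a rough standalone illustration of the output shape only (invented names, not LLVM code; per the deleted comment above, the entry block's raw frequency starts at 1024):

```cpp
#include <cstdint>
#include <cstdio>
#include <string>
#include <vector>

// Hypothetical block/frequency pairs; in the pass these would come from
// BlockFrequencyInfo::getBlockFreq().
struct BlockFreq {
  std::string Name;
  uint64_t Freq;
};

// Emit a node-only DOT graph shaped like the GVDT_Integer labels above;
// edges are omitted for brevity.
void emitDot(const std::vector<BlockFreq> &Blocks) {
  std::puts("digraph BlockFrequencyDAG {");
  for (size_t i = 0; i < Blocks.size(); ++i)
    std::printf("  \"%s\" [label=\"%s:%llu\"];\n", Blocks[i].Name.c_str(),
                Blocks[i].Name.c_str(), (unsigned long long)Blocks[i].Freq);
  std::puts("}");
}

int main() {
  std::vector<BlockFreq> Blocks(1);
  Blocks[0].Name = "entry";
  Blocks[0].Freq = 1024;
  emitDot(Blocks); // pipe through "dot -Tpng" to render
  return 0;
}
```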
diff --git a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
index 6c58856..86560ca 100644
--- a/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
+++ b/contrib/llvm/lib/Analysis/BranchProbabilityInfo.cpp
@@ -69,6 +69,20 @@ static const uint32_t UR_TAKEN_WEIGHT = 1;
 /// easily subsume it.
 static const uint32_t UR_NONTAKEN_WEIGHT = 1024*1024 - 1;
 
+/// \brief Weight for a branch taken going into a cold block.
+///
+/// This is the weight for a branch taken toward a block marked
+/// cold.  A block is marked cold if it's postdominated by a
+/// block containing a call to a cold function.  Cold functions
+/// are those marked with attribute 'cold'.
+static const uint32_t CC_TAKEN_WEIGHT = 4;
+
+/// \brief Weight for a branch not-taken into a cold block.
+///
+/// This is the weight for a branch not taken toward a block marked
+/// cold.
+static const uint32_t CC_NONTAKEN_WEIGHT = 64;
+
 static const uint32_t PH_TAKEN_WEIGHT = 20;
 static const uint32_t PH_NONTAKEN_WEIGHT = 12;
 
@@ -137,8 +151,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
   uint32_t UnreachableWeight =
     std::max(UR_TAKEN_WEIGHT / (unsigned)UnreachableEdges.size(), MIN_WEIGHT);
-  for (SmallVector<unsigned, 4>::iterator I = UnreachableEdges.begin(),
-                                          E = UnreachableEdges.end();
+  for (SmallVectorImpl<unsigned>::iterator I = UnreachableEdges.begin(),
+                                           E = UnreachableEdges.end();
        I != E; ++I)
     setEdgeWeight(BB, *I, UnreachableWeight);
 
@@ -147,8 +161,8 @@ bool BranchProbabilityInfo::calcUnreachableHeuristics(BasicBlock *BB) {
   uint32_t ReachableWeight =
     std::max(UR_NONTAKEN_WEIGHT / (unsigned)ReachableEdges.size(),
             NORMAL_WEIGHT);
-  for (SmallVector<unsigned, 4>::iterator I = ReachableEdges.begin(),
-                                          E = ReachableEdges.end();
+  for (SmallVectorImpl<unsigned>::iterator I = ReachableEdges.begin(),
+                                           E = ReachableEdges.end();
        I != E; ++I)
     setEdgeWeight(BB, *I, ReachableWeight);
 
@@ -193,6 +207,67 @@ bool BranchProbabilityInfo::calcMetadataWeights(BasicBlock *BB) {
   return true;
 }
 
+/// \brief Calculate edge weights for edges leading to cold blocks.
+///
+/// A cold block is one post-dominated by a block with a call to a
+/// cold function.  Those edges are unlikely to be taken, so we give
+/// them relatively low weight.
+///
+/// Return true if we could compute the weights for cold edges.
+/// Return false, otherwise.
+bool BranchProbabilityInfo::calcColdCallHeuristics(BasicBlock *BB) {
+  TerminatorInst *TI = BB->getTerminator();
+  if (TI->getNumSuccessors() == 0)
+    return false;
+
+  // Determine which successors are post-dominated by a cold block.
+  SmallVector<unsigned, 4> ColdEdges;
+  SmallVector<unsigned, 4> NormalEdges;
+  for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+    if (PostDominatedByColdCall.count(*I))
+      ColdEdges.push_back(I.getSuccessorIndex());
+    else
+      NormalEdges.push_back(I.getSuccessorIndex());
+
+  // If all successors are in the set of blocks post-dominated by cold calls,
+  // this block is in the set post-dominated by cold calls.
+  if (ColdEdges.size() == TI->getNumSuccessors())
+    PostDominatedByColdCall.insert(BB);
+  else {
+    // Otherwise, if the block itself contains a cold function, add it to the
+    // set of blocks postdominated by a cold call.
+    assert(!PostDominatedByColdCall.count(BB));
+    for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
+      if (CallInst *CI = dyn_cast<CallInst>(I))
+        if (CI->hasFnAttr(Attribute::Cold)) {
+          PostDominatedByColdCall.insert(BB);
+          break;
+        }
+  }
+
+  // Skip probabilities if this block has a single successor.
+  if (TI->getNumSuccessors() == 1 || ColdEdges.empty())
+    return false;
+
+  uint32_t ColdWeight =
+      std::max(CC_TAKEN_WEIGHT / (unsigned) ColdEdges.size(), MIN_WEIGHT);
+  for (SmallVectorImpl<unsigned>::iterator I = ColdEdges.begin(),
+                                           E = ColdEdges.end();
+       I != E; ++I)
+    setEdgeWeight(BB, *I, ColdWeight);
+
+  if (NormalEdges.empty())
+    return true;
+  uint32_t NormalWeight = std::max(
+      CC_NONTAKEN_WEIGHT / (unsigned) NormalEdges.size(), NORMAL_WEIGHT);
+  for (SmallVectorImpl<unsigned>::iterator I = NormalEdges.begin(),
+                                           E = NormalEdges.end();
+       I != E; ++I)
+    setEdgeWeight(BB, *I, NormalWeight);
+
+  return true;
+}
+
 // Calculate Edge Weights using "Pointer Heuristics". Predict a comparsion
 // between two pointer or pointer and NULL will fail.
 bool BranchProbabilityInfo::calcPointerHeuristics(BasicBlock *BB) {
@@ -251,7 +326,7 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
     if (backWeight < NORMAL_WEIGHT)
       backWeight = NORMAL_WEIGHT;
 
-    for (SmallVector<unsigned, 8>::iterator EI = BackEdges.begin(),
+    for (SmallVectorImpl<unsigned>::iterator EI = BackEdges.begin(),
          EE = BackEdges.end(); EI != EE; ++EI) {
       setEdgeWeight(BB, *EI, backWeight);
     }
@@ -262,7 +337,7 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
     if (inWeight < NORMAL_WEIGHT)
       inWeight = NORMAL_WEIGHT;
 
-    for (SmallVector<unsigned, 8>::iterator EI = InEdges.begin(),
+    for (SmallVectorImpl<unsigned>::iterator EI = InEdges.begin(),
          EE = InEdges.end(); EI != EE; ++EI) {
       setEdgeWeight(BB, *EI, inWeight);
     }
@@ -273,7 +348,7 @@ bool BranchProbabilityInfo::calcLoopBranchHeuristics(BasicBlock *BB) {
     if (exitWeight < MIN_WEIGHT)
       exitWeight = MIN_WEIGHT;
 
-    for (SmallVector<unsigned, 8>::iterator EI = ExitingEdges.begin(),
+    for (SmallVectorImpl<unsigned>::iterator EI = ExitingEdges.begin(),
          EE = ExitingEdges.end(); EI != EE; ++EI) {
       setEdgeWeight(BB, *EI, exitWeight);
     }
@@ -323,10 +398,24 @@ bool BranchProbabilityInfo::calcZeroHeuristics(BasicBlock *BB) {
       // InstCombine canonicalizes X <= 0 into X < 1.
       // X <= 0   -> Unlikely
       isProb = false;
-    } else if (CV->isAllOnesValue() && CI->getPredicate() == CmpInst::ICMP_SGT) {
-      // InstCombine canonicalizes X >= 0 into X > -1.
-      // X >= 0   -> Likely
-      isProb = true;
+    } else if (CV->isAllOnesValue()) {
+      switch (CI->getPredicate()) {
+      case CmpInst::ICMP_EQ:
+        // X == -1  ->  Unlikely
+        isProb = false;
+        break;
+      case CmpInst::ICMP_NE:
+        // X != -1  ->  Likely
+        isProb = true;
+        break;
+      case CmpInst::ICMP_SGT:
+        // InstCombine canonicalizes X >= 0 into X > -1.
+        // X >= 0   ->  Likely
+        isProb = true;
+        break;
+      default:
+        return false;
+      }
     } else {
       return false;
     }
@@ -397,6 +486,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
   LastF = &F; // Store the last function we ran on for printing.
   LI = &getAnalysis<LoopInfo>();
   assert(PostDominatedByUnreachable.empty());
+  assert(PostDominatedByColdCall.empty());
 
   // Walk the basic blocks in post-order so that we can build up state about
   // the successors of a block iteratively.
@@ -408,6 +498,8 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
       continue;
     if (calcMetadataWeights(*I))
       continue;
+    if (calcColdCallHeuristics(*I))
+      continue;
     if (calcLoopBranchHeuristics(*I))
       continue;
     if (calcPointerHeuristics(*I))
@@ -420,6 +512,7 @@ bool BranchProbabilityInfo::runOnFunction(Function &F) {
   }
 
   PostDominatedByUnreachable.clear();
+  PostDominatedByColdCall.clear();
   return false;
 }
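To see what the new cold-call weights imply in the common two-successor case, the small check below computes the resulting branch probability. CC_TAKEN_WEIGHT and CC_NONTAKEN_WEIGHT come straight from the hunk above; the MIN_WEIGHT and NORMAL_WEIGHT floors are assumed values for the demo only (they are defined elsewhere in BranchProbabilityInfo.cpp and not shown in this patch).

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>

static const uint32_t CC_TAKEN_WEIGHT = 4;     // from the patch
static const uint32_t CC_NONTAKEN_WEIGHT = 64; // from the patch
static const uint32_t MIN_WEIGHT = 1;          // assumed floor (demo only)
static const uint32_t NORMAL_WEIGHT = 16;      // assumed floor (demo only)

int main() {
  // One cold and one normal successor, as in an if() guarding an error path.
  unsigned NumCold = 1, NumNormal = 1;
  uint32_t ColdW = std::max(CC_TAKEN_WEIGHT / NumCold, MIN_WEIGHT);
  uint32_t NormalW = std::max(CC_NONTAKEN_WEIGHT / NumNormal, NORMAL_WEIGHT);
  // 4 / (4 + 64) is roughly 0.059, i.e. about a 6% taken probability
  // for the edge into the cold block.
  std::printf("P(cold edge) = %.3f\n", (double)ColdW / (ColdW + NormalW));
  return 0;
}
```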
diff --git a/contrib/llvm/lib/Analysis/CFG.cpp b/contrib/llvm/lib/Analysis/CFG.cpp
new file mode 100644
index 0000000..c3f32d3
--- /dev/null
+++ b/contrib/llvm/lib/Analysis/CFG.cpp
@@ -0,0 +1,245 @@
+//===-- CFG.cpp - BasicBlock analysis --------------------------------------==//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This family of functions performs analyses on basic blocks, and instructions
+// contained within basic blocks.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CFG.h"
+
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/LoopInfo.h"
+
+using namespace llvm;
+
+/// FindFunctionBackedges - Analyze the specified function to find all of the
+/// loop backedges in the function and return them.  This is a relatively cheap
+/// (compared to computing dominators and loop info) analysis.
+///
+/// The output is added to Result, as pairs of <from,to> edge info.
+void llvm::FindFunctionBackedges(const Function &F,
+     SmallVectorImpl<std::pair<const BasicBlock*,const BasicBlock*> > &Result) {
+  const BasicBlock *BB = &F.getEntryBlock();
+  if (succ_begin(BB) == succ_end(BB))
+    return;
+
+  SmallPtrSet<const BasicBlock*, 8> Visited;
+  SmallVector<std::pair<const BasicBlock*, succ_const_iterator>, 8> VisitStack;
+  SmallPtrSet<const BasicBlock*, 8> InStack;
+
+  Visited.insert(BB);
+  VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+  InStack.insert(BB);
+  do {
+    std::pair<const BasicBlock*, succ_const_iterator> &Top = VisitStack.back();
+    const BasicBlock *ParentBB = Top.first;
+    succ_const_iterator &I = Top.second;
+
+    bool FoundNew = false;
+    while (I != succ_end(ParentBB)) {
+      BB = *I++;
+      if (Visited.insert(BB)) {
+        FoundNew = true;
+        break;
+      }
+      // Successor is in VisitStack, it's a back edge.
+      if (InStack.count(BB))
+        Result.push_back(std::make_pair(ParentBB, BB));
+    }
+
+    if (FoundNew) {
+      // Go down one level if there is a unvisited successor.
+      InStack.insert(BB);
+      VisitStack.push_back(std::make_pair(BB, succ_begin(BB)));
+    } else {
+      // Go up one level.
+      InStack.erase(VisitStack.pop_back_val().first);
+    }
+  } while (!VisitStack.empty());
+}
+
+/// GetSuccessorNumber - Search for the specified successor of basic block BB
+/// and return its position in the terminator instruction's list of
+/// successors.  It is an error to call this with a block that is not a
+/// successor.
+unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
+  TerminatorInst *Term = BB->getTerminator();
+#ifndef NDEBUG
+  unsigned e = Term->getNumSuccessors();
+#endif
+  for (unsigned i = 0; ; ++i) {
+    assert(i != e && "Didn't find edge?");
+    if (Term->getSuccessor(i) == Succ)
+      return i;
+  }
+}
+
+/// isCriticalEdge - Return true if the specified edge is a critical edge.
+/// Critical edges are edges from a block with multiple successors to a block
+/// with multiple predecessors.
+bool llvm::isCriticalEdge(const TerminatorInst *TI, unsigned SuccNum,
+                          bool AllowIdenticalEdges) {
+  assert(SuccNum < TI->getNumSuccessors() && "Illegal edge specification!");
+  if (TI->getNumSuccessors() == 1) return false;
+
+  const BasicBlock *Dest = TI->getSuccessor(SuccNum);
+  const_pred_iterator I = pred_begin(Dest), E = pred_end(Dest);
+
+  // If there is more than one predecessor, this is a critical edge...
+  assert(I != E && "No preds, but we have an edge to the block?");
+  const BasicBlock *FirstPred = *I;
+  ++I;        // Skip one edge due to the incoming arc from TI.
+  if (!AllowIdenticalEdges)
+    return I != E;
+
+  // If AllowIdenticalEdges is true, then we allow this edge to be considered
+  // non-critical iff all preds come from TI's block.
+  while (I != E) {
+    const BasicBlock *P = *I;
+    if (P != FirstPred)
+      return true;
+    // Note: leave this as is until no one ever compiles with either gcc 4.0.1
+    // or Xcode 2. This seems to work around the pred_iterator assert in PR 2207
+    E = pred_end(P);
+    ++I;
+  }
+  return false;
+}
+
+// LoopInfo contains a mapping from basic block to the innermost loop. Find
+// the outermost loop in the loop nest that contains BB.
+static const Loop *getOutermostLoop(const LoopInfo *LI, const BasicBlock *BB) {
+  const Loop *L = LI->getLoopFor(BB);
+  if (L) {
+    while (const Loop *Parent = L->getParentLoop())
+      L = Parent;
+  }
+  return L;
+}
+
+// True if there is a loop which contains both BB1 and BB2.
+static bool loopContainsBoth(const LoopInfo *LI,
+                             const BasicBlock *BB1, const BasicBlock *BB2) {
+  const Loop *L1 = getOutermostLoop(LI, BB1);
+  const Loop *L2 = getOutermostLoop(LI, BB2);
+  return L1 != NULL && L1 == L2;
+}
+
+static bool isPotentiallyReachableInner(SmallVectorImpl<BasicBlock *> &Worklist,
+                                        BasicBlock *StopBB,
+                                        const DominatorTree *DT,
+                                        const LoopInfo *LI) {
+  // When the stop block is unreachable, it's dominated from everywhere,
+  // regardless of whether there's a path between the two blocks.
+  if (DT && !DT->isReachableFromEntry(StopBB))
+    DT = 0;
+
+  // Limit the number of blocks we visit. The goal is to avoid run-away compile
+  // times on large CFGs without hampering sensible code. Arbitrarily chosen.
+  unsigned Limit = 32;
+  SmallSet<const BasicBlock*, 64> Visited;
+  do {
+    BasicBlock *BB = Worklist.pop_back_val();
+    if (!Visited.insert(BB))
+      continue;
+    if (BB == StopBB)
+      return true;
+    if (DT && DT->dominates(BB, StopBB))
+      return true;
+    if (LI && loopContainsBoth(LI, BB, StopBB))
+      return true;
+
+    if (!--Limit) {
+      // We haven't been able to prove it one way or the other. Conservatively
+      // answer true -- that there is potentially a path.
+      return true;
+    }
+
+    if (const Loop *Outer = LI ? getOutermostLoop(LI, BB) : 0) {
+      // All blocks in a single loop are reachable from all other blocks. From
+      // any of these blocks, we can skip directly to the exits of the loop,
+      // ignoring any other blocks inside the loop body.
+      Outer->getExitBlocks(Worklist);
+    } else {
+      for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+        Worklist.push_back(*I);
+    }
+  } while (!Worklist.empty());
+
+  // We have exhausted all possible paths and are certain that 'To' can not be
+  // reached from 'From'.
+  return false;
+}
+
+bool llvm::isPotentiallyReachable(const BasicBlock *A, const BasicBlock *B,
+                                  const DominatorTree *DT, const LoopInfo *LI) {
+  assert(A->getParent() == B->getParent() &&
+         "This analysis is function-local!");
+
+  SmallVector<BasicBlock*, 32> Worklist;
+  Worklist.push_back(const_cast<BasicBlock*>(A));
+
+  return isPotentiallyReachableInner(Worklist, const_cast<BasicBlock*>(B),
+                                     DT, LI);
+}
+
+bool llvm::isPotentiallyReachable(const Instruction *A, const Instruction *B,
+                                  const DominatorTree *DT, const LoopInfo *LI) {
+  assert(A->getParent()->getParent() == B->getParent()->getParent() &&
+         "This analysis is function-local!");
+
+  SmallVector<BasicBlock*, 32> Worklist;
+
+  if (A->getParent() == B->getParent()) {
+    // The same block case is special because it's the only time we're looking
+    // within a single block to see which instruction comes first. Once we
+    // start looking at multiple blocks, the first instruction of the block is
+    // reachable, so we only need to determine reachability between whole
+    // blocks.
+    BasicBlock *BB = const_cast<BasicBlock *>(A->getParent());
+
+    // If the block is in a loop then we can reach any instruction in the block
+    // from any other instruction in the block by going around a backedge.
+    if (LI && LI->getLoopFor(BB) != 0)
+      return true;
+
+    // Linear scan, start at 'A', see whether we hit 'B' or the end first.
+    for (BasicBlock::const_iterator I = A, E = BB->end(); I != E; ++I) {
+      if (&*I == B)
+        return true;
+    }
+
+    // Can't be in a loop if it's the entry block -- the entry block may not
+    // have predecessors.
+    if (BB == &BB->getParent()->getEntryBlock())
+      return false;
+
+    // Otherwise, continue doing the normal per-BB CFG walk.
+    for (succ_iterator I = succ_begin(BB), E = succ_end(BB); I != E; ++I)
+      Worklist.push_back(*I);
+
+    if (Worklist.empty()) {
+      // We've proven that there's no path!
+      return false;
+    }
+  } else {
+    Worklist.push_back(const_cast<BasicBlock*>(A->getParent()));
+  }
+
+  if (A->getParent() == &A->getParent()->getParent()->getEntryBlock())
+    return true;
+  if (B->getParent() == &A->getParent()->getParent()->getEntryBlock())
+    return false;
+
+  return isPotentiallyReachableInner(Worklist,
+                                     const_cast<BasicBlock*>(B->getParent()),
+                                     DT, LI);
+}
There are crazy ways to capture pointers // using comparisons. diff --git a/contrib/llvm/lib/Analysis/ConstantFolding.cpp b/contrib/llvm/lib/Analysis/ConstantFolding.cpp index bc0dffc..3d32232 100644 --- a/contrib/llvm/lib/Analysis/ConstantFolding.cpp +++ b/contrib/llvm/lib/Analysis/ConstantFolding.cpp @@ -224,7 +224,8 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, APInt &Offset, const DataLayout &TD) { // Trivial case, constant is the global. if ((GV = dyn_cast(C))) { - Offset.clearAllBits(); + unsigned BitWidth = TD.getPointerTypeSizeInBits(GV->getType()); + Offset = APInt(BitWidth, 0); return true; } @@ -238,16 +239,23 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) - if (GEPOperator *GEP = dyn_cast(CE)) { - // If the base isn't a global+constant, we aren't either. - if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) - return false; + GEPOperator *GEP = dyn_cast(CE); + if (!GEP) + return false; - // Otherwise, add any offset that our operands provide. - return GEP->accumulateConstantOffset(TD, Offset); - } + unsigned BitWidth = TD.getPointerTypeSizeInBits(GEP->getType()); + APInt TmpOffset(BitWidth, 0); - return false; + // If the base isn't a global+constant, we aren't either. + if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, TmpOffset, TD)) + return false; + + // Otherwise, add any offset that our operands provide. + if (!GEP->accumulateConstantOffset(TD, TmpOffset)) + return false; + + Offset = TmpOffset; + return true; } /// ReadDataFromGlobal - Recursive helper to read bits out of global. C is the @@ -324,12 +332,12 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, // If we read all of the bytes we needed from this element we're done. uint64_t NextEltOffset = SL->getElementOffset(Index); - if (BytesLeft <= NextEltOffset-CurEltOffset-ByteOffset) + if (BytesLeft <= NextEltOffset - CurEltOffset - ByteOffset) return true; // Move to the next element of the struct. 
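// NextEltOffset - CurEltOffset - ByteOffset is the number of bytes from
// the current read position to the start of the next struct element
// (including any padding), so the updates below realign CurPtr and
// BytesLeft to that element's first byte before resetting ByteOffset.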
- CurPtr += NextEltOffset-CurEltOffset-ByteOffset; - BytesLeft -= NextEltOffset-CurEltOffset-ByteOffset; + CurPtr += NextEltOffset - CurEltOffset - ByteOffset; + BytesLeft -= NextEltOffset - CurEltOffset - ByteOffset; ByteOffset = 0; CurEltOffset = NextEltOffset; } @@ -338,7 +346,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, if (isa(C) || isa(C) || isa(C)) { - Type *EltTy = cast(C->getType())->getElementType(); + Type *EltTy = C->getType()->getSequentialElementType(); uint64_t EltSize = TD.getTypeAllocSize(EltTy); uint64_t Index = ByteOffset / EltSize; uint64_t Offset = ByteOffset - Index * EltSize; @@ -346,7 +354,7 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, if (ArrayType *AT = dyn_cast(C->getType())) NumElts = AT->getNumElements(); else - NumElts = cast(C->getType())->getNumElements(); + NumElts = C->getType()->getVectorNumElements(); for (; Index != NumElts; ++Index) { if (!ReadDataFromGlobal(C->getAggregateElement(Index), Offset, CurPtr, @@ -367,9 +375,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, if (ConstantExpr *CE = dyn_cast(C)) { if (CE->getOpcode() == Instruction::IntToPtr && - CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getContext())) + CE->getOperand(0)->getType() == TD.getIntPtrType(CE->getType())) { return ReadDataFromGlobal(CE->getOperand(0), ByteOffset, CurPtr, BytesLeft, TD); + } } // Otherwise, unknown initializer type. @@ -378,26 +387,29 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, const DataLayout &TD) { - Type *LoadTy = cast(C->getType())->getElementType(); + PointerType *PTy = cast(C->getType()); + Type *LoadTy = PTy->getElementType(); IntegerType *IntType = dyn_cast(LoadTy); // If this isn't an integer load we can't fold it directly. if (!IntType) { + unsigned AS = PTy->getAddressSpace(); + // If this is a float/double load, we can try folding it as an int32/64 load // and then bitcast the result. This can be useful for union cases. Note // that address spaces don't matter here since we're not going to result in // an actual new load. Type *MapTy; if (LoadTy->isHalfTy()) - MapTy = Type::getInt16PtrTy(C->getContext()); + MapTy = Type::getInt16PtrTy(C->getContext(), AS); else if (LoadTy->isFloatTy()) - MapTy = Type::getInt32PtrTy(C->getContext()); + MapTy = Type::getInt32PtrTy(C->getContext(), AS); else if (LoadTy->isDoubleTy()) - MapTy = Type::getInt64PtrTy(C->getContext()); + MapTy = Type::getInt64PtrTy(C->getContext(), AS); else if (LoadTy->isVectorTy()) { - MapTy = IntegerType::get(C->getContext(), - TD.getTypeAllocSizeInBits(LoadTy)); - MapTy = PointerType::getUnqual(MapTy); + MapTy = PointerType::getIntNPtrTy(C->getContext(), + TD.getTypeAllocSizeInBits(LoadTy), + AS); } else return 0; @@ -408,10 +420,11 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, } unsigned BytesLoaded = (IntType->getBitWidth() + 7) / 8; - if (BytesLoaded > 32 || BytesLoaded == 0) return 0; + if (BytesLoaded > 32 || BytesLoaded == 0) + return 0; GlobalValue *GVal; - APInt Offset(TD.getPointerSizeInBits(), 0); + APInt Offset; if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) return 0; @@ -422,7 +435,8 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. 
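// The RawBytes loop later in this function reassembles the loaded value
// byte by byte. A standalone sketch of the little-endian case (an assumed
// simplification: uint64_t instead of APInt, and 1 <= n <= 8):
#include <cstddef>
#include <cstdint>
uint64_t assembleLittleEndian(const uint8_t *bytes, std::size_t n) {
  uint64_t result = bytes[n - 1];       // start with the most significant byte
  for (std::size_t i = 1; i != n; ++i) {
    result <<= 8;
    result |= bytes[n - 1 - i];         // shift in the next lower byte
  }
  return result;
}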
- if (Offset.isNegative()) return 0; + if (Offset.isNegative()) + return 0; // If we're not accessing anything in this constant, the result is undefined. if (Offset.getZExtValue() >= @@ -439,7 +453,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, ResultVal = RawBytes[BytesLoaded - 1]; for (unsigned i = 1; i != BytesLoaded; ++i) { ResultVal <<= 8; - ResultVal |= RawBytes[BytesLoaded-1-i]; + ResultVal |= RawBytes[BytesLoaded - 1 - i]; } } else { ResultVal = RawBytes[0]; @@ -464,14 +478,17 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C, // If the loaded value isn't a constant expr, we can't handle it. ConstantExpr *CE = dyn_cast(C); - if (!CE) return 0; + if (!CE) + return 0; if (CE->getOpcode() == Instruction::GetElementPtr) { - if (GlobalVariable *GV = dyn_cast(CE->getOperand(0))) - if (GV->isConstant() && GV->hasDefinitiveInitializer()) + if (GlobalVariable *GV = dyn_cast(CE->getOperand(0))) { + if (GV->isConstant() && GV->hasDefinitiveInitializer()) { if (Constant *V = ConstantFoldLoadThroughGEPConstantExpr(GV->getInitializer(), CE)) return V; + } + } } // Instead of loading constant c string, use corresponding integer value @@ -576,13 +593,13 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, // constant. This happens frequently when iterating over a global array. if (Opc == Instruction::Sub && DL) { GlobalValue *GV1, *GV2; - unsigned PtrSize = DL->getPointerSizeInBits(); - unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); - APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0); + APInt Offs1, Offs2; if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL)) if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) && GV1 == GV2) { + unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); + // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. // PtrToInt may change the bitwidth so we have convert to the right size // first. 
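The (&GV+C1) - (&GV+C2) fold above reduces a pointer difference to a
difference of constant offsets truncated to the width of the result type.
A minimal standalone sketch of that arithmetic (a hypothetical helper,
with uint64_t standing in for APInt):

    #include <cstdint>
    // Difference of two constant offsets into the same global, truncated
    // to the subtraction's result width (OpSize in the code above).
    uint64_t foldSameGlobalPtrDiff(uint64_t offs1, uint64_t offs2,
                                   unsigned opBits) {
      uint64_t diff = offs1 - offs2;          // cannot overflow, per the fold
      if (opBits < 64)
        diff &= (UINT64_C(1) << opBits) - 1;  // model the trunc to OpSize
      return diff;
    }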
@@ -600,15 +617,18 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, static Constant *CastGEPIndices(ArrayRef Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { - if (!TD) return 0; - Type *IntPtrTy = TD->getIntPtrType(ResultTy->getContext()); + if (!TD) + return 0; + + Type *IntPtrTy = TD->getIntPtrType(ResultTy); bool Any = false; SmallVector NewIdxs; for (unsigned i = 1, e = Ops.size(); i != e; ++i) { if ((i == 1 || - !isa(GetElementPtrInst::getIndexedType(Ops[0]->getType(), - Ops.slice(1, i-1)))) && + !isa(GetElementPtrInst::getIndexedType( + Ops[0]->getType(), + Ops.slice(1, i - 1)))) && Ops[i]->getType() != IntPtrTy) { Any = true; NewIdxs.push_back(ConstantExpr::getCast(CastInst::getCastOpcode(Ops[i], @@ -619,13 +639,16 @@ static Constant *CastGEPIndices(ArrayRef Ops, } else NewIdxs.push_back(Ops[i]); } - if (!Any) return 0; - Constant *C = - ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); - if (ConstantExpr *CE = dyn_cast(C)) + if (!Any) + return 0; + + Constant *C = ConstantExpr::getGetElementPtr(Ops[0], NewIdxs); + if (ConstantExpr *CE = dyn_cast(C)) { if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; + } + return C; } @@ -640,7 +663,7 @@ static Constant* StripPtrCastKeepAS(Constant* Ptr) { if (NewPtrTy->getAddressSpace() != OldPtrTy->getAddressSpace()) { NewPtrTy = NewPtrTy->getElementType()->getPointerTo( OldPtrTy->getAddressSpace()); - Ptr = ConstantExpr::getBitCast(Ptr, NewPtrTy); + Ptr = ConstantExpr::getPointerCast(Ptr, NewPtrTy); } return Ptr; } @@ -651,11 +674,12 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, Type *ResultTy, const DataLayout *TD, const TargetLibraryInfo *TLI) { Constant *Ptr = Ops[0]; - if (!TD || !cast(Ptr->getType())->getElementType()->isSized() || + if (!TD || !Ptr->getType()->getPointerElementType()->isSized() || !Ptr->getType()->isPointerTy()) return 0; - Type *IntPtrTy = TD->getIntPtrType(Ptr->getContext()); + Type *IntPtrTy = TD->getIntPtrType(Ptr->getType()); + Type *ResultElementTy = ResultTy->getPointerElementType(); // If this is a constant expr gep that is effectively computing an // "offsetof", fold it into 'cast int Size to T*' instead of 'gep 0, 0, 12' @@ -664,8 +688,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // If this is "gep i8* Ptr, (sub 0, V)", fold this as: // "inttoptr (sub (ptrtoint Ptr), V)" - if (Ops.size() == 2 && - cast(ResultTy)->getElementType()->isIntegerTy(8)) { + if (Ops.size() == 2 && ResultElementTy->isIntegerTy(8)) { ConstantExpr *CE = dyn_cast(Ops[1]); assert((CE == 0 || CE->getType() == IntPtrTy) && "CastGEPIndices didn't canonicalize index types!"); @@ -692,7 +715,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // If this is a GEP of a GEP, fold it all into a single GEP. while (GEPOperator *GEP = dyn_cast(Ptr)) { - SmallVector NestedOps(GEP->op_begin()+1, GEP->op_end()); + SmallVector NestedOps(GEP->op_begin() + 1, GEP->op_end()); // Do not try the incorporate the sub-GEP if some index is not a number. bool AllConstantInt = true; @@ -713,12 +736,15 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // If the base value for this address is a literal integer value, fold the // getelementptr to the resulting integer value casted to the pointer type. 
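// For example, getelementptr (i8* inttoptr (i64 1000 to i8*), i64 12)
// reaches this point with BasePtr == 1000 and Offset == 12, and folds to
// inttoptr (i64 1012 to i8*) via the BasePtr handling below.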
APInt BasePtr(BitWidth, 0); - if (ConstantExpr *CE = dyn_cast(Ptr)) - if (CE->getOpcode() == Instruction::IntToPtr) + if (ConstantExpr *CE = dyn_cast(Ptr)) { + if (CE->getOpcode() == Instruction::IntToPtr) { if (ConstantInt *Base = dyn_cast(CE->getOperand(0))) BasePtr = Base->getValue().zextOrTrunc(BitWidth); + } + } + if (Ptr->isNullValue() || BasePtr != 0) { - Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr); + Constant *C = ConstantInt::get(Ptr->getContext(), Offset + BasePtr); return ConstantExpr::getIntToPtr(C, ResultTy); } @@ -728,7 +754,8 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // Also, this helps GlobalOpt do SROA on GlobalVariables. Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Forming regular GEP of non-pointer type"); - SmallVector NewIdxs; + SmallVector NewIdxs; + do { if (SequentialType *ATy = dyn_cast(Ty)) { if (ATy->isPointerTy()) { @@ -743,7 +770,6 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // Determine which element of the array the offset points into. APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType())); - IntegerType *IntPtrTy = TD->getIntPtrType(Ty->getContext()); if (ElemSize == 0) // The element size is 0. This may be [0 x Ty]*, so just use a zero // index for this level and proceed to the next level to see if it can @@ -778,7 +804,7 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, // We've reached some non-indexable type. break; } - } while (Ty != cast(ResultTy)->getElementType()); + } while (Ty != ResultElementTy); // If we haven't used up the entire offset by descending the static // type, then the offset is pointing into the middle of an indivisible @@ -787,14 +813,13 @@ static Constant *SymbolicallyEvaluateGEP(ArrayRef Ops, return 0; // Create a GEP. - Constant *C = - ConstantExpr::getGetElementPtr(Ptr, NewIdxs); - assert(cast(C->getType())->getElementType() == Ty && + Constant *C = ConstantExpr::getGetElementPtr(Ptr, NewIdxs); + assert(C->getType()->getPointerElementType() == Ty && "Computed GetElementPtr has unexpected type!"); // If we ended up indexing a member with a type that doesn't match // the type of what the original indices indexed, add a cast. - if (Ty != cast(ResultTy)->getElementType()) + if (Ty != ResultElementTy) C = FoldBitCast(C, ResultTy, *TD); return C; @@ -867,16 +892,18 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, if (const LoadInst *LI = dyn_cast(I)) return ConstantFoldLoadInst(LI, TD); - if (InsertValueInst *IVI = dyn_cast(I)) + if (InsertValueInst *IVI = dyn_cast(I)) { return ConstantExpr::getInsertValue( cast(IVI->getAggregateOperand()), cast(IVI->getInsertedValueOperand()), IVI->getIndices()); + } - if (ExtractValueInst *EVI = dyn_cast(I)) + if (ExtractValueInst *EVI = dyn_cast(I)) { return ConstantExpr::getExtractValue( cast(EVI->getAggregateOperand()), EVI->getIndices()); + } return ConstantFoldInstOperands(I->getOpcode(), I->getType(), Ops, TD, TLI); } @@ -930,9 +957,10 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, const TargetLibraryInfo *TLI) { // Handle easy binops first. 
if (Instruction::isBinaryOp(Opcode)) { - if (isa(Ops[0]) || isa(Ops[1])) + if (isa(Ops[0]) || isa(Ops[1])) { if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD)) return C; + } return ConstantExpr::get(Opcode, Ops[0], Ops[1]); } @@ -953,10 +981,11 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, if (TD && CE->getOpcode() == Instruction::IntToPtr) { Constant *Input = CE->getOperand(0); unsigned InWidth = Input->getType()->getScalarSizeInBits(); - if (TD->getPointerSizeInBits() < InWidth) { + unsigned PtrWidth = TD->getPointerTypeSizeInBits(CE->getType()); + if (PtrWidth < InWidth) { Constant *Mask = - ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth, - TD->getPointerSizeInBits())); + ConstantInt::get(CE->getContext(), + APInt::getLowBitsSet(InWidth, PtrWidth)); Input = ConstantExpr::getAnd(Input, Mask); } // Do a zext or trunc to get to the dest size. @@ -966,13 +995,22 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, return ConstantExpr::getCast(Opcode, Ops[0], DestTy); case Instruction::IntToPtr: // If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if - // the int size is >= the ptr size. This requires knowing the width of a - // pointer, so it can't be done in ConstantExpr::getCast. - if (ConstantExpr *CE = dyn_cast(Ops[0])) - if (TD && - TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() && - CE->getOpcode() == Instruction::PtrToInt) - return FoldBitCast(CE->getOperand(0), DestTy, *TD); + // the int size is >= the ptr size and the address spaces are the same. + // This requires knowing the width of a pointer, so it can't be done in + // ConstantExpr::getCast. + if (ConstantExpr *CE = dyn_cast(Ops[0])) { + if (TD && CE->getOpcode() == Instruction::PtrToInt) { + Constant *SrcPtr = CE->getOperand(0); + unsigned SrcPtrSize = TD->getPointerTypeSizeInBits(SrcPtr->getType()); + unsigned MidIntSize = CE->getType()->getScalarSizeInBits(); + + if (MidIntSize >= SrcPtrSize) { + unsigned SrcAS = SrcPtr->getType()->getPointerAddressSpace(); + if (SrcAS == DestTy->getPointerAddressSpace()) + return FoldBitCast(CE->getOperand(0), DestTy, *TD); + } + } + } return ConstantExpr::getCast(Opcode, Ops[0], DestTy); case Instruction::Trunc: @@ -984,6 +1022,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, Type *DestTy, case Instruction::SIToFP: case Instruction::FPToUI: case Instruction::FPToSI: + case Instruction::AddrSpaceCast: return ConstantExpr::getCast(Opcode, Ops[0], DestTy); case Instruction::BitCast: if (TD) @@ -1024,8 +1063,8 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // around to know if bit truncation is happening. if (ConstantExpr *CE0 = dyn_cast(Ops0)) { if (TD && Ops1->isNullValue()) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); if (CE0->getOpcode() == Instruction::IntToPtr) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getType()); // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0), @@ -1036,19 +1075,21 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. 
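// For example, with 64-bit pointers, icmp eq (i64 ptrtoint (i8* @g to
// i64)), 0 folds to icmp eq i8* @g, null, whereas a ptrtoint to i32 is
// left alone because the truncation may have discarded set high bits.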
- if (CE0->getOpcode() == Instruction::PtrToInt && - CE0->getType() == IntPtrTy) { - Constant *C = CE0->getOperand(0); - Constant *Null = Constant::getNullValue(C->getType()); - return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); + if (CE0->getOpcode() == Instruction::PtrToInt) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); + if (CE0->getType() == IntPtrTy) { + Constant *C = CE0->getOperand(0); + Constant *Null = Constant::getNullValue(C->getType()); + return ConstantFoldCompareInstOperands(Predicate, C, Null, TD, TLI); + } } } if (ConstantExpr *CE1 = dyn_cast(Ops1)) { if (TD && CE0->getOpcode() == CE1->getOpcode()) { - Type *IntPtrTy = TD->getIntPtrType(CE0->getContext()); - if (CE0->getOpcode() == Instruction::IntToPtr) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getType()); + // Convert the integer value to the right size to ensure we get the // proper extension or truncation. Constant *C0 = ConstantExpr::getIntegerCast(CE0->getOperand(0), @@ -1060,11 +1101,17 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate, // Only do this transformation if the int is intptrty in size, otherwise // there is a truncation or extension that we aren't modeling. - if ((CE0->getOpcode() == Instruction::PtrToInt && - CE0->getType() == IntPtrTy && - CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) - return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0), - CE1->getOperand(0), TD, TLI); + if (CE0->getOpcode() == Instruction::PtrToInt) { + Type *IntPtrTy = TD->getIntPtrType(CE0->getOperand(0)->getType()); + if (CE0->getType() == IntPtrTy && + CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()) { + return ConstantFoldCompareInstOperands(Predicate, + CE0->getOperand(0), + CE1->getOperand(0), + TD, + TLI); + } + } } } @@ -1101,7 +1148,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPConstantExpr(Constant *C, // addressing. for (unsigned i = 2, e = CE->getNumOperands(); i != e; ++i) { C = C->getAggregateElement(CE->getOperand(i)); - if (C == 0) return 0; + if (C == 0) + return 0; } return C; } @@ -1116,7 +1164,8 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, // addressing. for (unsigned i = 0, e = Indices.size(); i != e; ++i) { C = C->getAggregateElement(Indices[i]); - if (C == 0) return 0; + if (C == 0) + return 0; } return C; } @@ -1128,8 +1177,7 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, /// canConstantFoldCallTo - Return true if its even possible to fold a call to /// the specified function. -bool -llvm::canConstantFoldCallTo(const Function *F) { +bool llvm::canConstantFoldCallTo(const Function *F) { switch (F->getIntrinsicID()) { case Intrinsic::fabs: case Intrinsic::log: @@ -1167,7 +1215,8 @@ llvm::canConstantFoldCallTo(const Function *F) { case 0: break; } - if (!F->hasName()) return false; + if (!F->hasName()) + return false; StringRef Name = F->getName(); // In these cases, the check of the length is required. We don't want to @@ -1250,7 +1299,7 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), static Constant *ConstantFoldConvertToInt(const APFloat &Val, bool roundTowardZero, Type *Ty) { // All of these conversion intrinsics form an integer of at most 64bits. 
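// The folding below defers to APFloat::convertToInteger. A rough
// standalone analogue of the round-toward-zero case (an assumed
// simplification that ignores exact APFloat semantics; 0 < bits <= 63
// keeps the double comparisons representable):
#include <cmath>
#include <cstdint>
bool convertToIntTowardZero(double v, unsigned bits, int64_t &out) {
  double t = std::trunc(v);                      // round toward zero
  double limit = std::ldexp(1.0, (int)bits - 1); // 2^(bits-1)
  if (std::isnan(t) || t < -limit || t >= limit)
    return false;                                // invalid op: no constant fold
  out = (int64_t)t;
  return true;
}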
- unsigned ResultWidth = cast(Ty)->getBitWidth(); + unsigned ResultWidth = Ty->getIntegerBitWidth(); assert(ResultWidth <= 64 && "Can only constant fold conversions to 64 and 32 bit ints"); @@ -1271,7 +1320,8 @@ static Constant *ConstantFoldConvertToInt(const APFloat &Val, Constant * llvm::ConstantFoldCall(Function *F, ArrayRef Operands, const TargetLibraryInfo *TLI) { - if (!F->hasName()) return 0; + if (!F->hasName()) + return 0; StringRef Name = F->getName(); Type *Ty = F->getReturnType(); diff --git a/contrib/llvm/lib/Analysis/CostModel.cpp b/contrib/llvm/lib/Analysis/CostModel.cpp index 98a7780..f943258 100644 --- a/contrib/llvm/lib/Analysis/CostModel.cpp +++ b/contrib/llvm/lib/Analysis/CostModel.cpp @@ -19,6 +19,7 @@ #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Function.h" @@ -26,10 +27,15 @@ #include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Value.h" #include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; +static cl::opt EnableReduxCost("costmodel-reduxcost", cl::init(false), + cl::Hidden, + cl::desc("Recognize reduction patterns.")); + namespace { class CostModelAnalysis : public FunctionPass { @@ -81,7 +87,7 @@ CostModelAnalysis::runOnFunction(Function &F) { return false; } -static bool isReverseVectorMask(SmallVector &Mask) { +static bool isReverseVectorMask(SmallVectorImpl &Mask) { for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) return false; @@ -105,6 +111,260 @@ static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { return OpInfo; } +static bool matchMask(SmallVectorImpl &M1, SmallVectorImpl &M2) { + if (M1.size() != M2.size()) + return false; + + for (unsigned i = 0, e = M1.size(); i != e; ++i) + if (M1[i] != M2[i]) + return false; + + return true; +} + +static bool matchPairwiseShuffleMask(ShuffleVectorInst *SI, bool IsLeft, + unsigned Level) { + // We don't need a shuffle if we just want to have element 0 in position 0 of + // the vector. + if (!SI && Level == 0 && IsLeft) + return true; + else if (!SI) + return false; + + SmallVector Mask(SI->getType()->getVectorNumElements(), -1); + + // Build a mask of 0, 2, ... (left) or 1, 3, ... (right) depending on whether + // we look at the left or right side. + for (unsigned i = 0, e = (1 << Level), val = !IsLeft; i != e; ++i, val += 2) + Mask[i] = val; + + SmallVector ActualMask = SI->getShuffleMask(); + if (!matchMask(Mask, ActualMask)) + return false; + + return true; +} + +static bool matchPairwiseReductionAtLevel(const BinaryOperator *BinOp, + unsigned Level, unsigned NumLevels) { + // Match one level of pairwise operations. + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + if (BinOp == 0) + return false; + + assert(BinOp->getType()->isVectorTy() && "Expecting a vector type"); + + unsigned Opcode = BinOp->getOpcode(); + Value *L = BinOp->getOperand(0); + Value *R = BinOp->getOperand(1); + + ShuffleVectorInst *LS = dyn_cast(L); + if (!LS && Level) + return false; + ShuffleVectorInst *RS = dyn_cast(R); + if (!RS && Level) + return false; + + // On level 0 we can omit one shufflevector instruction. 
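// matchPairwiseShuffleMask above accepts lanes 0,2,4,... on the left and
// 1,3,5,... on the right, padded with -1 (undef); at level 0 the left
// mask <0, undef, ...> is a no-op for lane 0, which is why the input may
// appear unshuffled. A standalone builder for the expected mask (a
// hypothetical helper mirroring the loop above; assumes
// (1 << level) <= numElts):
#include <vector>
std::vector<int> pairwiseShuffleMask(unsigned level, bool isLeft,
                                     unsigned numElts) {
  std::vector<int> mask(numElts, -1);            // -1 encodes an undef lane
  int val = isLeft ? 0 : 1;
  for (unsigned i = 0, e = 1u << level; i != e; ++i, val += 2)
    mask[i] = val;
  return mask;
}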
+ if (!Level && !RS && !LS) + return false; + + // Shuffle inputs must match. + Value *NextLevelOpL = LS ? LS->getOperand(0) : 0; + Value *NextLevelOpR = RS ? RS->getOperand(0) : 0; + Value *NextLevelOp = 0; + if (NextLevelOpR && NextLevelOpL) { + // If we have two shuffles their operands must match. + if (NextLevelOpL != NextLevelOpR) + return false; + + NextLevelOp = NextLevelOpL; + } else if (Level == 0 && (NextLevelOpR || NextLevelOpL)) { + // On the first level we can omit the shufflevector <0, undef,...>. So the + // input to the other shufflevector <1, undef> must match with one of the + // inputs to the current binary operation. + // Example: + // %NextLevelOpL = shufflevector %R, <1, undef ...> + // %BinOp = fadd %NextLevelOpL, %R + if (NextLevelOpL && NextLevelOpL != R) + return false; + else if (NextLevelOpR && NextLevelOpR != L) + return false; + + NextLevelOp = NextLevelOpL ? R : L; + } else + return false; + + // Check that the next levels binary operation exists and matches with the + // current one. + BinaryOperator *NextLevelBinOp = 0; + if (Level + 1 != NumLevels) { + if (!(NextLevelBinOp = dyn_cast(NextLevelOp))) + return false; + else if (NextLevelBinOp->getOpcode() != Opcode) + return false; + } + + // Shuffle mask for pairwise operation must match. + if (matchPairwiseShuffleMask(LS, true, Level)) { + if (!matchPairwiseShuffleMask(RS, false, Level)) + return false; + } else if (matchPairwiseShuffleMask(RS, true, Level)) { + if (!matchPairwiseShuffleMask(LS, false, Level)) + return false; + } else + return false; + + if (++Level == NumLevels) + return true; + + // Match next level. + return matchPairwiseReductionAtLevel(NextLevelBinOp, Level, NumLevels); +} + +static bool matchPairwiseReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return false; + + // Need to extract the first element. + ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return false; + + BinaryOperator *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); + if (!RdxStart) + return false; + + Type *VecTy = ReduxRoot->getOperand(0)->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return false; + + // We look for a sequence of shuffle,shuffle,add triples like the following + // that builds a pairwise reduction tree. 
+ // + // (X0, X1, X2, X3) + // (X0 + X1, X2 + X3, undef, undef) + // ((X0 + X1) + (X2 + X3), undef, undef, undef) + // + // %rdx.shuf.0.0 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> + // %rdx.shuf.0.1 = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> + // %bin.rdx.0 = fadd <4 x float> %rdx.shuf.0.0, %rdx.shuf.0.1 + // %rdx.shuf.1.0 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> + // %rdx.shuf.1.1 = shufflevector <4 x float> %bin.rdx.0, <4 x float> undef, + // <4 x i32> + // %bin.rdx8 = fadd <4 x float> %rdx.shuf.1.0, %rdx.shuf.1.1 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + if (!matchPairwiseReductionAtLevel(RdxStart, 0, Log2_32(NumVecElems))) + return false; + + Opcode = RdxStart->getOpcode(); + Ty = VecTy; + + return true; +} + +static std::pair +getShuffleAndOtherOprd(BinaryOperator *B) { + + Value *L = B->getOperand(0); + Value *R = B->getOperand(1); + ShuffleVectorInst *S = 0; + + if ((S = dyn_cast(L))) + return std::make_pair(R, S); + + S = dyn_cast(R); + return std::make_pair(L, S); +} + +static bool matchVectorSplittingReduction(const ExtractElementInst *ReduxRoot, + unsigned &Opcode, Type *&Ty) { + if (!EnableReduxCost) + return false; + + // Need to extract the first element. + ConstantInt *CI = dyn_cast(ReduxRoot->getOperand(1)); + unsigned Idx = ~0u; + if (CI) + Idx = CI->getZExtValue(); + if (Idx != 0) + return false; + + BinaryOperator *RdxStart = dyn_cast(ReduxRoot->getOperand(0)); + if (!RdxStart) + return false; + unsigned RdxOpcode = RdxStart->getOpcode(); + + Type *VecTy = ReduxRoot->getOperand(0)->getType(); + unsigned NumVecElems = VecTy->getVectorNumElements(); + if (!isPowerOf2_32(NumVecElems)) + return false; + + // We look for a sequence of shuffles and adds like the following matching one + // fadd, shuffle vector pair at a time. + // + // %rdx.shuf = shufflevector <4 x float> %rdx, <4 x float> undef, + // <4 x i32> + // %bin.rdx = fadd <4 x float> %rdx, %rdx.shuf + // %rdx.shuf7 = shufflevector <4 x float> %bin.rdx, <4 x float> undef, + // <4 x i32> + // %bin.rdx8 = fadd <4 x float> %bin.rdx, %rdx.shuf7 + // %r = extractelement <4 x float> %bin.rdx8, i32 0 + + unsigned MaskStart = 1; + Value *RdxOp = RdxStart; + SmallVector ShuffleMask(NumVecElems, 0); + unsigned NumVecElemsRemain = NumVecElems; + while (NumVecElemsRemain - 1) { + // Check for the right reduction operation. + BinaryOperator *BinOp; + if (!(BinOp = dyn_cast(RdxOp))) + return false; + if (BinOp->getOpcode() != RdxOpcode) + return false; + + Value *NextRdxOp; + ShuffleVectorInst *Shuffle; + tie(NextRdxOp, Shuffle) = getShuffleAndOtherOprd(BinOp); + + // Check the current reduction operation and the shuffle use the same value. + if (Shuffle == 0) + return false; + if (Shuffle->getOperand(0) != NextRdxOp) + return false; + + // Check that shuffle masks matches. + for (unsigned j = 0; j != MaskStart; ++j) + ShuffleMask[j] = MaskStart + j; + // Fill the rest of the mask with -1 for undef. 
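// (The mask built here selects the high half of the lanes still active at
// this step: MaskStart .. 2*MaskStart-1, then undef padding.) The overall
// pattern, sketched on scalars (an assumed simplification of the vector
// sequence being matched):
#include <cstddef>
#include <vector>
double splitReduce(std::vector<double> v) {      // size must be a power of two
  for (std::size_t n = v.size(); n > 1; n /= 2)
    for (std::size_t i = 0; i != n / 2; ++i)
      v[i] += v[i + n / 2];                      // fold the high half onto the low
  return v[0];
}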
+ std::fill(&ShuffleMask[MaskStart], ShuffleMask.end(), -1); + + SmallVector Mask = Shuffle->getShuffleMask(); + if (!matchMask(ShuffleMask, Mask)) + return false; + + RdxOp = NextRdxOp; + NumVecElemsRemain /= 2; + MaskStart *= 2; + } + + Opcode = RdxOpcode; + Ty = VecTy; + return true; +} + unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { if (!TTI) return -1; @@ -189,18 +449,29 @@ unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); + + // Try to match a reduction sequence (series of shufflevector and vector + // adds followed by a extractelement). + unsigned ReduxOpCode; + Type *ReduxType; + + if (matchVectorSplittingReduction(EEI, ReduxOpCode, ReduxType)) + return TTI->getReductionCost(ReduxOpCode, ReduxType, false); + else if (matchPairwiseReduction(EEI, ReduxOpCode, ReduxType)) + return TTI->getReductionCost(ReduxOpCode, ReduxType, true); + return TTI->getVectorInstrCost(I->getOpcode(), EEI->getOperand(0)->getType(), Idx); } case Instruction::InsertElement: { - const InsertElementInst * IE = cast(I); - ConstantInt *CI = dyn_cast(IE->getOperand(2)); - unsigned Idx = -1; - if (CI) - Idx = CI->getZExtValue(); - return TTI->getVectorInstrCost(I->getOpcode(), - IE->getType(), Idx); - } + const InsertElementInst * IE = cast(I); + ConstantInt *CI = dyn_cast(IE->getOperand(2)); + unsigned Idx = -1; + if (CI) + Idx = CI->getZExtValue(); + return TTI->getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); + } case Instruction::ShuffleVector: { const ShuffleVectorInst *Shuffle = cast(I); Type *VecTypOp0 = Shuffle->getOperand(0)->getType(); diff --git a/contrib/llvm/lib/Analysis/Delinearization.cpp b/contrib/llvm/lib/Analysis/Delinearization.cpp new file mode 100644 index 0000000..3ed0609 --- /dev/null +++ b/contrib/llvm/lib/Analysis/Delinearization.cpp @@ -0,0 +1,133 @@ +//===---- Delinearization.cpp - MultiDimensional Index Delinearization ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements an analysis pass that tries to delinearize all GEP +// instructions in all loops using the SCEV analysis functionality. This pass is +// only used for testing purposes: if your pass needs delinearization, please +// use the on-demand SCEVAddRecExpr::delinearize() function. 
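Delinearization recovers multidimensional subscripts from a flattened
access function. When the inner dimension sizes are known constants it is
repeated div/mod, as in the standalone sketch below (an assumed
simplification; the pass recovers the sizes symbolically through SCEV):

    #include <cstddef>
    #include <cstdint>
    #include <vector>
    // Given a flat element offset and the sizes S1..Sk of every dimension
    // but the outermost (ArrayDecl[UnknownSize][S1]...[Sk]), recover the
    // subscripts i0..ik where offset == ((i0*S1 + i1)*S2 + ...)*Sk + ik.
    // E.g. offset 57 with sizes {5, 4} yields {2, 4, 1}.
    std::vector<uint64_t> delinearize(uint64_t offset,
                                      const std::vector<uint64_t> &sizes) {
      std::vector<uint64_t> subs(sizes.size() + 1);
      for (std::size_t d = sizes.size(); d > 0; --d) {
        subs[d] = offset % sizes[d - 1];  // innermost subscript first
        offset /= sizes[d - 1];
      }
      subs[0] = offset;                   // outermost dimension is unbounded
      return subs;
    }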
+// +//===----------------------------------------------------------------------===// + +#define DL_NAME "delinearize" +#define DEBUG_TYPE DL_NAME +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/Pass.h" +#include "llvm/IR/Type.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/InstIterator.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +namespace { + +class Delinearization : public FunctionPass { + Delinearization(const Delinearization &); // do not implement +protected: + Function *F; + LoopInfo *LI; + ScalarEvolution *SE; + +public: + static char ID; // Pass identification, replacement for typeid + + Delinearization() : FunctionPass(ID) { + initializeDelinearizationPass(*PassRegistry::getPassRegistry()); + } + virtual bool runOnFunction(Function &F); + virtual void getAnalysisUsage(AnalysisUsage &AU) const; + virtual void print(raw_ostream &O, const Module *M = 0) const; +}; + +} // end anonymous namespace + +void Delinearization::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + AU.addRequired(); +} + +bool Delinearization::runOnFunction(Function &F) { + this->F = &F; + SE = &getAnalysis(); + LI = &getAnalysis(); + return false; +} + +static Value *getPointerOperand(Instruction &Inst) { + if (LoadInst *Load = dyn_cast(&Inst)) + return Load->getPointerOperand(); + else if (StoreInst *Store = dyn_cast(&Inst)) + return Store->getPointerOperand(); + else if (GetElementPtrInst *Gep = dyn_cast(&Inst)) + return Gep->getPointerOperand(); + return NULL; +} + +void Delinearization::print(raw_ostream &O, const Module *) const { + O << "Delinearization on function " << F->getName() << ":\n"; + for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { + Instruction *Inst = &(*I); + + // Only analyze loads and stores. + if (!isa(Inst) && !isa(Inst) && + !isa(Inst)) + continue; + + const BasicBlock *BB = Inst->getParent(); + // Delinearize the memory access as analyzed in all the surrounding loops. + // Do not analyze memory accesses outside loops. + for (Loop *L = LI->getLoopFor(BB); L != NULL; L = L->getParentLoop()) { + const SCEV *AccessFn = SE->getSCEVAtScope(getPointerOperand(*Inst), L); + const SCEVAddRecExpr *AR = dyn_cast(AccessFn); + + // Do not try to delinearize memory accesses that are not AddRecs. 
+ if (!AR) + break; + + O << "AddRec: " << *AR << "\n"; + + SmallVector Subscripts, Sizes; + const SCEV *Res = AR->delinearize(*SE, Subscripts, Sizes); + int Size = Subscripts.size(); + if (Res == AR || Size == 0) { + O << "failed to delinearize\n"; + continue; + } + O << "Base offset: " << *Res << "\n"; + O << "ArrayDecl[UnknownSize]"; + for (int i = 0; i < Size - 1; i++) + O << "[" << *Sizes[i] << "]"; + O << " with elements of " << *Sizes[Size - 1] << " bytes.\n"; + + O << "ArrayRef"; + for (int i = 0; i < Size; i++) + O << "[" << *Subscripts[i] << "]"; + O << "\n"; + } + } +} + +char Delinearization::ID = 0; +static const char delinearization_name[] = "Delinearization"; +INITIALIZE_PASS_BEGIN(Delinearization, DL_NAME, delinearization_name, true, + true) +INITIALIZE_PASS_DEPENDENCY(LoopInfo) +INITIALIZE_PASS_END(Delinearization, DL_NAME, delinearization_name, true, true) + +FunctionPass *llvm::createDelinearizationPass() { return new Delinearization; } diff --git a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp index cbc71bd..3b3e2ef 100644 --- a/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/DependenceAnalysis.cpp @@ -24,11 +24,11 @@ // Both of these are conservative weaknesses; // that is, not a source of correctness problems. // -// The implementation depends on the GEP instruction to -// differentiate subscripts. Since Clang linearizes subscripts -// for most arrays, we give up some precision (though the existing MIV tests -// will help). We trust that the GEP instruction will eventually be extended. -// In the meantime, we should explore Maslov's ideas about delinearization. +// The implementation depends on the GEP instruction to differentiate +// subscripts. Since Clang linearizes some array subscripts, the dependence +// analysis is using SCEV->delinearize to recover the representation of multiple +// subscripts, and thus avoid the more expensive and less precise MIV tests. The +// delinearization is controlled by the flag -da-delinearize. // // We should pay some careful attention to the possibility of integer overflow // in the implementation of the various tests. 
This could happen with Add, @@ -61,6 +61,7 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Operator.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" @@ -104,6 +105,10 @@ STATISTIC(BanerjeeApplications, "Banerjee applications"); STATISTIC(BanerjeeIndependence, "Banerjee independence"); STATISTIC(BanerjeeSuccesses, "Banerjee successes"); +static cl::opt +Delinearize("da-delinearize", cl::init(false), cl::Hidden, cl::ZeroOrMore, + cl::desc("Try to delinearize array references.")); + //===----------------------------------------------------------------------===// // basics @@ -508,7 +513,7 @@ bool DependenceAnalysis::intersectConstraints(Constraint *X, APInt Xr = Xtop; // though they're just going to be overwritten APInt::sdivrem(Xtop, Xbot, Xq, Xr); APInt Yq = Ytop; - APInt Yr = Ytop;; + APInt Yr = Ytop; APInt::sdivrem(Ytop, Ybot, Yq, Yr); if (Xr != 0 || Yr != 0) { X->setEmpty(); @@ -2951,6 +2956,11 @@ const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, AddRec->getLoop(), AddRec->getNoWrapFlags()); } + if (SE->isLoopInvariant(AddRec, TargetLoop)) + return SE->getAddRecExpr(AddRec, + Value, + TargetLoop, + SCEV::FlagAnyWrap); return SE->getAddRecExpr(addToCoefficient(AddRec->getStart(), TargetLoop, Value), AddRec->getStepRecurrence(*SE), @@ -2972,7 +2982,7 @@ const SCEV *DependenceAnalysis::addToCoefficient(const SCEV *Expr, bool DependenceAnalysis::propagate(const SCEV *&Src, const SCEV *&Dst, SmallBitVector &Loops, - SmallVector &Constraints, + SmallVectorImpl &Constraints, bool &Consistent) { bool Result = false; for (int LI = Loops.find_first(); LI >= 0; LI = Loops.find_next(LI)) { @@ -3166,6 +3176,55 @@ void DependenceAnalysis::updateDirection(Dependence::DVEntry &Level, llvm_unreachable("constraint has unexpected kind"); } +/// Check if we can delinearize the subscripts. If the SCEVs representing the +/// source and destination array references are recurrences on a nested loop, +/// this function flattens the nested recurrences into seperate recurrences +/// for each loop level. +bool +DependenceAnalysis::tryDelinearize(const SCEV *SrcSCEV, const SCEV *DstSCEV, + SmallVectorImpl &Pair) const { + const SCEVAddRecExpr *SrcAR = dyn_cast(SrcSCEV); + const SCEVAddRecExpr *DstAR = dyn_cast(DstSCEV); + if (!SrcAR || !DstAR || !SrcAR->isAffine() || !DstAR->isAffine()) + return false; + + SmallVector SrcSubscripts, DstSubscripts, SrcSizes, DstSizes; + SrcAR->delinearize(*SE, SrcSubscripts, SrcSizes); + DstAR->delinearize(*SE, DstSubscripts, DstSizes); + + int size = SrcSubscripts.size(); + int dstSize = DstSubscripts.size(); + if (size != dstSize || size < 2) + return false; + +#ifndef NDEBUG + DEBUG(errs() << "\nSrcSubscripts: "); + for (int i = 0; i < size; i++) + DEBUG(errs() << *SrcSubscripts[i]); + DEBUG(errs() << "\nDstSubscripts: "); + for (int i = 0; i < size; i++) + DEBUG(errs() << *DstSubscripts[i]); +#endif + + // The delinearization transforms a single-subscript MIV dependence test into + // a multi-subscript SIV dependence test that is easier to compute. So we + // resize Pair to contain as many pairs of subscripts as the delinearization + // has found, and then initialize the pairs following the delinearization. 
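// For example, the linearized access A[i*N + j] produces one MIV
// subscript pair, while its delinearized form produces the pairs {i, i}
// and {j, j}, each of which the cheaper SIV tests can handle.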
+ Pair.resize(size); + for (int i = 0; i < size; ++i) { + Pair[i].Src = SrcSubscripts[i]; + Pair[i].Dst = DstSubscripts[i]; + + // FIXME: we should record the bounds SrcSizes[i] and DstSizes[i] that the + // delinearization has found, and add these constraints to the dependence + // check to avoid memory accesses overflow from one dimension into another. + // This is related to the problem of determining the existence of data + // dependences in array accesses using a different number of subscripts: in + // C one can access an array A[100][100]; as A[0][9999], *A[9999], etc. + } + + return true; +} //===----------------------------------------------------------------------===// @@ -3275,6 +3334,12 @@ Dependence *DependenceAnalysis::depends(Instruction *Src, Pair[0].Dst = DstSCEV; } + if (Delinearize && Pairs == 1 && CommonLevels > 1 && + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + DEBUG(dbgs() << " delinerized GEP\n"); + Pairs = Pair.size(); + } + for (unsigned P = 0; P < Pairs; ++P) { Pair[P].Loops.resize(MaxLevels + 1); Pair[P].GroupLoops.resize(MaxLevels + 1); @@ -3693,6 +3758,12 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, Pair[0].Dst = DstSCEV; } + if (Delinearize && Pairs == 1 && CommonLevels > 1 && + tryDelinearize(Pair[0].Src, Pair[0].Dst, Pair)) { + DEBUG(dbgs() << " delinerized GEP\n"); + Pairs = Pair.size(); + } + for (unsigned P = 0; P < Pairs; ++P) { Pair[P].Loops.resize(MaxLevels + 1); Pair[P].GroupLoops.resize(MaxLevels + 1); diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp index 7620fd9..f042964 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp +++ b/contrib/llvm/lib/Analysis/IPA/CallGraph.cpp @@ -6,11 +6,6 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This file implements the CallGraph class and provides the BasicCallGraph -// default implementation. -// -//===----------------------------------------------------------------------===// #include "llvm/Analysis/CallGraph.h" #include "llvm/IR/Instructions.h" @@ -21,168 +16,92 @@ #include "llvm/Support/raw_ostream.h" using namespace llvm; -namespace { +CallGraph::CallGraph() + : ModulePass(ID), Root(0), ExternalCallingNode(0), CallsExternalNode(0) { + initializeCallGraphPass(*PassRegistry::getPassRegistry()); +} -//===----------------------------------------------------------------------===// -// BasicCallGraph class definition -// -class BasicCallGraph : public ModulePass, public CallGraph { - // Root is root of the call graph, or the external node if a 'main' function - // couldn't be found. - // - CallGraphNode *Root; - - // ExternalCallingNode - This node has edges to all external functions and - // those internal functions that have their address taken. - CallGraphNode *ExternalCallingNode; - - // CallsExternalNode - This node has edges to it from all functions making - // indirect calls or calling an external function. - CallGraphNode *CallsExternalNode; - -public: - static char ID; // Class identification, replacement for typeinfo - BasicCallGraph() : ModulePass(ID), Root(0), - ExternalCallingNode(0), CallsExternalNode(0) { - initializeBasicCallGraphPass(*PassRegistry::getPassRegistry()); - } +void CallGraph::addToCallGraph(Function *F) { + CallGraphNode *Node = getOrInsertFunction(F); - // runOnModule - Compute the call graph for the specified module. 
- virtual bool runOnModule(Module &M) { - CallGraph::initialize(M); - - ExternalCallingNode = getOrInsertFunction(0); - CallsExternalNode = new CallGraphNode(0); - Root = 0; - - // Add every function to the call graph. - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - addToCallGraph(I); - - // If we didn't find a main function, use the external call graph node - if (Root == 0) Root = ExternalCallingNode; - - return false; - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } + // If this function has external linkage, anything could call it. + if (!F->hasLocalLinkage()) { + ExternalCallingNode->addCalledFunction(CallSite(), Node); - virtual void print(raw_ostream &OS, const Module *) const { - OS << "CallGraph Root is: "; - if (Function *F = getRoot()->getFunction()) - OS << F->getName() << "\n"; - else { - OS << "<>\n"; + // Found the entry point? + if (F->getName() == "main") { + if (Root) // Found multiple external mains? Don't pick one. + Root = ExternalCallingNode; + else + Root = Node; // Found a main, keep track of it! } - - CallGraph::print(OS, 0); } - virtual void releaseMemory() { - destroy(); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it should - /// override this to adjust the this pointer as needed for the specified pass - /// info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { - if (PI == &CallGraph::ID) - return (CallGraph*)this; - return this; - } - - CallGraphNode* getExternalCallingNode() const { return ExternalCallingNode; } - CallGraphNode* getCallsExternalNode() const { return CallsExternalNode; } - - // getRoot - Return the root of the call graph, which is either main, or if - // main cannot be found, the external node. - // - CallGraphNode *getRoot() { return Root; } - const CallGraphNode *getRoot() const { return Root; } - -private: - //===--------------------------------------------------------------------- - // Implementation of CallGraph construction - // - - // addToCallGraph - Add a function to the call graph, and link the node to all - // of the functions that it calls. - // - void addToCallGraph(Function *F) { - CallGraphNode *Node = getOrInsertFunction(F); - - // If this function has external linkage, anything could call it. - if (!F->hasLocalLinkage()) { - ExternalCallingNode->addCalledFunction(CallSite(), Node); - - // Found the entry point? - if (F->getName() == "main") { - if (Root) // Found multiple external mains? Don't pick one. - Root = ExternalCallingNode; - else - Root = Node; // Found a main, keep track of it! + // If this function has its address taken, anything could call it. + if (F->hasAddressTaken()) + ExternalCallingNode->addCalledFunction(CallSite(), Node); + + // If this function is not defined in this translation unit, it could call + // anything. + if (F->isDeclaration() && !F->isIntrinsic()) + Node->addCalledFunction(CallSite(), CallsExternalNode); + + // Look for calls by this function. + for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; + ++II) { + CallSite CS(cast(II)); + if (CS) { + const Function *Callee = CS.getCalledFunction(); + if (!Callee) + // Indirect calls of intrinsics are not allowed so no need to check. 
+ Node->addCalledFunction(CS, CallsExternalNode); + else if (!Callee->isIntrinsic()) + Node->addCalledFunction(CS, getOrInsertFunction(Callee)); } } +} - // If this function has its address taken, anything could call it. - if (F->hasAddressTaken()) - ExternalCallingNode->addCalledFunction(CallSite(), Node); - - // If this function is not defined in this translation unit, it could call - // anything. - if (F->isDeclaration() && !F->isIntrinsic()) - Node->addCalledFunction(CallSite(), CallsExternalNode); - - // Look for calls by this function. - for (Function::iterator BB = F->begin(), BBE = F->end(); BB != BBE; ++BB) - for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); - II != IE; ++II) { - CallSite CS(cast(II)); - if (CS) { - const Function *Callee = CS.getCalledFunction(); - if (!Callee) - // Indirect calls of intrinsics are not allowed so no need to check. - Node->addCalledFunction(CS, CallsExternalNode); - else if (!Callee->isIntrinsic()) - Node->addCalledFunction(CS, getOrInsertFunction(Callee)); - } - } - } +void CallGraph::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); +} - // - // destroy - Release memory for the call graph - virtual void destroy() { - /// CallsExternalNode is not in the function map, delete it explicitly. - if (CallsExternalNode) { - CallsExternalNode->allReferencesDropped(); - delete CallsExternalNode; - CallsExternalNode = 0; - } - CallGraph::destroy(); - } -}; +bool CallGraph::runOnModule(Module &M) { + Mod = &M; -} //End anonymous namespace + ExternalCallingNode = getOrInsertFunction(0); + assert(!CallsExternalNode); + CallsExternalNode = new CallGraphNode(0); + Root = 0; -INITIALIZE_ANALYSIS_GROUP(CallGraph, "Call Graph", BasicCallGraph) -INITIALIZE_AG_PASS(BasicCallGraph, CallGraph, "basiccg", - "Basic CallGraph Construction", false, true, true) + // Add every function to the call graph. + for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) + addToCallGraph(I); -char CallGraph::ID = 0; -char BasicCallGraph::ID = 0; + // If we didn't find a main function, use the external call graph node + if (Root == 0) + Root = ExternalCallingNode; -void CallGraph::initialize(Module &M) { - Mod = &M; + return false; } -void CallGraph::destroy() { - if (FunctionMap.empty()) return; - - // Reset all node's use counts to zero before deleting them to prevent an - // assertion from firing. +INITIALIZE_PASS(CallGraph, "basiccg", "CallGraph Construction", false, true) + +char CallGraph::ID = 0; + +void CallGraph::releaseMemory() { + /// CallsExternalNode is not in the function map, delete it explicitly. + if (CallsExternalNode) { + CallsExternalNode->allReferencesDropped(); + delete CallsExternalNode; + CallsExternalNode = 0; + } + + if (FunctionMap.empty()) + return; + +// Reset all node's use counts to zero before deleting them to prevent an +// assertion from firing. 
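The edge-adding policy implemented in addToCallGraph above reduces to two
predicates; a standalone sketch over a toy function record (an assumed
simplification, not the LLVM API):

    struct ToyFunction {
      bool hasLocalLinkage;
      bool hasAddressTaken;
      bool isDeclaration;
      bool isIntrinsic;
    };
    // ExternalCallingNode -> F: something outside the module might call F.
    bool externallyCallable(const ToyFunction &f) {
      return !f.hasLocalLinkage || f.hasAddressTaken;
    }
    // F -> CallsExternalNode: F's body is not visible, so it may call
    // anything.
    bool mayCallAnything(const ToyFunction &f) {
      return f.isDeclaration && !f.isIntrinsic;
    }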
#ifndef NDEBUG for (FunctionMapTy::iterator I = FunctionMap.begin(), E = FunctionMap.end(); I != E; ++I) @@ -195,7 +114,14 @@ void CallGraph::destroy() { FunctionMap.clear(); } -void CallGraph::print(raw_ostream &OS, Module*) const { +void CallGraph::print(raw_ostream &OS, const Module*) const { + OS << "CallGraph Root is: "; + if (Function *F = Root->getFunction()) + OS << F->getName() << "\n"; + else { + OS << "<>\n"; + } + for (CallGraph::const_iterator I = begin(), E = end(); I != E; ++I) I->second->print(OS); } diff --git a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp index a0d788f..182beca 100644 --- a/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/contrib/llvm/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -22,7 +22,7 @@ #include "llvm/Analysis/CallGraph.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" -#include "llvm/PassManagers.h" +#include "llvm/IR/LegacyPassManagers.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" diff --git a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp index 92d0d23..7ec4644 100644 --- a/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/contrib/llvm/lib/Analysis/IPA/GlobalsModRef.cpp @@ -189,7 +189,7 @@ char GlobalsModRef::ID = 0; INITIALIZE_AG_PASS_BEGIN(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", "Simple mod/ref analysis for globals", false, true, false) -INITIALIZE_AG_DEPENDENCY(CallGraph) +INITIALIZE_PASS_DEPENDENCY(CallGraph) INITIALIZE_AG_PASS_END(GlobalsModRef, AliasAnalysis, "globalsmodref-aa", "Simple mod/ref analysis for globals", false, true, false) diff --git a/contrib/llvm/lib/Analysis/IPA/IPA.cpp b/contrib/llvm/lib/Analysis/IPA/IPA.cpp index 1c1816d..47357cf 100644 --- a/contrib/llvm/lib/Analysis/IPA/IPA.cpp +++ b/contrib/llvm/lib/Analysis/IPA/IPA.cpp @@ -19,8 +19,7 @@ using namespace llvm; /// initializeIPA - Initialize all passes linked into the IPA library. void llvm::initializeIPA(PassRegistry &Registry) { - initializeBasicCallGraphPass(Registry); - initializeCallGraphAnalysisGroup(Registry); + initializeCallGraphPass(Registry); initializeCallGraphPrinterPass(Registry); initializeCallGraphViewerPass(Registry); initializeFindUsedTypesPass(Registry); diff --git a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp index 35c45e6..3bc796e 100644 --- a/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp +++ b/contrib/llvm/lib/Analysis/IPA/InlineCost.cpp @@ -59,6 +59,8 @@ class CallAnalyzer : public InstVisitor { bool ExposesReturnsTwice; bool HasDynamicAlloca; bool ContainsNoDuplicateCall; + bool HasReturn; + bool HasIndirectBr; /// Number of bytes allocated statically by the callee. 
uint64_t AllocatedSize; @@ -124,7 +126,7 @@ class CallAnalyzer : public InstVisitor { bool visitIntToPtr(IntToPtrInst &I); bool visitCastInst(CastInst &I); bool visitUnaryInstruction(UnaryInstruction &I); - bool visitICmp(ICmpInst &I); + bool visitCmpInst(CmpInst &I); bool visitSub(BinaryOperator &I); bool visitBinaryOperator(BinaryOperator &I); bool visitLoad(LoadInst &I); @@ -132,6 +134,12 @@ class CallAnalyzer : public InstVisitor { bool visitExtractValue(ExtractValueInst &I); bool visitInsertValue(InsertValueInst &I); bool visitCallSite(CallSite CS); + bool visitReturnInst(ReturnInst &RI); + bool visitBranchInst(BranchInst &BI); + bool visitSwitchInst(SwitchInst &SI); + bool visitIndirectBrInst(IndirectBrInst &IBI); + bool visitResumeInst(ResumeInst &RI); + bool visitUnreachableInst(UnreachableInst &I); public: CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, @@ -139,12 +147,13 @@ public: : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), IsCallerRecursive(false), IsRecursiveCall(false), ExposesReturnsTwice(false), HasDynamicAlloca(false), - ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), - NumVectorInstructions(0), FiftyPercentVectorBonus(0), - TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), - NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), - NumConstantPtrDiffs(0), NumInstructionsSimplified(0), - SROACostSavings(0), SROACostSavingsLost(0) {} + ContainsNoDuplicateCall(false), HasReturn(false), HasIndirectBr(false), + AllocatedSize(0), NumInstructions(0), NumVectorInstructions(0), + FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), + NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), + NumConstantPtrCmps(0), NumConstantPtrDiffs(0), + NumInstructionsSimplified(0), SROACostSavings(0), + SROACostSavingsLost(0) {} bool analyzeCall(CallSite CS); @@ -490,7 +499,7 @@ bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { return false; } -bool CallAnalyzer::visitICmp(ICmpInst &I) { +bool CallAnalyzer::visitCmpInst(CmpInst &I) { Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); // First try to handle simplified comparisons. if (!isa(LHS)) @@ -499,12 +508,16 @@ bool CallAnalyzer::visitICmp(ICmpInst &I) { if (!isa(RHS)) if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) RHS = SimpleRHS; - if (Constant *CLHS = dyn_cast(LHS)) + if (Constant *CLHS = dyn_cast(LHS)) { if (Constant *CRHS = dyn_cast(RHS)) - if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + if (Constant *C = ConstantExpr::getCompare(I.getPredicate(), CLHS, CRHS)) { SimplifiedValues[&I] = C; return true; } + } + + if (I.getOpcode() == Instruction::FCmp) + return false; // Otherwise look for a comparison between constant offset pointers with // a common base. @@ -700,7 +713,7 @@ bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { } bool CallAnalyzer::visitCallSite(CallSite CS) { - if (CS.isCall() && cast(CS.getInstruction())->canReturnTwice() && + if (CS.hasFnAttr(Attribute::ReturnsTwice) && !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::ReturnsTwice)) { // This aborts the entire analysis. @@ -781,6 +794,60 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { return Base::visitCallSite(CS); } +bool CallAnalyzer::visitReturnInst(ReturnInst &RI) { + // At least one return instruction will be free after inlining. 
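// HasReturn is member state rather than per-block state, so only the
// first return seen anywhere in the callee is discounted; later returns
// are charged like ordinary instructions.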
+ bool Free = !HasReturn; + HasReturn = true; + return Free; +} + +bool CallAnalyzer::visitBranchInst(BranchInst &BI) { + // We model unconditional branches as essentially free -- they really + // shouldn't exist at all, but handling them makes the behavior of the + // inliner more regular and predictable. Interestingly, conditional branches + // which will fold away are also free. + return BI.isUnconditional() || isa(BI.getCondition()) || + dyn_cast_or_null( + SimplifiedValues.lookup(BI.getCondition())); +} + +bool CallAnalyzer::visitSwitchInst(SwitchInst &SI) { + // We model unconditional switches as free, see the comments on handling + // branches. + return isa(SI.getCondition()) || + dyn_cast_or_null( + SimplifiedValues.lookup(SI.getCondition())); +} + +bool CallAnalyzer::visitIndirectBrInst(IndirectBrInst &IBI) { + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this + // indirect jump would jump from the inlined copy of the function into the + // original function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions with + // indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably don't + // want to inline this function. + HasIndirectBr = true; + return false; +} + +bool CallAnalyzer::visitResumeInst(ResumeInst &RI) { + // FIXME: It's not clear that a single instruction is an accurate model for + // the inline cost of a resume instruction. + return false; +} + +bool CallAnalyzer::visitUnreachableInst(UnreachableInst &I) { + // FIXME: It might be reasonably to discount the cost of instructions leading + // to unreachable as they have the lowest possible impact on both runtime and + // code size. + return true; // No actual code is needed for unreachable. +} + bool CallAnalyzer::visitInstruction(Instruction &I) { // Some instructions are free. All of the free intrinsics can also be // handled by SROA, etc. @@ -804,8 +871,7 @@ bool CallAnalyzer::visitInstruction(Instruction &I) { /// construct has been detected. It returns false if inlining is no longer /// viable, and true if inlining remains viable. bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { - for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); - I != E; ++I) { + for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) { ++NumInstructions; if (isa(I) || I->getType()->isVectorTy()) ++NumVectorInstructions; @@ -821,7 +887,8 @@ bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { Cost += InlineConstants::InstrCost; // If the visit this instruction detected an uninlinable pattern, abort. - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || + HasIndirectBr) return false; // If the caller is a recursive function then we don't want to inline @@ -985,10 +1052,6 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { } } - // Track whether we've seen a return instruction. The first return - // instruction is free, as at least one will usually disappear in inlining. 
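The new visitors above fold terminator costs into the ordinary per-instruction walk of analyzeBlock. A condensed sketch of the model they implement, written as a hypothetical standalone helper rather than code from the patch: only the first return is free, a branch is free when it is unconditional or its condition folds to a constant, and indirectbr vetoes inlining outright.

// Sketch only; the real logic lives in the CallAnalyzer visitors above.
static bool isFreeTerminator(TerminatorInst *TI, bool &SeenReturn,
                             bool &VetoInline) {
  if (isa<ReturnInst>(TI)) {
    bool Free = !SeenReturn; // only the first return is free
    SeenReturn = true;
    return Free;
  }
  if (BranchInst *BI = dyn_cast<BranchInst>(TI))
    return BI->isUnconditional() ||              // always free
           isa<ConstantInt>(BI->getCondition()); // will fold away
  if (isa<IndirectBrInst>(TI)) {
    VetoInline = true; // blockaddress hazard: abort the whole analysis
    return false;
  }
  return isa<UnreachableInst>(TI); // no code is emitted for unreachable
}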
- bool HasReturn = false; - // Populate our simplified values by mapping from function arguments to call // arguments with known important simplifications. CallSite::arg_iterator CAI = CS.arg_begin(); @@ -1035,33 +1098,11 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { if (BB->empty()) continue; - // Handle the terminator cost here where we can track returns and other - // function-wide constructs. - TerminatorInst *TI = BB->getTerminator(); - - // We never want to inline functions that contain an indirectbr. This is - // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this - // indirect jump would jump from the inlined copy of the function into the - // original function which is extremely undefined behavior. - // FIXME: This logic isn't really right; we can safely inline functions - // with indirectbr's as long as no other function or global references the - // blockaddress of a block within the current function. And as a QOI issue, - // if someone is using a blockaddress without an indirectbr, and that - // reference somehow ends up in another function or global, we probably - // don't want to inline this function. - if (isa(TI)) - return false; - - if (!HasReturn && isa(TI)) - HasReturn = true; - else - Cost += InlineConstants::InstrCost; - // Analyze the cost of this block. If we blow through the threshold, this // returns false, and we can bail on out. if (!analyzeBlock(BB)) { - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca || + HasIndirectBr) return false; // If the caller is a recursive function then we don't want to inline @@ -1074,6 +1115,8 @@ bool CallAnalyzer::analyzeCall(CallSite CS) { break; } + TerminatorInst *TI = BB->getTerminator(); + // Add in the live successors by first checking whether we have terminator // that may be simplified based on the values simplified by this call. if (BranchInst *BI = dyn_cast(TI)) { @@ -1167,6 +1210,22 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { return getInlineCost(CS, CS.getCalledFunction(), Threshold); } +/// \brief Test that two functions either have or have not the given attribute +/// at the same time. +static bool attributeMatches(Function *F1, Function *F2, + Attribute::AttrKind Attr) { + return F1->hasFnAttribute(Attr) == F2->hasFnAttribute(Attr); +} + +/// \brief Test that there are no attribute conflicts between Caller and Callee +/// that prevent inlining. +static bool functionsHaveCompatibleAttributes(Function *Caller, + Function *Callee) { + return attributeMatches(Caller, Callee, Attribute::SanitizeAddress) && + attributeMatches(Caller, Callee, Attribute::SanitizeMemory) && + attributeMatches(Caller, Callee, Attribute::SanitizeThread); +} + InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, int Threshold) { // Cannot inline indirect calls. @@ -1175,20 +1234,26 @@ InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, // Calls to functions with always-inline attributes should be inlined // whenever possible. - if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::AlwaysInline)) { + if (Callee->hasFnAttribute(Attribute::AlwaysInline)) { if (isInlineViable(*Callee)) return llvm::InlineCost::getAlways(); return llvm::InlineCost::getNever(); } + // Never inline functions with conflicting attributes (unless callee has + // always-inline attribute). 
+ if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee)) + return llvm::InlineCost::getNever(); + + // Don't inline this call if the caller has the optnone attribute. + if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone)) + return llvm::InlineCost::getNever(); + // Don't inline functions which can be redefined at link-time to mean // something else. Don't inline functions marked noinline or call sites // marked noinline. if (Callee->mayBeOverridden() || - Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::NoInline) || - CS.isNoInline()) + Callee->hasFnAttribute(Attribute::NoInline) || CS.isNoInline()) return llvm::InlineCost::getNever(); DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() diff --git a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp index bf77451..b867af1 100644 --- a/contrib/llvm/lib/Analysis/InstructionSimplify.cpp +++ b/contrib/llvm/lib/Analysis/InstructionSimplify.cpp @@ -668,7 +668,8 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, /// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. /// folding. static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, - Value *&V) { + Value *&V, + bool AllowNonInbounds = false) { assert(V->getType()->getScalarType()->isPointerTy()); // Without DataLayout, just be conservative for now. Theoretically, more could @@ -676,8 +677,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, if (!TD) return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0); - unsigned IntPtrWidth = TD->getPointerSizeInBits(); - APInt Offset = APInt::getNullValue(IntPtrWidth); + Type *IntPtrTy = TD->getIntPtrType(V->getType())->getScalarType(); + APInt Offset = APInt::getNullValue(IntPtrTy->getIntegerBitWidth()); // Even though we don't look through PHI nodes, we could be called on an // instruction in an unreachable block, which may be on a cycle. 
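To make the new gating concrete, here is a hypothetical predicate mirroring the early-outs above (a sketch in terms of the file's static helpers, not part of the patch): a sanitizer mismatch or an optnone caller vetoes inlining before any cost is computed, and only the always-inline path earlier can bypass it.

static bool mayEvenConsiderInlining(CallSite CS, Function *Callee) {
  // Sanitizer instrumentation must not silently appear or vanish when a
  // body crosses an attribute boundary via inlining.
  if (!functionsHaveCompatibleAttributes(CS.getCaller(), Callee))
    return false;
  // An optnone caller has asked for no optimization at all.
  if (CS.getCaller()->hasFnAttribute(Attribute::OptimizeNone))
    return false;
  return true;
}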
@@ -685,7 +686,8 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, Visited.insert(V); do { if (GEPOperator *GEP = dyn_cast(V)) { - if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset)) + if ((!AllowNonInbounds && !GEP->isInBounds()) || + !GEP->accumulateConstantOffset(*TD, Offset)) break; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { @@ -701,7 +703,6 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD->getIntPtrType(V->getContext()); Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); if (V->getType()->isVectorTy()) return ConstantVector::getSplat(V->getType()->getVectorNumElements(), @@ -1363,6 +1364,10 @@ static Value *SimplifyLShrInst(Value *Op0, Value *Op1, bool isExact, if (Value *V = SimplifyShift(Instruction::LShr, Op0, Op1, Q, MaxRecurse)) return V; + // X >> X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + // undef >>l X -> 0 if (match(Op0, m_Undef())) return Constant::getNullValue(Op0->getType()); @@ -1391,6 +1396,10 @@ static Value *SimplifyAShrInst(Value *Op0, Value *Op1, bool isExact, if (Value *V = SimplifyShift(Instruction::AShr, Op0, Op1, Q, MaxRecurse)) return V; + // X >> X -> 0 + if (Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + // all ones >>a X -> all ones if (match(Op0, m_AllOnes())) return Op0; @@ -1730,7 +1739,7 @@ static Constant *computePointerICmp(const DataLayout *TD, RHS = RHS->stripPointerCasts(); // A non-null pointer is not equal to a null pointer. - if (llvm::isKnownNonNull(LHS) && isa(RHS) && + if (llvm::isKnownNonNull(LHS, TLI) && isa(RHS) && (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE)) return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); @@ -1830,6 +1839,17 @@ static Constant *computePointerICmp(const DataLayout *TD, return ConstantInt::get(GetCompareTy(LHS), !CmpInst::isTrueWhenEqual(Pred)); } + + // Even if an non-inbounds GEP occurs along the path we can still optimize + // equality comparisons concerning the result. We avoid walking the whole + // chain again by starting where the last calls to + // stripAndComputeConstantOffsets left off and accumulate the offsets. + Constant *LHSNoBound = stripAndComputeConstantOffsets(TD, LHS, true); + Constant *RHSNoBound = stripAndComputeConstantOffsets(TD, RHS, true); + if (LHS == RHS) + return ConstantExpr::getICmp(Pred, + ConstantExpr::getAdd(LHSOffset, LHSNoBound), + ConstantExpr::getAdd(RHSOffset, RHSNoBound)); } // Otherwise, fail. @@ -2026,7 +2046,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, // Turn icmp (ptrtoint x), (ptrtoint/constant) into a compare of the input // if the integer type is the same size as the pointer type. if (MaxRecurse && Q.TD && isa(LI) && - Q.TD->getPointerSizeInBits() == DstTy->getPrimitiveSizeInBits()) { + Q.TD->getTypeSizeInBits(SrcTy) == DstTy->getPrimitiveSizeInBits()) { if (Constant *RHSC = dyn_cast(RHS)) { // Transfer the cast to the constant. 
if (Value *V = SimplifyICmpInst(Pred, SrcOp, @@ -2238,6 +2258,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } + // icmp pred (urem X, Y), Y if (LBO && match(LBO, m_URem(m_Value(), m_Specific(RHS)))) { bool KnownNonNegative, KnownNegative; switch (Pred) { @@ -2245,7 +2266,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2255,7 +2276,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getFalse(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2265,6 +2286,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); } } + + // icmp pred X, (urem Y, X) if (RBO && match(RBO, m_URem(m_Value(), m_Specific(LHS)))) { bool KnownNonNegative, KnownNegative; switch (Pred) { @@ -2272,7 +2295,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, break; case ICmpInst::ICMP_SGT: case ICmpInst::ICMP_SGE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2282,7 +2305,7 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, return getTrue(ITy); case ICmpInst::ICMP_SLT: case ICmpInst::ICMP_SLE: - ComputeSignBit(RHS, KnownNonNegative, KnownNegative, Q.TD); + ComputeSignBit(LHS, KnownNonNegative, KnownNegative, Q.TD); if (!KnownNonNegative) break; // fall-through @@ -2936,6 +2959,7 @@ static bool IsIdempotent(Intrinsic::ID ID) { case Intrinsic::trunc: case Intrinsic::rint: case Intrinsic::nearbyint: + case Intrinsic::round: return true; } } diff --git a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp index 66b5e85..b6970af 100644 --- a/contrib/llvm/lib/Analysis/LazyValueInfo.cpp +++ b/contrib/llvm/lib/Analysis/LazyValueInfo.cpp @@ -421,8 +421,8 @@ void LVIValueHandle::deleted() { if (I->second == getValPtr()) ToErase.push_back(*I); } - - for (SmallVector::iterator I = ToErase.begin(), + + for (SmallVectorImpl::iterator I = ToErase.begin(), E = ToErase.end(); I != E; ++I) Parent->OverDefinedCache.erase(*I); @@ -444,8 +444,8 @@ void LazyValueInfoCache::eraseBlock(BasicBlock *BB) { if (I->first == BB) ToErase.push_back(*I); } - - for (SmallVector::iterator I = ToErase.begin(), + + for (SmallVectorImpl::iterator I = ToErase.begin(), E = ToErase.end(); I != E; ++I) OverDefinedCache.erase(*I); diff --git a/contrib/llvm/lib/Analysis/Lint.cpp b/contrib/llvm/lib/Analysis/Lint.cpp index 9393508..ec17f47 100644 --- a/contrib/llvm/lib/Analysis/Lint.cpp +++ b/contrib/llvm/lib/Analysis/Lint.cpp @@ -207,7 +207,7 @@ void Lint::visitCallSite(CallSite CS) { &I); FunctionType *FT = F->getFunctionType(); - unsigned NumActualArgs = unsigned(CS.arg_end()-CS.arg_begin()); + unsigned NumActualArgs = CS.arg_size(); Assert1(FT->isVarArg() ? 
FT->getNumParams() <= NumActualArgs : @@ -504,14 +504,42 @@ void Lint::visitShl(BinaryOperator &I) { "Undefined result: Shift count out of range", &I); } -static bool isZero(Value *V, DataLayout *TD) { +static bool isZero(Value *V, DataLayout *DL) { // Assume undef could be zero. - if (isa(V)) return true; + if (isa(V)) + return true; + + VectorType *VecTy = dyn_cast(V->getType()); + if (!VecTy) { + unsigned BitWidth = V->getType()->getIntegerBitWidth(); + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(V, KnownZero, KnownOne, DL); + return KnownZero.isAllOnesValue(); + } + + // Per-component check doesn't work with zeroinitializer + Constant *C = dyn_cast(V); + if (!C) + return false; + + if (C->isZeroValue()) + return true; + + // For a vector, KnownZero will only be true if all values are zero, so check + // this per component + unsigned BitWidth = VecTy->getElementType()->getIntegerBitWidth(); + for (unsigned I = 0, N = VecTy->getNumElements(); I != N; ++I) { + Constant *Elem = C->getAggregateElement(I); + if (isa(Elem)) + return true; + + APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); + ComputeMaskedBits(Elem, KnownZero, KnownOne, DL); + if (KnownZero.isAllOnesValue()) + return true; + } - unsigned BitWidth = cast(V->getType())->getBitWidth(); - APInt KnownZero(BitWidth, 0), KnownOne(BitWidth, 0); - ComputeMaskedBits(V, KnownZero, KnownOne, TD); - return KnownZero.isAllOnesValue(); + return false; } void Lint::visitSDiv(BinaryOperator &I) { diff --git a/contrib/llvm/lib/Analysis/LoopInfo.cpp b/contrib/llvm/lib/Analysis/LoopInfo.cpp index f1ad650..e369633 100644 --- a/contrib/llvm/lib/Analysis/LoopInfo.cpp +++ b/contrib/llvm/lib/Analysis/LoopInfo.cpp @@ -50,6 +50,9 @@ INITIALIZE_PASS_BEGIN(LoopInfo, "loops", "Natural Loop Information", true, true) INITIALIZE_PASS_DEPENDENCY(DominatorTree) INITIALIZE_PASS_END(LoopInfo, "loops", "Natural Loop Information", true, true) +// Loop identifier metadata name. +static const char *const LoopMDName = "llvm.loop"; + //===----------------------------------------------------------------------===// // Loop implementation // @@ -174,10 +177,6 @@ PHINode *Loop::getCanonicalInductionVariable() const { /// isLCSSAForm - Return true if the Loop is in LCSSA form bool Loop::isLCSSAForm(DominatorTree &DT) const { - // Sort the blocks vector so that we can use binary search to do quick - // lookups. - SmallPtrSet LoopBBs(block_begin(), block_end()); - for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) { BasicBlock *BB = *BI; for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I) @@ -193,7 +192,7 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const { // block they are defined in. Also, blocks not reachable from the // entry are special; uses in them don't need to go through PHIs. if (UserBB != BB && - !LoopBBs.count(UserBB) && + !contains(UserBB) && DT.isReachableFromEntry(UserBB)) return false; } @@ -217,12 +216,12 @@ bool Loop::isSafeToClone() const { // Return false if any loop blocks contain indirectbrs, or there are any calls // to noduplicate functions. 
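Returning briefly to the Lint::isZero rewrite above: vector divisors are now checked per lane, because a division is already undefined when any single lane divides by zero. Illustrative cases (IR shown in comments; these examples are not from the patch):

// sdiv <2 x i32> %x, <i32 7, i32 0>  ; flagged: lane 1 is provably zero
// sdiv <2 x i32> %x, <i32 7, i32 5>  ; not flagged: no zero or undef lane
// sdiv <2 x i32> %x, zeroinitializer ; flagged early via C->isZeroValue()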
for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) { - if (isa((*I)->getTerminator())) { + if (isa((*I)->getTerminator())) return false; - } else if (const InvokeInst *II = dyn_cast((*I)->getTerminator())) { + + if (const InvokeInst *II = dyn_cast((*I)->getTerminator())) if (II->hasFnAttr(Attribute::NoDuplicate)) return false; - } for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) { if (const CallInst *CI = dyn_cast(BI)) { @@ -234,14 +233,62 @@ bool Loop::isSafeToClone() const { return true; } -bool Loop::isAnnotatedParallel() const { +MDNode *Loop::getLoopID() const { + MDNode *LoopID = 0; + if (isLoopSimplifyForm()) { + LoopID = getLoopLatch()->getTerminator()->getMetadata(LoopMDName); + } else { + // Go through each predecessor of the loop header and check the + // terminator for the metadata. + BasicBlock *H = getHeader(); + for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) { + TerminatorInst *TI = (*I)->getTerminator(); + MDNode *MD = 0; + + // Check if this terminator branches to the loop header. + for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) { + if (TI->getSuccessor(i) == H) { + MD = TI->getMetadata(LoopMDName); + break; + } + } + if (!MD) + return 0; - BasicBlock *latch = getLoopLatch(); - if (latch == NULL) - return false; + if (!LoopID) + LoopID = MD; + else if (MD != LoopID) + return 0; + } + } + if (!LoopID || LoopID->getNumOperands() == 0 || + LoopID->getOperand(0) != LoopID) + return 0; + return LoopID; +} - MDNode *desiredLoopIdMetadata = - latch->getTerminator()->getMetadata("llvm.loop.parallel"); +void Loop::setLoopID(MDNode *LoopID) const { + assert(LoopID && "Loop ID should not be null"); + assert(LoopID->getNumOperands() > 0 && "Loop ID needs at least one operand"); + assert(LoopID->getOperand(0) == LoopID && "Loop ID should refer to itself"); + + if (isLoopSimplifyForm()) { + getLoopLatch()->getTerminator()->setMetadata(LoopMDName, LoopID); + return; + } + + BasicBlock *H = getHeader(); + for (block_iterator I = block_begin(), IE = block_end(); I != IE; ++I) { + TerminatorInst *TI = (*I)->getTerminator(); + for (unsigned i = 0, ie = TI->getNumSuccessors(); i != ie; ++i) { + if (TI->getSuccessor(i) == H) + TI->setMetadata(LoopMDName, LoopID); + } + } +} + +bool Loop::isAnnotatedParallel() const { + MDNode *desiredLoopIdMetadata = getLoopID(); if (!desiredLoopIdMetadata) return false; @@ -258,15 +305,15 @@ bool Loop::isAnnotatedParallel() const { if (!II->mayReadOrWriteMemory()) continue; - if (!II->getMetadata("llvm.mem.parallel_loop_access")) - return false; - // The memory instruction can refer to the loop identifier metadata // directly or indirectly through another list metadata (in case of // nested parallel loops). The loop identifier metadata refers to // itself so we can check both cases with the same routine. - MDNode *loopIdMD = - dyn_cast(II->getMetadata("llvm.mem.parallel_loop_access")); + MDNode *loopIdMD = II->getMetadata("llvm.mem.parallel_loop_access"); + + if (!loopIdMD) + return false; + bool loopIdMDFound = false; for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) { if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) { @@ -286,9 +333,6 @@ bool Loop::isAnnotatedParallel() const { /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. bool Loop::hasDedicatedExits() const { - // Sort the blocks vector so that we can use binary search to do quick - // lookups. 
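The getLoopID/setLoopID pair above generalizes the old llvm.loop.parallel scheme to a single self-referential llvm.loop node. A sketch of building and attaching such a node, assuming an LLVMContext Ctx and a Loop *L in loop-simplify form (the construction pattern is the assumption here, not the accessors themselves):

// Operand 0 must be the node itself, so create the node with a null
// placeholder and patch the self-reference in afterwards.
SmallVector<Value *, 1> Args(1, static_cast<Value *>(0));
MDNode *LoopID = MDNode::get(Ctx, Args);
LoopID->replaceOperandWith(0, LoopID); // satisfies getLoopID()'s check
L->setLoopID(LoopID);                  // lands on the latch terminator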
- SmallPtrSet LoopBBs(block_begin(), block_end()); // Each predecessor of each exit block of a normal loop is contained // within the loop. SmallVector ExitBlocks; @@ -296,7 +340,7 @@ bool Loop::hasDedicatedExits() const { for (unsigned i = 0, e = ExitBlocks.size(); i != e; ++i) for (pred_iterator PI = pred_begin(ExitBlocks[i]), PE = pred_end(ExitBlocks[i]); PI != PE; ++PI) - if (!LoopBBs.count(*PI)) + if (!contains(*PI)) return false; // All the requirements are met. return true; @@ -311,11 +355,6 @@ Loop::getUniqueExitBlocks(SmallVectorImpl &ExitBlocks) const { assert(hasDedicatedExits() && "getUniqueExitBlocks assumes the loop has canonical form exits!"); - // Sort the blocks vector so that we can use binary search to do quick - // lookups. - SmallVector LoopBBs(block_begin(), block_end()); - std::sort(LoopBBs.begin(), LoopBBs.end()); - SmallVector switchExitBlocks; for (block_iterator BI = block_begin(), BE = block_end(); BI != BE; ++BI) { @@ -325,7 +364,7 @@ Loop::getUniqueExitBlocks(SmallVectorImpl &ExitBlocks) const { for (succ_iterator I = succ_begin(*BI), E = succ_end(*BI); I != E; ++I) { // If block is inside the loop then it is not a exit block. - if (std::binary_search(LoopBBs.begin(), LoopBBs.end(), *I)) + if (contains(*I)) continue; pred_iterator PI = pred_begin(*I); diff --git a/contrib/llvm/lib/Analysis/LoopPass.cpp b/contrib/llvm/lib/Analysis/LoopPass.cpp index 1540112..acf2ba6 100644 --- a/contrib/llvm/lib/Analysis/LoopPass.cpp +++ b/contrib/llvm/lib/Analysis/LoopPass.cpp @@ -188,6 +188,10 @@ bool LPPassManager::runOnFunction(Function &F) { // advantage in deleting uses in a later loop before optimizing the // definitions in an earlier loop. If we find a clear reason to process in // forward order, then a forward variant of LoopPassManager should be created. + // + // Note that LoopInfo::iterator visits loops in reverse program + // order. Here, reverse_iterator gives us a forward order, and the LoopQueue + // reverses the order a third time by popping from the back. 
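Concretely, under the comment's own description: if the top-level loops are L1, L2, L3 in program order, LoopInfo iteration yields L3, L2, L1, the reverse_iterator below walks L1, L2, L3, and popping from the back of LQ then hands L3 to the passes first.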
for (LoopInfo::reverse_iterator I = LI->rbegin(), E = LI->rend(); I != E; ++I) addLoopIntoQueue(*I, LQ); diff --git a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp index 9c0d8ac..1db0f63 100644 --- a/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp +++ b/contrib/llvm/lib/Analysis/MemoryBuiltins.cpp @@ -31,12 +31,13 @@ using namespace llvm; enum AllocType { - MallocLike = 1<<0, // allocates - CallocLike = 1<<1, // allocates + bzero - ReallocLike = 1<<2, // reallocates - StrDupLike = 1<<3, + OpNewLike = 1<<0, // allocates; never returns null + MallocLike = 1<<1 | OpNewLike, // allocates; may return null + CallocLike = 1<<2, // allocates + bzero + ReallocLike = 1<<3, // reallocates + StrDupLike = 1<<4, AllocLike = MallocLike | CallocLike | StrDupLike, - AnyAlloc = MallocLike | CallocLike | ReallocLike | StrDupLike + AnyAlloc = AllocLike | ReallocLike }; struct AllocFnsTy { @@ -52,20 +53,20 @@ struct AllocFnsTy { static const AllocFnsTy AllocationFnData[] = { {LibFunc::malloc, MallocLike, 1, 0, -1}, {LibFunc::valloc, MallocLike, 1, 0, -1}, - {LibFunc::Znwj, MallocLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::Znwj, OpNewLike, 1, 0, -1}, // new(unsigned int) {LibFunc::ZnwjRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) - {LibFunc::Znwm, MallocLike, 1, 0, -1}, // new(unsigned long) + {LibFunc::Znwm, OpNewLike, 1, 0, -1}, // new(unsigned long) {LibFunc::ZnwmRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new(unsigned long, nothrow) - {LibFunc::Znaj, MallocLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::Znaj, OpNewLike, 1, 0, -1}, // new[](unsigned int) {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) - {LibFunc::Znam, MallocLike, 1, 0, -1}, // new[](unsigned long) + {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long) {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) - {LibFunc::posix_memalign, MallocLike, 3, 2, -1}, {LibFunc::calloc, CallocLike, 2, 0, 1}, {LibFunc::realloc, ReallocLike, 2, 1, -1}, {LibFunc::reallocf, ReallocLike, 2, 1, -1}, {LibFunc::strdup, StrDupLike, 1, -1, -1}, {LibFunc::strndup, StrDupLike, 2, 1, -1} + // TODO: Handle "int posix_memalign(void **, size_t, size_t)" }; @@ -77,6 +78,9 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { if (!CS.getInstruction()) return 0; + if (CS.isNoBuiltin()) + return 0; + Function *Callee = CS.getCalledFunction(); if (!Callee || !Callee->isDeclaration()) return 0; @@ -114,7 +118,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, return 0; const AllocFnsTy *FnData = &AllocationFnData[i]; - if ((FnData->AllocTy & AllocTy) == 0) + if ((FnData->AllocTy & AllocTy) != FnData->AllocTy) return 0; // Check function prototype. @@ -186,6 +190,13 @@ bool llvm::isReallocLikeFn(const Value *V, const TargetLibraryInfo *TLI, return getAllocationData(V, ReallocLike, TLI, LookThroughBitCast); } +/// \brief Tests if a value is a call or invoke to a library function that +/// allocates memory and never returns null (such as operator new). +bool llvm::isOperatorNewLikeFn(const Value *V, const TargetLibraryInfo *TLI, + bool LookThroughBitCast) { + return getAllocationData(V, OpNewLike, TLI, LookThroughBitCast); +} + /// extractMallocCall - Returns the corresponding CallInst if the instruction /// is a malloc call. Since CallInst::CreateMalloc() only creates calls, we /// ignore InvokeInst here. 
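In the reworked enum above, OpNewLike is a strict refinement of MallocLike (MallocLike = 1<<1 | OpNewLike), and the matching test in getAllocationData changes from any-bit overlap to subset containment. A tiny illustration with a hypothetical helper that mirrors the new expression:

// A function's kind must be wholly contained in the queried mask.
static bool kindMatches(unsigned FnKind, unsigned Query) {
  return (FnKind & Query) == FnKind;
}
// kindMatches(OpNewLike, MallocLike)  -> true: operator new has every
//   malloc-like property, plus the never-returns-null guarantee.
// kindMatches(MallocLike, OpNewLike)  -> false: plain malloc may return
//   null, so a query for new-like functions must reject it.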
@@ -194,7 +205,7 @@ const CallInst *llvm::extractMallocCall(const Value *I, return isMallocLikeFn(I, TLI) ? dyn_cast(I) : 0; } -static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, +static Value *computeArraySize(const CallInst *CI, const DataLayout *DL, const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) @@ -202,12 +213,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, // The size of the malloc's result type must be known to determine array size. Type *T = getMallocAllocatedType(CI, TLI); - if (!T || !T->isSized() || !TD) + if (!T || !T->isSized() || !DL) return 0; - unsigned ElementSize = TD->getTypeAllocSize(T); + unsigned ElementSize = DL->getTypeAllocSize(T); if (StructType *ST = dyn_cast(T)) - ElementSize = TD->getStructLayout(ST)->getSizeInBytes(); + ElementSize = DL->getStructLayout(ST)->getSizeInBytes(); // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. @@ -224,10 +235,10 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. const CallInst *llvm::isArrayMalloc(const Value *I, - const DataLayout *TD, + const DataLayout *DL, const TargetLibraryInfo *TLI) { const CallInst *CI = extractMallocCall(I, TLI); - Value *ArraySize = computeArraySize(CI, TD, TLI); + Value *ArraySize = computeArraySize(CI, DL, TLI); if (ConstantInt *ConstSize = dyn_cast_or_null(ArraySize)) if (ConstSize->isOne()) @@ -285,11 +296,11 @@ Type *llvm::getMallocAllocatedType(const CallInst *CI, /// then return that multiple. For non-array mallocs, the multiple is /// constant 1. Otherwise, return NULL for mallocs whose array size cannot be /// determined. -Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *TD, +Value *llvm::getMallocArraySize(CallInst *CI, const DataLayout *DL, const TargetLibraryInfo *TLI, bool LookThroughSExt) { assert(isMallocLikeFn(CI, TLI) && "getMallocArraySize and not malloc call"); - return computeArraySize(CI, TD, TLI, LookThroughSExt); + return computeArraySize(CI, DL, TLI, LookThroughSExt); } @@ -315,9 +326,15 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { if (!TLI || !TLI->getLibFunc(FnName, TLIFn) || !TLI->has(TLIFn)) return 0; - if (TLIFn != LibFunc::free && - TLIFn != LibFunc::ZdlPv && // operator delete(void*) - TLIFn != LibFunc::ZdaPv) // operator delete[](void*) + unsigned ExpectedNumParams; + if (TLIFn == LibFunc::free || + TLIFn == LibFunc::ZdlPv || // operator delete(void*) + TLIFn == LibFunc::ZdaPv) // operator delete[](void*) + ExpectedNumParams = 1; + else if (TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) + TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) + ExpectedNumParams = 2; + else return 0; // Check free prototype. @@ -326,7 +343,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) return 0; - if (FTy->getNumParams() != 1) + if (FTy->getNumParams() != ExpectedNumParams) return 0; if (FTy->getParamType(0) != Type::getInt8PtrTy(Callee->getContext())) return 0; @@ -345,12 +362,12 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { /// object size in Size if successful, and false otherwise. 
/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, /// byval arguments, and global variables. -bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD, +bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *DL, const TargetLibraryInfo *TLI, bool RoundToAlign) { - if (!TD) + if (!DL) return false; - ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign); + ObjectSizeOffsetVisitor Visitor(DL, TLI, Ptr->getContext(), RoundToAlign); SizeOffsetType Data = Visitor.compute(const_cast(Ptr)); if (!Visitor.bothKnown(Data)) return false; @@ -377,12 +394,12 @@ APInt ObjectSizeOffsetVisitor::align(APInt Size, uint64_t Align) { return Size; } -ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, +ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *DL, const TargetLibraryInfo *TLI, LLVMContext &Context, bool RoundToAlign) -: TD(TD), TLI(TLI), RoundToAlign(RoundToAlign) { - IntegerType *IntTy = TD->getIntPtrType(Context); +: DL(DL), TLI(TLI), RoundToAlign(RoundToAlign) { + IntegerType *IntTy = DL->getIntPtrType(Context); IntTyBits = IntTy->getBitWidth(); Zero = APInt::getNullValue(IntTyBits); } @@ -425,7 +442,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitAllocaInst(AllocaInst &I) { if (!I.getAllocatedType()->isSized()) return unknown(); - APInt Size(IntTyBits, TD->getTypeAllocSize(I.getAllocatedType())); + APInt Size(IntTyBits, DL->getTypeAllocSize(I.getAllocatedType())); if (!I.isArrayAllocation()) return std::make_pair(align(Size, I.getAlignment()), Zero); @@ -444,7 +461,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitArgument(Argument &A) { return unknown(); } PointerType *PT = cast(A.getType()); - APInt Size(IntTyBits, TD->getTypeAllocSize(PT->getElementType())); + APInt Size(IntTyBits, DL->getTypeAllocSize(PT->getElementType())); return std::make_pair(align(Size, A.getParamAlignment()), Zero); } @@ -517,7 +534,7 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { SizeOffsetType PtrData = compute(GEP.getPointerOperand()); APInt Offset(IntTyBits, 0); - if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset)) + if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*DL, Offset)) return unknown(); return std::make_pair(PtrData.first, PtrData.second + Offset); @@ -533,7 +550,7 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ if (!GV.hasDefinitiveInitializer()) return unknown(); - APInt Size(IntTyBits, TD->getTypeAllocSize(GV.getType()->getElementType())); + APInt Size(IntTyBits, DL->getTypeAllocSize(GV.getType()->getElementType())); return std::make_pair(align(Size, GV.getAlignment()), Zero); } @@ -569,12 +586,13 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitInstruction(Instruction &I) { return unknown(); } - -ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *TD, - const TargetLibraryInfo *TLI, - LLVMContext &Context) -: TD(TD), TLI(TLI), Context(Context), Builder(Context, TargetFolder(TD)) { - IntTy = TD->getIntPtrType(Context); +ObjectSizeOffsetEvaluator::ObjectSizeOffsetEvaluator(const DataLayout *DL, + const TargetLibraryInfo *TLI, + LLVMContext &Context, + bool RoundToAlign) +: DL(DL), TLI(TLI), Context(Context), Builder(Context, TargetFolder(DL)), + RoundToAlign(RoundToAlign) { + IntTy = DL->getIntPtrType(Context); Zero = ConstantInt::get(IntTy, 0); } @@ -598,7 +616,7 @@ SizeOffsetEvalType 
ObjectSizeOffsetEvaluator::compute(Value *V) { } SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { - ObjectSizeOffsetVisitor Visitor(TD, TLI, Context); + ObjectSizeOffsetVisitor Visitor(DL, TLI, Context, RoundToAlign); SizeOffsetType Const = Visitor.compute(V); if (Visitor.bothKnown(Const)) return std::make_pair(ConstantInt::get(Context, Const.first), @@ -617,13 +635,15 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { if (Instruction *I = dyn_cast(V)) Builder.SetInsertPoint(I); - // record the pointers that were handled in this run, so that they can be - // cleaned later if something fails - SeenVals.insert(V); - // now compute the size and offset SizeOffsetEvalType Result; - if (GEPOperator *GEP = dyn_cast(V)) { + + // Record the pointers that were handled in this run, so that they can be + // cleaned later if something fails. We also use this set to break cycles that + // can occur in dead code. + if (!SeenVals.insert(V)) { + Result = unknown(); + } else if (GEPOperator *GEP = dyn_cast(V)) { Result = visitGEPOperator(*GEP); } else if (Instruction *I = dyn_cast(V)) { Result = visit(*I); @@ -656,7 +676,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::visitAllocaInst(AllocaInst &I) { assert(I.isArrayAllocation()); Value *ArraySize = I.getArraySize(); Value *Size = ConstantInt::get(ArraySize->getType(), - TD->getTypeAllocSize(I.getAllocatedType())); + DL->getTypeAllocSize(I.getAllocatedType())); Size = Builder.CreateMul(Size, ArraySize); return std::make_pair(Size, Zero); } @@ -708,7 +728,7 @@ ObjectSizeOffsetEvaluator::visitGEPOperator(GEPOperator &GEP) { if (!bothKnown(PtrData)) return unknown(); - Value *Offset = EmitGEPOffset(&Builder, *TD, &GEP, /*NoAssumptions=*/true); + Value *Offset = EmitGEPOffset(&Builder, *DL, &GEP, /*NoAssumptions=*/true); Offset = Builder.CreateAdd(PtrData.second, Offset); return std::make_pair(PtrData.first, Offset); } diff --git a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp index c0009cb..84ff2ee 100644 --- a/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -1,4 +1,4 @@ -//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation --*- C++ -*-===// +//===- MemoryDependenceAnalysis.cpp - Mem Deps Implementation -------------===// // // The LLVM Compiler Infrastructure // @@ -89,7 +89,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { AA = &getAnalysis(); TD = getAnalysisIfAvailable(); DT = getAnalysisIfAvailable(); - if (PredCache == 0) + if (!PredCache) PredCache.reset(new PredIteratorCache()); return false; } @@ -371,18 +371,19 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { + Instruction *Inst = --ScanIt; + + if (IntrinsicInst *II = dyn_cast(Inst)) + // Debug intrinsics don't (and can't) cause dependencies. + if (isa(II)) continue; + // Limit the amount of scanning we do so we don't end up with quadratic // running time on extreme testcases. --Limit; if (!Limit) return MemDepResult::getUnknown(); - Instruction *Inst = --ScanIt; - if (IntrinsicInst *II = dyn_cast(Inst)) { - // Debug intrinsics don't (and can't) cause dependences. - if (isa(II)) continue; - // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. 
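The MemoryDependenceAnalysis reordering above is worth restating: debug intrinsics are now skipped before the scan limit is charged, so building with -g can no longer change which dependency the backward scan finds. In sketch form, assuming the surrounding locals of getPointerDependencyFrom:

while (ScanIt != BB->begin()) {
  Instruction *Inst = --ScanIt;
  if (isa<DbgInfoIntrinsic>(Inst))
    continue; // free: debug info can never be a memory dependence
  if (--Limit == 0) // only real instructions consume scan budget
    return MemDepResult::getUnknown();
  // ... the actual dependence checks follow ...
}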
if (II->getIntrinsicID() == Intrinsic::lifetime_start) { diff --git a/contrib/llvm/lib/Analysis/PathNumbering.cpp b/contrib/llvm/lib/Analysis/PathNumbering.cpp deleted file mode 100644 index 30d213b..0000000 --- a/contrib/llvm/lib/Analysis/PathNumbering.cpp +++ /dev/null @@ -1,521 +0,0 @@ -//===- PathNumbering.cpp --------------------------------------*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Ball-Larus path numbers uniquely identify paths through a directed acyclic -// graph (DAG) [Ball96]. For a CFG backedges are removed and replaced by phony -// edges to obtain a DAG, and thus the unique path numbers [Ball96]. -// -// The purpose of this analysis is to enumerate the edges in a CFG in order -// to obtain paths from path numbers in a convenient manner. As described in -// [Ball96] edges can be enumerated such that given a path number by following -// the CFG and updating the path number, the path is obtained. -// -// [Ball96] -// T. Ball and J. R. Larus. "Efficient Path Profiling." -// International Symposium on Microarchitecture, pages 46-57, 1996. -// http://portal.acm.org/citation.cfm?id=243857 -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "ball-larus-numbering" - -#include "llvm/Analysis/PathNumbering.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/TypeBuilder.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include -#include -#include -#include -#include - -using namespace llvm; - -// Are we enabling early termination -static cl::opt ProcessEarlyTermination( - "path-profile-early-termination", cl::Hidden, - cl::desc("In path profiling, insert extra instrumentation to account for " - "unexpected function termination.")); - -// Returns the basic block for the BallLarusNode -BasicBlock* BallLarusNode::getBlock() { - return(_basicBlock); -} - -// Returns the number of paths to the exit starting at the node. -unsigned BallLarusNode::getNumberPaths() { - return(_numberPaths); -} - -// Sets the number of paths to the exit starting at the node. -void BallLarusNode::setNumberPaths(unsigned numberPaths) { - _numberPaths = numberPaths; -} - -// Gets the NodeColor used in graph algorithms. -BallLarusNode::NodeColor BallLarusNode::getColor() { - return(_color); -} - -// Sets the NodeColor used in graph algorithms. -void BallLarusNode::setColor(BallLarusNode::NodeColor color) { - _color = color; -} - -// Returns an iterator over predecessor edges. Includes phony and -// backedges. -BLEdgeIterator BallLarusNode::predBegin() { - return(_predEdges.begin()); -} - -// Returns the end sentinel for the predecessor iterator. -BLEdgeIterator BallLarusNode::predEnd() { - return(_predEdges.end()); -} - -// Returns the number of predecessor edges. Includes phony and -// backedges. -unsigned BallLarusNode::getNumberPredEdges() { - return(_predEdges.size()); -} - -// Returns an iterator over successor edges. Includes phony and -// backedges. 
-BLEdgeIterator BallLarusNode::succBegin() { - return(_succEdges.begin()); -} - -// Returns the end sentinel for the successor iterator. -BLEdgeIterator BallLarusNode::succEnd() { - return(_succEdges.end()); -} - -// Returns the number of successor edges. Includes phony and -// backedges. -unsigned BallLarusNode::getNumberSuccEdges() { - return(_succEdges.size()); -} - -// Add an edge to the predecessor list. -void BallLarusNode::addPredEdge(BallLarusEdge* edge) { - _predEdges.push_back(edge); -} - -// Remove an edge from the predecessor list. -void BallLarusNode::removePredEdge(BallLarusEdge* edge) { - removeEdge(_predEdges, edge); -} - -// Add an edge to the successor list. -void BallLarusNode::addSuccEdge(BallLarusEdge* edge) { - _succEdges.push_back(edge); -} - -// Remove an edge from the successor list. -void BallLarusNode::removeSuccEdge(BallLarusEdge* edge) { - removeEdge(_succEdges, edge); -} - -// Returns the name of the BasicBlock being represented. If BasicBlock -// is null then returns "". If BasicBlock has no name, then -// "" is returned. Intended for use with debug output. -std::string BallLarusNode::getName() { - std::stringstream name; - - if(getBlock() != NULL) { - if(getBlock()->hasName()) { - std::string tempName(getBlock()->getName()); - name << tempName.c_str() << " (" << _uid << ")"; - } else - name << " (" << _uid << ")"; - } else - name << " (" << _uid << ")"; - - return name.str(); -} - -// Removes an edge from an edgeVector. Used by removePredEdge and -// removeSuccEdge. -void BallLarusNode::removeEdge(BLEdgeVector& v, BallLarusEdge* e) { - // TODO: Avoid linear scan by using a set instead - for(BLEdgeIterator i = v.begin(), - end = v.end(); - i != end; - ++i) { - if((*i) == e) { - v.erase(i); - break; - } - } -} - -// Returns the source node of this edge. -BallLarusNode* BallLarusEdge::getSource() const { - return(_source); -} - -// Returns the target node of this edge. -BallLarusNode* BallLarusEdge::getTarget() const { - return(_target); -} - -// Sets the type of the edge. -BallLarusEdge::EdgeType BallLarusEdge::getType() const { - return _edgeType; -} - -// Gets the type of the edge. -void BallLarusEdge::setType(EdgeType type) { - _edgeType = type; -} - -// Returns the weight of this edge. Used to decode path numbers to sequences -// of basic blocks. -unsigned BallLarusEdge::getWeight() { - return(_weight); -} - -// Sets the weight of the edge. Used during path numbering. -void BallLarusEdge::setWeight(unsigned weight) { - _weight = weight; -} - -// Gets the phony edge originating at the root. -BallLarusEdge* BallLarusEdge::getPhonyRoot() { - return _phonyRoot; -} - -// Sets the phony edge originating at the root. -void BallLarusEdge::setPhonyRoot(BallLarusEdge* phonyRoot) { - _phonyRoot = phonyRoot; -} - -// Gets the phony edge terminating at the exit. -BallLarusEdge* BallLarusEdge::getPhonyExit() { - return _phonyExit; -} - -// Sets the phony edge terminating at the exit. -void BallLarusEdge::setPhonyExit(BallLarusEdge* phonyExit) { - _phonyExit = phonyExit; -} - -// Gets the associated real edge if this is a phony edge. -BallLarusEdge* BallLarusEdge::getRealEdge() { - return _realEdge; -} - -// Sets the associated real edge if this is a phony edge. -void BallLarusEdge::setRealEdge(BallLarusEdge* realEdge) { - _realEdge = realEdge; -} - -// Returns the duplicate number of the edge. 
-unsigned BallLarusEdge::getDuplicateNumber() { - return(_duplicateNumber); -} - -// Initialization that requires virtual functions which are not fully -// functional in the constructor. -void BallLarusDag::init() { - BLBlockNodeMap inDag; - std::stack dfsStack; - - _root = addNode(&(_function.getEntryBlock())); - _exit = addNode(NULL); - - // start search from root - dfsStack.push(getRoot()); - - // dfs to add each bb into the dag - while(dfsStack.size()) - buildNode(inDag, dfsStack); - - // put in the final edge - addEdge(getExit(),getRoot(),0); -} - -// Frees all memory associated with the DAG. -BallLarusDag::~BallLarusDag() { - for(BLEdgeIterator edge = _edges.begin(), end = _edges.end(); edge != end; - ++edge) - delete (*edge); - - for(BLNodeIterator node = _nodes.begin(), end = _nodes.end(); node != end; - ++node) - delete (*node); -} - -// Calculate the path numbers by assigning edge increments as prescribed -// in Ball-Larus path profiling. -void BallLarusDag::calculatePathNumbers() { - BallLarusNode* node; - std::queue bfsQueue; - bfsQueue.push(getExit()); - - while(bfsQueue.size() > 0) { - node = bfsQueue.front(); - - DEBUG(dbgs() << "calculatePathNumbers on " << node->getName() << "\n"); - - bfsQueue.pop(); - unsigned prevPathNumber = node->getNumberPaths(); - calculatePathNumbersFrom(node); - - // Check for DAG splitting - if( node->getNumberPaths() > 100000000 && node != getRoot() ) { - // Add new phony edge from the split-node to the DAG's exit - BallLarusEdge* exitEdge = addEdge(node, getExit(), 0); - exitEdge->setType(BallLarusEdge::SPLITEDGE_PHONY); - - // Counters to handle the possibility of a multi-graph - BasicBlock* oldTarget = 0; - unsigned duplicateNumber = 0; - - // Iterate through each successor edge, adding phony edges - for( BLEdgeIterator succ = node->succBegin(), end = node->succEnd(); - succ != end; oldTarget = (*succ)->getTarget()->getBlock(), succ++ ) { - - if( (*succ)->getType() == BallLarusEdge::NORMAL ) { - // is this edge a duplicate? - if( oldTarget != (*succ)->getTarget()->getBlock() ) - duplicateNumber = 0; - - // create the new phony edge: root -> succ - BallLarusEdge* rootEdge = - addEdge(getRoot(), (*succ)->getTarget(), duplicateNumber++); - rootEdge->setType(BallLarusEdge::SPLITEDGE_PHONY); - rootEdge->setRealEdge(*succ); - - // split on this edge and reference it's exit/root phony edges - (*succ)->setType(BallLarusEdge::SPLITEDGE); - (*succ)->setPhonyRoot(rootEdge); - (*succ)->setPhonyExit(exitEdge); - (*succ)->setWeight(0); - } - } - - calculatePathNumbersFrom(node); - } - - DEBUG(dbgs() << "prev, new number paths " << prevPathNumber << ", " - << node->getNumberPaths() << ".\n"); - - if(prevPathNumber == 0 && node->getNumberPaths() != 0) { - DEBUG(dbgs() << "node ready : " << node->getName() << "\n"); - for(BLEdgeIterator pred = node->predBegin(), end = node->predEnd(); - pred != end; pred++) { - if( (*pred)->getType() == BallLarusEdge::BACKEDGE || - (*pred)->getType() == BallLarusEdge::SPLITEDGE ) - continue; - - BallLarusNode* nextNode = (*pred)->getSource(); - // not yet visited? - if(nextNode->getNumberPaths() == 0) - bfsQueue.push(nextNode); - } - } - } - - DEBUG(dbgs() << "\tNumber of paths: " << getRoot()->getNumberPaths() << "\n"); -} - -// Returns the number of paths for the Dag. -unsigned BallLarusDag::getNumberOfPaths() { - return(getRoot()->getNumberPaths()); -} - -// Returns the root (i.e. entry) node for the DAG. -BallLarusNode* BallLarusDag::getRoot() { - return _root; -} - -// Returns the exit node for the DAG. 
-BallLarusNode* BallLarusDag::getExit() { - return _exit; -} - -// Returns the function for the DAG. -Function& BallLarusDag::getFunction() { - return(_function); -} - -// Clears the node colors. -void BallLarusDag::clearColors(BallLarusNode::NodeColor color) { - for (BLNodeIterator nodeIt = _nodes.begin(); nodeIt != _nodes.end(); nodeIt++) - (*nodeIt)->setColor(color); -} - -// Processes one node and its imediate edges for building the DAG. -void BallLarusDag::buildNode(BLBlockNodeMap& inDag, BLNodeStack& dfsStack) { - BallLarusNode* currentNode = dfsStack.top(); - BasicBlock* currentBlock = currentNode->getBlock(); - - if(currentNode->getColor() != BallLarusNode::WHITE) { - // we have already visited this node - dfsStack.pop(); - currentNode->setColor(BallLarusNode::BLACK); - } else { - // are there any external procedure calls? - if( ProcessEarlyTermination ) { - for( BasicBlock::iterator bbCurrent = currentNode->getBlock()->begin(), - bbEnd = currentNode->getBlock()->end(); bbCurrent != bbEnd; - bbCurrent++ ) { - Instruction& instr = *bbCurrent; - if( instr.getOpcode() == Instruction::Call ) { - BallLarusEdge* callEdge = addEdge(currentNode, getExit(), 0); - callEdge->setType(BallLarusEdge::CALLEDGE_PHONY); - break; - } - } - } - - TerminatorInst* terminator = currentNode->getBlock()->getTerminator(); - if(isa(terminator) || isa(terminator) || - isa(terminator)) - addEdge(currentNode, getExit(),0); - - currentNode->setColor(BallLarusNode::GRAY); - inDag[currentBlock] = currentNode; - - BasicBlock* oldSuccessor = 0; - unsigned duplicateNumber = 0; - - // iterate through this node's successors - for(succ_iterator successor = succ_begin(currentBlock), - succEnd = succ_end(currentBlock); successor != succEnd; - oldSuccessor = *successor, ++successor ) { - BasicBlock* succBB = *successor; - - // is this edge a duplicate? - if (oldSuccessor == succBB) - duplicateNumber++; - else - duplicateNumber = 0; - - buildEdge(inDag, dfsStack, currentNode, succBB, duplicateNumber); - } - } -} - -// Process an edge in the CFG for DAG building. -void BallLarusDag::buildEdge(BLBlockNodeMap& inDag, std::stack& - dfsStack, BallLarusNode* currentNode, - BasicBlock* succBB, unsigned duplicateCount) { - BallLarusNode* succNode = inDag[succBB]; - - if(succNode && succNode->getColor() == BallLarusNode::BLACK) { - // visited node and forward edge - addEdge(currentNode, succNode, duplicateCount); - } else if(succNode && succNode->getColor() == BallLarusNode::GRAY) { - // visited node and back edge - DEBUG(dbgs() << "Backedge detected.\n"); - addBackedge(currentNode, succNode, duplicateCount); - } else { - BallLarusNode* childNode; - // not visited node and forward edge - if(succNode) // an unvisited node that is child of a gray node - childNode = succNode; - else { // an unvisited node that is a child of a an unvisted node - childNode = addNode(succBB); - inDag[succBB] = childNode; - } - addEdge(currentNode, childNode, duplicateCount); - dfsStack.push(childNode); - } -} - -// The weight on each edge is the increment required along any path that -// contains that edge. 
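Since the Ball-Larus machinery is removed wholesale here, a compact worked example of the weighting rule in the comment above may help readers of this log (background only, not from the patch). Take the diamond DAG A -> {B, C} -> D with D the exit: NumberPaths(D) = 1 and NumberPaths(B) = NumberPaths(C) = 1, so processing A assigns weight 0 to A->B and weight 1 to A->C, giving NumberPaths(A) = 2. Summing the weights along each root-to-exit path yields the distinct numbers 0 (A, B, D) and 1 (A, C, D). Decoding inverts this: starting at A with path number 1, the greedy rule in getNextEdge below (take the largest edge weight that does not exceed the remaining number) selects A->C and leaves 0 for C->D.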
-void BallLarusDag::calculatePathNumbersFrom(BallLarusNode* node) { - if(node == getExit()) - // The Exit node must be base case - node->setNumberPaths(1); - else { - unsigned sumPaths = 0; - BallLarusNode* succNode; - - for(BLEdgeIterator succ = node->succBegin(), end = node->succEnd(); - succ != end; succ++) { - if( (*succ)->getType() == BallLarusEdge::BACKEDGE || - (*succ)->getType() == BallLarusEdge::SPLITEDGE ) - continue; - - (*succ)->setWeight(sumPaths); - succNode = (*succ)->getTarget(); - - if( !succNode->getNumberPaths() ) - return; - sumPaths += succNode->getNumberPaths(); - } - - node->setNumberPaths(sumPaths); - } -} - -// Allows subclasses to determine which type of Node is created. -// Override this method to produce subclasses of BallLarusNode if -// necessary. The destructor of BallLarusDag will call free on each -// pointer created. -BallLarusNode* BallLarusDag::createNode(BasicBlock* BB) { - return( new BallLarusNode(BB) ); -} - -// Allows subclasses to determine which type of Edge is created. -// Override this method to produce subclasses of BallLarusEdge if -// necessary. The destructor of BallLarusDag will call free on each -// pointer created. -BallLarusEdge* BallLarusDag::createEdge(BallLarusNode* source, - BallLarusNode* target, - unsigned duplicateCount) { - return( new BallLarusEdge(source, target, duplicateCount) ); -} - -// Proxy to node's constructor. Updates the DAG state. -BallLarusNode* BallLarusDag::addNode(BasicBlock* BB) { - BallLarusNode* newNode = createNode(BB); - _nodes.push_back(newNode); - return( newNode ); -} - -// Proxy to edge's constructor. Updates the DAG state. -BallLarusEdge* BallLarusDag::addEdge(BallLarusNode* source, - BallLarusNode* target, - unsigned duplicateCount) { - BallLarusEdge* newEdge = createEdge(source, target, duplicateCount); - _edges.push_back(newEdge); - source->addSuccEdge(newEdge); - target->addPredEdge(newEdge); - return(newEdge); -} - -// Adds a backedge with its phony edges. Updates the DAG state. -void BallLarusDag::addBackedge(BallLarusNode* source, BallLarusNode* target, - unsigned duplicateCount) { - BallLarusEdge* childEdge = addEdge(source, target, duplicateCount); - childEdge->setType(BallLarusEdge::BACKEDGE); - - childEdge->setPhonyRoot(addEdge(getRoot(), target,0)); - childEdge->setPhonyExit(addEdge(source, getExit(),0)); - - childEdge->getPhonyRoot()->setRealEdge(childEdge); - childEdge->getPhonyRoot()->setType(BallLarusEdge::BACKEDGE_PHONY); - - childEdge->getPhonyExit()->setRealEdge(childEdge); - childEdge->getPhonyExit()->setType(BallLarusEdge::BACKEDGE_PHONY); - _backEdges.push_back(childEdge); -} diff --git a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp b/contrib/llvm/lib/Analysis/PathProfileInfo.cpp deleted file mode 100644 index bc53221..0000000 --- a/contrib/llvm/lib/Analysis/PathProfileInfo.cpp +++ /dev/null @@ -1,433 +0,0 @@ -//===- PathProfileInfo.cpp ------------------------------------*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface used by optimizers to load path profiles, -// and provides a loader pass which reads a path profile file. 
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "path-profile-info"
-
-#include "llvm/Analysis/PathProfileInfo.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
-
-using namespace llvm;
-
-// command line option for loading path profiles
-static cl::opt<std::string>
-PathProfileInfoFilename("path-profile-loader-file", cl::init("llvmprof.out"),
-  cl::value_desc("filename"),
-  cl::desc("Path profile file loaded by -path-profile-loader"), cl::Hidden);
-
-namespace {
-  class PathProfileLoaderPass : public ModulePass, public PathProfileInfo {
-  public:
-    PathProfileLoaderPass() : ModulePass(ID) { }
-    ~PathProfileLoaderPass();
-
-    // this pass doesn't change anything (only loads information)
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesAll();
-    }
-
-    // the full name of the loader pass
-    virtual const char* getPassName() const {
-      return "Path Profiling Information Loader";
-    }
-
-    // required since this pass implements multiple inheritance
-    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
-      if (PI == &PathProfileInfo::ID)
-        return (PathProfileInfo*)this;
-      return this;
-    }
-
-    // entry point to run the pass
-    bool runOnModule(Module &M);
-
-    // pass identification
-    static char ID;
-
-  private:
-    // make a reference table to refer to function by number
-    void buildFunctionRefs(Module &M);
-
-    // process argument info of a program from the input file
-    void handleArgumentInfo();
-
-    // process path number information from the input file
-    void handlePathInfo();
-
-    // array of references to the functions in the module
-    std::vector<Function*> _functions;
-
-    // path profile file handle
-    FILE* _file;
-
-    // path profile file name
-    std::string _filename;
-  };
-}
-
-// register PathLoader
-char PathProfileLoaderPass::ID = 0;
-
-INITIALIZE_ANALYSIS_GROUP(PathProfileInfo, "Path Profile Information",
-                          NoPathProfileInfo)
-INITIALIZE_AG_PASS(PathProfileLoaderPass, PathProfileInfo,
-                   "path-profile-loader",
-                   "Load path profile information from file",
-                   false, true, false)
-
-char &llvm::PathProfileLoaderPassID = PathProfileLoaderPass::ID;
-
-// link PathLoader as a pass, and make it available as an optimisation
-ModulePass *llvm::createPathProfileLoaderPass() {
-  return new PathProfileLoaderPass;
-}
-
-// ----------------------------------------------------------------------------
-// PathEdge implementation
-//
-ProfilePathEdge::ProfilePathEdge (BasicBlock* source, BasicBlock* target,
-                                  unsigned duplicateNumber)
-  : _source(source), _target(target), _duplicateNumber(duplicateNumber) {}
-
-// ----------------------------------------------------------------------------
-// Path implementation
-//
-
-ProfilePath::ProfilePath (unsigned int number, unsigned int count,
-                          double countStdDev, PathProfileInfo* ppi)
-  : _number(number), _count(count), _countStdDev(countStdDev), _ppi(ppi) {}
-
-double ProfilePath::getFrequency() const {
-  return 100 * double(_count) /
-    double(_ppi->_functionPathCounts[_ppi->_currentFunction]);
-}
-
-static BallLarusEdge* getNextEdge (BallLarusNode* node,
-                                   unsigned int pathNumber) {
-  BallLarusEdge* best = 0;
-
-  for( BLEdgeIterator next = node->succBegin(),
-         end = node->succEnd(); next != end; next++ ) {
-    if( (*next)->getType() != BallLarusEdge::BACKEDGE && // no backedges
-        (*next)->getType() != BallLarusEdge::SPLITEDGE && // no split edges
-        (*next)->getWeight() <= pathNumber && // weight must be <= pathNumber
-        (!best || (best->getWeight() < (*next)->getWeight())) ) // best one?
-      best = *next;
-  }
-
-  return best;
-}
-
-ProfilePathEdgeVector* ProfilePath::getPathEdges() const {
-  BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
-  unsigned int increment = _number;
-  ProfilePathEdgeVector* pev = new ProfilePathEdgeVector;
-
-  while (currentNode != _ppi->_currentDag->getExit()) {
-    BallLarusEdge* next = getNextEdge(currentNode, increment);
-
-    increment -= next->getWeight();
-
-    if( next->getType() != BallLarusEdge::BACKEDGE_PHONY &&
-        next->getType() != BallLarusEdge::SPLITEDGE_PHONY &&
-        next->getTarget() != _ppi->_currentDag->getExit() )
-      pev->push_back(ProfilePathEdge(
-                       next->getSource()->getBlock(),
-                       next->getTarget()->getBlock(),
-                       next->getDuplicateNumber()));
-
-    if( next->getType() == BallLarusEdge::BACKEDGE_PHONY &&
-        next->getTarget() == _ppi->_currentDag->getExit() )
-      pev->push_back(ProfilePathEdge(
-                       next->getRealEdge()->getSource()->getBlock(),
-                       next->getRealEdge()->getTarget()->getBlock(),
-                       next->getDuplicateNumber()));
-
-    if( next->getType() == BallLarusEdge::SPLITEDGE_PHONY &&
-        next->getSource() == _ppi->_currentDag->getRoot() )
-      pev->push_back(ProfilePathEdge(
-                       next->getRealEdge()->getSource()->getBlock(),
-                       next->getRealEdge()->getTarget()->getBlock(),
-                       next->getDuplicateNumber()));
-
-    // set the new node
-    currentNode = next->getTarget();
-  }
-
-  return pev;
-}
-
-ProfilePathBlockVector* ProfilePath::getPathBlocks() const {
-  BallLarusNode* currentNode = _ppi->_currentDag->getRoot ();
-  unsigned int increment = _number;
-  ProfilePathBlockVector* pbv = new ProfilePathBlockVector;
-
-  while (currentNode != _ppi->_currentDag->getExit()) {
-    BallLarusEdge* next = getNextEdge(currentNode, increment);
-    increment -= next->getWeight();
-
-    // add block to the block list if it is a real edge
-    if( next->getType() == BallLarusEdge::NORMAL)
-      pbv->push_back (currentNode->getBlock());
-    // make the back edge the last edge since we are at the end
-    else if( next->getTarget() == _ppi->_currentDag->getExit() ) {
-      pbv->push_back (currentNode->getBlock());
-      pbv->push_back (next->getRealEdge()->getTarget()->getBlock());
-    }
-
-    // set the new node
-    currentNode = next->getTarget();
-  }
-
-  return pbv;
-}
-
-BasicBlock* ProfilePath::getFirstBlockInPath() const {
-  BallLarusNode* root = _ppi->_currentDag->getRoot();
-  BallLarusEdge* edge = getNextEdge(root, _number);
-
-  if( edge && (edge->getType() == BallLarusEdge::BACKEDGE_PHONY ||
-               edge->getType() == BallLarusEdge::SPLITEDGE_PHONY) )
-    return edge->getTarget()->getBlock();
-
-  return root->getBlock();
-}
-
-// ----------------------------------------------------------------------------
-// PathProfileInfo implementation
-//
-
-// Pass identification
-char llvm::PathProfileInfo::ID = 0;
-
-PathProfileInfo::PathProfileInfo () : _currentDag(0), _currentFunction(0) {
-}
-
-PathProfileInfo::~PathProfileInfo() {
-  if (_currentDag)
-    delete _currentDag;
-}
-
-// set the function for which paths are currently being processed
-void PathProfileInfo::setCurrentFunction(Function* F) {
-  // Make sure it exists
-  if (!F) return;
-
-  if (_currentDag)
-    delete _currentDag;
-
-  _currentFunction = F;
-  _currentDag = new BallLarusDag(*F);
-  _currentDag->init();
-  _currentDag->calculatePathNumbers();
-}
-
-// get the function for which paths are currently being processed
-Function* PathProfileInfo::getCurrentFunction() const {
-  return _currentFunction;
-}
-
-// get the entry block of the function
-BasicBlock* PathProfileInfo::getCurrentFunctionEntry() {
-  return _currentDag->getRoot()->getBlock();
-}
-
-// return the path based on its number
-ProfilePath* PathProfileInfo::getPath(unsigned int number) {
-  return _functionPaths[_currentFunction][number];
-}
-
-// return the number of paths which a function may potentially execute
-unsigned int PathProfileInfo::getPotentialPathCount() {
-  return _currentDag ? _currentDag->getNumberOfPaths() : 0;
-}
-
-// return an iterator for the beginning of a function's executed paths
-ProfilePathIterator PathProfileInfo::pathBegin() {
-  return _functionPaths[_currentFunction].begin();
-}
-
-// return an iterator for the end of a function's executed paths
-ProfilePathIterator PathProfileInfo::pathEnd() {
-  return _functionPaths[_currentFunction].end();
-}
-
-// returns the total number of paths run in the function
-unsigned int PathProfileInfo::pathsRun() {
-  return _currentFunction ? _functionPaths[_currentFunction].size() : 0;
-}
-
-// ----------------------------------------------------------------------------
-// PathLoader implementation
-//
-
-// remove all generated paths
-PathProfileLoaderPass::~PathProfileLoaderPass() {
-  for( FunctionPathIterator funcNext = _functionPaths.begin(),
-         funcEnd = _functionPaths.end(); funcNext != funcEnd; funcNext++)
-    for( ProfilePathIterator pathNext = funcNext->second.begin(),
-           pathEnd = funcNext->second.end(); pathNext != pathEnd; pathNext++)
-      delete pathNext->second;
-}
-
-// entry point of the pass; this loads and parses a file
-bool PathProfileLoaderPass::runOnModule(Module &M) {
-  // get the filename and setup the module's function references
-  _filename = PathProfileInfoFilename;
-  buildFunctionRefs (M);
-
-  if (!(_file = fopen(_filename.c_str(), "rb"))) {
-    errs () << "error: input '" << _filename << "' file does not exist.\n";
-    return false;
-  }
-
-  ProfilingType profType;
-
-  while( fread(&profType, sizeof(ProfilingType), 1, _file) ) {
-    switch (profType) {
-    case ArgumentInfo:
-      handleArgumentInfo ();
-      break;
-    case PathInfo:
-      handlePathInfo ();
-      break;
-    default:
-      errs () << "error: bad path profiling file syntax, " << profType << "\n";
-      fclose (_file);
-      return false;
-    }
-  }
-
-  fclose (_file);
-
-  return true;
-}
-
-// create a reference table for functions defined in the path profile file
-void PathProfileLoaderPass::buildFunctionRefs (Module &M) {
-  _functions.push_back(0); // make the 0 index a null pointer
-
-  for (Module::iterator F = M.begin(), E = M.end(); F != E; F++) {
-    if (F->isDeclaration())
-      continue;
-    _functions.push_back(F);
-  }
-}
-
-// handle command line argument info in the output file
-void PathProfileLoaderPass::handleArgumentInfo() {
-  // get the argument list's length
-  unsigned savedArgsLength;
-  if( fread(&savedArgsLength, sizeof(unsigned), 1, _file) != 1 ) {
-    errs() << "warning: argument info header/data mismatch\n";
-    return;
-  }
-
-  // allocate a buffer, and get the arguments
-  char* args = new char[savedArgsLength+1];
-  if( fread(args, 1, savedArgsLength, _file) != savedArgsLength )
-    errs() << "warning: argument info header/data mismatch\n";
-
-  args[savedArgsLength] = '\0';
-  argList = std::string(args);
-  delete [] args; // cleanup dynamic string
-
-  // byte alignment
-  if (savedArgsLength & 3)
-    fseek(_file, 4-(savedArgsLength&3), SEEK_CUR);
-}
-
-// Handle path profile information in the output file
-void PathProfileLoaderPass::handlePathInfo () {
-  // get the number of
functions in this profile - unsigned functionCount; - if( fread(&functionCount, sizeof(functionCount), 1, _file) != 1 ) { - errs() << "warning: path info header/data mismatch\n"; - return; - } - - // gather path information for each function - for (unsigned i = 0; i < functionCount; i++) { - PathProfileHeader pathHeader; - if( fread(&pathHeader, sizeof(pathHeader), 1, _file) != 1 ) { - errs() << "warning: bad header for path function info\n"; - break; - } - - Function* f = _functions[pathHeader.fnNumber]; - - // dynamically allocate a table to store path numbers - PathProfileTableEntry* pathTable = - new PathProfileTableEntry[pathHeader.numEntries]; - - if( fread(pathTable, sizeof(PathProfileTableEntry), - pathHeader.numEntries, _file) != pathHeader.numEntries) { - delete [] pathTable; - errs() << "warning: path function info header/data mismatch\n"; - return; - } - - // Build a new path for the current function - unsigned int totalPaths = 0; - for (unsigned int j = 0; j < pathHeader.numEntries; j++) { - totalPaths += pathTable[j].pathCounter; - _functionPaths[f][pathTable[j].pathNumber] - = new ProfilePath(pathTable[j].pathNumber, pathTable[j].pathCounter, - 0, this); - } - - _functionPathCounts[f] = totalPaths; - - delete [] pathTable; - } -} - -//===----------------------------------------------------------------------===// -// NoProfile PathProfileInfo implementation -// - -namespace { - struct NoPathProfileInfo : public ImmutablePass, public PathProfileInfo { - static char ID; // Class identification, replacement for typeinfo - NoPathProfileInfo() : ImmutablePass(ID) { - initializeNoPathProfileInfoPass(*PassRegistry::getPassRegistry()); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { - if (PI == &PathProfileInfo::ID) - return (PathProfileInfo*)this; - return this; - } - - virtual const char *getPassName() const { - return "NoPathProfileInfo"; - } - }; -} // End of anonymous namespace - -char NoPathProfileInfo::ID = 0; -// Register this pass... -INITIALIZE_AG_PASS(NoPathProfileInfo, PathProfileInfo, "no-path-profile", - "No Path Profile Information", false, true, true) - -ImmutablePass *llvm::createNoPathProfileInfoPass() { return new NoPathProfileInfo(); } diff --git a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp b/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp deleted file mode 100644 index 48d7d05..0000000 --- a/contrib/llvm/lib/Analysis/PathProfileVerifier.cpp +++ /dev/null @@ -1,206 +0,0 @@ -//===- PathProfileVerifier.cpp --------------------------------*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. 
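The greedy walk in getNextEdge()/getPathEdges() above is the core of the Ball-Larus scheme: the DAG's edge weights are assigned so that every root-to-exit path sums to a distinct number, so a recorded path number can be decoded by repeatedly taking the heaviest out-edge whose weight does not exceed the remainder. A standalone sketch of that decode (illustration only, not from the upstream sources; the diamond CFG, weights, and names are invented, with strings standing in for BallLarusNode):

    // Diamond CFG A -> {B,C} -> D; edge A->C has weight 1, so path 0
    // decodes to A,B,D and path 1 decodes to A,C,D.
    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    struct Edge { std::string dst; unsigned weight; };

    int main() {
      std::map<std::string, std::vector<Edge> > succ;
      succ["A"].push_back(Edge{"B", 0});
      succ["A"].push_back(Edge{"C", 1});
      succ["B"].push_back(Edge{"D", 0});
      succ["C"].push_back(Edge{"D", 0});

      unsigned remainder = 1;                 // decode path number 1
      std::string node = "A";
      while (node != "D") {
        const Edge *best = 0;
        // Heaviest out-edge whose weight fits in the remainder, as in
        // getNextEdge() above (backedge/splitedge cases omitted here).
        for (size_t i = 0; i < succ[node].size(); ++i)
          if (succ[node][i].weight <= remainder &&
              (!best || best->weight < succ[node][i].weight))
            best = &succ[node][i];
        remainder -= best->weight;
        std::printf("%s->%s ", node.c_str(), best->dst.c_str());
        node = best->dst;
      }
      std::printf("\n");                      // prints: A->C C->D
      return 0;
    }

The same subtraction drives getPathBlocks() and getFirstBlockInPath() above; only the bookkeeping around phony edges differs.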
-// -//===----------------------------------------------------------------------===// -// -// This verifier derives an edge profile file from current path profile -// information -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "path-profile-verifier" - -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/PathProfileInfo.h" -#include "llvm/Analysis/ProfileInfoTypes.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include - -using namespace llvm; - -namespace { - class PathProfileVerifier : public ModulePass { - private: - bool runOnModule(Module &M); - - public: - static char ID; // Pass identification, replacement for typeid - PathProfileVerifier() : ModulePass(ID) { - initializePathProfileVerifierPass(*PassRegistry::getPassRegistry()); - } - - - virtual const char *getPassName() const { - return "Path Profiler Verifier"; - } - - // The verifier requires the path profile and edge profile. - virtual void getAnalysisUsage(AnalysisUsage& AU) const; - }; -} - -static cl::opt -EdgeProfileFilename("path-profile-verifier-file", - cl::init("edgefrompath.llvmprof.out"), - cl::value_desc("filename"), - cl::desc("Edge profile file generated by -path-profile-verifier"), - cl::Hidden); - -char PathProfileVerifier::ID = 0; -INITIALIZE_PASS(PathProfileVerifier, "path-profile-verifier", - "Compare the path profile derived edge profile against the " - "edge profile.", true, true) - -ModulePass *llvm::createPathProfileVerifierPass() { - return new PathProfileVerifier(); -} - -// The verifier requires the path profile and edge profile. -void PathProfileVerifier::getAnalysisUsage(AnalysisUsage& AU) const { - AU.addRequired(); - AU.addPreserved(); -} - -typedef std::map DuplicateToIndexMap; -typedef std::map BlockToDuplicateMap; -typedef std::map NestedBlockToIndexMap; - -// the verifier iterates through each path to gather the total -// number of edge frequencies -bool PathProfileVerifier::runOnModule (Module &M) { - PathProfileInfo& pathProfileInfo = getAnalysis(); - - // setup a data structure to map path edges which index an - // array of edge counters - NestedBlockToIndexMap arrayMap; - unsigned i = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - - arrayMap[(BasicBlock*)0][F->begin()][0] = i++; - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - - unsigned duplicate = 0; - BasicBlock* prev = 0; - for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; - prev = TI->getSuccessor(s), ++s) { - if (prev == TI->getSuccessor(s)) - duplicate++; - else duplicate = 0; - - arrayMap[BB][TI->getSuccessor(s)][duplicate] = i++; - } - } - } - - std::vector edgeArray(i); - - // iterate through each path and increment the edge counters as needed - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - - pathProfileInfo.setCurrentFunction(F); - - DEBUG(dbgs() << "function '" << F->getName() << "' ran " - << pathProfileInfo.pathsRun() - << "/" << pathProfileInfo.getPotentialPathCount() - << " potential paths\n"); - - for( ProfilePathIterator nextPath = pathProfileInfo.pathBegin(), - endPath = pathProfileInfo.pathEnd(); - nextPath != endPath; nextPath++ ) { - ProfilePath* currentPath = nextPath->second; - - ProfilePathEdgeVector* pev = currentPath->getPathEdges(); - 
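A detail worth noting in runOnModule() above: a terminator may list the same successor several times (for example, multiple switch cases with one target), so counters are indexed by (source, target, duplicateNumber), where the duplicate number increments across consecutive identical successors. A reduced model of that nested map (illustration only; strings stand in for BasicBlock*):

    #include <cassert>
    #include <map>
    #include <string>

    int main() {
      // source -> target -> duplicateNumber -> counter index
      std::map<std::string,
               std::map<std::string, std::map<unsigned, unsigned> > > arrayMap;
      unsigned i = 0;
      // A switch in "sw" with successors a, a, b: the repeated successor
      // "a" gets duplicate numbers 0 and 1, hence two distinct counters.
      arrayMap["sw"]["a"][0] = i++;
      arrayMap["sw"]["a"][1] = i++;
      arrayMap["sw"]["b"][0] = i++;
      assert(arrayMap["sw"]["a"][1] == 1 && i == 3);
      return 0;
    }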
DEBUG(dbgs () << "path #" << currentPath->getNumber() << ": " - << currentPath->getCount() << "\n"); - // setup the entry edge (normally path profiling doesn't care about this) - if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) - edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]] - += currentPath->getCount(); - - for( ProfilePathEdgeIterator nextEdge = pev->begin(), - endEdge = pev->end(); nextEdge != endEdge; nextEdge++ ) { - if (nextEdge != pev->begin()) - DEBUG(dbgs() << " :: "); - - BasicBlock* source = nextEdge->getSource(); - BasicBlock* target = nextEdge->getTarget(); - unsigned duplicateNumber = nextEdge->getDuplicateNumber(); - DEBUG(dbgs() << source->getName() << " --{" << duplicateNumber - << "}--> " << target->getName()); - - // Ensure all the referenced edges exist - // TODO: make this a separate function - if( !arrayMap.count(source) ) { - errs() << " error [" << F->getName() << "()]: source '" - << source->getName() - << "' does not exist in the array map.\n"; - } else if( !arrayMap[source].count(target) ) { - errs() << " error [" << F->getName() << "()]: target '" - << target->getName() - << "' does not exist in the array map.\n"; - } else if( !arrayMap[source][target].count(duplicateNumber) ) { - errs() << " error [" << F->getName() << "()]: edge " - << source->getName() << " -> " << target->getName() - << " duplicate number " << duplicateNumber - << " does not exist in the array map.\n"; - } else { - edgeArray[arrayMap[source][target][duplicateNumber]] - += currentPath->getCount(); - } - } - - DEBUG(errs() << "\n"); - - delete pev; - } - } - - std::string errorInfo; - std::string filename = EdgeProfileFilename; - - // Open a handle to the file - FILE* edgeFile = fopen(filename.c_str(),"wb"); - - if (!edgeFile) { - errs() << "error: unable to open file '" << filename << "' for output.\n"; - return false; - } - - errs() << "Generating edge profile '" << filename << "' ...\n"; - - // write argument info - unsigned type = ArgumentInfo; - unsigned num = pathProfileInfo.argList.size(); - int zeros = 0; - - fwrite(&type,sizeof(unsigned),1,edgeFile); - fwrite(&num,sizeof(unsigned),1,edgeFile); - fwrite(pathProfileInfo.argList.c_str(),1,num,edgeFile); - if (num&3) - fwrite(&zeros, 1, 4-(num&3), edgeFile); - - type = EdgeInfo; - num = edgeArray.size(); - fwrite(&type,sizeof(unsigned),1,edgeFile); - fwrite(&num,sizeof(unsigned),1,edgeFile); - - // write each edge to the file - for( std::vector::iterator s = edgeArray.begin(), - e = edgeArray.end(); s != e; s++) - fwrite(&*s, sizeof (unsigned), 1, edgeFile); - - fclose (edgeFile); - - return true; -} diff --git a/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp b/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp deleted file mode 100644 index d7f444b..0000000 --- a/contrib/llvm/lib/Analysis/ProfileDataLoader.cpp +++ /dev/null @@ -1,155 +0,0 @@ -//===- ProfileDataLoader.cpp - Load profile information from disk ---------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The ProfileDataLoader class is used to load raw profiling data from the dump -// file. 
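The writer above keeps every packet header word-aligned: after the argument payload it emits up to three zero bytes, matching the fseek-based skip in handleArgumentInfo() on the load side. A standalone sketch of just that rule (illustration only; the output file name and the tag value 1 for ArgumentInfo are assumptions):

    #include <cstdio>
    #include <cstring>

    static void writeArgPacket(FILE *out, unsigned tag, const char *args) {
      unsigned num = (unsigned)strlen(args);
      int zeros = 0;
      fwrite(&tag, sizeof(unsigned), 1, out);
      fwrite(&num, sizeof(unsigned), 1, out);
      fwrite(args, 1, num, out);
      if (num & 3)                      // e.g. num == 7 -> 1 pad byte
        fwrite(&zeros, 1, 4 - (num & 3), out);
    }

    int main() {
      FILE *out = fopen("demo.llvmprof.out", "wb");
      if (!out) return 1;
      writeArgPacket(out, 1 /* ArgumentInfo tag, assumed */, "-O2 a.c");
      fclose(out);
      return 0;
    }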
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Analysis/ProfileDataLoader.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Analysis/ProfileDataTypes.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/Support/system_error.h" -#include -#include -using namespace llvm; - -raw_ostream &llvm::operator<<(raw_ostream &O, std::pair E) { - O << "("; - - if (E.first) - O << E.first->getName(); - else - O << "0"; - - O << ","; - - if (E.second) - O << E.second->getName(); - else - O << "0"; - - return O << ")"; -} - -/// AddCounts - Add 'A' and 'B', accounting for the fact that the value of one -/// (or both) may not be defined. -static unsigned AddCounts(unsigned A, unsigned B) { - // If either value is undefined, use the other. - // Undefined + undefined = undefined. - if (A == ProfileDataLoader::Uncounted) return B; - if (B == ProfileDataLoader::Uncounted) return A; - - return A + B; -} - -/// ReadProfilingData - Load 'NumEntries' items of type 'T' from file 'F' -template -static void ReadProfilingData(const char *ToolName, FILE *F, - T *Data, size_t NumEntries) { - // Read in the block of data... - if (fread(Data, sizeof(T), NumEntries, F) != NumEntries) - report_fatal_error(Twine(ToolName) + ": Profiling data truncated"); -} - -/// ReadProfilingNumEntries - Read how many entries are in this profiling data -/// packet. -static unsigned ReadProfilingNumEntries(const char *ToolName, FILE *F, - bool ShouldByteSwap) { - unsigned Entry; - ReadProfilingData(ToolName, F, &Entry, 1); - return ShouldByteSwap ? ByteSwap_32(Entry) : Entry; -} - -/// ReadProfilingBlock - Read the number of entries in the next profiling data -/// packet and then accumulate the entries into 'Data'. -static void ReadProfilingBlock(const char *ToolName, FILE *F, - bool ShouldByteSwap, - SmallVector &Data) { - // Read the number of entries... - unsigned NumEntries = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap); - - // Read in the data. - SmallVector TempSpace(NumEntries); - ReadProfilingData(ToolName, F, TempSpace.data(), NumEntries); - - // Make sure we have enough space ... - if (Data.size() < NumEntries) - Data.resize(NumEntries, ProfileDataLoader::Uncounted); - - // Accumulate the data we just read into the existing data. - for (unsigned i = 0; i < NumEntries; ++i) { - unsigned Entry = ShouldByteSwap ? ByteSwap_32(TempSpace[i]) : TempSpace[i]; - Data[i] = AddCounts(Entry, Data[i]); - } -} - -/// ReadProfilingArgBlock - Read the command line arguments that the progam was -/// run with when the current profiling data packet(s) were generated. -static void ReadProfilingArgBlock(const char *ToolName, FILE *F, - bool ShouldByteSwap, - SmallVector &CommandLines) { - // Read the number of bytes ... - unsigned ArgLength = ReadProfilingNumEntries(ToolName, F, ShouldByteSwap); - - // Read in the arguments (if there are any to read). Round up the length to - // the nearest 4-byte multiple. - SmallVector Args(ArgLength+4); - if (ArgLength) - ReadProfilingData(ToolName, F, Args.data(), (ArgLength+3) & ~3); - - // Store the arguments. - CommandLines.push_back(std::string(&Args[0], &Args[ArgLength])); -} - -const unsigned ProfileDataLoader::Uncounted = ~0U; - -/// ProfileDataLoader ctor - Read the specified profiling data file, reporting -/// a fatal error if the file is invalid or broken. 
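AddCounts() above merges counters under a sentinel: ~0U ("Uncounted") means no value was recorded, and it must be absorbed rather than summed. A minimal model of that arithmetic (illustration only):

    #include <cassert>

    static const unsigned Uncounted = ~0U;

    static unsigned addCounts(unsigned a, unsigned b) {
      if (a == Uncounted) return b;     // undefined + x == x
      if (b == Uncounted) return a;
      return a + b;                     // both defined: accumulate
    }

    int main() {
      assert(addCounts(3, 4) == 7);
      assert(addCounts(Uncounted, 7) == 7);
      assert(addCounts(Uncounted, Uncounted) == Uncounted);
      return 0;
    }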
-ProfileDataLoader::ProfileDataLoader(const char *ToolName, - const std::string &Filename) - : Filename(Filename) { - FILE *F = fopen(Filename.c_str(), "rb"); - if (F == 0) - report_fatal_error(Twine(ToolName) + ": Error opening '" + - Filename + "': "); - - // Keep reading packets until we run out of them. - unsigned PacketType; - while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) { - // If the low eight bits of the packet are zero, we must be dealing with an - // endianness mismatch. Byteswap all words read from the profiling - // information. This can happen when the compiler host and target have - // different endianness. - bool ShouldByteSwap = (char)PacketType == 0; - PacketType = ShouldByteSwap ? ByteSwap_32(PacketType) : PacketType; - - switch (PacketType) { - case ArgumentInfo: - ReadProfilingArgBlock(ToolName, F, ShouldByteSwap, CommandLines); - break; - - case EdgeInfo: - ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts); - break; - - default: - report_fatal_error(std::string(ToolName) - + ": Unknown profiling packet type"); - break; - } - } - - fclose(F); -} diff --git a/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp deleted file mode 100644 index 2ee0093..0000000 --- a/contrib/llvm/lib/Analysis/ProfileDataLoaderPass.cpp +++ /dev/null @@ -1,188 +0,0 @@ -//===- ProfileDataLoaderPass.cpp - Set branch weight metadata from prof ---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass loads profiling data from a dump file and sets branch weight -// metadata. -// -// TODO: Replace all "profile-metadata-loader" strings with "profile-loader" -// once ProfileInfo etc. has been removed. -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "profile-metadata-loader" -#include "llvm/Analysis/Passes.h" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Analysis/ProfileDataLoader.h" -#include "llvm/IR/BasicBlock.h" -#include "llvm/IR/InstrTypes.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/MDBuilder.h" -#include "llvm/IR/Metadata.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -STATISTIC(NumEdgesRead, "The # of edges read."); -STATISTIC(NumTermsAnnotated, "The # of terminator instructions annotated."); - -static cl::opt -ProfileMetadataFilename("profile-file", cl::init("llvmprof.out"), - cl::value_desc("filename"), - cl::desc("Profile file loaded by -profile-metadata-loader")); - -namespace { - /// This pass loads profiling data from a dump file and sets branch weight - /// metadata. 
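The endianness test in the ProfileDataLoader constructor above relies on packet tags being small words: if the low byte of a freshly read tag is zero, the file was produced on a host with the opposite byte order and every word must be swapped. A standalone model (illustration only; helper names are invented):

    #include <cassert>
    #include <stdint.h>

    static uint32_t byteSwap32(uint32_t v) {
      return (v >> 24) | ((v >> 8) & 0xff00u) |
             ((v << 8) & 0xff0000u) | (v << 24);
    }

    static uint32_t normalizeTag(uint32_t raw, bool &shouldSwap) {
      shouldSwap = (uint8_t)raw == 0;   // same test as (char)PacketType == 0
      return shouldSwap ? byteSwap32(raw) : raw;
    }

    int main() {
      bool swap;
      assert(normalizeTag(4u, swap) == 4 && !swap);          // native order
      assert(normalizeTag(0x04000000u, swap) == 4 && swap);  // foreign order
      return 0;
    }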
-  class ProfileMetadataLoaderPass : public ModulePass {
-    std::string Filename;
-  public:
-    static char ID; // Class identification, replacement for typeinfo
-    explicit ProfileMetadataLoaderPass(const std::string &filename = "")
-        : ModulePass(ID), Filename(filename) {
-      initializeProfileMetadataLoaderPassPass(*PassRegistry::getPassRegistry());
-      if (filename.empty()) Filename = ProfileMetadataFilename;
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesAll();
-    }
-
-    virtual const char *getPassName() const {
-      return "Profile loader";
-    }
-
-    virtual void readEdge(unsigned, ProfileData&, ProfileData::Edge,
-                          ArrayRef<unsigned>);
-    virtual unsigned matchEdges(Module&, ProfileData&, ArrayRef<unsigned>);
-    virtual void setBranchWeightMetadata(Module&, ProfileData&);
-
-    virtual bool runOnModule(Module &M);
-  };
-} // End of anonymous namespace
-
-char ProfileMetadataLoaderPass::ID = 0;
-INITIALIZE_PASS_BEGIN(ProfileMetadataLoaderPass, "profile-metadata-loader",
-                      "Load profile information from llvmprof.out", false, true)
-INITIALIZE_PASS_END(ProfileMetadataLoaderPass, "profile-metadata-loader",
-                    "Load profile information from llvmprof.out", false, true)
-
-char &llvm::ProfileMetadataLoaderPassID = ProfileMetadataLoaderPass::ID;
-
-/// createProfileMetadataLoaderPass - This function returns a Pass that loads
-/// the profiling information for the module from the specified filename,
-/// making it available to the optimizers.
-ModulePass *llvm::createProfileMetadataLoaderPass() {
-  return new ProfileMetadataLoaderPass();
-}
-ModulePass *llvm::createProfileMetadataLoaderPass(const std::string &Filename) {
-  return new ProfileMetadataLoaderPass(Filename);
-}
-
-/// readEdge - Take the value from a profile counter and assign it to an edge.
-void ProfileMetadataLoaderPass::readEdge(unsigned ReadCount,
-                                         ProfileData &PB, ProfileData::Edge e,
-                                         ArrayRef<unsigned> Counters) {
-  if (ReadCount >= Counters.size()) return;
-
-  unsigned weight = Counters[ReadCount];
-  assert(weight != ProfileDataLoader::Uncounted);
-  PB.addEdgeWeight(e, weight);
-
-  DEBUG(dbgs() << "-- Read Edge Counter for " << e
-               << " (# "<< (ReadCount) << "): "
-               << PB.getEdgeWeight(e) << "\n");
-}
-
-/// matchEdges - Link every profile counter with an edge.
-unsigned ProfileMetadataLoaderPass::matchEdges(Module &M, ProfileData &PB,
-                                               ArrayRef<unsigned> Counters) {
-  if (Counters.size() == 0) return 0;
-
-  unsigned ReadCount = 0;
-
-  for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
-    if (F->isDeclaration()) continue;
-    DEBUG(dbgs() << "Loading edges in '" << F->getName() << "'\n");
-    readEdge(ReadCount++, PB, PB.getEdge(0, &F->getEntryBlock()), Counters);
-    for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) {
-      TerminatorInst *TI = BB->getTerminator();
-      for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) {
-        readEdge(ReadCount++, PB, PB.getEdge(BB,TI->getSuccessor(s)),
-                 Counters);
-      }
-    }
-  }
-
-  return ReadCount;
-}
-
-/// setBranchWeightMetadata - Translate the counter values associated with each
-/// edge into branch weights for each conditional branch (a branch with 2 or
-/// more destinations).
-void ProfileMetadataLoaderPass::setBranchWeightMetadata(Module &M, - ProfileData &PB) { - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Setting branch metadata in '" << F->getName() << "'\n"); - - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - unsigned NumSuccessors = TI->getNumSuccessors(); - - // If there is only one successor then we can not set a branch - // probability as the target is certain. - if (NumSuccessors < 2) continue; - - // Load the weights of all edges leading from this terminator. - DEBUG(dbgs() << "-- Terminator with " << NumSuccessors - << " successors:\n"); - SmallVector Weights(NumSuccessors); - for (unsigned s = 0 ; s < NumSuccessors ; ++s) { - ProfileData::Edge edge = PB.getEdge(BB, TI->getSuccessor(s)); - Weights[s] = (uint32_t)PB.getEdgeWeight(edge); - DEBUG(dbgs() << "---- Edge '" << edge << "' has weight " - << Weights[s] << "\n"); - } - - // Set branch weight metadata. This will set branch probabilities of - // 100%/0% if that is true of the dynamic execution. - // BranchProbabilityInfo can account for this when it loads this metadata - // (it gives the unexectuted branch a weight of 1 for the purposes of - // probability calculations). - MDBuilder MDB(TI->getContext()); - MDNode *Node = MDB.createBranchWeights(Weights); - TI->setMetadata(LLVMContext::MD_prof, Node); - NumTermsAnnotated++; - } - } -} - -bool ProfileMetadataLoaderPass::runOnModule(Module &M) { - ProfileDataLoader PDL("profile-data-loader", Filename); - ProfileData PB; - - ArrayRef Counters = PDL.getRawEdgeCounts(); - - unsigned ReadCount = matchEdges(M, PB, Counters); - - if (ReadCount != Counters.size()) { - errs() << "WARNING: profile information is inconsistent with " - << "the current program!\n"; - } - NumEdgesRead = ReadCount; - - setBranchWeightMetadata(M, PB); - - return ReadCount > 0; -} diff --git a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp b/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp deleted file mode 100644 index b284b99..0000000 --- a/contrib/llvm/lib/Analysis/ProfileEstimatorPass.cpp +++ /dev/null @@ -1,426 +0,0 @@ -//===- ProfileEstimatorPass.cpp - LLVM Pass to estimate profile info ------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a concrete implementation of profiling information that -// estimates the profiling information in a very crude and unimaginative way. 
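setBranchWeightMetadata() above boils down to one API pairing: MDBuilder::createBranchWeights() builds the weights node, and setMetadata(LLVMContext::MD_prof, ...) hangs it on the terminator. A minimal sketch against the LLVM headers of this era (illustration only; fixed 90/10 weights stand in for loaded counters, and error handling is omitted):

    #include "llvm/IR/IRBuilder.h"
    #include "llvm/IR/LLVMContext.h"
    #include "llvm/IR/MDBuilder.h"
    #include "llvm/IR/Module.h"
    using namespace llvm;

    int main() {
      LLVMContext Ctx;
      Module M("weights-demo", Ctx);
      Type *ArgTys[] = { Type::getInt1Ty(Ctx) };
      FunctionType *FTy =
          FunctionType::get(Type::getVoidTy(Ctx), ArgTys, false);
      Function *F = Function::Create(FTy, Function::ExternalLinkage, "f", &M);

      // Hand-build a two-way conditional branch on the i1 argument.
      BasicBlock *Entry = BasicBlock::Create(Ctx, "entry", F);
      BasicBlock *Then  = BasicBlock::Create(Ctx, "then", F);
      BasicBlock *Else  = BasicBlock::Create(Ctx, "else", F);
      IRBuilder<> B(Entry);
      BranchInst *Br = B.CreateCondBr(&*F->arg_begin(), Then, Else);
      IRBuilder<>(Then).CreateRetVoid();
      IRBuilder<>(Else).CreateRetVoid();

      // 90% taken / 10% not taken, attached as a !prof branch_weights node.
      MDBuilder MDB(Ctx);
      Br->setMetadata(LLVMContext::MD_prof, MDB.createBranchWeights(90, 10));
      M.dump();
      return 0;
    }

As the pass comments note, BranchProbabilityInfo later reads this metadata back and gives an unexecuted branch a weight of 1 for probability calculations.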
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-estimator"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-using namespace llvm;
-
-static cl::opt<double>
-LoopWeight(
-    "profile-estimator-loop-weight", cl::init(10),
-    cl::value_desc("loop-weight"),
-    cl::desc("Number of loop executions used for profile-estimator")
-);
-
-namespace {
-  class ProfileEstimatorPass : public FunctionPass, public ProfileInfo {
-    double ExecCount;
-    LoopInfo *LI;
-    std::set<BasicBlock*> BBToVisit;
-    std::map<Loop*,double> LoopExitWeights;
-    std::map<Edge,double> MinimalWeight;
-  public:
-    static char ID; // Class identification, replacement for typeinfo
-    explicit ProfileEstimatorPass(const double execcount = 0)
-        : FunctionPass(ID), ExecCount(execcount) {
-      initializeProfileEstimatorPassPass(*PassRegistry::getPassRegistry());
-      if (execcount == 0) ExecCount = LoopWeight;
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesAll();
-      AU.addRequired<LoopInfo>();
-    }
-
-    virtual const char *getPassName() const {
-      return "Profiling information estimator";
-    }
-
-    /// run - Estimate the profile information from the specified file.
-    virtual bool runOnFunction(Function &F);
-
-    /// getAdjustedAnalysisPointer - This method is used when a pass implements
-    /// an analysis interface through multiple inheritance.  If needed, it
-    /// should override this to adjust the this pointer as needed for the
-    /// specified pass info.
-    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
-      if (PI == &ProfileInfo::ID)
-        return (ProfileInfo*)this;
-      return this;
-    }
-
-    virtual void recurseBasicBlock(BasicBlock *BB);
-
-    void inline printEdgeWeight(Edge);
-  };
-} // End of anonymous namespace
-
-char ProfileEstimatorPass::ID = 0;
-INITIALIZE_AG_PASS_BEGIN(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
-                         "Estimate profiling information", false, true, false)
-INITIALIZE_PASS_DEPENDENCY(LoopInfo)
-INITIALIZE_AG_PASS_END(ProfileEstimatorPass, ProfileInfo, "profile-estimator",
-                       "Estimate profiling information", false, true, false)
-
-namespace llvm {
-  char &ProfileEstimatorPassID = ProfileEstimatorPass::ID;
-
-  FunctionPass *createProfileEstimatorPass() {
-    return new ProfileEstimatorPass();
-  }
-
-  /// createProfileEstimatorPass - This function returns a Pass that estimates
-  /// profiling information using the given loop execution count.
-  Pass *createProfileEstimatorPass(const unsigned execcount) {
-    return new ProfileEstimatorPass(execcount);
-  }
-}
-
-static double ignoreMissing(double w) {
-  if (w == ProfileInfo::MissingValue) return 0;
-  return w;
-}
-
-static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
-  DEBUG(dbgs() << "-- Edge " << e << " is not calculated, " << M << "\n");
-}
-
-void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
-  DEBUG(dbgs() << "-- Weight of Edge " << E << ":"
-               << format("%20.20g", getEdgeWeight(E)) << "\n");
-}
-
-// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
-// single block and then recurses into the successors.
-// The algorithm preserves the flow condition, meaning that the sum of the
-// weight of the incoming edges must be equal to the block weight which must in
-// turn be equal to the sum of the weights of the outgoing edges.
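In numbers, the flow condition makes a single unknown weight around a block fully determined, which is also what CalculateMissingEdge() in ProfileInfo.cpp further below exploits. A toy calculation (illustration only; the weights are invented):

    #include <cstdio>

    int main() {
      double in1 = 60, in2 = 40;        // two known incoming edges
      double out1 = 75;                 // one known outgoing edge
      double blockWeight = in1 + in2;   // 100, by the flow condition
      double out2 = blockWeight - out1; // the unknown edge must carry 25
      std::printf("block=%g missing-edge=%g\n", blockWeight, out2);
      return 0;
    }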
-// Since the flow of an block is deterimined from the current state of the -// flow, once an edge has a flow assigned this flow is never changed again, -// otherwise it would be possible to violate the flow condition in another -// block. -void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) { - - // Break the recursion if this BasicBlock was already visited. - if (BBToVisit.find(BB) == BBToVisit.end()) return; - - // Read the LoopInfo for this block. - bool BBisHeader = LI->isLoopHeader(BB); - Loop* BBLoop = LI->getLoopFor(BB); - - // To get the block weight, read all incoming edges. - double BBWeight = 0; - std::set ProcessedPreds; - for ( pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - bbi != bbe; ++bbi ) { - // If this block was not considered already, add weight. - Edge edge = getEdge(*bbi,BB); - double w = getEdgeWeight(edge); - if (ProcessedPreds.insert(*bbi).second) { - BBWeight += ignoreMissing(w); - } - // If this block is a loop header and the predecessor is contained in this - // loop, thus the edge is a backedge, continue and do not check if the - // value is valid. - if (BBisHeader && BBLoop->contains(*bbi)) { - printEdgeError(edge, "but is backedge, continuing"); - continue; - } - // If the edges value is missing (and this is no loop header, and this is - // no backedge) return, this block is currently non estimatable. - if (w == MissingValue) { - printEdgeError(edge, "returning"); - return; - } - } - if (getExecutionCount(BB) != MissingValue) { - BBWeight = getExecutionCount(BB); - } - - // Fetch all necessary information for current block. - SmallVector ExitEdges; - SmallVector Edges; - if (BBLoop) { - BBLoop->getExitEdges(ExitEdges); - } - - // If this is a loop header, consider the following: - // Exactly the flow that is entering this block, must exit this block too. So - // do the following: - // *) get all the exit edges, read the flow that is already leaving this - // loop, remember the edges that do not have any flow on them right now. - // (The edges that have already flow on them are most likely exiting edges of - // other loops, do not touch those flows because the previously caclulated - // loopheaders would not be exact anymore.) - // *) In case there is not a single exiting edge left, create one at the loop - // latch to prevent the flow from building up in the loop. - // *) Take the flow that is not leaving the loop already and distribute it on - // the remaining exiting edges. - // (This ensures that all flow that enters the loop also leaves it.) - // *) Increase the flow into the loop by increasing the weight of this block. - // There is at least one incoming backedge that will bring us this flow later - // on. (So that the flow condition in this node is valid again.) - if (BBisHeader) { - double incoming = BBWeight; - // Subtract the flow leaving the loop. - std::set ProcessedExits; - for (SmallVector::iterator ei = ExitEdges.begin(), - ee = ExitEdges.end(); ei != ee; ++ei) { - if (ProcessedExits.insert(*ei).second) { - double w = getEdgeWeight(*ei); - if (w == MissingValue) { - Edges.push_back(*ei); - // Check if there is a necessary minimal weight, if yes, subtract it - // from weight. 
- if (MinimalWeight.find(*ei) != MinimalWeight.end()) { - incoming -= MinimalWeight[*ei]; - DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); - } - } else { - incoming -= w; - } - } - } - // If no exit edges, create one: - if (Edges.size() == 0) { - BasicBlock *Latch = BBLoop->getLoopLatch(); - if (Latch) { - Edge edge = getEdge(Latch,0); - EdgeInformation[BB->getParent()][edge] = BBWeight; - printEdgeWeight(edge); - edge = getEdge(Latch, BB); - EdgeInformation[BB->getParent()][edge] = BBWeight * ExecCount; - printEdgeWeight(edge); - } - } - - // Distribute remaining weight to the exting edges. To prevent fractions - // from building up and provoking precision problems the weight which is to - // be distributed is split and the rounded, the last edge gets a somewhat - // bigger value, but we are close enough for an estimation. - double fraction = floor(incoming/Edges.size()); - for (SmallVector::iterator ei = Edges.begin(), ee = Edges.end(); - ei != ee; ++ei) { - double w = 0; - if (ei != (ee-1)) { - w = fraction; - incoming -= fraction; - } else { - w = incoming; - } - EdgeInformation[BB->getParent()][*ei] += w; - // Read necessary minimal weight. - if (MinimalWeight.find(*ei) != MinimalWeight.end()) { - EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei]; - DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); - } - printEdgeWeight(*ei); - - // Add minimal weight to paths to all exit edges, this is used to ensure - // that enough flow is reaching this edges. - Path p; - const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest); - while (Dest != BB) { - const BasicBlock *Parent = p.find(Dest)->second; - Edge e = getEdge(Parent, Dest); - if (MinimalWeight.find(e) == MinimalWeight.end()) { - MinimalWeight[e] = 0; - } - MinimalWeight[e] += w; - DEBUG(dbgs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n"); - Dest = Parent; - } - } - // Increase flow into the loop. - BBWeight *= (ExecCount+1); - } - - BlockInformation[BB->getParent()][BB] = BBWeight; - // Up until now we considered only the loop exiting edges, now we have a - // definite block weight and must distribute this onto the outgoing edges. - // Since there may be already flow attached to some of the edges, read this - // flow first and remember the edges that have still now flow attached. - Edges.clear(); - std::set ProcessedSuccs; - - succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - // Also check for (BB,0) edges that may already contain some flow. (But only - // in case there are no successors.) - if (bbi == bbe) { - Edge edge = getEdge(BB,0); - EdgeInformation[BB->getParent()][edge] = BBWeight; - printEdgeWeight(edge); - } - for ( ; bbi != bbe; ++bbi ) { - if (ProcessedSuccs.insert(*bbi).second) { - Edge edge = getEdge(BB,*bbi); - double w = getEdgeWeight(edge); - if (w != MissingValue) { - BBWeight -= getEdgeWeight(edge); - } else { - Edges.push_back(edge); - // If minimal weight is necessary, reserve weight by subtracting weight - // from block weight, this is readded later on. - if (MinimalWeight.find(edge) != MinimalWeight.end()) { - BBWeight -= MinimalWeight[edge]; - DEBUG(dbgs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n"); - } - } - } - } - - double fraction = Edges.size() ? floor(BBWeight/Edges.size()) : 0.0; - // Finally we know what flow is still not leaving the block, distribute this - // flow onto the empty edges. 
- for (SmallVector::iterator ei = Edges.begin(), ee = Edges.end(); - ei != ee; ++ei) { - if (ei != (ee-1)) { - EdgeInformation[BB->getParent()][*ei] += fraction; - BBWeight -= fraction; - } else { - EdgeInformation[BB->getParent()][*ei] += BBWeight; - } - // Readd minial necessary weight. - if (MinimalWeight.find(*ei) != MinimalWeight.end()) { - EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei]; - DEBUG(dbgs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n"); - } - printEdgeWeight(*ei); - } - - // This block is visited, mark this before the recursion. - BBToVisit.erase(BB); - - // Recurse into successors. - for (succ_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi) { - recurseBasicBlock(*bbi); - } -} - -bool ProfileEstimatorPass::runOnFunction(Function &F) { - if (F.isDeclaration()) return false; - - // Fetch LoopInfo and clear ProfileInfo for this function. - LI = &getAnalysis(); - FunctionInformation.erase(&F); - BlockInformation[&F].clear(); - EdgeInformation[&F].clear(); - BBToVisit.clear(); - - // Mark all blocks as to visit. - for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi) - BBToVisit.insert(bi); - - // Clear Minimal Edges. - MinimalWeight.clear(); - - DEBUG(dbgs() << "Working on function " << F.getName() << "\n"); - - // Since the entry block is the first one and has no predecessors, the edge - // (0,entry) is inserted with the starting weight of 1. - BasicBlock *entry = &F.getEntryBlock(); - BlockInformation[&F][entry] = pow(2.0, 32.0); - Edge edge = getEdge(0,entry); - EdgeInformation[&F][edge] = BlockInformation[&F][entry]; - printEdgeWeight(edge); - - // Since recurseBasicBlock() maybe returns with a block which was not fully - // estimated, use recurseBasicBlock() until everything is calculated. - bool cleanup = false; - recurseBasicBlock(entry); - while (BBToVisit.size() > 0 && !cleanup) { - // Remember number of open blocks, this is later used to check if progress - // was made. - unsigned size = BBToVisit.size(); - - // Try to calculate all blocks in turn. - for (std::set::iterator bi = BBToVisit.begin(), - be = BBToVisit.end(); bi != be; ++bi) { - recurseBasicBlock(*bi); - // If at least one block was finished, break because iterator may be - // invalid. - if (BBToVisit.size() < size) break; - } - - // If there was not a single block resolved, make some assumptions. - if (BBToVisit.size() == size) { - bool found = false; - for (std::set::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end(); - (BBI != BBE) && (!found); ++BBI) { - BasicBlock *BB = *BBI; - // Try each predecessor if it can be assumend. - for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - (bbi != bbe) && (!found); ++bbi) { - Edge e = getEdge(*bbi,BB); - double w = getEdgeWeight(e); - // Check that edge from predecessor is still free. - if (w == MissingValue) { - // Check if there is a circle from this block to predecessor. - Path P; - const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest); - if (Dest != *bbi) { - // If there is no circle, just set edge weight to 0 - EdgeInformation[&F][e] = 0; - DEBUG(dbgs() << "Assuming edge weight: "); - printEdgeWeight(e); - found = true; - } - } - } - } - if (!found) { - cleanup = true; - DEBUG(dbgs() << "No assumption possible in Fuction "< -#include -#include -using namespace llvm; - -namespace llvm { - template<> char ProfileInfoT::ID = 0; -} - -// Register the ProfileInfo interface, providing a nice name to refer to. 
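The getExecutionCount() specialization just below derives a block's count by summing incoming edge weights, where a single missing weight makes the whole count unknown (the real code additionally deduplicates predecessors and falls back to successor edges). A plain-STL model (illustration only; strings stand in for BasicBlock* and the CFG is invented):

    #include <cstdio>
    #include <map>
    #include <string>
    #include <utility>
    #include <vector>

    static const double MissingValue = -1;   // same sentinel as ProfileInfoT
    typedef std::pair<std::string, std::string> Edge;

    static double blockCount(const std::string &bb,
                             const std::vector<std::string> &preds,
                             const std::map<Edge, double> &weights) {
      double count = 0;
      for (size_t i = 0; i < preds.size(); ++i) {
        std::map<Edge, double>::const_iterator it =
            weights.find(Edge(preds[i], bb));
        if (it == weights.end() || it->second == MissingValue)
          return MissingValue;             // one unknown edge poisons the sum
        count += it->second;
      }
      return count;
    }

    int main() {
      std::map<Edge, double> weights;
      weights[Edge("a", "c")] = 10;
      weights[Edge("b", "c")] = 5;
      std::vector<std::string> preds;
      preds.push_back("a");
      preds.push_back("b");
      std::printf("count(c) = %g\n", blockCount("c", preds, weights)); // 15
      return 0;
    }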
-INITIALIZE_ANALYSIS_GROUP(ProfileInfo, "Profile Information", NoProfileInfo) - -namespace llvm { - -template <> -ProfileInfoT::ProfileInfoT() {} -template <> -ProfileInfoT::~ProfileInfoT() {} - -template <> -ProfileInfoT::ProfileInfoT() { - MachineProfile = 0; -} -template <> -ProfileInfoT::~ProfileInfoT() { - if (MachineProfile) delete MachineProfile; -} - -template<> -char ProfileInfoT::ID = 0; - -template<> -const double ProfileInfoT::MissingValue = -1; - -template<> const -double ProfileInfoT::MissingValue = -1; - -template<> double -ProfileInfoT::getExecutionCount(const BasicBlock *BB) { - std::map::iterator J = - BlockInformation.find(BB->getParent()); - if (J != BlockInformation.end()) { - BlockCounts::iterator I = J->second.find(BB); - if (I != J->second.end()) - return I->second; - } - - double Count = MissingValue; - - const_pred_iterator PI = pred_begin(BB), PE = pred_end(BB); - - // Are there zero predecessors of this block? - if (PI == PE) { - Edge e = getEdge(0, BB); - Count = getEdgeWeight(e); - } else { - // Otherwise, if there are predecessors, the execution count of this block is - // the sum of the edge frequencies from the incoming edges. - std::set ProcessedPreds; - Count = 0; - for (; PI != PE; ++PI) { - const BasicBlock *P = *PI; - if (ProcessedPreds.insert(P).second) { - double w = getEdgeWeight(getEdge(P, BB)); - if (w == MissingValue) { - Count = MissingValue; - break; - } - Count += w; - } - } - } - - // If the predecessors did not suffice to get block weight, try successors. - if (Count == MissingValue) { - - succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB); - - // Are there zero successors of this block? - if (SI == SE) { - Edge e = getEdge(BB,0); - Count = getEdgeWeight(e); - } else { - std::set ProcessedSuccs; - Count = 0; - for (; SI != SE; ++SI) - if (ProcessedSuccs.insert(*SI).second) { - double w = getEdgeWeight(getEdge(BB, *SI)); - if (w == MissingValue) { - Count = MissingValue; - break; - } - Count += w; - } - } - } - - if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count; - return Count; -} - -template<> -double ProfileInfoT:: - getExecutionCount(const MachineBasicBlock *MBB) { - std::map::iterator J = - BlockInformation.find(MBB->getParent()); - if (J != BlockInformation.end()) { - BlockCounts::iterator I = J->second.find(MBB); - if (I != J->second.end()) - return I->second; - } - - return MissingValue; -} - -template<> -double ProfileInfoT::getExecutionCount(const Function *F) { - std::map::iterator J = - FunctionInformation.find(F); - if (J != FunctionInformation.end()) - return J->second; - - // isDeclaration() is checked here and not at start of function to allow - // functions without a body still to have a execution count. 
- if (F->isDeclaration()) return MissingValue; - - double Count = getExecutionCount(&F->getEntryBlock()); - if (Count != MissingValue) FunctionInformation[F] = Count; - return Count; -} - -template<> -double ProfileInfoT:: - getExecutionCount(const MachineFunction *MF) { - std::map::iterator J = - FunctionInformation.find(MF); - if (J != FunctionInformation.end()) - return J->second; - - double Count = getExecutionCount(&MF->front()); - if (Count != MissingValue) FunctionInformation[MF] = Count; - return Count; -} - -template<> -void ProfileInfoT:: - setExecutionCount(const BasicBlock *BB, double w) { - DEBUG(dbgs() << "Creating Block " << BB->getName() - << " (weight: " << format("%.20g",w) << ")\n"); - BlockInformation[BB->getParent()][BB] = w; -} - -template<> -void ProfileInfoT:: - setExecutionCount(const MachineBasicBlock *MBB, double w) { - DEBUG(dbgs() << "Creating Block " << MBB->getBasicBlock()->getName() - << " (weight: " << format("%.20g",w) << ")\n"); - BlockInformation[MBB->getParent()][MBB] = w; -} - -template<> -void ProfileInfoT::addEdgeWeight(Edge e, double w) { - double oldw = getEdgeWeight(e); - assert (oldw != MissingValue && "Adding weight to Edge with no previous weight"); - DEBUG(dbgs() << "Adding to Edge " << e - << " (new weight: " << format("%.20g",oldw + w) << ")\n"); - EdgeInformation[getFunction(e)][e] = oldw + w; -} - -template<> -void ProfileInfoT:: - addExecutionCount(const BasicBlock *BB, double w) { - double oldw = getExecutionCount(BB); - assert (oldw != MissingValue && "Adding weight to Block with no previous weight"); - DEBUG(dbgs() << "Adding to Block " << BB->getName() - << " (new weight: " << format("%.20g",oldw + w) << ")\n"); - BlockInformation[BB->getParent()][BB] = oldw + w; -} - -template<> -void ProfileInfoT::removeBlock(const BasicBlock *BB) { - std::map::iterator J = - BlockInformation.find(BB->getParent()); - if (J == BlockInformation.end()) return; - - DEBUG(dbgs() << "Deleting " << BB->getName() << "\n"); - J->second.erase(BB); -} - -template<> -void ProfileInfoT::removeEdge(Edge e) { - std::map::iterator J = - EdgeInformation.find(getFunction(e)); - if (J == EdgeInformation.end()) return; - - DEBUG(dbgs() << "Deleting" << e << "\n"); - J->second.erase(e); -} - -template<> -void ProfileInfoT:: - replaceEdge(const Edge &oldedge, const Edge &newedge) { - double w; - if ((w = getEdgeWeight(newedge)) == MissingValue) { - w = getEdgeWeight(oldedge); - DEBUG(dbgs() << "Replacing " << oldedge << " with " << newedge << "\n"); - } else { - w += getEdgeWeight(oldedge); - DEBUG(dbgs() << "Adding " << oldedge << " to " << newedge << "\n"); - } - setEdgeWeight(newedge,w); - removeEdge(oldedge); -} - -template<> -const BasicBlock *ProfileInfoT:: - GetPath(const BasicBlock *Src, const BasicBlock *Dest, - Path &P, unsigned Mode) { - const BasicBlock *BB = 0; - bool hasFoundPath = false; - - std::queue BFS; - BFS.push(Src); - - while(BFS.size() && !hasFoundPath) { - BB = BFS.front(); - BFS.pop(); - - succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); - if (Succ == End) { - P[(const BasicBlock*)0] = BB; - if (Mode & GetPathToExit) { - hasFoundPath = true; - BB = 0; - } - } - for(;Succ != End; ++Succ) { - if (P.find(*Succ) != P.end()) continue; - Edge e = getEdge(BB,*Succ); - if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue; - P[*Succ] = BB; - BFS.push(*Succ); - if ((Mode & GetPathToDest) && *Succ == Dest) { - hasFoundPath = true; - BB = *Succ; - break; - } - if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) 
!= MissingValue)) {
-        hasFoundPath = true;
-        BB = *Succ;
-        break;
-      }
-    }
-  }
-
-  return BB;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::
-        divertFlow(const Edge &oldedge, const Edge &newedge) {
-  DEBUG(dbgs() << "Diverting " << oldedge << " via " << newedge );
-
-  // First check if the old edge was taken, if not, just delete it...
-  if (getEdgeWeight(oldedge) == 0) {
-    removeEdge(oldedge);
-    return;
-  }
-
-  Path P;
-  P[newedge.first] = 0;
-  P[newedge.second] = newedge.first;
-  const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
-
-  double w = getEdgeWeight (oldedge);
-  DEBUG(dbgs() << ", Weight: " << format("%.20g",w) << "\n");
-  do {
-    const BasicBlock *Parent = P.find(BB)->second;
-    Edge e = getEdge(Parent,BB);
-    double oldw = getEdgeWeight(e);
-    double oldc = getExecutionCount(e.first);
-    setEdgeWeight(e, w+oldw);
-    if (Parent != oldedge.first) {
-      setExecutionCount(e.first, w+oldc);
-    }
-    BB = Parent;
-  } while (BB != newedge.first);
-  removeEdge(oldedge);
-}
-
-/// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
-/// This checks all edges of the function the blocks reside in and replaces the
-/// occurrences of RmBB with DestBB.
-template<>
-void ProfileInfoT<Function,BasicBlock>::
-        replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
-  DEBUG(dbgs() << "Replacing " << RmBB->getName()
-               << " with " << DestBB->getName() << "\n");
-  const Function *F = DestBB->getParent();
-  std::map<const Function*, EdgeWeights>::iterator J =
-    EdgeInformation.find(F);
-  if (J == EdgeInformation.end()) return;
-
-  Edge e, newedge;
-  bool erasededge = false;
-  EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
-  while(I != E) {
-    e = (I++)->first;
-    bool foundedge = false;  bool eraseedge = false;
-    if (e.first == RmBB) {
-      if (e.second == DestBB) {
-        eraseedge = true;
-      } else {
-        newedge = getEdge(DestBB, e.second);
-        foundedge = true;
-      }
-    }
-    if (e.second == RmBB) {
-      if (e.first == DestBB) {
-        eraseedge = true;
-      } else {
-        newedge = getEdge(e.first, DestBB);
-        foundedge = true;
-      }
-    }
-    if (foundedge) {
-      replaceEdge(e, newedge);
-    }
-    if (eraseedge) {
-      if (erasededge) {
-        Edge newedge = getEdge(DestBB, DestBB);
-        replaceEdge(e, newedge);
-      } else {
-        removeEdge(e);
-        erasededge = true;
-      }
-    }
-  }
-}
-
-/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
-/// Since it's possible that there is more than one edge in the CFG from FirstBB
-/// to SecondBB it's necessary to redirect the flow proportionally.
-template<>
-void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
-                                                  const BasicBlock *SecondBB,
-                                                  const BasicBlock *NewBB,
-                                                  bool MergeIdenticalEdges) {
-  const Function *F = FirstBB->getParent();
-  std::map<const Function*, EdgeWeights>::iterator J =
-    EdgeInformation.find(F);
-  if (J == EdgeInformation.end()) return;
-
-  // Generate edges and read current weight.
-  Edge e = getEdge(FirstBB, SecondBB);
-  Edge n1 = getEdge(FirstBB, NewBB);
-  Edge n2 = getEdge(NewBB, SecondBB);
-  EdgeWeights &ECs = J->second;
-  double w = ECs[e];
-
-  int succ_count = 0;
-  if (!MergeIdenticalEdges) {
-    // First count the edges from FirstBB to SecondBB, if there is more than
-    // one, only slice out a proportional part for NewBB.
-    for(succ_const_iterator BBI = succ_begin(FirstBB), BBE = succ_end(FirstBB);
-        BBI != BBE; ++BBI) {
-      if (*BBI == SecondBB) succ_count++;
-    }
-    // When the NewBB is completely new, increment the count by one so that
-    // the counts are properly distributed.
- if (getExecutionCount(NewBB) == ProfileInfo::MissingValue) succ_count++; - } else { - // When the edges are merged anyway, then redirect all flow. - succ_count = 1; - } - - // We know now how many edges there are from FirstBB to SecondBB, reroute a - // proportional part of the edge weight over NewBB. - double neww = floor(w / succ_count); - ECs[n1] += neww; - ECs[n2] += neww; - BlockInformation[F][NewBB] += neww; - if (succ_count == 1) { - ECs.erase(e); - } else { - ECs[e] -= neww; - } -} - -template<> -void ProfileInfoT::splitBlock(const BasicBlock *Old, - const BasicBlock* New) { - const Function *F = Old->getParent(); - std::map::iterator J = - EdgeInformation.find(F); - if (J == EdgeInformation.end()) return; - - DEBUG(dbgs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n"); - - std::set Edges; - for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end(); - ewi != ewe; ++ewi) { - Edge old = ewi->first; - if (old.first == Old) { - Edges.insert(old); - } - } - for (std::set::iterator EI = Edges.begin(), EE = Edges.end(); - EI != EE; ++EI) { - Edge newedge = getEdge(New, EI->second); - replaceEdge(*EI, newedge); - } - - double w = getExecutionCount(Old); - setEdgeWeight(getEdge(Old, New), w); - setExecutionCount(New, w); -} - -template<> -void ProfileInfoT::splitBlock(const BasicBlock *BB, - const BasicBlock* NewBB, - BasicBlock *const *Preds, - unsigned NumPreds) { - const Function *F = BB->getParent(); - std::map::iterator J = - EdgeInformation.find(F); - if (J == EdgeInformation.end()) return; - - DEBUG(dbgs() << "Splitting " << NumPreds << " Edges from " << BB->getName() - << " to " << NewBB->getName() << "\n"); - - // Collect weight that was redirected over NewBB. - double newweight = 0; - - std::set ProcessedPreds; - // For all requestes Predecessors. - for (unsigned pred = 0; pred < NumPreds; ++pred) { - const BasicBlock * Pred = Preds[pred]; - if (ProcessedPreds.insert(Pred).second) { - // Create edges and read old weight. - Edge oldedge = getEdge(Pred, BB); - Edge newedge = getEdge(Pred, NewBB); - - // Remember how much weight was redirected. 
- newweight += getEdgeWeight(oldedge); - - replaceEdge(oldedge,newedge); - } - } - - Edge newedge = getEdge(NewBB,BB); - setEdgeWeight(newedge, newweight); - setExecutionCount(NewBB, newweight); -} - -template<> -void ProfileInfoT::transfer(const Function *Old, - const Function *New) { - DEBUG(dbgs() << "Replacing Function " << Old->getName() << " with " - << New->getName() << "\n"); - std::map::iterator J = - EdgeInformation.find(Old); - if(J != EdgeInformation.end()) { - EdgeInformation[New] = J->second; - } - EdgeInformation.erase(Old); - BlockInformation.erase(Old); - FunctionInformation.erase(Old); -} - -static double readEdgeOrRemember(ProfileInfo::Edge edge, double w, - ProfileInfo::Edge &tocalc, unsigned &uncalc) { - if (w == ProfileInfo::MissingValue) { - tocalc = edge; - uncalc++; - return 0; - } else { - return w; - } -} - -template<> -bool ProfileInfoT:: - CalculateMissingEdge(const BasicBlock *BB, Edge &removed, - bool assumeEmptySelf) { - Edge edgetocalc; - unsigned uncalculated = 0; - - // collect weights of all incoming and outgoing edges, rememer edges that - // have no value - double incount = 0; - SmallSet pred_visited; - const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - if (bbi==bbe) { - Edge e = getEdge(0,BB); - incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); - } - for (;bbi != bbe; ++bbi) { - if (pred_visited.insert(*bbi)) { - Edge e = getEdge(*bbi,BB); - incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated); - } - } - - double outcount = 0; - SmallSet succ_visited; - succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB); - if (sbbi==sbbe) { - Edge e = getEdge(BB,0); - if (getEdgeWeight(e) == MissingValue) { - double w = getExecutionCount(BB); - if (w != MissingValue) { - setEdgeWeight(e,w); - removed = e; - } - } - outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); - } - for (;sbbi != sbbe; ++sbbi) { - if (succ_visited.insert(*sbbi)) { - Edge e = getEdge(BB,*sbbi); - outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated); - } - } - - // if exactly one edge weight was missing, calculate it and remove it from - // spanning tree - if (uncalculated == 0 ) { - return true; - } else - if (uncalculated == 1) { - if (incount < outcount) { - EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount; - } else { - EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount; - } - DEBUG(dbgs() << "--Calc Edge Counter for " << edgetocalc << ": " - << format("%.20g", getEdgeWeight(edgetocalc)) << "\n"); - removed = edgetocalc; - return true; - } else - if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) { - setEdgeWeight(edgetocalc, incount * 10); - removed = edgetocalc; - return true; - } else { - return false; - } -} - -static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set &misscount) { - double w = PI->getEdgeWeight(e); - if (w != ProfileInfo::MissingValue) { - calcw += w; - } else { - misscount.insert(e); - } -} - -template<> -bool ProfileInfoT::EstimateMissingEdges(const BasicBlock *BB) { - double inWeight = 0; - std::set inMissing; - std::set ProcessedPreds; - const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - if (bbi == bbe) { - readEdge(this,getEdge(0,BB),inWeight,inMissing); - } - for( ; bbi != bbe; ++bbi ) { - if (ProcessedPreds.insert(*bbi).second) { - readEdge(this,getEdge(*bbi,BB),inWeight,inMissing); - } - } - - double outWeight = 0; - 
std::set<Edge> outMissing;
-  std::set<const BasicBlock*> ProcessedSuccs;
-  succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
-  if (sbbi == sbbe)
-    readEdge(this,getEdge(BB,0),outWeight,outMissing);
-  for ( ; sbbi != sbbe; ++sbbi ) {
-    if (ProcessedSuccs.insert(*sbbi).second) {
-      readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
-    }
-  }
-
-  double share;
-  std::set<Edge>::iterator ei,ee;
-  if (inMissing.size() == 0 && outMissing.size() > 0) {
-    ei = outMissing.begin();
-    ee = outMissing.end();
-    share = inWeight/outMissing.size();
-    setExecutionCount(BB,inWeight);
-  } else
-  if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
-    ei = inMissing.begin();
-    ee = inMissing.end();
-    share = 0;
-    setExecutionCount(BB,0);
-  } else
-  if (inMissing.size() == 0 && outMissing.size() == 0) {
-    setExecutionCount(BB,outWeight);
-    return true;
-  } else {
-    return false;
-  }
-  for ( ; ei != ee; ++ei ) {
-    setEdgeWeight(*ei,share);
-  }
-  return true;
-}
-
-template<>
-void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
-//  if (getExecutionCount(&(F->getEntryBlock())) == 0) {
-//    for (Function::const_iterator FI = F->begin(), FE = F->end();
-//         FI != FE; ++FI) {
-//      const BasicBlock* BB = &(*FI);
-//      {
-//        const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
-//        if (NBB == End) {
-//          setEdgeWeight(getEdge(0,BB),0);
-//        }
-//        for(;NBB != End; ++NBB) {
-//          setEdgeWeight(getEdge(*NBB,BB),0);
-//        }
-//      }
-//      {
-//        succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
-//        if (NBB == End) {
-//          setEdgeWeight(getEdge(0,BB),0);
-//        }
-//        for(;NBB != End; ++NBB) {
-//          setEdgeWeight(getEdge(*NBB,BB),0);
-//        }
-//      }
-//    }
-//    return;
-//  }
-  // The set of BasicBlocks that are still unvisited.
-  std::set<const BasicBlock*> Unvisited;
-
-  // The set of return edges (Edges with no successors).
-  std::set<Edge> ReturnEdges;
-  double ReturnWeight = 0;
-
-  // First iterate over the whole function and collect:
-  // 1) The blocks in this function in the Unvisited set.
-  // 2) The return edges in the ReturnEdges set.
-  // 3) The flow that is leaving the function already via return edges.
-
-  // Data structure for searching the function.
-  std::queue<const BasicBlock*> BFS;
-  const BasicBlock *BB = &(F->getEntryBlock());
-  BFS.push(BB);
-  Unvisited.insert(BB);
-
-  while (BFS.size()) {
-    BB = BFS.front(); BFS.pop();
-    succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
-    if (NBB == End) {
-      Edge e = getEdge(BB,0);
-      double w = getEdgeWeight(e);
-      if (w == MissingValue) {
-        // If the return edge has no value, try to read value from block.
-        double bw = getExecutionCount(BB);
-        if (bw != MissingValue) {
-          setEdgeWeight(e,bw);
-          ReturnWeight += bw;
-        } else {
-          // If both return edge and block provide no value, collect edge.
-          ReturnEdges.insert(e);
-        }
-      } else {
-        // If the return edge has a proper value, collect it.
-        ReturnWeight += w;
-      }
-    }
-    for (;NBB != End; ++NBB) {
-      if (Unvisited.insert(*NBB).second) {
-        BFS.push(*NBB);
-      }
-    }
-  }
-
-  while (Unvisited.size() > 0) {
-    unsigned oldUnvisitedCount = Unvisited.size();
-    bool FoundPath = false;
-
-    // If there is only one edge left, calculate it.
-    if (ReturnEdges.size() == 1) {
-      ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
-
-      Edge e = *ReturnEdges.begin();
-      setEdgeWeight(e,ReturnWeight);
-      setExecutionCount(e.first,ReturnWeight);
-
-      Unvisited.erase(e.first);
-      ReturnEdges.erase(e);
-      continue;
-    }
-
-    // Calculate all blocks where only one edge is missing, this may also
-    // resolve further return edges.
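The per-block solve in the deleted loop below rests on flow conservation: a block's execution count equals both the sum of its incoming and the sum of its outgoing edge weights, so when exactly one weight around a block is unknown it is determined by the others. A minimal standalone sketch of that solve, with a hypothetical std::optional-based weight representation rather than the ProfileInfo API:

    // Sketch only: solveMissingEdge is an illustrative name, not part of
    // the deleted pass. A missing weight is std::nullopt.
    #include <cmath>
    #include <optional>
    #include <vector>

    std::optional<double>
    solveMissingEdge(const std::vector<std::optional<double>> &In,
                     const std::vector<std::optional<double>> &Out) {
      double Known = 0;
      int Missing = 0;
      for (const auto &W : In)  { if (W) Known += *W; else ++Missing; }
      for (const auto &W : Out) { if (W) Known -= *W; else ++Missing; }
      if (Missing != 1)
        return std::nullopt;       // Under- or over-determined; give up.
      return std::fabs(Known);     // |sum(in) - sum(out)| is the missing weight.
    }

With two or more unknowns the system is under-determined, which is why the loop only erases a block from Unvisited once CalculateMissingEdge succeeds and otherwise falls through to the cruder estimation passes.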
-    std::set<const BasicBlock*>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
-    while(FI != FE) {
-      const BasicBlock *BB = *FI; ++FI;
-      Edge e;
-      if(CalculateMissingEdge(BB,e,true)) {
-        if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
-          setExecutionCount(BB,getExecutionCount(BB));
-        }
-        Unvisited.erase(BB);
-        if (e.first != 0 && e.second == 0) {
-          ReturnEdges.erase(e);
-          ReturnWeight += getEdgeWeight(e);
-        }
-      }
-    }
-    if (oldUnvisitedCount > Unvisited.size()) continue;
-
-    // Estimate edge weights by dividing the flow proportionally.
-    FI = Unvisited.begin(), FE = Unvisited.end();
-    while(FI != FE) {
-      const BasicBlock *BB = *FI; ++FI;
-      const BasicBlock *Dest = 0;
-      bool AllEdgesHaveSameReturn = true;
-      // Check each successor, these must all end up in the same or an empty
-      // return block, otherwise it's dangerous to do an estimation on them.
-      for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
-           Succ != End; ++Succ) {
-        Path P;
-        GetPath(*Succ, 0, P, GetPathToExit);
-        if (Dest && Dest != P[(const BasicBlock*)0]) {
-          AllEdgesHaveSameReturn = false;
-        }
-        Dest = P[(const BasicBlock*)0];
-      }
-      if (AllEdgesHaveSameReturn) {
-        if(EstimateMissingEdges(BB)) {
-          Unvisited.erase(BB);
-          break;
-        }
-      }
-    }
-    if (oldUnvisitedCount > Unvisited.size()) continue;
-
-    // Check if there is a path to a block that has a known value and redirect
-    // flow accordingly.
-    FI = Unvisited.begin(), FE = Unvisited.end();
-    while(FI != FE && !FoundPath) {
-      // Fetch path.
-      const BasicBlock *BB = *FI; ++FI;
-      Path P;
-      const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
-
-      // Calculate incoming flow.
-      double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
-      std::set<const BasicBlock*> Processed;
-      for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
-           NBB != End; ++NBB) {
-        if (Processed.insert(*NBB).second) {
-          Edge e = getEdge(*NBB, BB);
-          double ew = getEdgeWeight(e);
-          if (ew != MissingValue) {
-            iw += ew;
-            invalid++;
-          } else {
-            // If the path contains the successor, this means it's a backedge,
-            // do not count as missing.
-            if (P.find(*NBB) == P.end())
-              inmissing++;
-          }
-          incount++;
-        }
-      }
-      if (inmissing == incount) continue;
-      if (invalid == 0) continue;
-
-      // Subtract (already) outgoing flow.
-      Processed.clear();
-      for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
-           NBB != End; ++NBB) {
-        if (Processed.insert(*NBB).second) {
-          Edge e = getEdge(BB, *NBB);
-          double ew = getEdgeWeight(e);
-          if (ew != MissingValue) {
-            iw -= ew;
-          }
-        }
-      }
-      if (iw < 0) continue;
-
-      // Check the receiving end of the path if it can handle the flow.
-      double ow = getExecutionCount(Dest);
-      Processed.clear();
-      for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
-           NBB != End; ++NBB) {
-        if (Processed.insert(*NBB).second) {
-          Edge e = getEdge(BB, *NBB);
-          double ew = getEdgeWeight(e);
-          if (ew != MissingValue) {
-            ow -= ew;
-          }
-        }
-      }
-      if (ow < 0) continue;
-
-      // Determine how much flow shall be used.
-      double ew = getEdgeWeight(getEdge(P[Dest],Dest));
-      if (ew != MissingValue) {
-        ew = ew<ow?ew:ow;
-        ew = ew<iw?ew:iw;
-      } else {
-        if (inmissing == 0)
-          ew = iw<ow?iw:ow;
-      }
-
-      // Create flow.
-      if (ew != MissingValue) {
-        do {
-          Edge e = getEdge(P[Dest],Dest);
-          if (getEdgeWeight(e) == MissingValue) {
-            setEdgeWeight(e,ew);
-            FoundPath = true;
-          }
-          Dest = P[Dest];
-        } while (Dest != BB);
-      }
-    }
-    if (FoundPath) continue;
-
-    // Calculate a block with self loop.
-    FI = Unvisited.begin(), FE = Unvisited.end();
-    while(FI != FE && !FoundPath) {
-      const BasicBlock *BB = *FI; ++FI;
-      bool SelfEdgeFound = false;
-      for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
-           NBB != End; ++NBB) {
-        if (*NBB == BB) {
-          SelfEdgeFound = true;
-          break;
-        }
-      }
-      if (SelfEdgeFound) {
-        Edge e = getEdge(BB,BB);
-        if (getEdgeWeight(e) == MissingValue) {
-          double iw = 0;
-          std::set<const BasicBlock*> Processed;
-          for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB);
-               NBB != End; ++NBB) {
-            if (Processed.insert(*NBB).second) {
-              Edge e = getEdge(*NBB, BB);
-              double ew = getEdgeWeight(e);
-              if (ew != MissingValue) {
-                iw += ew;
-              }
-            }
-          }
-          setEdgeWeight(e,iw * 10);
-          FoundPath = true;
-        }
-      }
-    }
-    if (FoundPath) continue;
-
-    // Determine backedges, set them to zero.
- FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE && !FoundPath) { - const BasicBlock *BB = *FI; ++FI; - const BasicBlock *Dest = 0; - Path P; - bool BackEdgeFound = false; - for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); - NBB != End; ++NBB) { - Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges); - if (Dest == *NBB) { - BackEdgeFound = true; - break; - } - } - if (BackEdgeFound) { - Edge e = getEdge(Dest,BB); - double w = getEdgeWeight(e); - if (w == MissingValue) { - setEdgeWeight(e,0); - FoundPath = true; - } - do { - Edge e = getEdge(P[Dest], Dest); - double w = getEdgeWeight(e); - if (w == MissingValue) { - setEdgeWeight(e,0); - FoundPath = true; - } - Dest = P[Dest]; - } while (Dest != BB); - } - } - if (FoundPath) continue; - - // Channel flow to return block. - FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE && !FoundPath) { - const BasicBlock *BB = *FI; ++FI; - - Path P; - const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); - Dest = P[(const BasicBlock*)0]; - if (!Dest) continue; - - if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { - // Calculate incoming flow. - double iw = 0; - std::set Processed; - for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); - NBB != End; ++NBB) { - if (Processed.insert(*NBB).second) { - Edge e = getEdge(*NBB, BB); - double ew = getEdgeWeight(e); - if (ew != MissingValue) { - iw += ew; - } - } - } - do { - Edge e = getEdge(P[Dest], Dest); - double w = getEdgeWeight(e); - if (w == MissingValue) { - setEdgeWeight(e,iw); - FoundPath = true; - } else { - assert(0 && "Edge should not have value already!"); - } - Dest = P[Dest]; - } while (Dest != BB); - } - } - if (FoundPath) continue; - - // Speculatively set edges to zero. 
- FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE && !FoundPath) { - const BasicBlock *BB = *FI; ++FI; - - for (const_pred_iterator NBB = pred_begin(BB), End = pred_end(BB); - NBB != End; ++NBB) { - Edge e = getEdge(*NBB,BB); - double w = getEdgeWeight(e); - if (w == MissingValue) { - setEdgeWeight(e,0); - FoundPath = true; - break; - } - } - } - if (FoundPath) continue; - - errs() << "{"; - FI = Unvisited.begin(), FE = Unvisited.end(); - while(FI != FE) { - const BasicBlock *BB = *FI; ++FI; - dbgs() << BB->getName(); - if (FI != FE) - dbgs() << ","; - } - errs() << "}"; - - errs() << "ASSERT: could not repair function"; - assert(0 && "could not repair function"); - } - - EdgeWeights J = EdgeInformation[F]; - for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) { - Edge e = EI->first; - - bool SuccFound = false; - if (e.first != 0) { - succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first); - if (NBB == End) { - if (0 == e.second) { - SuccFound = true; - } - } - for (;NBB != End; ++NBB) { - if (*NBB == e.second) { - SuccFound = true; - break; - } - } - if (!SuccFound) { - removeEdge(e); - } - } - } -} - -raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) { - return O << MF->getFunction()->getName() << "(MF)"; -} - -raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) { - return O << MBB->getBasicBlock()->getName() << "(MB)"; -} - -raw_ostream& operator<<(raw_ostream &O, std::pair E) { - O << "("; - - if (E.first) - O << E.first; - else - O << "0"; - - O << ","; - - if (E.second) - O << E.second; - else - O << "0"; - - return O << ")"; -} - -} // namespace llvm - -//===----------------------------------------------------------------------===// -// NoProfile ProfileInfo implementation -// - -namespace { - struct NoProfileInfo : public ImmutablePass, public ProfileInfo { - static char ID; // Class identification, replacement for typeinfo - NoProfileInfo() : ImmutablePass(ID) { - initializeNoProfileInfoPass(*PassRegistry::getPassRegistry()); - } - - /// getAdjustedAnalysisPointer - This method is used when a pass implements - /// an analysis interface through multiple inheritance. If needed, it - /// should override this to adjust the this pointer as needed for the - /// specified pass info. - virtual void *getAdjustedAnalysisPointer(AnalysisID PI) { - if (PI == &ProfileInfo::ID) - return (ProfileInfo*)this; - return this; - } - - virtual const char *getPassName() const { - return "NoProfileInfo"; - } - }; -} // End of anonymous namespace - -char NoProfileInfo::ID = 0; -// Register this pass... -INITIALIZE_AG_PASS(NoProfileInfo, ProfileInfo, "no-profile", - "No Profile Information", false, true, true) - -ImmutablePass *llvm::createNoProfileInfoPass() { return new NoProfileInfo(); } diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp deleted file mode 100644 index f1f3e940..0000000 --- a/contrib/llvm/lib/Analysis/ProfileInfoLoader.cpp +++ /dev/null @@ -1,155 +0,0 @@ -//===- ProfileInfoLoad.cpp - Load profile information from disk -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The ProfileInfoLoader class is used to load and represent profiling -// information read in from the dump file. 
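The loader deleted just below detects a byte-order mismatch with a simple trick: packet tags in llvmprof.out are small nonzero integers, so a same-endian read has a nonzero low byte, while an opposite-endian read moves that byte to the top and leaves the low byte zero. A self-contained illustration in plain C++ (byteSwap32 and main are hypothetical names, not the loader's helpers):

    #include <cassert>
    #include <cstdint>

    // Reverse the byte order of a 32-bit word, same rotation the loader's
    // ByteSwap helper performs.
    uint32_t byteSwap32(uint32_t V) {
      return (V << 24) | ((V & 0xff00u) << 8) | ((V >> 8) & 0xff00u) | (V >> 24);
    }

    int main() {
      uint32_t Tag = 2;                    // a small packet tag, e.g. FunctionInfo
      uint32_t Foreign = byteSwap32(Tag);  // same word as read on a foreign host
      assert((char)Tag != 0);              // native read: low byte is the tag
      assert((char)Foreign == 0);          // mismatch shows up as a zero low byte
      assert(byteSwap32(Foreign) == Tag);  // swapping again restores the value
    }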
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/Analysis/ProfileInfoTypes.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Support/raw_ostream.h"
-#include <cstdio>
-#include <cstdlib>
-using namespace llvm;
-
-// ByteSwap - Byteswap 'Var' if 'Really' is true.
-//
-static inline unsigned ByteSwap(unsigned Var, bool Really) {
-  if (!Really) return Var;
-  return ((Var & (255U<< 0U)) << 24U) |
-         ((Var & (255U<< 8U)) <<  8U) |
-         ((Var & (255U<<16U)) >>  8U) |
-         ((Var & (255U<<24U)) >> 24U);
-}
-
-static unsigned AddCounts(unsigned A, unsigned B) {
-  // If either value is undefined, use the other.
-  if (A == ProfileInfoLoader::Uncounted) return B;
-  if (B == ProfileInfoLoader::Uncounted) return A;
-  return A + B;
-}
-
-static void ReadProfilingBlock(const char *ToolName, FILE *F,
-                               bool ShouldByteSwap,
-                               std::vector<unsigned> &Data) {
-  // Read the number of entries...
-  unsigned NumEntries;
-  if (fread(&NumEntries, sizeof(unsigned), 1, F) != 1) {
-    errs() << ToolName << ": data packet truncated!\n";
-    perror(0);
-    exit(1);
-  }
-  NumEntries = ByteSwap(NumEntries, ShouldByteSwap);
-
-  // Read the counts...
-  std::vector<unsigned> TempSpace(NumEntries);
-
-  // Read in the block of data...
-  if (fread(&TempSpace[0], sizeof(unsigned)*NumEntries, 1, F) != 1) {
-    errs() << ToolName << ": data packet truncated!\n";
-    perror(0);
-    exit(1);
-  }
-
-  // Make sure we have enough space... The space is initialised to -1 to
-  // facilitate the loading of missing values for OptimalEdgeProfiling.
-  if (Data.size() < NumEntries)
-    Data.resize(NumEntries, ProfileInfoLoader::Uncounted);
-
-  // Accumulate the data we just read into the data.
-  if (!ShouldByteSwap) {
-    for (unsigned i = 0; i != NumEntries; ++i) {
-      Data[i] = AddCounts(TempSpace[i], Data[i]);
-    }
-  } else {
-    for (unsigned i = 0; i != NumEntries; ++i) {
-      Data[i] = AddCounts(ByteSwap(TempSpace[i], true), Data[i]);
-    }
-  }
-}
-
-const unsigned ProfileInfoLoader::Uncounted = ~0U;
-
-// ProfileInfoLoader ctor - Read the specified profiling data file, exiting the
-// program if the file is invalid or broken.
-//
-ProfileInfoLoader::ProfileInfoLoader(const char *ToolName,
-                                     const std::string &Filename)
-    : Filename(Filename) {
-  FILE *F = fopen(Filename.c_str(), "rb");
-  if (F == 0) {
-    errs() << ToolName << ": Error opening '" << Filename << "': ";
-    perror(0);
-    exit(1);
-  }
-
-  // Keep reading packets until we run out of them.
-  unsigned PacketType;
-  while (fread(&PacketType, sizeof(unsigned), 1, F) == 1) {
-    // If the low eight bits of the packet are zero, we must be dealing with an
-    // endianness mismatch.  Byteswap all words read from the profiling
-    // information.
-    bool ShouldByteSwap = (char)PacketType == 0;
-    PacketType = ByteSwap(PacketType, ShouldByteSwap);
-
-    switch (PacketType) {
-    case ArgumentInfo: {
-      unsigned ArgLength;
-      if (fread(&ArgLength, sizeof(unsigned), 1, F) != 1) {
-        errs() << ToolName << ": arguments packet truncated!\n";
-        perror(0);
-        exit(1);
-      }
-      ArgLength = ByteSwap(ArgLength, ShouldByteSwap);
-
-      // Read in the arguments...
-      std::vector<char> Chars(ArgLength+4);
-
-      if (ArgLength)
-        if (fread(&Chars[0], (ArgLength+3) & ~3, 1, F) != 1) {
-          errs() << ToolName << ": arguments packet truncated!\n";
-          perror(0);
-          exit(1);
-        }
-      CommandLines.push_back(std::string(&Chars[0], &Chars[ArgLength]));
-      break;
-    }
-
-    case FunctionInfo:
-      ReadProfilingBlock(ToolName, F, ShouldByteSwap, FunctionCounts);
-      break;
-
-    case BlockInfo:
-      ReadProfilingBlock(ToolName, F, ShouldByteSwap, BlockCounts);
-      break;
-
-    case EdgeInfo:
-      ReadProfilingBlock(ToolName, F, ShouldByteSwap, EdgeCounts);
-      break;
-
-    case OptEdgeInfo:
-      ReadProfilingBlock(ToolName, F, ShouldByteSwap, OptimalEdgeCounts);
-      break;
-
-    case BBTraceInfo:
-      ReadProfilingBlock(ToolName, F, ShouldByteSwap, BBTrace);
-      break;
-
-    default:
-      errs() << ToolName << ": Unknown packet type #" << PacketType << "!\n";
-      exit(1);
-    }
-  }
-
-  fclose(F);
-}
-
diff --git a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp b/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
deleted file mode 100644
index 346f8d6..0000000
--- a/contrib/llvm/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ /dev/null
@@ -1,267 +0,0 @@
-//===- ProfileInfoLoaderPass.cpp - LLVM Pass to load profile info ---------===//
-//
-//                      The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a concrete implementation of profiling information that
-// loads the information from a profile dump file.
-//
-//===----------------------------------------------------------------------===//
-#define DEBUG_TYPE "profile-loader"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/ADT/SmallSet.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/ProfileInfo.h"
-#include "llvm/Analysis/ProfileInfoLoader.h"
-#include "llvm/IR/BasicBlock.h"
-#include "llvm/IR/InstrTypes.h"
-#include "llvm/IR/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/CFG.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/Format.h"
-#include "llvm/Support/raw_ostream.h"
-#include <set>
-using namespace llvm;
-
-STATISTIC(NumEdgesRead, "The # of edges read.");
-
-static cl::opt<std::string>
-ProfileInfoFilename("profile-info-file", cl::init("llvmprof.out"),
-                    cl::value_desc("filename"),
-                    cl::desc("Profile file loaded by -profile-loader"));
-
-namespace {
-  class LoaderPass : public ModulePass, public ProfileInfo {
-    std::string Filename;
-    std::set<Edge> SpanningTree;
-    std::set<const BasicBlock*> BBisUnvisited;
-    unsigned ReadCount;
-  public:
-    static char ID; // Class identification, replacement for typeinfo
-    explicit LoaderPass(const std::string &filename = "")
-      : ModulePass(ID), Filename(filename) {
-      initializeLoaderPassPass(*PassRegistry::getPassRegistry());
-      if (filename.empty()) Filename = ProfileInfoFilename;
-    }
-
-    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
-      AU.setPreservesAll();
-    }
-
-    virtual const char *getPassName() const {
-      return "Profiling information loader";
-    }
-
-    // recurseBasicBlock() - Calculates the edge weights for as many basic
-    // blocks as possible.
-    virtual void recurseBasicBlock(const BasicBlock *BB);
-    virtual void readEdgeOrRemember(Edge, Edge&, unsigned &, double &);
-    virtual void readEdge(ProfileInfo::Edge, std::vector<unsigned>&);
-
-    /// getAdjustedAnalysisPointer - This method is used when a pass implements
-    /// an analysis interface through multiple inheritance.  If needed, it
-    /// should override this to adjust the this pointer as needed for the
-    /// specified pass info.
-    virtual void *getAdjustedAnalysisPointer(AnalysisID PI) {
-      if (PI == &ProfileInfo::ID)
-        return (ProfileInfo*)this;
-      return this;
-    }
-
-    /// run - Load the profile information from the specified file.
-    virtual bool runOnModule(Module &M);
-  };
-}  // End of anonymous namespace
-
-char LoaderPass::ID = 0;
-INITIALIZE_AG_PASS(LoaderPass, ProfileInfo, "profile-loader",
-                   "Load profile information from llvmprof.out", false, true, false)
-
-char &llvm::ProfileLoaderPassID = LoaderPass::ID;
-
-ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
-
-/// createProfileLoaderPass - This function returns a Pass that loads the
-/// profiling information for the module from the specified filename, making it
-/// available to the optimizers.
-Pass *llvm::createProfileLoaderPass(const std::string &Filename) {
-  return new LoaderPass(Filename);
-}
-
-void LoaderPass::readEdgeOrRemember(Edge edge, Edge &tocalc,
-                                    unsigned &uncalc, double &count) {
-  double w;
-  if ((w = getEdgeWeight(edge)) == MissingValue) {
-    tocalc = edge;
-    uncalc++;
-  } else {
-    count+=w;
-  }
-}
-
-// recurseBasicBlock - Visits all neighbours of a block and then tries to
-// calculate the missing edge values.
-void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
-
-  // break recursion if already visited
-  if (BBisUnvisited.find(BB) == BBisUnvisited.end()) return;
-  BBisUnvisited.erase(BB);
-  if (!BB) return;
-
-  for (succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
-       bbi != bbe; ++bbi) {
-    recurseBasicBlock(*bbi);
-  }
-  for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
-       bbi != bbe; ++bbi) {
-    recurseBasicBlock(*bbi);
-  }
-
-  Edge tocalc;
-  if (CalculateMissingEdge(BB, tocalc)) {
-    SpanningTree.erase(tocalc);
-  }
-}
-
-void LoaderPass::readEdge(ProfileInfo::Edge e,
-                          std::vector<unsigned> &ECs) {
-  if (ReadCount < ECs.size()) {
-    double weight = ECs[ReadCount++];
-    if (weight != ProfileInfoLoader::Uncounted) {
-      // Here the data realm changes from the unsigned of the file to the
-      // double of the ProfileInfo. This conversion is safe because we know
-      // that everything that's representable in unsigned is also representable
-      // in double.
-      EdgeInformation[getFunction(e)][e] += (double)weight;
-
-      DEBUG(dbgs() << "--Read Edge Counter for " << e
-                   << " (# "<< (ReadCount-1) << "): "
-                   << (unsigned)getEdgeWeight(e) << "\n");
-    } else {
-      // This happens only if reading optimal profiling information, not when
-      // reading regular profiling information.
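The "conversion is safe" comment in readEdge above can be checked directly: an IEEE-754 double carries a 53-bit significand, so every 32-bit unsigned counter converts exactly and round-trips without loss. A quick standalone check of that claim (plain C++, hypothetical names):

    #include <cassert>
    #include <cstdint>
    #include <limits>

    int main() {
      // 53 significand bits comfortably cover all 32-bit unsigned values.
      static_assert(std::numeric_limits<double>::digits == 53, "IEEE-754 double");
      uint32_t Max = std::numeric_limits<uint32_t>::max();
      double D = static_cast<double>(Max);
      assert(static_cast<uint32_t>(D) == Max);  // exact round-trip
    }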
- SpanningTree.insert(e); - } - } -} - -bool LoaderPass::runOnModule(Module &M) { - ProfileInfoLoader PIL("profile-loader", Filename); - - EdgeInformation.clear(); - std::vector Counters = PIL.getRawEdgeCounts(); - if (Counters.size() > 0) { - ReadCount = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Working on " << F->getName() << "\n"); - readEdge(getEdge(0,&F->getEntryBlock()), Counters); - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { - readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); - } - } - } - if (ReadCount != Counters.size()) { - errs() << "WARNING: profile information is inconsistent with " - << "the current program!\n"; - } - NumEdgesRead = ReadCount; - } - - Counters = PIL.getRawOptimalEdgeCounts(); - if (Counters.size() > 0) { - ReadCount = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - DEBUG(dbgs() << "Working on " << F->getName() << "\n"); - readEdge(getEdge(0,&F->getEntryBlock()), Counters); - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { - TerminatorInst *TI = BB->getTerminator(); - if (TI->getNumSuccessors() == 0) { - readEdge(getEdge(BB,0), Counters); - } - for (unsigned s = 0, e = TI->getNumSuccessors(); s != e; ++s) { - readEdge(getEdge(BB,TI->getSuccessor(s)), Counters); - } - } - while (SpanningTree.size() > 0) { - - unsigned size = SpanningTree.size(); - - BBisUnvisited.clear(); - for (std::set::iterator ei = SpanningTree.begin(), - ee = SpanningTree.end(); ei != ee; ++ei) { - BBisUnvisited.insert(ei->first); - BBisUnvisited.insert(ei->second); - } - while (BBisUnvisited.size() > 0) { - recurseBasicBlock(*BBisUnvisited.begin()); - } - - if (SpanningTree.size() == size) { - DEBUG(dbgs()<<"{"); - for (std::set::iterator ei = SpanningTree.begin(), - ee = SpanningTree.end(); ei != ee; ++ei) { - DEBUG(dbgs()<< *ei <<","); - } - assert(0 && "No edge calculated!"); - } - - } - } - if (ReadCount != Counters.size()) { - errs() << "WARNING: profile information is inconsistent with " - << "the current program!\n"; - } - NumEdgesRead = ReadCount; - } - - BlockInformation.clear(); - Counters = PIL.getRawBlockCounts(); - if (Counters.size() > 0) { - ReadCount = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - if (ReadCount < Counters.size()) - // Here the data realm changes from the unsigned of the file to the - // double of the ProfileInfo. This conversion is save because we know - // that everything thats representable in unsinged is also - // representable in double. - BlockInformation[F][BB] = (double)Counters[ReadCount++]; - } - if (ReadCount != Counters.size()) { - errs() << "WARNING: profile information is inconsistent with " - << "the current program!\n"; - } - } - - FunctionInformation.clear(); - Counters = PIL.getRawFunctionCounts(); - if (Counters.size() > 0) { - ReadCount = 0; - for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { - if (F->isDeclaration()) continue; - if (ReadCount < Counters.size()) - // Here the data realm changes from the unsigned of the file to the - // double of the ProfileInfo. 
This conversion is save because we know - // that everything thats representable in unsinged is also - // representable in double. - FunctionInformation[F] = (double)Counters[ReadCount++]; - } - if (ReadCount != Counters.size()) { - errs() << "WARNING: profile information is inconsistent with " - << "the current program!\n"; - } - } - - return false; -} diff --git a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp b/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp deleted file mode 100644 index c8896de..0000000 --- a/contrib/llvm/lib/Analysis/ProfileVerifierPass.cpp +++ /dev/null @@ -1,383 +0,0 @@ -//===- ProfileVerifierPass.cpp - LLVM Pass to estimate profile info -------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass that checks profiling information for -// plausibility. -// -//===----------------------------------------------------------------------===// -#define DEBUG_TYPE "profile-verifier" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ProfileInfo.h" -#include "llvm/IR/Instructions.h" -#include "llvm/IR/Module.h" -#include "llvm/Pass.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/InstIterator.h" -#include "llvm/Support/raw_ostream.h" -#include -using namespace llvm; - -static cl::opt -ProfileVerifierDisableAssertions("profile-verifier-noassert", - cl::desc("Disable assertions")); - -namespace { - template - class ProfileVerifierPassT : public FunctionPass { - - struct DetailedBlockInfo { - const BType *BB; - double BBWeight; - double inWeight; - int inCount; - double outWeight; - int outCount; - }; - - ProfileInfoT *PI; - std::set BBisVisited; - std::set FisVisited; - bool DisableAssertions; - - // When debugging is enabled, the verifier prints a whole slew of debug - // information, otherwise its just the assert. These are all the helper - // functions. - bool PrintedDebugTree; - std::set BBisPrinted; - void debugEntry(DetailedBlockInfo*); - void printDebugInfo(const BType *BB); - - public: - static char ID; // Class identification, replacement for typeinfo - - explicit ProfileVerifierPassT () : FunctionPass(ID) { - initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry()); - DisableAssertions = ProfileVerifierDisableAssertions; - } - explicit ProfileVerifierPassT (bool da) : FunctionPass(ID), - DisableAssertions(da) { - initializeProfileVerifierPassPass(*PassRegistry::getPassRegistry()); - } - - void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired >(); - } - - const char *getPassName() const { - return "Profiling information verifier"; - } - - /// run - Verify the profile information. 
- bool runOnFunction(FType &F); - void recurseBasicBlock(const BType*); - - bool exitReachable(const FType*); - double ReadOrAssert(typename ProfileInfoT::Edge); - void CheckValue(bool, const char*, DetailedBlockInfo*); - }; - - typedef ProfileVerifierPassT ProfileVerifierPass; - - template - void ProfileVerifierPassT::printDebugInfo(const BType *BB) { - - if (BBisPrinted.find(BB) != BBisPrinted.end()) return; - - double BBWeight = PI->getExecutionCount(BB); - if (BBWeight == ProfileInfoT::MissingValue) { BBWeight = 0; } - double inWeight = 0; - int inCount = 0; - std::set ProcessedPreds; - for (const_pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB); - bbi != bbe; ++bbi ) { - if (ProcessedPreds.insert(*bbi).second) { - typename ProfileInfoT::Edge E = PI->getEdge(*bbi,BB); - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfoT::MissingValue) { EdgeWeight = 0; } - dbgs() << "calculated in-edge " << E << ": " - << format("%20.20g",EdgeWeight) << "\n"; - inWeight += EdgeWeight; - inCount++; - } - } - double outWeight = 0; - int outCount = 0; - std::set ProcessedSuccs; - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - if (ProcessedSuccs.insert(*bbi).second) { - typename ProfileInfoT::Edge E = PI->getEdge(BB,*bbi); - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfoT::MissingValue) { EdgeWeight = 0; } - dbgs() << "calculated out-edge " << E << ": " - << format("%20.20g",EdgeWeight) << "\n"; - outWeight += EdgeWeight; - outCount++; - } - } - dbgs() << "Block " << BB->getName() << " in " - << BB->getParent()->getName() << ":" - << "BBWeight=" << format("%20.20g",BBWeight) << "," - << "inWeight=" << format("%20.20g",inWeight) << "," - << "inCount=" << inCount << "," - << "outWeight=" << format("%20.20g",outWeight) << "," - << "outCount" << outCount << "\n"; - - // mark as visited and recurse into subnodes - BBisPrinted.insert(BB); - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - printDebugInfo(*bbi); - } - } - - template - void ProfileVerifierPassT::debugEntry (DetailedBlockInfo *DI) { - dbgs() << "TROUBLE: Block " << DI->BB->getName() << " in " - << DI->BB->getParent()->getName() << ":" - << "BBWeight=" << format("%20.20g",DI->BBWeight) << "," - << "inWeight=" << format("%20.20g",DI->inWeight) << "," - << "inCount=" << DI->inCount << "," - << "outWeight=" << format("%20.20g",DI->outWeight) << "," - << "outCount=" << DI->outCount << "\n"; - if (!PrintedDebugTree) { - PrintedDebugTree = true; - printDebugInfo(&(DI->BB->getParent()->getEntryBlock())); - } - } - - // This compares A and B for equality. - static bool Equals(double A, double B) { - return A == B; - } - - // This checks if the function "exit" is reachable from an given function - // via calls, this is necessary to check if a profile is valid despite the - // counts not fitting exactly. - template - bool ProfileVerifierPassT::exitReachable(const FType *F) { - if (!F) return false; - - if (FisVisited.count(F)) return false; - - FType *Exit = F->getParent()->getFunction("exit"); - if (Exit == F) { - return true; - } - - FisVisited.insert(F); - bool exits = false; - for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { - if (const CallInst *CI = dyn_cast(&*I)) { - FType *F = CI->getCalledFunction(); - if (F) { - exits |= exitReachable(F); - } else { - // This is a call to a pointer, all bets are off... 
- exits = true; - } - if (exits) break; - } - } - return exits; - } - - #define ASSERTMESSAGE(M) \ - { dbgs() << "ASSERT:" << (M) << "\n"; \ - if (!DisableAssertions) assert(0 && (M)); } - - template - double ProfileVerifierPassT::ReadOrAssert(typename ProfileInfoT::Edge E) { - double EdgeWeight = PI->getEdgeWeight(E); - if (EdgeWeight == ProfileInfoT::MissingValue) { - dbgs() << "Edge " << E << " in Function " - << ProfileInfoT::getFunction(E)->getName() << ": "; - ASSERTMESSAGE("Edge has missing value"); - return 0; - } else { - if (EdgeWeight < 0) { - dbgs() << "Edge " << E << " in Function " - << ProfileInfoT::getFunction(E)->getName() << ": "; - ASSERTMESSAGE("Edge has negative value"); - } - return EdgeWeight; - } - } - - template - void ProfileVerifierPassT::CheckValue(bool Error, - const char *Message, - DetailedBlockInfo *DI) { - if (Error) { - DEBUG(debugEntry(DI)); - dbgs() << "Block " << DI->BB->getName() << " in Function " - << DI->BB->getParent()->getName() << ": "; - ASSERTMESSAGE(Message); - } - return; - } - - // This calculates the Information for a block and then recurses into the - // successors. - template - void ProfileVerifierPassT::recurseBasicBlock(const BType *BB) { - - // Break the recursion by remembering all visited blocks. - if (BBisVisited.find(BB) != BBisVisited.end()) return; - - // Use a data structure to store all the information, this can then be handed - // to debug printers. - DetailedBlockInfo DI; - DI.BB = BB; - DI.outCount = DI.inCount = 0; - DI.inWeight = DI.outWeight = 0; - - // Read predecessors. - std::set ProcessedPreds; - const_pred_iterator bpi = pred_begin(BB), bpe = pred_end(BB); - // If there are none, check for (0,BB) edge. - if (bpi == bpe) { - DI.inWeight += ReadOrAssert(PI->getEdge(0,BB)); - DI.inCount++; - } - for (;bpi != bpe; ++bpi) { - if (ProcessedPreds.insert(*bpi).second) { - DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB)); - DI.inCount++; - } - } - - // Read successors. - std::set ProcessedSuccs; - succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - // If there is an (0,BB) edge, consider it too. (This is done not only when - // there are no successors, but every time; not every function contains - // return blocks with no successors (think loop latch as return block)). - double w = PI->getEdgeWeight(PI->getEdge(BB,0)); - if (w != ProfileInfoT::MissingValue) { - DI.outWeight += w; - DI.outCount++; - } - for (;bbi != bbe; ++bbi) { - if (ProcessedSuccs.insert(*bbi).second) { - DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi)); - DI.outCount++; - } - } - - // Read block weight. - DI.BBWeight = PI->getExecutionCount(BB); - CheckValue(DI.BBWeight == ProfileInfoT::MissingValue, - "BasicBlock has missing value", &DI); - CheckValue(DI.BBWeight < 0, - "BasicBlock has negative value", &DI); - - // Check if this block is a setjmp target. - bool isSetJmpTarget = false; - if (DI.outWeight > DI.inWeight) { - for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); - i != ie; ++i) { - if (const CallInst *CI = dyn_cast(&*i)) { - FType *F = CI->getCalledFunction(); - if (F && (F->getName() == "_setjmp")) { - isSetJmpTarget = true; break; - } - } - } - } - // Check if this block is eventually reaching exit. 
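Setjmp targets and exit()-reaching calls aside, the checks the deleted verifier performs below are plain per-block flow conservation, with exact double equality just like the pass's Equals helper. A compact restatement of its case analysis (flowIsConsistent is a hypothetical helper, not the pass API, and it omits the setjmp/exit escape hatches):

    // Sketch of the verifier's core invariant on one block.
    bool flowIsConsistent(double BBWeight, double InWeight, double OutWeight,
                          int InCount, int OutCount) {
      if (InCount > 0 && OutCount == 0)   // return-like block: flow ends here
        return InWeight == BBWeight;
      if (InCount == 0 && OutCount > 0)   // entry-like block: flow starts here
        return BBWeight == OutWeight;
      return InWeight == OutWeight;       // interior block: flow passes through
    }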
- bool isExitReachable = false; - if (DI.inWeight > DI.outWeight) { - for (typename BType::const_iterator i = BB->begin(), ie = BB->end(); - i != ie; ++i) { - if (const CallInst *CI = dyn_cast(&*i)) { - FType *F = CI->getCalledFunction(); - if (F) { - FisVisited.clear(); - isExitReachable |= exitReachable(F); - } else { - // This is a call to a pointer, all bets are off... - isExitReachable = true; - } - if (isExitReachable) break; - } - } - } - - if (DI.inCount > 0 && DI.outCount == 0) { - // If this is a block with no successors. - if (!isSetJmpTarget) { - CheckValue(!Equals(DI.inWeight,DI.BBWeight), - "inWeight and BBWeight do not match", &DI); - } - } else if (DI.inCount == 0 && DI.outCount > 0) { - // If this is a block with no predecessors. - if (!isExitReachable) - CheckValue(!Equals(DI.BBWeight,DI.outWeight), - "BBWeight and outWeight do not match", &DI); - } else { - // If this block has successors and predecessors. - if (DI.inWeight > DI.outWeight && !isExitReachable) - CheckValue(!Equals(DI.inWeight,DI.outWeight), - "inWeight and outWeight do not match", &DI); - if (DI.inWeight < DI.outWeight && !isSetJmpTarget) - CheckValue(!Equals(DI.inWeight,DI.outWeight), - "inWeight and outWeight do not match", &DI); - } - - - // Mark this block as visited, rescurse into successors. - BBisVisited.insert(BB); - for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB); - bbi != bbe; ++bbi ) { - recurseBasicBlock(*bbi); - } - } - - template - bool ProfileVerifierPassT::runOnFunction(FType &F) { - PI = getAnalysisIfAvailable >(); - if (!PI) - ASSERTMESSAGE("No ProfileInfo available"); - - // Prepare global variables. - PrintedDebugTree = false; - BBisVisited.clear(); - - // Fetch entry block and recurse into it. - const BType *entry = &F.getEntryBlock(); - recurseBasicBlock(entry); - - if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry)) - ASSERTMESSAGE("Function count and entry block count do not match"); - - return false; - } - - template - char ProfileVerifierPassT::ID = 0; -} - -INITIALIZE_PASS_BEGIN(ProfileVerifierPass, "profile-verifier", - "Verify profiling information", false, true) -INITIALIZE_AG_DEPENDENCY(ProfileInfo) -INITIALIZE_PASS_END(ProfileVerifierPass, "profile-verifier", - "Verify profiling information", false, true) - -namespace llvm { - FunctionPass *createProfileVerifierPass() { - return new ProfileVerifierPass(ProfileVerifierDisableAssertions); - } -} - diff --git a/contrib/llvm/lib/Analysis/RegionInfo.cpp b/contrib/llvm/lib/Analysis/RegionInfo.cpp index 8577025..5635688 100644 --- a/contrib/llvm/lib/Analysis/RegionInfo.cpp +++ b/contrib/llvm/lib/Analysis/RegionInfo.cpp @@ -9,6 +9,7 @@ // Detects single entry single exit regions in the control flow graph. //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "region" #include "llvm/Analysis/RegionInfo.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" @@ -17,12 +18,9 @@ #include "llvm/Assembly/Writer.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" - -#define DEBUG_TYPE "region" #include "llvm/Support/Debug.h" - -#include #include +#include using namespace llvm; diff --git a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp index f876748..0a02f4e 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolution.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolution.cpp @@ -585,6 +585,9 @@ namespace { // Lexicographically compare n-ary expressions. 
unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; + for (unsigned i = 0; i != LNumOps; ++i) { if (i >= RNumOps) return 1; @@ -758,7 +761,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K, unsigned CalculationBits = W + T; // Calculate 2^T, at width T+W. - APInt DivFactor = APInt(CalculationBits, 1).shl(T); + APInt DivFactor = APInt::getOneBitSet(CalculationBits, T); // Calculate the multiplicative inverse of K! / 2^T; // this multiplication factor will perform the exact division by @@ -1380,7 +1383,7 @@ const SCEV *ScalarEvolution::getAnyExtendExpr(const SCEV *Op, /// static bool CollectAddOperandsWithScales(DenseMap &M, - SmallVector &NewOps, + SmallVectorImpl &NewOps, APInt &AccumulatedConstant, const SCEV *const *Ops, size_t NumOperands, const APInt &Scale, @@ -1628,7 +1631,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. std::map, APIntCompare> MulOpLists; - for (SmallVector::const_iterator I = NewOps.begin(), + for (SmallVectorImpl::const_iterator I = NewOps.begin(), E = NewOps.end(); I != E; ++I) MulOpLists[M.find(*I)->second].push_back(*I); // Re-generate the operands list. @@ -2587,55 +2590,39 @@ const SCEV *ScalarEvolution::getUMinExpr(const SCEV *LHS, return getNotSCEV(getUMaxExpr(getNotSCEV(LHS), getNotSCEV(RHS))); } -const SCEV *ScalarEvolution::getSizeOfExpr(Type *AllocTy) { +const SCEV *ScalarEvolution::getSizeOfExpr(Type *IntTy, Type *AllocTy) { // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. if (TD) - return getConstant(TD->getIntPtrType(getContext()), - TD->getTypeAllocSize(AllocTy)); + return getConstant(IntTy, TD->getTypeAllocSize(AllocTy)); Constant *C = ConstantExpr::getSizeOf(AllocTy); if (ConstantExpr *CE = dyn_cast(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); + assert(Ty == IntTy && "Effective SCEV type doesn't match"); return getTruncateOrZeroExtend(getSCEV(C), Ty); } -const SCEV *ScalarEvolution::getAlignOfExpr(Type *AllocTy) { - Constant *C = ConstantExpr::getAlignOf(AllocTy); - if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) - C = Folded; - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(AllocTy)); - return getTruncateOrZeroExtend(getSCEV(C), Ty); -} - -const SCEV *ScalarEvolution::getOffsetOfExpr(StructType *STy, +const SCEV *ScalarEvolution::getOffsetOfExpr(Type *IntTy, + StructType *STy, unsigned FieldNo) { // If we have DataLayout, we can bypass creating a target-independent // constant expression and then folding it back into a ConstantInt. // This is just a compile-time optimization. 
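The DivFactor change in the hunk above is a pure refactoring: both expressions construct the APInt value 2^T, once by shifting a one left by T and once by setting bit T directly. A sketch of the equivalence using the same two APInt calls the diff touches (assumes LLVM's ADT headers are available to build against; the function name is illustrative):

    #include "llvm/ADT/APInt.h"
    #include <cassert>

    // Both forms yield the W-bit value 2^T (for T < W).
    void equivalence(unsigned W, unsigned T) {
      llvm::APInt Old = llvm::APInt(W, 1).shl(T);        // 1 << T
      llvm::APInt New = llvm::APInt::getOneBitSet(W, T); // bit T set
      assert(Old == New);
    }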
- if (TD) - return getConstant(TD->getIntPtrType(getContext()), + if (TD) { + return getConstant(IntTy, TD->getStructLayout(STy)->getElementOffset(FieldNo)); + } Constant *C = ConstantExpr::getOffsetOf(STy, FieldNo); if (ConstantExpr *CE = dyn_cast(C)) if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) C = Folded; - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); - return getTruncateOrZeroExtend(getSCEV(C), Ty); -} -const SCEV *ScalarEvolution::getOffsetOfExpr(Type *CTy, - Constant *FieldNo) { - Constant *C = ConstantExpr::getOffsetOf(CTy, FieldNo); - if (ConstantExpr *CE = dyn_cast(C)) - if (Constant *Folded = ConstantFoldConstantExpression(CE, TD, TLI)) - C = Folded; - Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(CTy)); + Type *Ty = getEffectiveSCEVType(PointerType::getUnqual(STy)); return getTruncateOrZeroExtend(getSCEV(C), Ty); } @@ -2700,12 +2687,15 @@ uint64_t ScalarEvolution::getTypeSizeInBits(Type *Ty) const { Type *ScalarEvolution::getEffectiveSCEVType(Type *Ty) const { assert(isSCEVable(Ty) && "Type is not SCEVable!"); - if (Ty->isIntegerTy()) + if (Ty->isIntegerTy()) { return Ty; + } // The only other support type is pointer. assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!"); - if (TD) return TD->getIntPtrType(getContext()); + + if (TD) + return TD->getIntPtrType(Ty); // Without DataLayout, conservatively assume pointers are 64-bit. return Type::getInt64Ty(getContext()); @@ -2715,13 +2705,51 @@ const SCEV *ScalarEvolution::getCouldNotCompute() { return &CouldNotCompute; } +namespace { + // Helper class working with SCEVTraversal to figure out if a SCEV contains + // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne + // is set iff if find such SCEVUnknown. + // + struct FindInvalidSCEVUnknown { + bool FindOne; + FindInvalidSCEVUnknown() { FindOne = false; } + bool follow(const SCEV *S) { + switch (S->getSCEVType()) { + case scConstant: + return false; + case scUnknown: + if (!cast(S)->getValue()) + FindOne = true; + return false; + default: + return true; + } + } + bool isDone() const { return FindOne; } + }; +} + +bool ScalarEvolution::checkValidity(const SCEV *S) const { + FindInvalidSCEVUnknown F; + SCEVTraversal ST(F); + ST.visitAll(S); + + return !F.FindOne; +} + /// getSCEV - Return an existing SCEV if it exists, otherwise analyze the /// expression and create a new one. const SCEV *ScalarEvolution::getSCEV(Value *V) { assert(isSCEVable(V->getType()) && "Value is not SCEVable!"); - ValueExprMapType::const_iterator I = ValueExprMap.find_as(V); - if (I != ValueExprMap.end()) return I->second; + ValueExprMapType::iterator I = ValueExprMap.find_as(V); + if (I != ValueExprMap.end()) { + const SCEV *S = I->second; + if (checkValidity(S)) + return S; + else + ValueExprMap.erase(I); + } const SCEV *S = createSCEV(V); // The process of creating a SCEV for V may have caused other SCEVs @@ -3060,15 +3088,26 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { Flags = setFlags(Flags, SCEV::FlagNUW); if (OBO->hasNoSignedWrap()) Flags = setFlags(Flags, SCEV::FlagNSW); - } else if (const GEPOperator *GEP = - dyn_cast(BEValueV)) { + } else if (GEPOperator *GEP = dyn_cast(BEValueV)) { // If the increment is an inbounds GEP, then we know the address // space cannot be wrapped around. We cannot make any guarantee // about signed or unsigned overflow because pointers are // unsigned but we may have a negative index from the base - // pointer. - if (GEP->isInBounds()) + // pointer. 
We can guarantee that no unsigned wrap occurs if the + // indices form a positive value. + if (GEP->isInBounds()) { Flags = setFlags(Flags, SCEV::FlagNW); + + const SCEV *Ptr = getSCEV(GEP->getPointerOperand()); + if (isKnownPositive(getMinusSCEV(getSCEV(GEP), Ptr))) + Flags = setFlags(Flags, SCEV::FlagNUW); + } + } else if (const SubOperator *OBO = + dyn_cast(BEValueV)) { + if (OBO->hasNoUnsignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNUW); + if (OBO->hasNoSignedWrap()) + Flags = setFlags(Flags, SCEV::FlagNSW); } const SCEV *StartVal = getSCEV(StartValueV); @@ -3136,18 +3175,18 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) { /// operations. This allows them to be analyzed by regular SCEV code. /// const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { + Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); + Value *Base = GEP->getOperand(0); + // Don't attempt to analyze GEPs over unsized objects. + if (!Base->getType()->getPointerElementType()->isSized()) + return getUnknown(GEP); // Don't blindly transfer the inbounds flag from the GEP instruction to the // Add expression, because the Instruction may be guarded by control flow // and the no-overflow bits may not be valid for the expression in any // context. - bool isInBounds = GEP->isInBounds(); + SCEV::NoWrapFlags Wrap = GEP->isInBounds() ? SCEV::FlagNSW : SCEV::FlagAnyWrap; - Type *IntPtrTy = getEffectiveSCEVType(GEP->getType()); - Value *Base = GEP->getOperand(0); - // Don't attempt to analyze GEPs over unsized objects. - if (!cast(Base->getType())->getElementType()->isSized()) - return getUnknown(GEP); const SCEV *TotalOffset = getConstant(IntPtrTy, 0); gep_type_iterator GTI = gep_type_begin(GEP); for (GetElementPtrInst::op_iterator I = llvm::next(GEP->op_begin()), @@ -3158,21 +3197,19 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { if (StructType *STy = dyn_cast(*GTI++)) { // For a struct, add the member offset. unsigned FieldNo = cast(Index)->getZExtValue(); - const SCEV *FieldOffset = getOffsetOfExpr(STy, FieldNo); + const SCEV *FieldOffset = getOffsetOfExpr(IntPtrTy, STy, FieldNo); // Add the field offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, FieldOffset); } else { // For an array, add the element offset, explicitly scaled. - const SCEV *ElementSize = getSizeOfExpr(*GTI); + const SCEV *ElementSize = getSizeOfExpr(IntPtrTy, *GTI); const SCEV *IndexS = getSCEV(Index); // Getelementptr indices are signed. IndexS = getTruncateOrSignExtend(IndexS, IntPtrTy); // Multiply the index by the element size to compute the element offset. - const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, - isInBounds ? SCEV::FlagNSW : - SCEV::FlagAnyWrap); + const SCEV *LocalOffset = getMulExpr(IndexS, ElementSize, Wrap); // Add the element offset to the running total offset. TotalOffset = getAddExpr(TotalOffset, LocalOffset); @@ -3183,8 +3220,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) { const SCEV *BaseS = getSCEV(Base); // Add the total offset from all the GEP indices to the base. - return getAddExpr(BaseS, TotalOffset, - isInBounds ? 
SCEV::FlagNSW : SCEV::FlagAnyWrap); + return getAddExpr(BaseS, TotalOffset, Wrap); } /// GetMinTrailingZeros - Determine the minimum number of zero bits that S is @@ -3551,7 +3587,7 @@ ScalarEvolution::getSignedRange(const SCEV *S) { if (!U->getValue()->getType()->isIntegerTy() && !TD) return setSignedRange(U, ConservativeResult); unsigned NS = ComputeNumSignBits(U->getValue(), TD); - if (NS == 1) + if (NS <= 1) return setSignedRange(U, ConservativeResult); return setSignedRange(U, ConservativeResult.intersectWith( ConstantRange(APInt::getSignedMinValue(BitWidth).ashr(NS - 1), @@ -3751,7 +3787,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; Constant *X = ConstantInt::get(getContext(), - APInt(BitWidth, 1).shl(SA->getZExtValue())); + APInt::getOneBitSet(BitWidth, SA->getZExtValue())); return getMulExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } break; @@ -3769,7 +3805,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { break; Constant *X = ConstantInt::get(getContext(), - APInt(BitWidth, 1).shl(SA->getZExtValue())); + APInt::getOneBitSet(BitWidth, SA->getZExtValue())); return getUDivExpr(getSCEV(U->getOperand(0)), getSCEV(X)); } break; @@ -3947,7 +3983,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) { /// depends on a NSW assumption, and we would only fall back to a conservative /// trip count in that case. unsigned ScalarEvolution:: -getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) { +getSmallConstantTripCount(Loop *L, BasicBlock * /*ExitingBlock*/) { const SCEVConstant *ExitCount = dyn_cast(getBackedgeTakenCount(L)); if (!ExitCount) @@ -3976,7 +4012,7 @@ getSmallConstantTripCount(Loop *L, BasicBlock */*ExitingBlock*/) { /// As explained in the comments for getSmallConstantTripCount, this assumes /// that control exits the loop via ExitingBlock. unsigned ScalarEvolution:: -getSmallConstantTripMultiple(Loop *L, BasicBlock */*ExitingBlock*/) { +getSmallConstantTripMultiple(Loop *L, BasicBlock * /*ExitingBlock*/) { const SCEV *ExitCount = getBackedgeTakenCount(L); if (ExitCount == getCouldNotCompute()) return 1; @@ -4575,25 +4611,17 @@ ScalarEvolution::ComputeExitLimitFromICmp(const Loop *L, if (EL.hasAnyInfo()) return EL; break; } - case ICmpInst::ICMP_SLT: { - ExitLimit EL = HowManyLessThans(LHS, RHS, L, true, IsSubExpr); - if (EL.hasAnyInfo()) return EL; - break; - } - case ICmpInst::ICMP_SGT: { - ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), - getNotSCEV(RHS), L, true, IsSubExpr); - if (EL.hasAnyInfo()) return EL; - break; - } - case ICmpInst::ICMP_ULT: { - ExitLimit EL = HowManyLessThans(LHS, RHS, L, false, IsSubExpr); + case ICmpInst::ICMP_SLT: + case ICmpInst::ICMP_ULT: { // while (X < Y) + bool IsSigned = Cond == ICmpInst::ICMP_SLT; + ExitLimit EL = HowManyLessThans(LHS, RHS, L, IsSigned, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } - case ICmpInst::ICMP_UGT: { - ExitLimit EL = HowManyLessThans(getNotSCEV(LHS), - getNotSCEV(RHS), L, false, IsSubExpr); + case ICmpInst::ICMP_SGT: + case ICmpInst::ICMP_UGT: { // while (X > Y) + bool IsSigned = Cond == ICmpInst::ICMP_SGT; + ExitLimit EL = HowManyGreaterThans(LHS, RHS, L, IsSigned, IsSubExpr); if (EL.hasAnyInfo()) return EL; break; } @@ -5031,15 +5059,21 @@ const SCEV *ScalarEvolution::ComputeExitCountExhaustively(const Loop *L, /// original value V is returned. const SCEV *ScalarEvolution::getSCEVAtScope(const SCEV *V, const Loop *L) { // Check to see if we've folded this expression at this loop before. 
- std::map &Values = ValuesAtScopes[V]; - std::pair::iterator, bool> Pair = - Values.insert(std::make_pair(L, static_cast(0))); - if (!Pair.second) - return Pair.first->second ? Pair.first->second : V; - + SmallVector, 2> &Values = ValuesAtScopes[V]; + for (unsigned u = 0; u < Values.size(); u++) { + if (Values[u].first == L) + return Values[u].second ? Values[u].second : V; + } + Values.push_back(std::make_pair(L, static_cast(0))); // Otherwise compute it. const SCEV *C = computeSCEVAtScope(V, L); - ValuesAtScopes[V][L] = C; + SmallVector, 2> &Values2 = ValuesAtScopes[V]; + for (unsigned u = Values2.size(); u > 0; u--) { + if (Values2[u - 1].first == L) { + Values2[u - 1].second = C; + break; + } + } return C; } @@ -5078,18 +5112,23 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { case scAddExpr: { const SCEVAddExpr *SA = cast(V); if (Constant *C = BuildConstantFromSCEV(SA->getOperand(0))) { - if (C->getType()->isPointerTy()) - C = ConstantExpr::getBitCast(C, Type::getInt8PtrTy(C->getContext())); + if (PointerType *PTy = dyn_cast(C->getType())) { + unsigned AS = PTy->getAddressSpace(); + Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); + C = ConstantExpr::getBitCast(C, DestPtrTy); + } for (unsigned i = 1, e = SA->getNumOperands(); i != e; ++i) { Constant *C2 = BuildConstantFromSCEV(SA->getOperand(i)); if (!C2) return 0; // First pointer! if (!C->getType()->isPointerTy() && C2->getType()->isPointerTy()) { + unsigned AS = C2->getType()->getPointerAddressSpace(); std::swap(C, C2); + Type *DestPtrTy = Type::getInt8PtrTy(C->getContext(), AS); // The offsets have been converted to bytes. We can add bytes to an // i8* by GEP with the byte count in the first index. - C = ConstantExpr::getBitCast(C,Type::getInt8PtrTy(C->getContext())); + C = ConstantExpr::getBitCast(C, DestPtrTy); } // Don't bother trying to sum two pointers. We probably can't @@ -5097,8 +5136,8 @@ static Constant *BuildConstantFromSCEV(const SCEV *V) { if (C2->getType()->isPointerTy()) return 0; - if (C->getType()->isPointerTy()) { - if (cast(C->getType())->getElementType()->isStructTy()) + if (PointerType *PTy = dyn_cast(C->getType())) { + if (PTy->getElementType()->isStructTy()) C2 = ConstantExpr::getIntegerCast( C2, Type::getInt32Ty(C->getContext()), true); C = ConstantExpr::getGetElementPtr(C, C2); @@ -6295,45 +6334,72 @@ ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred, return false; } -/// getBECount - Subtract the end and start values and divide by the step, -/// rounding up, to get the number of times the backedge is executed. Return -/// CouldNotCompute if an intermediate computation overflows. -const SCEV *ScalarEvolution::getBECount(const SCEV *Start, - const SCEV *End, - const SCEV *Step, - bool NoWrap) { - assert(!isKnownNegative(Step) && - "This code doesn't handle negative strides yet!"); - - Type *Ty = Start->getType(); - - // When Start == End, we have an exact BECount == 0. Short-circuit this case - // here because SCEV may not be able to determine that the unsigned division - // after rounding is zero. - if (Start == End) - return getConstant(Ty, 0); - - const SCEV *NegOne = getConstant(Ty, (uint64_t)-1); - const SCEV *Diff = getMinusSCEV(End, Start); - const SCEV *RoundUp = getAddExpr(Step, NegOne); - - // Add an adjustment to the difference between End and Start so that - // the division will effectively round up. - const SCEV *Add = getAddExpr(Diff, RoundUp); - - if (!NoWrap) { - // Check Add for unsigned overflow. 
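The deleted getBECount code continues below with exactly this widening trick: an n-bit unsigned addition overflows iff redoing it at n+1 bits and truncating back loses information. The same idea at a toy 8-bit width (addOverflows8 is a hypothetical standalone helper, not the SCEV API):

    #include <cassert>
    #include <cstdint>

    bool addOverflows8(uint8_t A, uint8_t B) {
      uint16_t Wide = uint16_t(A) + uint16_t(B);  // exact 9-bit result
      return Wide != uint16_t(uint8_t(Wide));     // carry lost on truncation?
    }

    int main() {
      assert(!addOverflows8(100, 100));  // 200 fits in 8 bits
      assert(addOverflows8(200, 100));   // 300 does not
    }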
- // TODO: More sophisticated things could be done here. - Type *WideTy = IntegerType::get(getContext(), - getTypeSizeInBits(Ty) + 1); - const SCEV *EDiff = getZeroExtendExpr(Diff, WideTy); - const SCEV *ERoundUp = getZeroExtendExpr(RoundUp, WideTy); - const SCEV *OperandExtendedAdd = getAddExpr(EDiff, ERoundUp); - if (getZeroExtendExpr(Add, WideTy) != OperandExtendedAdd) - return getCouldNotCompute(); +// Verify if an linear IV with positive stride can overflow when in a +// less-than comparison, knowing the invariant term of the comparison, the +// stride and the knowledge of NSW/NUW flags on the recurrence. +bool ScalarEvolution::doesIVOverflowOnLT(const SCEV *RHS, const SCEV *Stride, + bool IsSigned, bool NoWrap) { + if (NoWrap) return false; + + unsigned BitWidth = getTypeSizeInBits(RHS->getType()); + const SCEV *One = getConstant(Stride->getType(), 1); + + if (IsSigned) { + APInt MaxRHS = getSignedRange(RHS).getSignedMax(); + APInt MaxValue = APInt::getSignedMaxValue(BitWidth); + APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) + .getSignedMax(); + + // SMaxRHS + SMaxStrideMinusOne > SMaxValue => overflow! + return (MaxValue - MaxStrideMinusOne).slt(MaxRHS); } - return getUDivExpr(Add, Step); + APInt MaxRHS = getUnsignedRange(RHS).getUnsignedMax(); + APInt MaxValue = APInt::getMaxValue(BitWidth); + APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) + .getUnsignedMax(); + + // UMaxRHS + UMaxStrideMinusOne > UMaxValue => overflow! + return (MaxValue - MaxStrideMinusOne).ult(MaxRHS); +} + +// Verify if an linear IV with negative stride can overflow when in a +// greater-than comparison, knowing the invariant term of the comparison, +// the stride and the knowledge of NSW/NUW flags on the recurrence. +bool ScalarEvolution::doesIVOverflowOnGT(const SCEV *RHS, const SCEV *Stride, + bool IsSigned, bool NoWrap) { + if (NoWrap) return false; + + unsigned BitWidth = getTypeSizeInBits(RHS->getType()); + const SCEV *One = getConstant(Stride->getType(), 1); + + if (IsSigned) { + APInt MinRHS = getSignedRange(RHS).getSignedMin(); + APInt MinValue = APInt::getSignedMinValue(BitWidth); + APInt MaxStrideMinusOne = getSignedRange(getMinusSCEV(Stride, One)) + .getSignedMax(); + + // SMinRHS - SMaxStrideMinusOne < SMinValue => overflow! + return (MinValue + MaxStrideMinusOne).sgt(MinRHS); + } + + APInt MinRHS = getUnsignedRange(RHS).getUnsignedMin(); + APInt MinValue = APInt::getMinValue(BitWidth); + APInt MaxStrideMinusOne = getUnsignedRange(getMinusSCEV(Stride, One)) + .getUnsignedMax(); + + // UMinRHS - UMaxStrideMinusOne < UMinValue => overflow! + return (MinValue + MaxStrideMinusOne).ugt(MinRHS); +} + +// Compute the backedge taken count knowing the interval difference, the +// stride and presence of the equality in the comparison. +const SCEV *ScalarEvolution::computeBECount(const SCEV *Delta, const SCEV *Step, + bool Equality) { + const SCEV *One = getConstant(Step->getType(), 1); + Delta = Equality ? getAddExpr(Delta, Step) + : getAddExpr(Delta, getMinusSCEV(Step, One)); + return getUDivExpr(Delta, Step); } /// HowManyLessThans - Return the number of times a backedge containing the @@ -6345,119 +6411,144 @@ const SCEV *ScalarEvolution::getBECount(const SCEV *Start, /// a subexpression that cannot overflow before evaluating true. ScalarEvolution::ExitLimit ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS, - const Loop *L, bool isSigned, + const Loop *L, bool IsSigned, bool IsSubExpr) { - // Only handle: "ADDREC < LoopInvariant". 
- if (!isLoopInvariant(RHS, L)) return getCouldNotCompute(); + // We handle only IV < Invariant + if (!isLoopInvariant(RHS, L)) + return getCouldNotCompute(); - const SCEVAddRecExpr *AddRec = dyn_cast(LHS); - if (!AddRec || AddRec->getLoop() != L) + const SCEVAddRecExpr *IV = dyn_cast(LHS); + + // Avoid weird loops + if (!IV || IV->getLoop() != L || !IV->isAffine()) return getCouldNotCompute(); - // Check to see if we have a flag which makes analysis easy. - bool NoWrap = false; - if (!IsSubExpr) { - NoWrap = AddRec->getNoWrapFlags( - (SCEV::NoWrapFlags)(((isSigned ? SCEV::FlagNSW : SCEV::FlagNUW)) - | SCEV::FlagNW)); - } - if (AddRec->isAffine()) { - unsigned BitWidth = getTypeSizeInBits(AddRec->getType()); - const SCEV *Step = AddRec->getStepRecurrence(*this); + bool NoWrap = !IsSubExpr && + IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); - if (Step->isZero()) - return getCouldNotCompute(); - if (Step->isOne()) { - // With unit stride, the iteration never steps past the limit value. - } else if (isKnownPositive(Step)) { - // Test whether a positive iteration can step past the limit - // value and past the maximum value for its type in a single step. - // Note that it's not sufficient to check NoWrap here, because even - // though the value after a wrap is undefined, it's not undefined - // behavior, so if wrap does occur, the loop could either terminate or - // loop infinitely, but in either case, the loop is guaranteed to - // iterate at least until the iteration where the wrapping occurs. - const SCEV *One = getConstant(Step->getType(), 1); - if (isSigned) { - APInt Max = APInt::getSignedMaxValue(BitWidth); - if ((Max - getSignedRange(getMinusSCEV(Step, One)).getSignedMax()) - .slt(getSignedRange(RHS).getSignedMax())) - return getCouldNotCompute(); - } else { - APInt Max = APInt::getMaxValue(BitWidth); - if ((Max - getUnsignedRange(getMinusSCEV(Step, One)).getUnsignedMax()) - .ult(getUnsignedRange(RHS).getUnsignedMax())) - return getCouldNotCompute(); - } - } else - // TODO: Handle negative strides here and below. - return getCouldNotCompute(); + const SCEV *Stride = IV->getStepRecurrence(*this); - // We know the LHS is of the form {n,+,s} and the RHS is some loop-invariant - // m. So, we count the number of iterations in which {n,+,s} < m is true. - // Note that we cannot simply return max(m-n,0)/s because it's not safe to - // treat m-n as signed nor unsigned due to overflow possibility. - - // First, we get the value of the LHS in the first iteration: n - const SCEV *Start = AddRec->getOperand(0); - - // Determine the minimum constant start value. - const SCEV *MinStart = getConstant(isSigned ? - getSignedRange(Start).getSignedMin() : - getUnsignedRange(Start).getUnsignedMin()); - - // If we know that the condition is true in order to enter the loop, - // then we know that it will run exactly (m-n)/s times. Otherwise, we - // only know that it will execute (max(m,n)-n)/s times. In both cases, - // the division must round up. - const SCEV *End = RHS; - if (!isLoopEntryGuardedByCond(L, - isSigned ? ICmpInst::ICMP_SLT : - ICmpInst::ICMP_ULT, - getMinusSCEV(Start, Step), RHS)) - End = isSigned ? getSMaxExpr(RHS, Start) - : getUMaxExpr(RHS, Start); - - // Determine the maximum constant end value. - const SCEV *MaxEnd = getConstant(isSigned ? - getSignedRange(End).getSignedMax() : - getUnsignedRange(End).getUnsignedMax()); - - // If MaxEnd is within a step of the maximum integer value in its type, - // adjust it down to the minimum value which would produce the same effect. 
- // This allows the subsequent ceiling division of (N+(step-1))/step to - // compute the correct value. - const SCEV *StepMinusOne = getMinusSCEV(Step, - getConstant(Step->getType(), 1)); - MaxEnd = isSigned ? - getSMinExpr(MaxEnd, - getMinusSCEV(getConstant(APInt::getSignedMaxValue(BitWidth)), - StepMinusOne)) : - getUMinExpr(MaxEnd, - getMinusSCEV(getConstant(APInt::getMaxValue(BitWidth)), - StepMinusOne)); - - // Finally, we subtract these two values and divide, rounding up, to get - // the number of times the backedge is executed. - const SCEV *BECount = getBECount(Start, End, Step, NoWrap); - - // The maximum backedge count is similar, except using the minimum start - // value and the maximum end value. - // If we already have an exact constant BECount, use it instead. - const SCEV *MaxBECount = isa(BECount) ? BECount - : getBECount(MinStart, MaxEnd, Step, NoWrap); - - // If the stride is nonconstant, and NoWrap == true, then - // getBECount(MinStart, MaxEnd) may not compute. This would result in an - // exact BECount and invalid MaxBECount, which should be avoided to catch - // more optimization opportunities. - if (isa(MaxBECount)) - MaxBECount = BECount; - - return ExitLimit(BECount, MaxBECount); - } + // Avoid negative or zero stride values + if (!isKnownPositive(Stride)) + return getCouldNotCompute(); - return getCouldNotCompute(); + // Avoid proven overflow cases: this will ensure that the backedge taken count + // will not generate any unsigned overflow. Relaxed no-overflow conditions + // exploit NoWrapFlags, allowing to optimize in presence of undefined + // behaviors like the case of C language. + if (!Stride->isOne() && doesIVOverflowOnLT(RHS, Stride, IsSigned, NoWrap)) + return getCouldNotCompute(); + + ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SLT + : ICmpInst::ICMP_ULT; + const SCEV *Start = IV->getStart(); + const SCEV *End = RHS; + if (!isLoopEntryGuardedByCond(L, Cond, getMinusSCEV(Start, Stride), RHS)) + End = IsSigned ? getSMaxExpr(RHS, Start) + : getUMaxExpr(RHS, Start); + + const SCEV *BECount = computeBECount(getMinusSCEV(End, Start), Stride, false); + + APInt MinStart = IsSigned ? getSignedRange(Start).getSignedMin() + : getUnsignedRange(Start).getUnsignedMin(); + + APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() + : getUnsignedRange(Stride).getUnsignedMin(); + + unsigned BitWidth = getTypeSizeInBits(LHS->getType()); + APInt Limit = IsSigned ? APInt::getSignedMaxValue(BitWidth) - (MinStride - 1) + : APInt::getMaxValue(BitWidth) - (MinStride - 1); + + // Although End can be a MAX expression we estimate MaxEnd considering only + // the case End = RHS. This is safe because in the other case (End - Start) + // is zero, leading to a zero maximum backedge taken count. + APInt MaxEnd = + IsSigned ? 
APIntOps::smin(getSignedRange(RHS).getSignedMax(), Limit) + : APIntOps::umin(getUnsignedRange(RHS).getUnsignedMax(), Limit); + + const SCEV *MaxBECount = getCouldNotCompute(); + if (isa(BECount)) + MaxBECount = BECount; + else + MaxBECount = computeBECount(getConstant(MaxEnd - MinStart), + getConstant(MinStride), false); + + if (isa(MaxBECount)) + MaxBECount = BECount; + + return ExitLimit(BECount, MaxBECount); +} + +ScalarEvolution::ExitLimit +ScalarEvolution::HowManyGreaterThans(const SCEV *LHS, const SCEV *RHS, + const Loop *L, bool IsSigned, + bool IsSubExpr) { + // We handle only IV > Invariant + if (!isLoopInvariant(RHS, L)) + return getCouldNotCompute(); + + const SCEVAddRecExpr *IV = dyn_cast(LHS); + + // Avoid weird loops + if (!IV || IV->getLoop() != L || !IV->isAffine()) + return getCouldNotCompute(); + + bool NoWrap = !IsSubExpr && + IV->getNoWrapFlags(IsSigned ? SCEV::FlagNSW : SCEV::FlagNUW); + + const SCEV *Stride = getNegativeSCEV(IV->getStepRecurrence(*this)); + + // Avoid negative or zero stride values + if (!isKnownPositive(Stride)) + return getCouldNotCompute(); + + // Avoid proven overflow cases: this will ensure that the backedge taken count + // will not generate any unsigned overflow. Relaxed no-overflow conditions + // exploit NoWrapFlags, allowing to optimize in presence of undefined + // behaviors like the case of C language. + if (!Stride->isOne() && doesIVOverflowOnGT(RHS, Stride, IsSigned, NoWrap)) + return getCouldNotCompute(); + + ICmpInst::Predicate Cond = IsSigned ? ICmpInst::ICMP_SGT + : ICmpInst::ICMP_UGT; + + const SCEV *Start = IV->getStart(); + const SCEV *End = RHS; + if (!isLoopEntryGuardedByCond(L, Cond, getAddExpr(Start, Stride), RHS)) + End = IsSigned ? getSMinExpr(RHS, Start) + : getUMinExpr(RHS, Start); + + const SCEV *BECount = computeBECount(getMinusSCEV(Start, End), Stride, false); + + APInt MaxStart = IsSigned ? getSignedRange(Start).getSignedMax() + : getUnsignedRange(Start).getUnsignedMax(); + + APInt MinStride = IsSigned ? getSignedRange(Stride).getSignedMin() + : getUnsignedRange(Stride).getUnsignedMin(); + + unsigned BitWidth = getTypeSizeInBits(LHS->getType()); + APInt Limit = IsSigned ? APInt::getSignedMinValue(BitWidth) + (MinStride - 1) + : APInt::getMinValue(BitWidth) + (MinStride - 1); + + // Although End can be a MIN expression we estimate MinEnd considering only + // the case End = RHS. This is safe because in the other case (Start - End) + // is zero, leading to a zero maximum backedge taken count. + APInt MinEnd = + IsSigned ? 
APIntOps::smax(getSignedRange(RHS).getSignedMin(), Limit) + : APIntOps::umax(getUnsignedRange(RHS).getUnsignedMin(), Limit); + + + const SCEV *MaxBECount = getCouldNotCompute(); + if (isa(BECount)) + MaxBECount = BECount; + else + MaxBECount = computeBECount(getConstant(MaxStart - MinEnd), + getConstant(MinStride), false); + + if (isa(MaxBECount)) + MaxBECount = BECount; + + return ExitLimit(BECount, MaxBECount); } /// getNumIterationsInRange - Return the number of iterations of this loop that @@ -6586,7 +6677,534 @@ const SCEV *SCEVAddRecExpr::getNumIterationsInRange(ConstantRange Range, return SE.getCouldNotCompute(); } +static const APInt gcd(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue().abs(); + APInt B = C2->getValue()->getValue().abs(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + if (ABW > BBW) + B = B.zext(ABW); + else if (ABW < BBW) + A = A.zext(BBW); + + return APIntOps::GreatestCommonDivisor(A, B); +} + +static const APInt srem(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.sext(ABW); + else if (ABW < BBW) + A = A.sext(BBW); + + return APIntOps::srem(A, B); +} + +static const APInt sdiv(const SCEVConstant *C1, const SCEVConstant *C2) { + APInt A = C1->getValue()->getValue(); + APInt B = C2->getValue()->getValue(); + uint32_t ABW = A.getBitWidth(); + uint32_t BBW = B.getBitWidth(); + + if (ABW > BBW) + B = B.sext(ABW); + else if (ABW < BBW) + A = A.sext(BBW); + + return APIntOps::sdiv(A, B); +} + +namespace { +struct SCEVGCD : public SCEVVisitor { +public: + // Pattern match Step into Start. When Step is a multiply expression, find + // the largest subexpression of Step that appears in Start. When Start is an + // add expression, try to match Step in the subexpressions of Start, non + // matching subexpressions are returned under Remainder. + static const SCEV *findGCD(ScalarEvolution &SE, const SCEV *Start, + const SCEV *Step, const SCEV **Remainder) { + assert(Remainder && "Remainder should not be NULL"); + SCEVGCD R(SE, Step, SE.getConstant(Step->getType(), 0)); + const SCEV *Res = R.visit(Start); + *Remainder = R.Remainder; + return Res; + } + + SCEVGCD(ScalarEvolution &S, const SCEV *G, const SCEV *R) + : SE(S), GCD(G), Remainder(R) { + Zero = SE.getConstant(GCD->getType(), 0); + One = SE.getConstant(GCD->getType(), 1); + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + if (GCD == Constant || Constant == Zero) + return GCD; + + if (const SCEVConstant *CGCD = dyn_cast(GCD)) { + const SCEV *Res = SE.getConstant(gcd(Constant, CGCD)); + if (Res != One) + return Res; + + Remainder = SE.getConstant(srem(Constant, CGCD)); + Constant = cast(SE.getMinusSCEV(Constant, Remainder)); + Res = SE.getConstant(gcd(Constant, CGCD)); + return Res; + } + + // When GCD is not a constant, it could be that the GCD is an Add, Mul, + // AddRec, etc., in which case we want to find out how many times the + // Constant divides the GCD: we then return that as the new GCD. 
+ const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, GCD, Constant, &Rem); + + if (Res == One || Rem != Zero) { + Remainder = Constant; + return One; + } + + assert(isa(Res) && "Res should be a constant"); + Remainder = SE.getConstant(srem(Constant, cast(Res))); + return Res; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + if (GCD == Expr) + return GCD; + + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(e - 1 - i), GCD, &Rem); + + // FIXME: There may be ambiguous situations: for instance, + // GCD(-4 + (3 * %m), 2 * %m) where 2 divides -4 and %m divides (3 * %m). + // The order in which the AddExpr is traversed computes a different GCD + // and Remainder. + if (Res != One) + GCD = Res; + if (Rem != Zero) + Remainder = SE.getAddExpr(Remainder, Rem); + } + + return GCD; + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + if (GCD == Expr) + return GCD; + + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (Expr->getOperand(i) == GCD) + return GCD; + } + + // If we have not returned yet, it means that GCD is not part of Expr. + const SCEV *PartialGCD = One; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(i), GCD, &Rem); + if (Rem != Zero) + // GCD does not divide Expr->getOperand(i). 
+ continue; + + if (Res == GCD) + return GCD; + PartialGCD = SE.getMulExpr(PartialGCD, Res); + if (PartialGCD == GCD) + return GCD; + } + + if (PartialGCD != One) + return PartialGCD; + + Remainder = Expr; + const SCEVMulExpr *Mul = dyn_cast(GCD); + if (!Mul) + return PartialGCD; + + // When the GCD is a multiply expression, try to decompose it: + // this occurs when Step does not divide the Start expression + // as in: {(-4 + (3 * %m)),+,(2 * %m)} + for (int i = 0, e = Mul->getNumOperands(); i < e; ++i) { + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr, Mul->getOperand(i), &Rem); + if (Rem == Zero) { + Remainder = Rem; + return Res; + } + } + + return PartialGCD; + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (GCD == Expr) + return GCD; + + if (!Expr->isAffine()) { + Remainder = Expr; + return GCD; + } + + const SCEV *Rem = Zero; + const SCEV *Res = findGCD(SE, Expr->getOperand(0), GCD, &Rem); + if (Rem != Zero) + Remainder = SE.getAddExpr(Remainder, Rem); + + Rem = Zero; + Res = findGCD(SE, Expr->getOperand(1), Res, &Rem); + if (Rem != Zero) { + Remainder = Expr; + return GCD; + } + + return Res; + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (GCD != Expr) + Remainder = Expr; + return GCD; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return One; + } + +private: + ScalarEvolution &SE; + const SCEV *GCD, *Remainder, *Zero, *One; +}; + +struct SCEVDivision : public SCEVVisitor { +public: + // Remove from Start all multiples of Step. + static const SCEV *divide(ScalarEvolution &SE, const SCEV *Start, + const SCEV *Step) { + SCEVDivision D(SE, Step); + const SCEV *Rem = D.Zero; + (void)Rem; + // The division is guaranteed to succeed: Step should divide Start with no + // remainder. 
+ assert(Step == SCEVGCD::findGCD(SE, Start, Step, &Rem) && Rem == D.Zero && + "Step should divide Start with no remainder."); + return D.visit(Start); + } + + SCEVDivision(ScalarEvolution &S, const SCEV *G) : SE(S), GCD(G) { + Zero = SE.getConstant(GCD->getType(), 0); + One = SE.getConstant(GCD->getType(), 1); + } + + const SCEV *visitConstant(const SCEVConstant *Constant) { + if (GCD == Constant) + return One; + + if (const SCEVConstant *CGCD = dyn_cast(GCD)) + return SE.getConstant(sdiv(Constant, CGCD)); + return Constant; + } + + const SCEV *visitTruncateExpr(const SCEVTruncateExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitZeroExtendExpr(const SCEVZeroExtendExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitSignExtendExpr(const SCEVSignExtendExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitAddExpr(const SCEVAddExpr *Expr) { + if (GCD == Expr) + return One; + + SmallVector Operands; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + + if (Operands.size() == 1) + return Operands[0]; + return SE.getAddExpr(Operands); + } + + const SCEV *visitMulExpr(const SCEVMulExpr *Expr) { + if (GCD == Expr) + return One; + + bool FoundGCDTerm = false; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) + if (Expr->getOperand(i) == GCD) + FoundGCDTerm = true; + + SmallVector Operands; + if (FoundGCDTerm) { + FoundGCDTerm = false; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (FoundGCDTerm) + Operands.push_back(Expr->getOperand(i)); + else if (Expr->getOperand(i) == GCD) + FoundGCDTerm = true; + else + Operands.push_back(Expr->getOperand(i)); + } + } else { + FoundGCDTerm = false; + const SCEV *PartialGCD = One; + for (int i = 0, e = Expr->getNumOperands(); i < e; ++i) { + if (PartialGCD == GCD) { + Operands.push_back(Expr->getOperand(i)); + continue; + } + + const SCEV *Rem = Zero; + const SCEV *Res = SCEVGCD::findGCD(SE, Expr->getOperand(i), GCD, &Rem); + if (Rem == Zero) { + PartialGCD = SE.getMulExpr(PartialGCD, Res); + Operands.push_back(divide(SE, Expr->getOperand(i), GCD)); + } else { + Operands.push_back(Expr->getOperand(i)); + } + } + } + + if (Operands.size() == 1) + return Operands[0]; + return SE.getMulExpr(Operands); + } + + const SCEV *visitUDivExpr(const SCEVUDivExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitAddRecExpr(const SCEVAddRecExpr *Expr) { + if (GCD == Expr) + return One; + + assert(Expr->isAffine() && "Expr should be affine"); + + const SCEV *Start = divide(SE, Expr->getStart(), GCD); + const SCEV *Step = divide(SE, Expr->getStepRecurrence(SE), GCD); + + return SE.getAddRecExpr(Start, Step, Expr->getLoop(), + Expr->getNoWrapFlags()); + } + + const SCEV *visitSMaxExpr(const SCEVSMaxExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitUMaxExpr(const SCEVUMaxExpr *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitUnknown(const SCEVUnknown *Expr) { + if (GCD == Expr) + return One; + return Expr; + } + + const SCEV *visitCouldNotCompute(const SCEVCouldNotCompute *Expr) { + return Expr; + } + +private: + ScalarEvolution &SE; + const SCEV *GCD, *Zero, *One; +}; +} + +/// Splits the SCEV into two vectors of SCEVs representing the subscripts and +/// sizes of an array access. Returns the remainder of the delinearization that +/// is the offset start of the array. 
The SCEV->delinearize algorithm computes
+/// the multiples of SCEV coefficients: that is, a pattern matching of
+/// subexpressions in the stride and base of a SCEV corresponding to the
+/// computation of a GCD (greatest common divisor) of base and stride. When
+/// SCEV->delinearize fails, it returns the SCEV unchanged.
+///
+/// For example: when analyzing the memory access A[i][j][k] in this loop nest
+///
+///  void foo(long n, long m, long o, double A[n][m][o]) {
+///
+///    for (long i = 0; i < n; i++)
+///      for (long j = 0; j < m; j++)
+///        for (long k = 0; k < o; k++)
+///          A[i][j][k] = 1.0;
+///  }
+///
+/// the delinearization input is the following AddRec SCEV:
+///
+///  AddRec: {{{%A,+,(8 * %m * %o)}<%for.i>,+,(8 * %o)}<%for.j>,+,8}<%for.k>
+///
+/// From this SCEV, we are able to say that the base offset of the access is %A
+/// because it appears as an offset that does not divide any of the strides in
+/// the loops:
+///
+///  CHECK: Base offset: %A
+///
+/// and then SCEV->delinearize determines the size of some of the dimensions of
+/// the array as these are the multiples by which the strides are happening:
+///
+///  CHECK: ArrayDecl[UnknownSize][%m][%o] with elements of sizeof(double) bytes.
+///
+/// Note that the outermost dimension remains of UnknownSize because there are
+/// no strides that would help identify the size of the last dimension: when
+/// the array has been statically allocated, one could compute the size of that
+/// dimension by dividing the overall size of the array by the size of the
+/// known dimensions: %m * %o * 8.
+///
+/// Finally, delinearize provides the access functions for the array reference
+/// that corresponds to A[i][j][k] of the above C testcase:
+///
+///  CHECK: ArrayRef[{0,+,1}<%for.i>][{0,+,1}<%for.j>][{0,+,1}<%for.k>]
+///
+/// The testcases are checking the output of a function pass:
+/// DelinearizationPass that walks through all loads and stores of a function
+/// asking for the SCEV of the memory access with respect to all enclosing
+/// loops, calling SCEV->delinearize on that and printing the results.
+
+const SCEV *
+SCEVAddRecExpr::delinearize(ScalarEvolution &SE,
+                            SmallVectorImpl<const SCEV *> &Subscripts,
+                            SmallVectorImpl<const SCEV *> &Sizes) const {
+  // Early exit in case this SCEV is not an affine multivariate function.
+  if (!this->isAffine())
+    return this;
+
+  const SCEV *Start = this->getStart();
+  const SCEV *Step = this->getStepRecurrence(SE);
+
+  // Build the SCEV representation of the canonical induction variable in the
+  // loop of this SCEV.
+  const SCEV *Zero = SE.getConstant(this->getType(), 0);
+  const SCEV *One = SE.getConstant(this->getType(), 1);
+  const SCEV *IV =
+    SE.getAddRecExpr(Zero, One, this->getLoop(), this->getNoWrapFlags());
+
+  DEBUG(dbgs() << "(delinearize: " << *this << "\n");
+
+  // Currently we fail to delinearize when the stride of this SCEV is 1. We
+  // could decide to not fail in this case: we could just return 1 for the size
+  // of the subscript, and this same SCEV for the access function.
+  if (Step == One) {
+    DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
+    return this;
+  }
+
+  // Find the GCD and Remainder of the Start and Step coefficients of this SCEV.
+  const SCEV *Remainder = NULL;
+  const SCEV *GCD = SCEVGCD::findGCD(SE, Start, Step, &Remainder);
+
+  DEBUG(dbgs() << "GCD: " << *GCD << "\n");
+  DEBUG(dbgs() << "Remainder: " << *Remainder << "\n");
+
+  // Same remark as above: we currently fail the delinearization, although we
+  // can very well handle this special case.
+  if (GCD == One) {
+    DEBUG(dbgs() << "failed to delinearize " << *this << "\n)\n");
+    return this;
+  }
+
+  // As findGCD computed Remainder, GCD divides "Start - Remainder." The
+  // Quotient is then this SCEV without Remainder, scaled down by the GCD. The
+  // Quotient is what will be used in the next subscript delinearization.
+  const SCEV *Quotient =
+    SCEVDivision::divide(SE, SE.getMinusSCEV(Start, Remainder), GCD);
+  DEBUG(dbgs() << "Quotient: " << *Quotient << "\n");
+
+  const SCEV *Rem;
+  if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(Quotient))
+    // Recursively call delinearize on the Quotient until there are no more
+    // multiples that can be recognized.
+    Rem = AR->delinearize(SE, Subscripts, Sizes);
+  else
+    Rem = Quotient;
+
+  // Scale up the canonical induction variable IV by whatever remains from the
+  // Step after division by the GCD: the GCD is the size of all the sub-arrays.
+  if (Step != GCD) {
+    Step = SCEVDivision::divide(SE, Step, GCD);
+    IV = SE.getMulExpr(IV, Step);
+  }
+  // The access function in the current subscript is computed as the canonical
+  // induction variable IV (potentially scaled up by the step) and offset by
+  // Rem, the offset of delinearization in the sub-array.
+  const SCEV *Index = SE.getAddExpr(IV, Rem);
+
+  // Record the access function and the size of the current subscript.
+  Subscripts.push_back(Index);
+  Sizes.push_back(GCD);
+
+#ifndef NDEBUG
+  int Size = Sizes.size();
+  DEBUG(dbgs() << "succeeded to delinearize " << *this << "\n");
+  DEBUG(dbgs() << "ArrayDecl[UnknownSize]");
+  for (int i = 0; i < Size - 1; i++)
+    DEBUG(dbgs() << "[" << *Sizes[i] << "]");
+  DEBUG(dbgs() << " with elements of " << *Sizes[Size - 1] << " bytes.\n");
+
+  DEBUG(dbgs() << "ArrayRef");
+  for (int i = 0; i < Size; i++)
+    DEBUG(dbgs() << "[" << *Subscripts[i] << "]");
+  DEBUG(dbgs() << "\n)\n");
+#endif
+
+  return Remainder;
+}
 
 //===----------------------------------------------------------------------===//
 //                   SCEVCallbackVH Class Implementation
 //===----------------------------------------------------------------------===//
 
@@ -6642,7 +7260,7 @@ ScalarEvolution::SCEVCallbackVH::SCEVCallbackVH(Value *V, ScalarEvolution *se)
 //===----------------------------------------------------------------------===//
 
 ScalarEvolution::ScalarEvolution()
-  : FunctionPass(ID), FirstUnknown(0) {
+  : FunctionPass(ID), ValuesAtScopes(64), LoopDispositions(64), BlockDispositions(64), FirstUnknown(0) {
   initializeScalarEvolutionPass(*PassRegistry::getPassRegistry());
 }
 
@@ -6780,14 +7398,21 @@ void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
 
 ScalarEvolution::LoopDisposition
 ScalarEvolution::getLoopDisposition(const SCEV *S, const Loop *L) {
-  std::map<const Loop *, LoopDisposition> &Values = LoopDispositions[S];
-  std::pair<std::map<const Loop *, LoopDisposition>::iterator, bool> Pair =
-    Values.insert(std::make_pair(L, LoopVariant));
-  if (!Pair.second)
-    return Pair.first->second;
-
+  SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values = LoopDispositions[S];
+  for (unsigned u = 0; u < Values.size(); u++) {
+    if (Values[u].first == L)
+      return Values[u].second;
+  }
+  Values.push_back(std::make_pair(L, LoopVariant));
   LoopDisposition D = computeLoopDisposition(S, L);
-  return LoopDispositions[S][L] = D;
+  SmallVector<std::pair<const Loop *, LoopDisposition>, 2> &Values2 = LoopDispositions[S];
+  for (unsigned u = Values2.size(); u > 0; u--) {
+    if (Values2[u - 1].first == L) {
+      Values2[u - 1].second = D;
+      break;
+    }
+  }
+  return D;
 }
 
 ScalarEvolution::LoopDisposition
@@ -6879,14 +7504,21 @@ bool ScalarEvolution::hasComputableLoopEvolution(const SCEV *S, const Loop *L) {
 
 ScalarEvolution::BlockDisposition
 ScalarEvolution::getBlockDisposition(const SCEV *S, const BasicBlock *BB) {
-  std::map<const BasicBlock *, BlockDisposition> 
&Values = BlockDispositions[S]; - std::pair::iterator, bool> - Pair = Values.insert(std::make_pair(BB, DoesNotDominateBlock)); - if (!Pair.second) - return Pair.first->second; - + SmallVector, 2> &Values = BlockDispositions[S]; + for (unsigned u = 0; u < Values.size(); u++) { + if (Values[u].first == BB) + return Values[u].second; + } + Values.push_back(std::make_pair(BB, DoesNotDominateBlock)); BlockDisposition D = computeBlockDisposition(S, BB); - return BlockDispositions[S][BB] = D; + SmallVector, 2> &Values2 = BlockDispositions[S]; + for (unsigned u = Values2.size(); u > 0; u--) { + if (Values2[u - 1].first == BB) { + Values2[u - 1].second = D; + break; + } + } + return D; } ScalarEvolution::BlockDisposition diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp index fcd7ce2..86a557b 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionExpander.cpp @@ -14,6 +14,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" @@ -176,8 +177,8 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + DebugLoc Loc = Builder.GetInsertPoint()->getDebugLoc(); + BuilderType::InsertPointGuard Guard(Builder); // Move the insertion point out of as many loops as we can. while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { @@ -191,13 +192,9 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode, // If we haven't found this binop, insert it. Instruction *BO = cast(Builder.CreateBinOp(Opcode, LHS, RHS)); - BO->setDebugLoc(SaveInsertPt->getDebugLoc()); + BO->setDebugLoc(Loc); rememberInstruction(BO); - // Restore the original insert point. - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); - return BO; } @@ -294,8 +291,8 @@ static bool FactorOutConstant(const SCEV *&S, const SCEV *Start = A->getStart(); if (!FactorOutConstant(Start, Remainder, Factor, SE, TD)) return false; - // FIXME: can use A->getNoWrapFlags(FlagNW) - S = SE.getAddRecExpr(Start, Step, A->getLoop(), SCEV::FlagAnyWrap); + S = SE.getAddRecExpr(Start, Step, A->getLoop(), + A->getNoWrapFlags(SCEV::FlagNW)); return true; } @@ -348,8 +345,7 @@ static void SplitAddRecs(SmallVectorImpl &Ops, AddRecs.push_back(SE.getAddRecExpr(Zero, A->getStepRecurrence(SE), A->getLoop(), - // FIXME: A->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + A->getNoWrapFlags(SCEV::FlagNW))); if (const SCEVAddExpr *Add = dyn_cast(Start)) { Ops[i] = Zero; Ops.append(Add->op_begin(), Add->op_end()); @@ -407,6 +403,10 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // without the other. SplitAddRecs(Ops, Ty, SE); + Type *IntPtrTy = SE.TD + ? SE.TD->getIntPtrType(PTy) + : Type::getInt64Ty(PTy->getContext()); + // Descend down the pointer's type and attempt to convert the other // operands into GEP indices, at each level. The first index in a GEP // indexes into the array implied by the pointer operand; the rest of @@ -417,7 +417,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, // array indexing. 
SmallVector ScaledOps; if (ElTy->isSized()) { - const SCEV *ElSize = SE.getSizeOfExpr(ElTy); + const SCEV *ElSize = SE.getSizeOfExpr(IntPtrTy, ElTy); if (!ElSize->isZero()) { SmallVector NewOps; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { @@ -549,8 +549,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); // Move the insertion point out of as many loops as we can. while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { @@ -566,16 +565,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, Value *GEP = Builder.CreateGEP(V, Idx, "uglygep"); rememberInstruction(GEP); - // Restore the original insert point. - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); - return GEP; } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPoint SaveInsertPt = Builder.saveIP(); // Move the insertion point out of as many loops as we can. while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) { @@ -611,8 +605,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin, rememberInstruction(GEP); // Restore the original insert point. - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); + Builder.restoreIP(SaveInsertPt); return expand(SE.getAddExpr(Ops)); } @@ -846,8 +839,7 @@ static void ExposePointerBase(const SCEV *&Base, const SCEV *&Rest, SE.getAddRecExpr(SE.getConstant(A->getType(), 0), A->getStepRecurrence(SE), A->getLoop(), - // FIXME: A->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + A->getNoWrapFlags(SCEV::FlagNW))); } if (const SCEVAddExpr *A = dyn_cast(Base)) { Base = A->getOperand(A->getNumOperands()-1); @@ -1078,8 +1070,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, } // Save the original insertion point so we can restore it when we're done. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); // Another AddRec may need to be recursively expanded below. For example, if // this AddRec is quadratic, the StepV may itself be an AddRec in this @@ -1137,14 +1128,15 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized, IVIncInsertPos : Pred->getTerminator(); Builder.SetInsertPoint(InsertPos); Value *IncV = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); - + if (isa(IncV)) { + if (Normalized->getNoWrapFlags(SCEV::FlagNUW)) + cast(IncV)->setHasNoUnsignedWrap(); + if (Normalized->getNoWrapFlags(SCEV::FlagNSW)) + cast(IncV)->setHasNoSignedWrap(); + } PN->addIncoming(IncV, Pred); } - // Restore the original insert point. - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); - // After expanding subexpressions, restore the PostIncLoops set so the caller // can ensure that IVIncrement dominates the current uses. 
PostIncLoops = SavedPostIncLoops; @@ -1180,8 +1172,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { Normalized = cast( SE.getAddRecExpr(Start, Normalized->getStepRecurrence(SE), Normalized->getLoop(), - // FIXME: Normalized->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + Normalized->getNoWrapFlags(SCEV::FlagNW))); } // Strip off any non-loop-dominating component from the addrec step. @@ -1191,11 +1182,9 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { PostLoopScale = Step; Step = SE.getConstant(Normalized->getType(), 1); Normalized = - cast(SE.getAddRecExpr(Start, Step, - Normalized->getLoop(), - // FIXME: Normalized - // ->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); + cast(SE.getAddRecExpr( + Start, Step, Normalized->getLoop(), + Normalized->getNoWrapFlags(SCEV::FlagNW))); } // Expand the core addrec. If we need post-loop scaling, force it to @@ -1232,19 +1221,19 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) { !ExpandTy->isPointerTy() && Step->isNonConstantNegative(); if (useSubtract) Step = SE.getNegativeSCEV(Step); - // Expand the step somewhere that dominates the loop header. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); - Value *StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); - // Restore the insertion point to the place where the caller has - // determined dominates all uses. - restoreInsertPoint(SaveInsertBB, SaveInsertPt); + Value *StepV; + { + // Expand the step somewhere that dominates the loop header. + BuilderType::InsertPointGuard Guard(Builder); + StepV = expandCodeFor(Step, IntTy, L->getHeader()->begin()); + } Result = expandIVInc(PN, StepV, L, ExpandTy, IntTy, useSubtract); } } // Re-apply any non-loop-dominating scale. if (PostLoopScale) { + assert(S->isAffine() && "Can't linearly scale non-affine recurrences."); Result = InsertNoopCastOfTo(Result, IntTy); Result = Builder.CreateMul(Result, expandCodeFor(PostLoopScale, IntTy)); @@ -1288,18 +1277,15 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { for (unsigned i = 0, e = S->getNumOperands(); i != e; ++i) NewOps[i] = SE.getAnyExtendExpr(S->op_begin()[i], CanonicalIV->getType()); Value *V = expand(SE.getAddRecExpr(NewOps, S->getLoop(), - // FIXME: S->getNoWrapFlags(FlagNW) - SCEV::FlagAnyWrap)); - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + S->getNoWrapFlags(SCEV::FlagNW))); BasicBlock::iterator NewInsertPt = llvm::next(BasicBlock::iterator(cast(V))); + BuilderType::InsertPointGuard Guard(Builder); while (isa(NewInsertPt) || isa(NewInsertPt) || isa(NewInsertPt)) ++NewInsertPt; V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0, NewInsertPt); - restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -1307,8 +1293,8 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { if (!S->getStart()->isZero()) { SmallVector NewOps(S->op_begin(), S->op_end()); NewOps[0] = SE.getConstant(Ty, 0); - // FIXME: can use S->getNoWrapFlags() - const SCEV *Rest = SE.getAddRecExpr(NewOps, L, SCEV::FlagAnyWrap); + const SCEV *Rest = SE.getAddRecExpr(NewOps, L, + S->getNoWrapFlags(SCEV::FlagNW)); // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the // comments on expandAddToGEP for details. 
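Several hunks in ScalarEvolutionExpander.cpp replace the matched SaveInsertBB/SaveInsertPt bookkeeping and restoreInsertPoint() calls with BuilderType::InsertPointGuard, an RAII helper that snapshots the IRBuilder's insertion point (and debug location) and restores it when the guard leaves scope, so early returns can no longer leak a clobbered insertion point. A minimal sketch of the idiom; the function and its arguments are hypothetical:

#include "llvm/IR/IRBuilder.h"
using namespace llvm;

static Value *emitAtBlockTop(IRBuilder<> &Builder, BasicBlock *BB,
                             Value *L, Value *R) {
  // Saves the current insertion block/point now...
  IRBuilder<>::InsertPointGuard Guard(Builder);
  Builder.SetInsertPoint(BB, BB->getFirstInsertionPt());
  if (!L || !R)
    return 0; // ...and restores it even on this early exit.
  return Builder.CreateAdd(L, R, "tmp");
}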
@@ -1343,9 +1329,13 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) { Header->begin()); rememberInstruction(CanonicalIV); + SmallSet PredSeen; Constant *One = ConstantInt::get(Ty, 1); for (pred_iterator HPI = HPB; HPI != HPE; ++HPI) { BasicBlock *HP = *HPI; + if (!PredSeen.insert(HP)) + continue; + if (L->contains(HP)) { // Insert a unit add instruction right before the terminator // corresponding to the back-edge. @@ -1528,8 +1518,7 @@ Value *SCEVExpander::expand(const SCEV *S) { if (I != InsertedExpressions.end()) return I->second; - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); Builder.SetInsertPoint(InsertPt->getParent(), InsertPt); // Expand the expression into instructions. @@ -1542,8 +1531,6 @@ Value *SCEVExpander::expand(const SCEV *S) { // a postinc expansion, it could be reused by a non postinc user, but only if // its insertion point was already at the head of the loop. InsertedExpressions[std::make_pair(S, InsertPt)] = V; - - restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -1554,10 +1541,6 @@ void SCEVExpander::rememberInstruction(Value *I) { InsertedValues.insert(I); } -void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) { - Builder.SetInsertPoint(BB, I); -} - /// getOrInsertCanonicalInductionVariable - This method returns the /// canonical induction variable of the specified type for the specified /// loop (inserting one if there is none). A canonical induction variable @@ -1573,11 +1556,8 @@ SCEVExpander::getOrInsertCanonicalInductionVariable(const Loop *L, SE.getConstant(Ty, 1), L, SCEV::FlagAnyWrap); // Emit code for it. - BasicBlock *SaveInsertBB = Builder.GetInsertBlock(); - BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint(); + BuilderType::InsertPointGuard Guard(Builder); PHINode *V = cast(expandCodeFor(H, 0, L->getHeader()->begin())); - if (SaveInsertBB) - restoreInsertPoint(SaveInsertBB, SaveInsertPt); return V; } @@ -1725,28 +1705,43 @@ namespace { // Currently, we only allow division by a nonzero constant here. If this is // inadequate, we could easily allow division by SCEVUnknown by using // ValueTracking to check isKnownNonZero(). +// +// We cannot generally expand recurrences unless the step dominates the loop +// header. The expander handles the special case of affine recurrences by +// scaling the recurrence outside the loop, but this technique isn't generally +// applicable. Expanding a nested recurrence outside a loop requires computing +// binomial coefficients. This could be done, but the recurrence has to be in a +// perfectly reduced form, which can't be guaranteed. 
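Clients reach this check through isSafeToExpand(), whose signature gains a ScalarEvolution parameter below so the visitor can inspect step recurrences. A caller-side sketch under that assumption (the helper function and its naming are hypothetical):

#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
using namespace llvm;

static Value *tryExpandCount(const SCEV *Count, Type *Ty,
                             Instruction *InsertPt, ScalarEvolution &SE) {
  // Refuse udiv by a possibly-zero value and non-affine recurrences whose
  // step does not dominate the loop header.
  if (!isSafeToExpand(Count, SE))
    return 0; // caller keeps the loop un-transformed
  SCEVExpander Expander(SE, "becount");
  return Expander.expandCodeFor(Count, Ty, InsertPt);
}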
struct SCEVFindUnsafe { + ScalarEvolution &SE; bool IsUnsafe; - SCEVFindUnsafe(): IsUnsafe(false) {} + SCEVFindUnsafe(ScalarEvolution &se): SE(se), IsUnsafe(false) {} bool follow(const SCEV *S) { - const SCEVUDivExpr *D = dyn_cast(S); - if (!D) - return true; - const SCEVConstant *SC = dyn_cast(D->getRHS()); - if (SC && !SC->getValue()->isZero()) - return true; - IsUnsafe = true; - return false; + if (const SCEVUDivExpr *D = dyn_cast(S)) { + const SCEVConstant *SC = dyn_cast(D->getRHS()); + if (!SC || SC->getValue()->isZero()) { + IsUnsafe = true; + return false; + } + } + if (const SCEVAddRecExpr *AR = dyn_cast(S)) { + const SCEV *Step = AR->getStepRecurrence(SE); + if (!AR->isAffine() && !SE.dominates(Step, AR->getLoop()->getHeader())) { + IsUnsafe = true; + return false; + } + } + return true; } bool isDone() const { return IsUnsafe; } }; } namespace llvm { -bool isSafeToExpand(const SCEV *S) { - SCEVFindUnsafe Search; +bool isSafeToExpand(const SCEV *S, ScalarEvolution &SE) { + SCEVFindUnsafe Search(SE); visitAll(S, Search); return !Search.IsUnsafe; } diff --git a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp index dd2ed4f..f110616 100644 --- a/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp +++ b/contrib/llvm/lib/Analysis/ScalarEvolutionNormalization.cpp @@ -119,11 +119,19 @@ TransformImpl(const SCEV *S, Instruction *User, Value *OperandValToReplace) { const SCEV *Result = SE.getAddRecExpr(Operands, L, SCEV::FlagAnyWrap); switch (Kind) { case NormalizeAutodetect: - if (IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { - const SCEV *TransformedStep = - TransformSubExpr(AR->getStepRecurrence(SE), - User, OperandValToReplace); - Result = SE.getMinusSCEV(Result, TransformedStep); + // Normalize this SCEV by subtracting the expression for the final step. + // We only allow affine AddRecs to be normalized, otherwise we would not + // be able to correctly denormalize. + // e.g. 
{1,+,3,+,2} == {-2,+,1,+,2} + {3,+,2} + // Normalized form: {-2,+,1,+,2} + // Denormalized form: {1,+,3,+,2} + // + // However, denormalization would use the a different step expression than + // normalization (see getPostIncExpr), generating the wrong final + // expression: {-2,+,1,+,2} + {1,+,2} => {-1,+,3,+,2} + if (AR->isAffine() && + IVUseShouldUsePostIncValue(User, OperandValToReplace, L, &DT)) { + Result = SE.getMinusSCEV(Result, AR->getStepRecurrence(SE)); Loops.insert(L); } #if 0 diff --git a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp index 64f8e96..0353295 100644 --- a/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp +++ b/contrib/llvm/lib/Analysis/TargetTransformInfo.cpp @@ -88,10 +88,19 @@ unsigned TargetTransformInfo::getUserCost(const User *U) const { return PrevTTI->getUserCost(U); } +bool TargetTransformInfo::hasBranchDivergence() const { + return PrevTTI->hasBranchDivergence(); +} + bool TargetTransformInfo::isLoweredToCall(const Function *F) const { return PrevTTI->isLoweredToCall(F); } +void TargetTransformInfo::getUnrollingPreferences(Loop *L, + UnrollingPreferences &UP) const { + PrevTTI->getUnrollingPreferences(L, UP); +} + bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { return PrevTTI->isLegalAddImmediate(Imm); } @@ -108,6 +117,14 @@ bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, Scale); } +int TargetTransformInfo::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, + bool HasBaseReg, + int64_t Scale) const { + return PrevTTI->getScalingFactorCost(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale); +} + bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { return PrevTTI->isTruncateFree(Ty1, Ty2); } @@ -133,6 +150,10 @@ TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { return PrevTTI->getPopcntSupport(IntTyWidthInBit); } +bool TargetTransformInfo::haveFastSqrt(Type *Ty) const { + return PrevTTI->haveFastSqrt(Ty); +} + unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { return PrevTTI->getIntImmCost(Imm, Ty); } @@ -198,8 +219,14 @@ unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { return PrevTTI->getNumberOfParts(Tp); } -unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const { - return PrevTTI->getAddressComputationCost(Tp); +unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp, + bool IsComplex) const { + return PrevTTI->getAddressComputationCost(Tp, IsComplex); +} + +unsigned TargetTransformInfo::getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwise) const { + return PrevTTI->getReductionCost(Opcode, Ty, IsPairwise); } namespace { @@ -252,26 +279,34 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { // Otherwise, the default basic cost is used. return TCC_Basic; - case Instruction::IntToPtr: + case Instruction::IntToPtr: { + if (!DL) + return TCC_Basic; + // An inttoptr cast is free so long as the input is a legal integer type // which doesn't contain values outside the range of a pointer. - if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) && - OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits()) + unsigned OpSize = OpTy->getScalarSizeInBits(); + if (DL->isLegalInteger(OpSize) && + OpSize <= DL->getPointerTypeSizeInBits(Ty)) return TCC_Free; // Otherwise it's not a no-op. 
return TCC_Basic; + } + case Instruction::PtrToInt: { + if (!DL) + return TCC_Basic; - case Instruction::PtrToInt: // A ptrtoint cast is free so long as the result is large enough to store // the pointer, and a legal integer type. - if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) && - Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits()) + unsigned DestSize = Ty->getScalarSizeInBits(); + if (DL->isLegalInteger(DestSize) && + DestSize >= DL->getPointerTypeSizeInBits(OpTy)) return TCC_Free; // Otherwise it's not a no-op. return TCC_Basic; - + } case Instruction::Trunc: // trunc to a native type is free (assuming the target has compare and // shift-right of the same width). @@ -411,6 +446,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { U->getOperand(0)->getType() : 0); } + bool hasBranchDivergence() const { return false; } + bool isLoweredToCall(const Function *F) const { // FIXME: These should almost certainly not be handled here, and instead // handled with the help of TLI or the target itself. This was largely @@ -442,6 +479,8 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return true; } + void getUnrollingPreferences(Loop *, UnrollingPreferences &) const { } + bool isLegalAddImmediate(int64_t Imm) const { return false; } @@ -457,6 +496,15 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return !BaseGV && BaseOffset == 0 && Scale <= 1; } + int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) const { + // Guess that all legal addressing mode are free. + if(isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, Scale)) + return 0; + return -1; + } + + bool isTruncateFree(Type *Ty1, Type *Ty2) const { return false; } @@ -481,6 +529,10 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return PSK_Software; } + bool haveFastSqrt(Type *Ty) const { + return false; + } + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const { return 1; } @@ -542,9 +594,13 @@ struct NoTTI : ImmutablePass, TargetTransformInfo { return 0; } - unsigned getAddressComputationCost(Type *Tp) const { + unsigned getAddressComputationCost(Type *Tp, bool) const { return 0; } + + unsigned getReductionCost(unsigned, Type *, bool) const { + return 1; + } }; } // end anonymous namespace diff --git a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp index bbf3c3a..6791d4b 100644 --- a/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/contrib/llvm/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -16,7 +16,12 @@ // typical C/C++ TBAA, but it can also be used to implement custom alias // analysis behavior for other languages. // -// The current metadata format is very simple. TBAA MDNodes have up to +// We now support two types of metadata format: scalar TBAA and struct-path +// aware TBAA. After all testing cases are upgraded to use struct-path aware +// TBAA and we can auto-upgrade existing bc files, the support for scalar TBAA +// can be dropped. +// +// The scalar TBAA metadata format is very simple. TBAA MDNodes have up to // three fields, e.g.: // !0 = metadata !{ metadata !"an example type tree" } // !1 = metadata !{ metadata !"int", metadata !0 } @@ -40,6 +45,65 @@ // should return true; see // http://llvm.org/docs/AliasAnalysis.html#OtherItfs). // +// With struct-path aware TBAA, the MDNodes attached to an instruction using +// "!tbaa" are called path tag nodes. +// +// The path tag node has 4 fields with the last field being optional. 
+// +// The first field is the base type node, it can be a struct type node +// or a scalar type node. The second field is the access type node, it +// must be a scalar type node. The third field is the offset into the base type. +// The last field has the same meaning as the last field of our scalar TBAA: +// it's an integer which if equal to 1 indicates that the access is "constant". +// +// The struct type node has a name and a list of pairs, one pair for each member +// of the struct. The first element of each pair is a type node (a struct type +// node or a sclar type node), specifying the type of the member, the second +// element of each pair is the offset of the member. +// +// Given an example +// typedef struct { +// short s; +// } A; +// typedef struct { +// uint16_t s; +// A a; +// } B; +// +// For an acess to B.a.s, we attach !5 (a path tag node) to the load/store +// instruction. The base type is !4 (struct B), the access type is !2 (scalar +// type short) and the offset is 4. +// +// !0 = metadata !{metadata !"Simple C/C++ TBAA"} +// !1 = metadata !{metadata !"omnipotent char", metadata !0} // Scalar type node +// !2 = metadata !{metadata !"short", metadata !1} // Scalar type node +// !3 = metadata !{metadata !"A", metadata !2, i64 0} // Struct type node +// !4 = metadata !{metadata !"B", metadata !2, i64 0, metadata !3, i64 4} +// // Struct type node +// !5 = metadata !{metadata !4, metadata !2, i64 4} // Path tag node +// +// The struct type nodes and the scalar type nodes form a type DAG. +// Root (!0) +// char (!1) -- edge to Root +// short (!2) -- edge to char +// A (!3) -- edge with offset 0 to short +// B (!4) -- edge with offset 0 to short and edge with offset 4 to A +// +// To check if two tags (tagX and tagY) can alias, we start from the base type +// of tagX, follow the edge with the correct offset in the type DAG and adjust +// the offset until we reach the base type of tagY or until we reach the Root +// node. +// If we reach the base type of tagY, compare the adjusted offset with +// offset of tagY, return Alias if the offsets are the same, return NoAlias +// otherwise. +// If we reach the Root node, perform the above starting from base type of tagY +// to see if we reach base type of tagX. +// +// If they have different roots, they're part of different potentially +// unrelated type systems, so we return Alias to be conservative. +// If neither node is an ancestor of the other and they have the same root, +// then we say NoAlias. +// // TODO: The current metadata format doesn't support struct // fields. For example: // struct X { @@ -71,7 +135,6 @@ using namespace llvm; // achieved by stripping the !tbaa tags from IR, but this option is sometimes // more convenient. static cl::opt EnableTBAA("enable-tbaa", cl::init(true)); -static cl::opt EnableStructPathTBAA("struct-path-tbaa", cl::init(false)); namespace { /// TBAANode - This is a simple wrapper around an MDNode which provides a @@ -168,8 +231,12 @@ namespace { if (Node->getNumOperands() < 2) return TBAAStructTypeNode(); - // Special handling for a scalar type node. + // Fast path for a scalar type node and a struct type node with a single + // field. if (Node->getNumOperands() <= 3) { + uint64_t Cur = Node->getNumOperands() == 2 ? 
0 : + cast(Node->getOperand(2))->getZExtValue(); + Offset -= Cur; MDNode *P = dyn_cast_or_null(Node->getOperand(1)); if (!P) return TBAAStructTypeNode(); @@ -259,12 +326,21 @@ TypeBasedAliasAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { AliasAnalysis::getAnalysisUsage(AU); } +/// Check the first operand of the tbaa tag node, if it is a MDNode, we treat +/// it as struct-path aware TBAA format, otherwise, we treat it as scalar TBAA +/// format. +static bool isStructPathTBAA(const MDNode *MD) { + // Anonymous TBAA root starts with a MDNode and dragonegg uses it as + // a TBAA tag. + return isa(MD->getOperand(0)) && MD->getNumOperands() >= 3; +} + /// Aliases - Test whether the type represented by A may alias the /// type represented by B. bool TypeBasedAliasAnalysis::Aliases(const MDNode *A, const MDNode *B) const { - if (EnableStructPathTBAA) + if (isStructPathTBAA(A)) return PathAliases(A, B); // Keep track of the root node for A and B. @@ -397,8 +473,8 @@ bool TypeBasedAliasAnalysis::pointsToConstantMemory(const Location &Loc, // If this is an "immutable" type, we can assume the pointer is pointing // to constant memory. - if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) || - (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable())) + if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || + (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) return true; return AliasAnalysis::pointsToConstantMemory(Loc, OrLocal); @@ -414,8 +490,8 @@ TypeBasedAliasAnalysis::getModRefBehavior(ImmutableCallSite CS) { // If this is an "immutable" type, we can assume the call doesn't write // to memory. if (const MDNode *M = CS.getInstruction()->getMetadata(LLVMContext::MD_tbaa)) - if ((!EnableStructPathTBAA && TBAANode(M).TypeIsImmutable()) || - (EnableStructPathTBAA && TBAAStructTagNode(M).TypeIsImmutable())) + if ((!isStructPathTBAA(M) && TBAANode(M).TypeIsImmutable()) || + (isStructPathTBAA(M) && TBAAStructTagNode(M).TypeIsImmutable())) Min = OnlyReadsMemory; return ModRefBehavior(AliasAnalysis::getModRefBehavior(CS) & Min); @@ -458,6 +534,25 @@ TypeBasedAliasAnalysis::getModRefInfo(ImmutableCallSite CS1, return AliasAnalysis::getModRefInfo(CS1, CS2); } +bool MDNode::isTBAAVtableAccess() const { + if (!isStructPathTBAA(this)) { + if (getNumOperands() < 1) return false; + if (MDString *Tag1 = dyn_cast(getOperand(0))) { + if (Tag1->getString() == "vtable pointer") return true; + } + return false; + } + + // For struct-path aware TBAA, we use the access type of the tag. + if (getNumOperands() < 2) return false; + MDNode *Tag = cast_or_null(getOperand(1)); + if (!Tag) return false; + if (MDString *Tag1 = dyn_cast(Tag->getOperand(0))) { + if (Tag1->getString() == "vtable pointer") return true; + } + return false; +} + MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { if (!A || !B) return NULL; @@ -466,7 +561,8 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { return A; // For struct-path aware TBAA, we use the access type of the tag. 
- if (EnableStructPathTBAA) { + bool StructPath = isStructPathTBAA(A); + if (StructPath) { A = cast_or_null(A->getOperand(1)); if (!A) return 0; B = cast_or_null(B->getOperand(1)); @@ -499,7 +595,7 @@ MDNode *MDNode::getMostGenericTBAA(MDNode *A, MDNode *B) { --IA; --IB; } - if (!EnableStructPathTBAA) + if (!StructPath) return Ret; if (!Ret) diff --git a/contrib/llvm/lib/Analysis/ValueTracking.cpp b/contrib/llvm/lib/Analysis/ValueTracking.cpp index 45dcc5e..e39ee62 100644 --- a/contrib/llvm/lib/Analysis/ValueTracking.cpp +++ b/contrib/llvm/lib/Analysis/ValueTracking.cpp @@ -15,6 +15,7 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalAlias.h" @@ -39,8 +40,8 @@ const unsigned MaxDepth = 6; static unsigned getBitWidth(Type *Ty, const DataLayout *TD) { if (unsigned BitWidth = Ty->getScalarSizeInBits()) return BitWidth; - assert(isa(Ty) && "Expected a pointer type!"); - return TD ? TD->getPointerSizeInBits() : 0; + + return TD ? TD->getPointerTypeSizeInBits(Ty) : 0; } static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, @@ -290,7 +291,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } if (Align > 0) KnownZero = APInt::getLowBitsSet(BitWidth, - CountTrailingZeros_32(Align)); + countTrailingZeros(Align)); else KnownZero.clearAllBits(); KnownOne.clearAllBits(); @@ -321,7 +322,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } if (Align) - KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); return; } @@ -613,7 +614,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, Align = TD->getABITypeAlignment(AI->getType()->getElementType()); if (Align > 0) - KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); + KnownZero = APInt::getLowBitsSet(BitWidth, countTrailingZeros(Align)); break; } case Instruction::GetElementPtr: { @@ -629,12 +630,22 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, Value *Index = I->getOperand(i); if (StructType *STy = dyn_cast(*GTI)) { // Handle struct member offset arithmetic. - if (!TD) return; - const StructLayout *SL = TD->getStructLayout(STy); + if (!TD) + return; + + // Handle case when index is vector zeroinitializer + Constant *CIndex = cast(Index); + if (CIndex->isZeroValue()) + continue; + + if (CIndex->getType()->isVectorTy()) + Index = CIndex->getSplatValue(); + unsigned Idx = cast(Index)->getZExtValue(); + const StructLayout *SL = TD->getStructLayout(STy); uint64_t Offset = SL->getElementOffset(Idx); - TrailZ = std::min(TrailZ, - CountTrailingZeros_64(Offset)); + TrailZ = std::min(TrailZ, + countTrailingZeros(Offset)); } else { // Handle array index arithmetic. 
Type *IndexedTy = GTI.getIndexedType(); @@ -644,7 +655,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, LocalKnownZero = LocalKnownOne = APInt(GEPOpiBits, 0); ComputeMaskedBits(Index, LocalKnownZero, LocalKnownOne, TD, Depth+1); TrailZ = std::min(TrailZ, - unsigned(CountTrailingZeros_64(TypeSize) + + unsigned(countTrailingZeros(TypeSize) + LocalKnownZero.countTrailingOnes())); } } @@ -749,7 +760,6 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, KnownZero = APInt::getHighBitsSet(BitWidth, BitWidth - LowBits); break; } - case Intrinsic::x86_sse42_crc32_64_8: case Intrinsic::x86_sse42_crc32_64_64: KnownZero = APInt::getHighBitsSet(64, 32); break; @@ -855,6 +865,37 @@ bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { return false; } + // Adding a power-of-two or zero to the same power-of-two or zero yields + // either the original power-of-two, a larger power-of-two or zero. + if (match(V, m_Add(m_Value(X), m_Value(Y)))) { + OverflowingBinaryOperator *VOBO = cast(V); + if (OrZero || VOBO->hasNoUnsignedWrap() || VOBO->hasNoSignedWrap()) { + if (match(X, m_And(m_Specific(Y), m_Value())) || + match(X, m_And(m_Value(), m_Specific(Y)))) + if (isKnownToBeAPowerOfTwo(Y, OrZero, Depth)) + return true; + if (match(Y, m_And(m_Specific(X), m_Value())) || + match(Y, m_And(m_Value(), m_Specific(X)))) + if (isKnownToBeAPowerOfTwo(X, OrZero, Depth)) + return true; + + unsigned BitWidth = V->getType()->getScalarSizeInBits(); + APInt LHSZeroBits(BitWidth, 0), LHSOneBits(BitWidth, 0); + ComputeMaskedBits(X, LHSZeroBits, LHSOneBits, 0, Depth); + + APInt RHSZeroBits(BitWidth, 0), RHSOneBits(BitWidth, 0); + ComputeMaskedBits(Y, RHSZeroBits, RHSOneBits, 0, Depth); + // If i8 V is a power of two or zero: + // ZeroBits: 1 1 1 0 1 1 1 1 + // ~ZeroBits: 0 0 0 1 0 0 0 0 + if ((~(LHSZeroBits & RHSZeroBits)).isPowerOf2()) + // If OrZero isn't set, we cannot give back a zero result. + // Make sure either the LHS or RHS has a bit set. + if (OrZero || RHSOneBits.getBoolValue() || LHSOneBits.getBoolValue()) + return true; + } + } + // An exact divide or right shift can only shift off zero bits, so the result // is a power of two only if the first operand is a power of two and not // copying a sign bit (sdiv int_min, 2). @@ -1509,7 +1550,7 @@ Value *llvm::isBytewiseValue(Value *V) { // struct. To is the result struct built so far, new insertvalue instructions // build on that. static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, - SmallVector &Idxs, + SmallVectorImpl &Idxs, unsigned IdxSkip, Instruction *InsertBefore) { llvm::StructType *STy = dyn_cast(IndexedType); @@ -1673,20 +1714,24 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, /// it can be expressed as a base pointer plus a constant offset. Return the /// base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout *TD) { + const DataLayout *DL) { // Without DataLayout, conservatively assume 64-bit offsets, which is // the widest we support. - unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; + unsigned BitWidth = DL ? 
DL->getPointerTypeSizeInBits(Ptr->getType()) : 64; APInt ByteOffset(BitWidth, 0); while (1) { if (Ptr->getType()->isVectorTy()) break; if (GEPOperator *GEP = dyn_cast(Ptr)) { - APInt GEPOffset(BitWidth, 0); - if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset)) - break; - ByteOffset += GEPOffset; + if (DL) { + APInt GEPOffset(BitWidth, 0); + if (!GEP->accumulateConstantOffset(*DL, GEPOffset)) + break; + + ByteOffset += GEPOffset; + } + Ptr = GEP->getPointerOperand(); } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) { Ptr = cast(Ptr)->getOperand(0); @@ -2019,7 +2064,7 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, /// isKnownNonNull - Return true if we know that the specified value is never /// null. -bool llvm::isKnownNonNull(const Value *V) { +bool llvm::isKnownNonNull(const Value *V, const TargetLibraryInfo *TLI) { // Alloca never returns null, malloc might. if (isa(V)) return true; @@ -2030,5 +2075,10 @@ bool llvm::isKnownNonNull(const Value *V) { // Global values are not null unless extern weak. if (const GlobalValue *GV = dyn_cast(V)) return !GV->hasExternalWeakLinkage(); + + // operator new never returns null. + if (isOperatorNewLikeFn(V, TLI, /*LookThroughBitCast=*/true)) + return true; + return false; } diff --git a/contrib/llvm/lib/Archive/Archive.cpp b/contrib/llvm/lib/Archive/Archive.cpp deleted file mode 100644 index 1f36a00..0000000 --- a/contrib/llvm/lib/Archive/Archive.cpp +++ /dev/null @@ -1,262 +0,0 @@ -//===-- Archive.cpp - Generic LLVM archive functions ------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file contains the implementation of the Archive and ArchiveMember -// classes that is common to both reading and writing archives.. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Bitcode/Archive.h" -#include "ArchiveInternals.h" -#include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/system_error.h" -#include -#include -using namespace llvm; - -// getMemberSize - compute the actual physical size of the file member as seen -// on disk. This isn't the size of member's payload. Use getSize() for that. -unsigned -ArchiveMember::getMemberSize() const { - // Basically its the file size plus the header size - unsigned result = info.fileSize + sizeof(ArchiveMemberHeader); - - // If it has a long filename, include the name length - if (hasLongFilename()) - result += path.str().length() + 1; - - // If its now odd lengthed, include the padding byte - if (result % 2 != 0 ) - result++; - - return result; -} - -// This default constructor is only use by the ilist when it creates its -// sentry node. We give it specific static values to make it stand out a bit. -ArchiveMember::ArchiveMember() - : parent(0), path("--invalid--"), flags(0), data(0) -{ - info.user = sys::Process::GetCurrentUserId(); - info.group = sys::Process::GetCurrentGroupId(); - info.mode = 0777; - info.fileSize = 0; - info.modTime = sys::TimeValue::now(); -} - -// This is the constructor that the Archive class uses when it is building or -// reading an archive. It just defaults a few things and ensures the parent is -// set for the iplist. 
The Archive class fills in the ArchiveMember's data. -// This is required because correctly setting the data may depend on other -// things in the Archive. -ArchiveMember::ArchiveMember(Archive* PAR) - : parent(PAR), path(), flags(0), data(0) -{ -} - -// This method allows an ArchiveMember to be replaced with the data for a -// different file, presumably as an update to the member. It also makes sure -// the flags are reset correctly. -bool ArchiveMember::replaceWith(const sys::Path& newFile, std::string* ErrMsg) { - bool Exists; - if (sys::fs::exists(newFile.str(), Exists) || !Exists) { - if (ErrMsg) - *ErrMsg = "Can not replace an archive member with a non-existent file"; - return true; - } - - data = 0; - path = newFile; - - // SVR4 symbol tables have an empty name - if (path.str() == ARFILE_SVR4_SYMTAB_NAME) - flags |= SVR4SymbolTableFlag; - else - flags &= ~SVR4SymbolTableFlag; - - // BSD4.4 symbol tables have a special name - if (path.str() == ARFILE_BSD4_SYMTAB_NAME) - flags |= BSD4SymbolTableFlag; - else - flags &= ~BSD4SymbolTableFlag; - - // LLVM symbol tables have a very specific name - if (path.str() == ARFILE_LLVM_SYMTAB_NAME) - flags |= LLVMSymbolTableFlag; - else - flags &= ~LLVMSymbolTableFlag; - - // String table name - if (path.str() == ARFILE_STRTAB_NAME) - flags |= StringTableFlag; - else - flags &= ~StringTableFlag; - - // If it has a slash then it has a path - bool hasSlash = path.str().find('/') != std::string::npos; - if (hasSlash) - flags |= HasPathFlag; - else - flags &= ~HasPathFlag; - - // If it has a slash or its over 15 chars then its a long filename format - if (hasSlash || path.str().length() > 15) - flags |= HasLongFilenameFlag; - else - flags &= ~HasLongFilenameFlag; - - // Get the signature and status info - const char* signature = (const char*) data; - SmallString<4> magic; - if (!signature) { - sys::fs::get_magic(path.str(), magic.capacity(), magic); - signature = magic.c_str(); - const sys::FileStatus *FSinfo = path.getFileStatus(false, ErrMsg); - if (FSinfo) - info = *FSinfo; - else - return true; - } - - // Determine what kind of file it is. - switch (sys::IdentifyFileType(signature,4)) { - case sys::Bitcode_FileType: - flags |= BitcodeFlag; - break; - default: - flags &= ~BitcodeFlag; - break; - } - return false; -} - -// Archive constructor - this is the only constructor that gets used for the -// Archive class. Everything else (default,copy) is deprecated. This just -// initializes and maps the file into memory, if requested. -Archive::Archive(const sys::Path& filename, LLVMContext& C) - : archPath(filename), members(), mapfile(0), base(0), symTab(), strtab(), - symTabSize(0), firstFileOffset(0), modules(), foreignST(0), Context(C) { -} - -bool -Archive::mapToMemory(std::string* ErrMsg) { - OwningPtr File; - if (error_code ec = MemoryBuffer::getFile(archPath.c_str(), File)) { - if (ErrMsg) - *ErrMsg = ec.message(); - return true; - } - mapfile = File.take(); - base = mapfile->getBufferStart(); - return false; -} - -void Archive::cleanUpMemory() { - // Shutdown the file mapping - delete mapfile; - mapfile = 0; - base = 0; - - // Forget the entire symbol table - symTab.clear(); - symTabSize = 0; - - firstFileOffset = 0; - - // Free the foreign symbol table member - if (foreignST) { - delete foreignST; - foreignST = 0; - } - - // Delete any Modules and ArchiveMember's we've allocated as a result of - // symbol table searches. 
- for (ModuleMap::iterator I=modules.begin(), E=modules.end(); I != E; ++I ) { - delete I->second.first; - delete I->second.second; - } -} - -// Archive destructor - just clean up memory -Archive::~Archive() { - cleanUpMemory(); -} - - - -static void getSymbols(Module*M, std::vector& symbols) { - // Loop over global variables - for (Module::global_iterator GI = M->global_begin(), GE=M->global_end(); GI != GE; ++GI) - if (!GI->isDeclaration() && !GI->hasLocalLinkage()) - if (!GI->getName().empty()) - symbols.push_back(GI->getName()); - - // Loop over functions - for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ++FI) - if (!FI->isDeclaration() && !FI->hasLocalLinkage()) - if (!FI->getName().empty()) - symbols.push_back(FI->getName()); - - // Loop over aliases - for (Module::alias_iterator AI = M->alias_begin(), AE = M->alias_end(); - AI != AE; ++AI) { - if (AI->hasName()) - symbols.push_back(AI->getName()); - } -} - -// Get just the externally visible defined symbols from the bitcode -bool llvm::GetBitcodeSymbols(const sys::Path& fName, - LLVMContext& Context, - std::vector& symbols, - std::string* ErrMsg) { - OwningPtr Buffer; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(fName.c_str(), Buffer)) { - if (ErrMsg) *ErrMsg = "Could not open file '" + fName.str() + "'" + ": " - + ec.message(); - return true; - } - - Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg); - if (!M) - return true; - - // Get the symbols - getSymbols(M, symbols); - - // Done with the module. - delete M; - return true; -} - -Module* -llvm::GetBitcodeSymbols(const char *BufPtr, unsigned Length, - const std::string& ModuleID, - LLVMContext& Context, - std::vector& symbols, - std::string* ErrMsg) { - // Get the module. - OwningPtr Buffer( - MemoryBuffer::getMemBufferCopy(StringRef(BufPtr, Length),ModuleID.c_str())); - - Module *M = ParseBitcodeFile(Buffer.get(), Context, ErrMsg); - if (!M) - return 0; - - // Get the symbols - getSymbols(M, symbols); - - // Done with the module. Note that it's the caller's responsibility to delete - // the Module. - return M; -} diff --git a/contrib/llvm/lib/Archive/ArchiveInternals.h b/contrib/llvm/lib/Archive/ArchiveInternals.h deleted file mode 100644 index f6c87e8..0000000 --- a/contrib/llvm/lib/Archive/ArchiveInternals.h +++ /dev/null @@ -1,88 +0,0 @@ -//===-- lib/Archive/ArchiveInternals.h -------------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Internal implementation header for LLVM Archive files. 
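getSymbols() above applies one and the same filter to globals and functions (aliases only need a name, since they are never declarations). The predicate in isolation, assuming LLVM 3.4's GlobalValue API; isIndexableSymbol is an illustrative name:

#include "llvm/IR/GlobalValue.h"
using namespace llvm;

// A symbol is worth indexing when it is defined in this module,
// externally visible, and actually named.
static bool isIndexableSymbol(const GlobalValue &GV) {
  return !GV.isDeclaration() && !GV.hasLocalLinkage() && GV.hasName();
}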
-// -//===----------------------------------------------------------------------===// - -#ifndef LIB_ARCHIVE_ARCHIVEINTERNALS_H -#define LIB_ARCHIVE_ARCHIVEINTERNALS_H - -#include "llvm/ADT/StringExtras.h" -#include "llvm/Bitcode/Archive.h" -#include "llvm/Support/TimeValue.h" -#include <cstring> - -#define ARFILE_MAGIC "!<arch>\n" ///< magic string -#define ARFILE_MAGIC_LEN (sizeof(ARFILE_MAGIC)-1) ///< length of magic string -#define ARFILE_SVR4_SYMTAB_NAME "/ " ///< SVR4 symtab entry name -#define ARFILE_LLVM_SYMTAB_NAME "#_LLVM_SYM_TAB_#" ///< LLVM symtab entry name -#define ARFILE_BSD4_SYMTAB_NAME "__.SYMDEF SORTED" ///< BSD4 symtab entry name -#define ARFILE_STRTAB_NAME "// " ///< Name of string table -#define ARFILE_PAD "\n" ///< inter-file align padding -#define ARFILE_MEMBER_MAGIC "`\n" ///< fmag field magic # - -namespace llvm { - - class LLVMContext; - - /// The ArchiveMemberHeader structure is used internally for bitcode - /// archives. - /// The header precedes each file member in the archive. This structure is - /// defined using character arrays for direct and correct interpretation - /// regardless of the endianess of the machine that produced it. - /// @brief Archive File Member Header - class ArchiveMemberHeader { - /// @name Data - /// @{ - public: - char name[16]; ///< Name of the file member. - char date[12]; ///< File date, decimal seconds since Epoch - char uid[6]; ///< user id in ASCII decimal - char gid[6]; ///< group id in ASCII decimal - char mode[8]; ///< file mode in ASCII octal - char size[10]; ///< file size in ASCII decimal - char fmag[2]; ///< Always contains ARFILE_MAGIC_TERMINATOR - - /// @} - /// @name Methods - /// @{ - public: - void init() { - memset(name,' ',16); - memset(date,' ',12); - memset(uid,' ',6); - memset(gid,' ',6); - memset(mode,' ',8); - memset(size,' ',10); - fmag[0] = '`'; - fmag[1] = '\n'; - } - - bool checkSignature() const { - return 0 == memcmp(fmag, ARFILE_MEMBER_MAGIC,2); - } - }; - - // Get just the externally visible defined symbols from the bitcode - bool GetBitcodeSymbols(const sys::Path& fName, - LLVMContext& Context, - std::vector<std::string>& symbols, - std::string* ErrMsg); - - Module* GetBitcodeSymbols(const char *Buffer, unsigned Length, - const std::string& ModuleID, - LLVMContext& Context, - std::vector<std::string>& symbols, - std::string* ErrMsg); -} - -#endif - -// vim: sw=2 ai diff --git a/contrib/llvm/lib/Archive/ArchiveReader.cpp b/contrib/llvm/lib/Archive/ArchiveReader.cpp deleted file mode 100644 index 14713e6..0000000 --- a/contrib/llvm/lib/Archive/ArchiveReader.cpp +++ /dev/null @@ -1,633 +0,0 @@ -//===-- ArchiveReader.cpp - Read LLVM archive files -------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Builds up standard unix archive files (.a) containing LLVM bitcode.
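The ArchiveMemberHeader just deleted models the classic System V ar(1) member header: seven printable-ASCII fields, 60 bytes in all. Every member is preceded by this header, and the archive itself opens with the 8-byte magic "!<arch>\n". A self-contained mirror of that layout (ArMemberHeader is an illustrative name):

#include <cassert>

// Same field widths as ArchiveMemberHeader above; all fields are char
// arrays, so the struct has no padding and reads straight off a mapping.
struct ArMemberHeader {
  char name[16]; // member name, '/'-terminated, or "#1/<len>" (BSD style)
  char date[12]; // decimal seconds since the epoch
  char uid[6];   // decimal user id
  char gid[6];   // decimal group id
  char mode[8];  // octal file mode
  char size[10]; // decimal member size in bytes
  char fmag[2];  // terminator, always "`\n"
};

int main() {
  assert(sizeof(ArMemberHeader) == 60);
  return 0;
}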
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Bitcode/Archive.h" -#include "ArchiveInternals.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/MemoryBuffer.h" -#include -#include -using namespace llvm; - -/// Read a variable-bit-rate encoded unsigned integer -static inline unsigned readInteger(const char*&At, const char*End) { - unsigned Shift = 0; - unsigned Result = 0; - - do { - if (At == End) - return Result; - Result |= (unsigned)((*At++) & 0x7F) << Shift; - Shift += 7; - } while (At[-1] & 0x80); - return Result; -} - -// Completely parse the Archive's symbol table and populate symTab member var. -bool -Archive::parseSymbolTable(const void* data, unsigned size, std::string* error) { - const char* At = (const char*) data; - const char* End = At + size; - while (At < End) { - unsigned offset = readInteger(At, End); - if (At == End) { - if (error) - *error = "Ran out of data reading vbr_uint for symtab offset!"; - return false; - } - unsigned length = readInteger(At, End); - if (At == End) { - if (error) - *error = "Ran out of data reading vbr_uint for symtab length!"; - return false; - } - if (At + length > End) { - if (error) - *error = "Malformed symbol table: length not consistent with size"; - return false; - } - // we don't care if it can't be inserted (duplicate entry) - symTab.insert(std::make_pair(std::string(At, length), offset)); - At += length; - } - symTabSize = size; - return true; -} - -// This member parses an ArchiveMemberHeader that is presumed to be pointed to -// by At. The At pointer is updated to the byte just after the header, which -// can be variable in size. -ArchiveMember* -Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) -{ - if (At + sizeof(ArchiveMemberHeader) >= End) { - if (error) - *error = "Unexpected end of file"; - return 0; - } - - // Cast archive member header - const ArchiveMemberHeader* Hdr = (const ArchiveMemberHeader*)At; - At += sizeof(ArchiveMemberHeader); - - int flags = 0; - int MemberSize = atoi(Hdr->size); - assert(MemberSize >= 0); - - // Check the size of the member for sanity - if (At + MemberSize > End) { - if (error) - *error = "invalid member length in archive file"; - return 0; - } - - // Check the member signature - if (!Hdr->checkSignature()) { - if (error) - *error = "invalid file member signature"; - return 0; - } - - // Convert and check the member name - // The empty name ( '/' and 15 blanks) is for a foreign (non-LLVM) symbol - // table. The special name "//" and 14 blanks is for a string table, used - // for long file names. This library doesn't generate either of those but - // it will accept them. If the name starts with #1/ and the remainder is - // digits, then those digits specify the length of the name that is - // stored immediately following the header. The special name - // __LLVM_SYM_TAB__ identifies the symbol table for LLVM bitcode. - // Anything else is a regular, short filename that is terminated with - // a '/' and blanks. - - std::string pathname; - switch (Hdr->name[0]) { - case '#': - if (Hdr->name[1] == '1' && Hdr->name[2] == '/') { - if (isdigit(Hdr->name[3])) { - unsigned len = atoi(&Hdr->name[3]); - const char *nulp = (const char *)memchr(At, '\0', len); - pathname.assign(At, nulp != 0 ? 
(uintptr_t)(nulp - At) : len); - At += len; - MemberSize -= len; - flags |= ArchiveMember::HasLongFilenameFlag; - } else { - if (error) - *error = "invalid long filename"; - return 0; - } - } else if (Hdr->name[1] == '_' && - (0 == memcmp(Hdr->name, ARFILE_LLVM_SYMTAB_NAME, 16))) { - // The member is using a long file name (>15 chars) format. - // This format is standard for 4.4BSD and Mac OSX operating - // systems. LLVM uses it similarly. In this format, the - // remainder of the name field (after #1/) specifies the - // length of the file name which occupy the first bytes of - // the member's data. The pathname already has the #1/ stripped. - pathname.assign(ARFILE_LLVM_SYMTAB_NAME); - flags |= ArchiveMember::LLVMSymbolTableFlag; - } - break; - case '/': - if (Hdr->name[1]== '/') { - if (0 == memcmp(Hdr->name, ARFILE_STRTAB_NAME, 16)) { - pathname.assign(ARFILE_STRTAB_NAME); - flags |= ArchiveMember::StringTableFlag; - } else { - if (error) - *error = "invalid string table name"; - return 0; - } - } else if (Hdr->name[1] == ' ') { - if (0 == memcmp(Hdr->name, ARFILE_SVR4_SYMTAB_NAME, 16)) { - pathname.assign(ARFILE_SVR4_SYMTAB_NAME); - flags |= ArchiveMember::SVR4SymbolTableFlag; - } else { - if (error) - *error = "invalid SVR4 symbol table name"; - return 0; - } - } else if (isdigit(Hdr->name[1])) { - unsigned index = atoi(&Hdr->name[1]); - if (index < strtab.length()) { - const char* namep = strtab.c_str() + index; - const char* endp = strtab.c_str() + strtab.length(); - const char* p = namep; - const char* last_p = p; - while (p < endp) { - if (*p == '\n' && *last_p == '/') { - pathname.assign(namep, last_p - namep); - flags |= ArchiveMember::HasLongFilenameFlag; - break; - } - last_p = p; - p++; - } - if (p >= endp) { - if (error) - *error = "missing name terminator in string table"; - return 0; - } - } else { - if (error) - *error = "name index beyond string table"; - return 0; - } - } - break; - case '_': - if (Hdr->name[1] == '_' && - (0 == memcmp(Hdr->name, ARFILE_BSD4_SYMTAB_NAME, 16))) { - pathname.assign(ARFILE_BSD4_SYMTAB_NAME); - flags |= ArchiveMember::BSD4SymbolTableFlag; - break; - } - /* FALL THROUGH */ - - default: - const char* slash = (const char*) memchr(Hdr->name, '/', 16); - if (slash == 0) - slash = Hdr->name + 16; - pathname.assign(Hdr->name, slash - Hdr->name); - break; - } - - // Determine if this is a bitcode file - switch (sys::IdentifyFileType(At, 4)) { - case sys::Bitcode_FileType: - flags |= ArchiveMember::BitcodeFlag; - break; - default: - flags &= ~ArchiveMember::BitcodeFlag; - break; - } - - // Instantiate the ArchiveMember to be filled - ArchiveMember* member = new ArchiveMember(this); - - // Fill in fields of the ArchiveMember - member->parent = this; - member->path.set(pathname); - member->info.fileSize = MemberSize; - member->info.modTime.fromEpochTime(atoi(Hdr->date)); - unsigned int mode; - sscanf(Hdr->mode, "%o", &mode); - member->info.mode = mode; - member->info.user = atoi(Hdr->uid); - member->info.group = atoi(Hdr->gid); - member->flags = flags; - member->data = At; - - return member; -} - -bool -Archive::checkSignature(std::string* error) { - // Check the magic string at file's header - if (mapfile->getBufferSize() < 8 || memcmp(base, ARFILE_MAGIC, 8)) { - if (error) - *error = "invalid signature for an archive file"; - return false; - } - return true; -} - -// This function loads the entire archive and fully populates its ilist with -// the members of the archive file. 
This is typically used in preparation for -// editing the contents of the archive. -bool -Archive::loadArchive(std::string* error) { - - // Set up parsing - members.clear(); - symTab.clear(); - const char *At = base; - const char *End = mapfile->getBufferEnd(); - - if (!checkSignature(error)) - return false; - - At += 8; // Skip the magic string. - - bool seenSymbolTable = false; - bool foundFirstFile = false; - while (At < End) { - // parse the member header - const char* Save = At; - ArchiveMember* mbr = parseMemberHeader(At, End, error); - if (!mbr) - return false; - - // check if this is the foreign symbol table - if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { - // We just save this but don't do anything special - // with it. It doesn't count as the "first file". - if (foreignST) { - // What? Multiple foreign symbol tables? Just chuck it - // and retain the last one found. - delete foreignST; - } - foreignST = mbr; - At += mbr->getSize(); - if ((intptr_t(At) & 1) == 1) - At++; - } else if (mbr->isStringTable()) { - // Simply suck the entire string table into a string - // variable. This will be used to get the names of the - // members that use the "/ddd" format for their names - // (SVR4 style long names). - strtab.assign(At, mbr->getSize()); - At += mbr->getSize(); - if ((intptr_t(At) & 1) == 1) - At++; - delete mbr; - } else if (mbr->isLLVMSymbolTable()) { - // This is the LLVM symbol table for the archive. If we've seen it - // already, its an error. Otherwise, parse the symbol table and move on. - if (seenSymbolTable) { - if (error) - *error = "invalid archive: multiple symbol tables"; - return false; - } - if (!parseSymbolTable(mbr->getData(), mbr->getSize(), error)) - return false; - seenSymbolTable = true; - At += mbr->getSize(); - if ((intptr_t(At) & 1) == 1) - At++; - delete mbr; // We don't need this member in the list of members. - } else { - // This is just a regular file. If its the first one, save its offset. - // Otherwise just push it on the list and move on to the next file. - if (!foundFirstFile) { - firstFileOffset = Save - base; - foundFirstFile = true; - } - members.push_back(mbr); - At += mbr->getSize(); - if ((intptr_t(At) & 1) == 1) - At++; - } - } - return true; -} - -// Open and completely load the archive file. 
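Each branch of the loadArchive() loop above ends with the same two-step advance, because ar(1) members start on even offsets: an odd-sized member is followed by a single '\n' pad byte (ARFILE_PAD). The idiom in isolation; the parity test on the pointer is valid because the mapped buffer begins at an even (page-aligned) address:

#include <cstdint>

// Skip a member plus its optional pad so the next header is even-aligned.
static const char *advancePastMember(const char *At, unsigned MemberSize) {
  At += MemberSize;
  if (reinterpret_cast<uintptr_t>(At) & 1)
    ++At; // consume the '\n' pad byte
  return At;
}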
-Archive* -Archive::OpenAndLoad(const sys::Path& File, LLVMContext& C, - std::string* ErrorMessage) { - OwningPtr result ( new Archive(File, C)); - if (result->mapToMemory(ErrorMessage)) - return NULL; - if (!result->loadArchive(ErrorMessage)) - return NULL; - return result.take(); -} - -// Get all the bitcode modules from the archive -bool -Archive::getAllModules(std::vector& Modules, - std::string* ErrMessage) { - - for (iterator I=begin(), E=end(); I != E; ++I) { - if (I->isBitcode()) { - std::string FullMemberName = archPath.str() + - "(" + I->getPath().str() + ")"; - MemoryBuffer *Buffer = - MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), - FullMemberName.c_str()); - - Module *M = ParseBitcodeFile(Buffer, Context, ErrMessage); - delete Buffer; - if (!M) - return true; - - Modules.push_back(M); - } - } - return false; -} - -// Load just the symbol table from the archive file -bool -Archive::loadSymbolTable(std::string* ErrorMsg) { - - // Set up parsing - members.clear(); - symTab.clear(); - const char *At = base; - const char *End = mapfile->getBufferEnd(); - - // Make sure we're dealing with an archive - if (!checkSignature(ErrorMsg)) - return false; - - At += 8; // Skip signature - - // Parse the first file member header - const char* FirstFile = At; - ArchiveMember* mbr = parseMemberHeader(At, End, ErrorMsg); - if (!mbr) - return false; - - if (mbr->isSVR4SymbolTable() || mbr->isBSD4SymbolTable()) { - // Skip the foreign symbol table, we don't do anything with it - At += mbr->getSize(); - if ((intptr_t(At) & 1) == 1) - At++; - delete mbr; - - // Read the next one - FirstFile = At; - mbr = parseMemberHeader(At, End, ErrorMsg); - if (!mbr) { - delete mbr; - return false; - } - } - - if (mbr->isStringTable()) { - // Process the string table entry - strtab.assign((const char*)mbr->getData(), mbr->getSize()); - At += mbr->getSize(); - if ((intptr_t(At) & 1) == 1) - At++; - delete mbr; - // Get the next one - FirstFile = At; - mbr = parseMemberHeader(At, End, ErrorMsg); - if (!mbr) { - delete mbr; - return false; - } - } - - // See if its the symbol table - if (mbr->isLLVMSymbolTable()) { - if (!parseSymbolTable(mbr->getData(), mbr->getSize(), ErrorMsg)) { - delete mbr; - return false; - } - - At += mbr->getSize(); - if ((intptr_t(At) & 1) == 1) - At++; - delete mbr; - // Can't be any more symtab headers so just advance - FirstFile = At; - } else { - // There's no symbol table in the file. We have to rebuild it from scratch - // because the intent of this method is to get the symbol table loaded so - // it can be searched efficiently. - // Add the member to the members list - members.push_back(mbr); - } - - firstFileOffset = FirstFile - base; - return true; -} - -// Open the archive and load just the symbol tables -Archive* Archive::OpenAndLoadSymbols(const sys::Path& File, - LLVMContext& C, - std::string* ErrorMessage) { - OwningPtr result ( new Archive(File, C) ); - if (result->mapToMemory(ErrorMessage)) - return NULL; - if (!result->loadSymbolTable(ErrorMessage)) - return NULL; - return result.take(); -} - -// Look up one symbol in the symbol table and return the module that defines -// that symbol. -Module* -Archive::findModuleDefiningSymbol(const std::string& symbol, - std::string* ErrMsg) { - SymTabType::iterator SI = symTab.find(symbol); - if (SI == symTab.end()) - return 0; - - // The symbol table was previously constructed assuming that the members were - // written without the symbol table header. 
Because VBR encoding is used, the - // values could not be adjusted to account for the offset of the symbol table - // because that could affect the size of the symbol table due to VBR encoding. - // We now have to account for this by adjusting the offset by the size of the - // symbol table and its header. - unsigned fileOffset = - SI->second + // offset in symbol-table-less file - firstFileOffset; // add offset to first "real" file in archive - - // See if the module is already loaded - ModuleMap::iterator MI = modules.find(fileOffset); - if (MI != modules.end()) - return MI->second.first; - - // Module hasn't been loaded yet, we need to load it - const char* modptr = base + fileOffset; - ArchiveMember* mbr = parseMemberHeader(modptr, mapfile->getBufferEnd(), - ErrMsg); - if (!mbr) - return 0; - - // Now, load the bitcode module to get the Module. - std::string FullMemberName = archPath.str() + "(" + - mbr->getPath().str() + ")"; - MemoryBuffer *Buffer = - MemoryBuffer::getMemBufferCopy(StringRef(mbr->getData(), mbr->getSize()), - FullMemberName.c_str()); - - Module *m = getLazyBitcodeModule(Buffer, Context, ErrMsg); - if (!m) - return 0; - - modules.insert(std::make_pair(fileOffset, std::make_pair(m, mbr))); - - return m; -} - -// Look up multiple symbols in the symbol table and return a set of -// Modules that define those symbols. -bool -Archive::findModulesDefiningSymbols(std::set& symbols, - SmallVectorImpl& result, - std::string* error) { - if (!mapfile || !base) { - if (error) - *error = "Empty archive invalid for finding modules defining symbols"; - return false; - } - - if (symTab.empty()) { - // We don't have a symbol table, so we must build it now but lets also - // make sure that we populate the modules table as we do this to ensure - // that we don't load them twice when findModuleDefiningSymbol is called - // below. - - // Get a pointer to the first file - const char* At = base + firstFileOffset; - const char* End = mapfile->getBufferEnd(); - - while ( At < End) { - // Compute the offset to be put in the symbol table - unsigned offset = At - base - firstFileOffset; - - // Parse the file's header - ArchiveMember* mbr = parseMemberHeader(At, End, error); - if (!mbr) - return false; - - // If it contains symbols - if (mbr->isBitcode()) { - // Get the symbols - std::vector symbols; - std::string FullMemberName = archPath.str() + "(" + - mbr->getPath().str() + ")"; - Module* M = - GetBitcodeSymbols(At, mbr->getSize(), FullMemberName, Context, - symbols, error); - - if (M) { - // Insert the module's symbols into the symbol table - for (std::vector::iterator I = symbols.begin(), - E=symbols.end(); I != E; ++I ) { - symTab.insert(std::make_pair(*I, offset)); - } - // Insert the Module and the ArchiveMember into the table of - // modules. - modules.insert(std::make_pair(offset, std::make_pair(M, mbr))); - } else { - if (error) - *error = "Can't parse bitcode member: " + - mbr->getPath().str() + ": " + *error; - delete mbr; - return false; - } - } - - // Go to the next file location - At += mbr->getSize(); - if ((intptr_t(At) & 1) == 1) - At++; - } - } - - // At this point we have a valid symbol table (one way or another) so we - // just use it to quickly find the symbols requested. - - SmallPtrSet Added; - for (std::set::iterator I=symbols.begin(), - Next = I, - E=symbols.end(); I != E; I = Next) { - // Increment Next before we invalidate it. 
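That saved-successor loop is the standard idiom for erasing from a std::set during iteration: std::set::erase invalidates only the erased iterator, so advancing Next first keeps the traversal safe. The same pattern on a plain set:

#include <set>

// Erase matching elements without ever touching an invalidated iterator.
static void eraseEvens(std::set<int> &S) {
  for (std::set<int>::iterator I = S.begin(), Next = I, E = S.end();
       I != E; I = Next) {
    ++Next;         // grab the successor before I can be erased
    if (*I % 2 == 0)
      S.erase(I);   // I is dead now; Next is still valid
  }
}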
- ++Next; - - // See if this symbol exists - Module* m = findModuleDefiningSymbol(*I,error); - if (!m) - continue; - bool NewMember = Added.insert(m); - if (!NewMember) - continue; - - // The symbol exists, insert the Module into our result. - result.push_back(m); - - // Remove the symbol now that its been resolved. - symbols.erase(I); - } - return true; -} - -bool Archive::isBitcodeArchive() { - // Make sure the symTab has been loaded. In most cases this should have been - // done when the archive was constructed, but still, this is just in case. - if (symTab.empty()) - if (!loadSymbolTable(0)) - return false; - - // Now that we know it's been loaded, return true - // if it has a size - if (symTab.size()) return true; - - // We still can't be sure it isn't a bitcode archive - if (!loadArchive(0)) - return false; - - std::vector Modules; - std::string ErrorMessage; - - // Scan the archive, trying to load a bitcode member. We only load one to - // see if this works. - for (iterator I = begin(), E = end(); I != E; ++I) { - if (!I->isBitcode()) - continue; - - std::string FullMemberName = - archPath.str() + "(" + I->getPath().str() + ")"; - - MemoryBuffer *Buffer = - MemoryBuffer::getMemBufferCopy(StringRef(I->getData(), I->getSize()), - FullMemberName.c_str()); - Module *M = ParseBitcodeFile(Buffer, Context); - delete Buffer; - if (!M) - return false; // Couldn't parse bitcode, not a bitcode archive. - delete M; - return true; - } - - return false; -} diff --git a/contrib/llvm/lib/Archive/ArchiveWriter.cpp b/contrib/llvm/lib/Archive/ArchiveWriter.cpp deleted file mode 100644 index 3eba701..0000000 --- a/contrib/llvm/lib/Archive/ArchiveWriter.cpp +++ /dev/null @@ -1,489 +0,0 @@ -//===-- ArchiveWriter.cpp - Write LLVM archive files ----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// Builds up an LLVM archive file (.a) containing LLVM bitcode. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Bitcode/Archive.h" -#include "ArchiveInternals.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/IR/Module.h" -#include "llvm/Support/FileSystem.h" -#include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Process.h" -#include "llvm/Support/Signals.h" -#include "llvm/Support/system_error.h" -#include -#include -#include -using namespace llvm; - -// Write an integer using variable bit rate encoding. This saves a few bytes -// per entry in the symbol table. -static inline void writeInteger(unsigned num, std::ofstream& ARFile) { - while (1) { - if (num < 0x80) { // done? - ARFile << (unsigned char)num; - return; - } - - // Nope, we are bigger than a character, output the next 7 bits and set the - // high bit to say that there is more coming... - ARFile << (unsigned char)(0x80 | ((unsigned char)num & 0x7F)); - num >>= 7; // Shift out 7 bits now... - } -} - -// Compute how many bytes are taken by a given VBR encoded value. This is needed -// to pre-compute the size of the symbol table. -static inline unsigned numVbrBytes(unsigned num) { - - // Note that the following nested ifs are somewhat equivalent to a binary - // search. We split it in half by comparing against 2^14 first. 
This allows - // most reasonable values to be done in 2 comparisons instead of 1 for - // small ones and four for large ones. We expect this to access file offsets - // in the 2^10 to 2^24 range and symbol lengths in the 2^0 to 2^8 range, - // so this approach is reasonable. - if (num < 1<<14) { - if (num < 1<<7) - return 1; - else - return 2; - } - if (num < 1<<21) - return 3; - - if (num < 1<<28) - return 4; - return 5; // anything >= 2^28 takes 5 bytes -} - -// Create an empty archive. -Archive* Archive::CreateEmpty(const sys::Path& FilePath, LLVMContext& C) { - Archive* result = new Archive(FilePath, C); - return result; -} - -// Fill the ArchiveMemberHeader with the information from a member. If -// TruncateNames is true, names are flattened to 15 chars or less. The sz field -// is provided here instead of coming from the mbr because the member might be -// stored compressed and the compressed size is not the ArchiveMember's size. -// Furthermore compressed files have negative size fields to identify them as -// compressed. -bool -Archive::fillHeader(const ArchiveMember &mbr, ArchiveMemberHeader& hdr, - int sz, bool TruncateNames) const { - - // Set the permissions mode, uid and gid - hdr.init(); - char buffer[32]; - sprintf(buffer, "%-8o", mbr.getMode()); - memcpy(hdr.mode,buffer,8); - sprintf(buffer, "%-6u", mbr.getUser()); - memcpy(hdr.uid,buffer,6); - sprintf(buffer, "%-6u", mbr.getGroup()); - memcpy(hdr.gid,buffer,6); - - // Set the last modification date - uint64_t secondsSinceEpoch = mbr.getModTime().toEpochTime(); - sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); - memcpy(hdr.date,buffer,12); - - // Get rid of trailing blanks in the name - std::string mbrPath = mbr.getPath().str(); - size_t mbrLen = mbrPath.length(); - while (mbrLen > 0 && mbrPath[mbrLen-1] == ' ') { - mbrPath.erase(mbrLen-1,1); - mbrLen--; - } - - // Set the name field in one of its various flavors. - bool writeLongName = false; - if (mbr.isStringTable()) { - memcpy(hdr.name,ARFILE_STRTAB_NAME,16); - } else if (mbr.isSVR4SymbolTable()) { - memcpy(hdr.name,ARFILE_SVR4_SYMTAB_NAME,16); - } else if (mbr.isBSD4SymbolTable()) { - memcpy(hdr.name,ARFILE_BSD4_SYMTAB_NAME,16); - } else if (mbr.isLLVMSymbolTable()) { - memcpy(hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); - } else if (TruncateNames) { - const char* nm = mbrPath.c_str(); - unsigned len = mbrPath.length(); - size_t slashpos = mbrPath.rfind('/'); - if (slashpos != std::string::npos) { - nm += slashpos + 1; - len -= slashpos +1; - } - if (len > 15) - len = 15; - memcpy(hdr.name,nm,len); - hdr.name[len] = '/'; - } else if (mbrPath.length() < 16 && mbrPath.find('/') == std::string::npos) { - memcpy(hdr.name,mbrPath.c_str(),mbrPath.length()); - hdr.name[mbrPath.length()] = '/'; - } else { - std::string nm = "#1/"; - nm += utostr(mbrPath.length()); - memcpy(hdr.name,nm.data(),nm.length()); - if (sz < 0) - sz -= mbrPath.length(); - else - sz += mbrPath.length(); - writeLongName = true; - } - - // Set the size field - if (sz < 0) { - buffer[0] = '-'; - sprintf(&buffer[1],"%-9u",(unsigned)-sz); - } else { - sprintf(buffer, "%-10u", (unsigned)sz); - } - memcpy(hdr.size,buffer,10); - - return writeLongName; -} - -// Insert a file into the archive before some other member. This also takes care -// of extracting the necessary flags and information from the file. 
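writeInteger() above and readInteger() in ArchiveReader.cpp are the two halves of the same VBR codec: 7 payload bits per byte, least-significant group first, high bit set on every byte except the last. A self-contained round trip, with the byte count cross-checked against the numVbrBytes() thresholds:

#include <cassert>
#include <string>

static void vbrEncode(unsigned Num, std::string &Out) {
  while (Num >= 0x80) {
    Out += char(0x80 | (Num & 0x7F)); // high bit: more bytes follow
    Num >>= 7;
  }
  Out += char(Num);                   // high bit clear: final byte
}

static unsigned vbrDecode(const char *&At) {
  unsigned Shift = 0, Result = 0;
  do {
    Result |= unsigned(*At & 0x7F) << Shift;
    Shift += 7;
  } while (*At++ & 0x80);
  return Result;
}

int main() {
  std::string Buf;
  vbrEncode(1000000, Buf);
  assert(Buf.size() == 3);            // 2^14 <= 1000000 < 2^21, per numVbrBytes
  const char *P = Buf.c_str();
  assert(vbrDecode(P) == 1000000);
  return 0;
}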
-bool -Archive::addFileBefore(const sys::Path& filePath, iterator where, - std::string* ErrMsg) { - bool Exists; - if (sys::fs::exists(filePath.str(), Exists) || !Exists) { - if (ErrMsg) - *ErrMsg = "Can not add a non-existent file to archive"; - return true; - } - - ArchiveMember* mbr = new ArchiveMember(this); - - mbr->data = 0; - mbr->path = filePath; - const sys::FileStatus *FSInfo = mbr->path.getFileStatus(false, ErrMsg); - if (!FSInfo) { - delete mbr; - return true; - } - mbr->info = *FSInfo; - - unsigned flags = 0; - bool hasSlash = filePath.str().find('/') != std::string::npos; - if (hasSlash) - flags |= ArchiveMember::HasPathFlag; - if (hasSlash || filePath.str().length() > 15) - flags |= ArchiveMember::HasLongFilenameFlag; - - sys::fs::file_magic type; - if (sys::fs::identify_magic(mbr->path.str(), type)) - type = sys::fs::file_magic::unknown; - switch (type) { - case sys::fs::file_magic::bitcode: - flags |= ArchiveMember::BitcodeFlag; - break; - default: - break; - } - mbr->flags = flags; - members.insert(where,mbr); - return false; -} - -// Write one member out to the file. -bool -Archive::writeMember( - const ArchiveMember& member, - std::ofstream& ARFile, - bool CreateSymbolTable, - bool TruncateNames, - std::string* ErrMsg -) { - - unsigned filepos = ARFile.tellp(); - filepos -= 8; - - // Get the data and its size either from the - // member's in-memory data or directly from the file. - size_t fSize = member.getSize(); - const char *data = (const char*)member.getData(); - MemoryBuffer *mFile = 0; - if (!data) { - OwningPtr File; - if (error_code ec = MemoryBuffer::getFile(member.getPath().c_str(), File)) { - if (ErrMsg) - *ErrMsg = ec.message(); - return true; - } - mFile = File.take(); - data = mFile->getBufferStart(); - fSize = mFile->getBufferSize(); - } - - // Now that we have the data in memory, update the - // symbol table if it's a bitcode file. - if (CreateSymbolTable && member.isBitcode()) { - std::vector symbols; - std::string FullMemberName = archPath.str() + "(" + member.getPath().str() - + ")"; - Module* M = - GetBitcodeSymbols(data, fSize, FullMemberName, Context, symbols, ErrMsg); - - // If the bitcode parsed successfully - if ( M ) { - for (std::vector::iterator SI = symbols.begin(), - SE = symbols.end(); SI != SE; ++SI) { - - std::pair Res = - symTab.insert(std::make_pair(*SI,filepos)); - - if (Res.second) { - symTabSize += SI->length() + - numVbrBytes(SI->length()) + - numVbrBytes(filepos); - } - } - // We don't need this module any more. - delete M; - } else { - delete mFile; - if (ErrMsg) - *ErrMsg = "Can't parse bitcode member: " + member.getPath().str() - + ": " + *ErrMsg; - return true; - } - } - - int hdrSize = fSize; - - // Compute the fields of the header - ArchiveMemberHeader Hdr; - bool writeLongName = fillHeader(member,Hdr,hdrSize,TruncateNames); - - // Write header to archive file - ARFile.write((char*)&Hdr, sizeof(Hdr)); - - // Write the long filename if its long - if (writeLongName) { - ARFile.write(member.getPath().str().data(), - member.getPath().str().length()); - } - - // Write the (possibly compressed) member's content to the file. - ARFile.write(data,fSize); - - // Make sure the member is an even length - if ((ARFile.tellp() & 1) == 1) - ARFile << ARFILE_PAD; - - // Close the mapped file if it was opened - delete mFile; - return false; -} - -// Write out the LLVM symbol table as an archive member to the file. 
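The symTabSize bookkeeping in writeMember() above is exactly what the NDEBUG assertion in writeSymbolTable() below re-checks: each entry costs the VBR bytes of the member offset, plus the VBR bytes of the name length, plus the name itself. A worked example for a symbol "main" at file offset 4096 (numVbrBytes reproduced with the same thresholds):

#include <cassert>
#include <string>

static unsigned numVbrBytes(unsigned N) {
  return N < (1u << 7)  ? 1
       : N < (1u << 14) ? 2
       : N < (1u << 21) ? 3
       : N < (1u << 28) ? 4 : 5;
}

int main() {
  std::string Sym = "main";
  unsigned Offset = 4096;
  unsigned EntrySize = numVbrBytes(Offset) +
                       numVbrBytes(unsigned(Sym.length())) +
                       unsigned(Sym.length());
  assert(EntrySize == 2 + 1 + 4); // 7 bytes added to symTabSize
  return 0;
}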
-void -Archive::writeSymbolTable(std::ofstream& ARFile) { - - // Construct the symbol table's header - ArchiveMemberHeader Hdr; - Hdr.init(); - memcpy(Hdr.name,ARFILE_LLVM_SYMTAB_NAME,16); - uint64_t secondsSinceEpoch = sys::TimeValue::now().toEpochTime(); - char buffer[32]; - sprintf(buffer, "%-8o", 0644); - memcpy(Hdr.mode,buffer,8); - sprintf(buffer, "%-6u", sys::Process::GetCurrentUserId()); - memcpy(Hdr.uid,buffer,6); - sprintf(buffer, "%-6u", sys::Process::GetCurrentGroupId()); - memcpy(Hdr.gid,buffer,6); - sprintf(buffer,"%-12u", unsigned(secondsSinceEpoch)); - memcpy(Hdr.date,buffer,12); - sprintf(buffer,"%-10u",symTabSize); - memcpy(Hdr.size,buffer,10); - - // Write the header - ARFile.write((char*)&Hdr, sizeof(Hdr)); - -#ifndef NDEBUG - // Save the starting position of the symbol tables data content. - unsigned startpos = ARFile.tellp(); -#endif - - // Write out the symbols sequentially - for ( Archive::SymTabType::iterator I = symTab.begin(), E = symTab.end(); - I != E; ++I) - { - // Write out the file index - writeInteger(I->second, ARFile); - // Write out the length of the symbol - writeInteger(I->first.length(), ARFile); - // Write out the symbol - ARFile.write(I->first.data(), I->first.length()); - } - -#ifndef NDEBUG - // Now that we're done with the symbol table, get the ending file position - unsigned endpos = ARFile.tellp(); -#endif - - // Make sure that the amount we wrote is what we pre-computed. This is - // critical for file integrity purposes. - assert(endpos - startpos == symTabSize && "Invalid symTabSize computation"); - - // Make sure the symbol table is even sized - if (symTabSize % 2 != 0 ) - ARFile << ARFILE_PAD; -} - -// Write the entire archive to the file specified when the archive was created. -// This writes to a temporary file first. Options are for creating a symbol -// table, flattening the file names (no directories, 15 chars max) and -// compressing each archive member. -bool -Archive::writeToDisk(bool CreateSymbolTable, bool TruncateNames, - std::string* ErrMsg) -{ - // Make sure they haven't opened up the file, not loaded it, - // but are now trying to write it which would wipe out the file. - if (members.empty() && mapfile && mapfile->getBufferSize() > 8) { - if (ErrMsg) - *ErrMsg = "Can't write an archive not opened for writing"; - return true; - } - - // Create a temporary file to store the archive in - sys::Path TmpArchive = archPath; - if (TmpArchive.createTemporaryFileOnDisk(ErrMsg)) - return true; - - // Make sure the temporary gets removed if we crash - sys::RemoveFileOnSignal(TmpArchive); - - // Create archive file for output. - std::ios::openmode io_mode = std::ios::out | std::ios::trunc | - std::ios::binary; - std::ofstream ArchiveFile(TmpArchive.c_str(), io_mode); - - // Check for errors opening or creating archive file. - if (!ArchiveFile.is_open() || ArchiveFile.bad()) { - TmpArchive.eraseFromDisk(); - if (ErrMsg) - *ErrMsg = "Error opening archive file: " + archPath.str(); - return true; - } - - // If we're creating a symbol table, reset it now - if (CreateSymbolTable) { - symTabSize = 0; - symTab.clear(); - } - - // Write magic string to archive. - ArchiveFile << ARFILE_MAGIC; - - // Loop over all member files, and write them out. Note that this also - // builds the symbol table, symTab. 
- for (MembersList::iterator I = begin(), E = end(); I != E; ++I) { - if (writeMember(*I, ArchiveFile, CreateSymbolTable, - TruncateNames, ErrMsg)) { - TmpArchive.eraseFromDisk(); - ArchiveFile.close(); - return true; - } - } - - // Close archive file. - ArchiveFile.close(); - - // Write the symbol table - if (CreateSymbolTable) { - // At this point we have written a file that is a legal archive but it - // doesn't have a symbol table in it. To aid in faster reading and to - // ensure compatibility with other archivers we need to put the symbol - // table first in the file. Unfortunately, this means mapping the file - // we just wrote back in and copying it to the destination file. - sys::Path FinalFilePath = archPath; - - // Map in the archive we just wrote. - { - OwningPtr arch; - if (error_code ec = MemoryBuffer::getFile(TmpArchive.c_str(), arch)) { - if (ErrMsg) - *ErrMsg = ec.message(); - return true; - } - const char* base = arch->getBufferStart(); - - // Open another temporary file in order to avoid invalidating the - // mmapped data - if (FinalFilePath.createTemporaryFileOnDisk(ErrMsg)) - return true; - sys::RemoveFileOnSignal(FinalFilePath); - - std::ofstream FinalFile(FinalFilePath.c_str(), io_mode); - if (!FinalFile.is_open() || FinalFile.bad()) { - TmpArchive.eraseFromDisk(); - if (ErrMsg) - *ErrMsg = "Error opening archive file: " + FinalFilePath.str(); - return true; - } - - // Write the file magic number - FinalFile << ARFILE_MAGIC; - - // If there is a foreign symbol table, put it into the file now. Most - // ar(1) implementations require the symbol table to be first but llvm-ar - // can deal with it being after a foreign symbol table. This ensures - // compatibility with other ar(1) implementations as well as allowing the - // archive to store both native .o and LLVM .bc files, both indexed. - if (foreignST) { - if (writeMember(*foreignST, FinalFile, false, false, ErrMsg)) { - FinalFile.close(); - TmpArchive.eraseFromDisk(); - return true; - } - } - - // Put out the LLVM symbol table now. - writeSymbolTable(FinalFile); - - // Copy the temporary file contents being sure to skip the file's magic - // number. - FinalFile.write(base + sizeof(ARFILE_MAGIC)-1, - arch->getBufferSize()-sizeof(ARFILE_MAGIC)+1); - - // Close up shop - FinalFile.close(); - } // free arch. - - // Move the final file over top of TmpArchive - if (FinalFilePath.renamePathOnDisk(TmpArchive, ErrMsg)) - return true; - } - - // Before we replace the actual archive, we need to forget all the - // members, since they point to data in that old archive. We need to do - // this because we cannot replace an open file on Windows. - cleanUpMemory(); - - if (TmpArchive.renamePathOnDisk(archPath, ErrMsg)) - return true; - - // Set correct read and write permissions after temporary file is moved - // to final destination path. - if (archPath.makeReadableOnDisk(ErrMsg)) - return true; - if (archPath.makeWriteableOnDisk(ErrMsg)) - return true; - - return false; -} diff --git a/contrib/llvm/lib/AsmParser/LLLexer.cpp b/contrib/llvm/lib/AsmParser/LLLexer.cpp index 34f93ff..1e6085b 100644 --- a/contrib/llvm/lib/AsmParser/LLLexer.cpp +++ b/contrib/llvm/lib/AsmParser/LLLexer.cpp @@ -478,12 +478,10 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(private); KEYWORD(linker_private); KEYWORD(linker_private_weak); - KEYWORD(linker_private_weak_def_auto); // FIXME: For backwards compatibility. 
KEYWORD(internal); KEYWORD(available_externally); KEYWORD(linkonce); KEYWORD(linkonce_odr); - KEYWORD(linkonce_odr_auto_hide); KEYWORD(weak); KEYWORD(weak_odr); KEYWORD(appending); @@ -540,6 +538,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(alignstack); KEYWORD(inteldialect); KEYWORD(gc); + KEYWORD(prefix); KEYWORD(ccc); KEYWORD(fastcc); @@ -558,6 +557,8 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(intel_ocl_bicc); KEYWORD(x86_64_sysvcc); KEYWORD(x86_64_win64cc); + KEYWORD(webkit_jscc); + KEYWORD(anyregcc); KEYWORD(cc); KEYWORD(c); @@ -565,7 +566,9 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(attributes); KEYWORD(alwaysinline); + KEYWORD(builtin); KEYWORD(byval); + KEYWORD(cold); KEYWORD(inlinehint); KEYWORD(inreg); KEYWORD(minsize); @@ -581,6 +584,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(noredzone); KEYWORD(noreturn); KEYWORD(nounwind); + KEYWORD(optnone); KEYWORD(optsize); KEYWORD(readnone); KEYWORD(readonly); @@ -661,6 +665,7 @@ lltok::Kind LLLexer::LexIdentifier() { INSTKEYWORD(inttoptr, IntToPtr); INSTKEYWORD(ptrtoint, PtrToInt); INSTKEYWORD(bitcast, BitCast); + INSTKEYWORD(addrspacecast, AddrSpaceCast); INSTKEYWORD(select, Select); INSTKEYWORD(va_arg, VAArg); INSTKEYWORD(ret, Ret); diff --git a/contrib/llvm/lib/AsmParser/LLParser.cpp b/contrib/llvm/lib/AsmParser/LLParser.cpp index f8708ff..3b903cd 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.cpp +++ b/contrib/llvm/lib/AsmParser/LLParser.cpp @@ -19,6 +19,7 @@ #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/Operator.h" #include "llvm/IR/ValueSymbolTable.h" @@ -65,6 +66,9 @@ bool LLParser::ValidateEndOfModule() { ForwardRefInstMetadata.clear(); } + for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++) + UpgradeInstWithTBAATag(InstsWithTBAATag[I]); + // Handle any function attribute group forward references. for (std::map >::iterator I = ForwardRefAttrGroups.begin(), E = ForwardRefAttrGroups.end(); @@ -178,6 +182,8 @@ bool LLParser::ValidateEndOfModule() { for (Module::iterator FI = M->begin(), FE = M->end(); FI != FE; ) UpgradeCallsToIntrinsic(FI++); // must be post-increment, as we remove + UpgradeDebugInfo(*M); + return false; } @@ -242,13 +248,11 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_private: // OptionalLinkage case lltok::kw_linker_private: // OptionalLinkage case lltok::kw_linker_private_weak: // OptionalLinkage - case lltok::kw_linker_private_weak_def_auto: // FIXME: backwards compat. 
case lltok::kw_internal: // OptionalLinkage case lltok::kw_weak: // OptionalLinkage case lltok::kw_weak_odr: // OptionalLinkage case lltok::kw_linkonce: // OptionalLinkage case lltok::kw_linkonce_odr: // OptionalLinkage - case lltok::kw_linkonce_odr_auto_hide: // OptionalLinkage case lltok::kw_appending: // OptionalLinkage case lltok::kw_dllexport: // OptionalLinkage case lltok::kw_common: // OptionalLinkage @@ -623,18 +627,14 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, unsigned Visibility) { assert(Lex.getKind() == lltok::kw_alias); Lex.Lex(); - unsigned Linkage; LocTy LinkageLoc = Lex.getLoc(); - if (ParseOptionalLinkage(Linkage)) + unsigned L; + if (ParseOptionalLinkage(L)) return true; - if (Linkage != GlobalValue::ExternalLinkage && - Linkage != GlobalValue::WeakAnyLinkage && - Linkage != GlobalValue::WeakODRLinkage && - Linkage != GlobalValue::InternalLinkage && - Linkage != GlobalValue::PrivateLinkage && - Linkage != GlobalValue::LinkerPrivateLinkage && - Linkage != GlobalValue::LinkerPrivateWeakLinkage) + GlobalValue::LinkageTypes Linkage = (GlobalValue::LinkageTypes) L; + + if(!GlobalAlias::isValidLinkage(Linkage)) return Error(LinkageLoc, "invalid linkage type for alias"); Constant *Aliasee; @@ -810,13 +810,13 @@ bool LLParser::ParseUnnamedAttrGrp() { assert(Lex.getKind() == lltok::AttrGrpID); unsigned VarID = Lex.getUIntVal(); std::vector unused; - LocTy NoBuiltinLoc; + LocTy BuiltinLoc; Lex.Lex(); if (ParseToken(lltok::equal, "expected '=' here") || ParseToken(lltok::lbrace, "expected '{' here") || ParseFnAttributeValuePairs(NumberedAttrBuilders[VarID], unused, true, - NoBuiltinLoc) || + BuiltinLoc) || ParseToken(lltok::rbrace, "expected end of attribute group")) return true; @@ -830,15 +830,15 @@ bool LLParser::ParseUnnamedAttrGrp() { /// ::= | '=' bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, std::vector &FwdRefAttrGrps, - bool inAttrGrp, LocTy &NoBuiltinLoc) { + bool inAttrGrp, LocTy &BuiltinLoc) { bool HaveError = false; B.clear(); while (true) { lltok::Kind Token = Lex.getKind(); - if (Token == lltok::kw_nobuiltin) - NoBuiltinLoc = Lex.getLoc(); + if (Token == lltok::kw_builtin) + BuiltinLoc = Lex.getLoc(); switch (Token) { default: if (!inAttrGrp) return HaveError; @@ -909,6 +909,8 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, continue; } case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; + case lltok::kw_builtin: B.addAttribute(Attribute::Builtin); break; + case lltok::kw_cold: B.addAttribute(Attribute::Cold); break; case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; @@ -920,6 +922,7 @@ bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; + case lltok::kw_optnone: B.addAttribute(Attribute::OptimizeNone); break; case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; @@ -1157,6 +1160,8 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; case 
lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break; + case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; + case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; case lltok::kw_returned: B.addAttribute(Attribute::Returned); break; case lltok::kw_signext: B.addAttribute(Attribute::SExt); break; case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break; @@ -1164,6 +1169,7 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_alignstack: case lltok::kw_alwaysinline: + case lltok::kw_builtin: case lltok::kw_inlinehint: case lltok::kw_minsize: case lltok::kw_naked: @@ -1175,9 +1181,8 @@ bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { case lltok::kw_noredzone: case lltok::kw_noreturn: case lltok::kw_nounwind: + case lltok::kw_optnone: case lltok::kw_optsize: - case lltok::kw_readnone: - case lltok::kw_readonly: case lltok::kw_returns_twice: case lltok::kw_sanitize_address: case lltok::kw_sanitize_memory: @@ -1222,6 +1227,8 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_alignstack: case lltok::kw_alwaysinline: + case lltok::kw_builtin: + case lltok::kw_cold: case lltok::kw_inlinehint: case lltok::kw_minsize: case lltok::kw_naked: @@ -1233,9 +1240,8 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_noredzone: case lltok::kw_noreturn: case lltok::kw_nounwind: + case lltok::kw_optnone: case lltok::kw_optsize: - case lltok::kw_readnone: - case lltok::kw_readonly: case lltok::kw_returns_twice: case lltok::kw_sanitize_address: case lltok::kw_sanitize_memory: @@ -1246,6 +1252,10 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { case lltok::kw_uwtable: HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; + + case lltok::kw_readnone: + case lltok::kw_readonly: + HaveError |= Error(Lex.getLoc(), "invalid use of attribute on return type"); } Lex.Lex(); @@ -1262,7 +1272,6 @@ bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { /// ::= 'weak_odr' /// ::= 'linkonce' /// ::= 'linkonce_odr' -/// ::= 'linkonce_odr_auto_hide' /// ::= 'available_externally' /// ::= 'appending' /// ::= 'dllexport' @@ -1284,10 +1293,6 @@ bool LLParser::ParseOptionalLinkage(unsigned &Res, bool &HasLinkage) { case lltok::kw_weak_odr: Res = GlobalValue::WeakODRLinkage; break; case lltok::kw_linkonce: Res = GlobalValue::LinkOnceAnyLinkage; break; case lltok::kw_linkonce_odr: Res = GlobalValue::LinkOnceODRLinkage; break; - case lltok::kw_linkonce_odr_auto_hide: - case lltok::kw_linker_private_weak_def_auto: // FIXME: For backwards compat. 
- Res = GlobalValue::LinkOnceODRAutoHideLinkage; - break; case lltok::kw_available_externally: Res = GlobalValue::AvailableExternallyLinkage; break; @@ -1339,6 +1344,8 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) { /// ::= 'spir_kernel' /// ::= 'x86_64_sysvcc' /// ::= 'x86_64_win64cc' +/// ::= 'webkit_jscc' +/// ::= 'anyregcc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { @@ -1361,6 +1368,8 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) { case lltok::kw_intel_ocl_bicc: CC = CallingConv::Intel_OCL_BI; break; case lltok::kw_x86_64_sysvcc: CC = CallingConv::X86_64_SysV; break; case lltok::kw_x86_64_win64cc: CC = CallingConv::X86_64_Win64; break; + case lltok::kw_webkit_jscc: CC = CallingConv::WebKit_JS; break; + case lltok::kw_anyregcc: CC = CallingConv::AnyReg; break; case lltok::kw_cc: { unsigned ArbitraryCC; Lex.Lex(); @@ -1417,6 +1426,9 @@ bool LLParser::ParseInstructionMetadata(Instruction *Inst, } } + if (MDK == LLVMContext::MD_tbaa) + InstsWithTBAATag.push_back(Inst); + // If this is the end of the list, we're done. } while (EatIfPresent(lltok::comma)); return false; @@ -2376,7 +2388,6 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { Lex.Lex(); ValID Fn, Label; - LocTy FnLoc, LabelLoc; if (ParseToken(lltok::lparen, "expected '(' in block address expression") || ParseValID(Fn) || @@ -2406,6 +2417,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { case lltok::kw_fptrunc: case lltok::kw_fpext: case lltok::kw_bitcast: + case lltok::kw_addrspacecast: case lltok::kw_uitofp: case lltok::kw_sitofp: case lltok::kw_fptoui: @@ -2912,7 +2924,7 @@ bool LLParser::ParseTypeAndBasicBlock(BasicBlock *&BB, LocTy &Loc, /// FunctionHeader /// ::= OptionalLinkage OptionalVisibility OptionalCallingConv OptRetAttrs /// OptUnnamedAddr Type GlobalName '(' ArgList ')' OptFuncAttrs OptSection -/// OptionalAlign OptGC +/// OptionalAlign OptGC OptionalPrefix bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { // Parse the linkage. 
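// ---- [editorial sketch, not part of the patch] ----
// The calling-convention hunk above adds two keywords, 'webkit_jscc' and
// 'anyregcc', to ParseOptionalCallingConv. A minimal standalone model of
// that keyword-to-enum dispatch follows; the enum values and helper name
// here are illustrative assumptions, not LLVM's API.
#include <cstring>

enum SketchCallConv { SCC_C, SCC_Fast, SCC_WebKitJS, SCC_AnyReg, SCC_Unknown };

// Maps a lexed keyword to a calling-convention ID; unknown keywords fall
// through to SCC_Unknown so the caller can try the numeric 'cc <n>' form.
static SketchCallConv lookupCallConv(const char *Kw) {
  if (std::strcmp(Kw, "ccc") == 0)         return SCC_C;
  if (std::strcmp(Kw, "fastcc") == 0)      return SCC_Fast;
  if (std::strcmp(Kw, "webkit_jscc") == 0) return SCC_WebKitJS;
  if (std::strcmp(Kw, "anyregcc") == 0)    return SCC_AnyReg;
  return SCC_Unknown;
}
// As far as I can tell from the 3.4 changes, webkit_jscc targets calls into
// WebKit's FTL JIT and anyregcc supports the new patchpoint/stackmap work.
// ---- [end sketch] ----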
LocTy LinkageLoc = Lex.getLoc(); @@ -2946,7 +2958,6 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { case GlobalValue::AvailableExternallyLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::DLLExportLinkage: @@ -2985,27 +2996,30 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { bool isVarArg; AttrBuilder FuncAttrs; std::vector FwdRefAttrGrps; - LocTy NoBuiltinLoc; + LocTy BuiltinLoc; std::string Section; unsigned Alignment; std::string GC; bool UnnamedAddr; LocTy UnnamedAddrLoc; + Constant *Prefix = 0; if (ParseArgumentList(ArgList, isVarArg) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || ParseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false, - NoBuiltinLoc) || + BuiltinLoc) || (EatIfPresent(lltok::kw_section) && ParseStringConstant(Section)) || ParseOptionalAlignment(Alignment) || (EatIfPresent(lltok::kw_gc) && - ParseStringConstant(GC))) + ParseStringConstant(GC)) || + (EatIfPresent(lltok::kw_prefix) && + ParseGlobalTypeAndValue(Prefix))) return true; - if (FuncAttrs.contains(Attribute::NoBuiltin)) - return Error(NoBuiltinLoc, "'nobuiltin' attribute not valid on function"); + if (FuncAttrs.contains(Attribute::Builtin)) + return Error(BuiltinLoc, "'builtin' attribute not valid on function"); // If the alignment was parsed as an attribute, move to the alignment field. if (FuncAttrs.hasAlignmentAttr()) { @@ -3099,6 +3113,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { Fn->setAlignment(Alignment); Fn->setSection(Section); if (!GC.empty()) Fn->setGC(GC.c_str()); + Fn->setPrefixData(Prefix); ForwardRefAttrGroups[Fn] = FwdRefAttrGrps; // Add all of the arguments we parsed to the function. @@ -3164,7 +3179,6 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { // Parse the instructions in this block until we get a terminator. Instruction *Inst; - SmallVector, 4> MetadataOnInst; do { // This instruction may have three possibilities for a name: a) none // specified, b) name specified "%foo =", c) number specified: "%4 =". @@ -3292,6 +3306,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_fptrunc: case lltok::kw_fpext: case lltok::kw_bitcast: + case lltok::kw_addrspacecast: case lltok::kw_uitofp: case lltok::kw_sitofp: case lltok::kw_fptoui: @@ -3929,7 +3944,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, bool isTail) { AttrBuilder RetAttrs, FnAttrs; std::vector FwdRefAttrGrps; - LocTy NoBuiltinLoc; + LocTy BuiltinLoc; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3944,7 +3959,7 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) || ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, - NoBuiltinLoc)) + BuiltinLoc)) return true; // If RetType is a non-function pointer type, then this is the short syntax diff --git a/contrib/llvm/lib/AsmParser/LLParser.h b/contrib/llvm/lib/AsmParser/LLParser.h index 1f2879e..ded776c 100644 --- a/contrib/llvm/lib/AsmParser/LLParser.h +++ b/contrib/llvm/lib/AsmParser/LLParser.h @@ -107,6 +107,8 @@ namespace llvm { }; DenseMap > ForwardRefInstMetadata; + SmallVector InstsWithTBAATag; + // Type resolution handling data structures. The location is set when we // have processed a use of the type but not a definition yet. 
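// ---- [editorial sketch, not part of the patch] ----
// ParseFunctionHeader (in the .cpp hunks above) grows an optional trailing
// clause,
//   define void @f() prefix i32 1234 { ... }
// parsed via EatIfPresent(lltok::kw_prefix) + ParseGlobalTypeAndValue and
// attached with Fn->setPrefixData(Prefix). The "optional keyword, then
// payload" shape, reduced to a toy token stream (all names hypothetical):
#include <cassert>
#include <cstddef>
#include <string>
#include <vector>

struct TokStream {
  std::vector<std::string> Toks;
  size_t Pos;
  explicit TokStream(std::vector<std::string> T) : Toks(T), Pos(0) {}
  bool eatIf(const std::string &Kw) {          // EatIfPresent analogue
    if (Pos < Toks.size() && Toks[Pos] == Kw) { ++Pos; return true; }
    return false;
  }
  std::string next() { assert(Pos < Toks.size()); return Toks[Pos++]; }
};

int main() {
  std::vector<std::string> T;
  T.push_back("prefix"); T.push_back("i32"); T.push_back("1234");
  TokStream TS(T);
  std::string PrefixTy, PrefixVal;
  if (TS.eatIf("prefix")) {    // absence of the clause is not an error
    PrefixTy = TS.next();
    PrefixVal = TS.next();
  }
  assert(PrefixTy == "i32" && PrefixVal == "1234");
  return 0;
}
// ---- [end sketch] ----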
StringMap > NamedTypes; @@ -242,7 +244,7 @@ namespace llvm { bool ParseUnnamedAttrGrp(); bool ParseFnAttributeValuePairs(AttrBuilder &B, std::vector &FwdRefAttrGrps, - bool inAttrGrp, LocTy &NoBuiltinLoc); + bool inAttrGrp, LocTy &BuiltinLoc); // Type Parsing. bool ParseType(Type *&Result, bool AllowVoid = false); diff --git a/contrib/llvm/lib/AsmParser/LLToken.h b/contrib/llvm/lib/AsmParser/LLToken.h index 3fa00a6..786d84d 100644 --- a/contrib/llvm/lib/AsmParser/LLToken.h +++ b/contrib/llvm/lib/AsmParser/LLToken.h @@ -38,9 +38,8 @@ namespace lltok { kw_global, kw_constant, kw_private, kw_linker_private, kw_linker_private_weak, - kw_linker_private_weak_def_auto, // FIXME: For backwards compatibility. kw_internal, - kw_linkonce, kw_linkonce_odr, kw_linkonce_odr_auto_hide, + kw_linkonce, kw_linkonce_odr, kw_weak, kw_weak_odr, kw_appending, kw_dllimport, kw_dllexport, kw_common, kw_available_externally, kw_default, kw_hidden, kw_protected, @@ -81,6 +80,7 @@ namespace lltok { kw_alignstack, kw_inteldialect, kw_gc, + kw_prefix, kw_c, kw_cc, kw_ccc, kw_fastcc, kw_coldcc, @@ -91,12 +91,15 @@ namespace lltok { kw_ptx_kernel, kw_ptx_device, kw_spir_kernel, kw_spir_func, kw_x86_64_sysvcc, kw_x86_64_win64cc, + kw_webkit_jscc, kw_anyregcc, // Attributes: kw_attributes, kw_alwaysinline, kw_sanitize_address, + kw_builtin, kw_byval, + kw_cold, kw_inlinehint, kw_inreg, kw_minsize, @@ -112,6 +115,7 @@ namespace lltok { kw_noredzone, kw_noreturn, kw_nounwind, + kw_optnone, kw_optsize, kw_readnone, kw_readonly, @@ -146,6 +150,7 @@ namespace lltok { kw_phi, kw_call, kw_trunc, kw_zext, kw_sext, kw_fptrunc, kw_fpext, kw_uitofp, kw_sitofp, kw_fptoui, kw_fptosi, kw_inttoptr, kw_ptrtoint, kw_bitcast, + kw_addrspacecast, kw_select, kw_va_arg, kw_landingpad, kw_personality, kw_cleanup, kw_catch, kw_filter, diff --git a/contrib/llvm/lib/AsmParser/Parser.cpp b/contrib/llvm/lib/AsmParser/Parser.cpp index bb4f03b..d777ab9 100644 --- a/contrib/llvm/lib/AsmParser/Parser.cpp +++ b/contrib/llvm/lib/AsmParser/Parser.cpp @@ -43,7 +43,7 @@ Module *llvm::ParseAssembly(MemoryBuffer *F, Module *llvm::ParseAssemblyFile(const std::string &Filename, SMDiagnostic &Err, LLVMContext &Context) { OwningPtr File; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + ec.message()); return 0; diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp index e6ff4b4..ce3b7d1 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.cpp @@ -12,16 +12,19 @@ #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" #include "llvm/AutoUpgrade.h" +#include "llvm/Bitcode/LLVMBitCodes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/IR/Module.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Operator.h" #include "llvm/Support/DataStream.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; enum { @@ -87,7 +90,6 @@ static GlobalValue::LinkageTypes GetDecodedLinkage(unsigned Val) { case 12: return GlobalValue::AvailableExternallyLinkage; case 13: return GlobalValue::LinkerPrivateLinkage; case 14: return 
GlobalValue::LinkerPrivateWeakLinkage; - case 15: return GlobalValue::LinkOnceODRAutoHideLinkage; } } @@ -126,6 +128,7 @@ static int GetDecodedCastOpcode(unsigned Val) { case bitc::CAST_PTRTOINT: return Instruction::PtrToInt; case bitc::CAST_INTTOPTR: return Instruction::IntToPtr; case bitc::CAST_BITCAST : return Instruction::BitCast; + case bitc::CAST_ADDRSPACECAST: return Instruction::AddrSpaceCast; } } static int GetDecodedBinaryOpcode(unsigned Val, Type *Ty) { @@ -448,12 +451,12 @@ static void decodeLLVMAttributesForBitcode(AttrBuilder &B, (EncodedAttrs & 0xffff)); } -bool BitcodeReader::ParseAttributeBlock() { +error_code BitcodeReader::ParseAttributeBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID)) - return Error("Malformed block record"); + return Error(InvalidRecord); if (!MAttributes.empty()) - return Error("Multiple PARAMATTR blocks found!"); + return Error(InvalidMultipleBlocks); SmallVector Record; @@ -466,9 +469,9 @@ bool BitcodeReader::ParseAttributeBlock() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error("Error at end of PARAMATTR block"); + return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return false; + return error_code::success(); case BitstreamEntry::Record: // The interesting case. break; @@ -482,7 +485,7 @@ bool BitcodeReader::ParseAttributeBlock() { case bitc::PARAMATTR_CODE_ENTRY_OLD: { // ENTRY: [paramidx0, attr0, ...] // FIXME: Remove in 4.0. if (Record.size() & 1) - return Error("Invalid ENTRY record"); + return Error(InvalidRecord); for (unsigned i = 0, e = Record.size(); i != e; i += 2) { AttrBuilder B; @@ -506,12 +509,102 @@ bool BitcodeReader::ParseAttributeBlock() { } } -bool BitcodeReader::ParseAttributeGroupBlock() { +// Returns Attribute::None on unrecognized codes. 
+static Attribute::AttrKind GetAttrFromCode(uint64_t Code) { + switch (Code) { + default: + return Attribute::None; + case bitc::ATTR_KIND_ALIGNMENT: + return Attribute::Alignment; + case bitc::ATTR_KIND_ALWAYS_INLINE: + return Attribute::AlwaysInline; + case bitc::ATTR_KIND_BUILTIN: + return Attribute::Builtin; + case bitc::ATTR_KIND_BY_VAL: + return Attribute::ByVal; + case bitc::ATTR_KIND_COLD: + return Attribute::Cold; + case bitc::ATTR_KIND_INLINE_HINT: + return Attribute::InlineHint; + case bitc::ATTR_KIND_IN_REG: + return Attribute::InReg; + case bitc::ATTR_KIND_MIN_SIZE: + return Attribute::MinSize; + case bitc::ATTR_KIND_NAKED: + return Attribute::Naked; + case bitc::ATTR_KIND_NEST: + return Attribute::Nest; + case bitc::ATTR_KIND_NO_ALIAS: + return Attribute::NoAlias; + case bitc::ATTR_KIND_NO_BUILTIN: + return Attribute::NoBuiltin; + case bitc::ATTR_KIND_NO_CAPTURE: + return Attribute::NoCapture; + case bitc::ATTR_KIND_NO_DUPLICATE: + return Attribute::NoDuplicate; + case bitc::ATTR_KIND_NO_IMPLICIT_FLOAT: + return Attribute::NoImplicitFloat; + case bitc::ATTR_KIND_NO_INLINE: + return Attribute::NoInline; + case bitc::ATTR_KIND_NON_LAZY_BIND: + return Attribute::NonLazyBind; + case bitc::ATTR_KIND_NO_RED_ZONE: + return Attribute::NoRedZone; + case bitc::ATTR_KIND_NO_RETURN: + return Attribute::NoReturn; + case bitc::ATTR_KIND_NO_UNWIND: + return Attribute::NoUnwind; + case bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE: + return Attribute::OptimizeForSize; + case bitc::ATTR_KIND_OPTIMIZE_NONE: + return Attribute::OptimizeNone; + case bitc::ATTR_KIND_READ_NONE: + return Attribute::ReadNone; + case bitc::ATTR_KIND_READ_ONLY: + return Attribute::ReadOnly; + case bitc::ATTR_KIND_RETURNED: + return Attribute::Returned; + case bitc::ATTR_KIND_RETURNS_TWICE: + return Attribute::ReturnsTwice; + case bitc::ATTR_KIND_S_EXT: + return Attribute::SExt; + case bitc::ATTR_KIND_STACK_ALIGNMENT: + return Attribute::StackAlignment; + case bitc::ATTR_KIND_STACK_PROTECT: + return Attribute::StackProtect; + case bitc::ATTR_KIND_STACK_PROTECT_REQ: + return Attribute::StackProtectReq; + case bitc::ATTR_KIND_STACK_PROTECT_STRONG: + return Attribute::StackProtectStrong; + case bitc::ATTR_KIND_STRUCT_RET: + return Attribute::StructRet; + case bitc::ATTR_KIND_SANITIZE_ADDRESS: + return Attribute::SanitizeAddress; + case bitc::ATTR_KIND_SANITIZE_THREAD: + return Attribute::SanitizeThread; + case bitc::ATTR_KIND_SANITIZE_MEMORY: + return Attribute::SanitizeMemory; + case bitc::ATTR_KIND_UW_TABLE: + return Attribute::UWTable; + case bitc::ATTR_KIND_Z_EXT: + return Attribute::ZExt; + } +} + +error_code BitcodeReader::ParseAttrKind(uint64_t Code, + Attribute::AttrKind *Kind) { + *Kind = GetAttrFromCode(Code); + if (*Kind == Attribute::None) + return Error(InvalidValue); + return error_code::success(); +} + +error_code BitcodeReader::ParseAttributeGroupBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID)) - return Error("Malformed block record"); + return Error(InvalidRecord); if (!MAttributeGroups.empty()) - return Error("Multiple PARAMATTR_GROUP blocks found!"); + return Error(InvalidMultipleBlocks); SmallVector Record; @@ -522,9 +615,9 @@ bool BitcodeReader::ParseAttributeGroupBlock() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. 
case BitstreamEntry::Error: - return Error("Error at end of PARAMATTR_GROUP block"); + return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return false; + return error_code::success(); case BitstreamEntry::Record: // The interesting case. break; @@ -537,7 +630,7 @@ bool BitcodeReader::ParseAttributeGroupBlock() { break; case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...] if (Record.size() < 3) - return Error("Invalid ENTRY record"); + return Error(InvalidRecord); uint64_t GrpID = Record[0]; uint64_t Idx = Record[1]; // Index of the object this attribute refers to. @@ -545,9 +638,16 @@ bool BitcodeReader::ParseAttributeGroupBlock() { AttrBuilder B; for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Record[i] == 0) { // Enum attribute - B.addAttribute(Attribute::AttrKind(Record[++i])); + Attribute::AttrKind Kind; + if (error_code EC = ParseAttrKind(Record[++i], &Kind)) + return EC; + + B.addAttribute(Kind); } else if (Record[i] == 1) { // Align attribute - if (Attribute::AttrKind(Record[++i]) == Attribute::Alignment) + Attribute::AttrKind Kind; + if (error_code EC = ParseAttrKind(Record[++i], &Kind)) + return EC; + if (Kind == Attribute::Alignment) B.addAlignmentAttr(Record[++i]); else B.addStackAlignmentAttr(Record[++i]); @@ -581,16 +681,16 @@ bool BitcodeReader::ParseAttributeGroupBlock() { } } -bool BitcodeReader::ParseTypeTable() { +error_code BitcodeReader::ParseTypeTable() { if (Stream.EnterSubBlock(bitc::TYPE_BLOCK_ID_NEW)) - return Error("Malformed block record"); + return Error(InvalidRecord); return ParseTypeTableBody(); } -bool BitcodeReader::ParseTypeTableBody() { +error_code BitcodeReader::ParseTypeTableBody() { if (!TypeList.empty()) - return Error("Multiple TYPE_BLOCKs found!"); + return Error(InvalidMultipleBlocks); SmallVector Record; unsigned NumRecords = 0; @@ -604,12 +704,11 @@ bool BitcodeReader::ParseTypeTableBody() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - Error("Error in the type table block"); - return true; + return Error(MalformedBlock); case BitstreamEntry::EndBlock: if (NumRecords != TypeList.size()) - return Error("Invalid type forward reference in TYPE_BLOCK"); - return false; + return Error(MalformedBlock); + return error_code::success(); case BitstreamEntry::Record: // The interesting case. break; @@ -619,12 +718,13 @@ bool BitcodeReader::ParseTypeTableBody() { Record.clear(); Type *ResultTy = 0; switch (Stream.readRecord(Entry.ID, Record)) { - default: return Error("unknown type in type table"); + default: + return Error(InvalidValue); case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] // TYPE_CODE_NUMENTRY contains a count of the number of types in the // type list. This allows us to reserve space. 
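// ---- [editorial sketch, not part of the patch] ----
// The NUMENTRY record handled just below sizes TypeList up front; each
// later type record then fills exactly one slot, and a record that would
// write past the announced count is rejected (InvalidTYPETable). A reduced
// model of that reserve-then-fill protocol, with strings standing in for
// types:
#include <string>
#include <vector>

struct SketchTypeTable {
  std::vector<std::string> TypeList;
  unsigned NumRecords;
  SketchTypeTable() : NumRecords(0) {}

  void announce(unsigned Count) { TypeList.resize(Count); } // NUMENTRY
  bool define(const std::string &Ty) {       // one TYPE_CODE_* record
    if (NumRecords >= TypeList.size())
      return false;                          // "invalid TYPE table"
    TypeList[NumRecords++] = Ty;
    return true;
  }
};
// ---- [end sketch] ----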
if (Record.size() < 1) - return Error("Invalid TYPE_CODE_NUMENTRY record"); + return Error(InvalidRecord); TypeList.resize(Record[0]); continue; case bitc::TYPE_CODE_VOID: // VOID @@ -659,19 +759,20 @@ bool BitcodeReader::ParseTypeTableBody() { break; case bitc::TYPE_CODE_INTEGER: // INTEGER: [width] if (Record.size() < 1) - return Error("Invalid Integer type record"); + return Error(InvalidRecord); ResultTy = IntegerType::get(Context, Record[0]); break; case bitc::TYPE_CODE_POINTER: { // POINTER: [pointee type] or // [pointee type, address space] if (Record.size() < 1) - return Error("Invalid POINTER type record"); + return Error(InvalidRecord); unsigned AddressSpace = 0; if (Record.size() == 2) AddressSpace = Record[1]; ResultTy = getTypeByID(Record[0]); - if (ResultTy == 0) return Error("invalid element type in pointer type"); + if (ResultTy == 0) + return Error(InvalidType); ResultTy = PointerType::get(ResultTy, AddressSpace); break; } @@ -679,7 +780,7 @@ bool BitcodeReader::ParseTypeTableBody() { // FIXME: attrid is dead, remove it in LLVM 4.0 // FUNCTION: [vararg, attrid, retty, paramty x N] if (Record.size() < 3) - return Error("Invalid FUNCTION type record"); + return Error(InvalidRecord); SmallVector ArgTys; for (unsigned i = 3, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) @@ -690,7 +791,7 @@ bool BitcodeReader::ParseTypeTableBody() { ResultTy = getTypeByID(Record[2]); if (ResultTy == 0 || ArgTys.size() < Record.size()-3) - return Error("invalid type in function type"); + return Error(InvalidType); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; @@ -698,7 +799,7 @@ bool BitcodeReader::ParseTypeTableBody() { case bitc::TYPE_CODE_FUNCTION: { // FUNCTION: [vararg, retty, paramty x N] if (Record.size() < 2) - return Error("Invalid FUNCTION type record"); + return Error(InvalidRecord); SmallVector ArgTys; for (unsigned i = 2, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) @@ -709,14 +810,14 @@ bool BitcodeReader::ParseTypeTableBody() { ResultTy = getTypeByID(Record[1]); if (ResultTy == 0 || ArgTys.size() < Record.size()-2) - return Error("invalid type in function type"); + return Error(InvalidType); ResultTy = FunctionType::get(ResultTy, ArgTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_ANON: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) - return Error("Invalid STRUCT type record"); + return Error(InvalidRecord); SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) @@ -725,21 +826,21 @@ bool BitcodeReader::ParseTypeTableBody() { break; } if (EltTys.size() != Record.size()-1) - return Error("invalid type in struct type"); + return Error(InvalidType); ResultTy = StructType::get(Context, EltTys, Record[0]); break; } case bitc::TYPE_CODE_STRUCT_NAME: // STRUCT_NAME: [strchr x N] if (ConvertToString(Record, 0, TypeName)) - return Error("Invalid STRUCT_NAME record"); + return Error(InvalidRecord); continue; case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) - return Error("Invalid STRUCT type record"); + return Error(InvalidRecord); if (NumRecords >= TypeList.size()) - return Error("invalid TYPE table"); + return Error(InvalidTYPETable); // Check to see if this was forward referenced, if so fill in the temp. 
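// ---- [editorial sketch, not part of the patch] ----
// The STRUCT_NAMED and OPAQUE handling around this point may find its
// TypeList slot already populated: a use of the type was decoded before its
// definition, so a placeholder struct was created, and the defining record
// completes that same object in place rather than allocating a new one.
// In miniature:
#include <cassert>
#include <string>
#include <vector>

struct SketchStructTy {
  std::string Name;
  bool Opaque;                        // true until a body record arrives
  std::vector<int> EltTypeIDs;
  SketchStructTy() : Opaque(true) {}
};

int main() {
  std::vector<SketchStructTy *> TypeList(2, (SketchStructTy *)0);

  // A use of type #1 arrives first: create a named placeholder.
  if (!TypeList[1]) TypeList[1] = new SketchStructTy();
  TypeList[1]->Name = "outer";

  // Later, the STRUCT_NAMED record for slot 1 fills in the body.
  TypeList[1]->EltTypeIDs.push_back(0);
  TypeList[1]->Opaque = false;

  assert(!TypeList[1]->Opaque && TypeList[1]->Name == "outer");
  delete TypeList[1];
  return 0;
}
// ---- [end sketch] ----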
StructType *Res = cast_or_null(TypeList[NumRecords]); @@ -758,17 +859,17 @@ bool BitcodeReader::ParseTypeTableBody() { break; } if (EltTys.size() != Record.size()-1) - return Error("invalid STRUCT type record"); + return Error(InvalidRecord); Res->setBody(EltTys, Record[0]); ResultTy = Res; break; } case bitc::TYPE_CODE_OPAQUE: { // OPAQUE: [] if (Record.size() != 1) - return Error("Invalid OPAQUE type record"); + return Error(InvalidRecord); if (NumRecords >= TypeList.size()) - return Error("invalid TYPE table"); + return Error(InvalidTYPETable); // Check to see if this was forward referenced, if so fill in the temp. StructType *Res = cast_or_null(TypeList[NumRecords]); @@ -783,33 +884,33 @@ bool BitcodeReader::ParseTypeTableBody() { } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) - return Error("Invalid ARRAY type record"); + return Error(InvalidRecord); if ((ResultTy = getTypeByID(Record[1]))) ResultTy = ArrayType::get(ResultTy, Record[0]); else - return Error("Invalid ARRAY type element"); + return Error(InvalidType); break; case bitc::TYPE_CODE_VECTOR: // VECTOR: [numelts, eltty] if (Record.size() < 2) - return Error("Invalid VECTOR type record"); + return Error(InvalidRecord); if ((ResultTy = getTypeByID(Record[1]))) ResultTy = VectorType::get(ResultTy, Record[0]); else - return Error("Invalid ARRAY type element"); + return Error(InvalidType); break; } if (NumRecords >= TypeList.size()) - return Error("invalid TYPE table"); + return Error(InvalidTYPETable); assert(ResultTy && "Didn't read a type?"); assert(TypeList[NumRecords] == 0 && "Already read type?"); TypeList[NumRecords++] = ResultTy; } } -bool BitcodeReader::ParseValueSymbolTable() { +error_code BitcodeReader::ParseValueSymbolTable() { if (Stream.EnterSubBlock(bitc::VALUE_SYMTAB_BLOCK_ID)) - return Error("Malformed block record"); + return Error(InvalidRecord); SmallVector Record; @@ -821,9 +922,9 @@ bool BitcodeReader::ParseValueSymbolTable() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error("malformed value symbol table block"); + return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return false; + return error_code::success(); case BitstreamEntry::Record: // The interesting case. 
break; @@ -836,10 +937,10 @@ bool BitcodeReader::ParseValueSymbolTable() { break; case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N] if (ConvertToString(Record, 1, ValueName)) - return Error("Invalid VST_ENTRY record"); + return Error(InvalidRecord); unsigned ValueID = Record[0]; if (ValueID >= ValueList.size()) - return Error("Invalid Value ID in VST_ENTRY record"); + return Error(InvalidRecord); Value *V = ValueList[ValueID]; V->setName(StringRef(ValueName.data(), ValueName.size())); @@ -848,10 +949,10 @@ bool BitcodeReader::ParseValueSymbolTable() { } case bitc::VST_CODE_BBENTRY: { if (ConvertToString(Record, 1, ValueName)) - return Error("Invalid VST_BBENTRY record"); + return Error(InvalidRecord); BasicBlock *BB = getBasicBlock(Record[0]); if (BB == 0) - return Error("Invalid BB ID in VST_BBENTRY record"); + return Error(InvalidRecord); BB->setName(StringRef(ValueName.data(), ValueName.size())); ValueName.clear(); @@ -861,11 +962,11 @@ bool BitcodeReader::ParseValueSymbolTable() { } } -bool BitcodeReader::ParseMetadata() { +error_code BitcodeReader::ParseMetadata() { unsigned NextMDValueNo = MDValueList.size(); if (Stream.EnterSubBlock(bitc::METADATA_BLOCK_ID)) - return Error("Malformed block record"); + return Error(InvalidRecord); SmallVector Record; @@ -876,10 +977,9 @@ bool BitcodeReader::ParseMetadata() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - Error("malformed metadata block"); - return true; + return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return false; + return error_code::success(); case BitstreamEntry::Record: // The interesting case. break; @@ -908,7 +1008,7 @@ bool BitcodeReader::ParseMetadata() { for (unsigned i = 0; i != Size; ++i) { MDNode *MD = dyn_cast(MDValueList.getValueFwdRef(Record[i])); if (MD == 0) - return Error("Malformed metadata record"); + return Error(InvalidRecord); NMD->addOperand(MD); } break; @@ -918,13 +1018,14 @@ bool BitcodeReader::ParseMetadata() { // fall-through case bitc::METADATA_NODE: { if (Record.size() % 2 == 1) - return Error("Invalid METADATA_NODE record"); + return Error(InvalidRecord); unsigned Size = Record.size(); SmallVector Elts; for (unsigned i = 0; i != Size; i += 2) { Type *Ty = getTypeByID(Record[i]); - if (!Ty) return Error("Invalid METADATA_NODE record"); + if (!Ty) + return Error(InvalidRecord); if (Ty->isMetadataTy()) Elts.push_back(MDValueList.getValueFwdRef(Record[i+1])); else if (!Ty->isVoidTy()) @@ -945,14 +1046,14 @@ bool BitcodeReader::ParseMetadata() { } case bitc::METADATA_KIND: { if (Record.size() < 2) - return Error("Invalid METADATA_KIND record"); + return Error(InvalidRecord); unsigned Kind = Record[0]; SmallString<8> Name(Record.begin()+1, Record.end()); unsigned NewKind = TheModule->getMDKindID(Name.str()); if (!MDKindMap.insert(std::make_pair(Kind, NewKind)).second) - return Error("Conflicting METADATA_KIND records"); + return Error(ConflictingMETADATA_KINDRecords); break; } } @@ -972,12 +1073,14 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { /// ResolveGlobalAndAliasInits - Resolve all of the initializers for global /// values and aliases that we can. 
-bool BitcodeReader::ResolveGlobalAndAliasInits() { +error_code BitcodeReader::ResolveGlobalAndAliasInits() { std::vector > GlobalInitWorklist; std::vector > AliasInitWorklist; + std::vector > FunctionPrefixWorklist; GlobalInitWorklist.swap(GlobalInits); AliasInitWorklist.swap(AliasInits); + FunctionPrefixWorklist.swap(FunctionPrefixes); while (!GlobalInitWorklist.empty()) { unsigned ValID = GlobalInitWorklist.back().second; @@ -988,7 +1091,7 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() { if (Constant *C = dyn_cast(ValueList[ValID])) GlobalInitWorklist.back().first->setInitializer(C); else - return Error("Global variable initializer is not a constant!"); + return Error(ExpectedConstant); } GlobalInitWorklist.pop_back(); } @@ -1001,11 +1104,25 @@ bool BitcodeReader::ResolveGlobalAndAliasInits() { if (Constant *C = dyn_cast(ValueList[ValID])) AliasInitWorklist.back().first->setAliasee(C); else - return Error("Alias initializer is not a constant!"); + return Error(ExpectedConstant); } AliasInitWorklist.pop_back(); } - return false; + + while (!FunctionPrefixWorklist.empty()) { + unsigned ValID = FunctionPrefixWorklist.back().second; + if (ValID >= ValueList.size()) { + FunctionPrefixes.push_back(FunctionPrefixWorklist.back()); + } else { + if (Constant *C = dyn_cast(ValueList[ValID])) + FunctionPrefixWorklist.back().first->setPrefixData(C); + else + return Error(ExpectedConstant); + } + FunctionPrefixWorklist.pop_back(); + } + + return error_code::success(); } static APInt ReadWideAPInt(ArrayRef Vals, unsigned TypeBits) { @@ -1016,9 +1133,9 @@ static APInt ReadWideAPInt(ArrayRef Vals, unsigned TypeBits) { return APInt(TypeBits, Words); } -bool BitcodeReader::ParseConstants() { +error_code BitcodeReader::ParseConstants() { if (Stream.EnterSubBlock(bitc::CONSTANTS_BLOCK_ID)) - return Error("Malformed block record"); + return Error(InvalidRecord); SmallVector Record; @@ -1031,15 +1148,15 @@ bool BitcodeReader::ParseConstants() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error("malformed block record in AST file"); + return Error(MalformedBlock); case BitstreamEntry::EndBlock: if (NextCstNo != ValueList.size()) - return Error("Invalid constant reference!"); + return Error(InvalidConstantReference); // Once all the constants have been read, go through and resolve forward // references. ValueList.ResolveConstantForwardRefs(); - return false; + return error_code::success(); case BitstreamEntry::Record: // The interesting case. break; @@ -1056,9 +1173,9 @@ bool BitcodeReader::ParseConstants() { break; case bitc::CST_CODE_SETTYPE: // SETTYPE: [typeid] if (Record.empty()) - return Error("Malformed CST_SETTYPE record"); + return Error(InvalidRecord); if (Record[0] >= TypeList.size()) - return Error("Invalid Type ID in CST_SETTYPE record"); + return Error(InvalidRecord); CurTy = TypeList[Record[0]]; continue; // Skip the ValueList manipulation. 
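// ---- [editorial sketch, not part of the patch] ----
// The INTEGER case below goes through decodeSignRotatedValue (unchanged by
// this patch): bitcode stores signed constants with the sign in bit 0 so
// that small negative numbers stay small under VBR encoding. My
// reconstruction of that decoder, with spot checks:
#include <cassert>
#include <stdint.h>

static uint64_t decodeSignRotated(uint64_t V) {
  if ((V & 1) == 0) return V >> 1;      // even encodes a non-negative value
  if (V != 1)       return -(V >> 1);   // odd encodes a negative value
  return 1ULL << 63;                    // bare 1 encodes INT64_MIN
}

int main() {
  assert(decodeSignRotated(0) == 0);
  assert(decodeSignRotated(2) == 1);                 // +1 encodes as 2
  assert((int64_t)decodeSignRotated(3) == -1);       // -1 encodes as 3
  assert(decodeSignRotated(1) == (1ULL << 63));      // INT64_MIN encodes as 1
  return 0;
}
// ---- [end sketch] ----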
case bitc::CST_CODE_NULL: // NULL @@ -1066,12 +1183,12 @@ bool BitcodeReader::ParseConstants() { break; case bitc::CST_CODE_INTEGER: // INTEGER: [intval] if (!CurTy->isIntegerTy() || Record.empty()) - return Error("Invalid CST_INTEGER record"); + return Error(InvalidRecord); V = ConstantInt::get(CurTy, decodeSignRotatedValue(Record[0])); break; case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval] if (!CurTy->isIntegerTy() || Record.empty()) - return Error("Invalid WIDE_INTEGER record"); + return Error(InvalidRecord); APInt VInt = ReadWideAPInt(Record, cast(CurTy)->getBitWidth()); @@ -1081,7 +1198,7 @@ bool BitcodeReader::ParseConstants() { } case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] if (Record.empty()) - return Error("Invalid FLOAT record"); + return Error(InvalidRecord); if (CurTy->isHalfTy()) V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf, APInt(16, (uint16_t)Record[0]))); @@ -1111,7 +1228,7 @@ bool BitcodeReader::ParseConstants() { case bitc::CST_CODE_AGGREGATE: {// AGGREGATE: [n x value number] if (Record.empty()) - return Error("Invalid CST_AGGREGATE record"); + return Error(InvalidRecord); unsigned Size = Record.size(); SmallVector Elts; @@ -1139,7 +1256,7 @@ bool BitcodeReader::ParseConstants() { case bitc::CST_CODE_STRING: // STRING: [values] case bitc::CST_CODE_CSTRING: { // CSTRING: [values] if (Record.empty()) - return Error("Invalid CST_STRING record"); + return Error(InvalidRecord); SmallString<16> Elts(Record.begin(), Record.end()); V = ConstantDataArray::getString(Context, Elts, @@ -1148,7 +1265,7 @@ bool BitcodeReader::ParseConstants() { } case bitc::CST_CODE_DATA: {// DATA: [n x value] if (Record.empty()) - return Error("Invalid CST_DATA record"); + return Error(InvalidRecord); Type *EltTy = cast(CurTy)->getElementType(); unsigned Size = Record.size(); @@ -1193,13 +1310,14 @@ bool BitcodeReader::ParseConstants() { else V = ConstantDataArray::get(Context, Elts); } else { - return Error("Unknown element type in CE_DATA"); + return Error(InvalidTypeForValue); } break; } case bitc::CST_CODE_CE_BINOP: { // CE_BINOP: [opcode, opval, opval] - if (Record.size() < 3) return Error("Invalid CE_BINOP record"); + if (Record.size() < 3) + return Error(InvalidRecord); int Opc = GetDecodedBinaryOpcode(Record[0], CurTy); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown binop. @@ -1229,25 +1347,30 @@ bool BitcodeReader::ParseConstants() { break; } case bitc::CST_CODE_CE_CAST: { // CE_CAST: [opcode, opty, opval] - if (Record.size() < 3) return Error("Invalid CE_CAST record"); + if (Record.size() < 3) + return Error(InvalidRecord); int Opc = GetDecodedCastOpcode(Record[0]); if (Opc < 0) { V = UndefValue::get(CurTy); // Unknown cast. 
} else { Type *OpTy = getTypeByID(Record[1]); - if (!OpTy) return Error("Invalid CE_CAST record"); + if (!OpTy) + return Error(InvalidRecord); Constant *Op = ValueList.getConstantFwdRef(Record[2], OpTy); - V = ConstantExpr::getCast(Opc, Op, CurTy); + V = UpgradeBitCastExpr(Opc, Op, CurTy); + if (!V) V = ConstantExpr::getCast(Opc, Op, CurTy); } break; } case bitc::CST_CODE_CE_INBOUNDS_GEP: case bitc::CST_CODE_CE_GEP: { // CE_GEP: [n x operands] - if (Record.size() & 1) return Error("Invalid CE_GEP record"); + if (Record.size() & 1) + return Error(InvalidRecord); SmallVector Elts; for (unsigned i = 0, e = Record.size(); i != e; i += 2) { Type *ElTy = getTypeByID(Record[i]); - if (!ElTy) return Error("Invalid CE_GEP record"); + if (!ElTy) + return Error(InvalidRecord); Elts.push_back(ValueList.getConstantFwdRef(Record[i+1], ElTy)); } ArrayRef Indices(Elts.begin() + 1, Elts.end()); @@ -1256,19 +1379,31 @@ bool BitcodeReader::ParseConstants() { bitc::CST_CODE_CE_INBOUNDS_GEP); break; } - case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#] - if (Record.size() < 3) return Error("Invalid CE_SELECT record"); - V = ConstantExpr::getSelect( - ValueList.getConstantFwdRef(Record[0], - Type::getInt1Ty(Context)), - ValueList.getConstantFwdRef(Record[1],CurTy), - ValueList.getConstantFwdRef(Record[2],CurTy)); + case bitc::CST_CODE_CE_SELECT: { // CE_SELECT: [opval#, opval#, opval#] + if (Record.size() < 3) + return Error(InvalidRecord); + + Type *SelectorTy = Type::getInt1Ty(Context); + + // If CurTy is a vector of length n, then Record[0] must be a + // vector. Otherwise, it must be a single bit. + if (VectorType *VTy = dyn_cast(CurTy)) + SelectorTy = VectorType::get(Type::getInt1Ty(Context), + VTy->getNumElements()); + + V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0], + SelectorTy), + ValueList.getConstantFwdRef(Record[1],CurTy), + ValueList.getConstantFwdRef(Record[2],CurTy)); break; + } case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval] - if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record"); + if (Record.size() < 3) + return Error(InvalidRecord); VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); - if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record"); + if (OpTy == 0) + return Error(InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); @@ -1278,7 +1413,7 @@ bool BitcodeReader::ParseConstants() { case bitc::CST_CODE_CE_INSERTELT: { // CE_INSERTELT: [opval, opval, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || OpTy == 0) - return Error("Invalid CE_INSERTELT record"); + return Error(InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy->getElementType()); @@ -1290,7 +1425,7 @@ bool BitcodeReader::ParseConstants() { case bitc::CST_CODE_CE_SHUFFLEVEC: { // CE_SHUFFLEVEC: [opval, opval, opval] VectorType *OpTy = dyn_cast(CurTy); if (Record.size() < 3 || OpTy == 0) - return Error("Invalid CE_SHUFFLEVEC record"); + return Error(InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[1], OpTy); Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), @@ -1304,7 +1439,7 @@ bool BitcodeReader::ParseConstants() { VectorType *OpTy = dyn_cast_or_null(getTypeByID(Record[0])); if (Record.size() < 4 || RTy == 0 || OpTy == 0) - return 
Error("Invalid CE_SHUFVEC_EX record"); + return Error(InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); Type *ShufTy = VectorType::get(Type::getInt32Ty(Context), @@ -1314,9 +1449,11 @@ bool BitcodeReader::ParseConstants() { break; } case bitc::CST_CODE_CE_CMP: { // CE_CMP: [opty, opval, opval, pred] - if (Record.size() < 4) return Error("Invalid CE_CMP record"); + if (Record.size() < 4) + return Error(InvalidRecord); Type *OpTy = getTypeByID(Record[0]); - if (OpTy == 0) return Error("Invalid CE_CMP record"); + if (OpTy == 0) + return Error(InvalidRecord); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); Constant *Op1 = ValueList.getConstantFwdRef(Record[2], OpTy); @@ -1329,16 +1466,17 @@ bool BitcodeReader::ParseConstants() { // This maintains backward compatibility, pre-asm dialect keywords. // FIXME: Remove with the 4.0 release. case bitc::CST_CODE_INLINEASM_OLD: { - if (Record.size() < 2) return Error("Invalid INLINEASM record"); + if (Record.size() < 2) + return Error(InvalidRecord); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = Record[0] >> 1; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) - return Error("Invalid INLINEASM record"); + return Error(InvalidRecord); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) - return Error("Invalid INLINEASM record"); + return Error(InvalidRecord); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; @@ -1352,17 +1490,18 @@ bool BitcodeReader::ParseConstants() { // This version adds support for the asm dialect keywords (e.g., // inteldialect). case bitc::CST_CODE_INLINEASM: { - if (Record.size() < 2) return Error("Invalid INLINEASM record"); + if (Record.size() < 2) + return Error(InvalidRecord); std::string AsmStr, ConstrStr; bool HasSideEffects = Record[0] & 1; bool IsAlignStack = (Record[0] >> 1) & 1; unsigned AsmDialect = Record[0] >> 2; unsigned AsmStrSize = Record[1]; if (2+AsmStrSize >= Record.size()) - return Error("Invalid INLINEASM record"); + return Error(InvalidRecord); unsigned ConstStrSize = Record[2+AsmStrSize]; if (3+AsmStrSize+ConstStrSize > Record.size()) - return Error("Invalid INLINEASM record"); + return Error(InvalidRecord); for (unsigned i = 0; i != AsmStrSize; ++i) AsmStr += (char)Record[2+i]; @@ -1375,12 +1514,15 @@ bool BitcodeReader::ParseConstants() { break; } case bitc::CST_CODE_BLOCKADDRESS:{ - if (Record.size() < 3) return Error("Invalid CE_BLOCKADDRESS record"); + if (Record.size() < 3) + return Error(InvalidRecord); Type *FnTy = getTypeByID(Record[0]); - if (FnTy == 0) return Error("Invalid CE_BLOCKADDRESS record"); + if (FnTy == 0) + return Error(InvalidRecord); Function *Fn = dyn_cast_or_null(ValueList.getConstantFwdRef(Record[1],FnTy)); - if (Fn == 0) return Error("Invalid CE_BLOCKADDRESS record"); + if (Fn == 0) + return Error(InvalidRecord); // If the function is already parsed we can insert the block address right // away. 
@@ -1388,7 +1530,7 @@ bool BitcodeReader::ParseConstants() { Function::iterator BBI = Fn->begin(), BBE = Fn->end(); for (size_t I = 0, E = Record[2]; I != E; ++I) { if (BBI == BBE) - return Error("Invalid blockaddress block #"); + return Error(InvalidID); ++BBI; } V = BlockAddress::get(Fn, BBI); @@ -1411,9 +1553,9 @@ bool BitcodeReader::ParseConstants() { } } -bool BitcodeReader::ParseUseLists() { +error_code BitcodeReader::ParseUseLists() { if (Stream.EnterSubBlock(bitc::USELIST_BLOCK_ID)) - return Error("Malformed block record"); + return Error(InvalidRecord); SmallVector Record; @@ -1424,9 +1566,9 @@ bool BitcodeReader::ParseUseLists() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error("malformed use list block"); + return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return false; + return error_code::success(); case BitstreamEntry::Record: // The interesting case. break; @@ -1440,7 +1582,7 @@ bool BitcodeReader::ParseUseLists() { case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD. unsigned RecordLength = Record.size(); if (RecordLength < 1) - return Error ("Invalid UseList reader!"); + return Error(InvalidRecord); UseListRecords.push_back(Record); break; } @@ -1451,10 +1593,10 @@ bool BitcodeReader::ParseUseLists() { /// RememberAndSkipFunctionBody - When we see the block for a function body, /// remember where it is and then skip it. This lets us lazily deserialize the /// functions. -bool BitcodeReader::RememberAndSkipFunctionBody() { +error_code BitcodeReader::RememberAndSkipFunctionBody() { // Get the function we are talking about. if (FunctionsWithBodies.empty()) - return Error("Insufficient function protos"); + return Error(InsufficientFunctionProtos); Function *Fn = FunctionsWithBodies.back(); FunctionsWithBodies.pop_back(); @@ -1465,15 +1607,15 @@ bool BitcodeReader::RememberAndSkipFunctionBody() { // Skip over the function block for now. if (Stream.SkipBlock()) - return Error("Malformed block record"); - return false; + return Error(InvalidRecord); + return error_code::success(); } -bool BitcodeReader::GlobalCleanup() { +error_code BitcodeReader::GlobalCleanup() { // Patch the initializers for globals and aliases up. ResolveGlobalAndAliasInits(); if (!GlobalInits.empty() || !AliasInits.empty()) - return Error("Malformed global initializer set"); + return Error(MalformedGlobalInitializerSet); // Look for intrinsic functions which need to be upgraded at some point for (Module::iterator FI = TheModule->begin(), FE = TheModule->end(); @@ -1492,14 +1634,14 @@ bool BitcodeReader::GlobalCleanup() { // want lazy deserialization. std::vector >().swap(GlobalInits); std::vector >().swap(AliasInits); - return false; + return error_code::success(); } -bool BitcodeReader::ParseModule(bool Resume) { +error_code BitcodeReader::ParseModule(bool Resume) { if (Resume) Stream.JumpToBit(NextUnreadBit); else if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) - return Error("Malformed block record"); + return Error(InvalidRecord); SmallVector Record; std::vector SectionTable; @@ -1511,8 +1653,7 @@ bool BitcodeReader::ParseModule(bool Resume) { switch (Entry.Kind) { case BitstreamEntry::Error: - Error("malformed module block"); - return true; + return Error(MalformedBlock); case BitstreamEntry::EndBlock: return GlobalCleanup(); @@ -1520,49 +1661,51 @@ bool BitcodeReader::ParseModule(bool Resume) { switch (Entry.ID) { default: // Skip unknown content. 
if (Stream.SkipBlock()) - return Error("Malformed block record"); + return Error(InvalidRecord); break; case bitc::BLOCKINFO_BLOCK_ID: if (Stream.ReadBlockInfoBlock()) - return Error("Malformed BlockInfoBlock"); + return Error(MalformedBlock); break; case bitc::PARAMATTR_BLOCK_ID: - if (ParseAttributeBlock()) - return true; + if (error_code EC = ParseAttributeBlock()) + return EC; break; case bitc::PARAMATTR_GROUP_BLOCK_ID: - if (ParseAttributeGroupBlock()) - return true; + if (error_code EC = ParseAttributeGroupBlock()) + return EC; break; case bitc::TYPE_BLOCK_ID_NEW: - if (ParseTypeTable()) - return true; + if (error_code EC = ParseTypeTable()) + return EC; break; case bitc::VALUE_SYMTAB_BLOCK_ID: - if (ParseValueSymbolTable()) - return true; + if (error_code EC = ParseValueSymbolTable()) + return EC; SeenValueSymbolTable = true; break; case bitc::CONSTANTS_BLOCK_ID: - if (ParseConstants() || ResolveGlobalAndAliasInits()) - return true; + if (error_code EC = ParseConstants()) + return EC; + if (error_code EC = ResolveGlobalAndAliasInits()) + return EC; break; case bitc::METADATA_BLOCK_ID: - if (ParseMetadata()) - return true; + if (error_code EC = ParseMetadata()) + return EC; break; case bitc::FUNCTION_BLOCK_ID: // If this is the first function body we've seen, reverse the // FunctionsWithBodies list. if (!SeenFirstFunctionBody) { std::reverse(FunctionsWithBodies.begin(), FunctionsWithBodies.end()); - if (GlobalCleanup()) - return true; + if (error_code EC = GlobalCleanup()) + return EC; SeenFirstFunctionBody = true; } - if (RememberAndSkipFunctionBody()) - return true; + if (error_code EC = RememberAndSkipFunctionBody()) + return EC; // For streaming bitcode, suspend parsing when we reach the function // bodies. Subsequent materialization calls will resume it when // necessary. For streaming, the function bodies must be at the end of @@ -1571,12 +1714,12 @@ bool BitcodeReader::ParseModule(bool Resume) { // just finish the parse now. if (LazyStreamer && SeenValueSymbolTable) { NextUnreadBit = Stream.GetCurrentBitNo(); - return false; + return error_code::success(); } break; case bitc::USELIST_BLOCK_ID: - if (ParseUseLists()) - return true; + if (error_code EC = ParseUseLists()) + return EC; break; } continue; @@ -1592,11 +1735,12 @@ bool BitcodeReader::ParseModule(bool Resume) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_VERSION: { // VERSION: [version#] if (Record.size() < 1) - return Error("Malformed MODULE_CODE_VERSION"); + return Error(InvalidRecord); // Only version #0 and #1 are supported so far. 
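// ---- [editorial sketch, not part of the patch] ----
// MODULE_CODE_VERSION, handled just below, gates how instruction operands
// are numbered: version 0 keeps absolute value IDs, version 1 switches to
// IDs relative to the current instruction, and anything else is rejected
// as InvalidValue. The gate in isolation:
#include <stdint.h>

struct SketchReaderFlags { bool UseRelativeIDs; };

// Returns false for unknown versions, mirroring the default case below.
static bool applyModuleVersion(uint64_t V, SketchReaderFlags &F) {
  switch (V) {
  case 0: F.UseRelativeIDs = false; return true;
  case 1: F.UseRelativeIDs = true;  return true;
  default: return false;
  }
}
// ---- [end sketch] ----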
unsigned module_version = Record[0]; switch (module_version) { - default: return Error("Unknown bitstream version!"); + default: + return Error(InvalidValue); case 0: UseRelativeIDs = false; break; @@ -1609,21 +1753,21 @@ bool BitcodeReader::ParseModule(bool Resume) { case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error("Invalid MODULE_CODE_TRIPLE record"); + return Error(InvalidRecord); TheModule->setTargetTriple(S); break; } case bitc::MODULE_CODE_DATALAYOUT: { // DATALAYOUT: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error("Invalid MODULE_CODE_DATALAYOUT record"); + return Error(InvalidRecord); TheModule->setDataLayout(S); break; } case bitc::MODULE_CODE_ASM: { // ASM: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error("Invalid MODULE_CODE_ASM record"); + return Error(InvalidRecord); TheModule->setModuleInlineAsm(S); break; } @@ -1631,21 +1775,21 @@ bool BitcodeReader::ParseModule(bool Resume) { // FIXME: Remove in 4.0. std::string S; if (ConvertToString(Record, 0, S)) - return Error("Invalid MODULE_CODE_DEPLIB record"); + return Error(InvalidRecord); // Ignore value. break; } case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error("Invalid MODULE_CODE_SECTIONNAME record"); + return Error(InvalidRecord); SectionTable.push_back(S); break; } case bitc::MODULE_CODE_GCNAME: { // SECTIONNAME: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error("Invalid MODULE_CODE_GCNAME record"); + return Error(InvalidRecord); GCTable.push_back(S); break; } @@ -1654,11 +1798,12 @@ bool BitcodeReader::ParseModule(bool Resume) { // unnamed_addr] case bitc::MODULE_CODE_GLOBALVAR: { if (Record.size() < 6) - return Error("Invalid MODULE_CODE_GLOBALVAR record"); + return Error(InvalidRecord); Type *Ty = getTypeByID(Record[0]); - if (!Ty) return Error("Invalid MODULE_CODE_GLOBALVAR record"); + if (!Ty) + return Error(InvalidRecord); if (!Ty->isPointerTy()) - return Error("Global not a pointer type!"); + return Error(InvalidTypeForValue); unsigned AddressSpace = cast(Ty)->getAddressSpace(); Ty = cast(Ty)->getElementType(); @@ -1668,7 +1813,7 @@ bool BitcodeReader::ParseModule(bool Resume) { std::string Section; if (Record[5]) { if (Record[5]-1 >= SectionTable.size()) - return Error("Invalid section ID"); + return Error(InvalidID); Section = SectionTable[Record[5]-1]; } GlobalValue::VisibilityTypes Visibility = GlobalValue::DefaultVisibility; @@ -1707,15 +1852,16 @@ bool BitcodeReader::ParseModule(bool Resume) { // alignment, section, visibility, gc, unnamed_addr] case bitc::MODULE_CODE_FUNCTION: { if (Record.size() < 8) - return Error("Invalid MODULE_CODE_FUNCTION record"); + return Error(InvalidRecord); Type *Ty = getTypeByID(Record[0]); - if (!Ty) return Error("Invalid MODULE_CODE_FUNCTION record"); + if (!Ty) + return Error(InvalidRecord); if (!Ty->isPointerTy()) - return Error("Function not a pointer type!"); + return Error(InvalidTypeForValue); FunctionType *FTy = dyn_cast(cast(Ty)->getElementType()); if (!FTy) - return Error("Function not a pointer to function type!"); + return Error(InvalidTypeForValue); Function *Func = Function::Create(FTy, GlobalValue::ExternalLinkage, "", TheModule); @@ -1728,19 +1874,21 @@ bool BitcodeReader::ParseModule(bool Resume) { Func->setAlignment((1 << Record[5]) >> 1); if (Record[6]) { if (Record[6]-1 >= SectionTable.size()) - return Error("Invalid 
section ID"); + return Error(InvalidID); Func->setSection(SectionTable[Record[6]-1]); } Func->setVisibility(GetDecodedVisibility(Record[7])); if (Record.size() > 8 && Record[8]) { if (Record[8]-1 > GCTable.size()) - return Error("Invalid GC ID"); + return Error(InvalidID); Func->setGC(GCTable[Record[8]-1].c_str()); } bool UnnamedAddr = false; if (Record.size() > 9) UnnamedAddr = Record[9]; Func->setUnnamedAddr(UnnamedAddr); + if (Record.size() > 10 && Record[10] != 0) + FunctionPrefixes.push_back(std::make_pair(Func, Record[10]-1)); ValueList.push_back(Func); // If this is a function with a body, remember the prototype we are @@ -1755,11 +1903,12 @@ bool BitcodeReader::ParseModule(bool Resume) { // ALIAS: [alias type, aliasee val#, linkage, visibility] case bitc::MODULE_CODE_ALIAS: { if (Record.size() < 3) - return Error("Invalid MODULE_ALIAS record"); + return Error(InvalidRecord); Type *Ty = getTypeByID(Record[0]); - if (!Ty) return Error("Invalid MODULE_ALIAS record"); + if (!Ty) + return Error(InvalidRecord); if (!Ty->isPointerTy()) - return Error("Function not a pointer type!"); + return Error(InvalidTypeForValue); GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]), "", 0, TheModule); @@ -1774,7 +1923,7 @@ bool BitcodeReader::ParseModule(bool Resume) { case bitc::MODULE_CODE_PURGEVALS: // Trim down the value list to the specified size. if (Record.size() < 1 || Record[0] > ValueList.size()) - return Error("Invalid MODULE_PURGEVALS record"); + return Error(InvalidRecord); ValueList.shrinkTo(Record[0]); break; } @@ -1782,10 +1931,11 @@ bool BitcodeReader::ParseModule(bool Resume) { } } -bool BitcodeReader::ParseBitcodeInto(Module *M) { +error_code BitcodeReader::ParseBitcodeInto(Module *M) { TheModule = 0; - if (InitStream()) return true; + if (error_code EC = InitStream()) + return EC; // Sniff for the signature. if (Stream.Read(8) != 'B' || @@ -1794,42 +1944,42 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { Stream.Read(4) != 0xC || Stream.Read(4) != 0xE || Stream.Read(4) != 0xD) - return Error("Invalid bitcode signature"); + return Error(InvalidBitcodeSignature); // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. while (1) { if (Stream.AtEndOfStream()) - return false; + return error_code::success(); BitstreamEntry Entry = Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); switch (Entry.Kind) { case BitstreamEntry::Error: - Error("malformed module file"); - return true; + return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return false; + return error_code::success(); case BitstreamEntry::SubBlock: switch (Entry.ID) { case bitc::BLOCKINFO_BLOCK_ID: if (Stream.ReadBlockInfoBlock()) - return Error("Malformed BlockInfoBlock"); + return Error(MalformedBlock); break; case bitc::MODULE_BLOCK_ID: // Reject multiple MODULE_BLOCK's in a single bitstream. 
if (TheModule) - return Error("Multiple MODULE_BLOCKs in same stream"); + return Error(InvalidMultipleBlocks); TheModule = M; - if (ParseModule(false)) - return true; - if (LazyStreamer) return false; + if (error_code EC = ParseModule(false)) + return EC; + if (LazyStreamer) + return error_code::success(); break; default: if (Stream.SkipBlock()) - return Error("Malformed block record"); + return Error(InvalidRecord); break; } continue; @@ -1842,16 +1992,16 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && Stream.AtEndOfStream()) - return false; + return error_code::success(); - return Error("Invalid record at top-level"); + return Error(InvalidRecord); } } } -bool BitcodeReader::ParseModuleTriple(std::string &Triple) { +error_code BitcodeReader::ParseModuleTriple(std::string &Triple) { if (Stream.EnterSubBlock(bitc::MODULE_BLOCK_ID)) - return Error("Malformed block record"); + return Error(InvalidRecord); SmallVector Record; @@ -1862,9 +2012,9 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error("malformed module block"); + return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return false; + return error_code::success(); case BitstreamEntry::Record: // The interesting case. break; @@ -1876,7 +2026,7 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; if (ConvertToString(Record, 0, S)) - return Error("Invalid MODULE_CODE_TRIPLE record"); + return Error(InvalidRecord); Triple = S; break; } @@ -1885,8 +2035,9 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { } } -bool BitcodeReader::ParseTriple(std::string &Triple) { - if (InitStream()) return true; +error_code BitcodeReader::ParseTriple(std::string &Triple) { + if (error_code EC = InitStream()) + return EC; // Sniff for the signature. if (Stream.Read(8) != 'B' || @@ -1895,7 +2046,7 @@ bool BitcodeReader::ParseTriple(std::string &Triple) { Stream.Read(4) != 0xC || Stream.Read(4) != 0xE || Stream.Read(4) != 0xD) - return Error("Invalid bitcode signature"); + return Error(InvalidBitcodeSignature); // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. @@ -1904,20 +2055,17 @@ bool BitcodeReader::ParseTriple(std::string &Triple) { switch (Entry.Kind) { case BitstreamEntry::Error: - Error("malformed module file"); - return true; + return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return false; + return error_code::success(); case BitstreamEntry::SubBlock: if (Entry.ID == bitc::MODULE_BLOCK_ID) return ParseModuleTriple(Triple); // Ignore other sub-blocks. - if (Stream.SkipBlock()) { - Error("malformed block record in AST file"); - return true; - } + if (Stream.SkipBlock()) + return Error(MalformedBlock); continue; case BitstreamEntry::Record: @@ -1928,9 +2076,9 @@ bool BitcodeReader::ParseTriple(std::string &Triple) { } /// ParseMetadataAttachment - Parse metadata attachments. 
-bool BitcodeReader::ParseMetadataAttachment() { +error_code BitcodeReader::ParseMetadataAttachment() { if (Stream.EnterSubBlock(bitc::METADATA_ATTACHMENT_ID)) - return Error("Malformed block record"); + return Error(InvalidRecord); SmallVector Record; while (1) { @@ -1939,9 +2087,9 @@ bool BitcodeReader::ParseMetadataAttachment() { switch (Entry.Kind) { case BitstreamEntry::SubBlock: // Handled for us already. case BitstreamEntry::Error: - return Error("malformed metadata block"); + return Error(MalformedBlock); case BitstreamEntry::EndBlock: - return false; + return error_code::success(); case BitstreamEntry::Record: // The interesting case. break; @@ -1955,16 +2103,18 @@ bool BitcodeReader::ParseMetadataAttachment() { case bitc::METADATA_ATTACHMENT: { unsigned RecordLength = Record.size(); if (Record.empty() || (RecordLength - 1) % 2 == 1) - return Error ("Invalid METADATA_ATTACHMENT reader!"); + return Error(InvalidRecord); Instruction *Inst = InstructionList[Record[0]]; for (unsigned i = 1; i != RecordLength; i = i+2) { unsigned Kind = Record[i]; DenseMap::iterator I = MDKindMap.find(Kind); if (I == MDKindMap.end()) - return Error("Invalid metadata kind ID"); + return Error(InvalidID); Value *Node = MDValueList.getValueFwdRef(Record[i+1]); Inst->setMetadata(I->second, cast(Node)); + if (I->second == LLVMContext::MD_tbaa) + InstsWithTBAATag.push_back(Inst); } break; } @@ -1973,9 +2123,9 @@ bool BitcodeReader::ParseMetadataAttachment() { } /// ParseFunctionBody - Lazily parse the specified function body block. -bool BitcodeReader::ParseFunctionBody(Function *F) { +error_code BitcodeReader::ParseFunctionBody(Function *F) { if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID)) - return Error("Malformed block record"); + return Error(InvalidRecord); InstructionList.clear(); unsigned ModuleValueListSize = ValueList.size(); @@ -1998,7 +2148,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { switch (Entry.Kind) { case BitstreamEntry::Error: - return Error("Bitcode error in function block"); + return Error(MalformedBlock); case BitstreamEntry::EndBlock: goto OutOfRecordLoop; @@ -2006,20 +2156,24 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) - return Error("Malformed block record"); + return Error(InvalidRecord); break; case bitc::CONSTANTS_BLOCK_ID: - if (ParseConstants()) return true; + if (error_code EC = ParseConstants()) + return EC; NextValueNo = ValueList.size(); break; case bitc::VALUE_SYMTAB_BLOCK_ID: - if (ParseValueSymbolTable()) return true; + if (error_code EC = ParseValueSymbolTable()) + return EC; break; case bitc::METADATA_ATTACHMENT_ID: - if (ParseMetadataAttachment()) return true; + if (error_code EC = ParseMetadataAttachment()) + return EC; break; case bitc::METADATA_BLOCK_ID: - if (ParseMetadata()) return true; + if (error_code EC = ParseMetadata()) + return EC; break; } continue; @@ -2035,10 +2189,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject - return Error("Unknown instruction"); + return Error(InvalidValue); case bitc::FUNC_CODE_DECLAREBLOCKS: // DECLAREBLOCKS: [nblocks] if (Record.size() < 1 || Record[0] == 0) - return Error("Invalid DECLAREBLOCKS record"); + return Error(InvalidRecord); // Create all the basic blocks for the function. 
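// ---- [editorial sketch, not part of the patch] ----
// The METADATA_ATTACHMENT hunk above mirrors the LLParser change earlier in
// this patch: every instruction that receives a !tbaa attachment is pushed
// onto InstsWithTBAATag, so the tags can be post-processed in a single pass
// once the module is read. The bookkeeping amounts to:
#include <vector>

struct SketchInst;                        // stand-in for llvm::Instruction

static void noteAttachment(SketchInst *I, unsigned KindID,
                           unsigned TBAAKindID,
                           std::vector<SketchInst *> &InstsWithTBAATag) {
  if (KindID == TBAAKindID)               // only !tbaa is tracked
    InstsWithTBAATag.push_back(I);
}
// ---- [end sketch] ----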
FunctionBBs.resize(Record[0]); for (unsigned i = 0, e = FunctionBBs.size(); i != e; ++i) @@ -2058,7 +2212,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { !FunctionBBs[CurBBNo-1]->empty()) I = &FunctionBBs[CurBBNo-1]->back(); - if (I == 0) return Error("Invalid DEBUG_LOC_AGAIN record"); + if (I == 0) + return Error(InvalidRecord); I->setDebugLoc(LastLoc); I = 0; continue; @@ -2071,7 +2226,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { !FunctionBBs[CurBBNo-1]->empty()) I = &FunctionBBs[CurBBNo-1]->back(); if (I == 0 || Record.size() < 4) - return Error("Invalid FUNC_CODE_DEBUG_LOC record"); + return Error(InvalidRecord); unsigned Line = Record[0], Col = Record[1]; unsigned ScopeID = Record[2], IAID = Record[3]; @@ -2091,10 +2246,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 > Record.size()) - return Error("Invalid BINOP record"); + return Error(InvalidRecord); int Opc = GetDecodedBinaryOpcode(Record[OpNum++], LHS->getType()); - if (Opc == -1) return Error("Invalid BINOP record"); + if (Opc == -1) + return Error(InvalidRecord); I = BinaryOperator::Create((Instruction::BinaryOps)Opc, LHS, RHS); InstructionList.push_back(I); if (OpNum < Record.size()) { @@ -2136,13 +2292,21 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+2 != Record.size()) - return Error("Invalid CAST record"); + return Error(InvalidRecord); Type *ResTy = getTypeByID(Record[OpNum]); int Opc = GetDecodedCastOpcode(Record[OpNum+1]); if (Opc == -1 || ResTy == 0) - return Error("Invalid CAST record"); - I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy); + return Error(InvalidRecord); + Instruction *Temp = 0; + if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) { + if (Temp) { + InstructionList.push_back(Temp); + CurBB->getInstList().push_back(Temp); + } + } else { + I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy); + } InstructionList.push_back(I); break; } @@ -2151,13 +2315,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *BasePtr; if (getValueTypePair(Record, OpNum, NextValueNo, BasePtr)) - return Error("Invalid GEP record"); + return Error(InvalidRecord); SmallVector GEPIdx; while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) - return Error("Invalid GEP record"); + return Error(InvalidRecord); GEPIdx.push_back(Op); } @@ -2173,14 +2337,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Agg; if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) - return Error("Invalid EXTRACTVAL record"); + return Error(InvalidRecord); SmallVector EXTRACTVALIdx; for (unsigned RecSize = Record.size(); OpNum != RecSize; ++OpNum) { uint64_t Index = Record[OpNum]; if ((unsigned)Index != Index) - return Error("Invalid EXTRACTVAL index"); + return Error(InvalidValue); EXTRACTVALIdx.push_back((unsigned)Index); } @@ -2194,17 +2358,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Agg; if (getValueTypePair(Record, OpNum, NextValueNo, Agg)) - return Error("Invalid INSERTVAL record"); + return Error(InvalidRecord); Value *Val; if (getValueTypePair(Record, OpNum, NextValueNo, Val)) - return Error("Invalid INSERTVAL record"); + return Error(InvalidRecord); SmallVector INSERTVALIdx; for (unsigned RecSize = Record.size(); OpNum != RecSize; ++OpNum) { 
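// Aside on the CAST handling above: before building a plain CastInst, the
// reader now offers the opcode/operand pair to the auto-upgrader, which may
// rewrite an old-style bitcast between address spaces (as an addrspacecast,
// as far as this patch is concerned) and may emit one helper instruction via
// Temp. Condensed from the hunk above:
//
//   Instruction *Temp = 0;
//   if ((I = UpgradeBitCastInst(Opc, Op, ResTy, Temp))) {
//     if (Temp) {                          // upgrader made an extra inst
//       InstructionList.push_back(Temp);
//       CurBB->getInstList().push_back(Temp);
//     }
//   } else {
//     I = CastInst::Create((Instruction::CastOps)Opc, Op, ResTy);
//   }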
uint64_t Index = Record[OpNum]; if ((unsigned)Index != Index) - return Error("Invalid INSERTVAL index"); + return Error(InvalidValue); INSERTVALIdx.push_back((unsigned)Index); } @@ -2221,7 +2385,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || popValue(Record, OpNum, NextValueNo, Type::getInt1Ty(Context), Cond)) - return Error("Invalid SELECT record"); + return Error(InvalidRecord); I = SelectInst::Create(Cond, TrueVal, FalseVal); InstructionList.push_back(I); @@ -2236,18 +2400,18 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, TrueVal) || popValue(Record, OpNum, NextValueNo, TrueVal->getType(), FalseVal) || getValueTypePair(Record, OpNum, NextValueNo, Cond)) - return Error("Invalid SELECT record"); + return Error(InvalidRecord); // select condition can be either i1 or [N x i1] if (VectorType* vector_type = dyn_cast(Cond->getType())) { // expect if (vector_type->getElementType() != Type::getInt1Ty(Context)) - return Error("Invalid SELECT condition type"); + return Error(InvalidTypeForValue); } else { // expect i1 if (Cond->getType() != Type::getInt1Ty(Context)) - return Error("Invalid SELECT condition type"); + return Error(InvalidTypeForValue); } I = SelectInst::Create(Cond, TrueVal, FalseVal); @@ -2260,7 +2424,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Value *Vec, *Idx; if (getValueTypePair(Record, OpNum, NextValueNo, Vec) || popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx)) - return Error("Invalid EXTRACTELT record"); + return Error(InvalidRecord); I = ExtractElementInst::Create(Vec, Idx); InstructionList.push_back(I); break; @@ -2273,7 +2437,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, cast(Vec->getType())->getElementType(), Elt) || popValue(Record, OpNum, NextValueNo, Type::getInt32Ty(Context), Idx)) - return Error("Invalid INSERTELT record"); + return Error(InvalidRecord); I = InsertElementInst::Create(Vec, Elt, Idx); InstructionList.push_back(I); break; @@ -2284,10 +2448,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Value *Vec1, *Vec2, *Mask; if (getValueTypePair(Record, OpNum, NextValueNo, Vec1) || popValue(Record, OpNum, NextValueNo, Vec1->getType(), Vec2)) - return Error("Invalid SHUFFLEVEC record"); + return Error(InvalidRecord); if (getValueTypePair(Record, OpNum, NextValueNo, Mask)) - return Error("Invalid SHUFFLEVEC record"); + return Error(InvalidRecord); I = new ShuffleVectorInst(Vec1, Vec2, Mask); InstructionList.push_back(I); break; @@ -2305,7 +2469,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, LHS) || popValue(Record, OpNum, NextValueNo, LHS->getType(), RHS) || OpNum+1 != Record.size()) - return Error("Invalid CMP record"); + return Error(InvalidRecord); if (LHS->getType()->isFPOrFPVectorTy()) I = new FCmpInst((FCmpInst::Predicate)Record[OpNum], LHS, RHS); @@ -2327,9 +2491,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 0; Value *Op = NULL; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) - return Error("Invalid RET record"); + return Error(InvalidRecord); if (OpNum != Record.size()) - return Error("Invalid RET record"); + return Error(InvalidRecord); I = ReturnInst::Create(Context, Op); InstructionList.push_back(I); @@ -2337,10 +2501,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) 
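// The VSELECT check above accepts a condition that is either scalar i1 or a
// vector of i1; a condensed sketch of the accepted shapes (VTy is a local
// name introduced here):
//
//   if (VectorType *VTy = dyn_cast<VectorType>(Cond->getType())) {
//     if (VTy->getElementType() != Type::getInt1Ty(Context))
//       return Error(InvalidTypeForValue);  // <N x iM>, M != 1: reject
//   } else if (Cond->getType() != Type::getInt1Ty(Context)) {
//     return Error(InvalidTypeForValue);    // scalar but not i1: reject
//   }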
{ } case bitc::FUNC_CODE_INST_BR: { // BR: [bb#, bb#, opval] or [bb#] if (Record.size() != 1 && Record.size() != 3) - return Error("Invalid BR record"); + return Error(InvalidRecord); BasicBlock *TrueDest = getBasicBlock(Record[0]); if (TrueDest == 0) - return Error("Invalid BR record"); + return Error(InvalidRecord); if (Record.size() == 1) { I = BranchInst::Create(TrueDest); @@ -2351,7 +2515,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Value *Cond = getValue(Record, 2, NextValueNo, Type::getInt1Ty(Context)); if (FalseDest == 0 || Cond == 0) - return Error("Invalid BR record"); + return Error(InvalidRecord); I = BranchInst::Create(TrueDest, FalseDest, Cond); InstructionList.push_back(I); } @@ -2360,7 +2524,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] // Check magic if ((Record[0] >> 16) == SWITCH_INST_MAGIC) { - // New SwitchInst format with case ranges. + // "New" SwitchInst format with case ranges. The changes to write this + // format were reverted but we still recognize bitcode that uses it. + // Hopefully someday we will have support for case ranges and can use + // this format again. Type *OpTy = getTypeByID(Record[1]); unsigned ValueBitWidth = cast(OpTy)->getBitWidth(); @@ -2368,7 +2535,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Value *Cond = getValue(Record, 2, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[3]); if (OpTy == 0 || Cond == 0 || Default == 0) - return Error("Invalid SWITCH record"); + return Error(InvalidRecord); unsigned NumCases = Record[4]; @@ -2377,7 +2544,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned CurIdx = 5; for (unsigned i = 0; i != NumCases; ++i) { - IntegersSubsetToBB CaseBuilder; + SmallVector CaseVals; unsigned NumItems = Record[CurIdx++]; for (unsigned ci = 0; ci != NumItems; ++ci) { bool isSingleNumber = Record[CurIdx++]; @@ -2397,20 +2564,22 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { APInt High = ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), ValueBitWidth); - - CaseBuilder.add(IntItem::fromType(OpTy, Low), - IntItem::fromType(OpTy, High)); CurIdx += ActiveWords; + + // FIXME: It is not clear whether values in the range should be + // compared as signed or unsigned values. The partially + // implemented changes that used this format in the past used + // unsigned comparisons. + for ( ; Low.ule(High); ++Low) + CaseVals.push_back(ConstantInt::get(Context, Low)); } else - CaseBuilder.add(IntItem::fromType(OpTy, Low)); + CaseVals.push_back(ConstantInt::get(Context, Low)); } BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]); - IntegersSubset Case = CaseBuilder.getCase(); - SI->addCase(Case, DestBB); + for (SmallVector::iterator cvi = CaseVals.begin(), + cve = CaseVals.end(); cvi != cve; ++cvi) + SI->addCase(*cvi, DestBB); } - uint16_t Hash = SI->hash(); - if (Hash != (Record[0] & 0xFFFF)) - return Error("Invalid SWITCH record"); I = SI; break; } @@ -2418,12 +2587,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { // Old SwitchInst format without case ranges. 
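// Sketch of the range expansion above: a [Low, High] range from the
// abandoned SWITCH_INST_MAGIC format is flattened into one ConstantInt per
// value. As the FIXME notes, the comparison direction is a guess; unsigned
// (ule) matches what the partial upstream implementation used.
//
//   for (/* Low, High from ReadWideAPInt */; Low.ule(High); ++Low)
//     CaseVals.push_back(ConstantInt::get(Context, Low));
//   ...
//   for (SmallVector<ConstantInt *, 1>::iterator cvi = CaseVals.begin(),
//        cve = CaseVals.end(); cvi != cve; ++cvi)
//     SI->addCase(*cvi, DestBB);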
if (Record.size() < 3 || (Record.size() & 1) == 0) - return Error("Invalid SWITCH record"); + return Error(InvalidRecord); Type *OpTy = getTypeByID(Record[0]); Value *Cond = getValue(Record, 1, NextValueNo, OpTy); BasicBlock *Default = getBasicBlock(Record[2]); if (OpTy == 0 || Cond == 0 || Default == 0) - return Error("Invalid SWITCH record"); + return Error(InvalidRecord); unsigned NumCases = (Record.size()-3)/2; SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); InstructionList.push_back(SI); @@ -2433,7 +2602,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { BasicBlock *DestBB = getBasicBlock(Record[1+3+i*2]); if (CaseVal == 0 || DestBB == 0) { delete SI; - return Error("Invalid SWITCH record!"); + return Error(InvalidRecord); } SI->addCase(CaseVal, DestBB); } @@ -2442,11 +2611,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_INDIRECTBR: { // INDIRECTBR: [opty, op0, op1, ...] if (Record.size() < 2) - return Error("Invalid INDIRECTBR record"); + return Error(InvalidRecord); Type *OpTy = getTypeByID(Record[0]); Value *Address = getValue(Record, 1, NextValueNo, OpTy); if (OpTy == 0 || Address == 0) - return Error("Invalid INDIRECTBR record"); + return Error(InvalidRecord); unsigned NumDests = Record.size()-2; IndirectBrInst *IBI = IndirectBrInst::Create(Address, NumDests); InstructionList.push_back(IBI); @@ -2455,7 +2624,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { IBI->addDestination(DestBB); } else { delete IBI; - return Error("Invalid INDIRECTBR record!"); + return Error(InvalidRecord); } } I = IBI; @@ -2464,7 +2633,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_INVOKE: { // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...] - if (Record.size() < 4) return Error("Invalid INVOKE record"); + if (Record.size() < 4) + return Error(InvalidRecord); AttributeSet PAL = getAttributes(Record[0]); unsigned CCInfo = Record[1]; BasicBlock *NormalBB = getBasicBlock(Record[2]); @@ -2473,7 +2643,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 4; Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) - return Error("Invalid INVOKE record"); + return Error(InvalidRecord); PointerType *CalleeTy = dyn_cast(Callee->getType()); FunctionType *FTy = !CalleeTy ? 0 : @@ -2482,24 +2652,25 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { // Check that the right number of fixed parameters are here. if (FTy == 0 || NormalBB == 0 || UnwindBB == 0 || Record.size() < OpNum+FTy->getNumParams()) - return Error("Invalid INVOKE record"); + return Error(InvalidRecord); SmallVector Ops; for (unsigned i = 0, e = FTy->getNumParams(); i != e; ++i, ++OpNum) { Ops.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); - if (Ops.back() == 0) return Error("Invalid INVOKE record"); + if (Ops.back() == 0) + return Error(InvalidRecord); } if (!FTy->isVarArg()) { if (Record.size() != OpNum) - return Error("Invalid INVOKE record"); + return Error(InvalidRecord); } else { // Read type/value pairs for varargs params. 
while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) - return Error("Invalid INVOKE record"); + return Error(InvalidRecord); Ops.push_back(Op); } } @@ -2515,7 +2686,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned Idx = 0; Value *Val = 0; if (getValueTypePair(Record, Idx, NextValueNo, Val)) - return Error("Invalid RESUME record"); + return Error(InvalidRecord); I = ResumeInst::Create(Val); InstructionList.push_back(I); break; @@ -2526,9 +2697,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { break; case bitc::FUNC_CODE_INST_PHI: { // PHI: [ty, val0,bb0, ...] if (Record.size() < 1 || ((Record.size()-1)&1)) - return Error("Invalid PHI record"); + return Error(InvalidRecord); Type *Ty = getTypeByID(Record[0]); - if (!Ty) return Error("Invalid PHI record"); + if (!Ty) + return Error(InvalidRecord); PHINode *PN = PHINode::Create(Ty, (Record.size()-1)/2); InstructionList.push_back(PN); @@ -2543,7 +2715,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { else V = getValue(Record, 1+i, NextValueNo, Ty); BasicBlock *BB = getBasicBlock(Record[2+i]); - if (!V || !BB) return Error("Invalid PHI record"); + if (!V || !BB) + return Error(InvalidRecord); PN->addIncoming(V, BB); } I = PN; @@ -2554,12 +2727,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { // LANDINGPAD: [ty, val, val, num, (id0,val0 ...)?] unsigned Idx = 0; if (Record.size() < 4) - return Error("Invalid LANDINGPAD record"); + return Error(InvalidRecord); Type *Ty = getTypeByID(Record[Idx++]); - if (!Ty) return Error("Invalid LANDINGPAD record"); + if (!Ty) + return Error(InvalidRecord); Value *PersFn = 0; if (getValueTypePair(Record, Idx, NextValueNo, PersFn)) - return Error("Invalid LANDINGPAD record"); + return Error(InvalidRecord); bool IsCleanup = !!Record[Idx++]; unsigned NumClauses = Record[Idx++]; @@ -2572,7 +2746,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, Idx, NextValueNo, Val)) { delete LP; - return Error("Invalid LANDINGPAD record"); + return Error(InvalidRecord); } assert((CT != LandingPadInst::Catch || @@ -2591,13 +2765,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_ALLOCA: { // ALLOCA: [instty, opty, op, align] if (Record.size() != 4) - return Error("Invalid ALLOCA record"); + return Error(InvalidRecord); PointerType *Ty = dyn_cast_or_null(getTypeByID(Record[0])); Type *OpTy = getTypeByID(Record[1]); Value *Size = getFnValueByID(Record[2], OpTy); unsigned Align = Record[3]; - if (!Ty || !Size) return Error("Invalid ALLOCA record"); + if (!Ty || !Size) + return Error(InvalidRecord); I = new AllocaInst(Ty->getElementType(), Size, (1 << Align) >> 1); InstructionList.push_back(I); break; @@ -2607,7 +2782,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+2 != Record.size()) - return Error("Invalid LOAD record"); + return Error(InvalidRecord); I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1); InstructionList.push_back(I); @@ -2619,15 +2794,15 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+4 != Record.size()) - return Error("Invalid LOADATOMIC record"); + return Error(InvalidRecord); AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); if (Ordering == NotAtomic || Ordering == Release || Ordering == AcquireRelease) - return Error("Invalid LOADATOMIC record"); + return 
Error(InvalidRecord); if (Ordering != NotAtomic && Record[OpNum] == 0) - return Error("Invalid LOADATOMIC record"); + return Error(InvalidRecord); SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]); I = new LoadInst(Op, "", Record[OpNum+1], (1 << Record[OpNum]) >> 1, @@ -2642,7 +2817,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+2 != Record.size()) - return Error("Invalid STORE record"); + return Error(InvalidRecord); I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1); InstructionList.push_back(I); @@ -2656,15 +2831,15 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) - return Error("Invalid STOREATOMIC record"); + return Error(InvalidRecord); AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); if (Ordering == NotAtomic || Ordering == Acquire || Ordering == AcquireRelease) - return Error("Invalid STOREATOMIC record"); + return Error(InvalidRecord); SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]); if (Ordering != NotAtomic && Record[OpNum] == 0) - return Error("Invalid STOREATOMIC record"); + return Error(InvalidRecord); I = new StoreInst(Val, Ptr, Record[OpNum+1], (1 << Record[OpNum]) >> 1, Ordering, SynchScope); @@ -2681,10 +2856,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), New) || OpNum+3 != Record.size()) - return Error("Invalid CMPXCHG record"); + return Error(InvalidRecord); AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+1]); if (Ordering == NotAtomic || Ordering == Unordered) - return Error("Invalid CMPXCHG record"); + return Error(InvalidRecord); SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+2]); I = new AtomicCmpXchgInst(Ptr, Cmp, New, Ordering, SynchScope); cast(I)->setVolatile(Record[OpNum]); @@ -2699,14 +2874,14 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { popValue(Record, OpNum, NextValueNo, cast(Ptr->getType())->getElementType(), Val) || OpNum+4 != Record.size()) - return Error("Invalid ATOMICRMW record"); + return Error(InvalidRecord); AtomicRMWInst::BinOp Operation = GetDecodedRMWOperation(Record[OpNum]); if (Operation < AtomicRMWInst::FIRST_BINOP || Operation > AtomicRMWInst::LAST_BINOP) - return Error("Invalid ATOMICRMW record"); + return Error(InvalidRecord); AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); if (Ordering == NotAtomic || Ordering == Unordered) - return Error("Invalid ATOMICRMW record"); + return Error(InvalidRecord); SynchronizationScope SynchScope = GetDecodedSynchScope(Record[OpNum+3]); I = new AtomicRMWInst(Operation, Ptr, Val, Ordering, SynchScope); cast(I)->setVolatile(Record[OpNum+1]); @@ -2715,11 +2890,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_FENCE: { // FENCE:[ordering, synchscope] if (2 != Record.size()) - return Error("Invalid FENCE record"); + return Error(InvalidRecord); AtomicOrdering Ordering = GetDecodedOrdering(Record[0]); if (Ordering == NotAtomic || Ordering == Unordered || Ordering == Monotonic) - return Error("Invalid FENCE record"); + return Error(InvalidRecord); SynchronizationScope SynchScope = GetDecodedSynchScope(Record[1]); I = new FenceInst(Context, Ordering, SynchScope); InstructionList.push_back(I); @@ -2728,7 +2903,7 @@ bool 
BitcodeReader::ParseFunctionBody(Function *F) { case bitc::FUNC_CODE_INST_CALL: { // CALL: [paramattrs, cc, fnty, fnid, arg0, arg1...] if (Record.size() < 3) - return Error("Invalid CALL record"); + return Error(InvalidRecord); AttributeSet PAL = getAttributes(Record[0]); unsigned CCInfo = Record[1]; @@ -2736,13 +2911,13 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned OpNum = 2; Value *Callee; if (getValueTypePair(Record, OpNum, NextValueNo, Callee)) - return Error("Invalid CALL record"); + return Error(InvalidRecord); PointerType *OpTy = dyn_cast(Callee->getType()); FunctionType *FTy = 0; if (OpTy) FTy = dyn_cast(OpTy->getElementType()); if (!FTy || Record.size() < FTy->getNumParams()+OpNum) - return Error("Invalid CALL record"); + return Error(InvalidRecord); SmallVector Args; // Read the fixed params. @@ -2752,18 +2927,19 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { else Args.push_back(getValue(Record, OpNum, NextValueNo, FTy->getParamType(i))); - if (Args.back() == 0) return Error("Invalid CALL record"); + if (Args.back() == 0) + return Error(InvalidRecord); } // Read type/value pairs for varargs params. if (!FTy->isVarArg()) { if (OpNum != Record.size()) - return Error("Invalid CALL record"); + return Error(InvalidRecord); } else { while (OpNum != Record.size()) { Value *Op; if (getValueTypePair(Record, OpNum, NextValueNo, Op)) - return Error("Invalid CALL record"); + return Error(InvalidRecord); Args.push_back(Op); } } @@ -2778,12 +2954,12 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { } case bitc::FUNC_CODE_INST_VAARG: { // VAARG: [valistty, valist, instty] if (Record.size() < 3) - return Error("Invalid VAARG record"); + return Error(InvalidRecord); Type *OpTy = getTypeByID(Record[0]); Value *Op = getValue(Record, 1, NextValueNo, OpTy); Type *ResTy = getTypeByID(Record[2]); if (!OpTy || !Op || !ResTy) - return Error("Invalid VAARG record"); + return Error(InvalidRecord); I = new VAArgInst(Op, ResTy); InstructionList.push_back(I); break; @@ -2794,7 +2970,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { // this file. 
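// Shape of a CALL record as decoded above, for orientation:
//
//   [paramattrs, cc, fnty, fnid, arg0, arg1, ...]
//
// Fixed arguments are read against the callee's FunctionType; for a vararg
// callee the remaining slots are explicit type/value pairs (verbatim from
// the hunk above):
//
//   while (OpNum != Record.size()) {
//     Value *Op;
//     if (getValueTypePair(Record, OpNum, NextValueNo, Op))
//       return Error(InvalidRecord);
//     Args.push_back(Op);
//   }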
if (CurBB == 0) { delete I; - return Error("Invalid instruction with no BB"); + return Error(InvalidInstructionWithNoBB); } CurBB->getInstList().push_back(I); @@ -2821,7 +2997,7 @@ OutOfRecordLoop: delete A; } } - return Error("Never resolved value found in function!"); + return Error(NeverResolvedValueFoundInFunction); } } @@ -2837,7 +3013,7 @@ OutOfRecordLoop: for (unsigned i = 0, e = RefList.size(); i != e; ++i) { unsigned BlockIdx = RefList[i].first; if (BlockIdx >= FunctionBBs.size()) - return Error("Invalid blockaddress block #"); + return Error(InvalidID); GlobalVariable *FwdRef = RefList[i].second; FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx])); @@ -2851,20 +3027,21 @@ OutOfRecordLoop: ValueList.shrinkTo(ModuleValueListSize); MDValueList.shrinkTo(ModuleMDValueListSize); std::vector().swap(FunctionBBs); - return false; + return error_code::success(); } -/// FindFunctionInStream - Find the function body in the bitcode stream -bool BitcodeReader::FindFunctionInStream(Function *F, +/// Find the function body in the bitcode stream +error_code BitcodeReader::FindFunctionInStream(Function *F, DenseMap::iterator DeferredFunctionInfoIterator) { while (DeferredFunctionInfoIterator->second == 0) { if (Stream.AtEndOfStream()) - return Error("Could not find Function in stream"); + return Error(CouldNotFindFunctionInStream); // ParseModule will parse the next body in the stream and set its // position in the DeferredFunctionInfo map. - if (ParseModule(true)) return true; + if (error_code EC = ParseModule(true)) + return EC; } - return false; + return error_code::success(); } //===----------------------------------------------------------------------===// @@ -2880,25 +3057,25 @@ bool BitcodeReader::isMaterializable(const GlobalValue *GV) const { return false; } -bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) { +error_code BitcodeReader::Materialize(GlobalValue *GV) { Function *F = dyn_cast(GV); // If it's not a function or is already material, ignore the request. - if (!F || !F->isMaterializable()) return false; + if (!F || !F->isMaterializable()) + return error_code::success(); DenseMap::iterator DFII = DeferredFunctionInfo.find(F); assert(DFII != DeferredFunctionInfo.end() && "Deferred function not found!"); // If its position is recorded as 0, its body is somewhere in the stream // but we haven't seen it yet. - if (DFII->second == 0) - if (LazyStreamer && FindFunctionInStream(F, DFII)) return true; + if (DFII->second == 0 && LazyStreamer) + if (error_code EC = FindFunctionInStream(F, DFII)) + return EC; // Move the bit stream to the saved position of the deferred function body. Stream.JumpToBit(DFII->second); - if (ParseFunctionBody(F)) { - if (ErrInfo) *ErrInfo = ErrorString; - return true; - } + if (error_code EC = ParseFunctionBody(F)) + return EC; // Upgrade any old intrinsic calls in the function. 
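// Minimal usage sketch of the lazy materialization path above; ensureBody is
// a hypothetical driver, Materialize() and isMaterializable() are real:
//
//   error_code ensureBody(BitcodeReader &R, Function *F) {
//     if (!F->isMaterializable())
//       return error_code::success();  // body already parsed or not deferred
//     return R.Materialize(F);         // seeks to the saved bit offset and
//   }                                  // parses just this function's block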
for (UpgradedIntrinsicMap::iterator I = UpgradedIntrinsics.begin(), @@ -2912,7 +3089,7 @@ bool BitcodeReader::Materialize(GlobalValue *GV, std::string *ErrInfo) { } } - return false; + return error_code::success(); } bool BitcodeReader::isDematerializable(const GlobalValue *GV) const { @@ -2935,17 +3112,18 @@ void BitcodeReader::Dematerialize(GlobalValue *GV) { } -bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) { +error_code BitcodeReader::MaterializeModule(Module *M) { assert(M == TheModule && "Can only Materialize the Module this BitcodeReader is attached to."); // Iterate over the module, deserializing any functions that are still on // disk. for (Module::iterator F = TheModule->begin(), E = TheModule->end(); - F != E; ++F) - if (F->isMaterializable() && - Materialize(F, ErrInfo)) - return true; - + F != E; ++F) { + if (F->isMaterializable()) { + if (error_code EC = Materialize(F)) + return EC; + } + } // At this point, if there are any function bodies, the current bit is // pointing to the END_BLOCK record after them. Now make sure the rest // of the bits in the module have been read. @@ -2971,38 +3149,43 @@ bool BitcodeReader::MaterializeModule(Module *M, std::string *ErrInfo) { } std::vector >().swap(UpgradedIntrinsics); - return false; + for (unsigned I = 0, E = InstsWithTBAATag.size(); I < E; I++) + UpgradeInstWithTBAATag(InstsWithTBAATag[I]); + + UpgradeDebugInfo(*M); + return error_code::success(); } -bool BitcodeReader::InitStream() { - if (LazyStreamer) return InitLazyStream(); +error_code BitcodeReader::InitStream() { + if (LazyStreamer) + return InitLazyStream(); return InitStreamFromBuffer(); } -bool BitcodeReader::InitStreamFromBuffer() { +error_code BitcodeReader::InitStreamFromBuffer() { const unsigned char *BufPtr = (const unsigned char*)Buffer->getBufferStart(); const unsigned char *BufEnd = BufPtr+Buffer->getBufferSize(); if (Buffer->getBufferSize() & 3) { if (!isRawBitcode(BufPtr, BufEnd) && !isBitcodeWrapper(BufPtr, BufEnd)) - return Error("Invalid bitcode signature"); + return Error(InvalidBitcodeSignature); else - return Error("Bitcode stream should be a multiple of 4 bytes in length"); + return Error(BitcodeStreamInvalidSize); } // If we have a wrapper header, parse it and ignore the non-bc file contents. // The magic number is 0x0B17C0DE stored in little endian. if (isBitcodeWrapper(BufPtr, BufEnd)) if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true)) - return Error("Invalid bitcode wrapper header"); + return Error(InvalidBitcodeWrapperHeader); StreamFile.reset(new BitstreamReader(BufPtr, BufEnd)); Stream.init(*StreamFile); - return false; + return error_code::success(); } -bool BitcodeReader::InitLazyStream() { +error_code BitcodeReader::InitLazyStream() { // Check and strip off the bitcode wrapper; BitstreamReader expects never to // see it. 
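// The buffer validation above separates two failure modes for an oddly sized
// input: no recognizable magic at all is InvalidBitcodeSignature, while a
// recognizable stream of bad length is BitcodeStreamInvalidSize (a valid
// stream is a multiple of 4 bytes). The optional wrapper header carries the
// magic 0x0B17C0DE in little-endian and is stripped before reading:
//
//   if (isBitcodeWrapper(BufPtr, BufEnd))
//     if (SkipBitcodeWrapperHeader(BufPtr, BufEnd, true))
//       return Error(InvalidBitcodeWrapperHeader);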
StreamingMemoryObject *Bytes = new StreamingMemoryObject(LazyStreamer); @@ -3010,11 +3193,11 @@ Stream.init(*StreamFile); unsigned char buf[16]; - if (Bytes->readBytes(0, 16, buf, NULL) == -1) - return Error("Bitcode stream must be at least 16 bytes in length"); + if (Bytes->readBytes(0, 16, buf) == -1) + return Error(BitcodeStreamInvalidSize); if (!isBitcode(buf, buf + 16)) - return Error("Invalid bitcode signature"); + return Error(InvalidBitcodeSignature); if (isBitcodeWrapper(buf, buf + 4)) { const unsigned char *bitcodeStart = buf; @@ -3023,7 +3206,64 @@ Bytes->dropLeadingBytes(bitcodeStart - buf); Bytes->setKnownObjectSize(bitcodeEnd - bitcodeStart); } - return false; + return error_code::success(); +} + +namespace { +class BitcodeErrorCategoryType : public _do_message { + const char *name() const LLVM_OVERRIDE { + return "llvm.bitcode"; + } + std::string message(int IE) const LLVM_OVERRIDE { + BitcodeReader::ErrorType E = static_cast<BitcodeReader::ErrorType>(IE); + switch (E) { + case BitcodeReader::BitcodeStreamInvalidSize: + return "Bitcode stream length should be >= 16 bytes and a multiple of 4"; + case BitcodeReader::ConflictingMETADATA_KINDRecords: + return "Conflicting METADATA_KIND records"; + case BitcodeReader::CouldNotFindFunctionInStream: + return "Could not find function in stream"; + case BitcodeReader::ExpectedConstant: + return "Expected a constant"; + case BitcodeReader::InsufficientFunctionProtos: + return "Insufficient function protos"; + case BitcodeReader::InvalidBitcodeSignature: + return "Invalid bitcode signature"; + case BitcodeReader::InvalidBitcodeWrapperHeader: + return "Invalid bitcode wrapper header"; + case BitcodeReader::InvalidConstantReference: + return "Invalid constant reference"; + case BitcodeReader::InvalidID: + return "Invalid ID"; + case BitcodeReader::InvalidInstructionWithNoBB: + return "Invalid instruction with no BB"; + case BitcodeReader::InvalidRecord: + return "Invalid record"; + case BitcodeReader::InvalidTypeForValue: + return "Invalid type for value"; + case BitcodeReader::InvalidTYPETable: + return "Invalid TYPE table"; + case BitcodeReader::InvalidType: + return "Invalid type"; + case BitcodeReader::MalformedBlock: + return "Malformed block"; + case BitcodeReader::MalformedGlobalInitializerSet: + return "Malformed global initializer set"; + case BitcodeReader::InvalidMultipleBlocks: + return "Invalid multiple blocks"; + case BitcodeReader::NeverResolvedValueFoundInFunction: + return "Never resolved value found in function"; + case BitcodeReader::InvalidValue: + return "Invalid value"; + } + llvm_unreachable("Unknown error type!"); + } +}; +} + +const error_category &BitcodeReader::BitcodeErrorCategory() { + static BitcodeErrorCategoryType O; + return O; } //===----------------------------------------------------------------------===// @@ -3038,9 +3278,9 @@ Module *llvm::getLazyBitcodeModule(MemoryBuffer *Buffer, Module *M = new Module(Buffer->getBufferIdentifier(), Context); BitcodeReader *R = new BitcodeReader(Buffer, Context); M->setMaterializer(R); - if (R->ParseBitcodeInto(M)) { + if (error_code EC = R->ParseBitcodeInto(M)) { if (ErrMsg) - *ErrMsg = R->getErrorString(); + *ErrMsg = EC.message(); delete M; // Also deletes R.
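// How the pieces above fit together (the printing caller is hypothetical;
// Error(), the "llvm.bitcode" category and message() are from the hunk):
// Error(E) packs an ErrorType into an error_code bound to the singleton
// category, and message() maps it back to a human-readable string.
//
//   error_code EC = Reader.Error(BitcodeReader::InvalidRecord);
//   if (EC)
//     errs() << EC.message() << "\n";  // prints "Invalid record"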
return 0; @@ -3061,9 +3301,9 @@ Module *llvm::getStreamedBitcodeModule(const std::string &name, Module *M = new Module(name, Context); BitcodeReader *R = new BitcodeReader(streamer, Context); M->setMaterializer(R); - if (R->ParseBitcodeInto(M)) { + if (error_code EC = R->ParseBitcodeInto(M)) { if (ErrMsg) - *ErrMsg = R->getErrorString(); + *ErrMsg = EC.message(); delete M; // Also deletes R. return 0; } @@ -3102,9 +3342,9 @@ std::string llvm::getBitcodeTargetTriple(MemoryBuffer *Buffer, R->setBufferOwned(false); std::string Triple(""); - if (R->ParseTriple(Triple)) + if (error_code EC = R->ParseTriple(Triple)) if (ErrMsg) - *ErrMsg = R->getErrorString(); + *ErrMsg = EC.message(); delete R; return Triple; diff --git a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h index 28674eb..c5d345b 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h +++ b/contrib/llvm/lib/Bitcode/Reader/BitcodeReader.h @@ -21,6 +21,7 @@ #include "llvm/IR/Attributes.h" #include "llvm/IR/OperandTraits.h" #include "llvm/IR/Type.h" +#include "llvm/Support/system_error.h" #include "llvm/Support/ValueHandle.h" #include @@ -132,8 +133,6 @@ class BitcodeReader : public GVMaterializer { uint64_t NextUnreadBit; bool SeenValueSymbolTable; - const char *ErrorString; - std::vector TypeList; BitcodeReaderValueList ValueList; BitcodeReaderMDValueList MDValueList; @@ -142,6 +141,9 @@ class BitcodeReader : public GVMaterializer { std::vector > GlobalInits; std::vector > AliasInits; + std::vector > FunctionPrefixes; + + SmallVector InstsWithTBAATag; /// MAttributes - The set of attributes by index. Index zero in the /// file is for null, and is thus not represented here. As such all indices @@ -191,17 +193,46 @@ class BitcodeReader : public GVMaterializer { /// not need this flag. bool UseRelativeIDs; + static const error_category &BitcodeErrorCategory(); + public: + enum ErrorType { + BitcodeStreamInvalidSize, + ConflictingMETADATA_KINDRecords, + CouldNotFindFunctionInStream, + ExpectedConstant, + InsufficientFunctionProtos, + InvalidBitcodeSignature, + InvalidBitcodeWrapperHeader, + InvalidConstantReference, + InvalidID, // A read identifier is not found in the table it should be in. + InvalidInstructionWithNoBB, + InvalidRecord, // A read record doesn't have the expected size or structure + InvalidTypeForValue, // Type read OK, but is invalid for its use + InvalidTYPETable, + InvalidType, // We were unable to read a type + MalformedBlock, // We are unable to advance in the stream. 
+ MalformedGlobalInitializerSet, + InvalidMultipleBlocks, // We found multiple blocks of a kind that should + // have only one + NeverResolvedValueFoundInFunction, + InvalidValue // Invalid version, inst number, attr number, etc + }; + + error_code Error(ErrorType E) { + return error_code(E, BitcodeErrorCategory()); + } + explicit BitcodeReader(MemoryBuffer *buffer, LLVMContext &C) : Context(C), TheModule(0), Buffer(buffer), BufferOwned(false), LazyStreamer(0), NextUnreadBit(0), SeenValueSymbolTable(false), - ErrorString(0), ValueList(C), MDValueList(C), + ValueList(C), MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) { } explicit BitcodeReader(DataStreamer *streamer, LLVMContext &C) : Context(C), TheModule(0), Buffer(0), BufferOwned(false), LazyStreamer(streamer), NextUnreadBit(0), SeenValueSymbolTable(false), - ErrorString(0), ValueList(C), MDValueList(C), + ValueList(C), MDValueList(C), SeenFirstFunctionBody(false), UseRelativeIDs(false) { } ~BitcodeReader() { @@ -218,23 +249,17 @@ public: virtual bool isMaterializable(const GlobalValue *GV) const; virtual bool isDematerializable(const GlobalValue *GV) const; - virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0); - virtual bool MaterializeModule(Module *M, std::string *ErrInfo = 0); + virtual error_code Materialize(GlobalValue *GV); + virtual error_code MaterializeModule(Module *M); virtual void Dematerialize(GlobalValue *GV); - bool Error(const char *Str) { - ErrorString = Str; - return true; - } - const char *getErrorString() const { return ErrorString; } - /// @brief Main interface to parsing a bitcode buffer. /// @returns true if an error occurred. - bool ParseBitcodeInto(Module *M); + error_code ParseBitcodeInto(Module *M); /// @brief Cheap mechanism to just extract module triple /// @returns true if an error occurred. - bool ParseTriple(std::string &Triple); + error_code ParseTriple(std::string &Triple); static uint64_t decodeSignRotatedValue(uint64_t V); @@ -258,7 +283,7 @@ private: /// getValueTypePair - Read a value/type pair out of the specified record from /// slot 'Slot'. Increment Slot past the number of slots used in the record. /// Return true on failure. - bool getValueTypePair(SmallVector &Record, unsigned &Slot, + bool getValueTypePair(SmallVectorImpl &Record, unsigned &Slot, unsigned InstNum, Value *&ResVal) { if (Slot == Record.size()) return true; unsigned ValNo = (unsigned)Record[Slot++]; @@ -282,7 +307,7 @@ private: /// popValue - Read a value out of the specified record from slot 'Slot'. /// Increment Slot past the number of slots used by the value in the record. /// Return true if there is an error. - bool popValue(SmallVector &Record, unsigned &Slot, + bool popValue(SmallVectorImpl &Record, unsigned &Slot, unsigned InstNum, Type *Ty, Value *&ResVal) { if (getValue(Record, Slot, InstNum, Ty, ResVal)) return true; @@ -292,7 +317,7 @@ private: } /// getValue -- Like popValue, but does not increment the Slot number. - bool getValue(SmallVector &Record, unsigned Slot, + bool getValue(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty, Value *&ResVal) { ResVal = getValue(Record, Slot, InstNum, Ty); return ResVal == 0; @@ -300,7 +325,7 @@ private: /// getValue -- Version of getValue that returns ResVal directly, /// or 0 if there is an error. 
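// Slot-cursor protocol of the value helpers above (condensed; the
// bool-returning variants report failure, the pointer-returning ones yield 0):
//
//   unsigned Slot = 0; Value *V;
//   getValueTypePair(Record, Slot, InstNum, V);  // reads an ID (plus a type
//                                                // for forward refs) and
//                                                // advances Slot past both
//   popValue(Record, Slot, InstNum, Ty, V);      // reads one ID, ++Slot
//   V = getValue(Record, Slot, InstNum, Ty);     // peeks; Slot unchanged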
- Value *getValue(SmallVector &Record, unsigned Slot, + Value *getValue(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty) { if (Slot == Record.size()) return 0; unsigned ValNo = (unsigned)Record[Slot]; @@ -311,7 +336,7 @@ private: } /// getValueSigned -- Like getValue, but decodes signed VBRs. - Value *getValueSigned(SmallVector &Record, unsigned Slot, + Value *getValueSigned(SmallVectorImpl &Record, unsigned Slot, unsigned InstNum, Type *Ty) { if (Slot == Record.size()) return 0; unsigned ValNo = (unsigned)decodeSignRotatedValue(Record[Slot]); @@ -321,26 +346,27 @@ private: return getFnValueByID(ValNo, Ty); } - bool ParseModule(bool Resume); - bool ParseAttributeBlock(); - bool ParseAttributeGroupBlock(); - bool ParseTypeTable(); - bool ParseTypeTableBody(); - - bool ParseValueSymbolTable(); - bool ParseConstants(); - bool RememberAndSkipFunctionBody(); - bool ParseFunctionBody(Function *F); - bool GlobalCleanup(); - bool ResolveGlobalAndAliasInits(); - bool ParseMetadata(); - bool ParseMetadataAttachment(); - bool ParseModuleTriple(std::string &Triple); - bool ParseUseLists(); - bool InitStream(); - bool InitStreamFromBuffer(); - bool InitLazyStream(); - bool FindFunctionInStream(Function *F, + error_code ParseAttrKind(uint64_t Code, Attribute::AttrKind *Kind); + error_code ParseModule(bool Resume); + error_code ParseAttributeBlock(); + error_code ParseAttributeGroupBlock(); + error_code ParseTypeTable(); + error_code ParseTypeTableBody(); + + error_code ParseValueSymbolTable(); + error_code ParseConstants(); + error_code RememberAndSkipFunctionBody(); + error_code ParseFunctionBody(Function *F); + error_code GlobalCleanup(); + error_code ResolveGlobalAndAliasInits(); + error_code ParseMetadata(); + error_code ParseMetadataAttachment(); + error_code ParseModuleTriple(std::string &Triple); + error_code ParseUseLists(); + error_code InitStream(); + error_code InitStreamFromBuffer(); + error_code InitLazyStream(); + error_code FindFunctionInStream(Function *F, DenseMap::iterator DeferredFunctionInfoIterator); }; diff --git a/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp b/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp index 9dafe2a..1fd9abd 100644 --- a/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp +++ b/contrib/llvm/lib/Bitcode/Reader/BitstreamReader.cpp @@ -204,7 +204,16 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); - for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + // Read the record code first. 
+ assert(Abbv->getNumOperandInfos() != 0 && "no record code in abbreviation?"); + const BitCodeAbbrevOp &CodeOp = Abbv->getOperandInfo(0); + if (CodeOp.isLiteral()) + readAbbreviatedLiteral(CodeOp, Vals); + else + readAbbreviatedField(CodeOp, Vals); + unsigned Code = (unsigned)Vals.pop_back_val(); + + for (unsigned i = 1, e = Abbv->getNumOperandInfos(); i != e; ++i) { const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); if (Op.isLiteral()) { readAbbreviatedLiteral(Op, Vals); @@ -264,8 +273,6 @@ unsigned BitstreamCursor::readRecord(unsigned AbbrevID, JumpToBit(NewEnd); } - unsigned Code = (unsigned)Vals[0]; - Vals.erase(Vals.begin()); return Code; } diff --git a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp index 985208c..cd1ada2 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitWriter.cpp @@ -18,7 +18,7 @@ using namespace llvm; int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) { std::string ErrorInfo; - raw_fd_ostream OS(Path, ErrorInfo, raw_fd_ostream::F_Binary); + raw_fd_ostream OS(Path, ErrorInfo, sys::fs::F_Binary); if (!ErrorInfo.empty()) return -1; diff --git a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp index 1b73f23..4cfc6bd 100644 --- a/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -60,10 +60,7 @@ enum { FUNCTION_INST_CAST_ABBREV, FUNCTION_INST_RET_VOID_ABBREV, FUNCTION_INST_RET_VAL_ABBREV, - FUNCTION_INST_UNREACHABLE_ABBREV, - - // SwitchInst Magic - SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex + FUNCTION_INST_UNREACHABLE_ABBREV }; static unsigned GetEncodedCastOpcode(unsigned Opcode) { @@ -81,6 +78,7 @@ static unsigned GetEncodedCastOpcode(unsigned Opcode) { case Instruction::PtrToInt: return bitc::CAST_PTRTOINT; case Instruction::IntToPtr: return bitc::CAST_INTTOPTR; case Instruction::BitCast : return bitc::CAST_BITCAST; + case Instruction::AddrSpaceCast: return bitc::CAST_ADDRSPACECAST; } } @@ -161,6 +159,91 @@ static void WriteStringRecord(unsigned Code, StringRef Str, Stream.EmitRecord(Code, Vals, AbbrevToUse); } +static uint64_t getAttrKindEncoding(Attribute::AttrKind Kind) { + switch (Kind) { + case Attribute::Alignment: + return bitc::ATTR_KIND_ALIGNMENT; + case Attribute::AlwaysInline: + return bitc::ATTR_KIND_ALWAYS_INLINE; + case Attribute::Builtin: + return bitc::ATTR_KIND_BUILTIN; + case Attribute::ByVal: + return bitc::ATTR_KIND_BY_VAL; + case Attribute::Cold: + return bitc::ATTR_KIND_COLD; + case Attribute::InlineHint: + return bitc::ATTR_KIND_INLINE_HINT; + case Attribute::InReg: + return bitc::ATTR_KIND_IN_REG; + case Attribute::MinSize: + return bitc::ATTR_KIND_MIN_SIZE; + case Attribute::Naked: + return bitc::ATTR_KIND_NAKED; + case Attribute::Nest: + return bitc::ATTR_KIND_NEST; + case Attribute::NoAlias: + return bitc::ATTR_KIND_NO_ALIAS; + case Attribute::NoBuiltin: + return bitc::ATTR_KIND_NO_BUILTIN; + case Attribute::NoCapture: + return bitc::ATTR_KIND_NO_CAPTURE; + case Attribute::NoDuplicate: + return bitc::ATTR_KIND_NO_DUPLICATE; + case Attribute::NoImplicitFloat: + return bitc::ATTR_KIND_NO_IMPLICIT_FLOAT; + case Attribute::NoInline: + return bitc::ATTR_KIND_NO_INLINE; + case Attribute::NonLazyBind: + return bitc::ATTR_KIND_NON_LAZY_BIND; + case Attribute::NoRedZone: + return bitc::ATTR_KIND_NO_RED_ZONE; + case Attribute::NoReturn: + return bitc::ATTR_KIND_NO_RETURN; + case Attribute::NoUnwind: + return 
bitc::ATTR_KIND_NO_UNWIND; + case Attribute::OptimizeForSize: + return bitc::ATTR_KIND_OPTIMIZE_FOR_SIZE; + case Attribute::OptimizeNone: + return bitc::ATTR_KIND_OPTIMIZE_NONE; + case Attribute::ReadNone: + return bitc::ATTR_KIND_READ_NONE; + case Attribute::ReadOnly: + return bitc::ATTR_KIND_READ_ONLY; + case Attribute::Returned: + return bitc::ATTR_KIND_RETURNED; + case Attribute::ReturnsTwice: + return bitc::ATTR_KIND_RETURNS_TWICE; + case Attribute::SExt: + return bitc::ATTR_KIND_S_EXT; + case Attribute::StackAlignment: + return bitc::ATTR_KIND_STACK_ALIGNMENT; + case Attribute::StackProtect: + return bitc::ATTR_KIND_STACK_PROTECT; + case Attribute::StackProtectReq: + return bitc::ATTR_KIND_STACK_PROTECT_REQ; + case Attribute::StackProtectStrong: + return bitc::ATTR_KIND_STACK_PROTECT_STRONG; + case Attribute::StructRet: + return bitc::ATTR_KIND_STRUCT_RET; + case Attribute::SanitizeAddress: + return bitc::ATTR_KIND_SANITIZE_ADDRESS; + case Attribute::SanitizeThread: + return bitc::ATTR_KIND_SANITIZE_THREAD; + case Attribute::SanitizeMemory: + return bitc::ATTR_KIND_SANITIZE_MEMORY; + case Attribute::UWTable: + return bitc::ATTR_KIND_UW_TABLE; + case Attribute::ZExt: + return bitc::ATTR_KIND_Z_EXT; + case Attribute::EndAttrKinds: + llvm_unreachable("Can not encode end-attribute kinds marker."); + case Attribute::None: + llvm_unreachable("Can not encode none-attribute."); + } + + llvm_unreachable("Trying to encode unknown attribute"); +} + static void WriteAttributeGroupTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { const std::vector &AttrGrps = VE.getAttributeGroups(); @@ -182,10 +265,10 @@ static void WriteAttributeGroupTable(const ValueEnumerator &VE, Attribute Attr = *I; if (Attr.isEnumAttribute()) { Record.push_back(0); - Record.push_back(Attr.getKindAsEnum()); + Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); } else if (Attr.isAlignAttribute()) { Record.push_back(1); - Record.push_back(Attr.getKindAsEnum()); + Record.push_back(getAttrKindEncoding(Attr.getKindAsEnum())); Record.push_back(Attr.getValueAsInt()); } else { StringRef Kind = Attr.getKindAsString(); @@ -407,7 +490,6 @@ static unsigned getEncodedLinkage(const GlobalValue *GV) { case GlobalValue::AvailableExternallyLinkage: return 12; case GlobalValue::LinkerPrivateLinkage: return 13; case GlobalValue::LinkerPrivateWeakLinkage: return 14; - case GlobalValue::LinkOnceODRAutoHideLinkage: return 15; } llvm_unreachable("Invalid linkage"); } @@ -524,7 +606,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // GLOBALVAR: [type, isconst, initid, // linkage, alignment, section, visibility, threadlocal, - // unnamed_addr] + // unnamed_addr, externally_initialized] Vals.push_back(VE.getTypeID(GV->getType())); Vals.push_back(GV->isConstant()); Vals.push_back(GV->isDeclaration() ? 0 : @@ -550,7 +632,7 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, // Emit the function proto information. for (Module::const_iterator F = M->begin(), E = M->end(); F != E; ++F) { // FUNCTION: [type, callingconv, isproto, linkage, paramattrs, alignment, - // section, visibility, gc, unnamed_addr] + // section, visibility, gc, unnamed_addr, prefix] Vals.push_back(VE.getTypeID(F->getType())); Vals.push_back(F->getCallingConv()); Vals.push_back(F->isDeclaration()); @@ -561,6 +643,8 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, Vals.push_back(getEncodedVisibility(F)); Vals.push_back(F->hasGC() ? 
GCMap[F->getGC()] : 0); Vals.push_back(F->hasUnnamedAddr()); + Vals.push_back(F->hasPrefixData() ? (VE.getValueID(F->getPrefixData()) + 1) + : 0); unsigned AbbrevToUse = 0; Stream.EmitRecord(bitc::MODULE_CODE_FUNCTION, Vals, AbbrevToUse); @@ -614,7 +698,7 @@ static uint64_t GetOptimizationFlags(const Value *V) { static void WriteMDNode(const MDNode *N, const ValueEnumerator &VE, BitstreamWriter &Stream, - SmallVector &Record) { + SmallVectorImpl &Record) { for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { if (N->getOperand(i)) { Record.push_back(VE.getTypeID(N->getOperand(i)->getType())); @@ -701,7 +785,7 @@ static void WriteFunctionLocalMetadata(const Function &F, BitstreamWriter &Stream) { bool StartedMetadataBlock = false; SmallVector Record; - const SmallVector &Vals = VE.getFunctionLocalMDValues(); + const SmallVectorImpl &Vals = VE.getFunctionLocalMDValues(); for (unsigned i = 0, e = Vals.size(); i != e; ++i) if (const MDNode *N = Vals[i]) if (N->isFunctionLocal() && N->getFunction() == &F) { @@ -780,34 +864,6 @@ static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { Vals.push_back((-V << 1) | 1); } -static void EmitAPInt(SmallVectorImpl &Vals, - unsigned &Code, unsigned &AbbrevToUse, const APInt &Val, - bool EmitSizeForWideNumbers = false - ) { - if (Val.getBitWidth() <= 64) { - uint64_t V = Val.getSExtValue(); - emitSignedInt64(Vals, V); - Code = bitc::CST_CODE_INTEGER; - AbbrevToUse = CONSTANTS_INTEGER_ABBREV; - } else { - // Wide integers, > 64 bits in size. - // We have an arbitrary precision integer value to write whose - // bit width is > 64. However, in canonical unsigned integer - // format it is likely that the high bits are going to be zero. - // So, we only write the number of active words. - unsigned NWords = Val.getActiveWords(); - - if (EmitSizeForWideNumbers) - Vals.push_back(NWords); - - const uint64_t *RawWords = Val.getRawData(); - for (unsigned i = 0; i != NWords; ++i) { - emitSignedInt64(Vals, RawWords[i]); - } - Code = bitc::CST_CODE_WIDE_INTEGER; - } -} - static void WriteConstants(unsigned FirstVal, unsigned LastVal, const ValueEnumerator &VE, BitstreamWriter &Stream, bool isGlobal) { @@ -891,7 +947,23 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, } else if (isa(C)) { Code = bitc::CST_CODE_UNDEF; } else if (const ConstantInt *IV = dyn_cast(C)) { - EmitAPInt(Record, Code, AbbrevToUse, IV->getValue()); + if (IV->getBitWidth() <= 64) { + uint64_t V = IV->getSExtValue(); + emitSignedInt64(Record, V); + Code = bitc::CST_CODE_INTEGER; + AbbrevToUse = CONSTANTS_INTEGER_ABBREV; + } else { // Wide integers, > 64 bits in size. + // We have an arbitrary precision integer value to write whose + // bit width is > 64. However, in canonical unsigned integer + // format it is likely that the high bits are going to be zero. + // So, we only write the number of active words. + unsigned NWords = IV->getValue().getActiveWords(); + const uint64_t *RawWords = IV->getValue().getRawData(); + for (unsigned i = 0; i != NWords; ++i) { + emitSignedInt64(Record, RawWords[i]); + } + Code = bitc::CST_CODE_WIDE_INTEGER; + } } else if (const ConstantFP *CFP = dyn_cast(C)) { Code = bitc::CST_CODE_FLOAT; Type *Ty = CFP->getType(); @@ -1078,7 +1150,7 @@ static void WriteModuleConstants(const ValueEnumerator &VE, /// instruction ID, then it is a forward reference, and it also includes the /// type ID. The value ID that is written is encoded relative to the InstID. 
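// Wide-integer emission, restating the ConstantInt path above: only the
// active words of the APInt are written (high zero words are dropped), each
// sign-rotated through emitSignedInt64, under CST_CODE_WIDE_INTEGER:
//
//   unsigned NWords = IV->getValue().getActiveWords();
//   const uint64_t *RawWords = IV->getValue().getRawData();
//   for (unsigned i = 0; i != NWords; ++i)
//     emitSignedInt64(Record, RawWords[i]);
//   Code = bitc::CST_CODE_WIDE_INTEGER;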
static bool PushValueAndType(const Value *V, unsigned InstID, - SmallVector &Vals, + SmallVectorImpl &Vals, ValueEnumerator &VE) { unsigned ValID = VE.getValueID(V); // Make encoding relative to the InstID. @@ -1093,21 +1165,14 @@ static bool PushValueAndType(const Value *V, unsigned InstID, /// pushValue - Like PushValueAndType, but where the type of the value is /// omitted (perhaps it was already encoded in an earlier operand). static void pushValue(const Value *V, unsigned InstID, - SmallVector &Vals, + SmallVectorImpl &Vals, ValueEnumerator &VE) { unsigned ValID = VE.getValueID(V); Vals.push_back(InstID - ValID); } -static void pushValue64(const Value *V, unsigned InstID, - SmallVector &Vals, - ValueEnumerator &VE) { - uint64_t ValID = VE.getValueID(V); - Vals.push_back(InstID - ValID); -} - static void pushValueSigned(const Value *V, unsigned InstID, - SmallVector &Vals, + SmallVectorImpl &Vals, ValueEnumerator &VE) { unsigned ValID = VE.getValueID(V); int64_t diff = ((int32_t)InstID - (int32_t)ValID); @@ -1117,7 +1182,7 @@ static void pushValueSigned(const Value *V, unsigned InstID, /// WriteInstruction - Emit an instruction to the specified stream. static void WriteInstruction(const Instruction &I, unsigned InstID, ValueEnumerator &VE, BitstreamWriter &Stream, - SmallVector &Vals) { + SmallVectorImpl &Vals) { unsigned Code = 0; unsigned AbbrevToUse = 0; VE.setInstructionID(&I); @@ -1229,63 +1294,16 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, break; case Instruction::Switch: { - // Redefine Vals, since here we need to use 64 bit values - // explicitly to store large APInt numbers. - SmallVector Vals64; - Code = bitc::FUNC_CODE_INST_SWITCH; const SwitchInst &SI = cast(I); - - uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16); - Vals64.push_back(SwitchRecordHeader); - - Vals64.push_back(VE.getTypeID(SI.getCondition()->getType())); - pushValue64(SI.getCondition(), InstID, Vals64, VE); - Vals64.push_back(VE.getValueID(SI.getDefaultDest())); - Vals64.push_back(SI.getNumCases()); + Vals.push_back(VE.getTypeID(SI.getCondition()->getType())); + pushValue(SI.getCondition(), InstID, Vals, VE); + Vals.push_back(VE.getValueID(SI.getDefaultDest())); for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { - const IntegersSubset& CaseRanges = i.getCaseValueEx(); - unsigned Code, Abbrev; // will unused. 
- - if (CaseRanges.isSingleNumber()) { - Vals64.push_back(1/*NumItems = 1*/); - Vals64.push_back(true/*IsSingleNumber = true*/); - EmitAPInt(Vals64, Code, Abbrev, CaseRanges.getSingleNumber(0), true); - } else { - - Vals64.push_back(CaseRanges.getNumItems()); - - if (CaseRanges.isSingleNumbersOnly()) { - for (unsigned ri = 0, rn = CaseRanges.getNumItems(); - ri != rn; ++ri) { - - Vals64.push_back(true/*IsSingleNumber = true*/); - - EmitAPInt(Vals64, Code, Abbrev, - CaseRanges.getSingleNumber(ri), true); - } - } else - for (unsigned ri = 0, rn = CaseRanges.getNumItems(); - ri != rn; ++ri) { - IntegersSubset::Range r = CaseRanges.getItem(ri); - bool IsSingleNumber = CaseRanges.isSingleNumber(ri); - - Vals64.push_back(IsSingleNumber); - - EmitAPInt(Vals64, Code, Abbrev, r.getLow(), true); - if (!IsSingleNumber) - EmitAPInt(Vals64, Code, Abbrev, r.getHigh(), true); - } - } - Vals64.push_back(VE.getValueID(i.getCaseSuccessor())); + Vals.push_back(VE.getValueID(i.getCaseValue())); + Vals.push_back(VE.getValueID(i.getCaseSuccessor())); } - - Stream.EmitRecord(Code, Vals64, AbbrevToUse); - - // Also do expected action - clear external Vals collection: - Vals.clear(); - return; } break; case Instruction::IndirectBr: @@ -1847,6 +1865,8 @@ static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE, WriteUseList(FI, VE, Stream); if (!FI->isDeclaration()) WriteFunctionUseList(FI, VE, Stream); + if (FI->hasPrefixData()) + WriteUseList(FI->getPrefixData(), VE, Stream); } // Write the aliases. diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp index 8bac6da..a164104 100644 --- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -60,6 +60,11 @@ ValueEnumerator::ValueEnumerator(const Module *M) { I != E; ++I) EnumerateValue(I->getAliasee()); + // Enumerate the prefix data constants. + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) + if (I->hasPrefixData()) + EnumerateValue(I->getPrefixData()); + // Insert constants and metadata that are named at module level into the slot // pool so that the module symbol table can refer to them... 
EnumerateValueSymbolTable(M->getValueSymbolTable()); diff --git a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h index 0af6164c..d1ca15f 100644 --- a/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h +++ b/contrib/llvm/lib/Bitcode/Writer/ValueEnumerator.h @@ -125,7 +125,7 @@ public: const ValueList &getValues() const { return Values; } const ValueList &getMDValues() const { return MDValues; } - const SmallVector<const MDNode *, 8> &getFunctionLocalMDValues() const { + const SmallVectorImpl<const MDNode *> &getFunctionLocalMDValues() const { return FunctionLocalMDs; } const TypeList &getTypes() const { return Types; } diff --git a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp index c50f8b5..2ee7767 100644 --- a/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -247,8 +247,8 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI, if ((MO.isDef() && MI->isRegTiedToUseOperand(i)) || IsImplicitDefUse(MI, MO)) { const unsigned Reg = MO.getReg(); - PassthruRegs.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) PassthruRegs.insert(*SubRegs); } } @@ -782,7 +782,7 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies( if (MI == CriticalPathMI) { CriticalPathSU = CriticalPathStep(CriticalPathSU); CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0; - } else { + } else if (CriticalPathSet.any()) { ExcludeRegs = &CriticalPathSet; } diff --git a/contrib/llvm/lib/CodeGen/Analysis.cpp b/contrib/llvm/lib/CodeGen/Analysis.cpp index 4731af5..1600c67 100644 --- a/contrib/llvm/lib/CodeGen/Analysis.cpp +++ b/contrib/llvm/lib/CodeGen/Analysis.cpp @@ -202,161 +202,272 @@ ISD::CondCode llvm::getICmpCondCode(ICmpInst::Predicate Pred) { } static bool isNoopBitcast(Type *T1, Type *T2, - const TargetLowering& TLI) { + const TargetLoweringBase& TLI) { return T1 == T2 || (T1->isPointerTy() && T2->isPointerTy()) || (isa<VectorType>(T1) && isa<VectorType>(T2) && TLI.isTypeLegal(EVT::getEVT(T1)) && TLI.isTypeLegal(EVT::getEVT(T2))); } -/// sameNoopInput - Return true if V1 == V2, else if either V1 or V2 is a noop -/// (i.e., lowers to no machine code), look through it (and any transitive noop -/// operands to it) and check if it has the same noop input value. This is -/// used to determine if a tail call can be formed. -static bool sameNoopInput(const Value *V1, const Value *V2, - SmallVectorImpl<unsigned> &Els1, - SmallVectorImpl<unsigned> &Els2, - const TargetLowering &TLI) { - using std::swap; - bool swapParity = false; - bool equalEls = Els1 == Els2; +/// Look through operations that will be free to find the earliest source of +/// this value. +/// +/// @param ValLoc If V has aggregate type, we will be interested in a particular +/// scalar component. This records its address; the reverse of this list gives a +/// sequence of indices appropriate for an extractvalue to locate the important +/// value. This value is updated during the function and on exit will indicate +/// similar information for the Value returned. +/// +/// @param DataBits If this function looks through truncate instructions, this +/// will record the smallest size attained.
+static const Value *getNoopInput(const Value *V, + SmallVectorImpl &ValLoc, + unsigned &DataBits, + const TargetLoweringBase &TLI) { while (true) { - if ((equalEls && V1 == V2) || isa(V1) || isa(V2)) { - if (swapParity) - // Revert to original Els1 and Els2 to avoid confusing recursive calls - swap(Els1, Els2); - return true; - } - // Try to look through V1; if V1 is not an instruction, it can't be looked // through. - const Instruction *I = dyn_cast(V1); + const Instruction *I = dyn_cast(V); + if (!I || I->getNumOperands() == 0) return V; const Value *NoopInput = 0; - if (I != 0 && I->getNumOperands() > 0) { - Value *Op = I->getOperand(0); - if (isa(I)) { - // Look through truly no-op truncates. - if (TLI.isTruncateFree(Op->getType(), I->getType())) - NoopInput = Op; - } else if (isa(I)) { - // Look through truly no-op bitcasts. - if (isNoopBitcast(Op->getType(), I->getType(), TLI)) - NoopInput = Op; - } else if (isa(I)) { - // Look through getelementptr - if (cast(I)->hasAllZeroIndices()) - NoopInput = Op; - } else if (isa(I)) { - // Look through inttoptr. - // Make sure this isn't a truncating or extending cast. We could - // support this eventually, but don't bother for now. - if (!isa(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast(Op->getType())->getBitWidth()) - NoopInput = Op; - } else if (isa(I)) { - // Look through ptrtoint. - // Make sure this isn't a truncating or extending cast. We could - // support this eventually, but don't bother for now. - if (!isa(I->getType()) && - TLI.getPointerTy().getSizeInBits() == - cast(I->getType())->getBitWidth()) - NoopInput = Op; - } else if (isa(I)) { - // Look through call - for (User::const_op_iterator i = I->op_begin(), - // Skip Callee - e = I->op_end() - 1; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } + + Value *Op = I->getOperand(0); + if (isa(I)) { + // Look through truly no-op bitcasts. + if (isNoopBitcast(Op->getType(), I->getType(), TLI)) + NoopInput = Op; + } else if (isa(I)) { + // Look through getelementptr + if (cast(I)->hasAllZeroIndices()) + NoopInput = Op; + } else if (isa(I)) { + // Look through inttoptr. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. + if (!isa(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast(Op->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa(I)) { + // Look through ptrtoint. + // Make sure this isn't a truncating or extending cast. We could + // support this eventually, but don't bother for now. 
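getNoopInput above is the workhorse of the new tail-call analysis: it walks up the def chain through operations that lower to no machine code, recording only how far the value was truncated along the way. A minimal standalone sketch of that loop, with toy node kinds standing in for LLVM instructions (assumption: Bitcast and all-zero-index GEPs are free, Trunc merely narrows):

  #include <algorithm>

  struct Node {
    enum Kind { Source, Bitcast, ZeroGEP, Trunc } K;
    const Node *Op;   // single operand; null for a source value
    unsigned Bits;    // result width in bits
  };

  static const Node *getNoopInputSketch(const Node *V, unsigned &DataBits) {
    while (V->Op) {
      if (V->K == Node::Trunc) {
        // Free truncate: remember the narrowest width seen on the path.
        DataBits = std::min(DataBits, V->Bits);
        V = V->Op;
      } else if (V->K == Node::Bitcast || V->K == Node::ZeroGEP) {
        V = V->Op;    // genuinely free, just look through it
      } else {
        break;        // nothing left to look through
      }
    }
    return V;
  }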
+ if (!isa(I->getType()) && + TLI.getPointerTy().getSizeInBits() == + cast(I->getType())->getBitWidth()) + NoopInput = Op; + } else if (isa(I) && + TLI.allowTruncateForTailCall(Op->getType(), I->getType())) { + DataBits = std::min(DataBits, I->getType()->getPrimitiveSizeInBits()); + NoopInput = Op; + } else if (isa(I)) { + // Look through call (skipping callee) + for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 1; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; } - } else if (isa(I)) { - // Look through invoke - for (User::const_op_iterator i = I->op_begin(), - // Skip BB, BB, Callee - e = I->op_end() - 3; - i != e; ++i) { - unsigned attrInd = i - I->op_begin() + 1; - if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && - isNoopBitcast((*i)->getType(), I->getType(), TLI)) { - NoopInput = *i; - break; - } + } + } else if (isa(I)) { + // Look through invoke (skipping BB, BB, Callee) + for (User::const_op_iterator i = I->op_begin(), e = I->op_end() - 3; + i != e; ++i) { + unsigned attrInd = i - I->op_begin() + 1; + if (cast(I)->paramHasAttr(attrInd, Attribute::Returned) && + isNoopBitcast((*i)->getType(), I->getType(), TLI)) { + NoopInput = *i; + break; } } + } else if (const InsertValueInst *IVI = dyn_cast(V)) { + // Value may come from either the aggregate or the scalar + ArrayRef InsertLoc = IVI->getIndices(); + if (std::equal(InsertLoc.rbegin(), InsertLoc.rend(), + ValLoc.rbegin())) { + // The type being inserted is a nested sub-type of the aggregate; we + // have to remove those initial indices to get the location we're + // interested in for the operand. + ValLoc.resize(ValLoc.size() - InsertLoc.size()); + NoopInput = IVI->getInsertedValueOperand(); + } else { + // The struct we're inserting into has the value we're interested in, no + // change of address. + NoopInput = Op; + } + } else if (const ExtractValueInst *EVI = dyn_cast(V)) { + // The part we're interested in will inevitably be some sub-section of the + // previous aggregate. Combine the two paths to obtain the true address of + // our element. + ArrayRef ExtractLoc = EVI->getIndices(); + std::copy(ExtractLoc.rbegin(), ExtractLoc.rend(), + std::back_inserter(ValLoc)); + NoopInput = Op; } + // Terminate if we couldn't find anything to look through. + if (!NoopInput) + return V; - if (NoopInput) { - V1 = NoopInput; - continue; - } + V = NoopInput; + } +} + +/// Return true if this scalar return value only has bits discarded on its path +/// from the "tail call" to the "ret". This includes the obvious noop +/// instructions handled by getNoopInput above as well as free truncations (or +/// extensions prior to the call). +static bool slotOnlyDiscardsData(const Value *RetVal, const Value *CallVal, + SmallVectorImpl &RetIndices, + SmallVectorImpl &CallIndices, + bool AllowDifferingSizes, + const TargetLoweringBase &TLI) { + + // Trace the sub-value needed by the return value as far back up the graph as + // possible, in the hope that it will intersect with the value produced by the + // call. In the simple case with no "returned" attribute, the hope is actually + // that we end up back at the tail call instruction itself. + unsigned BitsRequired = UINT_MAX; + RetVal = getNoopInput(RetVal, RetIndices, BitsRequired, TLI); + + // If this slot in the value returned is undef, it doesn't matter what the + // call puts there, it'll be fine. 
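The index paths threaded through getNoopInput are deliberately stored reversed (innermost index first, outermost at the back) so that both look-throughs work at the cheap end of the vector: extractvalue appends, insertvalue pops a matching tail. A standalone sketch of the intended bookkeeping under that convention (toy helpers; the 3.4 source phrases the equality test slightly differently):

  #include <algorithm>
  #include <vector>

  using Path = std::vector<unsigned>; // innermost index first

  // extractvalue digs deeper: its (outermost-first) indices are appended
  // reversed, so the combined address stays innermost-first.
  static void lookThroughExtract(const Path &ExtractLoc, Path &ValLoc) {
    ValLoc.insert(ValLoc.end(), ExtractLoc.rbegin(), ExtractLoc.rend());
  }

  // insertvalue: the inserted scalar is our value only if InsertLoc is a
  // prefix of the address we want, i.e. it matches ValLoc read from the back.
  static bool lookThroughInsert(const Path &InsertLoc, Path &ValLoc) {
    if (InsertLoc.size() > ValLoc.size() ||
        !std::equal(InsertLoc.begin(), InsertLoc.end(), ValLoc.rbegin()))
      return false;           // keep tracking through the aggregate operand
    ValLoc.resize(ValLoc.size() - InsertLoc.size());
    return true;              // switch to tracking the inserted scalar
  }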
+ if (isa<UndefValue>(RetVal)) + return true; - // If we already swapped, avoid infinite loop - break; + // Now do a similar search up through the graph to find where the value + // actually returned by the "tail call" comes from. In the simple case without + // a "returned" attribute, the search will be blocked immediately and the loop + // a Noop. + unsigned BitsProvided = UINT_MAX; + CallVal = getNoopInput(CallVal, CallIndices, BitsProvided, TLI); + + // There's no hope if we can't actually trace them to (the same part of!) the + // same value. + if (CallVal != RetVal || CallIndices != RetIndices) + return false; + + // However, intervening truncates may have made the call non-tail. Make sure + // all the bits that are needed by the "ret" have been provided by the "tail + // call". FIXME: with sufficiently cunning bit-tracking, we could look through + // extensions too. + if (BitsProvided < BitsRequired || + (!AllowDifferingSizes && BitsProvided != BitsRequired)) + return false; - // Otherwise, swap V1<->V2, Els1<->Els2 - swap(V1, V2); - swap(Els1, Els2); - swapParity = !swapParity; + return true; +} + +/// For an aggregate type, determine whether a given index is within bounds or +/// not. +static bool indexReallyValid(CompositeType *T, unsigned Idx) { + if (ArrayType *AT = dyn_cast<ArrayType>(T)) + return Idx < AT->getNumElements(); + + return Idx < cast<StructType>(T)->getNumElements(); +} + +/// Move the given iterators to the next leaf type in depth first traversal. +/// +/// Performs a depth-first traversal of the type as specified by its arguments, +/// stopping at the next leaf node (which may be a legitimate scalar type or an +/// empty struct or array). +/// +/// @param SubTypes List of the partial components making up the type from +/// outermost to innermost non-empty aggregate. The element currently +/// represented is SubTypes.back()->getTypeAtIndex(Path.back() - 1). +/// +/// @param Path Set of extractvalue indices leading from the outermost type +/// (SubTypes[0]) to the leaf node currently represented. +/// +/// @returns true if a new type was found, false otherwise. Calling this +/// function again on a finished iterator will repeatedly return +/// false. SubTypes.back()->getTypeAtIndex(Path.back()) is either an empty +/// aggregate or a non-aggregate +static bool advanceToNextLeafType(SmallVectorImpl<CompositeType *> &SubTypes, + SmallVectorImpl<unsigned> &Path) { + // First march back up the tree until we can successfully increment one of the + // coordinates in Path. + while (!Path.empty() && !indexReallyValid(SubTypes.back(), Path.back() + 1)) { + Path.pop_back(); + SubTypes.pop_back(); }
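The final acceptance test in slotOnlyDiscardsData reduces to two width comparisons; both counters start at UINT_MAX, meaning "no truncate was seen on that path". Isolated as a standalone sketch:

  static bool bitsCompatible(unsigned BitsProvided, unsigned BitsRequired,
                             bool AllowDifferingSizes) {
    if (BitsProvided < BitsRequired)
      return false;   // the ret consumes bits the call never produced
    if (!AllowDifferingSizes && BitsProvided != BitsRequired)
      return false;   // a zext/sext attribute pins the width exactly
    return true;
  }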
- if (isa(EVI->getOperand(0)->getType()) && - EVI->getNumIndices() == 1 && EVI->getIndices()[0] == i) { - // Look through extractvalue - Els1.pop_back(); - if (sameNoopInput(EVI->getOperand(0), V2, Els1, Els2, TLI)) { - Els1.push_back(i); - if (swapParity) - swap(Els1, Els2); - return true; - } - Els1.push_back(i); - } - } + // If we reached the top, then the iterator is done. + if (Path.empty()) + return false; - swap(V1, V2); - swap(Els1, Els2); - swapParity = !swapParity; + // We know there's *some* valid leaf now, so march back down the tree picking + // out the left-most element at each node. + ++Path.back(); + Type *DeeperType = SubTypes.back()->getTypeAtIndex(Path.back()); + while (DeeperType->isAggregateType()) { + CompositeType *CT = cast(DeeperType); + if (!indexReallyValid(CT, 0)) + return true; + + SubTypes.push_back(CT); + Path.push_back(0); + + DeeperType = CT->getTypeAtIndex(0U); } - if (swapParity) - swap(Els1, Els2); - return false; + return true; } +/// Find the first non-empty, scalar-like type in Next and setup the iterator +/// components. +/// +/// Assuming Next is an aggregate of some kind, this function will traverse the +/// tree from left to right (i.e. depth-first) looking for the first +/// non-aggregate type which will play a role in function return. +/// +/// For example, if Next was {[0 x i64], {{}, i32, {}}, i32} then we would setup +/// Path as [1, 1] and SubTypes as [Next, {{}, i32, {}}] to represent the first +/// i32 in that type. +static bool firstRealType(Type *Next, + SmallVectorImpl &SubTypes, + SmallVectorImpl &Path) { + // First initialise the iterator components to the first "leaf" node + // (i.e. node with no valid sub-type at any index, so {} does count as a leaf + // despite nominally being an aggregate). + while (Next->isAggregateType() && + indexReallyValid(cast(Next), 0)) { + SubTypes.push_back(cast(Next)); + Path.push_back(0); + Next = cast(Next)->getTypeAtIndex(0U); + } + + // If there's no Path now, Next was originally scalar already (or empty + // leaf). We're done. + if (Path.empty()) + return true; + + // Otherwise, use normal iteration to keep looking through the tree until we + // find a non-aggregate type. + while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()) { + if (!advanceToNextLeafType(SubTypes, Path)) + return false; + } + + return true; +} + +/// Set the iterator data-structures to the next non-empty, non-aggregate +/// subtype. +static bool nextRealType(SmallVectorImpl &SubTypes, + SmallVectorImpl &Path) { + do { + if (!advanceToNextLeafType(SubTypes, Path)) + return false; + + assert(!Path.empty() && "found a leaf but didn't set the path?"); + } while (SubTypes.back()->getTypeAtIndex(Path.back())->isAggregateType()); + + return true; +} + + /// Test if the given instruction is in a position to be optimized /// with a tail-call. This roughly means that it's in a block with /// a return and there's nothing that needs to be scheduled @@ -399,6 +510,13 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, return false; } + return returnTypeIsEligibleForTailCall(ExitBB->getParent(), I, Ret, TLI); +} + +bool llvm::returnTypeIsEligibleForTailCall(const Function *F, + const Instruction *I, + const ReturnInst *Ret, + const TargetLoweringBase &TLI) { // If the block ends with a void return or unreachable, it doesn't matter // what the call's return type is. if (!Ret || Ret->getNumOperands() == 0) return true; @@ -407,22 +525,85 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, // return type is. 
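Taken together, advanceToNextLeafType and its wrappers are a hand-rolled depth-first iterator over a type tree. A self-contained model of the same traversal on a toy type (assumption: Ty stands in for llvm::CompositeType, and empty aggregates are legal leaves exactly as described above):

  #include <vector>

  struct Ty {
    bool Scalar = false;
    std::vector<Ty> Elems;  // non-scalar with no Elems == empty aggregate
  };

  // Descend to the left-most leaf, recording the spine and the index path.
  static void descend(const Ty *T, std::vector<const Ty *> &SubTypes,
                      std::vector<unsigned> &Path) {
    while (!T->Scalar && !T->Elems.empty()) {
      SubTypes.push_back(T);
      Path.push_back(0);
      T = &T->Elems[0];
    }
  }

  // Advance to the next leaf in depth-first order; false when exhausted.
  // Like advanceToNextLeafType, this does not skip empty-aggregate leaves;
  // the firstRealType/nextRealType wrappers add that filtering.
  static bool nextLeaf(std::vector<const Ty *> &SubTypes,
                       std::vector<unsigned> &Path) {
    while (!Path.empty() &&
           Path.back() + 1 >= SubTypes.back()->Elems.size()) {
      Path.pop_back();      // march up until a coordinate can be bumped
      SubTypes.pop_back();
    }
    if (Path.empty())
      return false;
    ++Path.back();
    descend(&SubTypes.back()->Elems[Path.back()], SubTypes, Path);
    return true;
  }

On {[0 x i64], {{}, i32, {}}, i32} this visits leaves at [0], [1,0], [1,1], [1,2] and [2]; the wrappers skip the empty ones, so the real iteration sees the i32 at [1,1] first and the trailing i32 at [2] second.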
if (isa(Ret->getOperand(0))) return true; - // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. - const Function *F = ExitBB->getParent(); - AttributeSet CallerAttrs = F->getAttributes(); - if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). - removeAttribute(Attribute::NoAlias) != - AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). - removeAttribute(Attribute::NoAlias)) - return false; + // Make sure the attributes attached to each return are compatible. + AttrBuilder CallerAttrs(F->getAttributes(), + AttributeSet::ReturnIndex); + AttrBuilder CalleeAttrs(cast(I)->getAttributes(), + AttributeSet::ReturnIndex); + + // Noalias is completely benign as far as calling convention goes, it + // shouldn't affect whether the call is a tail call. + CallerAttrs = CallerAttrs.removeAttribute(Attribute::NoAlias); + CalleeAttrs = CalleeAttrs.removeAttribute(Attribute::NoAlias); + + bool AllowDifferingSizes = true; + if (CallerAttrs.contains(Attribute::ZExt)) { + if (!CalleeAttrs.contains(Attribute::ZExt)) + return false; - // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || - CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + AllowDifferingSizes = false; + CallerAttrs.removeAttribute(Attribute::ZExt); + CalleeAttrs.removeAttribute(Attribute::ZExt); + } else if (CallerAttrs.contains(Attribute::SExt)) { + if (!CalleeAttrs.contains(Attribute::SExt)) + return false; + + AllowDifferingSizes = false; + CallerAttrs.removeAttribute(Attribute::SExt); + CalleeAttrs.removeAttribute(Attribute::SExt); + } + + // If they're still different, there's some facet we don't understand + // (currently only "inreg", but in future who knows). It may be OK but the + // only safe option is to reject the tail call. + if (CallerAttrs != CalleeAttrs) return false; - // Otherwise, make sure the return value and I have the same value - SmallVector Els1, Els2; - return sameNoopInput(Ret->getOperand(0), I, Els1, Els2, TLI); + const Value *RetVal = Ret->getOperand(0), *CallVal = I; + SmallVector RetPath, CallPath; + SmallVector RetSubTypes, CallSubTypes; + + bool RetEmpty = !firstRealType(RetVal->getType(), RetSubTypes, RetPath); + bool CallEmpty = !firstRealType(CallVal->getType(), CallSubTypes, CallPath); + + // Nothing's actually returned, it doesn't matter what the callee put there + // it's a valid tail call. + if (RetEmpty) + return true; + + // Iterate pairwise through each of the value types making up the tail call + // and the corresponding return. For each one we want to know whether it's + // essentially going directly from the tail call to the ret, via operations + // that end up not generating any code. + // + // We allow a certain amount of covariance here. For example it's permitted + // for the tail call to define more bits than the ret actually cares about + // (e.g. via a truncate). + do { + if (CallEmpty) { + // We've exhausted the values produced by the tail call instruction, the + // rest are essentially undef. The type doesn't really matter, but we need + // *something*. 
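The attribute dance above has a simple shape: ignore noalias, require any zext/sext to be mirrored on both sides (pinning the width when present), and reject the tail call if anything else still differs. A toy restatement using string sets (the real code uses AttrBuilder):

  #include <set>
  #include <string>

  static bool retAttrsCompatible(std::set<std::string> Caller,
                                 std::set<std::string> Callee,
                                 bool &AllowDifferingSizes) {
    Caller.erase("noalias");           // benign for the calling convention
    Callee.erase("noalias");
    AllowDifferingSizes = true;
    for (const char *Ext : {"zext", "sext"}) {
      if (Caller.count(Ext)) {
        if (!Callee.count(Ext))
          return false;                // ret re-extends; call never promised it
        AllowDifferingSizes = false;   // widths must now match exactly
        Caller.erase(Ext);
        Callee.erase(Ext);
      }
    }
    // Anything left over (e.g. inreg) is a facet we don't understand: reject.
    return Caller == Callee;
  }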
+ Type *SlotType = RetSubTypes.back()->getTypeAtIndex(RetPath.back()); + CallVal = UndefValue::get(SlotType); + } + + // The manipulations performed when we're looking through an insertvalue or + // an extractvalue would happen at the front of the RetPath list, so since + // we have to copy it anyway it's more efficient to create a reversed copy. + using std::copy; + SmallVector TmpRetPath, TmpCallPath; + copy(RetPath.rbegin(), RetPath.rend(), std::back_inserter(TmpRetPath)); + copy(CallPath.rbegin(), CallPath.rend(), std::back_inserter(TmpCallPath)); + + // Finally, we can check whether the value produced by the tail call at this + // index is compatible with the value we return. + if (!slotOnlyDiscardsData(RetVal, CallVal, TmpRetPath, TmpCallPath, + AllowDifferingSizes, TLI)) + return false; + + CallEmpty = !nextRealType(CallSubTypes, CallPath); + } while(nextRealType(RetSubTypes, RetPath)); + + return true; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp index 188047d..5d82dd9 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -47,13 +47,18 @@ ARMException::ARMException(AsmPrinter *A) ARMException::~ARMException() {} +ARMTargetStreamer &ARMException::getTargetStreamer() { + MCTargetStreamer &TS = Asm->OutStreamer.getTargetStreamer(); + return static_cast(TS); +} + void ARMException::EndModule() { } /// BeginFunction - Gather pre-function exception information. Assumes it's /// being emitted immediately after the function entry point. void ARMException::BeginFunction(const MachineFunction *MF) { - Asm->OutStreamer.EmitFnStart(); + getTargetStreamer().emitFnStart(); if (Asm->MF->getFunction()->needsUnwindTableEntry()) Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", Asm->getFunctionNumber())); @@ -62,8 +67,9 @@ void ARMException::BeginFunction(const MachineFunction *MF) { /// EndFunction - Gather and emit post-function exception information. /// void ARMException::EndFunction() { + ARMTargetStreamer &ATS = getTargetStreamer(); if (!Asm->MF->getFunction()->needsUnwindTableEntry()) - Asm->OutStreamer.EmitCantUnwind(); + ATS.emitCantUnwind(); else { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); @@ -76,13 +82,13 @@ void ARMException::EndFunction() { // Emit references to personality. if (const Function * Personality = MMI->getPersonalities()[MMI->getPersonalityIndex()]) { - MCSymbol *PerSym = Asm->Mang->getSymbol(Personality); + MCSymbol *PerSym = Asm->getSymbol(Personality); Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); - Asm->OutStreamer.EmitPersonality(PerSym); + ATS.emitPersonality(PerSym); } // Emit .handlerdata directive. 
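The ARMException changes above demonstrate the MCTargetStreamer pattern introduced in this release: the generic MCStreamer carries an optional target-specific companion, and target-aware code downcasts it once. A minimal standalone sketch of the shape (toy class names, not the real MC interfaces):

  struct TargetStreamerBase {
    virtual ~TargetStreamerBase() = default;
  };

  struct ARMTargetStreamerToy : TargetStreamerBase {
    void emitFnStart() { /* print .fnstart */ }
    void emitFnEnd()   { /* print .fnend */ }
  };

  struct StreamerToy {
    TargetStreamerBase *TS = nullptr;  // installed at construction time
    TargetStreamerBase &getTargetStreamer() { return *TS; }
  };

  // The static_cast is safe only because an ARM backend always installs an
  // ARM target streamer -- the invariant ARMException::getTargetStreamer
  // relies on above.
  static ARMTargetStreamerToy &getARMTS(StreamerToy &S) {
    return static_cast<ARMTargetStreamerToy &>(S.getTargetStreamer());
  }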
- Asm->OutStreamer.EmitHandlerData(); + ATS.emitHandlerData(); // Emit actual exception table EmitExceptionTable(); @@ -90,7 +96,7 @@ void ARMException::EndFunction() { } } - Asm->OutStreamer.EmitFnEnd(); + ATS.emitFnEnd(); } void ARMException::EmitTypeInfos(unsigned TTypeEncoding) { diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 84162ac..308b0e0 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -42,16 +42,18 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/Timer.h" #include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" using namespace llvm; -static const char *DWARFGroupName = "DWARF Emission"; -static const char *DbgTimerName = "DWARF Debug Writer"; -static const char *EHTimerName = "DWARF Exception Writer"; +static const char *const DWARFGroupName = "DWARF Emission"; +static const char *const DbgTimerName = "DWARF Debug Writer"; +static const char *const EHTimerName = "DWARF Exception Writer"; STATISTIC(EmittedInsts, "Number of machine instrs printed"); @@ -93,11 +95,11 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) : MachineFunctionPass(ID), - TM(tm), MAI(tm.getMCAsmInfo()), + TM(tm), MAI(tm.getMCAsmInfo()), MII(tm.getInstrInfo()), OutContext(Streamer.getContext()), OutStreamer(Streamer), LastMI(0), LastFn(0), Counter(~0U), SetCounter(0) { - DD = 0; DE = 0; MMI = 0; LI = 0; + DD = 0; DE = 0; MMI = 0; LI = 0; MF = 0; CurrentFnSym = CurrentFnSymForSize = 0; GCMetadataPrinters = 0; VerboseAsm = Streamer.isVerboseAsm(); @@ -154,8 +156,6 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { } bool AsmPrinter::doInitialization(Module &M) { - OutStreamer.InitStreamer(); - MMI = getAnalysisIfAvailable(); MMI->AnalyzeModule(M); @@ -163,7 +163,9 @@ bool AsmPrinter::doInitialization(Module &M) { const_cast(getObjFileLowering()) .Initialize(OutContext, TM); - Mang = new Mangler(OutContext, *TM.getDataLayout()); + OutStreamer.InitStreamer(); + + Mang = new Mangler(&TM); // Allow the target to emit any magic that it wants at the start of the file. 
EmitStartOfAsmFile(M); @@ -211,12 +213,12 @@ bool AsmPrinter::doInitialization(Module &M) { llvm_unreachable("Unknown exception type."); } -void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { - switch ((GlobalValue::LinkageTypes)Linkage) { +void AsmPrinter::EmitLinkage(const GlobalValue *GV, MCSymbol *GVSym) const { + GlobalValue::LinkageTypes Linkage = GV->getLinkage(); + switch (Linkage) { case GlobalValue::CommonLinkage: case GlobalValue::LinkOnceAnyLinkage: case GlobalValue::LinkOnceODRLinkage: - case GlobalValue::LinkOnceODRAutoHideLinkage: case GlobalValue::WeakAnyLinkage: case GlobalValue::WeakODRLinkage: case GlobalValue::LinkerPrivateWeakLinkage: @@ -224,8 +226,19 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - if ((GlobalValue::LinkageTypes)Linkage != - GlobalValue::LinkOnceODRAutoHideLinkage) + bool CanBeHidden = false; + + if (Linkage == GlobalValue::LinkOnceODRLinkage) { + if (GV->hasUnnamedAddr()) { + CanBeHidden = true; + } else { + GlobalStatus GS; + if (!GlobalStatus::analyzeGlobal(GV, GS) && !GS.IsCompared) + CanBeHidden = true; + } + } + + if (!CanBeHidden) // .weak_definition _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_WeakDefinition); else @@ -238,7 +251,7 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { // .weak _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Weak); } - break; + return; case GlobalValue::DLLExportLinkage: case GlobalValue::AppendingLinkage: // FIXME: appending linkage variables should go into a section of @@ -247,16 +260,23 @@ void AsmPrinter::EmitLinkage(unsigned Linkage, MCSymbol *GVSym) const { // If external or appending, declare as a global symbol. // .globl _foo OutStreamer.EmitSymbolAttribute(GVSym, MCSA_Global); - break; + return; case GlobalValue::PrivateLinkage: case GlobalValue::InternalLinkage: case GlobalValue::LinkerPrivateLinkage: - break; - default: - llvm_unreachable("Unknown linkage type!"); + return; + case GlobalValue::AvailableExternallyLinkage: + llvm_unreachable("Should never emit this"); + case GlobalValue::DLLImportLinkage: + case GlobalValue::ExternalWeakLinkage: + llvm_unreachable("Don't know how to emit these"); } + llvm_unreachable("Unknown linkage type!"); } +MCSymbol *AsmPrinter::getSymbol(const GlobalValue *GV) const { + return getObjFileLowering().getSymbol(*Mang, GV); +} /// EmitGlobalVariable - Emit the specified global variable to the .s file. void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { @@ -272,7 +292,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { } } - MCSymbol *GVSym = Mang->getSymbol(GV); + MCSymbol *GVSym = getSymbol(GV); EmitVisibility(GVSym, GV->getVisibility(), !GV->isDeclaration()); if (!GV->hasInitializer()) // External globals require no extra code. @@ -283,13 +303,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { SectionKind GVKind = TargetLoweringObjectFile::getKindForGlobal(GV, TM); - const DataLayout *TD = TM.getDataLayout(); - uint64_t Size = TD->getTypeAllocSize(GV->getType()->getElementType()); + const DataLayout *DL = TM.getDataLayout(); + uint64_t Size = DL->getTypeAllocSize(GV->getType()->getElementType()); // If the alignment is specified, we *must* obey it. Overaligning a global // with a specified alignment is a prompt way to break globals emitted to // sections and expected to be contiguous (e.g. ObjC metadata). 
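The EmitLinkage rewrite above also replaces the dedicated LinkOnceODRAutoHideLinkage enum value with an on-the-fly computation: a linkonce_odr definition may be emitted as a hidden weak definition only when no conforming program could observe its address identity. The decision, isolated into a hypothetical helper:

  // Sketch only; the real code asks GlobalStatus::analyzeGlobal whether the
  // global's address is ever compared.
  static bool canBeHidden(bool IsLinkOnceODR, bool HasUnnamedAddr,
                          bool AnalysisSucceeded, bool AddressIsCompared) {
    if (!IsLinkOnceODR)
      return false;
    if (HasUnnamedAddr)           // IR already promises address doesn't matter
      return true;
    return AnalysisSucceeded && !AddressIsCompared;
  }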
- unsigned AlignLog = getGVAlignmentLog2(GV, *TD); + unsigned AlignLog = getGVAlignmentLog2(GV, *DL); + + if (DD) + DD->setSymbolSize(GVSym, Size); // Handle common and BSS local symbols (.lcomm). if (GVKind.isCommon() || GVKind.isBSSLocal()) { @@ -367,9 +390,10 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { MCSymbol *MangSym = OutContext.GetOrCreateSymbol(GVSym->getName() + Twine("$tlv$init")); - if (GVKind.isThreadBSS()) + if (GVKind.isThreadBSS()) { + TheSection = getObjFileLowering().getTLSBSSSection(); OutStreamer.EmitTBSSSymbol(TheSection, MangSym, Size, 1 << AlignLog); - else if (GVKind.isThreadData()) { + } else if (GVKind.isThreadData()) { OutStreamer.SwitchSection(TheSection); EmitAlignment(AlignLog, GV); @@ -386,16 +410,16 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.SwitchSection(TLVSect); // Emit the linkage here. - EmitLinkage(GV->getLinkage(), GVSym); + EmitLinkage(GV, GVSym); OutStreamer.EmitLabel(GVSym); // Three pointers in size: // - __tlv_bootstrap - used to make sure support exists // - spare pointer, used when mapped by the runtime // - pointer to mangled symbol above with initializer - unsigned PtrSize = TD->getPointerSizeInBits()/8; + unsigned PtrSize = DL->getPointerTypeSize(GV->getType()); OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), - PtrSize); + PtrSize); OutStreamer.EmitIntValue(0, PtrSize); OutStreamer.EmitSymbolValue(MangSym, PtrSize); @@ -405,7 +429,7 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { OutStreamer.SwitchSection(TheSection); - EmitLinkage(GV->getLinkage(), GVSym); + EmitLinkage(GV, GVSym); EmitAlignment(AlignLog, GV); OutStreamer.EmitLabel(GVSym); @@ -431,7 +455,7 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM)); EmitVisibility(CurrentFnSym, F->getVisibility()); - EmitLinkage(F->getLinkage(), CurrentFnSym); + EmitLinkage(F, CurrentFnSym); EmitAlignment(MF->getAlignment(), F); if (MAI->hasDotTypeDotSizeDirective()) @@ -457,16 +481,6 @@ void AsmPrinter::EmitFunctionHeader() { OutStreamer.EmitLabel(DeadBlockSyms[i]); } - // Add some workaround for linkonce linkage on Cygwin\MinGW. - if (MAI->getLinkOnceDirective() != 0 && - (F->hasLinkOnceLinkage() || F->hasWeakLinkage())) { - // FIXME: What is this? - MCSymbol *FakeStub = - OutContext.GetOrCreateSymbol(Twine("Lllvm$workaround$fake$stub$")+ - CurrentFnSym->getName()); - OutStreamer.EmitLabel(FakeStub); - } - // Emit pre-function debug and/or EH information. if (DE) { NamedRegionTimer T(EHTimerName, DWARFGroupName, TimePassesIsEnabled); @@ -476,6 +490,10 @@ void AsmPrinter::EmitFunctionHeader() { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); DD->beginFunction(MF); } + + // Emit the prefix data. + if (F->hasPrefixData()) + EmitGlobalConstant(F->getPrefixData()); } /// EmitFunctionEntryLabel - Emit the label that is the entrypoint for the @@ -528,11 +546,11 @@ static void emitComments(const MachineInstr &MI, raw_ostream &CommentOS) { /// emitImplicitDef - This method emits the specified machine instruction /// that is an implicit def. 
-static void emitImplicitDef(const MachineInstr *MI, AsmPrinter &AP) { +void AsmPrinter::emitImplicitDef(const MachineInstr *MI) const { unsigned RegNo = MI->getOperand(0).getReg(); - AP.OutStreamer.AddComment(Twine("implicit-def: ") + - AP.TM.getRegisterInfo()->getName(RegNo)); - AP.OutStreamer.AddBlankLine(); + OutStreamer.AddComment(Twine("implicit-def: ") + + TM.getRegisterInfo()->getName(RegNo)); + OutStreamer.AddBlankLine(); } static void emitKill(const MachineInstr *MI, AsmPrinter &AP) { @@ -562,10 +580,17 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { // cast away const; DIetc do not take const operands for some reason. DIVariable V(const_cast(MI->getOperand(2).getMetadata())); - if (V.getContext().isSubprogram()) - OS << DISubprogram(V.getContext()).getDisplayName() << ":"; + if (V.getContext().isSubprogram()) { + StringRef Name = DISubprogram(V.getContext()).getDisplayName(); + if (!Name.empty()) + OS << Name << ":"; + } OS << V.getName() << " <- "; + // The second operand is only an offset if it's an immediate. + bool Deref = MI->getOperand(0).isReg() && MI->getOperand(1).isImm(); + int64_t Offset = Deref ? MI->getOperand(1).getImm() : 0; + // Register or immediate value. Register 0 means undef. if (MI->getOperand(0).isFPImm()) { APFloat APF = APFloat(MI->getOperand(0).getFPImm()->getValueAPF()); @@ -586,18 +611,31 @@ static bool emitDebugValueComment(const MachineInstr *MI, AsmPrinter &AP) { } else if (MI->getOperand(0).isCImm()) { MI->getOperand(0).getCImm()->getValue().print(OS, false /*isSigned*/); } else { - assert(MI->getOperand(0).isReg() && "Unknown operand type"); - if (MI->getOperand(0).getReg() == 0) { + unsigned Reg; + if (MI->getOperand(0).isReg()) { + Reg = MI->getOperand(0).getReg(); + } else { + assert(MI->getOperand(0).isFI() && "Unknown operand type"); + const TargetFrameLowering *TFI = AP.TM.getFrameLowering(); + Offset += TFI->getFrameIndexReference(*AP.MF, + MI->getOperand(0).getIndex(), Reg); + Deref = true; + } + if (Reg == 0) { // Suppress offset, it is not meaningful here. OS << "undef"; // NOTE: Want this comment at start of line, don't emit with AddComment. AP.OutStreamer.EmitRawText(OS.str()); return true; } - OS << AP.TM.getRegisterInfo()->getName(MI->getOperand(0).getReg()); + if (Deref) + OS << '['; + OS << AP.TM.getRegisterInfo()->getName(Reg); } - OS << '+' << MI->getOperand(1).getImm(); + if (Deref) + OS << '+' << Offset << ']'; + // NOTE: Want this comment at start of line, don't emit with AddComment. 
AP.OutStreamer.EmitRawText(OS.str()); return true; @@ -624,7 +662,7 @@ bool AsmPrinter::needsRelocationsForDwarfStringPool() const { } void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { - MCSymbol *Label = MI.getOperand(0).getMCSymbol(); + const MCSymbol *Label = MI.getOperand(0).getMCSymbol(); if (MAI->getExceptionHandlingType() != ExceptionHandling::DwarfCFI) return; @@ -635,14 +673,14 @@ void AsmPrinter::emitPrologLabel(const MachineInstr &MI) { if (MMI->getCompactUnwindEncoding() != 0) OutStreamer.EmitCompactUnwindEncoding(MMI->getCompactUnwindEncoding()); - MachineModuleInfo &MMI = MF->getMMI(); - std::vector &Moves = MMI.getFrameMoves(); + const MachineModuleInfo &MMI = MF->getMMI(); + const std::vector &Instrs = MMI.getFrameInstructions(); bool FoundOne = false; (void)FoundOne; - for (std::vector::iterator I = Moves.begin(), - E = Moves.end(); I != E; ++I) { + for (std::vector::const_iterator I = Instrs.begin(), + E = Instrs.end(); I != E; ++I) { if (I->getLabel() == Label) { - EmitCFIFrameMove(*I); + emitCFIInstruction(*I); FoundOne = true; } } @@ -702,7 +740,7 @@ void AsmPrinter::EmitFunctionBody() { } break; case TargetOpcode::IMPLICIT_DEF: - if (isVerbose()) emitImplicitDef(II, *this); + if (isVerbose()) emitImplicitDef(II); break; case TargetOpcode::KILL: if (isVerbose()) emitKill(II, *this); @@ -790,16 +828,9 @@ void AsmPrinter::EmitFunctionBody() { OutStreamer.AddBlankLine(); } -/// getDebugValueLocation - Get location information encoded by DBG_VALUE -/// operands. -MachineLocation AsmPrinter:: -getDebugValueLocation(const MachineInstr *MI) const { - // Target specific DBG_VALUE instructions are handled by each target. - return MachineLocation(); -} - /// EmitDwarfRegOp - Emit dwarf register operation. -void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { +void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc, + bool Indirect) const { const TargetRegisterInfo *TRI = TM.getRegisterInfo(); int Reg = TRI->getDwarfRegNum(MLoc.getReg(), false); @@ -817,7 +848,7 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { // caller might be in the middle of an dwarf expression. We should // probably assert that Reg >= 0 once debug info generation is more mature. - if (MLoc.isIndirect()) { + if (MLoc.isIndirect() || Indirect) { if (Reg < 32) { OutStreamer.AddComment( dwarf::OperationEncodingString(dwarf::DW_OP_breg0 + Reg)); @@ -828,7 +859,9 @@ void AsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { OutStreamer.AddComment(Twine(Reg)); EmitULEB128(Reg); } - EmitSLEB128(MLoc.getOffset()); + EmitSLEB128(!MLoc.isIndirect() ? 0 : MLoc.getOffset()); + if (MLoc.isIndirect() && Indirect) + EmitInt8(dwarf::DW_OP_deref); } else { if (Reg < 32) { OutStreamer.AddComment( @@ -860,7 +893,7 @@ bool AsmPrinter::doFinalization(Module &M) { if (V == GlobalValue::DefaultVisibility) continue; - MCSymbol *Name = Mang->getSymbol(&F); + MCSymbol *Name = getSymbol(&F); EmitVisibility(Name, V, false); } @@ -870,6 +903,9 @@ bool AsmPrinter::doFinalization(Module &M) { if (!ModuleFlags.empty()) getObjFileLowering().emitModuleFlags(OutStreamer, ModuleFlags, Mang, TM); + // Make sure we wrote out everything we need. + OutStreamer.Flush(); + // Finalize debug and EH information. 
if (DE) { { @@ -897,12 +933,12 @@ bool AsmPrinter::doFinalization(Module &M) { for (Module::const_global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) { if (!I->hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); + OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference); } for (Module::const_iterator I = M.begin(), E = M.end(); I != E; ++I) { if (!I->hasExternalWeakLinkage()) continue; - OutStreamer.EmitSymbolAttribute(Mang->getSymbol(I), MCSA_WeakReference); + OutStreamer.EmitSymbolAttribute(getSymbol(I), MCSA_WeakReference); } } @@ -910,14 +946,19 @@ bool AsmPrinter::doFinalization(Module &M) { OutStreamer.AddBlankLine(); for (Module::const_alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) { - MCSymbol *Name = Mang->getSymbol(I); + MCSymbol *Name = getSymbol(I); const GlobalValue *GV = I->getAliasedGlobal(); - MCSymbol *Target = Mang->getSymbol(GV); + if (GV->isDeclaration()) { + report_fatal_error(Name->getName() + + ": Target doesn't support aliases to declarations"); + } + + MCSymbol *Target = getSymbol(GV); if (I->hasExternalLinkage() || !MAI->getWeakRefDirective()) OutStreamer.EmitSymbolAttribute(Name, MCSA_Global); - else if (I->hasWeakLinkage()) + else if (I->hasWeakLinkage() || I->hasLinkOnceLinkage()) OutStreamer.EmitSymbolAttribute(Name, MCSA_WeakReference); else assert(I->hasLocalLinkage() && "Invalid alias linkage"); @@ -936,6 +977,9 @@ bool AsmPrinter::doFinalization(Module &M) { if (GCMetadataPrinter *MP = GetOrCreateGCPrinter(*--I)) MP->finishAssembly(*this); + // Emit llvm.ident metadata in an '.ident' directive. + EmitModuleIdents(M); + // If we don't have any trampolines, then we don't require stack memory // to be executable. Some targets have a directive to declare this. Function *InitTrampolineIntrinsic = M.getFunction("llvm.init.trampoline"); @@ -959,7 +1003,7 @@ bool AsmPrinter::doFinalization(Module &M) { void AsmPrinter::SetupMachineFunction(MachineFunction &MF) { this->MF = &MF; // Get the function symbol. - CurrentFnSym = Mang->getSymbol(MF.getFunction()); + CurrentFnSym = getSymbol(MF.getFunction()); CurrentFnSymForSize = CurrentFnSym; if (isVerbose()) @@ -1266,16 +1310,10 @@ void AsmPrinter::EmitLLVMUsedList(const ConstantArray *InitList) { const GlobalValue *GV = dyn_cast(InitList->getOperand(i)->stripPointerCasts()); if (GV && getObjFileLowering().shouldEmitUsedDirectiveFor(GV, Mang)) - OutStreamer.EmitSymbolAttribute(Mang->getSymbol(GV), MCSA_NoDeadStrip); + OutStreamer.EmitSymbolAttribute(getSymbol(GV), MCSA_NoDeadStrip); } } -typedef std::pair Structor; - -static bool priority_order(const Structor& lhs, const Structor& rhs) { - return lhs.first < rhs.first; -} - /// EmitXXStructorList - Emit the ctor or dtor list taking into account the init /// priority. void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { @@ -1292,6 +1330,7 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { !isa(ETy->getTypeAtIndex(1U))) return; // Not (int, ptr). // Gather the structors in a form that's convenient for sorting by priority. 
+ typedef std::pair Structor; SmallVector Structors; for (unsigned i = 0, e = InitList->getNumOperands(); i != e; ++i) { ConstantStruct *CS = dyn_cast(InitList->getOperand(i)); @@ -1305,9 +1344,9 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } // Emit the function pointers in the target-specific order - const DataLayout *TD = TM.getDataLayout(); - unsigned Align = Log2_32(TD->getPointerPrefAlignment()); - std::stable_sort(Structors.begin(), Structors.end(), priority_order); + const DataLayout *DL = TM.getDataLayout(); + unsigned Align = Log2_32(DL->getPointerPrefAlignment()); + std::stable_sort(Structors.begin(), Structors.end(), less_first()); for (unsigned i = 0, e = Structors.size(); i != e; ++i) { const MCSection *OutputSection = (isCtor ? @@ -1320,6 +1359,21 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { } } +void AsmPrinter::EmitModuleIdents(Module &M) { + if (!MAI->hasIdentDirective()) + return; + + if (const NamedMDNode *NMD = M.getNamedMetadata("llvm.ident")) { + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + const MDNode *N = NMD->getOperand(i); + assert(N->getNumOperands() == 1 && + "llvm.ident metadata entry can have only one operand"); + const MDString *S = cast(N->getOperand(0)); + OutStreamer.EmitIdent(S->getString()); + } + } +} + //===--------------------------------------------------------------------===// // Emission and print routines // @@ -1385,12 +1439,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, OutContext); if (!MAI->hasSetDirective()) - OutStreamer.EmitValue(Diff, 4); + OutStreamer.EmitValue(Diff, Size); else { // Otherwise, emit with .set (aka assignment). MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); OutStreamer.EmitAssignment(SetLabel, Diff); - OutStreamer.EmitSymbolValue(SetLabel, 4); + OutStreamer.EmitSymbolValue(SetLabel, Size); } } @@ -1398,8 +1452,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, /// where the size in bytes of the directive is specified by Size and Label /// specifies the label. This implicitly uses .set if it is available. 
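less_first() replaces the file-local priority_order predicate: it compares only pair.first, and std::stable_sort then guarantees that ctors sharing a priority keep their original order. A standalone equivalent (toy LessFirst; LLVM's version lives in STLExtras.h):

  #include <algorithm>
  #include <string>
  #include <utility>
  #include <vector>

  struct LessFirst {
    template <typename P>
    bool operator()(const P &L, const P &R) const { return L.first < R.first; }
  };

  int main() {
    std::vector<std::pair<int, std::string>> Structors{
        {65535, "ctor_b"}, {101, "ctor_c"}, {65535, "ctor_a"}};
    std::stable_sort(Structors.begin(), Structors.end(), LessFirst());
    // Now: ctor_c (101) first, then ctor_b before ctor_a -- the two entries
    // tied at 65535 keep their original relative order.
  }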
void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, - unsigned Size) + unsigned Size, bool IsSectionRelative) const { + if (MAI->needsDwarfSectionOffsetDirective() && IsSectionRelative) { + OutStreamer.EmitCOFFSecRel32(Label); + return; + } // Emit Label+Offset (or just Label if Offset is zero) const MCExpr *Expr = MCSymbolRefExpr::Create(Label, OutContext); @@ -1447,7 +1505,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { return MCConstantExpr::Create(CI->getZExtValue(), Ctx); if (const GlobalValue *GV = dyn_cast(CV)) - return MCSymbolRefExpr::Create(AP.Mang->getSymbol(GV), Ctx); + return MCSymbolRefExpr::Create(AP.getSymbol(GV), Ctx); if (const BlockAddress *BA = dyn_cast(CV)) return MCSymbolRefExpr::Create(AP.GetBlockAddressSymbol(BA), Ctx); @@ -1477,10 +1535,10 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { report_fatal_error(OS.str()); } case Instruction::GetElementPtr: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address - APInt OffsetAI(TD.getPointerSizeInBits(), 0); - cast(CE)->accumulateConstantOffset(TD, OffsetAI); + APInt OffsetAI(DL.getPointerTypeSizeInBits(CE->getType()), 0); + cast(CE)->accumulateConstantOffset(DL, OffsetAI); const MCExpr *Base = lowerConstant(CE->getOperand(0), AP); if (!OffsetAI) @@ -1501,17 +1559,17 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { return lowerConstant(CE->getOperand(0), AP); case Instruction::IntToPtr: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); // Handle casts to pointers by changing them into casts to the appropriate // integer type. This promotes constant folding and simplifies this code. Constant *Op = CE->getOperand(0); - Op = ConstantExpr::getIntegerCast(Op, TD.getIntPtrType(CV->getContext()), + Op = ConstantExpr::getIntegerCast(Op, DL.getIntPtrType(CV->getType()), false/*ZExt*/); return lowerConstant(Op, AP); } case Instruction::PtrToInt: { - const DataLayout &TD = *AP.TM.getDataLayout(); + const DataLayout &DL = *AP.TM.getDataLayout(); // Support only foldable casts to/from pointers that can be eliminated by // changing the pointer to the appropriately sized integer type. Constant *Op = CE->getOperand(0); @@ -1521,13 +1579,13 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { // We can emit the pointer value into this slot if the slot is an // integer slot equal to the size of the pointer. - if (TD.getTypeAllocSize(Ty) == TD.getTypeAllocSize(Op->getType())) + if (DL.getTypeAllocSize(Ty) == DL.getTypeAllocSize(Op->getType())) return OpExpr; // Otherwise the pointer is smaller than the resultant integer, mask off // the high bits so we are sure to get a proper truncation if the input is // a constant expr. 
- unsigned InBits = TD.getTypeAllocSizeInBits(Op->getType()); + unsigned InBits = DL.getTypeAllocSizeInBits(Op->getType()); const MCExpr *MaskExpr = MCConstantExpr::Create(~0ULL >> (64-InBits), Ctx); return MCBinaryExpr::CreateAnd(OpExpr, MaskExpr, Ctx); } @@ -1561,8 +1619,7 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { } } -static void emitGlobalConstantImpl(const Constant *C, unsigned AddrSpace, - AsmPrinter &AP); +static void emitGlobalConstantImpl(const Constant *C, AsmPrinter &AP); /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the @@ -1624,7 +1681,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { } static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, - unsigned AddrSpace,AsmPrinter &AP){ + AsmPrinter &AP){ // See if we can aggregate this into a .fill, if so, emit it as such. int Value = isRepeatedByteSequence(CDS, AP.TM); @@ -1632,12 +1689,12 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CDS->getType()); // Don't emit a 1-byte object as a .fill. if (Bytes > 1) - return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + return AP.OutStreamer.EmitFill(Bytes, Value); } // If this can be emitted with .ascii/.asciz, emit it as such. if (CDS->isString()) - return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace); + return AP.OutStreamer.EmitBytes(CDS->getAsString()); // Otherwise, emit the values in successive locations. unsigned ElementByteSize = CDS->getElementByteSize(); @@ -1647,7 +1704,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", CDS->getElementAsInteger(i)); AP.OutStreamer.EmitIntValue(CDS->getElementAsInteger(i), - ElementByteSize, AddrSpace); + ElementByteSize); } } else if (ElementByteSize == 4) { // FP Constants are printed as integer constants to avoid losing @@ -1662,7 +1719,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsFloat(i); if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 4, AddrSpace); + AP.OutStreamer.EmitIntValue(I, 4); } } else { assert(CDS->getElementType()->isDoubleTy()); @@ -1675,78 +1732,74 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, F = CDS->getElementAsDouble(i); if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer.EmitIntValue(I, 8, AddrSpace); + AP.OutStreamer.EmitIntValue(I, 8); } } - const DataLayout &TD = *AP.TM.getDataLayout(); - unsigned Size = TD.getTypeAllocSize(CDS->getType()); - unsigned EmittedSize = TD.getTypeAllocSize(CDS->getType()->getElementType()) * + const DataLayout &DL = *AP.TM.getDataLayout(); + unsigned Size = DL.getTypeAllocSize(CDS->getType()); + unsigned EmittedSize = DL.getTypeAllocSize(CDS->getType()->getElementType()) * CDS->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding, AddrSpace); + AP.OutStreamer.EmitZeros(Padding); } -static void emitGlobalConstantArray(const ConstantArray *CA, unsigned AddrSpace, - AsmPrinter &AP) { +static void emitGlobalConstantArray(const ConstantArray *CA, AsmPrinter &AP) { // See if we can aggregate some values. Make sure it can be // represented as a series of bytes of the constant value. 
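The .fill fast path is only legal when the constant is byte-for-byte uniform, which is what isRepeatedByteSequence checks. Its essence as a standalone function over a raw buffer (the real version walks Constant nodes):

  #include <cstdint>
  #include <vector>

  // Return the repeated byte value, or -1 if the bytes are not all equal.
  static int repeatedByteValue(const std::vector<uint8_t> &Bytes) {
    if (Bytes.empty())
      return -1;
    for (uint8_t B : Bytes)
      if (B != Bytes[0])
        return -1;
    return Bytes[0];
  }

A one-byte object is still emitted directly; as the code above notes, .fill only pays off for longer runs.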
int Value = isRepeatedByteSequence(CA, AP.TM); if (Value != -1) { uint64_t Bytes = AP.TM.getDataLayout()->getTypeAllocSize(CA->getType()); - AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); + AP.OutStreamer.EmitFill(Bytes, Value); } else { for (unsigned i = 0, e = CA->getNumOperands(); i != e; ++i) - emitGlobalConstantImpl(CA->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CA->getOperand(i), AP); } } -static void emitGlobalConstantVector(const ConstantVector *CV, - unsigned AddrSpace, AsmPrinter &AP) { +static void emitGlobalConstantVector(const ConstantVector *CV, AsmPrinter &AP) { for (unsigned i = 0, e = CV->getType()->getNumElements(); i != e; ++i) - emitGlobalConstantImpl(CV->getOperand(i), AddrSpace, AP); + emitGlobalConstantImpl(CV->getOperand(i), AP); - const DataLayout &TD = *AP.TM.getDataLayout(); - unsigned Size = TD.getTypeAllocSize(CV->getType()); - unsigned EmittedSize = TD.getTypeAllocSize(CV->getType()->getElementType()) * + const DataLayout &DL = *AP.TM.getDataLayout(); + unsigned Size = DL.getTypeAllocSize(CV->getType()); + unsigned EmittedSize = DL.getTypeAllocSize(CV->getType()->getElementType()) * CV->getType()->getNumElements(); if (unsigned Padding = Size - EmittedSize) - AP.OutStreamer.EmitZeros(Padding, AddrSpace); + AP.OutStreamer.EmitZeros(Padding); } -static void emitGlobalConstantStruct(const ConstantStruct *CS, - unsigned AddrSpace, AsmPrinter &AP) { +static void emitGlobalConstantStruct(const ConstantStruct *CS, AsmPrinter &AP) { // Print the fields in successive locations. Pad to align if needed! - const DataLayout *TD = AP.TM.getDataLayout(); - unsigned Size = TD->getTypeAllocSize(CS->getType()); - const StructLayout *Layout = TD->getStructLayout(CS->getType()); + const DataLayout *DL = AP.TM.getDataLayout(); + unsigned Size = DL->getTypeAllocSize(CS->getType()); + const StructLayout *Layout = DL->getStructLayout(CS->getType()); uint64_t SizeSoFar = 0; for (unsigned i = 0, e = CS->getNumOperands(); i != e; ++i) { const Constant *Field = CS->getOperand(i); // Check if padding is needed and insert one or more 0s. - uint64_t FieldSize = TD->getTypeAllocSize(Field->getType()); + uint64_t FieldSize = DL->getTypeAllocSize(Field->getType()); uint64_t PadSize = ((i == e-1 ? Size : Layout->getElementOffset(i+1)) - Layout->getElementOffset(i)) - FieldSize; SizeSoFar += FieldSize + PadSize; // Now print the actual field value. - emitGlobalConstantImpl(Field, AddrSpace, AP); + emitGlobalConstantImpl(Field, AP); // Insert padding - this may include padding to increase the size of the // current field up to the ABI size (if the struct is not packed) as well // as padding to ensure that the next field starts at the right offset. 
- AP.OutStreamer.EmitZeros(PadSize, AddrSpace); + AP.OutStreamer.EmitZeros(PadSize); } assert(SizeSoFar == Layout->getSizeInBytes() && "Layout of constant struct may be incorrect!"); } -static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, - AsmPrinter &AP) { +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP) { APInt API = CFP->getValueAPF().bitcastToAPInt(); // First print a comment with what we think the original floating-point value @@ -1772,47 +1825,86 @@ static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, int Chunk = API.getNumWords() - 1; if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes); for (; Chunk >= 0; --Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); } else { unsigned Chunk; for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) - AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t)); if (TrailingBytes) - AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace); + AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes); } // Emit the tail padding for the long double. - const DataLayout &TD = *AP.TM.getDataLayout(); - AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - - TD.getTypeStoreSize(CFP->getType()), AddrSpace); + const DataLayout &DL = *AP.TM.getDataLayout(); + AP.OutStreamer.EmitZeros(DL.getTypeAllocSize(CFP->getType()) - + DL.getTypeStoreSize(CFP->getType())); } -static void emitGlobalConstantLargeInt(const ConstantInt *CI, - unsigned AddrSpace, AsmPrinter &AP) { - const DataLayout *TD = AP.TM.getDataLayout(); +static void emitGlobalConstantLargeInt(const ConstantInt *CI, AsmPrinter &AP) { + const DataLayout *DL = AP.TM.getDataLayout(); unsigned BitWidth = CI->getBitWidth(); - assert((BitWidth & 63) == 0 && "only support multiples of 64-bits"); + + // Copy the value as we may massage the layout for constants whose bit width + // is not a multiple of 64-bits. + APInt Realigned(CI->getValue()); + uint64_t ExtraBits = 0; + unsigned ExtraBitsSize = BitWidth & 63; + + if (ExtraBitsSize) { + // The bit width of the data is not a multiple of 64-bits. + // The extra bits are expected to be at the end of the chunk of the memory. + // Little endian: + // * Nothing to be done, just record the extra bits to emit. + // Big endian: + // * Record the extra bits to emit. + // * Realign the raw data to emit the chunks of 64-bits. + if (DL->isBigEndian()) { + // Basically the structure of the raw data is a chunk of 64-bits cells: + // 0 1 BitWidth / 64 + // [chunk1][chunk2] ... [chunkN]. + // The most significant chunk is chunkN and it should be emitted first. + // However, due to the alignment issue chunkN contains useless bits. + // Realign the chunks so that they contain only useless information: + // ExtraBits 0 1 (BitWidth / 64) - 1 + // chu[nk1 chu][nk2 chu] ... [nkN-1 chunkN] + ExtraBits = Realigned.getRawData()[0] & + (((uint64_t)-1) >> (64 - ExtraBitsSize)); + Realigned = Realigned.lshr(ExtraBitsSize); + } else + ExtraBits = Realigned.getRawData()[BitWidth / 64]; + } // We don't expect assemblers to support integer data directives // for more than 64 bits, so we emit the data in at most 64-bit // quantities at a time. 
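A concrete instance of the realignment just described, for a 96-bit constant held in two 64-bit words (standalone sketch; W0/W1 stand in for APInt's raw words, low word first, and ExtraBitsSize = 96 % 64 = 32):

  #include <cstdint>

  struct I96 { uint64_t W0, W1; }; // W1 holds only 32 valid bits

  // Little endian: emit W0 as a full 8-byte chunk, then these trailing bits.
  static uint64_t extraBitsLE(const I96 &V) { return V.W1; }

  // Big endian: the LOW 32 bits become the trailing "extra" chunk, and the
  // remaining 64 bits are shifted down into one full, aligned word.
  static uint64_t extraBitsBE(I96 &V) {
    uint64_t Extra = V.W0 & 0xFFFFFFFFu;
    V.W0 = (V.W0 >> 32) | (V.W1 << 32); // realigned 64-bit chunk to emit
    V.W1 >>= 32;                        // now zero
    return Extra;
  }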
- const uint64_t *RawData = CI->getValue().getRawData(); + const uint64_t *RawData = Realigned.getRawData(); for (unsigned i = 0, e = BitWidth / 64; i != e; ++i) { - uint64_t Val = TD->isBigEndian() ? RawData[e - i - 1] : RawData[i]; - AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); + uint64_t Val = DL->isBigEndian() ? RawData[e - i - 1] : RawData[i]; + AP.OutStreamer.EmitIntValue(Val, 8); + } + + if (ExtraBitsSize) { + // Emit the extra bits after the 64-bits chunks. + + // Emit a directive that fills the expected size. + uint64_t Size = AP.TM.getDataLayout()->getTypeAllocSize(CI->getType()); + Size -= (BitWidth / 64) * 8; + assert(Size && Size * 8 >= ExtraBitsSize && + (ExtraBits & (((uint64_t)-1) >> (64 - ExtraBitsSize))) + == ExtraBits && "Directive too small for extra bits."); + AP.OutStreamer.EmitIntValue(ExtraBits, Size); } } -static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, - AsmPrinter &AP) { - const DataLayout *TD = AP.TM.getDataLayout(); - uint64_t Size = TD->getTypeAllocSize(CV->getType()); +static void emitGlobalConstantImpl(const Constant *CV, AsmPrinter &AP) { + const DataLayout *DL = AP.TM.getDataLayout(); + uint64_t Size = DL->getTypeAllocSize(CV->getType()); if (isa(CV) || isa(CV)) - return AP.OutStreamer.EmitZeros(Size, AddrSpace); + return AP.OutStreamer.EmitZeros(Size); if (const ConstantInt *CI = dyn_cast(CV)) { switch (Size) { @@ -1823,64 +1915,64 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << format("0x%" PRIx64 "\n", CI->getZExtValue()); - AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size, AddrSpace); + AP.OutStreamer.EmitIntValue(CI->getZExtValue(), Size); return; default: - emitGlobalConstantLargeInt(CI, AddrSpace, AP); + emitGlobalConstantLargeInt(CI, AP); return; } } if (const ConstantFP *CFP = dyn_cast(CV)) - return emitGlobalConstantFP(CFP, AddrSpace, AP); + return emitGlobalConstantFP(CFP, AP); if (isa(CV)) { - AP.OutStreamer.EmitIntValue(0, Size, AddrSpace); + AP.OutStreamer.EmitIntValue(0, Size); return; } if (const ConstantDataSequential *CDS = dyn_cast(CV)) - return emitGlobalConstantDataSequential(CDS, AddrSpace, AP); + return emitGlobalConstantDataSequential(CDS, AP); if (const ConstantArray *CVA = dyn_cast(CV)) - return emitGlobalConstantArray(CVA, AddrSpace, AP); + return emitGlobalConstantArray(CVA, AP); if (const ConstantStruct *CVS = dyn_cast(CV)) - return emitGlobalConstantStruct(CVS, AddrSpace, AP); + return emitGlobalConstantStruct(CVS, AP); if (const ConstantExpr *CE = dyn_cast(CV)) { // Look through bitcasts, which might not be able to be MCExpr'ized (e.g. of // vectors). if (CE->getOpcode() == Instruction::BitCast) - return emitGlobalConstantImpl(CE->getOperand(0), AddrSpace, AP); + return emitGlobalConstantImpl(CE->getOperand(0), AP); if (Size > 8) { // If the constant expression's size is greater than 64-bits, then we have // to emit the value in chunks. Try to constant fold the value and emit it // that way. - Constant *New = ConstantFoldConstantExpression(CE, TD); + Constant *New = ConstantFoldConstantExpression(CE, DL); if (New && New != CE) - return emitGlobalConstantImpl(New, AddrSpace, AP); + return emitGlobalConstantImpl(New, AP); } } if (const ConstantVector *V = dyn_cast(CV)) - return emitGlobalConstantVector(V, AddrSpace, AP); + return emitGlobalConstantVector(V, AP); // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. 
- AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace); + AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size); } /// EmitGlobalConstant - Print a general LLVM constant to the .s file. -void AsmPrinter::EmitGlobalConstant(const Constant *CV, unsigned AddrSpace) { +void AsmPrinter::EmitGlobalConstant(const Constant *CV) { uint64_t Size = TM.getDataLayout()->getTypeAllocSize(CV->getType()); if (Size) - emitGlobalConstantImpl(CV, AddrSpace, *this); + emitGlobalConstantImpl(CV, *this); else if (MAI->hasSubsectionsViaSymbols()) { // If the global has zero size, emit a single byte so that two labels don't // look like they are at the same location. - OutStreamer.EmitIntValue(0, 1, AddrSpace); + OutStreamer.EmitIntValue(0, 1); } } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index 31e42d4..b92f49c 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -33,7 +33,7 @@ using namespace llvm; //===----------------------------------------------------------------------===// /// EmitSLEB128 - emit the specified signed leb128 value. -void AsmPrinter::EmitSLEB128(int Value, const char *Desc) const { +void AsmPrinter::EmitSLEB128(int64_t Value, const char *Desc) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); @@ -41,7 +41,7 @@ } /// EmitULEB128 - emit the specified unsigned leb128 value. -void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, +void AsmPrinter::EmitULEB128(uint64_t Value, const char *Desc, unsigned PadTo) const { if (isVerbose() && Desc) OutStreamer.AddComment(Desc); @@ -169,28 +169,27 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, // Dwarf Lowering Routines //===----------------------------------------------------------------------===// -/// EmitCFIFrameMove - Emit a frame instruction. -void AsmPrinter::EmitCFIFrameMove(const MachineMove &Move) const { - const TargetRegisterInfo *RI = TM.getRegisterInfo(); - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // If advancing cfa.
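The parameter widening on EmitSLEB128/EmitULEB128 matters because DWARF offsets and sizes can exceed 32 bits. For reference, the encodings these helpers ultimately emit, as standalone functions:

  #include <cstdint>
  #include <vector>

  static void encodeULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
    do {
      uint8_t Byte = Value & 0x7f;
      Value >>= 7;
      if (Value != 0)
        Byte |= 0x80;             // more bytes follow
      Out.push_back(Byte);
    } while (Value != 0);
  }

  static void encodeSLEB128(int64_t Value, std::vector<uint8_t> &Out) {
    bool More;
    do {
      uint8_t Byte = Value & 0x7f;
      Value >>= 7;                // arithmetic shift preserves the sign
      More = !((Value == 0 && (Byte & 0x40) == 0) ||
               (Value == -1 && (Byte & 0x40) != 0));
      if (More)
        Byte |= 0x80;
      Out.push_back(Byte);
    } while (More);
  }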
-  if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) {
-    if (Src.getReg() == MachineLocation::VirtualFP) {
-      OutStreamer.EmitCFIDefCfaOffset(-Src.getOffset());
-    } else {
-      // Reg + Offset
-      OutStreamer.EmitCFIDefCfa(RI->getDwarfRegNum(Src.getReg(), true),
-                                Src.getOffset());
-    }
-  } else if (Src.isReg() && Src.getReg() == MachineLocation::VirtualFP) {
-    assert(Dst.isReg() && "Machine move not supported yet.");
-    OutStreamer.EmitCFIDefCfaRegister(RI->getDwarfRegNum(Dst.getReg(), true));
-  } else {
-    assert(!Dst.isReg() && "Machine move not supported yet.");
-    OutStreamer.EmitCFIOffset(RI->getDwarfRegNum(Src.getReg(), true),
-                              Dst.getOffset());
+void AsmPrinter::emitCFIInstruction(const MCCFIInstruction &Inst) const {
+  switch (Inst.getOperation()) {
+  default:
+    llvm_unreachable("Unexpected instruction");
+  case MCCFIInstruction::OpDefCfaOffset:
+    OutStreamer.EmitCFIDefCfaOffset(Inst.getOffset());
+    break;
+  case MCCFIInstruction::OpDefCfa:
+    OutStreamer.EmitCFIDefCfa(Inst.getRegister(), Inst.getOffset());
+    break;
+  case MCCFIInstruction::OpDefCfaRegister:
+    OutStreamer.EmitCFIDefCfaRegister(Inst.getRegister());
+    break;
+  case MCCFIInstruction::OpOffset:
+    OutStreamer.EmitCFIOffset(Inst.getRegister(), Inst.getOffset());
+    break;
+  case MCCFIInstruction::OpRegister:
+    OutStreamer.EmitCFIRegister(Inst.getRegister(), Inst.getRegister2());
+    break;
+  case MCCFIInstruction::OpWindowSave:
+    OutStreamer.EmitCFIWindowSave();
+    break;
   }
 }
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
index abfa330..4f927f6 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp
@@ -123,7 +123,7 @@ void AsmPrinter::EmitInlineAsm(StringRef Str, const MDNode *LocMDNode,
                                                   TM.getTargetCPU(),
                                                   TM.getTargetFeatureString()));
   OwningPtr<MCTargetAsmParser>
-    TAP(TM.getTarget().createMCAsmParser(*STI, *Parser));
+    TAP(TM.getTarget().createMCAsmParser(*STI, *Parser, *MII));
   if (!TAP)
     report_fatal_error("Inline asm not supported by this streamer because"
                        " we don't have an asm parser for this target\n");
@@ -213,7 +213,7 @@ static void EmitMSInlineAsmStr(const char *AsmStr, const MachineInstr *MI,
     } else {
       unsigned OpFlags = MI->getOperand(OpNo).getImm();
       ++OpNo;  // Skip over the ID number.
-
+
       if (InlineAsm::isMemKind(OpFlags)) {
         Error = AP->PrintAsmMemoryOperand(MI, OpNo, InlineAsmVariant,
                                           /*Modifier*/ 0, OS);
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
index 673867a..e39b374 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.cpp
@@ -12,6 +12,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "DIE.h"
+#include "DwarfDebug.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/CodeGen/AsmPrinter.h"
 #include "llvm/IR/DataLayout.h"
@@ -23,6 +24,7 @@
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MD5.h"
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -32,8 +34,10 @@ using namespace llvm;
 /// Profile - Used to gather unique data for the abbreviation folding set.
 ///
 void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
-  ID.AddInteger(Attribute);
-  ID.AddInteger(Form);
+  // Explicitly cast to an integer type for which FoldingSetNodeID has
+  // overloads.  Otherwise MSVC 2010 thinks this call is ambiguous.
+  ID.AddInteger(unsigned(Attribute));
+  ID.AddInteger(unsigned(Form));
 }
 
 //===----------------------------------------------------------------------===//
@@ -43,7 +47,7 @@ void DIEAbbrevData::Profile(FoldingSetNodeID &ID) const {
 /// Profile - Used to gather unique data for the abbreviation folding set.
 ///
 void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
-  ID.AddInteger(Tag);
+  ID.AddInteger(unsigned(Tag));
   ID.AddInteger(ChildrenFlag);
 
   // For each attribute description.
@@ -55,11 +59,9 @@ void DIEAbbrev::Profile(FoldingSetNodeID &ID) const {
 ///
 void DIEAbbrev::Emit(AsmPrinter *AP) const {
   // Emit its Dwarf tag type.
-  // FIXME: Doing work even in non-asm-verbose runs.
   AP->EmitULEB128(Tag, dwarf::TagString(Tag));
 
   // Emit whether it has children DIEs.
-  // FIXME: Doing work even in non-asm-verbose runs.
   AP->EmitULEB128(ChildrenFlag, dwarf::ChildrenString(ChildrenFlag));
 
   // For each attribute description.
@@ -67,12 +69,10 @@ void DIEAbbrev::Emit(AsmPrinter *AP) const {
     const DIEAbbrevData &AttrData = Data[i];
 
     // Emit attribute type.
-    // FIXME: Doing work even in non-asm-verbose runs.
     AP->EmitULEB128(AttrData.getAttribute(),
                     dwarf::AttributeString(AttrData.getAttribute()));
 
     // Emit form type.
-    // FIXME: Doing work even in non-asm-verbose runs.
     AP->EmitULEB128(AttrData.getForm(),
                     dwarf::FormEncodingString(AttrData.getForm()));
   }
@@ -114,14 +114,34 @@ DIE::~DIE() {
 
 /// Climb up the parent chain to get the compile unit DIE to which this DIE
 /// belongs.
-DIE *DIE::getCompileUnit() const {
-  DIE *p = getParent();
+const DIE *DIE::getCompileUnit() const {
+  const DIE *Cu = getCompileUnitOrNull();
+  assert(Cu && "We should not have orphaned DIEs.");
+  return Cu;
+}
+
+/// Climb up the parent chain to get the compile unit DIE this DIE belongs
+/// to. Return NULL if DIE is not added to an owner yet.
+const DIE *DIE::getCompileUnitOrNull() const {
+  const DIE *p = this;
   while (p) {
     if (p->getTag() == dwarf::DW_TAG_compile_unit)
       return p;
     p = p->getParent();
   }
-  llvm_unreachable("We should not have orphaned DIEs.");
+  return NULL;
+}
+
+DIEValue *DIE::findAttribute(uint16_t Attribute) {
+  const SmallVectorImpl<DIEValue *> &Values = getValues();
+  const DIEAbbrev &Abbrevs = getAbbrev();
+
+  // Iterate through all the attributes until we find the one we're
+  // looking for, if we can't find it return NULL.
+  for (size_t i = 0; i < Values.size(); ++i)
+    if (Abbrevs.getData()[i].getAttribute() == Attribute)
+      return Values[i];
+  return NULL;
 }
 
 #ifndef NDEBUG
@@ -178,7 +198,7 @@ void DIE::dump() {
 void DIEValue::anchor() { }
 
 #ifndef NDEBUG
-void DIEValue::dump() {
+void DIEValue::dump() const {
   print(dbgs());
 }
 #endif
@@ -189,14 +209,14 @@ void DIEValue::dump() {
 
 /// EmitValue - Emit integer of appropriate size.
 ///
-void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+void DIEInteger::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
   unsigned Size = ~0U;
   switch (Form) {
   case dwarf::DW_FORM_flag_present:
     // Emit something to keep the lines and comments in sync.
    // FIXME: Is there a better way to do this?
     if (Asm->OutStreamer.hasRawTextSupport())
-      Asm->OutStreamer.EmitRawText(StringRef(""));
+      Asm->OutStreamer.EmitRawText("");
     return;
   case dwarf::DW_FORM_flag:  // Fall thru
   case dwarf::DW_FORM_ref1:  // Fall thru
@@ -221,7 +241,7 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const {
 
 /// SizeOf - Determine size of integer value in bytes.
 ///
-unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
+unsigned DIEInteger::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
   switch (Form) {
   case dwarf::DW_FORM_flag_present: return 0;
   case dwarf::DW_FORM_flag:  // Fall thru
@@ -244,25 +264,54 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const {
 }
 
 #ifndef NDEBUG
-void DIEInteger::print(raw_ostream &O) {
+void DIEInteger::print(raw_ostream &O) const {
   O << "Int: " << (int64_t)Integer << " 0x";
   O.write_hex(Integer);
 }
 #endif
 
 //===----------------------------------------------------------------------===//
+// DIEExpr Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit expression value.
+///
+void DIEExpr::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+  AP->OutStreamer.EmitValue(Expr, SizeOf(AP, Form));
+}
+
+/// SizeOf - Determine size of expression value in bytes.
+///
+unsigned DIEExpr::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+  if (Form == dwarf::DW_FORM_data4) return 4;
+  if (Form == dwarf::DW_FORM_sec_offset) return 4;
+  if (Form == dwarf::DW_FORM_strp) return 4;
+  return AP->getDataLayout().getPointerSize();
+}
+
+#ifndef NDEBUG
+void DIEExpr::print(raw_ostream &O) const {
+  O << "Expr: ";
+  Expr->print(O);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
 // DIELabel Implementation
 //===----------------------------------------------------------------------===//
 
 /// EmitValue - Emit label value.
 ///
-void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const {
-  AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form));
+void DIELabel::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+  AP->EmitLabelReference(Label, SizeOf(AP, Form),
+                         Form == dwarf::DW_FORM_strp ||
+                         Form == dwarf::DW_FORM_sec_offset ||
+                         Form == dwarf::DW_FORM_ref_addr);
 }
 
 /// SizeOf - Determine size of label value in bytes.
 ///
-unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
+unsigned DIELabel::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
   if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
@@ -270,7 +319,7 @@ unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const {
 }
 
 #ifndef NDEBUG
-void DIELabel::print(raw_ostream &O) {
+void DIELabel::print(raw_ostream &O) const {
   O << "Lbl: " << Label->getName();
 }
 #endif
@@ -281,36 +330,70 @@ void DIELabel::print(raw_ostream &O) {
 
 /// EmitValue - Emit delta value.
 ///
-void DIEDelta::EmitValue(AsmPrinter *AP, unsigned Form) const {
+void DIEDelta::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
   AP->EmitLabelDifference(LabelHi, LabelLo, SizeOf(AP, Form));
 }
 
 /// SizeOf - Determine size of delta value in bytes.
 ///
-unsigned DIEDelta::SizeOf(AsmPrinter *AP, unsigned Form) const {
+unsigned DIEDelta::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
   if (Form == dwarf::DW_FORM_data4) return 4;
+  if (Form == dwarf::DW_FORM_sec_offset) return 4;
   if (Form == dwarf::DW_FORM_strp) return 4;
   return AP->getDataLayout().getPointerSize();
 }
 
 #ifndef NDEBUG
-void DIEDelta::print(raw_ostream &O) {
+void DIEDelta::print(raw_ostream &O) const {
   O << "Del: " << LabelHi->getName() << "-" << LabelLo->getName();
 }
 #endif
 
 //===----------------------------------------------------------------------===//
+// DIEString Implementation
+//===----------------------------------------------------------------------===//
+
+/// EmitValue - Emit string value.
+///
+void DIEString::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
+  Access->EmitValue(AP, Form);
+}
+
+/// SizeOf - Determine size of delta value in bytes.
+///
+unsigned DIEString::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+  return Access->SizeOf(AP, Form);
+}
+
+#ifndef NDEBUG
+void DIEString::print(raw_ostream &O) const {
+  O << "String: " << Str << "\tSymbol: ";
+  Access->print(O);
+}
+#endif
+
+//===----------------------------------------------------------------------===//
 // DIEEntry Implementation
 //===----------------------------------------------------------------------===//
 
 /// EmitValue - Emit debug information entry offset.
 ///
-void DIEEntry::EmitValue(AsmPrinter *AP, unsigned Form) const {
+void DIEEntry::EmitValue(AsmPrinter *AP, dwarf::Form Form) const {
   AP->EmitInt32(Entry->getOffset());
 }
 
+unsigned DIEEntry::getRefAddrSize(AsmPrinter *AP) {
+  // DWARF4: References that use the attribute form DW_FORM_ref_addr are
+  // specified to be four bytes in the DWARF 32-bit format and eight bytes
+  // in the DWARF 64-bit format, while DWARF Version 2 specifies that such
+  // references have the same size as an address on the target system.
+  if (AP->getDwarfDebug()->getDwarfVersion() == 2)
+    return AP->getDataLayout().getPointerSize();
+  return sizeof(int32_t);
+}
+
 #ifndef NDEBUG
-void DIEEntry::print(raw_ostream &O) {
+void DIEEntry::print(raw_ostream &O) const {
   O << format("Die: 0x%lx", (long)(intptr_t)Entry);
 }
 #endif
@@ -333,7 +416,7 @@ unsigned DIEBlock::ComputeSize(AsmPrinter *AP) {
 
 /// EmitValue - Emit block data.
 ///
-void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
+void DIEBlock::EmitValue(AsmPrinter *Asm, dwarf::Form Form) const {
   switch (Form) {
   default: llvm_unreachable("Improper form for block");
   case dwarf::DW_FORM_block1: Asm->EmitInt8(Size);    break;
@@ -349,7 +432,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const {
 
 /// SizeOf - Determine size of block data in bytes.
 ///
-unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
+unsigned DIEBlock::SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
   switch (Form) {
   case dwarf::DW_FORM_block1: return Size + sizeof(int8_t);
   case dwarf::DW_FORM_block2: return Size + sizeof(int16_t);
@@ -360,7 +443,7 @@ unsigned DIEBlock::SizeOf(AsmPrinter *AP, unsigned Form) const {
 }
 
 #ifndef NDEBUG
-void DIEBlock::print(raw_ostream &O) {
+void DIEBlock::print(raw_ostream &O) const {
   O << "Blk: ";
   DIE::print(O, 5);
 }
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
index 3c06001..f4fa326 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIE.h
@@ -18,30 +18,32 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Dwarf.h"
+#include "llvm/MC/MCExpr.h"
 #include <vector>
 
 namespace llvm {
   class AsmPrinter;
   class MCSymbol;
+  class MCSymbolRefExpr;
   class raw_ostream;
 
   //===--------------------------------------------------------------------===//
-  /// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
+  /// DIEAbbrevData - Dwarf abbreviation data, describes one attribute of a
   /// Dwarf abbreviation.
   class DIEAbbrevData {
     /// Attribute - Dwarf attribute code.
     ///
-    uint16_t Attribute;
+    dwarf::Attribute Attribute;
 
     /// Form - Dwarf form code.
     ///
-    uint16_t Form;
+    dwarf::Form Form;
   public:
-    DIEAbbrevData(uint16_t A, uint16_t F) : Attribute(A), Form(F) {}
+    DIEAbbrevData(dwarf::Attribute A, dwarf::Form F) : Attribute(A), Form(F) {}
 
     // Accessors.
-    uint16_t getAttribute() const { return Attribute; }
-    uint16_t getForm() const { return Form; }
+    dwarf::Attribute getAttribute() const { return Attribute; }
+    dwarf::Form getForm() const { return Form; }
 
     /// Profile - Used to gather unique data for the abbreviation folding set.
     ///
@@ -54,7 +56,7 @@ namespace llvm {
   class DIEAbbrev : public FoldingSetNode {
     /// Tag - Dwarf tag code.
     ///
-    uint16_t Tag;
+    dwarf::Tag Tag;
 
     /// ChildrenFlag - Dwarf children flag.
     ///
@@ -69,29 +71,22 @@ namespace llvm {
     SmallVector<DIEAbbrevData, 12> Data;
 
   public:
-    DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
+    DIEAbbrev(dwarf::Tag T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {}
 
     // Accessors.
-    uint16_t getTag() const { return Tag; }
+    dwarf::Tag getTag() const { return Tag; }
     unsigned getNumber() const { return Number; }
     uint16_t getChildrenFlag() const { return ChildrenFlag; }
     const SmallVectorImpl<DIEAbbrevData> &getData() const { return Data; }
-    void setTag(uint16_t T) { Tag = T; }
     void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; }
     void setNumber(unsigned N) { Number = N; }
 
     /// AddAttribute - Adds another set of attribute information to the
     /// abbreviation.
-    void AddAttribute(uint16_t Attribute, uint16_t Form) {
+    void AddAttribute(dwarf::Attribute Attribute, dwarf::Form Form) {
       Data.push_back(DIEAbbrevData(Attribute, Form));
     }
 
-    /// AddFirstAttribute - Adds a set of attribute information to the front
-    /// of the abbreviation.
-    void AddFirstAttribute(uint16_t Attribute, uint16_t Form) {
-      Data.insert(Data.begin(), DIEAbbrevData(Attribute, Form));
-    }
-
     /// Profile - Used to gather unique data for the abbreviation folding set.
     ///
     void Profile(FoldingSetNodeID &ID) const;
@@ -135,17 +130,17 @@ namespace llvm {
     ///
     SmallVector<DIEValue*, 12> Values;
 
-    // Private data for print()
-    mutable unsigned IndentCount;
   public:
     explicit DIE(unsigned Tag)
-      : Offset(0), Size(0), Abbrev(Tag, dwarf::DW_CHILDREN_no), Parent(0) {}
+        : Offset(0), Size(0), Abbrev((dwarf::Tag)Tag, dwarf::DW_CHILDREN_no),
+          Parent(0) {}
     virtual ~DIE();
 
     // Accessors.
     DIEAbbrev &getAbbrev() { return Abbrev; }
+    const DIEAbbrev &getAbbrev() const { return Abbrev; }
     unsigned getAbbrevNumber() const { return Abbrev.getNumber(); }
-    unsigned getTag() const { return Abbrev.getTag(); }
+    dwarf::Tag getTag() const { return Abbrev.getTag(); }
     unsigned getOffset() const { return Offset; }
     unsigned getSize() const { return Size; }
     const std::vector<DIE *> &getChildren() const { return Children; }
@@ -153,14 +148,17 @@ namespace llvm {
     DIE *getParent() const { return Parent; }
     /// Climb up the parent chain to get the compile unit DIE this DIE belongs
     /// to.
-    DIE *getCompileUnit() const;
-    void setTag(unsigned Tag) { Abbrev.setTag(Tag); }
+    const DIE *getCompileUnit() const;
+    /// Similar to getCompileUnit, returns null when DIE is not added to an
+    /// owner yet.
+    const DIE *getCompileUnitOrNull() const;
     void setOffset(unsigned O) { Offset = O; }
     void setSize(unsigned S) { Size = S; }
 
     /// addValue - Add a value and attributes to a DIE.
     ///
-    void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) {
+    void addValue(dwarf::Attribute Attribute, dwarf::Form Form,
+                  DIEValue *Value) {
       Abbrev.AddAttribute(Attribute, Form);
       Values.push_back(Value);
     }
 
     /// addChild - Add a child to the DIE.
     ///
     void addChild(DIE *Child) {
-      if (Child->getParent()) {
-        assert (Child->getParent() == this && "Unexpected DIE Parent!");
-        return;
-      }
+      assert(!Child->getParent());
       Abbrev.setChildrenFlag(dwarf::DW_CHILDREN_yes);
       Children.push_back(Child);
       Child->Parent = this;
     }
 
+    /// findAttribute - Find a value in the DIE with the attribute given, returns NULL
+    /// if no such attribute exists.
+    DIEValue *findAttribute(uint16_t Attribute);
+
 #ifndef NDEBUG
     void print(raw_ostream &O, unsigned IndentCount = 0) const;
     void dump();
@@ -192,6 +191,7 @@ namespace llvm {
     enum {
       isInteger,
       isString,
+      isExpr,
      isLabel,
      isDelta,
      isEntry,
@@ -210,15 +210,15 @@ namespace llvm {
 
     /// EmitValue - Emit value via the Dwarf writer.
     ///
-    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const = 0;
+    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const = 0;
 
     /// SizeOf - Return the size of a value in bytes.
     ///
-    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const = 0;
+    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const = 0;
 
 #ifndef NDEBUG
-    virtual void print(raw_ostream &O) = 0;
-    void dump();
+    virtual void print(raw_ostream &O) const = 0;
+    void dump() const;
 #endif
   };
 
@@ -232,7 +232,7 @@ namespace llvm {
 
     /// BestForm - Choose the best form for integer.
     ///
-    static unsigned BestForm(bool IsSigned, uint64_t Int) {
+    static dwarf::Form BestForm(bool IsSigned, uint64_t Int) {
       if (IsSigned) {
         const int64_t SignedInt = Int;
         if ((char)Int == SignedInt) return dwarf::DW_FORM_data1;
@@ -248,24 +248,52 @@ namespace llvm {
 
     /// EmitValue - Emit integer of appropriate size.
     ///
-    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
 
     uint64_t getValue() const { return Integer; }
 
     /// SizeOf - Determine size of integer value in bytes.
     ///
-    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
 
     // Implement isa/cast/dyncast.
     static bool classof(const DIEValue *I) { return I->getType() == isInteger; }
 
 #ifndef NDEBUG
-    virtual void print(raw_ostream &O);
+    virtual void print(raw_ostream &O) const;
 #endif
   };
 
   //===--------------------------------------------------------------------===//
-  /// DIELabel - A label expression DIE.
+  /// DIEExpr - An expression DIE.
+  //
+  class DIEExpr : public DIEValue {
+    const MCExpr *Expr;
+  public:
+    explicit DIEExpr(const MCExpr *E) : DIEValue(isExpr), Expr(E) {}
+
+    /// EmitValue - Emit expression value.
+    ///
+    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
+
+    /// getValue - Get MCExpr.
+    ///
+    const MCExpr *getValue() const { return Expr; }
+
+    /// SizeOf - Determine size of expression value in bytes.
+    ///
+    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEValue *E) { return E->getType() == isExpr; }
+
+#ifndef NDEBUG
+    virtual void print(raw_ostream &O) const;
+#endif
+  };
+
+  //===--------------------------------------------------------------------===//
+  /// DIELabel - A label DIE.
   //
   class DIELabel : public DIEValue {
     const MCSymbol *Label;
@@ -274,21 +302,21 @@ namespace llvm {
 
     /// EmitValue - Emit label value.
     ///
-    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
 
     /// getValue - Get MCSymbol.
     ///
-    const MCSymbol *getValue() const { return Label; }
+    const MCSymbol *getValue() const { return Label; }
 
     /// SizeOf - Determine size of label value in bytes.
     ///
-    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
 
     // Implement isa/cast/dyncast.
     static bool classof(const DIEValue *L) { return L->getType() == isLabel; }
 
 #ifndef NDEBUG
-    virtual void print(raw_ostream &O);
+    virtual void print(raw_ostream &O) const;
 #endif
   };
 
@@ -304,46 +332,82 @@ namespace llvm {
 
     /// EmitValue - Emit delta value.
     ///
-    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
 
     /// SizeOf - Determine size of delta value in bytes.
     ///
-    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
 
     // Implement isa/cast/dyncast.
     static bool classof(const DIEValue *D) { return D->getType() == isDelta; }
 
 #ifndef NDEBUG
-    virtual void print(raw_ostream &O);
+    virtual void print(raw_ostream &O) const;
 #endif
   };
 
   //===--------------------------------------------------------------------===//
+  /// DIEString - A container for string values.
+  ///
+  class DIEString : public DIEValue {
+    const DIEValue *Access;
+    const StringRef Str;
+
+  public:
+    DIEString(const DIEValue *Acc, const StringRef S)
+        : DIEValue(isString), Access(Acc), Str(S) {}
+
+    /// getString - Grab the string out of the object.
+    StringRef getString() const { return Str; }
+
+    /// EmitValue - Emit delta value.
+    ///
+    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
+
+    /// SizeOf - Determine size of delta value in bytes.
+    ///
+    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
+
+    // Implement isa/cast/dyncast.
+    static bool classof(const DIEValue *D) { return D->getType() == isString; }
+
+  #ifndef NDEBUG
+    virtual void print(raw_ostream &O) const;
+  #endif
+  };
+
+  //===--------------------------------------------------------------------===//
   /// DIEEntry - A pointer to another debug information entry.  An instance of
   /// this class can also be used as a proxy for a debug information entry not
   /// yet defined (ie. types.)
   class DIEEntry : public DIEValue {
     DIE *const Entry;
   public:
-    explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
+    explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {
+      assert(E && "Cannot construct a DIEEntry with a null DIE");
+    }
 
     DIE *getEntry() const { return Entry; }
 
     /// EmitValue - Emit debug information entry offset.
     ///
-    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
 
     /// SizeOf - Determine size of debug information entry in bytes.
     ///
-    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const {
-      return sizeof(int32_t);
+    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const {
+      return Form == dwarf::DW_FORM_ref_addr ? getRefAddrSize(AP)
+                                             : sizeof(int32_t);
     }
 
+    /// Returns size of a ref_addr entry.
+    static unsigned getRefAddrSize(AsmPrinter *AP);
+
     // Implement isa/cast/dyncast.
     static bool classof(const DIEValue *E) { return E->getType() == isEntry; }
 
 #ifndef NDEBUG
-    virtual void print(raw_ostream &O);
+    virtual void print(raw_ostream &O) const;
 #endif
   };
 
@@ -353,9 +417,7 @@ namespace llvm {
   class DIEBlock : public DIEValue, public DIE {
     unsigned Size; // Size in bytes excluding size header.
   public:
-    DIEBlock()
-      : DIEValue(isBlock), DIE(0), Size(0) {}
-    virtual ~DIEBlock() {}
+    DIEBlock() : DIEValue(isBlock), DIE(0), Size(0) {}
 
     /// ComputeSize - calculate the size of the block.
     ///
@@ -363,7 +425,7 @@ namespace llvm {
 
     /// BestForm - Choose the best form for data.
     ///
-    unsigned BestForm() const {
+    dwarf::Form BestForm() const {
       if ((unsigned char)Size == Size) return dwarf::DW_FORM_block1;
       if ((unsigned short)Size == Size) return dwarf::DW_FORM_block2;
       if ((unsigned int)Size == Size) return dwarf::DW_FORM_block4;
@@ -372,17 +434,17 @@ namespace llvm {
 
     /// EmitValue - Emit block data.
     ///
-    virtual void EmitValue(AsmPrinter *AP, unsigned Form) const;
+    virtual void EmitValue(AsmPrinter *AP, dwarf::Form Form) const;
 
     /// SizeOf - Determine size of block data in bytes.
     ///
-    virtual unsigned SizeOf(AsmPrinter *AP, unsigned Form) const;
+    virtual unsigned SizeOf(AsmPrinter *AP, dwarf::Form Form) const;
 
     // Implement isa/cast/dyncast.
     static bool classof(const DIEValue *E) { return E->getType() == isBlock; }
 
 #ifndef NDEBUG
-    virtual void print(raw_ostream &O);
+    virtual void print(raw_ostream &O) const;
 #endif
   };
 
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
new file mode 100644
index 0000000..95eca90
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.cpp
@@ -0,0 +1,507 @@
+//===-- llvm/CodeGen/DIEHash.cpp - Dwarf Hashing Framework ----------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for DWARF4 hashing of DIEs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "dwarfdebug"
+
+#include "DIEHash.h"
+
+#include "DIE.h"
+#include "DwarfCompileUnit.h"
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/Dwarf.h"
+#include "llvm/Support/Endian.h"
+#include "llvm/Support/MD5.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+
+/// \brief Grabs the string in whichever attribute is passed in and returns
+/// a reference to it.
+static StringRef getDIEStringAttr(const DIE &Die, uint16_t Attr) {
+  const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
+  const DIEAbbrev &Abbrevs = Die.getAbbrev();
+
+  // Iterate through all the attributes until we find the one we're
+  // looking for, if we can't find it return an empty string.
+  for (size_t i = 0; i < Values.size(); ++i) {
+    if (Abbrevs.getData()[i].getAttribute() == Attr) {
+      DIEValue *V = Values[i];
+      assert(isa<DIEString>(V) && "String requested. Not a string.");
+      DIEString *S = cast<DIEString>(V);
+      return S->getString();
+    }
+  }
+  return StringRef("");
+}
+
+/// \brief Adds the string in \p Str to the hash. This also hashes
+/// a trailing NULL with the string.
+void DIEHash::addString(StringRef Str) {
+  DEBUG(dbgs() << "Adding string " << Str << " to hash.\n");
+  Hash.update(Str);
+  Hash.update(makeArrayRef((uint8_t)'\0'));
+}
+
+// FIXME: The LEB128 routines are copied and only slightly modified out of
+// LEB128.h.
+
+/// \brief Adds the unsigned in \p Value to the hash encoded as a ULEB128.
+void DIEHash::addULEB128(uint64_t Value) {
+  DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
+  do {
+    uint8_t Byte = Value & 0x7f;
+    Value >>= 7;
+    if (Value != 0)
+      Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+    Hash.update(Byte);
+  } while (Value != 0);
+}
+
+void DIEHash::addSLEB128(int64_t Value) {
+  DEBUG(dbgs() << "Adding ULEB128 " << Value << " to hash.\n");
+  bool More;
+  do {
+    uint8_t Byte = Value & 0x7f;
+    Value >>= 7;
+    More = !((((Value == 0 ) && ((Byte & 0x40) == 0)) ||
+              ((Value == -1) && ((Byte & 0x40) != 0))));
+    if (More)
+      Byte |= 0x80; // Mark this byte to show that more bytes will follow.
+    Hash.update(Byte);
+  } while (More);
+}
+
+/// \brief Including \p Parent adds the context of Parent to the hash..
+void DIEHash::addParentContext(const DIE &Parent) {
+
+  DEBUG(dbgs() << "Adding parent context to hash...\n");
+
+  // [7.27.2] For each surrounding type or namespace beginning with the
+  // outermost such construct...
+  SmallVector<const DIE *, 1> Parents;
+  const DIE *Cur = &Parent;
+  while (Cur->getTag() != dwarf::DW_TAG_compile_unit) {
+    Parents.push_back(Cur);
+    Cur = Cur->getParent();
+  }
+
+  // Reverse iterate over our list to go from the outermost construct to the
+  // innermost.
+  for (SmallVectorImpl<const DIE *>::reverse_iterator I = Parents.rbegin(),
+                                                      E = Parents.rend();
+       I != E; ++I) {
+    const DIE &Die = **I;
+
+    // ... Append the letter "C" to the sequence...
+    addULEB128('C');
+
+    // ... Followed by the DWARF tag of the construct...
+    addULEB128(Die.getTag());
+
+    // ... Then the name, taken from the DW_AT_name attribute.
+    StringRef Name = getDIEStringAttr(Die, dwarf::DW_AT_name);
+    DEBUG(dbgs() << "... adding context: " << Name << "\n");
+    if (!Name.empty())
+      addString(Name);
+  }
+}
+
+// Collect all of the attributes for a particular DIE in single structure.
+void DIEHash::collectAttributes(const DIE &Die, DIEAttrs &Attrs) {
+  const SmallVectorImpl<DIEValue *> &Values = Die.getValues();
+  const DIEAbbrev &Abbrevs = Die.getAbbrev();
+
+#define COLLECT_ATTR(NAME)                                                     \
+  case dwarf::NAME:                                                            \
+    Attrs.NAME.Val = Values[i];                                                \
+    Attrs.NAME.Desc = &Abbrevs.getData()[i];                                   \
+    break
+
+  for (size_t i = 0, e = Values.size(); i != e; ++i) {
+    DEBUG(dbgs() << "Attribute: "
+                 << dwarf::AttributeString(Abbrevs.getData()[i].getAttribute())
+                 << " added.\n");
+    switch (Abbrevs.getData()[i].getAttribute()) {
+      COLLECT_ATTR(DW_AT_name);
+      COLLECT_ATTR(DW_AT_accessibility);
+      COLLECT_ATTR(DW_AT_address_class);
+      COLLECT_ATTR(DW_AT_allocated);
+      COLLECT_ATTR(DW_AT_artificial);
+      COLLECT_ATTR(DW_AT_associated);
+      COLLECT_ATTR(DW_AT_binary_scale);
+      COLLECT_ATTR(DW_AT_bit_offset);
+      COLLECT_ATTR(DW_AT_bit_size);
+      COLLECT_ATTR(DW_AT_bit_stride);
+      COLLECT_ATTR(DW_AT_byte_size);
+      COLLECT_ATTR(DW_AT_byte_stride);
+      COLLECT_ATTR(DW_AT_const_expr);
+      COLLECT_ATTR(DW_AT_const_value);
+      COLLECT_ATTR(DW_AT_containing_type);
+      COLLECT_ATTR(DW_AT_count);
+      COLLECT_ATTR(DW_AT_data_bit_offset);
+      COLLECT_ATTR(DW_AT_data_location);
+      COLLECT_ATTR(DW_AT_data_member_location);
+      COLLECT_ATTR(DW_AT_decimal_scale);
+      COLLECT_ATTR(DW_AT_decimal_sign);
+      COLLECT_ATTR(DW_AT_default_value);
+      COLLECT_ATTR(DW_AT_digit_count);
+      COLLECT_ATTR(DW_AT_discr);
+      COLLECT_ATTR(DW_AT_discr_list);
+      COLLECT_ATTR(DW_AT_discr_value);
+      COLLECT_ATTR(DW_AT_encoding);
+      COLLECT_ATTR(DW_AT_enum_class);
+      COLLECT_ATTR(DW_AT_endianity);
+      COLLECT_ATTR(DW_AT_explicit);
+      COLLECT_ATTR(DW_AT_is_optional);
+      COLLECT_ATTR(DW_AT_location);
+      COLLECT_ATTR(DW_AT_lower_bound);
+      COLLECT_ATTR(DW_AT_mutable);
+      COLLECT_ATTR(DW_AT_ordering);
+      COLLECT_ATTR(DW_AT_picture_string);
+      COLLECT_ATTR(DW_AT_prototyped);
+      COLLECT_ATTR(DW_AT_small);
+      COLLECT_ATTR(DW_AT_segment);
+      COLLECT_ATTR(DW_AT_string_length);
+      COLLECT_ATTR(DW_AT_threads_scaled);
+      COLLECT_ATTR(DW_AT_upper_bound);
+      COLLECT_ATTR(DW_AT_use_location);
+      COLLECT_ATTR(DW_AT_use_UTF8);
+      COLLECT_ATTR(DW_AT_variable_parameter);
+      COLLECT_ATTR(DW_AT_virtuality);
+      COLLECT_ATTR(DW_AT_visibility);
+      COLLECT_ATTR(DW_AT_vtable_elem_location);
+      COLLECT_ATTR(DW_AT_type);
+    default:
+      break;
+    }
+  }
+}
+
+void DIEHash::hashShallowTypeReference(dwarf::Attribute Attribute,
+                                       const DIE &Entry, StringRef Name) {
+  // append the letter 'N'
+  addULEB128('N');
+
+  // the DWARF attribute code (DW_AT_type or DW_AT_friend),
+  addULEB128(Attribute);
+
+  // the context of the tag,
+  if (const DIE *Parent = Entry.getParent())
+    addParentContext(*Parent);
+
+  // the letter 'E',
+  addULEB128('E');
+
+  // and the name of the type.
+  addString(Name);
+
+  // Currently DW_TAG_friends are not used by Clang, but if they do become so,
+  // here's the relevant spec text to implement:
+  //
+  // For DW_TAG_friend, if the referenced entry is the DW_TAG_subprogram,
+  // the context is omitted and the name to be used is the ABI-specific name
+  // of the subprogram (e.g., the mangled linker name).
+}
+
+void DIEHash::hashRepeatedTypeReference(dwarf::Attribute Attribute,
+                                        unsigned DieNumber) {
+  // a) If T is in the list of [previously hashed types], use the letter
+  // 'R' as the marker
+  addULEB128('R');
+
+  addULEB128(Attribute);
+
+  // and use the unsigned LEB128 encoding of [the index of T in the
+  // list] as the attribute value;
+  addULEB128(DieNumber);
+}
+
+void DIEHash::hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
+                           const DIE &Entry) {
+  assert(Tag != dwarf::DW_TAG_friend && "No current LLVM clients emit friend "
+                                        "tags. Add support here when there's "
+                                        "a use case");
+  // Step 5
+  // If the tag in Step 3 is one of [the below tags]
+  if ((Tag == dwarf::DW_TAG_pointer_type ||
+       Tag == dwarf::DW_TAG_reference_type ||
+       Tag == dwarf::DW_TAG_rvalue_reference_type ||
+       Tag == dwarf::DW_TAG_ptr_to_member_type) &&
+      // and the referenced type (via the [below attributes])
+      // FIXME: This seems overly restrictive, and causes hash mismatches
+      // there's a decl/def difference in the containing type of a
+      // ptr_to_member_type, but it's what DWARF says, for some reason.
+      Attribute == dwarf::DW_AT_type) {
+    // ... has a DW_AT_name attribute,
+    StringRef Name = getDIEStringAttr(Entry, dwarf::DW_AT_name);
+    if (!Name.empty()) {
+      hashShallowTypeReference(Attribute, Entry, Name);
+      return;
+    }
+  }
+
+  unsigned &DieNumber = Numbering[&Entry];
+  if (DieNumber) {
+    hashRepeatedTypeReference(Attribute, DieNumber);
+    return;
+  }
+
+  // otherwise, b) use the letter 'T' as a the marker, ...
+  addULEB128('T');
+
+  addULEB128(Attribute);
+
+  // ... process the type T recursively by performing Steps 2 through 7, and
+  // use the result as the attribute value.
+  DieNumber = Numbering.size();
+  computeHash(Entry);
+}
+
+// Hash an individual attribute \param Attr based on the type of attribute and
+// the form.
+void DIEHash::hashAttribute(AttrEntry Attr, dwarf::Tag Tag) {
+  const DIEValue *Value = Attr.Val;
+  const DIEAbbrevData *Desc = Attr.Desc;
+  dwarf::Attribute Attribute = Desc->getAttribute();
+
+  // 7.27 Step 3
+  // ... An attribute that refers to another type entry T is processed as
+  // follows:
+  if (const DIEEntry *EntryAttr = dyn_cast<DIEEntry>(Value)) {
+    hashDIEEntry(Attribute, Tag, *EntryAttr->getEntry());
+    return;
+  }
+
+  // Other attribute values use the letter 'A' as the marker, ...
+  addULEB128('A');
+
+  addULEB128(Attribute);
+
+  // ... and the value consists of the form code (encoded as an unsigned LEB128
+  // value) followed by the encoding of the value according to the form code. To
+  // ensure reproducibility of the signature, the set of forms used in the
+  // signature computation is limited to the following: DW_FORM_sdata,
+  // DW_FORM_flag, DW_FORM_string, and DW_FORM_block.
+  switch (Desc->getForm()) {
+  case dwarf::DW_FORM_string:
+    llvm_unreachable(
+        "Add support for DW_FORM_string if we ever start emitting them again");
+  case dwarf::DW_FORM_GNU_str_index:
+  case dwarf::DW_FORM_strp:
+    addULEB128(dwarf::DW_FORM_string);
+    addString(cast<DIEString>(Value)->getString());
+    break;
+  case dwarf::DW_FORM_data1:
+  case dwarf::DW_FORM_data2:
+  case dwarf::DW_FORM_data4:
+  case dwarf::DW_FORM_data8:
+  case dwarf::DW_FORM_udata:
+    addULEB128(dwarf::DW_FORM_sdata);
+    addSLEB128((int64_t)cast<DIEInteger>(Value)->getValue());
+    break;
+  default:
+    llvm_unreachable("Add support for additional forms");
+  }
+}
+
+// Go through the attributes from \param Attrs in the order specified in 7.27.4
+// and hash them.
+void DIEHash::hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag) {
+#define ADD_ATTR(ATTR)                                                         \
+  {                                                                            \
+    if (ATTR.Val != 0)                                                         \
+      hashAttribute(ATTR, Tag);                                                \
+  }
+
+  ADD_ATTR(Attrs.DW_AT_name);
+  ADD_ATTR(Attrs.DW_AT_accessibility);
+  ADD_ATTR(Attrs.DW_AT_address_class);
+  ADD_ATTR(Attrs.DW_AT_allocated);
+  ADD_ATTR(Attrs.DW_AT_artificial);
+  ADD_ATTR(Attrs.DW_AT_associated);
+  ADD_ATTR(Attrs.DW_AT_binary_scale);
+  ADD_ATTR(Attrs.DW_AT_bit_offset);
+  ADD_ATTR(Attrs.DW_AT_bit_size);
+  ADD_ATTR(Attrs.DW_AT_bit_stride);
+  ADD_ATTR(Attrs.DW_AT_byte_size);
+  ADD_ATTR(Attrs.DW_AT_byte_stride);
+  ADD_ATTR(Attrs.DW_AT_const_expr);
+  ADD_ATTR(Attrs.DW_AT_const_value);
+  ADD_ATTR(Attrs.DW_AT_containing_type);
+  ADD_ATTR(Attrs.DW_AT_count);
+  ADD_ATTR(Attrs.DW_AT_data_bit_offset);
+  ADD_ATTR(Attrs.DW_AT_data_location);
+  ADD_ATTR(Attrs.DW_AT_data_member_location);
+  ADD_ATTR(Attrs.DW_AT_decimal_scale);
+  ADD_ATTR(Attrs.DW_AT_decimal_sign);
+  ADD_ATTR(Attrs.DW_AT_default_value);
+  ADD_ATTR(Attrs.DW_AT_digit_count);
+  ADD_ATTR(Attrs.DW_AT_discr);
+  ADD_ATTR(Attrs.DW_AT_discr_list);
+  ADD_ATTR(Attrs.DW_AT_discr_value);
+  ADD_ATTR(Attrs.DW_AT_encoding);
+  ADD_ATTR(Attrs.DW_AT_enum_class);
+  ADD_ATTR(Attrs.DW_AT_endianity);
+  ADD_ATTR(Attrs.DW_AT_explicit);
+  ADD_ATTR(Attrs.DW_AT_is_optional);
+  ADD_ATTR(Attrs.DW_AT_location);
+  ADD_ATTR(Attrs.DW_AT_lower_bound);
+  ADD_ATTR(Attrs.DW_AT_mutable);
+  ADD_ATTR(Attrs.DW_AT_ordering);
+  ADD_ATTR(Attrs.DW_AT_picture_string);
+  ADD_ATTR(Attrs.DW_AT_prototyped);
+  ADD_ATTR(Attrs.DW_AT_small);
+  ADD_ATTR(Attrs.DW_AT_segment);
+  ADD_ATTR(Attrs.DW_AT_string_length);
+  ADD_ATTR(Attrs.DW_AT_threads_scaled);
+  ADD_ATTR(Attrs.DW_AT_upper_bound);
+  ADD_ATTR(Attrs.DW_AT_use_location);
+  ADD_ATTR(Attrs.DW_AT_use_UTF8);
+  ADD_ATTR(Attrs.DW_AT_variable_parameter);
+  ADD_ATTR(Attrs.DW_AT_virtuality);
+  ADD_ATTR(Attrs.DW_AT_visibility);
+  ADD_ATTR(Attrs.DW_AT_vtable_elem_location);
+  ADD_ATTR(Attrs.DW_AT_type);
+
+  // FIXME: Add the extended attributes.
+}
+
+// Add all of the attributes for \param Die to the hash.
+void DIEHash::addAttributes(const DIE &Die) {
+  DIEAttrs Attrs = {};
+  collectAttributes(Die, Attrs);
+  hashAttributes(Attrs, Die.getTag());
+}
+
+void DIEHash::hashNestedType(const DIE &Die, StringRef Name) {
+  // 7.27 Step 7
+  // ... append the letter 'S',
+  addULEB128('S');
+
+  // the tag of C,
+  addULEB128(Die.getTag());
+
+  // and the name.
+  addString(Name);
+}
+
+// Compute the hash of a DIE. This is based on the type signature computation
+// given in section 7.27 of the DWARF4 standard. It is the md5 hash of a
+// flattened description of the DIE.
+void DIEHash::computeHash(const DIE &Die) {
+  // Append the letter 'D', followed by the DWARF tag of the DIE.
+  addULEB128('D');
+  addULEB128(Die.getTag());
+
+  // Add each of the attributes of the DIE.
+  addAttributes(Die);
+
+  // Then hash each of the children of the DIE.
+  for (std::vector<DIE *>::const_iterator I = Die.getChildren().begin(),
+                                          E = Die.getChildren().end();
+       I != E; ++I) {
+    // 7.27 Step 7
+    // If C is a nested type entry or a member function entry, ...
+    if (isType((*I)->getTag()) || (*I)->getTag() == dwarf::DW_TAG_subprogram) {
+      StringRef Name = getDIEStringAttr(**I, dwarf::DW_AT_name);
+      // ... and has a DW_AT_name attribute
+      if (!Name.empty()) {
+        hashNestedType(**I, Name);
+        continue;
+      }
+    }
+    computeHash(**I);
+  }
+
+  // Following the last (or if there are no children), append a zero byte.
+  Hash.update(makeArrayRef((uint8_t)'\0'));
+}
+
+/// This is based on the type signature computation given in section 7.27 of the
+/// DWARF4 standard. It is the md5 hash of a flattened description of the DIE
+/// with the exception that we are hashing only the context and the name of the
+/// type.
+uint64_t DIEHash::computeDIEODRSignature(const DIE &Die) {
+
+  // Add the contexts to the hash. We won't be computing the ODR hash for
+  // function local types so it's safe to use the generic context hashing
+  // algorithm here.
+  // FIXME: If we figure out how to account for linkage in some way we could
+  // actually do this with a slight modification to the parent hash algorithm.
+  if (const DIE *Parent = Die.getParent())
+    addParentContext(*Parent);
+
+  // Add the current DIE information.
+
+  // Add the DWARF tag of the DIE.
+  addULEB128(Die.getTag());
+
+  // Add the name of the type to the hash.
+  addString(getDIEStringAttr(Die, dwarf::DW_AT_name));
+
+  // Now get the result.
+  MD5::MD5Result Result;
+  Hash.final(Result);
+
+  // ... take the least significant 8 bytes and return those. Our MD5
+  // implementation always returns its results in little endian, swap bytes
+  // appropriately.
+  return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+}
+
+/// This is based on the type signature computation given in section 7.27 of the
+/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
+/// with the inclusion of the full CU and all top level CU entities.
+// TODO: Initialize the type chain at 0 instead of 1 for CU signatures.
+uint64_t DIEHash::computeCUSignature(const DIE &Die) {
+  Numbering.clear();
+  Numbering[&Die] = 1;
+
+  // Hash the DIE.
+  computeHash(Die);
+
+  // Now return the result.
+  MD5::MD5Result Result;
+  Hash.final(Result);
+
+  // ... take the least significant 8 bytes and return those. Our MD5
+  // implementation always returns its results in little endian, swap bytes
+  // appropriately.
+  return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+}
+
+/// This is based on the type signature computation given in section 7.27 of the
+/// DWARF4 standard. It is an md5 hash of the flattened description of the DIE
+/// with the inclusion of additional forms not specifically called out in the
+/// standard.
+uint64_t DIEHash::computeTypeSignature(const DIE &Die) {
+  Numbering.clear();
+  Numbering[&Die] = 1;
+
+  if (const DIE *Parent = Die.getParent())
+    addParentContext(*Parent);
+
+  // Hash the DIE.
+  computeHash(Die);
+
+  // Now return the result.
+  MD5::MD5Result Result;
+  Hash.final(Result);
+
+  // ... take the least significant 8 bytes and return those. Our MD5
+  // implementation always returns its results in little endian, swap bytes
+  // appropriately.
+  return *reinterpret_cast<support::ulittle64_t *>(Result + 8);
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
new file mode 100644
index 0000000..f0c4ef9
--- /dev/null
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DIEHash.h
@@ -0,0 +1,147 @@
+//===-- llvm/CodeGen/DIEHash.h - Dwarf Hashing Framework -------*- C++ -*--===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains support for DWARF4 hashing of DIEs.
+//
+//===----------------------------------------------------------------------===//
+
+#include "DIE.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/MD5.h"
+
+namespace llvm {
+
+class CompileUnit;
+
+/// \brief An object containing the capability of hashing and adding hash
+/// attributes onto a DIE.
+class DIEHash {
+  // The entry for a particular attribute.
+  struct AttrEntry {
+    const DIEValue *Val;
+    const DIEAbbrevData *Desc;
+  };
+
+  // Collection of all attributes used in hashing a particular DIE.
+  struct DIEAttrs {
+    AttrEntry DW_AT_name;
+    AttrEntry DW_AT_accessibility;
+    AttrEntry DW_AT_address_class;
+    AttrEntry DW_AT_allocated;
+    AttrEntry DW_AT_artificial;
+    AttrEntry DW_AT_associated;
+    AttrEntry DW_AT_binary_scale;
+    AttrEntry DW_AT_bit_offset;
+    AttrEntry DW_AT_bit_size;
+    AttrEntry DW_AT_bit_stride;
+    AttrEntry DW_AT_byte_size;
+    AttrEntry DW_AT_byte_stride;
+    AttrEntry DW_AT_const_expr;
+    AttrEntry DW_AT_const_value;
+    AttrEntry DW_AT_containing_type;
+    AttrEntry DW_AT_count;
+    AttrEntry DW_AT_data_bit_offset;
+    AttrEntry DW_AT_data_location;
+    AttrEntry DW_AT_data_member_location;
+    AttrEntry DW_AT_decimal_scale;
+    AttrEntry DW_AT_decimal_sign;
+    AttrEntry DW_AT_default_value;
+    AttrEntry DW_AT_digit_count;
+    AttrEntry DW_AT_discr;
+    AttrEntry DW_AT_discr_list;
+    AttrEntry DW_AT_discr_value;
+    AttrEntry DW_AT_encoding;
+    AttrEntry DW_AT_enum_class;
+    AttrEntry DW_AT_endianity;
+    AttrEntry DW_AT_explicit;
+    AttrEntry DW_AT_is_optional;
+    AttrEntry DW_AT_location;
+    AttrEntry DW_AT_lower_bound;
+    AttrEntry DW_AT_mutable;
+    AttrEntry DW_AT_ordering;
+    AttrEntry DW_AT_picture_string;
+    AttrEntry DW_AT_prototyped;
+    AttrEntry DW_AT_small;
+    AttrEntry DW_AT_segment;
+    AttrEntry DW_AT_string_length;
+    AttrEntry DW_AT_threads_scaled;
+    AttrEntry DW_AT_upper_bound;
+    AttrEntry DW_AT_use_location;
+    AttrEntry DW_AT_use_UTF8;
+    AttrEntry DW_AT_variable_parameter;
+    AttrEntry DW_AT_virtuality;
+    AttrEntry DW_AT_visibility;
+    AttrEntry DW_AT_vtable_elem_location;
+    AttrEntry DW_AT_type;
+
+    // Insert any additional ones here...
+  };
+
+public:
+  /// \brief Computes the ODR signature.
+  uint64_t computeDIEODRSignature(const DIE &Die);
+
+  /// \brief Computes the CU signature.
+  uint64_t computeCUSignature(const DIE &Die);
+
+  /// \brief Computes the type signature.
+  uint64_t computeTypeSignature(const DIE &Die);
+
+  // Helper routines to process parts of a DIE.
+private:
+  /// \brief Adds the parent context of \param Die to the hash.
+  void addParentContext(const DIE &Die);
+
+  /// \brief Adds the attributes of \param Die to the hash.
+  void addAttributes(const DIE &Die);
+
+  /// \brief Computes the full DWARF4 7.27 hash of the DIE.
+  void computeHash(const DIE &Die);
+
+  // Routines that add DIEValues to the hash.
+private:
+  /// \brief Encodes and adds \param Value to the hash as a ULEB128.
+  void addULEB128(uint64_t Value);
+
+  /// \brief Encodes and adds \param Value to the hash as a SLEB128.
+  void addSLEB128(int64_t Value);
+
+  /// \brief Adds \param Str to the hash and includes a NULL byte.
+  void addString(StringRef Str);
+
+  /// \brief Collects the attributes of DIE \param Die into the \param Attrs
+  /// structure.
+  void collectAttributes(const DIE &Die, DIEAttrs &Attrs);
+
+  /// \brief Hashes the attributes in \param Attrs in order.
+  void hashAttributes(const DIEAttrs &Attrs, dwarf::Tag Tag);
+
+  /// \brief Hashes an individual attribute.
+  void hashAttribute(AttrEntry Attr, dwarf::Tag Tag);
+
+  /// \brief Hashes an attribute that refers to another DIE.
+  void hashDIEEntry(dwarf::Attribute Attribute, dwarf::Tag Tag,
+                    const DIE &Entry);
+
+  /// \brief Hashes a reference to a named type in such a way that is
+  /// independent of whether that type is described by a declaration or a
+  /// definition.
+  void hashShallowTypeReference(dwarf::Attribute Attribute, const DIE &Entry,
+                                StringRef Name);
+
+  /// \brief Hashes a reference to a previously referenced type DIE.
+  void hashRepeatedTypeReference(dwarf::Attribute Attribute, unsigned DieNumber);
+
+  void hashNestedType(const DIE &Die, StringRef Name);
+
+private:
+  MD5 Hash;
+  DenseMap<const DIE *, unsigned> Numbering;
+};
+}
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
index f58ec9b..689aeda 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp
@@ -24,27 +24,14 @@
 
 using namespace llvm;
 
-const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) {
-  switch (AT) {
-  case eAtomTypeNULL: return "eAtomTypeNULL";
-  case eAtomTypeDIEOffset: return "eAtomTypeDIEOffset";
-  case eAtomTypeCUOffset: return "eAtomTypeCUOffset";
-  case eAtomTypeTag: return "eAtomTypeTag";
-  case eAtomTypeNameFlags: return "eAtomTypeNameFlags";
-  case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags";
-  }
-  llvm_unreachable("invalid AtomType!");
-}
-
 // The length of the header data is always going to be 4 + 4 + 4*NumAtoms.
-DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList) :
-  Header(8 + (atomList.size() * 4)),
-  HeaderData(atomList),
-  Entries(Allocator) { }
+DwarfAccelTable::DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom> atomList)
+    : Header(8 + (atomList.size() * 4)), HeaderData(atomList),
+      Entries(Allocator) {}
 
-DwarfAccelTable::~DwarfAccelTable() { }
+DwarfAccelTable::~DwarfAccelTable() {}
 
-void DwarfAccelTable::AddName(StringRef Name, DIE* die, char Flags) {
+void DwarfAccelTable::AddName(StringRef Name, DIE *die, char Flags) {
   assert(Data.empty() && "Already finalized!");
   // If the string is in the list already then add this die to the list
   // otherwise add a new one.
@@ -59,13 +46,16 @@ void DwarfAccelTable::ComputeBucketCount(void) {
     uniques[i] = Data[i]->HashValue;
   array_pod_sort(uniques.begin(), uniques.end());
   std::vector<uint32_t>::iterator p =
-    std::unique(uniques.begin(), uniques.end());
+      std::unique(uniques.begin(), uniques.end());
   uint32_t num = std::distance(uniques.begin(), p);
 
   // Then compute the bucket size, minimum of 1 bucket.
-  if (num > 1024) Header.bucket_count = num/4;
-  if (num > 16) Header.bucket_count = num/2;
-  else Header.bucket_count = num > 0 ? num : 1;
+  if (num > 1024)
+    Header.bucket_count = num / 4;
+  if (num > 16)
+    Header.bucket_count = num / 2;
+  else
+    Header.bucket_count = num > 0 ? num : 1;
 
   Header.hashes_count = num;
 }
@@ -76,15 +66,15 @@ static bool compareDIEs(const DwarfAccelTable::HashDataContents *A,
   return A->Die->getOffset() < B->Die->getOffset();
 }
 
-void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, const char *Prefix) {
+void DwarfAccelTable::FinalizeTable(AsmPrinter *Asm, StringRef Prefix) {
   // Create the individual hash data outputs.
-  for (StringMap<DataArray>::iterator
-         EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+  for (StringMap<DataArray>::iterator EI = Entries.begin(), EE = Entries.end();
+       EI != EE; ++EI) {
 
     // Unique the entries.
     std::stable_sort(EI->second.begin(), EI->second.end(), compareDIEs);
     EI->second.erase(std::unique(EI->second.begin(), EI->second.end()),
-                    EI->second.end());
+                     EI->second.end());
 
     HashData *Entry = new (Allocator) HashData(EI->getKey(), EI->second);
     Data.push_back(Entry);
@@ -126,7 +116,7 @@ void DwarfAccelTable::EmitHeader(AsmPrinter *Asm) {
   Asm->EmitInt32(HeaderData.Atoms.size());
   for (size_t i = 0; i < HeaderData.Atoms.size(); i++) {
     Atom A = HeaderData.Atoms[i];
-    Asm->OutStreamer.AddComment(Atom::AtomTypeString(A.type));
+    Asm->OutStreamer.AddComment(dwarf::AtomTypeString(A.type));
     Asm->EmitInt16(A.type);
     Asm->OutStreamer.AddComment(dwarf::FormEncodingString(A.form));
     Asm->EmitInt16(A.form);
@@ -152,7 +142,8 @@ void DwarfAccelTable::EmitBuckets(AsmPrinter *Asm) {
 void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
-           HE = Buckets[i].end(); HI != HE; ++HI) {
+                                  HE = Buckets[i].end();
+         HI != HE; ++HI) {
       Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i));
       Asm->EmitInt32((*HI)->HashValue);
     }
@@ -166,13 +157,13 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) {
 void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) {
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
-           HE = Buckets[i].end(); HI != HE; ++HI) {
+                                  HE = Buckets[i].end();
+         HI != HE; ++HI) {
       Asm->OutStreamer.AddComment("Offset in Bucket " + Twine(i));
       MCContext &Context = Asm->OutStreamer.getContext();
-      const MCExpr *Sub =
-        MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context),
-                                MCSymbolRefExpr::Create(SecBegin, Context),
-                                Context);
+      const MCExpr *Sub = MCBinaryExpr::CreateSub(
+          MCSymbolRefExpr::Create((*HI)->Sym, Context),
+          MCSymbolRefExpr::Create(SecBegin, Context), Context);
       Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t));
     }
   }
@@ -185,7 +176,8 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
   uint64_t PrevHash = UINT64_MAX;
   for (size_t i = 0, e = Buckets.size(); i < e; ++i) {
     for (HashList::const_iterator HI = Buckets[i].begin(),
-           HE = Buckets[i].end(); HI != HE; ++HI) {
+                                  HE = Buckets[i].end();
+         HI != HE; ++HI) {
       // Remember to emit the label for our offset.
       Asm->OutStreamer.EmitLabel((*HI)->Sym);
       Asm->OutStreamer.AddComment((*HI)->Str);
@@ -193,8 +185,9 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
                                      D->getStringPoolSym());
       Asm->OutStreamer.AddComment("Num DIEs");
       Asm->EmitInt32((*HI)->Data.size());
-      for (ArrayRef<HashDataContents *>::const_iterator
-             DI = (*HI)->Data.begin(), DE = (*HI)->Data.end();
+      for (ArrayRef<HashDataContents *>::const_iterator
+               DI = (*HI)->Data.begin(),
+               DE = (*HI)->Data.end();
           DI != DE; ++DI) {
         // Emit the DIE offset
         Asm->EmitInt32((*DI)->Die->getOffset());
@@ -214,8 +207,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) {
 }
 
 // Emit the entire data structure to the output file.
-void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin,
-                           DwarfUnits *D) {
+void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, DwarfUnits *D) {
   // Emit the header.
   EmitHeader(Asm);
@@ -239,11 +231,12 @@ void DwarfAccelTable::print(raw_ostream &O) {
   HeaderData.print(O);
 
   O << "Entries: \n";
-  for (StringMap<DataArray>::const_iterator
-         EI = Entries.begin(), EE = Entries.end(); EI != EE; ++EI) {
+  for (StringMap<DataArray>::const_iterator EI = Entries.begin(),
+                                            EE = Entries.end();
+       EI != EE; ++EI) {
     O << "Name: " << EI->getKeyData() << "\n";
     for (DataArray::const_iterator DI = EI->second.begin(),
-           DE = EI->second.end();
+                                   DE = EI->second.end();
          DI != DE; ++DI)
       (*DI)->print(O);
   }
@@ -251,14 +244,14 @@ void DwarfAccelTable::print(raw_ostream &O) {
   O << "Buckets and Hashes: \n";
   for (size_t i = 0, e = Buckets.size(); i < e; ++i)
     for (HashList::const_iterator HI = Buckets[i].begin(),
-           HE = Buckets[i].end(); HI != HE; ++HI)
+                                  HE = Buckets[i].end();
+         HI != HE; ++HI)
       (*HI)->print(O);
 
   O << "Data: \n";
-  for (std::vector<HashData *>::const_iterator
-         DI = Data.begin(), DE = Data.end(); DI != DE; ++DI)
-    (*DI)->print(O);
-
-
+  for (std::vector<HashData *>::const_iterator DI = Data.begin(),
+                                               DE = Data.end();
+       DI != DE; ++DI)
+    (*DI)->print(O);
 }
 #endif
diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
index 9915bca..7627313 100644
--- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
+++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfAccelTable.h
@@ -67,11 +67,7 @@ class DwarfUnits;
 
 class DwarfAccelTable {
 
-  enum HashFunctionType {
-    eHashFunctionDJB = 0u
-  };
-
-  static uint32_t HashDJB (StringRef Str) {
+  static uint32_t HashDJB(StringRef Str) {
     uint32_t h = 5381;
     for (unsigned i = 0, e = Str.size(); i != e; ++i)
       h = ((h << 5) + h) + Str[i];
@@ -80,25 +76,25 @@ class DwarfAccelTable {
 
   // Helper function to compute the number of buckets needed based on
   // the number of unique hashes.
-  void ComputeBucketCount (void);
+  void ComputeBucketCount(void);
 
   struct TableHeader {
-    uint32_t magic;           // 'HASH' magic value to allow endian detection
-    uint16_t version;         // Version number.
-    uint16_t hash_function;   // The hash function enumeration that was used.
-    uint32_t bucket_count;    // The number of buckets in this hash table.
-    uint32_t hashes_count;    // The total number of unique hash values
-                              // and hash data offsets in this table.
-    uint32_t header_data_len; // The bytes to skip to get to the hash
-                              // indexes (buckets) for correct alignment.
+    uint32_t magic;           // 'HASH' magic value to allow endian detection
+    uint16_t version;         // Version number.
+    uint16_t hash_function;   // The hash function enumeration that was used.
+    uint32_t bucket_count;    // The number of buckets in this hash table.
+    uint32_t hashes_count;    // The total number of unique hash values
+                              // and hash data offsets in this table.
+    uint32_t header_data_len; // The bytes to skip to get to the hash
+                              // indexes (buckets) for correct alignment.
     // Also written to disk is the implementation specific header data.
 
     static const uint32_t MagicHash = 0x48415348;
 
-    TableHeader (uint32_t data_len) :
-      magic (MagicHash), version (1), hash_function (eHashFunctionDJB),
-      bucket_count (0), hashes_count (0), header_data_len (data_len)
-    {}
+    TableHeader(uint32_t data_len)
+        : magic(MagicHash), version(1),
+          hash_function(dwarf::DW_hash_function_djb), bucket_count(0),
+          hashes_count(0), header_data_len(data_len) {}
 
 #ifndef NDEBUG
     void print(raw_ostream &O) {
@@ -124,62 +120,38 @@ public:
   //   uint32_t die_offset_base
   //   uint32_t atom_count
   //   atom_count Atoms
-  enum AtomType {
-    eAtomTypeNULL = 0u,
-    eAtomTypeDIEOffset = 1u,  // DIE offset, check form for encoding
-    eAtomTypeCUOffset = 2u,   // DIE offset of the compiler unit header that
-                              // contains the item in question
-    eAtomTypeTag = 3u,        // DW_TAG_xxx value, should be encoded as
-                              // DW_FORM_data1 (if no tags exceed 255) or
-                              // DW_FORM_data2.
-    eAtomTypeNameFlags = 4u,  // Flags from enum NameFlags
-    eAtomTypeTypeFlags = 5u   // Flags from enum TypeFlags
-  };
-
-  enum TypeFlags {
-    eTypeFlagClassMask = 0x0000000fu,
-
-    // Always set for C++, only set for ObjC if this is the
-    // @implementation for a class.
-    eTypeFlagClassIsImplementation = ( 1u << 1 )
-  };
 
   // Make these public so that they can be used as a general interface to
   // the class.
   struct Atom {
-    AtomType type; // enum AtomType
+    uint16_t type; // enum AtomType
     uint16_t form; // DWARF DW_FORM_ defines
 
-    Atom(AtomType type, uint16_t form) : type(type), form(form) {}
-    static const char * AtomTypeString(enum AtomType);
+    Atom(uint16_t type, uint16_t form) : type(type), form(form) {}
 #ifndef NDEBUG
     void print(raw_ostream &O) {
-      O << "Type: " << AtomTypeString(type) << "\n"
+      O << "Type: " << dwarf::AtomTypeString(type) << "\n"
         << "Form: " << dwarf::FormEncodingString(form) << "\n";
     }
-    void dump() {
-      print(dbgs());
-    }
+    void dump() { print(dbgs()); }
 #endif
   };
 
- private:
+private:
   struct TableHeaderData {
     uint32_t die_offset_base;
     SmallVector<Atom, 3> Atoms;
 
    TableHeaderData(ArrayRef<Atom> AtomList, uint32_t offset = 0)
-      : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) { }
+        : die_offset_base(offset), Atoms(AtomList.begin(), AtomList.end()) {}
 
 #ifndef NDEBUG
-    void print (raw_ostream &O) {
+    void print(raw_ostream &O) {
       O << "die_offset_base: " << die_offset_base << "\n";
       for (size_t i = 0; i < Atoms.size(); i++)
         Atoms[i].print(O);
     }
-    void dump() {
-      print(dbgs());
-    }
+    void dump() { print(dbgs()); }
 #endif
   };
 
@@ -193,37 +165,38 @@ public:
   //  HashData[hash_data_count]
 public:
   struct HashDataContents {
-    DIE *Die;   // Offsets
+    DIE *Die;   // Offsets
     char Flags; // Specific flags to output
 
-    HashDataContents(DIE *D, char Flags) :
-      Die(D),
-      Flags(Flags) { }
-    #ifndef NDEBUG
+    HashDataContents(DIE *D, char Flags) : Die(D), Flags(Flags) {}
+#ifndef NDEBUG
    void print(raw_ostream &O) const {
      O << "  Offset: " << Die->getOffset() << "\n";
      O << "  Tag: " << dwarf::TagString(Die->getTag()) << "\n";
      O << "  Flags: " << Flags << "\n";
    }
-    #endif
+#endif
  };
 
 private:
  struct HashData {
    StringRef Str;
    uint32_t HashValue;
    MCSymbol *Sym;
-    ArrayRef<HashDataContents*> Data; // offsets
-    HashData(StringRef S, ArrayRef<HashDataContents*> Data)
-      : Str(S), Data(Data) {
+    ArrayRef<HashDataContents *> Data; // offsets
+    HashData(StringRef S, ArrayRef<HashDataContents *> Data)
+        : Str(S), Data(Data) {
      HashValue = DwarfAccelTable::HashDJB(S);
    }
-    #ifndef NDEBUG
+#ifndef NDEBUG
    void print(raw_ostream &O) {
      O << "Name: " << Str << "\n";
      O << "  Hash Value: " << format("0x%x", HashValue) << "\n";
-      O << "  Symbol: " ;
-      if (Sym) Sym->print(O);
-      else O << "<none>";
+      O << "  Symbol: ";
+      if (Sym)
Sym->print(O); + else + O << "<none>"; O << "\n"; for (size_t i = 0; i < Data.size(); i++) { O << " Offset: " << Data[i]->Die->getOffset() << "\n"; @@ -231,14 +204,12 @@ private: O << " Flags: " << Data[i]->Flags << "\n"; } } - void dump() { - print(dbgs()); - } - #endif + void dump() { print(dbgs()); } +#endif }; - DwarfAccelTable(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; - void operator=(const DwarfAccelTable&) LLVM_DELETED_FUNCTION; + DwarfAccelTable(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; + void operator=(const DwarfAccelTable &) LLVM_DELETED_FUNCTION; // Internal Functions void EmitHeader(AsmPrinter *); @@ -253,31 +224,30 @@ private: // Output Variables TableHeader Header; TableHeaderData HeaderData; - std::vector<HashData*> Data; + std::vector<HashData *> Data; // String Data - typedef std::vector<HashDataContents*> DataArray; - typedef StringMap<DataArray, BumpPtrAllocator&> StringEntries; + typedef std::vector<HashDataContents *> DataArray; + typedef StringMap<DataArray, BumpPtrAllocator &> StringEntries; StringEntries Entries; // Buckets/Hashes/Offsets - typedef std::vector<HashData*> HashList; + typedef std::vector<HashData *> HashList; typedef std::vector<HashList> BucketList; BucketList Buckets; HashList Hashes; // Public Implementation - public: +public: DwarfAccelTable(ArrayRef<DwarfAccelTable::Atom>); ~DwarfAccelTable(); - void AddName(StringRef, DIE*, char = 0); - void FinalizeTable(AsmPrinter *, const char *); + void AddName(StringRef, DIE *, char = 0); + void FinalizeTable(AsmPrinter *, StringRef); void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } #endif }; - } #endif diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index fec5ced..8918f3d 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -68,7 +68,7 @@ void DwarfCFIException::EndModule() { for (size_t i = 0, e = Personalities.size(); i != e; ++i) { if (!Personalities[i]) continue; - MCSymbol *Sym = Asm->Mang->getSymbol(Personalities[i]); + MCSymbol *Sym = Asm->getSymbol(Personalities[i]); TLOF.emitPersonalityValue(Asm->OutStreamer, Asm->TM, Sym); AtLeastOne = true; } diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 89abcff..97ef687 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -22,21 +22,23 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" +#include "llvm/MC/MCSection.h" +#include "llvm/MC/MCStreamer.h" #include "llvm/Target/Mangler.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; /// CompileUnit - Compile unit constructor. -CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, - DwarfDebug *DW, DwarfUnits *DWU) - : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU), - IndexTyDie(0), DebugInfoOffset(0) { +CompileUnit::CompileUnit(unsigned UID, DIE *D, DICompileUnit Node, + AsmPrinter *A, DwarfDebug *DW, DwarfUnits *DWU) + : UniqueID(UID), Node(Node), CUDie(D), Asm(A), DD(DW), DU(DWU), + IndexTyDie(0), DebugInfoOffset(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); + insertDIE(Node, D); } /// ~CompileUnit - Destructor for compile unit.
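The copy constructor and assignment operator above are declared with LLVM_DELETED_FUNCTION, which expands to C++11's '= delete' on compilers that support deleted functions and to nothing (with the declarations left private and unimplemented) elsewhere. A minimal sketch of the C++11 form, using a hypothetical class name:

    class TableLike {
      // Equivalent of the LLVM_DELETED_FUNCTION declarations above on a
      // C++11 compiler: any attempt to copy is a compile-time error.
      TableLike(const TableLike &) = delete;
      void operator=(const TableLike &) = delete;

    public:
      TableLike() = default;
    };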
@@ -55,7 +57,7 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { /// getDefaultLowerBound - Return the default lower bound for an array. If the /// DWARF version doesn't handle the language, return -1. int64_t CompileUnit::getDefaultLowerBound() const { - switch (Language) { + switch (getLanguage()) { default: break; @@ -96,32 +98,71 @@ int64_t CompileUnit::getDefaultLowerBound() const { return -1; } +/// Check whether the DIE for this MDNode can be shared across CUs. +static bool isShareableAcrossCUs(DIDescriptor D) { + // When the MDNode can be part of the type system, the DIE can be + // shared across CUs. + return D.isType() || + (D.isSubprogram() && !DISubprogram(D).isDefinition()); +} + +/// getDIE - Returns the debug information entry map slot for the +/// specified debug variable. We delegate the request to DwarfDebug +/// when the DIE for this MDNode can be shared across CUs. The mappings +/// will be kept in DwarfDebug for shareable DIEs. +DIE *CompileUnit::getDIE(DIDescriptor D) const { + if (isShareableAcrossCUs(D)) + return DD->getDIE(D); + return MDNodeToDieMap.lookup(D); +} + +/// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug +/// when the DIE for this MDNode can be shared across CUs. The mappings +/// will be kept in DwarfDebug for shareable DIEs. +void CompileUnit::insertDIE(DIDescriptor Desc, DIE *D) { + if (isShareableAcrossCUs(Desc)) { + DD->insertDIE(Desc, D); + return; + } + MDNodeToDieMap.insert(std::make_pair(Desc, D)); +} + /// addFlag - Add a flag that is true. -void CompileUnit::addFlag(DIE *Die, unsigned Attribute) { - if (!DD->useDarwinGDBCompat()) - Die->addValue(Attribute, dwarf::DW_FORM_flag_present, - DIEIntegerOne); +void CompileUnit::addFlag(DIE *Die, dwarf::Attribute Attribute) { + if (DD->getDwarfVersion() >= 4) + Die->addValue(Attribute, dwarf::DW_FORM_flag_present, DIEIntegerOne); else - addUInt(Die, Attribute, dwarf::DW_FORM_flag, 1); + Die->addValue(Attribute, dwarf::DW_FORM_flag, DIEIntegerOne); } /// addUInt - Add an unsigned integer attribute data and value. /// -void CompileUnit::addUInt(DIE *Die, unsigned Attribute, - unsigned Form, uint64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(false, Integer); - DIEValue *Value = Integer == 1 ? - DIEIntegerOne : new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); +void CompileUnit::addUInt(DIE *Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, uint64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(false, Integer); + DIEValue *Value = Integer == 1 ? DIEIntegerOne : new (DIEValueAllocator) + DIEInteger(Integer); + Die->addValue(Attribute, *Form, Value); +} + +void CompileUnit::addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer) { + addUInt(Block, (dwarf::Attribute)0, Form, Integer); } /// addSInt - Add a signed integer attribute data and value. /// -void CompileUnit::addSInt(DIE *Die, unsigned Attribute, - unsigned Form, int64_t Integer) { - if (!Form) Form = DIEInteger::BestForm(true, Integer); +void CompileUnit::addSInt(DIE *Die, dwarf::Attribute Attribute, + Optional<dwarf::Form> Form, int64_t Integer) { + if (!Form) + Form = DIEInteger::BestForm(true, Integer); DIEValue *Value = new (DIEValueAllocator) DIEInteger(Integer); - Die->addValue(Attribute, Form, Value); + Die->addValue(Attribute, *Form, Value); +} + +void CompileUnit::addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, + int64_t Integer) { + addSInt(Die, (dwarf::Attribute)0, Form, Integer); } /// addString - Add a string attribute data and value.
We always emit a @@ -129,27 +170,31 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, /// more predictable sizes. In the case of split dwarf we emit an index /// into another table which gets us the static offset into the string /// table. -void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { +void CompileUnit::addString(DIE *Die, dwarf::Attribute Attribute, + StringRef String) { + DIEValue *Value; + dwarf::Form Form; if (!DD->useSplitDwarf()) { MCSymbol *Symb = DU->getStringPoolEntry(String); - DIEValue *Value; if (Asm->needsRelocationsForDwarfStringPool()) Value = new (DIEValueAllocator) DIELabel(Symb); else { MCSymbol *StringPool = DU->getStringPoolSym(); Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); } - Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); + Form = dwarf::DW_FORM_strp; } else { unsigned idx = DU->getStringPoolIndex(String); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value); + Value = new (DIEValueAllocator) DIEInteger(idx); + Form = dwarf::DW_FORM_GNU_str_index; } + DIEValue *Str = new (DIEValueAllocator) DIEString(Value, String); + Die->addValue(Attribute, Form, Str); } /// addLocalString - Add a string attribute data and value. This is guaranteed /// to be in the local string pool instead of indirected. -void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, +void CompileUnit::addLocalString(DIE *Die, dwarf::Attribute Attribute, StringRef String) { MCSymbol *Symb = DU->getStringPoolEntry(String); DIEValue *Value; @@ -162,19 +207,54 @@ void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); } +/// addExpr - Add a Dwarf expression attribute data and value. +/// +void CompileUnit::addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr) { + DIEValue *Value = new (DIEValueAllocator) DIEExpr(Expr); + Die->addValue((dwarf::Attribute)0, Form, Value); +} + /// addLabel - Add a Dwarf label attribute data and value. /// -void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Label) { +void CompileUnit::addLabel(DIE *Die, dwarf::Attribute Attribute, + dwarf::Form Form, const MCSymbol *Label) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); Die->addValue(Attribute, Form, Value); } +void CompileUnit::addLabel(DIEBlock *Die, dwarf::Form Form, + const MCSymbol *Label) { + addLabel(Die, (dwarf::Attribute)0, Form, Label); +} + +/// addSectionLabel - Add a Dwarf section label attribute data and value. +/// +void CompileUnit::addSectionLabel(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Label) { + if (DD->getDwarfVersion() >= 4) + addLabel(Die, Attribute, dwarf::DW_FORM_sec_offset, Label); + else + addLabel(Die, Attribute, dwarf::DW_FORM_data4, Label); +} + +/// addSectionOffset - Add an offset into a section attribute data and value. +/// +void CompileUnit::addSectionOffset(DIE *Die, dwarf::Attribute Attribute, + uint64_t Integer) { + if (DD->getDwarfVersion() >= 4) + addUInt(Die, Attribute, dwarf::DW_FORM_sec_offset, Integer); + else + addUInt(Die, Attribute, dwarf::DW_FORM_data4, Integer); +} + /// addLabelAddress - Add a dwarf label attribute data and value using /// DW_FORM_addr or DW_FORM_GNU_addr_index. 
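addSectionLabel and addSectionOffset introduce a version gate that recurs throughout this file: DWARF 4 added DW_FORM_sec_offset for section-relative values, while DWARF 2 and 3 consumers expect a plain DW_FORM_data4. A sketch of that decision (the enum lists only the two relevant form codes, with values as assigned by the DWARF specification):

    #include <cstdint>

    // Illustrative subset of DWARF form codes.
    enum DwarfForm : uint16_t {
      DW_FORM_data4 = 0x06,
      DW_FORM_sec_offset = 0x17 // Added in DWARF 4.
    };

    // Mirror of the addSectionLabel/addSectionOffset version gate:
    // use a real section-offset form on DWARF >= 4, else fall back
    // to a 4-byte data form.
    DwarfForm sectionOffsetForm(unsigned DwarfVersion) {
      return DwarfVersion >= 4 ? DW_FORM_sec_offset : DW_FORM_data4;
    }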
/// -void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, +void CompileUnit::addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label) { + if (Label) + DD->addArangeLabel(SymbolCU(this, Label)); + if (!DD->useSplitDwarf()) { if (Label != NULL) { DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); @@ -193,37 +273,62 @@ void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// -void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) { - +void CompileUnit::addOpAddress(DIEBlock *Die, const MCSymbol *Sym) { + DD->addArangeLabel(SymbolCU(this, Sym)); if (!DD->useSplitDwarf()) { - addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Die, 0, dwarf::DW_FORM_udata, Sym); + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, dwarf::DW_FORM_udata, Sym); } else { - unsigned idx = DU->getAddrPoolIndex(Sym); - DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); - addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); - Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value); + addUInt(Die, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + addUInt(Die, dwarf::DW_FORM_GNU_addr_index, DU->getAddrPoolIndex(Sym)); } } -/// addDelta - Add a label delta attribute data and value. +/// addSectionDelta - Add a section label delta attribute data and value. /// -void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo) { +void CompileUnit::addSectionDelta(DIE *Die, dwarf::Attribute Attribute, + const MCSymbol *Hi, const MCSymbol *Lo) { DIEValue *Value = new (DIEValueAllocator) DIEDelta(Hi, Lo); - Die->addValue(Attribute, Form, Value); + if (DD->getDwarfVersion() >= 4) + Die->addValue(Attribute, dwarf::DW_FORM_sec_offset, Value); + else + Die->addValue(Attribute, dwarf::DW_FORM_data4, Value); } /// addDIEEntry - Add a DIE attribute data and value. /// -void CompileUnit::addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, +void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry) { - Die->addValue(Attribute, Form, createDIEEntry(Entry)); + addDIEEntry(Die, Attribute, createDIEEntry(Entry)); +} + +void CompileUnit::addDIEEntry(DIE *Die, dwarf::Attribute Attribute, + DIEEntry *Entry) { + const DIE *DieCU = Die->getCompileUnitOrNull(); + const DIE *EntryCU = Entry->getEntry()->getCompileUnitOrNull(); + if (!DieCU) + // We assume that Die belongs to this CU, if it is not linked to any CU yet. + DieCU = getCUDie(); + if (!EntryCU) + EntryCU = getCUDie(); + Die->addValue(Attribute, EntryCU == DieCU ? dwarf::DW_FORM_ref4 + : dwarf::DW_FORM_ref_addr, + Entry); +} + +/// Create a DIE with the given Tag, add the DIE to its parent, and +/// call insertDIE if MD is not null. +DIE *CompileUnit::createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N) { + DIE *Die = new DIE(Tag); + Parent.addChild(Die); + if (N) + insertDIE(N, Die); + return Die; } /// addBlock - Add block data. /// -void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, +void CompileUnit::addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block) { Block->ComputeSize(Asm); DIEBlocks.push_back(Block); // Memoize so we can call the destructor later on. @@ -234,42 +339,42 @@ void CompileUnit::addBlock(DIE *Die, unsigned Attribute, unsigned Form, /// entry. 
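The second addDIEEntry overload above is where cross-CU references get their form: a reference that stays within one compile unit can use the compact, CU-relative DW_FORM_ref4, while a reference into another unit must use the .debug_info-relative DW_FORM_ref_addr. The shape of that decision, sketched with a toy DIE type (the real code walks getCompileUnitOrNull and falls back to the current CU die for DIEs not yet linked in):

    struct ToyDIE {
      const ToyDIE *Parent = nullptr;
      // Walk to the root; in this sketch the root DIE stands in for the CU.
      const ToyDIE *unit() const {
        const ToyDIE *D = this;
        while (D->Parent)
          D = D->Parent;
        return D;
      }
    };

    enum RefForm { Ref4, RefAddr };

    // Same-unit references stay CU-relative; cross-unit references go
    // through the section-relative DW_FORM_ref_addr.
    RefForm referenceForm(const ToyDIE &From, const ToyDIE &To) {
      return From.unit() == To.unit() ? Ref4 : RefAddr;
    }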
void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { // Verify variable. - if (!V.Verify()) + if (!V.isVariable()) return; unsigned Line = V.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(), - V.getContext().getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { // Verify global variable. - if (!G.Verify()) + if (!G.isGlobalVariable()) return; unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { // Verify subprogram. - if (!SP.Verify()) + if (!SP.isSubprogram()) return; // If the line number is 0, don't add it. @@ -277,35 +382,35 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), - SP.getDirectory(), getUniqueID()); + unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), SP.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { // Verify type. - if (!Ty.Verify()) + if (!Ty.isType()) return; unsigned Line = Ty.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), - Ty.getDirectory(), getUniqueID()); + unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), Ty.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information /// entry. void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { // Verify type. 
- if (!Ty.Verify()) + if (!Ty.isObjCProperty()) return; unsigned Line = Ty.getLineNumber(); @@ -315,8 +420,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { unsigned FileID = DD->getOrCreateSourceID(File.getFilename(), File.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addSourceLine - Add location information to specified debug information @@ -331,68 +436,73 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { return; StringRef FN = NS.getFilename(); - unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(), - getUniqueID()); + unsigned FileID = + DD->getOrCreateSourceID(FN, NS.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); - addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); - addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); + addUInt(Die, dwarf::DW_AT_decl_file, None, FileID); + addUInt(Die, dwarf::DW_AT_decl_line, None, Line); } /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. -void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, +void CompileUnit::addVariableAddress(const DbgVariable &DV, DIE *Die, MachineLocation Location) { - if (DV->variableHasComplexAddress()) + if (DV.variableHasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); - else if (DV->isBlockByrefVariable()) + else if (DV.isBlockByrefVariable()) addBlockByrefAddress(DV, Die, dwarf::DW_AT_location, Location); else - addAddress(Die, dwarf::DW_AT_location, Location); + addAddress(Die, dwarf::DW_AT_location, Location, + DV.getVariable().isIndirect()); } /// addRegisterOp - Add register operand. -void CompileUnit::addRegisterOp(DIE *TheDie, unsigned Reg) { +void CompileUnit::addRegisterOp(DIEBlock *TheDie, unsigned Reg) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); if (DWReg < 32) - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_reg0 + DWReg); else { - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); - addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_regx); + addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } } /// addRegisterOffset - Add register offset. -void CompileUnit::addRegisterOffset(DIE *TheDie, unsigned Reg, +void CompileUnit::addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset) { const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); unsigned DWReg = RI->getDwarfRegNum(Reg, false); const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); if (Reg == TRI->getFrameRegister(*Asm->MF)) // If variable offset is based in frame register then use fbreg. 
- addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_fbreg); else if (DWReg < 32) - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_breg0 + DWReg); else { - addUInt(TheDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); - addUInt(TheDie, 0, dwarf::DW_FORM_udata, DWReg); + addUInt(TheDie, dwarf::DW_FORM_data1, dwarf::DW_OP_bregx); + addUInt(TheDie, dwarf::DW_FORM_udata, DWReg); } - addSInt(TheDie, 0, dwarf::DW_FORM_sdata, Offset); + addSInt(TheDie, dwarf::DW_FORM_sdata, Offset); } /// addAddress - Add an address attribute to a die based on the location /// provided. -void CompileUnit::addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location) { +void CompileUnit::addAddress(DIE *Die, dwarf::Attribute Attribute, + const MachineLocation &Location, bool Indirect) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - if (Location.isReg()) + if (Location.isReg() && !Indirect) addRegisterOp(Block, Location.getReg()); - else + else { addRegisterOffset(Block, Location.getReg(), Location.getOffset()); + if (Indirect && !Location.isReg()) { + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } + } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /// addComplexAddress - Start with the address based on the location provided, @@ -400,37 +510,37 @@ void CompileUnit::addAddress(DIE *Die, unsigned Attribute, /// given the extra address information encoded in the DIVariable, starting from /// the starting location. Add the DWARF information to the die. /// -void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, +void CompileUnit::addComplexAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, const MachineLocation &Location) { DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - unsigned N = DV->getNumAddrElements(); + unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (Location.isReg()) { - if (N >= 2 && DV->getAddrElement(0) == DIBuilder::OpPlus) { + if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - addRegisterOffset(Block, Location.getReg(), DV->getAddrElement(1)); + addRegisterOffset(Block, Location.getReg(), DV.getAddrElement(1)); i = 2; } else addRegisterOp(Block, Location.getReg()); - } - else + } else addRegisterOffset(Block, Location.getReg(), Location.getOffset()); - for (;i < N; ++i) { - uint64_t Element = DV->getAddrElement(i); + for (; i < N; ++i) { + uint64_t Element = DV.getAddrElement(i); if (Element == DIBuilder::OpPlus) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, DV->getAddrElement(++i)); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { if (!Location.isReg()) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - } else llvm_unreachable("unknown DIBuilder Opcode"); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + } else + llvm_unreachable("unknown DIBuilder Opcode"); } // Now attach the location information to the DIE. 
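Taken together, addRegisterOp, addRegisterOffset, and addAddress pick the DWARF expression opcodes for a variable's location: registers 0-31 get the compact DW_OP_reg0+n / DW_OP_breg0+n encodings, higher register numbers need DW_OP_regx / DW_OP_bregx with the register as an operand, frame-register-relative offsets use DW_OP_fbreg, and an extra DW_OP_deref is appended for indirect values. A rough sketch of the opcode selection (opcode values follow the DWARF specification; ULEB/SLEB operand encoding is elided):

    #include <cstdint>
    #include <vector>

    enum : uint8_t {
      DW_OP_deref = 0x06,
      DW_OP_reg0 = 0x50,  // ... DW_OP_reg31 = 0x6f
      DW_OP_breg0 = 0x70, // ... DW_OP_breg31 = 0x8f
      DW_OP_regx = 0x90,
      DW_OP_fbreg = 0x91,
      DW_OP_bregx = 0x92
    };

    // Select the opcode sequence for a (register, offset, indirect) location.
    // Operands (register numbers and offsets) are omitted; only the opcode
    // choices roughly mirror addAddress.
    std::vector<uint8_t> locationOps(unsigned DwarfReg, bool IsFrameReg,
                                     bool HasOffset, bool Indirect) {
      std::vector<uint8_t> Ops;
      if (!HasOffset && !Indirect) {
        // The value lives in the register itself.
        Ops.push_back(DwarfReg < 32 ? uint8_t(DW_OP_reg0 + DwarfReg)
                                    : uint8_t(DW_OP_regx));
        return Ops;
      }
      // Register + offset addressing.
      if (IsFrameReg)
        Ops.push_back(DW_OP_fbreg);
      else if (DwarfReg < 32)
        Ops.push_back(uint8_t(DW_OP_breg0 + DwarfReg));
      else
        Ops.push_back(DW_OP_bregx);
      if (Indirect)
        Ops.push_back(DW_OP_deref);
      return Ops;
    }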
- addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /* Byref variables, in Blocks, are declared by the programmer as "SomeType @@ -493,45 +603,42 @@ void CompileUnit::addComplexAddress(DbgVariable *&DV, DIE *Die, /// starting location. Add the DWARF information to the die. For /// more information, read large comment just above here. /// -void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, - unsigned Attribute, +void CompileUnit::addBlockByrefAddress(const DbgVariable &DV, DIE *Die, + dwarf::Attribute Attribute, const MachineLocation &Location) { - DIType Ty = DV->getType(); + DIType Ty = DV.getType(); DIType TmpTy = Ty; - unsigned Tag = Ty.getTag(); + uint16_t Tag = Ty.getTag(); bool isPointer = false; - StringRef varName = DV->getName(); + StringRef varName = DV.getName(); if (Tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - TmpTy = DTy.getTypeDerivedFrom(); + DIDerivedType DTy(Ty); + TmpTy = resolve(DTy.getTypeDerivedFrom()); isPointer = true; } - DICompositeType blockStruct = DICompositeType(TmpTy); + DICompositeType blockStruct(TmpTy); // Find the __forwarding field and the variable field in the __Block_byref // struct. DIArray Fields = blockStruct.getTypeArray(); - DIDescriptor varField = DIDescriptor(); - DIDescriptor forwardingField = DIDescriptor(); + DIDerivedType varField; + DIDerivedType forwardingField; for (unsigned i = 0, N = Fields.getNumElements(); i < N; ++i) { - DIDescriptor Element = Fields.getElement(i); - DIDerivedType DT = DIDerivedType(Element); + DIDerivedType DT(Fields.getElement(i)); StringRef fieldName = DT.getName(); if (fieldName == "__forwarding") - forwardingField = Element; + forwardingField = DT; else if (fieldName == varName) - varField = Element; + varField = DT; } // Get the offsets for the forwarding field and the variable field. - unsigned forwardingFieldOffset = - DIDerivedType(forwardingField).getOffsetInBits() >> 3; - unsigned varFieldOffset = - DIDerivedType(varField).getOffsetInBits() >> 3; + unsigned forwardingFieldOffset = forwardingField.getOffsetInBits() >> 3; + unsigned varFieldOffset = varField.getOffsetInBits() >> 2; // Decode the original location, and use that as the start of the byref // variable's location. @@ -545,76 +652,139 @@ void CompileUnit::addBlockByrefAddress(DbgVariable *&DV, DIE *Die, // If we started with a pointer to the __Block_byref... struct, then // the first thing we need to do is dereference the pointer (DW_OP_deref). if (isPointer) - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Next add the offset for the '__forwarding' field: // DW_OP_plus_uconst ForwardingFieldOffset. Note there's no point in // adding the offset if it's 0. if (forwardingFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, forwardingFieldOffset); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, forwardingFieldOffset); } // Now dereference the __forwarding field to get to the real __Block_byref // struct: DW_OP_deref. - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); // Now that we've got the real __Block_byref... struct, add the offset // for the variable's field to get to the location of the actual variable: // DW_OP_plus_uconst varFieldOffset. Again, don't add if it's 0. 
if (varFieldOffset > 0) { - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - addUInt(Block, 0, dwarf::DW_FORM_udata, varFieldOffset); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(Block, dwarf::DW_FORM_udata, varFieldOffset); } // Now attach the location information to the DIE. - addBlock(Die, Attribute, 0, Block); + addBlock(Die, Attribute, Block); } /// isTypeSigned - Return true if the type is signed. -static bool isTypeSigned(DIType Ty, int *SizeInBits) { +static bool isTypeSigned(DwarfDebug *DD, DIType Ty, int *SizeInBits) { if (Ty.isDerivedType()) - return isTypeSigned(DIDerivedType(Ty).getTypeDerivedFrom(), SizeInBits); + return isTypeSigned(DD, DD->resolve(DIDerivedType(Ty).getTypeDerivedFrom()), + SizeInBits); if (Ty.isBasicType()) - if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed - || DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { + if (DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed || + DIBasicType(Ty).getEncoding() == dwarf::DW_ATE_signed_char) { *SizeInBits = Ty.getSizeInBits(); return true; } return false; } +/// Return true if type encoding is unsigned. +static bool isUnsignedDIType(DwarfDebug *DD, DIType Ty) { + DIDerivedType DTy(Ty); + if (DTy.isDerivedType()) + return isUnsignedDIType(DD, DD->resolve(DTy.getTypeDerivedFrom())); + + DIBasicType BTy(Ty); + if (BTy.isBasicType()) { + unsigned Encoding = BTy.getEncoding(); + if (Encoding == dwarf::DW_ATE_unsigned || + Encoding == dwarf::DW_ATE_unsigned_char || + Encoding == dwarf::DW_ATE_boolean) + return true; + } + return false; +} + +/// If this type is derived from a base type then return base type size. +static uint64_t getBaseTypeSize(DwarfDebug *DD, DIDerivedType Ty) { + unsigned Tag = Ty.getTag(); + + if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef && + Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type && + Tag != dwarf::DW_TAG_restrict_type) + return Ty.getSizeInBits(); + + DIType BaseType = DD->resolve(Ty.getTypeDerivedFrom()); + + // If this type is not derived from any type then take conservative approach. + if (!BaseType.isValid()) + return Ty.getSizeInBits(); + + // If this is a derived type, go ahead and get the base type, unless it's a + // reference then it's just the size of the field. Pointer types have no need + // of this since they're a different type of qualification on the type. + if (BaseType.getTag() == dwarf::DW_TAG_reference_type || + BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type) + return Ty.getSizeInBits(); + + if (BaseType.isDerivedType()) + return getBaseTypeSize(DD, DIDerivedType(BaseType)); + + return BaseType.getSizeInBits(); +} + /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, +void CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty) { + // FIXME: This is a bit conservative/simple - it emits negative values at + // their maximum bit width which is a bit unfortunate (& doesn't prefer + // udata/sdata over dataN as suggested by the DWARF spec) assert(MO.isImm() && "Invalid machine operand!"); - DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); int SizeInBits = -1; - bool SignedConstant = isTypeSigned(Ty, &SizeInBits); - unsigned Form = SignedConstant ? 
dwarf::DW_FORM_sdata : dwarf::DW_FORM_udata; - switch (SizeInBits) { - case 8: Form = dwarf::DW_FORM_data1; break; - case 16: Form = dwarf::DW_FORM_data2; break; - case 32: Form = dwarf::DW_FORM_data4; break; - case 64: Form = dwarf::DW_FORM_data8; break; - default: break; + bool SignedConstant = isTypeSigned(DD, Ty, &SizeInBits); + dwarf::Form Form; + + // If we're a signed constant definitely use sdata. + if (SignedConstant) { + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, MO.getImm()); + return; } - SignedConstant ? addSInt(Block, 0, Form, MO.getImm()) - : addUInt(Block, 0, Form, MO.getImm()); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; + // Else use data for now unless it's larger than we can deal with. + switch (SizeInBits) { + case 8: + Form = dwarf::DW_FORM_data1; + break; + case 16: + Form = dwarf::DW_FORM_data2; + break; + case 32: + Form = dwarf::DW_FORM_data4; + break; + case 64: + Form = dwarf::DW_FORM_data8; + break; + default: + Form = dwarf::DW_FORM_udata; + addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); + return; + } + addUInt(Die, dwarf::DW_AT_const_value, Form, MO.getImm()); } /// addConstantFPValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { - assert (MO.isFPImm() && "Invalid machine operand!"); +void CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { + assert(MO.isFPImm() && "Invalid machine operand!"); DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); APFloat FPImm = MO.getFPImm()->getValueAPF(); // Get the raw data form of the floating point. const APInt FltVal = FPImm.bitcastToAPInt(); - const char *FltPtr = (const char*)FltVal.getRawData(); + const char *FltPtr = (const char *)FltVal.getRawData(); int NumBytes = FltVal.getBitWidth() / 8; // 8 bits per byte. bool LittleEndian = Asm->getDataLayout().isLittleEndian(); @@ -624,43 +794,56 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { // Output the constant to DWARF one byte at a time. for (; Start != Stop; Start += Incr) - addUInt(Block, 0, dwarf::DW_FORM_data1, - (unsigned char)0xFF & FltPtr[Start]); + addUInt(Block, dwarf::DW_FORM_data1, (unsigned char)0xFF & FltPtr[Start]); - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; + addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addConstantFPValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { - return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false); +void CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { + // Pass this down to addConstantValue as an unsigned bag of bits. + addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), true); } /// addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, +void CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned) { - return addConstantValue(Die, CI->getValue(), Unsigned); + addConstantValue(Die, CI->getValue(), Unsigned); } // addConstantValue - Add constant value entry in variable DIE. -bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, - bool Unsigned) { +void CompileUnit::addConstantValue(DIE *Die, const APInt &Val, bool Unsigned) { unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { - unsigned form = 0; + // If we're a signed constant definitely use sdata. 
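The rewritten addConstantValue drops the old emit-into-a-block path: signed constants now always take DW_FORM_sdata, and unsigned constants take the exact-width dataN form when the type is 8, 16, 32, or 64 bits wide, falling back to DW_FORM_udata otherwise. The selection logic in isolation (form values as assigned by the DWARF specification):

    #include <cstdint>

    enum DwarfForm : uint16_t {
      DW_FORM_data2 = 0x05,
      DW_FORM_data4 = 0x06,
      DW_FORM_data8 = 0x07,
      DW_FORM_data1 = 0x0b,
      DW_FORM_sdata = 0x0d,
      DW_FORM_udata = 0x0f
    };

    // Mirrors the form choice in addConstantValue: signed values always
    // use sdata; unsigned values use a fixed-size form when one fits.
    DwarfForm constantForm(bool IsSigned, int SizeInBits) {
      if (IsSigned)
        return DW_FORM_sdata;
      switch (SizeInBits) {
      case 8:  return DW_FORM_data1;
      case 16: return DW_FORM_data2;
      case 32: return DW_FORM_data4;
      case 64: return DW_FORM_data8;
      default: return DW_FORM_udata;
      }
    }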
+ if (!Unsigned) { + addSInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, + Val.getSExtValue()); + return; + } + + // Else use data for now unless it's larger than we can deal with. + dwarf::Form Form; switch (CIBitWidth) { - case 8: form = dwarf::DW_FORM_data1; break; - case 16: form = dwarf::DW_FORM_data2; break; - case 32: form = dwarf::DW_FORM_data4; break; - case 64: form = dwarf::DW_FORM_data8; break; + case 8: + Form = dwarf::DW_FORM_data1; + break; + case 16: + Form = dwarf::DW_FORM_data2; + break; + case 32: + Form = dwarf::DW_FORM_data4; + break; + case 64: + Form = dwarf::DW_FORM_data8; + break; default: - form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata; + addUInt(Die, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + Val.getZExtValue()); + return; } - if (Unsigned) - addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue()); - else - addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue()); - return true; + addUInt(Die, dwarf::DW_AT_const_value, Form, Val.getZExtValue()); + return; } DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); @@ -678,11 +861,10 @@ bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, c = Ptr64[i / 8] >> (8 * (i & 7)); else c = Ptr64[(NumBytes - 1 - i) / 8] >> (8 * ((NumBytes - 1 - i) & 7)); - addUInt(Block, 0, dwarf::DW_FORM_data1, c); + addUInt(Block, dwarf::DW_FORM_data1, c); } - addBlock(Die, dwarf::DW_AT_const_value, 0, Block); - return true; + addBlock(Die, dwarf::DW_AT_const_value, Block); } /// addTemplateParams - Add template parameters into buffer. @@ -691,47 +873,48 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { for (unsigned i = 0, e = TParams.getNumElements(); i != e; ++i) { DIDescriptor Element = TParams.getElement(i); if (Element.isTemplateTypeParameter()) - Buffer.addChild(getOrCreateTemplateTypeParameterDIE( - DITemplateTypeParameter(Element))); + constructTemplateTypeParameterDIE(Buffer, + DITemplateTypeParameter(Element)); else if (Element.isTemplateValueParameter()) - Buffer.addChild(getOrCreateTemplateValueParameterDIE( - DITemplateValueParameter(Element))); + constructTemplateValueParameterDIE(Buffer, + DITemplateValueParameter(Element)); } } /// getOrCreateContextDIE - Get context owner's DIE. -DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) { +DIE *CompileUnit::getOrCreateContextDIE(DIScope Context) { + if (!Context || Context.isFile()) + return getCUDie(); if (Context.isType()) return getOrCreateTypeDIE(DIType(Context)); - else if (Context.isNameSpace()) + if (Context.isNameSpace()) return getOrCreateNameSpace(DINameSpace(Context)); - else if (Context.isSubprogram()) + if (Context.isSubprogram()) return getOrCreateSubprogramDIE(DISubprogram(Context)); - else - return getDIE(Context); -} - -/// addToContextOwner - Add Die into the list of its context owner's children. -void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (DIE *ContextDIE = getOrCreateContextDIE(Context)) - ContextDIE->addChild(Die); - else - addDie(Die); + return getDIE(Context); } /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the /// given DIType. DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { - DIType Ty(TyNode); - if (!Ty.Verify()) + if (!TyNode) return NULL; + + DIType Ty(TyNode); + assert(Ty.isType()); + + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. 
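getOrCreateTypeDIE now constructs the context DIE before consulting the cache, and getOrCreateNameSpace and getOrCreateSubprogramDIE below follow suit, because building the parent scope can itself create the requested DIE (a member declared by its class, for example). The general memoized, parent-first pattern, sketched with toy node types rather than LLVM's classes:

    #include <map>
    #include <string>

    struct Node {
      std::string Name;
      const Node *Context = nullptr; // Enclosing scope, or null at top level.
    };

    struct DIENode {
      const Node *Source;
      DIENode *Parent;
    };

    std::map<const Node *, DIENode *> DIEMap;

    DIENode *getOrCreateDIE(const Node *N) {
      if (!N)
        return nullptr;
      // Build the context first: creating the parent scope may create N's
      // DIE as a side effect.
      DIENode *Parent = getOrCreateDIE(N->Context);
      // Only now is a cache lookup safe.
      if (DIENode *Existing = DIEMap[N])
        return Existing;
      DIENode *D = new DIENode{N, Parent};
      DIEMap[N] = D;
      return D;
    }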
+ DIE *ContextDIE = getOrCreateContextDIE(resolve(Ty.getContext())); + assert(ContextDIE); + DIE *TyDIE = getDIE(Ty); if (TyDIE) return TyDIE; // Create new type. - TyDIE = new DIE(dwarf::DW_TAG_base_type); - insertDIE(Ty, TyDIE); + TyDIE = createAndAddDIE(Ty.getTag(), *ContextDIE, Ty); + if (Ty.isBasicType()) constructTypeDIE(*TyDIE, DIBasicType(Ty)); else if (Ty.isCompositeType()) @@ -748,28 +931,24 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { DICompositeType CT(Ty); // A runtime language of 0 actually means C/C++ and that any // non-negative value is some version of Objective-C/C++. - IsImplementation = (CT.getRunTimeLang() == 0) || - CT.isObjcClassComplete(); + IsImplementation = (CT.getRunTimeLang() == 0) || CT.isObjcClassComplete(); } - unsigned Flags = IsImplementation ? - DwarfAccelTable::eTypeFlagClassIsImplementation : 0; + unsigned Flags = IsImplementation ? dwarf::DW_FLAG_type_implementation : 0; addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags)); } - addToContextOwner(TyDIE, Ty.getContext()); return TyDIE; } /// addType - Add a new type attribute to the specified entity. -void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { - if (!Ty.Verify()) - return; +void CompileUnit::addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute) { + assert(Ty && "Trying to add a type that doesn't exist?"); // Check for pre-existence. DIEEntry *Entry = getDIEEntry(Ty); // If it exists then use the existing value. if (Entry) { - Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); + addDIEEntry(Entity, Attribute, Entry); return; } @@ -779,35 +958,112 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { // Set up proxy. Entry = createDIEEntry(Buffer); insertDIEEntry(Ty, Entry); - Entity->addValue(Attribute, dwarf::DW_FORM_ref4, Entry); + addDIEEntry(Entity, Attribute, Entry); // If this is a complete composite type then include it in the // list of global types. addGlobalType(Ty); } +// Accelerator table mutators - add each name along with its companion +// DIE to the proper table while ensuring that the name that we're going +// to reference is in the string table. We do this since the names we +// add may not only be identical to the names in the DIE. +void CompileUnit::addAccelName(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelNames[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelObjC(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelObjC[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelNamespace(StringRef Name, DIE *Die) { + DU->getStringPoolEntry(Name); + std::vector<DIE *> &DIEs = AccelNamespace[Name]; + DIEs.push_back(Die); +} + +void CompileUnit::addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die) { + DU->getStringPoolEntry(Name); + std::vector<std::pair<DIE *, unsigned> > &DIEs = AccelTypes[Name]; + DIEs.push_back(Die); +} + +/// addGlobalName - Add a new global name to the compile unit. +void CompileUnit::addGlobalName(StringRef Name, DIE *Die, DIScope Context) { + std::string FullName = getParentContextString(Context) + Name.str(); + GlobalNames[FullName] = Die; +} + /// addGlobalType - Add a new global type to the compile unit.
/// void CompileUnit::addGlobalType(DIType Ty) { - DIDescriptor Context = Ty.getContext(); - if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl() - && (!Context || Context.isCompileUnit() || Context.isFile() - || Context.isNameSpace())) - if (DIEEntry *Entry = getDIEEntry(Ty)) - GlobalTypes[Ty.getName()] = Entry->getEntry(); + DIScope Context = resolve(Ty.getContext()); + if (!Ty.getName().empty() && !Ty.isForwardDecl() && + (!Context || Context.isCompileUnit() || Context.isFile() || + Context.isNameSpace())) + if (DIEEntry *Entry = getDIEEntry(Ty)) { + std::string FullName = + getParentContextString(Context) + Ty.getName().str(); + GlobalTypes[FullName] = Entry->getEntry(); + } +} + +/// getParentContextString - Walks the metadata parent chain in a language +/// specific manner (using the compile unit language) and returns +/// it as a string. This is done at the metadata level because DIEs may +/// not currently have been added to the parent context and walking the +/// DIEs looking for names is more expensive than walking the metadata. +std::string CompileUnit::getParentContextString(DIScope Context) const { + if (!Context) + return ""; + + // FIXME: Decide whether to implement this for non-C++ languages. + if (getLanguage() != dwarf::DW_LANG_C_plus_plus) + return ""; + + std::string CS; + SmallVector<DIScope, 1> Parents; + while (!Context.isCompileUnit()) { + Parents.push_back(Context); + if (Context.getContext()) + Context = resolve(Context.getContext()); + else + // Structure, etc types will have a NULL context if they're at the top + // level. + break; + } + + // Reverse iterate over our list to go from the outermost construct to the + // innermost. + for (SmallVectorImpl<DIScope>::reverse_iterator I = Parents.rbegin(), + E = Parents.rend(); + I != E; ++I) { + DIScope Ctx = *I; + StringRef Name = Ctx.getName(); + if (!Name.empty()) { + CS += Name; + CS += "::"; + } + } + return CS; } -/// addPubTypes - Add type for pubtypes section. +/// addPubTypes - Add subprogram argument types for pubtypes section. void CompileUnit::addPubTypes(DISubprogram SP) { DICompositeType SPTy = SP.getType(); - unsigned SPTag = SPTy.getTag(); + uint16_t SPTag = SPTy.getTag(); if (SPTag != dwarf::DW_TAG_subroutine_type) return; DIArray Args = SPTy.getTypeArray(); for (unsigned i = 0, e = Args.getNumElements(); i != e; ++i) { DIType ATy(Args.getElement(i)); - if (!ATy.Verify()) + if (!ATy.isType()) continue; addGlobalType(ATy); } @@ -821,18 +1077,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIBasicType BTy) { if (!Name.empty()) addString(&Buffer, dwarf::DW_AT_name, Name); - if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) { - Buffer.setTag(dwarf::DW_TAG_unspecified_type); - // Unspecified types has only name, nothing else. + // An unspecified type only has a name attribute. + if (BTy.getTag() == dwarf::DW_TAG_unspecified_type) return; - } - Buffer.setTag(dwarf::DW_TAG_base_type); addUInt(&Buffer, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, BTy.getEncoding()); uint64_t Size = BTy.getSizeInBits() >> 3; - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); } /// constructTypeDIE - Construct derived type die from DIDerivedType. @@ -840,16 +1093,12 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Get core information. StringRef Name = DTy.getName(); uint64_t Size = DTy.getSizeInBits() >> 3; - unsigned Tag = DTy.getTag(); - - // FIXME - Workaround for templates.
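getParentContextString builds the qualified-name prefix for the pubnames/pubtypes tables by walking the metadata scope chain inward-to-outward and then printing it outermost-first. A toy version of the same walk over plain structs:

    #include <string>
    #include <vector>

    struct Scope {
      std::string Name;
      const Scope *Context = nullptr; // Null at translation-unit level.
    };

    // Walk inner -> outer, then print outer -> inner, e.g. "outer::inner::".
    std::string parentContextString(const Scope *S) {
      std::vector<const Scope *> Parents;
      for (; S; S = S->Context)
        Parents.push_back(S);
      std::string CS;
      for (auto I = Parents.rbegin(), E = Parents.rend(); I != E; ++I)
        if (!(*I)->Name.empty())
          CS += (*I)->Name + "::";
      return CS;
    }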
- if (Tag == dwarf::DW_TAG_inheritance) Tag = dwarf::DW_TAG_reference_type; - - Buffer.setTag(Tag); + uint16_t Tag = Buffer.getTag(); // Map to main type, void will not have a type. - DIType FromTy = DTy.getTypeDerivedFrom(); - addType(&Buffer, FromTy); + DIType FromTy = resolve(DTy.getTypeDerivedFrom()); + if (FromTy) + addType(&Buffer, FromTy); // Add name if not anonymous or intermediate type. if (!Name.empty()) @@ -857,97 +1106,102 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { // Add size if non-zero (derived types might be zero-sized.) if (Size && Tag != dwarf::DW_TAG_pointer_type) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); if (Tag == dwarf::DW_TAG_ptr_to_member_type) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DTy.getClassType())); + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, + getOrCreateTypeDIE(resolve(DTy.getClassType()))); // Add source line info if available and TyDesc is not a forward declaration. if (!DTy.isForwardDecl()) addSourceLine(&Buffer, DTy); } +/// Return true if the type is appropriately scoped to be contained inside +/// its own type unit. +static bool isTypeUnitScoped(DIType Ty, const DwarfDebug *DD) { + DIScope Parent = DD->resolve(Ty.getContext()); + while (Parent) { + // Don't generate a hash for anything scoped inside a function. + if (Parent.isSubprogram()) + return false; + Parent = DD->resolve(Parent.getContext()); + } + return true; +} + +/// Return true if the type should be split out into a type unit. +static bool shouldCreateTypeUnit(DICompositeType CTy, const DwarfDebug *DD) { + uint16_t Tag = CTy.getTag(); + + switch (Tag) { + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + case dwarf::DW_TAG_class_type: + // If this is a class, structure, union, or enumeration type + // that is a definition (not a declaration), and not scoped + // inside a function then separate this out as a type unit. + return !CTy.isForwardDecl() && isTypeUnitScoped(CTy, DD); + default: + return false; + } +} + /// constructTypeDIE - Construct type DIE from DICompositeType. void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Get core information. StringRef Name = CTy.getName(); uint64_t Size = CTy.getSizeInBits() >> 3; - unsigned Tag = CTy.getTag(); - Buffer.setTag(Tag); + uint16_t Tag = Buffer.getTag(); switch (Tag) { case dwarf::DW_TAG_array_type: - constructArrayTypeDIE(Buffer, &CTy); + constructArrayTypeDIE(Buffer, CTy); break; - case dwarf::DW_TAG_enumeration_type: { - DIArray Elements = CTy.getTypeArray(); - - // Add enumerators to enumeration type. - for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIE *ElemDie = NULL; - DIDescriptor Enum(Elements.getElement(i)); - if (Enum.isEnumerator()) { - ElemDie = constructEnumTypeDIE(DIEnumerator(Enum)); - Buffer.addChild(ElemDie); - } - } - DIType DTy = CTy.getTypeDerivedFrom(); - if (DTy.Verify()) { - addType(&Buffer, DTy); - addUInt(&Buffer, dwarf::DW_AT_enum_class, dwarf::DW_FORM_flag, 1); - } - } + case dwarf::DW_TAG_enumeration_type: + constructEnumTypeDIE(Buffer, CTy); break; case dwarf::DW_TAG_subroutine_type: { - // Add return type. + // Add return type. A void return won't have a type. 
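shouldCreateTypeUnit above admits only definitions of class, structure, union, and enumeration types that are not scoped inside a function. The same predicate, restated over a toy scope chain:

    enum Tag { StructTag, UnionTag, EnumTag, ClassTag, SubprogramTag, OtherTag };

    struct TypeNode {
      Tag T;
      bool ForwardDecl = false;
      const TypeNode *Context = nullptr;
    };

    // A type scoped inside any function is never split into a type unit.
    static bool isTypeUnitScoped(const TypeNode *Ty) {
      for (const TypeNode *P = Ty->Context; P; P = P->Context)
        if (P->T == SubprogramTag)
          return false;
      return true;
    }

    // Only definitions of aggregate/enumeration types qualify.
    bool shouldCreateTypeUnit(const TypeNode *Ty) {
      switch (Ty->T) {
      case StructTag:
      case UnionTag:
      case EnumTag:
      case ClassTag:
        return !Ty->ForwardDecl && isTypeUnitScoped(Ty);
      default:
        return false;
      }
    }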
DIArray Elements = CTy.getTypeArray(); - DIDescriptor RTy = Elements.getElement(0); - addType(&Buffer, DIType(RTy)); + DIType RTy(Elements.getElement(0)); + if (RTy) + addType(&Buffer, RTy); bool isPrototyped = true; // Add arguments. for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Ty = Elements.getElement(i); if (Ty.isUnspecifiedParameter()) { - DIE *Arg = new DIE(dwarf::DW_TAG_unspecified_parameters); - Buffer.addChild(Arg); + createAndAddDIE(dwarf::DW_TAG_unspecified_parameters, Buffer); isPrototyped = false; } else { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); + DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, Buffer); addType(Arg, DIType(Ty)); if (DIType(Ty).isArtificial()) addFlag(Arg, dwarf::DW_AT_artificial); - Buffer.addChild(Arg); } } // Add prototype flag if we're dealing with a C language and the // function has been prototyped. + uint16_t Language = getLanguage(); if (isPrototyped && - (Language == dwarf::DW_LANG_C89 || - Language == dwarf::DW_LANG_C99 || + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(&Buffer, dwarf::DW_AT_prototyped); - } - break; + } break; case dwarf::DW_TAG_structure_type: case dwarf::DW_TAG_union_type: case dwarf::DW_TAG_class_type: { // Add elements to structure type. DIArray Elements = CTy.getTypeArray(); - - // A forward struct declared type may not have elements available. - unsigned N = Elements.getNumElements(); - if (N == 0) - break; - - // Add elements to structure type. - for (unsigned i = 0; i < N; ++i) { + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); DIE *ElemDie = NULL; if (Element.isSubprogram()) { DISubprogram SP(Element); - ElemDie = getOrCreateSubprogramDIE(DISubprogram(Element)); + ElemDie = getOrCreateSubprogramDIE(SP); if (SP.isProtected()) addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_protected); @@ -956,21 +1210,23 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { dwarf::DW_ACCESS_private); else addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, - dwarf::DW_ACCESS_public); + dwarf::DW_ACCESS_public); if (SP.isExplicit()) addFlag(ElemDie, dwarf::DW_AT_explicit); } else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); if (DDTy.getTag() == dwarf::DW_TAG_friend) { - ElemDie = new DIE(dwarf::DW_TAG_friend); - addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); - } else if (DDTy.isStaticMember()) - ElemDie = createStaticMemberDIE(DDTy); - else - ElemDie = createMemberDIE(DDTy); + ElemDie = createAndAddDIE(dwarf::DW_TAG_friend, Buffer); + addType(ElemDie, resolve(DDTy.getTypeDerivedFrom()), + dwarf::DW_AT_friend); + } else if (DDTy.isStaticMember()) { + getOrCreateStaticMemberDIE(DDTy); + } else { + constructMemberDIE(Buffer, DDTy); + } } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); - ElemDie = new DIE(Property.getTag()); + ElemDie = createAndAddDIE(Property.getTag(), Buffer); StringRef PropertyName = Property.getObjCPropertyName(); addString(ElemDie, dwarf::DW_AT_APPLE_property_name, PropertyName); addType(ElemDie, Property.getType()); @@ -995,8 +1251,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (Property.isNonAtomicObjCProperty()) PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; if (PropertyAttributes) - addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0, - PropertyAttributes); + 
addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, None, + PropertyAttributes); DIEEntry *Entry = getDIEEntry(Element); if (!Entry) { @@ -1005,20 +1261,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } } else continue; - Buffer.addChild(ElemDie); } if (CTy.isAppleBlockExtension()) addFlag(&Buffer, dwarf::DW_AT_APPLE_block); - DICompositeType ContainingType = CTy.getContainingType(); - if (DIDescriptor(ContainingType).isCompositeType()) - addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, - getOrCreateTypeDIE(DIType(ContainingType))); - else { - DIDescriptor Context = CTy.getContext(); - addToContextOwner(&Buffer, Context); - } + DICompositeType ContainingType(resolve(CTy.getContainingType())); + if (ContainingType) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, + getOrCreateTypeDIE(ContainingType)); if (CTy.isObjcClassComplete()) addFlag(&Buffer, dwarf::DW_AT_APPLE_objc_complete_type); @@ -1026,8 +1277,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // Add template parameters to a class, structure or union types. // FIXME: The support isn't in the metadata for this yet. if (Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || - Tag == dwarf::DW_TAG_union_type) + Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) addTemplateParams(Buffer, CTy.getTemplateParams()); break; @@ -1041,16 +1291,15 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { addString(&Buffer, dwarf::DW_AT_name, Name); if (Tag == dwarf::DW_TAG_enumeration_type || - Tag == dwarf::DW_TAG_class_type || - Tag == dwarf::DW_TAG_structure_type || + Tag == dwarf::DW_TAG_class_type || Tag == dwarf::DW_TAG_structure_type || Tag == dwarf::DW_TAG_union_type) { // Add size if non-zero (derived types might be zero-sized.) // TODO: Do we care about size for enum forward declarations? if (Size) - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, Size); else if (!CTy.isForwardDecl()) // Add zero size if it is not a forward declaration. - addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, 0); + addUInt(&Buffer, dwarf::DW_AT_byte_size, None, 0); // If we're a forward decl, say so. if (CTy.isForwardDecl()) @@ -1063,117 +1312,128 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { // No harm in adding the runtime language to the declaration. unsigned RLang = CTy.getRunTimeLang(); if (RLang) - addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, - dwarf::DW_FORM_data1, RLang); + addUInt(&Buffer, dwarf::DW_AT_APPLE_runtime_class, dwarf::DW_FORM_data1, + RLang); } + // If this is a type applicable to a type unit it then add it to the + // list of types we'll compute a hash for later. + if (shouldCreateTypeUnit(CTy, DD)) + DD->addTypeUnitType(&Buffer); } -/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateTypeParameter. -DIE * -CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { - DIE *ParamDIE = getDIE(TP); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_type_parameter); - addType(ParamDIE, TP.getType()); - addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); - return ParamDIE; +/// constructTemplateTypeParameterDIE - Construct new DIE for the given +/// DITemplateTypeParameter. 
+void +CompileUnit::constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP) { + DIE *ParamDIE = + createAndAddDIE(dwarf::DW_TAG_template_type_parameter, Buffer); + // Add the type if it exists, it could be void and therefore no type. + if (TP.getType()) + addType(ParamDIE, resolve(TP.getType())); + if (!TP.getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, TP.getName()); } -/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE -/// for the given DITemplateValueParameter. -DIE * -CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){ - DIE *ParamDIE = getDIE(TPV); - if (ParamDIE) - return ParamDIE; - - ParamDIE = new DIE(dwarf::DW_TAG_template_value_parameter); - addType(ParamDIE, TPV.getType()); - if (!TPV.getName().empty()) - addString(ParamDIE, dwarf::DW_AT_name, TPV.getName()); - addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, - TPV.getValue()); - return ParamDIE; +/// constructTemplateValueParameterDIE - Construct new DIE for the given +/// DITemplateValueParameter. +void +CompileUnit::constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter VP) { + DIE *ParamDIE = createAndAddDIE(VP.getTag(), Buffer); + + // Add the type if there is one, template template and template parameter + // packs will not have a type. + if (VP.getTag() == dwarf::DW_TAG_template_value_parameter) + addType(ParamDIE, resolve(VP.getType())); + if (!VP.getName().empty()) + addString(ParamDIE, dwarf::DW_AT_name, VP.getName()); + if (Value *Val = VP.getValue()) { + if (ConstantInt *CI = dyn_cast<ConstantInt>(Val)) + addConstantValue(ParamDIE, CI, + isUnsignedDIType(DD, resolve(VP.getType()))); + else if (GlobalValue *GV = dyn_cast<GlobalValue>(Val)) { + // For declaration non-type template parameters (such as global values and + // functions) + DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); + addOpAddress(Block, Asm->getSymbol(GV)); + // Emit DW_OP_stack_value to use the address as the immediate value of the + // parameter, rather than a pointer to it. + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_stack_value); + addBlock(ParamDIE, dwarf::DW_AT_location, Block); + } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_template_param) { + assert(isa<MDString>(Val)); + addString(ParamDIE, dwarf::DW_AT_GNU_template_name, + cast<MDString>(Val)->getString()); + } else if (VP.getTag() == dwarf::DW_TAG_GNU_template_parameter_pack) { + assert(isa<MDNode>(Val)); + DIArray A(cast<MDNode>(Val)); + addTemplateParams(*ParamDIE, A); + } + } } /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *CompileUnit::getOrCreateNameSpace(DINameSpace NS) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(NS.getContext()); + DIE *NDie = getDIE(NS); if (NDie) return NDie; - NDie = new DIE(dwarf::DW_TAG_namespace); - insertDIE(NS, NDie); + NDie = createAndAddDIE(dwarf::DW_TAG_namespace, *ContextDIE, NS); + if (!NS.getName().empty()) { addString(NDie, dwarf::DW_AT_name, NS.getName()); addAccelNamespace(NS.getName(), NDie); + addGlobalName(NS.getName(), NDie, NS.getContext()); } else addAccelNamespace("(anonymous namespace)", NDie); addSourceLine(NDie, NS); - addToContextOwner(NDie, NS.getContext()); return NDie; } -/// getRealLinkageName - If special LLVM prefix that is used to inform the asm -/// printer to not emit usual symbol prefix before the symbol name is used then -/// return linkage name after skipping this special LLVM prefix.
-static StringRef getRealLinkageName(StringRef LinkageName) { - char One = '\1'; - if (LinkageName.startswith(StringRef(&One, 1))) - return LinkageName.substr(1); - return LinkageName; -} - /// getOrCreateSubprogramDIE - Create new DIE using SP. DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE (as is the case for member function + // declarations). + DIE *ContextDIE = getOrCreateContextDIE(resolve(SP.getContext())); + DIE *SPDie = getDIE(SP); if (SPDie) return SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); + DISubprogram SPDecl = SP.getFunctionDeclaration(); + if (SPDecl.isSubprogram()) + // Add subprogram definitions to the CU die directly. + ContextDIE = CUDie.get(); // DW_TAG_inlined_subroutine may refer to this DIE. - insertDIE(SP, SPDie); + SPDie = createAndAddDIE(dwarf::DW_TAG_subprogram, *ContextDIE, SP); - DISubprogram SPDecl = SP.getFunctionDeclaration(); DIE *DeclDie = NULL; - if (SPDecl.isSubprogram()) { + if (SPDecl.isSubprogram()) DeclDie = getOrCreateSubprogramDIE(SPDecl); - } - - // Add to context owner. - addToContextOwner(SPDie, SP.getContext()); // Add function template parameters. addTemplateParams(*SPDie, SP.getTemplateParams()); - // Unfortunately this code needs to stay here instead of below the - // AT_specification code in order to work around a bug in older - // gdbs that requires the linkage name to resolve multiple template - // functions. - // TODO: Remove this set of code when we get rid of the old gdb - // compatibility. - StringRef LinkageName = SP.getLinkageName(); - if (!LinkageName.empty() && DD->useDarwinGDBCompat()) - addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - // If this DIE is going to refer declaration info using AT_specification // then there is no need to add other attributes. if (DeclDie) { // Refer function declaration directly. - addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - DeclDie); + addDIEEntry(SPDie, dwarf::DW_AT_specification, DeclDie); return SPDie; } // Add the linkage name if we have one. - if (!LinkageName.empty() && !DD->useDarwinGDBCompat()) + StringRef LinkageName = SP.getLinkageName(); + if (!LinkageName.empty()) addString(SPDie, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); + GlobalValue::getRealLinkageName(LinkageName)); // Constructors and operators for anonymous aggregates do not have names. if (!SP.getName().empty()) @@ -1183,31 +1443,31 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add the prototype if we have a prototype and we have a C like // language. + uint16_t Language = getLanguage(); if (SP.isPrototyped() && - (Language == dwarf::DW_LANG_C89 || - Language == dwarf::DW_LANG_C99 || + (Language == dwarf::DW_LANG_C89 || Language == dwarf::DW_LANG_C99 || Language == dwarf::DW_LANG_ObjC)) addFlag(SPDie, dwarf::DW_AT_prototyped); - // Add Return Type. DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); + assert(SPTy.getTag() == dwarf::DW_TAG_subroutine_type && + "the type of a subprogram should be a subroutine"); - if (Args.getNumElements() == 0 || SPTag != dwarf::DW_TAG_subroutine_type) - addType(SPDie, SPTy); - else + DIArray Args = SPTy.getTypeArray(); + // Add a return type. If this is a type like a C/C++ void type we don't add a + // return type. 
+ if (Args.getElement(0)) addType(SPDie, DIType(Args.getElement(0))); unsigned VK = SP.getVirtuality(); if (VK) { addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_data1, VK); DIEBlock *Block = getDIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(Block, 0, dwarf::DW_FORM_udata, SP.getVirtualIndex()); - addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block); - ContainingTypeMap.insert(std::make_pair(SPDie, - SP.getContainingType())); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(Block, dwarf::DW_FORM_udata, SP.getVirtualIndex()); + addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, Block); + ContainingTypeMap.insert( + std::make_pair(SPDie, resolve(SP.getContainingType()))); } if (!SP.isDefinition()) { @@ -1215,19 +1475,13 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { // Add arguments. Do not add arguments for subprogram definition. They will // be handled while processing variables. - DICompositeType SPTy = SP.getType(); - DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); - - if (SPTag == dwarf::DW_TAG_subroutine_type) - for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(Args.getElement(i)); - addType(Arg, ATy); - if (ATy.isArtificial()) - addFlag(Arg, dwarf::DW_AT_artificial); - SPDie->addChild(Arg); - } + for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { + DIE *Arg = createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); + DIType ATy(Args.getElement(i)); + addType(Arg, ATy); + if (ATy.isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); + } } if (SP.isArtificial()) @@ -1274,16 +1528,16 @@ static const ConstantExpr *getMergedGlobalExpr(const Value *V) { } /// createGlobalVariableDIE - create global variable DIE. -void CompileUnit::createGlobalVariableDIE(const MDNode *N) { +void CompileUnit::createGlobalVariableDIE(DIGlobalVariable GV) { + // Check for pre-existence. - if (getDIE(N)) + if (getDIE(GV)) return; - DIGlobalVariable GV(N); - if (!GV.Verify()) + if (!GV.isGlobalVariable()) return; - DIDescriptor GVContext = GV.getContext(); + DIScope GVContext = GV.getContext(); DIType GTy = GV.getType(); // If this is a static data member definition, some attributes belong @@ -1294,35 +1548,30 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (SDMDecl.Verify()) { assert(SDMDecl.isStaticMember() && "Expected static member decl"); // We need the declaration DIE that is in the static member's class. - // But that class might not exist in the DWARF yet. - // Creating the class will create the static member decl DIE. - getOrCreateContextDIE(SDMDecl.getContext()); - VariableDIE = getDIE(SDMDecl); - assert(VariableDIE && "Static member decl has no context?"); + VariableDIE = getOrCreateStaticMemberDIE(SDMDecl); IsStaticMember = true; } // If this is not a static data member definition, create the variable // DIE and add the initial set of attributes to it. if (!VariableDIE) { - VariableDIE = new DIE(GV.getTag()); + // Construct the context before querying for the existence of the DIE in + // case such construction creates the DIE. + DIE *ContextDIE = getOrCreateContextDIE(GVContext); + // Add to map. - insertDIE(N, VariableDIE); + VariableDIE = createAndAddDIE(GV.getTag(), *ContextDIE, GV); // Add name and type. addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); addType(VariableDIE, GTy); // Add scoping info. 
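The DW_AT_vtable_elem_location block assembled above is a two-element DWARF expression: DW_OP_constu followed by the ULEB128-encoded vtable slot. A rough standalone illustration of the bytes a consumer sees (the 0x10 opcode value is from the DWARF specification; emitVtableSlotExpr is a hypothetical helper, not part of this patch):

    #include <stdint.h>
    #include <vector>

    // Sketch: encode "DW_OP_constu <index>" as it appears in the exprloc.
    static void emitVtableSlotExpr(uint64_t VirtualIndex,
                                   std::vector<uint8_t> &Out) {
      Out.push_back(0x10); // DW_OP_constu
      do {                 // ULEB128-encode the vtable slot
        uint8_t Byte = VirtualIndex & 0x7f;
        VirtualIndex >>= 7;
        if (VirtualIndex)
          Byte |= 0x80;
        Out.push_back(Byte);
      } while (VirtualIndex);
    }
    // emitVtableSlotExpr(3, Bytes) yields { 0x10, 0x03 }.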
- if (!GV.isLocalToUnit()) { + if (!GV.isLocalToUnit()) addFlag(VariableDIE, dwarf::DW_AT_external); - addGlobalName(GV.getName(), VariableDIE); - } // Add line number info. addSourceLine(VariableDIE, GV); - // Add to context owner. - addToContextOwner(VariableDIE, GVContext); } // Add location. @@ -1332,57 +1581,73 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal())); + const MCSymbol *Sym = Asm->getSymbol(GV.getGlobal()); + if (GV.getGlobal()->isThreadLocal()) { + // FIXME: Make this work with -gsplit-dwarf. + unsigned PointerSize = Asm->getDataLayout().getPointerSize(); + assert((PointerSize == 4 || PointerSize == 8) && + "Add support for other sizes if necessary"); + const MCExpr *Expr = + Asm->getObjFileLowering().getDebugThreadLocalSymbol(Sym); + // Based on GCC's support for TLS: + if (!DD->useSplitDwarf()) { + // 1) Start with a constNu of the appropriate pointer size + addUInt(Block, dwarf::DW_FORM_data1, + PointerSize == 4 ? dwarf::DW_OP_const4u : dwarf::DW_OP_const8u); + // 2) containing the (relocated) offset of the TLS variable + // within the module's TLS block. + addExpr(Block, dwarf::DW_FORM_udata, Expr); + } else { + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_const_index); + addUInt(Block, dwarf::DW_FORM_udata, DU->getAddrPoolIndex(Expr)); + } + // 3) followed by a custom OP to make the debugger do a TLS lookup. + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_push_tls_address); + } else + addOpAddress(Block, Sym); // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && - !GVContext.isFile() && !isSubprogramContext(GVContext)) { + !GVContext.isFile() && !DD->isSubprogramContext(GVContext)) { // Create specification DIE. - VariableSpecDIE = new DIE(dwarf::DW_TAG_variable); - addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, - dwarf::DW_FORM_ref4, VariableDIE); - addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); + VariableSpecDIE = createAndAddDIE(dwarf::DW_TAG_variable, *CUDie); + addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, VariableDIE); + addBlock(VariableSpecDIE, dwarf::DW_AT_location, Block); // A static member's declaration is already flagged as such. if (!SDMDecl.Verify()) addFlag(VariableDIE, dwarf::DW_AT_declaration); - addDie(VariableSpecDIE); } else { - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + addBlock(VariableDIE, dwarf::DW_AT_location, Block); } - // Add linkage name. + // Add the linkage name. StringRef LinkageName = GV.getLinkageName(); - if (!LinkageName.empty()) { + if (!LinkageName.empty()) // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and // TAG_variable. - addString(IsStaticMember && VariableSpecDIE ? - VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - // In compatibility mode with older gdbs we put the linkage name on both - // the TAG_variable DIE and on the TAG_member DIE. - if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat()) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - } + addString(IsStaticMember && VariableSpecDIE ? 
VariableSpecDIE + : VariableDIE, + dwarf::DW_AT_MIPS_linkage_name, + GlobalValue::getRealLinkageName(LinkageName)); } else if (const ConstantInt *CI = - dyn_cast_or_null<ConstantInt>(GV.getConstant())) { + dyn_cast_or_null<ConstantInt>(GV.getConstant())) { // AT_const_value was added when the static member was created. To avoid // emitting AT_const_value multiple times, we only add AT_const_value when // it is not a static member. if (!IsStaticMember) - addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); - } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + addConstantValue(VariableDIE, CI, isUnsignedDIType(DD, GTy)); + } else if (const ConstantExpr *CE = getMergedGlobalExpr(GV->getOperand(11))) { addToAccelTable = true; // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); Value *Ptr = CE->getOperand(0); - addOpAddress(Block, Asm->Mang->getSymbol(cast<GlobalValue>(Ptr))); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - SmallVector<Value*, 3> Idx(CE->op_begin()+1, CE->op_end()); - addUInt(Block, 0, dwarf::DW_FORM_udata, - Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); + addOpAddress(Block, Asm->getSymbol(cast<GlobalValue>(Ptr))); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + SmallVector<Value *, 3> Idx(CE->op_begin() + 1, CE->op_end()); + addUInt(Block, dwarf::DW_FORM_udata, + Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); + addUInt(Block, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + addBlock(VariableDIE, dwarf::DW_AT_location, Block); } if (addToAccelTable) { @@ -1395,14 +1660,16 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addAccelName(GV.getLinkageName(), AddrDIE); } - return; + if (!GV.isLocalToUnit()) + addGlobalName(GV.getName(), VariableSpecDIE ? VariableSpecDIE : VariableDIE, + GV.getContext()); } /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { - DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); - addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); + DIE *DW_Subrange = createAndAddDIE(dwarf::DW_TAG_subrange_type, Buffer); + addDIEEntry(DW_Subrange, dwarf::DW_AT_type, IndexTy); // The LowerBound value defines the lower bounds which is typically zero for // C/C++. The Count value is the number of elements. Values are 64 bit. If @@ -1415,26 +1682,22 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, int64_t Count = SR.getCount(); if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) - addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound); + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, None, LowerBound); if (Count != -1 && Count != 0) // FIXME: An unbounded array should reference the expression that defines // the array. - addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1); - - Buffer.addChild(DW_Subrange); + addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, None, + LowerBound + Count - 1); } /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. -void CompileUnit::constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy) { - Buffer.setTag(dwarf::DW_TAG_array_type); - if (CTy->isVector()) +void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy) { + if (CTy.isVector()) addFlag(&Buffer, dwarf::DW_AT_GNU_vector); - // Emit derived type.
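For the merged-global branch earlier in this hunk, the location expression comes out as DW_OP_addr <symbol>, DW_OP_constu <byte offset>, DW_OP_plus, i.e. the address of the merged blob plus the member's offset within it. A hedged sketch of that encoding, assuming a 64-bit little-endian target (emitMergedGlobalLoc and the fixed 8-byte address operand are illustrative assumptions):

    #include <stdint.h>
    #include <vector>

    static void emitMergedGlobalLoc(uint64_t Addr, uint64_t Off,
                                    std::vector<uint8_t> &Out) {
      Out.push_back(0x03);                        // DW_OP_addr
      for (int i = 0; i < 8; ++i)                 // little-endian address
        Out.push_back(uint8_t(Addr >> (8 * i)));
      Out.push_back(0x10);                        // DW_OP_constu
      do {                                        // ULEB128 byte offset
        uint8_t B = Off & 0x7f;
        Off >>= 7;
        Out.push_back(Off ? uint8_t(B | 0x80) : B);
      } while (Off);
      Out.push_back(0x22);                        // DW_OP_plus
    }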
- addType(&Buffer, CTy->getTypeDerivedFrom()); - DIArray Elements = CTy->getTypeArray(); + // Emit the element type. + addType(&Buffer, resolve(CTy.getTypeDerivedFrom())); // Get an anonymous type for index type. // FIXME: This type should be passed down from the front end @@ -1442,16 +1705,16 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DIE *IdxTy = getIndexTyDie(); if (!IdxTy) { // Construct an anonymous type for index type. - IdxTy = new DIE(dwarf::DW_TAG_base_type); + IdxTy = createAndAddDIE(dwarf::DW_TAG_base_type, *CUDie.get()); addString(IdxTy, dwarf::DW_AT_name, "int"); - addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); + addUInt(IdxTy, dwarf::DW_AT_byte_size, None, sizeof(int32_t)); addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::DW_ATE_signed); - addDie(IdxTy); setIndexTyDie(IdxTy); } // Add subranges to array type. + DIArray Elements = CTy.getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); if (Element.getTag() == dwarf::DW_TAG_subrange_type) @@ -1459,195 +1722,183 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, } } -/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. -DIE *CompileUnit::constructEnumTypeDIE(DIEnumerator ETy) { - DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator); - StringRef Name = ETy.getName(); - addString(Enumerator, dwarf::DW_AT_name, Name); - int64_t Value = ETy.getEnumValue(); - addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); - return Enumerator; +/// constructEnumTypeDIE - Construct an enum type DIE from DICompositeType. +void CompileUnit::constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy) { + DIArray Elements = CTy.getTypeArray(); + + // Add enumerators to enumeration type. + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { + DIEnumerator Enum(Elements.getElement(i)); + if (Enum.isEnumerator()) { + DIE *Enumerator = createAndAddDIE(dwarf::DW_TAG_enumerator, Buffer); + StringRef Name = Enum.getName(); + addString(Enumerator, dwarf::DW_AT_name, Name); + int64_t Value = Enum.getEnumValue(); + addSInt(Enumerator, dwarf::DW_AT_const_value, dwarf::DW_FORM_sdata, Value); + } + } + DIType DTy = resolve(CTy.getTypeDerivedFrom()); + if (DTy) { + addType(&Buffer, DTy); + addFlag(&Buffer, dwarf::DW_AT_enum_class); + } } /// constructContainingTypeDIEs - Construct DIEs for types that contain /// vtables. void CompileUnit::constructContainingTypeDIEs() { for (DenseMap<DIE *, const MDNode *>::iterator CI = ContainingTypeMap.begin(), - CE = ContainingTypeMap.end(); CI != CE; ++CI) { + CE = ContainingTypeMap.end(); + CI != CE; ++CI) { DIE *SPDie = CI->first; - const MDNode *N = CI->second; - if (!N) continue; - DIE *NDie = getDIE(N); - if (!NDie) continue; - addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie); + DIDescriptor D(CI->second); + if (!D) + continue; + DIE *NDie = getDIE(D); + if (!NDie) + continue; + addDIEEntry(SPDie, dwarf::DW_AT_containing_type, NDie); } } /// constructVariableDIE - Construct a DIE for the given DbgVariable. -DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { - StringRef Name = DV->getName(); - - // Translate tag to proper Dwarf tag. - unsigned Tag = DV->getTag(); +DIE *CompileUnit::constructVariableDIE(DbgVariable &DV, bool isScopeAbstract) { + StringRef Name = DV.getName(); // Define variable debug information entry.
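To tie the subrange and array hunks above to concrete numbers: for the C declaration int a[10], DISubrange carries LowerBound 0 and Count 10; C's default lower bound is 0, so no DW_AT_lower_bound is emitted and DW_AT_upper_bound is 0 + 10 - 1 = 9. A self-contained re-derivation of that bound selection (computeBounds is a stand-in for the logic in constructSubrangeDIE, not the patch's code):

    #include <cassert>
    #include <stdint.h>

    struct SubrangeAttrs {
      bool EmitLower, EmitUpper;
      int64_t Lower, Upper;
    };

    static SubrangeAttrs computeBounds(int64_t LowerBound, int64_t Count,
                                       int64_t DefaultLowerBound) {
      SubrangeAttrs A = { false, false, 0, 0 };
      if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) {
        A.EmitLower = true;  // non-default lower bounds must be explicit
        A.Lower = LowerBound;
      }
      if (Count != -1 && Count != 0) { // -1 and 0 mark unbounded arrays
        A.EmitUpper = true;
        A.Upper = LowerBound + Count - 1;
      }
      return A;
    }

    int main() {
      SubrangeAttrs A = computeBounds(0, 10, 0); // int a[10] in C
      assert(!A.EmitLower && A.EmitUpper && A.Upper == 9);
      return 0;
    }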
- DIE *VariableDie = new DIE(Tag); - DbgVariable *AbsVar = DV->getAbstractVariable(); + DIE *VariableDie = new DIE(DV.getTag()); + DbgVariable *AbsVar = DV.getAbstractVariable(); DIE *AbsDIE = AbsVar ? AbsVar->getDIE() : NULL; if (AbsDIE) - addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsDIE); + addDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin, AbsDIE); else { - addString(VariableDie, dwarf::DW_AT_name, Name); - addSourceLine(VariableDie, DV->getVariable()); - addType(VariableDie, DV->getType()); + if (!Name.empty()) + addString(VariableDie, dwarf::DW_AT_name, Name); + addSourceLine(VariableDie, DV.getVariable()); + addType(VariableDie, DV.getType()); } - if (DV->isArtificial()) + if (DV.isArtificial()) addFlag(VariableDie, dwarf::DW_AT_artificial); if (isScopeAbstract) { - DV->setDIE(VariableDie); + DV.setDIE(VariableDie); return VariableDie; } // Add variable address. - unsigned Offset = DV->getDotDebugLocOffset(); + unsigned Offset = DV.getDotDebugLocOffset(); if (Offset != ~0U) { - addLabel(VariableDie, dwarf::DW_AT_location, - dwarf::DW_FORM_data4, - Asm->GetTempSymbol("debug_loc", Offset)); - DV->setDIE(VariableDie); + addSectionLabel(VariableDie, dwarf::DW_AT_location, + Asm->GetTempSymbol("debug_loc", Offset)); + DV.setDIE(VariableDie); return VariableDie; } // Check if variable is described by a DBG_VALUE instruction. - if (const MachineInstr *DVInsn = DV->getMInsn()) { - bool updated = false; - if (DVInsn->getNumOperands() == 3) { - if (DVInsn->getOperand(0).isReg()) { - const MachineOperand RegOp = DVInsn->getOperand(0); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - if (DVInsn->getOperand(1).isImm() && - TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { - unsigned FrameReg = 0; - const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, - DVInsn->getOperand(1).getImm(), - FrameReg); - MachineLocation Location(FrameReg, Offset); - addVariableAddress(DV, VariableDie, Location); - - } else if (RegOp.getReg()) - addVariableAddress(DV, VariableDie, - MachineLocation(RegOp.getReg())); - updated = true; - } - else if (DVInsn->getOperand(0).isImm()) - updated = - addConstantValue(VariableDie, DVInsn->getOperand(0), - DV->getType()); - else if (DVInsn->getOperand(0).isFPImm()) - updated = - addConstantFPValue(VariableDie, DVInsn->getOperand(0)); - else if (DVInsn->getOperand(0).isCImm()) - updated = - addConstantValue(VariableDie, - DVInsn->getOperand(0).getCImm(), - DV->getType().isUnsignedDIType()); - } else { - addVariableAddress(DV, VariableDie, - Asm->getDebugValueLocation(DVInsn)); - updated = true; - } - if (!updated) { - // If variableDie is not updated then DBG_VALUE instruction does not - // have valid variable info. - delete VariableDie; - return NULL; - } - DV->setDIE(VariableDie); + if (const MachineInstr *DVInsn = DV.getMInsn()) { + assert(DVInsn->getNumOperands() == 3); + if (DVInsn->getOperand(0).isReg()) { + const MachineOperand RegOp = DVInsn->getOperand(0); + // If the second operand is an immediate, this is an indirect value. 
+ if (DVInsn->getOperand(1).isImm()) { + MachineLocation Location(RegOp.getReg(), + DVInsn->getOperand(1).getImm()); + addVariableAddress(DV, VariableDie, Location); + } else if (RegOp.getReg()) + addVariableAddress(DV, VariableDie, MachineLocation(RegOp.getReg())); + } else if (DVInsn->getOperand(0).isImm()) + addConstantValue(VariableDie, DVInsn->getOperand(0), DV.getType()); + else if (DVInsn->getOperand(0).isFPImm()) + addConstantFPValue(VariableDie, DVInsn->getOperand(0)); + else if (DVInsn->getOperand(0).isCImm()) + addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(), + isUnsignedDIType(DD, DV.getType())); + + DV.setDIE(VariableDie); return VariableDie; } else { // .. else use frame index. - int FI = DV->getFrameIndex(); + int FI = DV.getFrameIndex(); if (FI != ~0) { unsigned FrameReg = 0; const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); MachineLocation Location(FrameReg, Offset); addVariableAddress(DV, VariableDie, Location); } } - DV->setDIE(VariableDie); + DV.setDIE(VariableDie); return VariableDie; } -/// createMemberDIE - Create new member DIE. -DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { - DIE *MemberDie = new DIE(DT.getTag()); +/// constructMemberDIE - Construct member DIE from DIDerivedType. +void CompileUnit::constructMemberDIE(DIE &Buffer, DIDerivedType DT) { + DIE *MemberDie = createAndAddDIE(DT.getTag(), Buffer); StringRef Name = DT.getName(); if (!Name.empty()) addString(MemberDie, dwarf::DW_AT_name, Name); - addType(MemberDie, DT.getTypeDerivedFrom()); + addType(MemberDie, resolve(DT.getTypeDerivedFrom())); addSourceLine(MemberDie, DT); - DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(MemLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); - - uint64_t Size = DT.getSizeInBits(); - uint64_t FieldSize = DT.getOriginalTypeSize(); - - if (Size != FieldSize) { - // Handle bitfield. - addUInt(MemberDie, dwarf::DW_AT_byte_size, 0, DT.getOriginalTypeSize()>>3); - addUInt(MemberDie, dwarf::DW_AT_bit_size, 0, DT.getSizeInBits()); - - uint64_t Offset = DT.getOffsetInBits(); - uint64_t AlignMask = ~(DT.getAlignInBits() - 1); - uint64_t HiMark = (Offset + FieldSize) & AlignMask; - uint64_t FieldOffset = (HiMark - FieldSize); - Offset -= FieldOffset; - - // Maybe we need to work from the other end. - if (Asm->getDataLayout().isLittleEndian()) - Offset = FieldSize - (Offset + Size); - addUInt(MemberDie, dwarf::DW_AT_bit_offset, 0, Offset); - - // Here WD_AT_data_member_location points to the anonymous - // field that includes this bit field. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, FieldOffset >> 3); - - } else - // This is not a bitfield. - addUInt(MemLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits() >> 3); - - if (DT.getTag() == dwarf::DW_TAG_inheritance - && DT.isVirtual()) { + if (DT.getTag() == dwarf::DW_TAG_inheritance && DT.isVirtual()) { // For C++, virtual base classes are not at fixed offset. Use following // expression to extract appropriate offset from vtable. 
// BaseAddr = ObAddr + *((*ObAddr) - Offset) DIEBlock *VBaseLocationDie = new (DIEValueAllocator) DIEBlock(); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_udata, DT.getOffsetInBits()); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); - addUInt(VBaseLocationDie, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); - - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, - VBaseLocationDie); - } else - addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_dup); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); + addUInt(VBaseLocationDie, dwarf::DW_FORM_udata, DT.getOffsetInBits()); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_minus); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_deref); + addUInt(VBaseLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); + + addBlock(MemberDie, dwarf::DW_AT_data_member_location, VBaseLocationDie); + } else { + uint64_t Size = DT.getSizeInBits(); + uint64_t FieldSize = getBaseTypeSize(DD, DT); + uint64_t OffsetInBytes; + + if (Size != FieldSize) { + // Handle bitfield. + addUInt(MemberDie, dwarf::DW_AT_byte_size, None, + getBaseTypeSize(DD, DT) >> 3); + addUInt(MemberDie, dwarf::DW_AT_bit_size, None, DT.getSizeInBits()); + + uint64_t Offset = DT.getOffsetInBits(); + uint64_t AlignMask = ~(DT.getAlignInBits() - 1); + uint64_t HiMark = (Offset + FieldSize) & AlignMask; + uint64_t FieldOffset = (HiMark - FieldSize); + Offset -= FieldOffset; + + // Maybe we need to work from the other end. + if (Asm->getDataLayout().isLittleEndian()) + Offset = FieldSize - (Offset + Size); + addUInt(MemberDie, dwarf::DW_AT_bit_offset, None, Offset); + + // Here WD_AT_data_member_location points to the anonymous + // field that includes this bit field. + OffsetInBytes = FieldOffset >> 3; + } else + // This is not a bitfield. + OffsetInBytes = DT.getOffsetInBits() >> 3; + + if (DD->getDwarfVersion() <= 2) { + DIEBlock *MemLocationDie = new (DIEValueAllocator) DIEBlock(); + addUInt(MemLocationDie, dwarf::DW_FORM_data1, dwarf::DW_OP_plus_uconst); + addUInt(MemLocationDie, dwarf::DW_FORM_udata, OffsetInBytes); + addBlock(MemberDie, dwarf::DW_AT_data_member_location, MemLocationDie); + } else + addUInt(MemberDie, dwarf::DW_AT_data_member_location, None, + OffsetInBytes); + } if (DT.isProtected()) addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, @@ -1671,17 +1922,26 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { if (DT.isArtificial()) addFlag(MemberDie, dwarf::DW_AT_artificial); - - return MemberDie; } -/// createStaticMemberDIE - Create new DIE for C++ static member. -DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { +/// getOrCreateStaticMemberDIE - Create new DIE for C++ static member. +DIE *CompileUnit::getOrCreateStaticMemberDIE(DIDerivedType DT) { if (!DT.Verify()) return NULL; - DIE *StaticMemberDIE = new DIE(DT.getTag()); - DIType Ty = DT.getTypeDerivedFrom(); + // Construct the context before querying for the existence of the DIE in case + // such construction creates the DIE. 
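The bitfield arithmetic in constructMemberDIE above is easiest to sanity-check with numbers. For struct S { unsigned a : 3; unsigned b : 5; }; on a little-endian target, field b has Size 5, FieldSize 32 and OffsetInBits 3, which yields FieldOffset 0 (so DW_AT_data_member_location 0) and DW_AT_bit_offset 32 - (3 + 5) = 24, counted from the most significant end as DWARF 2 and 3 require. A standalone re-derivation (bitOffset mirrors the computation; it is not the patch's code):

    #include <cassert>
    #include <stdint.h>

    // Returns DW_AT_bit_offset; OffsetInBytes receives the byte offset of
    // the storage unit holding the field (DW_AT_data_member_location).
    static uint64_t bitOffset(uint64_t Size, uint64_t FieldSize,
                              uint64_t OffsetInBits, uint64_t AlignInBits,
                              bool LittleEndian, uint64_t &OffsetInBytes) {
      uint64_t Offset = OffsetInBits;
      uint64_t AlignMask = ~(AlignInBits - 1);
      uint64_t HiMark = (Offset + FieldSize) & AlignMask;
      uint64_t FieldOffset = HiMark - FieldSize;
      Offset -= FieldOffset;
      if (LittleEndian) // DWARF counts bit offsets from the MSB end
        Offset = FieldSize - (Offset + Size);
      OffsetInBytes = FieldOffset >> 3;
      return Offset;
    }

    int main() {
      uint64_t Byte;
      // unsigned b : 5, preceded by unsigned a : 3, little-endian.
      assert(bitOffset(5, 32, 3, 32, true, Byte) == 24 && Byte == 0);
      return 0;
    }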
+ DIE *ContextDIE = getOrCreateContextDIE(resolve(DT.getContext())); + assert(dwarf::isType(ContextDIE->getTag()) && + "Static member should belong to a type."); + + DIE *StaticMemberDIE = getDIE(DT); + if (StaticMemberDIE) + return StaticMemberDIE; + + StaticMemberDIE = createAndAddDIE(DT.getTag(), *ContextDIE, DT); + + DIType Ty = resolve(DT.getTypeDerivedFrom()); addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName()); addType(StaticMemberDIE, Ty); @@ -1702,10 +1962,20 @@ DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { dwarf::DW_ACCESS_public); if (const ConstantInt *CI = dyn_cast_or_null<ConstantInt>(DT.getConstant())) - addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType()); + addConstantValue(StaticMemberDIE, CI, isUnsignedDIType(DD, Ty)); if (const ConstantFP *CFP = dyn_cast_or_null<ConstantFP>(DT.getConstant())) addConstantFPValue(StaticMemberDIE, CFP); - insertDIE(DT, StaticMemberDIE); return StaticMemberDIE; } + +void CompileUnit::emitHeader(const MCSection *ASection, + const MCSymbol *ASectionSym) { + Asm->OutStreamer.AddComment("DWARF version number"); + Asm->EmitInt16(DD->getDwarfVersion()); + Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); + Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), + ASectionSym); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); +} diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index 8f08c63..69a96df 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,15 +15,16 @@ #define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DIE.h" +#include "DwarfDebug.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" #include "llvm/DebugInfo.h" +#include "llvm/MC/MCExpr.h" namespace llvm { -class DwarfDebug; -class DwarfUnits; class MachineLocation; class MachineOperand; class ConstantInt; @@ -38,11 +39,10 @@ class CompileUnit { /// unsigned UniqueID; - /// Language - The DW_AT_language of the compile unit - /// - unsigned Language; + /// Node - MDNode for the compile unit. + DICompileUnit Node; - /// Die - Compile unit debug information entry. + /// CUDie - Compile unit debug information entry. /// const OwningPtr<DIE> CUDie; @@ -56,28 +56,28 @@ class CompileUnit { /// IndexTyDie - An anonymous type for index type. Owned by CUDie. DIE *IndexTyDie; - /// MDNodeToDieMap - Tracks the mapping of unit level debug informaton + /// MDNodeToDieMap - Tracks the mapping of unit level debug information /// variables to debug information entries. DenseMap<const MDNode *, DIE *> MDNodeToDieMap; - /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug informaton + /// MDNodeToDIEEntryMap - Tracks the mapping of unit level debug information /// descriptors to debug information entries using a DIEEntry proxy. DenseMap<const MDNode *, DIEEntry *> MDNodeToDIEEntryMap; /// GlobalNames - A map of globally visible named entities for this unit. /// - StringMap<DIE*> GlobalNames; + StringMap<DIE *> GlobalNames; /// GlobalTypes - A map of globally visible types for this unit. /// - StringMap<DIE*> GlobalTypes; + StringMap<DIE *> GlobalTypes; /// AccelNames - A map of names for the name accelerator table.
/// - StringMap<std::vector<DIE*> > AccelNames; - StringMap<std::vector<DIE*> > AccelObjC; - StringMap<std::vector<DIE*> > AccelNamespace; - StringMap<std::vector<std::pair<DIE*, unsigned> > > AccelTypes; + StringMap<std::vector<DIE *> > AccelNames; + StringMap<std::vector<DIE *> > AccelObjC; + StringMap<std::vector<DIE *> > AccelNamespace; + StringMap<std::vector<std::pair<DIE *, unsigned> > > AccelTypes; /// DIEBlocks - A list of all the DIEBlocks in use. std::vector<DIEBlock *> DIEBlocks; @@ -87,163 +87,161 @@ class CompileUnit { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap<DIE *, const MDNode *> ContainingTypeMap; - /// Offset of the CUDie from beginning of debug info section. - unsigned DebugInfoOffset; + // DIEValueAllocator - All DIEValues are allocated through this allocator. + BumpPtrAllocator DIEValueAllocator; - /// getLowerBoundDefault - Return the default lower bound for an array. If the - /// DWARF version doesn't handle the language, return -1. - int64_t getDefaultLowerBound() const; + // DIEIntegerOne - A preallocated DIEValue because 1 is used frequently. + DIEInteger *DIEIntegerOne; public: - CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW, - DwarfUnits *); + CompileUnit(unsigned UID, DIE *D, DICompileUnit CU, AsmPrinter *A, + DwarfDebug *DW, DwarfUnits *DWU); ~CompileUnit(); // Accessors. - unsigned getUniqueID() const { return UniqueID; } - unsigned getLanguage() const { return Language; } - DIE* getCUDie() const { return CUDie.get(); } - unsigned getDebugInfoOffset() const { return DebugInfoOffset; } - const StringMap<DIE*> &getGlobalNames() const { return GlobalNames; } - const StringMap<DIE*> &getGlobalTypes() const { return GlobalTypes; } - - const StringMap<std::vector<DIE*> > &getAccelNames() const { + unsigned getUniqueID() const { return UniqueID; } + uint16_t getLanguage() const { return Node.getLanguage(); } + DICompileUnit getNode() const { return Node; } + DIE *getCUDie() const { return CUDie.get(); } + const StringMap<DIE *> &getGlobalNames() const { return GlobalNames; } + const StringMap<DIE *> &getGlobalTypes() const { return GlobalTypes; } + + const StringMap<std::vector<DIE *> > &getAccelNames() const { return AccelNames; } - const StringMap<std::vector<DIE*> > &getAccelObjC() const { + const StringMap<std::vector<DIE *> > &getAccelObjC() const { return AccelObjC; } - const StringMap<std::vector<DIE*> > &getAccelNamespace() const { + const StringMap<std::vector<DIE *> > &getAccelNamespace() const { return AccelNamespace; } - const StringMap<std::vector<std::pair<DIE*, unsigned> > > - &getAccelTypes() const { + const StringMap<std::vector<std::pair<DIE *, unsigned> > > & + getAccelTypes() const { return AccelTypes; } + unsigned getDebugInfoOffset() const { return DebugInfoOffset; } void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } + /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } + /// getParentContextString - Get a string containing the language specific + /// context for a global name. + std::string getParentContextString(DIScope Context) const; + /// addGlobalName - Add a new global entity to the compile unit. /// - void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; } + void addGlobalName(StringRef Name, DIE *Die, DIScope Context); /// addGlobalType - Add a new global type to the compile unit. /// void addGlobalType(DIType Ty); + /// addPubTypes - Add a set of types from the subprogram to the global types. + void addPubTypes(DISubprogram SP); /// addAccelName - Add a new name to the name accelerator table.
- void addAccelName(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelNames[Name]; - DIEs.push_back(Die); - } - void addAccelObjC(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelObjC[Name]; - DIEs.push_back(Die); - } - void addAccelNamespace(StringRef Name, DIE *Die) { - std::vector<DIE*> &DIEs = AccelNamespace[Name]; - DIEs.push_back(Die); - } - void addAccelType(StringRef Name, std::pair<DIE*, unsigned> Die) { - std::vector<std::pair<DIE*, unsigned> > &DIEs = AccelTypes[Name]; - DIEs.push_back(Die); - } + void addAccelName(StringRef Name, DIE *Die); - /// getDIE - Returns the debug information entry map slot for the - /// specified debug variable. - DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } + /// addAccelObjC - Add a new name to the ObjC accelerator table. + void addAccelObjC(StringRef Name, DIE *Die); - DIEBlock *getDIEBlock() { - return new (DIEValueAllocator) DIEBlock(); - } + /// addAccelNamespace - Add a new name to the namespace accelerator table. + void addAccelNamespace(StringRef Name, DIE *Die); - /// insertDIE - Insert DIE into the map. - void insertDIE(const MDNode *N, DIE *D) { - MDNodeToDieMap.insert(std::make_pair(N, D)); - } + /// addAccelType - Add a new type to the type accelerator table. + void addAccelType(StringRef Name, std::pair<DIE *, unsigned> Die); - /// getDIEEntry - Returns the debug information entry for the specified - /// debug variable. - DIEEntry *getDIEEntry(const MDNode *N) { - DenseMap<const MDNode *, DIEEntry *>::iterator I = - MDNodeToDIEEntryMap.find(N); - if (I == MDNodeToDIEEntryMap.end()) - return NULL; - return I->second; - } + /// getDIE - Returns the debug information entry map slot for the + /// specified debug variable. We delegate the request to DwarfDebug + /// when the MDNode can be part of the type system, since DIEs for + /// the type system can be shared across CUs and the mappings are + /// kept in DwarfDebug. + DIE *getDIE(DIDescriptor D) const; - /// insertDIEEntry - Insert debug information entry into the map. - void insertDIEEntry(const MDNode *N, DIEEntry *E) { - MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); - } + DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); } + + /// insertDIE - Insert DIE into the map. We delegate the request to DwarfDebug + /// when the MDNode can be part of the type system, since DIEs for + /// the type system can be shared across CUs and the mappings are + /// kept in DwarfDebug. + void insertDIE(DIDescriptor Desc, DIE *D); /// addDie - Adds or interns the DIE to the compile unit. /// - void addDie(DIE *Buffer) { - this->CUDie->addChild(Buffer); - } - - // getIndexTyDie - Get an anonymous type for index type. - DIE *getIndexTyDie() { - return IndexTyDie; - } - - // setIndexTyDie - Set D as anonymous type for index which can be reused - // later. - void setIndexTyDie(DIE *D) { - IndexTyDie = D; - } + void addDie(DIE *Buffer) { CUDie->addChild(Buffer); } /// addFlag - Add a flag that is true to the DIE. - void addFlag(DIE *Die, unsigned Attribute); + void addFlag(DIE *Die, dwarf::Attribute Attribute); /// addUInt - Add an unsigned integer attribute data and value. /// - void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); + void addUInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, + uint64_t Integer); + + void addUInt(DIEBlock *Block, dwarf::Form Form, uint64_t Integer); /// addSInt - Add an signed integer attribute data and value.
/// - void addSInt(DIE *Die, unsigned Attribute, unsigned Form, int64_t Integer); + void addSInt(DIE *Die, dwarf::Attribute Attribute, Optional<dwarf::Form> Form, + int64_t Integer); + + void addSInt(DIEBlock *Die, Optional<dwarf::Form> Form, int64_t Integer); /// addString - Add a string attribute data and value. /// - void addString(DIE *Die, unsigned Attribute, const StringRef Str); + void addString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); /// addLocalString - Add a string attribute data and value. /// - void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str); + void addLocalString(DIE *Die, dwarf::Attribute Attribute, const StringRef Str); + + /// addExpr - Add a Dwarf expression attribute data and value. + /// + void addExpr(DIEBlock *Die, dwarf::Form Form, const MCExpr *Expr); /// addLabel - Add a Dwarf label attribute data and value. /// - void addLabel(DIE *Die, unsigned Attribute, unsigned Form, + void addLabel(DIE *Die, dwarf::Attribute Attribute, dwarf::Form Form, const MCSymbol *Label); + void addLabel(DIEBlock *Die, dwarf::Form Form, const MCSymbol *Label); + + /// addSectionLabel - Add a Dwarf section label attribute data and value. + /// + void addSectionLabel(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Label); + + /// addSectionOffset - Add an offset into a section attribute data and value. + /// + void addSectionOffset(DIE *Die, dwarf::Attribute Attribute, uint64_t Integer); + /// addLabelAddress - Add a dwarf label attribute data and value using /// either DW_FORM_addr or DW_FORM_GNU_addr_index. /// - void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label); + void addLabelAddress(DIE *Die, dwarf::Attribute Attribute, MCSymbol *Label); /// addOpAddress - Add a dwarf op address data and value using the /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. /// - void addOpAddress(DIE *Die, MCSymbol *Label); + void addOpAddress(DIEBlock *Die, const MCSymbol *Label); - /// addDelta - Add a label delta attribute data and value. + /// addSectionDelta - Add a label delta attribute data and value. + void addSectionDelta(DIE *Die, dwarf::Attribute Attribute, const MCSymbol *Hi, + const MCSymbol *Lo); + + /// addDIEEntry - Add a DIE attribute data and value. /// - void addDelta(DIE *Die, unsigned Attribute, unsigned Form, - const MCSymbol *Hi, const MCSymbol *Lo); + void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIE *Entry); /// addDIEEntry - Add a DIE attribute data and value. /// - void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); + void addDIEEntry(DIE *Die, dwarf::Attribute Attribute, DIEEntry *Entry); /// addBlock - Add block data. /// - void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); + void addBlock(DIE *Die, dwarf::Attribute Attribute, DIEBlock *Block); /// addSourceLine - Add location information to specified debug information /// entry. @@ -256,33 +254,33 @@ public: /// addAddress - Add an address attribute to a die based on the location /// provided. - void addAddress(DIE *Die, unsigned Attribute, - const MachineLocation &Location); + void addAddress(DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location, + bool Indirect = false); /// addConstantValue - Add constant value entry in variable DIE.
- bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); - bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); - bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); + void addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); + void addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); + void addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); /// addConstantFPValue - Add constant value entry in variable DIE. - bool addConstantFPValue(DIE *Die, const MachineOperand &MO); - bool addConstantFPValue(DIE *Die, const ConstantFP *CFP); + void addConstantFPValue(DIE *Die, const MachineOperand &MO); + void addConstantFPValue(DIE *Die, const ConstantFP *CFP); /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); /// addRegisterOp - Add register operand. - void addRegisterOp(DIE *TheDie, unsigned Reg); + void addRegisterOp(DIEBlock *TheDie, unsigned Reg); /// addRegisterOffset - Add register offset. - void addRegisterOffset(DIE *TheDie, unsigned Reg, int64_t Offset); + void addRegisterOffset(DIEBlock *TheDie, unsigned Reg, int64_t Offset); /// addComplexAddress - Start with the address based on the location provided, /// and generate the DWARF information necessary to find the actual variable /// (navigating the extra location information encoded in the type) based on /// the starting location. Add the DWARF information to the die. /// - void addComplexAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addComplexAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location); // FIXME: Should be reformulated in terms of addComplexAddress. @@ -292,20 +290,18 @@ public: /// starting location. Add the DWARF information to the die. Obsolete, /// please use addComplexAddress instead. /// - void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, + void addBlockByrefAddress(const DbgVariable &DV, DIE *Die, dwarf::Attribute Attribute, const MachineLocation &Location); /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. - void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); - - /// addToContextOwner - Add Die into the list of its context owner's children. - void addToContextOwner(DIE *Die, DIDescriptor Context); + void addVariableAddress(const DbgVariable &DV, DIE *Die, + MachineLocation Location); /// addType - Add a new type attribute to the specified entity. This takes /// and attribute parameter because DW_AT_friend attributes are also /// type references. - void addType(DIE *Entity, DIType Ty, unsigned Attribute = dwarf::DW_AT_type); + void addType(DIE *Entity, DIType Ty, dwarf::Attribute Attribute = dwarf::DW_AT_type); /// getOrCreateNameSpace - Create a DIE for DINameSpace. DIE *getOrCreateNameSpace(DINameSpace NS); @@ -317,66 +313,103 @@ public: /// given DIType. DIE *getOrCreateTypeDIE(const MDNode *N); - /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE - /// for the given DITemplateTypeParameter. - DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); + /// getOrCreateContextDIE - Get context owner's DIE. + DIE *getOrCreateContextDIE(DIScope Context); - /// getOrCreateTemplateValueParameterDIE - Find existing DIE or create - /// new DIE for the given DITemplateValueParameter. 
- DIE *getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TVP); + /// createGlobalVariableDIE - create global variable DIE. + void createGlobalVariableDIE(DIGlobalVariable GV); - /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug - /// information entry. - DIEEntry *createDIEEntry(DIE *Entry); + /// constructContainingTypeDIEs - Construct DIEs for types that contain + /// vtables. + void constructContainingTypeDIEs(); - /// createGlobalVariableDIE - create global variable DIE. - void createGlobalVariableDIE(const MDNode *N); + /// constructVariableDIE - Construct a DIE for the given DbgVariable. + DIE *constructVariableDIE(DbgVariable &DV, bool isScopeAbstract); + + /// Create a DIE with the given Tag, add the DIE to its parent, and + /// call insertDIE if MD is not null. + DIE *createAndAddDIE(unsigned Tag, DIE &Parent, DIDescriptor N = DIDescriptor()); + + /// Compute the size of a header for this unit, not including the initial + /// length field. + unsigned getHeaderSize() const { + return sizeof(int16_t) + // DWARF version number + sizeof(int32_t) + // Offset Into Abbrev. Section + sizeof(int8_t); // Pointer Size (in bytes) + } - void addPubTypes(DISubprogram SP); + /// Emit the header for this unit, not including the initial length field. + void emitHeader(const MCSection *ASection, const MCSymbol *ASectionSym); +private: /// constructTypeDIE - Construct basic type die from DIBasicType. - void constructTypeDIE(DIE &Buffer, - DIBasicType BTy); + void constructTypeDIE(DIE &Buffer, DIBasicType BTy); /// constructTypeDIE - Construct derived type die from DIDerivedType. - void constructTypeDIE(DIE &Buffer, - DIDerivedType DTy); + void constructTypeDIE(DIE &Buffer, DIDerivedType DTy); /// constructTypeDIE - Construct type DIE from DICompositeType. - void constructTypeDIE(DIE &Buffer, - DICompositeType CTy); + void constructTypeDIE(DIE &Buffer, DICompositeType CTy); /// constructSubrangeDIE - Construct subrange DIE from DISubrange. void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, - DICompositeType *CTy); + void constructArrayTypeDIE(DIE &Buffer, DICompositeType CTy); /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. - DIE *constructEnumTypeDIE(DIEnumerator ETy); + void constructEnumTypeDIE(DIE &Buffer, DICompositeType CTy); - /// constructContainingTypeDIEs - Construct DIEs for types that contain - /// vtables. - void constructContainingTypeDIEs(); + /// constructMemberDIE - Construct member DIE from DIDerivedType. + void constructMemberDIE(DIE &Buffer, DIDerivedType DT); - /// constructVariableDIE - Construct a DIE for the given DbgVariable. - DIE *constructVariableDIE(DbgVariable *DV, bool isScopeAbstract); + /// constructTemplateTypeParameterDIE - Construct new DIE for the given + /// DITemplateTypeParameter. + void constructTemplateTypeParameterDIE(DIE &Buffer, + DITemplateTypeParameter TP); - /// createMemberDIE - Create new member DIE. - DIE *createMemberDIE(DIDerivedType DT); + /// constructTemplateValueParameterDIE - Construct new DIE for the given + /// DITemplateValueParameter. + void constructTemplateValueParameterDIE(DIE &Buffer, + DITemplateValueParameter TVP); - /// createStaticMemberDIE - Create new static data member DIE. - DIE *createStaticMemberDIE(DIDerivedType DT); + /// getOrCreateStaticMemberDIE - Create new static data member DIE. 
+ DIE *getOrCreateStaticMemberDIE(DIDerivedType DT); - /// getOrCreateContextDIE - Get context owner's DIE. - DIE *getOrCreateContextDIE(DIDescriptor Context); + /// Offset of the CUDie from beginning of debug info section. + unsigned DebugInfoOffset; -private: + /// getLowerBoundDefault - Return the default lower bound for an array. If the + /// DWARF version doesn't handle the language, return -1. + int64_t getDefaultLowerBound() const; - // DIEValueAllocator - All DIEValues are allocated through this allocator. - BumpPtrAllocator DIEValueAllocator; - DIEInteger *DIEIntegerOne; + /// getDIEEntry - Returns the debug information entry for the specified + /// debug variable. + DIEEntry *getDIEEntry(const MDNode *N) const { + return MDNodeToDIEEntryMap.lookup(N); + } + + /// insertDIEEntry - Insert debug information entry into the map. + void insertDIEEntry(const MDNode *N, DIEEntry *E) { + MDNodeToDIEEntryMap.insert(std::make_pair(N, E)); + } + + // getIndexTyDie - Get an anonymous type for index type. + DIE *getIndexTyDie() { return IndexTyDie; } + + // setIndexTyDie - Set D as anonymous type for index which can be reused + // later. + void setIndexTyDie(DIE *D) { IndexTyDie = D; } + + /// createDIEEntry - Creates a new DIEEntry to be a proxy for a debug + /// information entry. + DIEEntry *createDIEEntry(DIE *Entry); + + /// resolve - Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference. + template <typename T> T resolve(DIRef<T> Ref) const { + return DD->resolve(Ref); + } }; } // end llvm namespace diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 1e706cc..d1e1ad1 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "dwarfdebug" #include "DwarfDebug.h" #include "DIE.h" +#include "DIEHash.h" #include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" #include "llvm/ADT/STLExtras.h" @@ -34,8 +35,10 @@ #include "llvm/MC/MCSymbol.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MD5.h" #include "llvm/Support/Path.h" #include "llvm/Support/Timer.h" #include "llvm/Support/ValueHandle.h" @@ -46,61 +49,69 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -static cl::opt<bool> DisableDebugInfoPrinting("disable-debug-info-print", - cl::Hidden, - cl::desc("Disable debug info printing")); +static cl::opt<bool> +DisableDebugInfoPrinting("disable-debug-info-print", cl::Hidden, + cl::desc("Disable debug info printing")); -static cl::opt<bool> UnknownLocations("use-unknown-locations", cl::Hidden, - cl::desc("Make an absence of debug location information explicit."), - cl::init(false)); +static cl::opt<bool> UnknownLocations( + "use-unknown-locations", cl::Hidden, + cl::desc("Make an absence of debug location information explicit."), + cl::init(false)); -static cl::opt<bool> GenerateDwarfPubNamesSection("generate-dwarf-pubnames", - cl::Hidden, cl::init(false), - cl::desc("Generate DWARF pubnames section")); +static cl::opt<bool> +GenerateODRHash("generate-odr-hash", cl::Hidden, + cl::desc("Add an ODR hash to external type DIEs."), + cl::init(false)); -namespace { - enum DefaultOnOff { - Default, Enable, Disable - }; -} - -static cl::opt<DefaultOnOff> DwarfAccelTables("dwarf-accel-tables", cl::Hidden, - cl::desc("Output prototype dwarf accelerator tables."), - cl::values( -
clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, - cl::desc("Compatibility with Darwin gdb."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); - -static cl::opt<DefaultOnOff> SplitDwarf("split-dwarf", cl::Hidden, - cl::desc("Output prototype dwarf split debug info."), - cl::values( - clEnumVal(Default, "Default for platform"), - clEnumVal(Enable, "Enabled"), - clEnumVal(Disable, "Disabled"), - clEnumValEnd), - cl::init(Default)); +static cl::opt<bool> +GenerateCUHash("generate-cu-hash", cl::Hidden, + cl::desc("Add the CU hash as the dwo_id."), + cl::init(false)); -namespace { - const char *DWARFGroupName = "DWARF Emission"; - const char *DbgTimerName = "DWARF Debug Writer"; +static cl::opt<bool> +GenerateGnuPubSections("generate-gnu-dwarf-pub-sections", cl::Hidden, + cl::desc("Generate GNU-style pubnames and pubtypes"), + cl::init(false)); - struct CompareFirst { - template <typename T> bool operator()(const T &lhs, const T &rhs) const { - return lhs.first < rhs.first; - } - }; -} // end anonymous namespace +namespace { +enum DefaultOnOff { + Default, + Enable, + Disable +}; +} + +static cl::opt<DefaultOnOff> +DwarfAccelTables("dwarf-accel-tables", cl::Hidden, + cl::desc("Output prototype dwarf accelerator tables."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +SplitDwarf("split-dwarf", cl::Hidden, + cl::desc("Output prototype dwarf split debug info."), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<DefaultOnOff> +DwarfPubSections("generate-dwarf-pub-sections", cl::Hidden, + cl::desc("Generate DWARF pubnames and pubtypes sections"), + cl::values(clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), clEnumValEnd), + cl::init(Default)); + +static cl::opt<unsigned> +DwarfVersionNumber("dwarf-version", cl::Hidden, + cl::desc("Generate DWARF for dwarf version."), + cl::init(0)); + +static const char *const DWARFGroupName = "DWARF Emission"; +static const char *const DbgTimerName = "DWARF Debug Writer"; //===----------------------------------------------------------------------===// @@ -110,6 +121,13 @@ static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { +/// resolve - Look in the DwarfDebug map for the MDNode that +/// corresponds to the reference. +template <typename T> +T DbgVariable::resolve(DIRef<T> Ref) const { + return DD->resolve(Ref); +} + DIType DbgVariable::getType() const { DIType Ty = Var.getType(); // FIXME: isBlockByrefVariable should be reformulated in terms of complex @@ -140,21 +158,16 @@ DIType DbgVariable::getType() const { the pointers and __Block_byref_x_VarName struct to find the actual value of the variable. The function addBlockByrefType does this.
*/ DIType subType = Ty; - unsigned tag = Ty.getTag(); - - if (tag == dwarf::DW_TAG_pointer_type) { - DIDerivedType DTy = DIDerivedType(Ty); - subType = DTy.getTypeDerivedFrom(); - } + uint16_t tag = Ty.getTag(); - DICompositeType blockStruct = DICompositeType(subType); - DIArray Elements = blockStruct.getTypeArray(); + if (tag == dwarf::DW_TAG_pointer_type) + subType = resolve(DIDerivedType(Ty).getTypeDerivedFrom()); + DIArray Elements = DICompositeType(subType).getTypeArray(); for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { - DIDescriptor Element = Elements.getElement(i); - DIDerivedType DT = DIDerivedType(Element); + DIDerivedType DT(Elements.getElement(i)); if (getName() == DT.getName()) - return (DT.getTypeDerivedFrom()); + return (resolve(DT.getTypeDerivedFrom())); } } return Ty; @@ -162,15 +175,23 @@ DIType DbgVariable::getType() const { } // end llvm namespace +/// Return Dwarf Version by checking module flags. +static unsigned getDwarfVersionFromModule(const Module *M) { + Value *Val = M->getModuleFlag("Dwarf Version"); + if (!Val) + return dwarf::DWARF_VERSION; + return cast<ConstantInt>(Val)->getZExtValue(); +} + DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), FirstCU(0), AbbreviationsSet(InitAbbreviationsSetSize), SourceIdMap(DIEValueAllocator), PrevLabel(NULL), GlobalCUIndexCount(0), - InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string", + InfoHolder(A, &AbbreviationsSet, Abbreviations, "info_string", DIEValueAllocator), SkeletonAbbrevSet(InitAbbreviationsSetSize), - SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string", + SkeletonHolder(A, &SkeletonAbbrevSet, SkeletonAbbrevs, "skel_string", DIEValueAllocator) { DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; @@ -180,37 +201,34 @@ DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; - // Turn on accelerator tables and older gdb compatibility - // for Darwin. + // Turn on accelerator tables for Darwin by default, pubnames by + // default for non-Darwin, and handle split dwarf. bool IsDarwin = Triple(A->getTargetTriple()).isOSDarwin(); - if (DarwinGDBCompat == Default) { - if (IsDarwin) - IsDarwinGDBCompat = true; - else - IsDarwinGDBCompat = false; - } else - IsDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false; - if (DwarfAccelTables == Default) { - if (IsDarwin) - HasDwarfAccelTables = true; - else - HasDwarfAccelTables = false; - } else - HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false; + if (DwarfAccelTables == Default) + HasDwarfAccelTables = IsDarwin; + else + HasDwarfAccelTables = DwarfAccelTables == Enable; if (SplitDwarf == Default) HasSplitDwarf = false; else - HasSplitDwarf = SplitDwarf == Enable ? true : false; + HasSplitDwarf = SplitDwarf == Enable; + + if (DwarfPubSections == Default) + HasDwarfPubSections = !IsDarwin; + else + HasDwarfPubSections = DwarfPubSections == Enable; + + DwarfVersion = DwarfVersionNumber + ? DwarfVersionNumber + : getDwarfVersionFromModule(MMI->getModule()); { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); beginModule(); } } -DwarfDebug::~DwarfDebug() { -} // Switch to the specified MCSection and emit an assembler // temporary label to it if SymbolStem is specified.
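Since getDwarfVersionFromModule above reads the "Dwarf Version" module flag, the emitted DWARF level can now be pinned per module, with the -dwarf-version command-line option taking precedence when nonzero. A small sketch of how a frontend could set that flag with the 3.4-era API (pinDwarfVersion is a hypothetical helper, not part of this patch):

    #include "llvm/IR/Module.h"

    // Request DWARF 4 for this module; DwarfDebug's constructor picks the
    // value up via getDwarfVersionFromModule unless -dwarf-version is set.
    void pinDwarfVersion(llvm::Module &M) {
      M.addModuleFlag(llvm::Module::Warning, "Dwarf Version", 4);
    }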
@@ -247,48 +265,37 @@ unsigned DwarfUnits::getStringPoolIndex(StringRef Str) { return Entry.second; } -unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) { - std::pair &Entry = AddressPool[Sym]; - if (Entry.first) return Entry.second; +unsigned DwarfUnits::getAddrPoolIndex(const MCSymbol *Sym) { + return getAddrPoolIndex(MCSymbolRefExpr::Create(Sym, Asm->OutContext)); +} - Entry.second = NextAddrPoolNumber++; - Entry.first = Sym; - return Entry.second; +unsigned DwarfUnits::getAddrPoolIndex(const MCExpr *Sym) { + std::pair::iterator, bool> P = + AddressPool.insert(std::make_pair(Sym, NextAddrPoolNumber)); + if (P.second) + ++NextAddrPoolNumber; + return P.first->second; } // Define a unique number for the abbreviation. // void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { - // Profile the node so that we can make it unique. - FoldingSetNodeID ID; - Abbrev.Profile(ID); - // Check the set for priors. DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev); // If it's newly added. if (InSet == &Abbrev) { // Add to abbreviation list. - Abbreviations->push_back(&Abbrev); + Abbreviations.push_back(&Abbrev); // Assign the vector position + 1 as its number. - Abbrev.setNumber(Abbreviations->size()); + Abbrev.setNumber(Abbreviations.size()); } else { // Assign existing abbreviation number. Abbrev.setNumber(InSet->getNumber()); } } -// If special LLVM prefix that is used to inform the asm -// printer to not emit usual symbol prefix before the symbol name is used then -// return linkage name after skipping this special LLVM prefix. -static StringRef getRealLinkageName(StringRef LinkageName) { - char One = '\1'; - if (LinkageName.startswith(StringRef(&One, 1))) - return LinkageName.substr(1); - return LinkageName; -} - static bool isObjCClass(StringRef Name) { return Name.startswith("+") || Name.startswith("-"); } @@ -296,12 +303,7 @@ static bool isObjCClass(StringRef Name) { static bool hasObjCCategory(StringRef Name) { if (!isObjCClass(Name)) return false; - size_t pos = Name.find(')'); - if (pos != std::string::npos) { - if (Name[pos+1] != ' ') return false; - return true; - } - return false; + return Name.find(") ") != StringRef::npos; } static void getObjCClassCategory(StringRef In, StringRef &Class, @@ -321,11 +323,20 @@ static StringRef getObjCMethodName(StringRef In) { return In.slice(In.find(' ') + 1, In.find(']')); } +// Helper for sorting sections into a stable output order. +static bool SectionSort(const MCSection *A, const MCSection *B) { + std::string LA = (A ? A->getLabelBeginName() : ""); + std::string LB = (B ? B->getLabelBeginName() : ""); + return LA < LB; +} + // Add the various names to the Dwarf accelerator table names. +// TODO: Determine whether or not we should add names for programs +// that do not have a DW_AT_name or DW_AT_linkage_name field - this +// is only slightly different than the lookup of non-standard ObjC names. static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, DIE* Die) { if (!SP.isDefinition()) return; - TheCU->addAccelName(SP.getName(), Die); // If the linkage name is different than the name, go ahead and output @@ -346,30 +357,34 @@ static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, } } +/// isSubprogramContext - Return true if Context is either a subprogram +/// or another context nested inside a subprogram. 
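// A standalone sketch (not part of the patch) of the pool-indexing idiom the
// new getAddrPoolIndex uses above: insert with the next free index and bump
// the counter only when the insertion actually took place, so a repeated
// symbol gets its original index back. Containers simplified from LLVM types.
#include <cassert>
#include <map>
#include <string>

struct AddrPool {
  std::map<std::string, unsigned> Pool; // stand-in for the symbol/expr map
  unsigned NextAddrPoolNumber = 0;

  unsigned getIndex(const std::string &Sym) {
    std::pair<std::map<std::string, unsigned>::iterator, bool> P =
        Pool.insert(std::make_pair(Sym, NextAddrPoolNumber));
    if (P.second) // newly inserted: the number is now consumed
      ++NextAddrPoolNumber;
    return P.first->second;
  }
};

int main() {
  AddrPool AP;
  assert(AP.getIndex("a") == 0);
  assert(AP.getIndex("b") == 1);
  assert(AP.getIndex("a") == 0); // duplicates keep their first index
  return 0;
}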
+bool DwarfDebug::isSubprogramContext(const MDNode *Context) { + if (!Context) + return false; + DIDescriptor D(Context); + if (D.isSubprogram()) + return true; + if (D.isType()) + return isSubprogramContext(resolve(DIType(Context).getContext())); + return false; +} + // Find DIE for the given subprogram and attach appropriate DW_AT_low_pc // and DW_AT_high_pc attributes. If there are global variables in this // scope then create and insert DIEs for these variables. -DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, - const MDNode *SPNode) { - DIE *SPDie = SPCU->getDIE(SPNode); +DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP) { + DIE *SPDie = SPCU->getDIE(SP); assert(SPDie && "Unable to find subprogram DIE!"); - DISubprogram SP(SPNode); // If we're updating an abstract DIE, then we will be adding the children and // object pointer later on. But what we don't want to do is process the // concrete DIE twice. - DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode); - if (AbsSPDIE) { - bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie()); + if (DIE *AbsSPDIE = AbstractSPDies.lookup(SP)) { // Pick up abstract subprogram DIE. - SPDie = new DIE(dwarf::DW_TAG_subprogram); - // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of - // DW_FORM_ref4. - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, - AbsSPDIE); - SPCU->addDie(SPDie); + SPDie = SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, AbsSPDIE); } else { DISubprogram SPDecl = SP.getFunctionDeclaration(); if (!SPDecl.isSubprogram()) { @@ -378,32 +393,31 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, // function then gdb prefers the definition at top level but does not // expect a specification DIE in the parent function. So avoid creating // specification DIE for a function defined inside a function. - if (SP.isDefinition() && !SP.getContext().isCompileUnit() && - !SP.getContext().isFile() && - !isSubprogramContext(SP.getContext())) { + DIScope SPContext = resolve(SP.getContext()); + if (SP.isDefinition() && !SPContext.isCompileUnit() && + !SPContext.isFile() && + !isSubprogramContext(SPContext)) { SPCU->addFlag(SPDie, dwarf::DW_AT_declaration); // Add arguments.
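// A standalone sketch (not part of the patch) of the context walk performed
// by isSubprogramContext above: climb the context chain and answer yes if a
// subprogram appears anywhere on the way up. The node shape is a simplified
// stand-in for the DIDescriptor hierarchy.
#include <cassert>

struct Ctx {
  bool IsSubprogram;
  bool IsType;
  const Ctx *Parent; // what resolve(DIType(...).getContext()) would yield
};

static bool isSubprogramCtx(const Ctx *C) {
  if (!C)
    return false;
  if (C->IsSubprogram)
    return true;
  if (C->IsType)
    return isSubprogramCtx(C->Parent); // recurse through enclosing types
  return false;
}

int main() {
  Ctx SP = {true, false, 0};
  Ctx Nested = {false, true, &SP}; // a type nested inside a subprogram
  assert(isSubprogramCtx(&Nested));
  return 0;
}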
DICompositeType SPTy = SP.getType(); DIArray Args = SPTy.getTypeArray(); - unsigned SPTag = SPTy.getTag(); + uint16_t SPTag = SPTy.getTag(); if (SPTag == dwarf::DW_TAG_subroutine_type) for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) { - DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); - DIType ATy = DIType(Args.getElement(i)); + DIE *Arg = + SPCU->createAndAddDIE(dwarf::DW_TAG_formal_parameter, *SPDie); + DIType ATy(Args.getElement(i)); SPCU->addType(Arg, ATy); if (ATy.isArtificial()) SPCU->addFlag(Arg, dwarf::DW_AT_artificial); if (ATy.isObjectPointer()) - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, - dwarf::DW_FORM_ref4, Arg); - SPDie->addChild(Arg); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_object_pointer, Arg); } DIE *SPDeclDie = SPDie; - SPDie = new DIE(dwarf::DW_TAG_subprogram); - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, - dwarf::DW_FORM_ref4, SPDeclDie); - SPCU->addDie(SPDie); + SPDie = + SPCU->createAndAddDIE(dwarf::DW_TAG_subprogram, *SPCU->getCUDie()); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, SPDeclDie); } } } @@ -425,40 +439,64 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, return SPDie; } +/// Check whether we should create a DIE for the given Scope, return true +/// if we don't create a DIE (the corresponding DIE is null). +bool DwarfDebug::isLexicalScopeDIENull(LexicalScope *Scope) { + if (Scope->isAbstractScope()) + return false; + + // We don't create a DIE if there is no Range. + const SmallVectorImpl &Ranges = Scope->getRanges(); + if (Ranges.empty()) + return true; + + if (Ranges.size() > 1) + return false; + + // We don't create a DIE if we have a single Range and the end label + // is null. + SmallVectorImpl::const_iterator RI = Ranges.begin(); + MCSymbol *End = getLabelAfterInsn(RI->second); + return !End; +} + // Construct new DW_TAG_lexical_block for this scope and attach // DW_AT_low_pc/DW_AT_high_pc labels. DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { + if (isLexicalScopeDIENull(Scope)) + return 0; + DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) return ScopeDIE; - const SmallVector &Ranges = Scope->getRanges(); - if (Ranges.empty()) - return 0; - - SmallVector::const_iterator RI = Ranges.begin(); + const SmallVectorImpl &Ranges = Scope->getRanges(); + // If we have multiple ranges, emit them into the range section. if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); - for (SmallVector::const_iterator RI = Ranges.begin(), + TheCU->addSectionOffset(ScopeDIE, dwarf::DW_AT_ranges, + DebugRangeSymbols.size() * + Asm->getDataLayout().getPointerSize()); + for (SmallVectorImpl::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); } + + // Terminate the range list. DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); return ScopeDIE; } + // Construct the address range for this DIE. 
+ SmallVectorImpl::const_iterator RI = Ranges.begin(); MCSymbol *Start = getLabelBeforeInsn(RI->first); MCSymbol *End = getLabelAfterInsn(RI->second); - - if (End == 0) return 0; + assert(End && "End label should not be null!"); assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); @@ -473,7 +511,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // represent this concrete inlined copy of the function. DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - const SmallVector &Ranges = Scope->getRanges(); + const SmallVectorImpl &Ranges = Scope->getRanges(); assert(Ranges.empty() == false && "LexicalScope does not have instruction markers!"); @@ -487,30 +525,17 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return NULL; } - SmallVector::const_iterator RI = Ranges.begin(); - MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); - MCSymbol *EndLabel = getLabelAfterInsn(RI->second); - - if (StartLabel == 0 || EndLabel == 0) { - llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); - } - assert(StartLabel->isDefined() && - "Invalid starting label for an inlined scope!"); - assert(EndLabel->isDefined() && - "Invalid end label for an inlined scope!"); - DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine); - TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, OriginDIE); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin, OriginDIE); if (Ranges.size() > 1) { // .debug_range section has not been laid out yet. Emit offset in // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() - * Asm->getDataLayout().getPointerSize()); - for (SmallVector::const_iterator RI = Ranges.begin(), + TheCU->addSectionOffset(ScopeDIE, dwarf::DW_AT_ranges, + DebugRangeSymbols.size() * + Asm->getDataLayout().getPointerSize()); + for (SmallVectorImpl::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { DebugRangeSymbols.push_back(getLabelBeforeInsn(RI->first)); DebugRangeSymbols.push_back(getLabelAfterInsn(RI->second)); @@ -518,31 +543,29 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); } else { + SmallVectorImpl::const_iterator RI = Ranges.begin(); + MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); + MCSymbol *EndLabel = getLabelAfterInsn(RI->second); + + if (StartLabel == 0 || EndLabel == 0) + llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); + + assert(StartLabel->isDefined() && + "Invalid starting label for an inlined scope!"); + assert(EndLabel->isDefined() && "Invalid end label for an inlined scope!"); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel); TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel); } InlinedSubprogramDIEs.insert(OriginDIE); - // Track the start label for this inlined function. - //.debug_inlined section specification does not clearly state how - // to emit inlined scope that is split into multiple instruction ranges. - // For now, use first instruction range and emit low_pc/high_pc pair and - // corresponding .debug_inlined section entry for this pair. 
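// A standalone sketch (not part of the patch) of the DW_AT_ranges offset
// computation used above: every begin/end symbol pushed so far occupies one
// address-sized slot in .debug_ranges, so the next list starts at
// (symbols so far) * (pointer size), and a (NULL, NULL) pair terminates a
// list. Types simplified; the pointer size is an assumed example value.
#include <cassert>
#include <cstddef>
#include <vector>

int main() {
  std::vector<const char *> DebugRangeSymbols; // stand-in for MCSymbol*
  const std::size_t PtrSize = 8; // Asm->getDataLayout().getPointerSize()

  // Offset recorded in the DIE before this scope's ranges are appended.
  std::size_t Offset = DebugRangeSymbols.size() * PtrSize;
  assert(Offset == 0);

  // Two ranges: two begin/end pairs followed by the terminator.
  const char *Syms[] = {"b0", "e0", "b1", "e1", 0, 0};
  DebugRangeSymbols.insert(DebugRangeSymbols.end(), Syms, Syms + 6);

  // The next scope's range list would start six slots further in.
  assert(DebugRangeSymbols.size() * PtrSize == 6 * PtrSize);
  return 0;
}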
- DenseMap >::iterator - I = InlineInfo.find(InlinedSP); - - if (I == InlineInfo.end()) { - InlineInfo[InlinedSP].push_back(std::make_pair(StartLabel, ScopeDIE)); - InlinedSPNodes.push_back(InlinedSP); - } else - I->second.push_back(std::make_pair(StartLabel, ScopeDIE)); - + // Add the call site information to the DIE. DILocation DL(Scope->getInlinedAt()); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, None, getOrCreateSourceID(DL.getFilename(), DL.getDirectory(), TheCU->getUniqueID())); - TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); + TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, None, DL.getLineNumber()); // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. @@ -551,42 +574,49 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, return ScopeDIE; } -// Construct a DIE for this scope. -DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { - if (!Scope || !Scope->getScopeNode()) - return NULL; - - DIScope DS(Scope->getScopeNode()); - // Early return to avoid creating dangling variable|scope DIEs. - if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() && - !TheCU->getDIE(DS)) - return NULL; - - SmallVector Children; - DIE *ObjectPointer = NULL; +DIE *DwarfDebug::createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, + SmallVectorImpl &Children) { + DIE *ObjectPointer = NULL; // Collect arguments for current function. if (LScopes.isCurrentFunctionScope(Scope)) for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) if (DbgVariable *ArgDV = CurrentFnArguments[i]) if (DIE *Arg = - TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) { + TheCU->constructVariableDIE(*ArgDV, Scope->isAbstractScope())) { Children.push_back(Arg); if (ArgDV->isObjectPointer()) ObjectPointer = Arg; } // Collect lexical scope children first. - const SmallVector &Variables = ScopeVariables.lookup(Scope); + const SmallVectorImpl &Variables =ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) if (DIE *Variable = - TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { + TheCU->constructVariableDIE(*Variables[i], Scope->isAbstractScope())) { Children.push_back(Variable); if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; } - const SmallVector &Scopes = Scope->getChildren(); + const SmallVectorImpl &Scopes = Scope->getChildren(); for (unsigned j = 0, M = Scopes.size(); j < M; ++j) if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) Children.push_back(Nested); + return ObjectPointer; +} + +// Construct a DIE for this scope. +DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { + if (!Scope || !Scope->getScopeNode()) + return NULL; + + DIScope DS(Scope->getScopeNode()); + + SmallVector Children; + DIE *ObjectPointer = NULL; + bool ChildrenCreated = false; + + // We try to create the scope DIE first, then the children DIEs. This will + // avoid creating un-used children then removing them later when we find out + // the scope DIE is null. DIE *ScopeDIE = NULL; if (Scope->getInlinedAt()) ScopeDIE = constructInlinedScopeDIE(TheCU, Scope); @@ -597,34 +627,49 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { // Note down abstract DIE. 
if (ScopeDIE) AbstractSPDies.insert(std::make_pair(DS, ScopeDIE)); - } - else - ScopeDIE = updateSubprogramScopeDIE(TheCU, DS); - } - else { + } else + ScopeDIE = updateSubprogramScopeDIE(TheCU, DISubprogram(DS)); + } else { + // Early exit when we know the scope DIE is going to be null. + if (isLexicalScopeDIENull(Scope)) + return NULL; + + // We create children here when we know the scope DIE is not going to be + // null and the children will be added to the scope DIE. + ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); + ChildrenCreated = true; + // There is no need to emit empty lexical block DIE. std::pair Range = std::equal_range( ScopesWithImportedEntities.begin(), ScopesWithImportedEntities.end(), std::pair(DS, (const MDNode*)0), - CompareFirst()); + less_first()); if (Children.empty() && Range.first == Range.second) return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); - for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; ++i) - constructImportedModuleDIE(TheCU, i->second, ScopeDIE); + assert(ScopeDIE && "Scope DIE should not be null."); + for (ImportedEntityMap::const_iterator i = Range.first; i != Range.second; + ++i) + constructImportedEntityDIE(TheCU, i->second, ScopeDIE); } - if (!ScopeDIE) return NULL; + if (!ScopeDIE) { + assert(Children.empty() && + "We create children only when the scope DIE is not null."); + return NULL; + } + if (!ChildrenCreated) + // We create children when the scope DIE is not null. + ObjectPointer = createScopeChildrenDIE(TheCU, Scope, Children); // Add children - for (SmallVector::iterator I = Children.begin(), + for (SmallVectorImpl::iterator I = Children.begin(), E = Children.end(); I != E; ++I) ScopeDIE->addChild(*I); if (DS.isSubprogram() && ObjectPointer != NULL) - TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, - dwarf::DW_FORM_ref4, ObjectPointer); + TheCU->addDIEEntry(ScopeDIE, dwarf::DW_AT_object_pointer, ObjectPointer); if (DS.isSubprogram()) TheCU->addPubTypes(DISubprogram(DS)); @@ -640,8 +685,10 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, StringRef DirName, unsigned CUID) { // If we use .loc in assembly, we can't separate .file entries according to // compile units. Thus all files will belong to the default compile unit. - if (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + + // FIXME: add a better feature test than hasRawTextSupport. Even better, + // extend .file to support this. + if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) CUID = 0; // If FE did not provide a file name, then assume stdin. @@ -676,14 +723,12 @@ unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, // Create new CompileUnit for the given metadata node with tag // DW_TAG_compile_unit. -CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { - DICompileUnit DIUnit(N); +CompileUnit *DwarfDebug::constructCompileUnit(DICompileUnit DIUnit) { StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, - DIUnit.getLanguage(), Die, Asm, + CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, Die, DIUnit, Asm, this, &InfoHolder); FileIDCUMap[NewCU->getUniqueID()] = 0; @@ -710,31 +755,56 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { // Use a single line table if we are using .loc and generating assembly. 
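// A standalone sketch (not part of the patch) of the less_first/equal_range
// pattern used above: ScopesWithImportedEntities stays sorted by scope, so
// all imported entities of one scope form a contiguous run that equal_range
// finds in logarithmic time. Pair contents reduced to ints.
#include <algorithm>
#include <cassert>
#include <iterator>
#include <utility>
#include <vector>

struct less_first { // same shape as llvm::less_first
  template <typename T> bool operator()(const T &L, const T &R) const {
    return L.first < R.first;
  }
};

int main() {
  // (scope, imported entity) pairs, sorted by scope.
  std::vector<std::pair<int, int> > Imports;
  Imports.push_back(std::make_pair(1, 10));
  Imports.push_back(std::make_pair(2, 20));
  Imports.push_back(std::make_pair(2, 21));
  Imports.push_back(std::make_pair(3, 30));

  auto Range = std::equal_range(Imports.begin(), Imports.end(),
                                std::make_pair(2, 0), less_first());
  assert(std::distance(Range.first, Range.second) == 2); // both scope-2 rows
  return 0;
}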
bool UseTheFirstCU = - (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) || - (NewCU->getUniqueID() == 0); + (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) || + (NewCU->getUniqueID() == 0); - // DW_AT_stmt_list is a offset of line number information for this - // compile unit in debug_line section. For split dwarf this is - // left in the skeleton CU and so not included. - // The line table entries are not always emitted in assembly, so it - // is not okay to use line_table_start here. if (!useSplitDwarf()) { + // DW_AT_stmt_list is a offset of line number information for this + // compile unit in debug_line section. For split dwarf this is + // left in the skeleton CU and so not included. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - UseTheFirstCU ? - Asm->GetTempSymbol("section_line") : LineTableStartSym); + NewCU->addSectionLabel( + Die, dwarf::DW_AT_stmt_list, + UseTheFirstCU ? Asm->GetTempSymbol("section_line") + : LineTableStartSym); else if (UseTheFirstCU) - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + NewCU->addSectionOffset(Die, dwarf::DW_AT_stmt_list, 0); else - NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - LineTableStartSym, DwarfLineSectionSym); + NewCU->addSectionDelta(Die, dwarf::DW_AT_stmt_list, + LineTableStartSym, DwarfLineSectionSym); + + // If we're using split dwarf the compilation dir is going to be in the + // skeleton CU and so we don't need to duplicate it here. + if (!CompilationDir.empty()) + NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + // Flags to let the linker know we have emitted new style pubnames. Only + // emit it here if we don't have a skeleton CU for split dwarf. + if (GenerateGnuPubSections) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()), + DwarfGnuPubNamesSectionSym); + + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()), + DwarfGnuPubTypesSectionSym); + } } - // If we're using split dwarf the compilation dir is going to be in the - // skeleton CU and so we don't need to duplicate it here. - if (!useSplitDwarf() && !CompilationDir.empty()) - NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); if (DIUnit.isOptimized()) NewCU->addFlag(Die, dwarf::DW_AT_APPLE_optimized); @@ -751,13 +821,17 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { InfoHolder.addUnit(NewCU); - CUMap.insert(std::make_pair(N, NewCU)); + CUMap.insert(std::make_pair(DIUnit, NewCU)); + CUDieMap.insert(std::make_pair(Die, NewCU)); return NewCU; } // Construct subprogram DIE. 
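// A standalone sketch (not part of the patch) paraphrasing the three-way
// DW_AT_stmt_list choice above: with cross-section relocations the CU can
// point at a line-table label directly; without them, the first CU uses a
// plain zero offset and later CUs use a label difference from the section
// start. Names are illustrative only.
enum StmtListForm { SectionLabel, PlainZeroOffset, LabelDelta };

static StmtListForm pickStmtListForm(bool HasCrossSectionRelocs,
                                     bool UseTheFirstCU) {
  if (HasCrossSectionRelocs)
    return SectionLabel;
  return UseTheFirstCU ? PlainZeroOffset : LabelDelta;
}

int main() {
  return pickStmtListForm(false, true) == PlainZeroOffset ? 0 : 1;
}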
-void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, - const MDNode *N) { +void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { + // FIXME: We should only call this routine once, however, during LTO if a + // program is defined in multiple CUs we could end up calling it out of + // beginModule as we walk the CUs. + CompileUnit *&CURef = SPMap[N]; if (CURef) return; @@ -771,49 +845,54 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, DIE *SubprogramDie = TheCU->getOrCreateSubprogramDIE(SP); - // Add to map. - TheCU->insertDIE(N, SubprogramDie); - - // Add to context owner. - TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - - // Expose as global, if requested. - if (GenerateDwarfPubNamesSection) - TheCU->addGlobalName(SP.getName(), SubprogramDie); + // Expose as a global name. + TheCU->addGlobalName(SP.getName(), SubprogramDie, resolve(SP.getContext())); } -void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, +void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N) { - DIImportedModule Module(N); + DIImportedEntity Module(N); if (!Module.Verify()) return; if (DIE *D = TheCU->getOrCreateContextDIE(Module.getContext())) - constructImportedModuleDIE(TheCU, Module, D); + constructImportedEntityDIE(TheCU, Module, D); } -void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N, +void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, DIE *Context) { - DIImportedModule Module(N); + DIImportedEntity Module(N); if (!Module.Verify()) return; - return constructImportedModuleDIE(TheCU, Module, Context); + return constructImportedEntityDIE(TheCU, Module, Context); } -void DwarfDebug::constructImportedModuleDIE(CompileUnit *TheCU, - const DIImportedModule &Module, +void DwarfDebug::constructImportedEntityDIE(CompileUnit *TheCU, + const DIImportedEntity &Module, DIE *Context) { assert(Module.Verify() && "Use one of the MDNode * overloads to handle invalid metadata"); assert(Context && "Should always have a context for an imported_module"); - DIE *IMDie = new DIE(dwarf::DW_TAG_imported_module); + DIE *IMDie = new DIE(Module.getTag()); TheCU->insertDIE(Module, IMDie); - DIE *NSDie = TheCU->getOrCreateNameSpace(Module.getNameSpace()); + DIE *EntityDie; + DIDescriptor Entity = Module.getEntity(); + if (Entity.isNameSpace()) + EntityDie = TheCU->getOrCreateNameSpace(DINameSpace(Entity)); + else if (Entity.isSubprogram()) + EntityDie = TheCU->getOrCreateSubprogramDIE(DISubprogram(Entity)); + else if (Entity.isType()) + EntityDie = TheCU->getOrCreateTypeDIE(DIType(Entity)); + else + EntityDie = TheCU->getDIE(Entity); unsigned FileID = getOrCreateSourceID(Module.getContext().getFilename(), Module.getContext().getDirectory(), TheCU->getUniqueID()); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, 0, FileID); - TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, 0, Module.getLineNumber()); - TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, dwarf::DW_FORM_ref4, NSDie); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_file, None, FileID); + TheCU->addUInt(IMDie, dwarf::DW_AT_decl_line, None, Module.getLineNumber()); + TheCU->addDIEEntry(IMDie, dwarf::DW_AT_import, EntityDie); + StringRef Name = Module.getName(); + if (!Name.empty()) + TheCU->addString(IMDie, dwarf::DW_AT_name, Name); Context->addChild(IMDie); } @@ -831,6 +910,7 @@ void DwarfDebug::beginModule() { NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); if (!CU_Nodes) return; + TypeIdentifierMap = 
generateDITypeIdentifierMap(CU_Nodes); // Emit initial sections so we can reference labels later. emitSectionLabels(); @@ -838,16 +918,16 @@ void DwarfDebug::beginModule() { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit CUNode(CU_Nodes->getOperand(i)); CompileUnit *CU = constructCompileUnit(CUNode); - DIArray ImportedModules = CUNode.getImportedModules(); - for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i) + DIArray ImportedEntities = CUNode.getImportedEntities(); + for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) ScopesWithImportedEntities.push_back(std::make_pair( - DIImportedModule(ImportedModules.getElement(i)).getContext(), - ImportedModules.getElement(i))); + DIImportedEntity(ImportedEntities.getElement(i)).getContext(), + ImportedEntities.getElement(i))); std::sort(ScopesWithImportedEntities.begin(), - ScopesWithImportedEntities.end(), CompareFirst()); + ScopesWithImportedEntities.end(), less_first()); DIArray GVs = CUNode.getGlobalVariables(); for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU->createGlobalVariableDIE(GVs.getElement(i)); + CU->createGlobalVariableDIE(DIGlobalVariable(GVs.getElement(i))); DIArray SPs = CUNode.getSubprograms(); for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) constructSubprogramDIE(CU, SPs.getElement(i)); @@ -859,24 +939,15 @@ void DwarfDebug::beginModule() { CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); // Emit imported_modules last so that the relevant context is already // available. - for (unsigned i = 0, e = ImportedModules.getNumElements(); i != e; ++i) - constructImportedModuleDIE(CU, ImportedModules.getElement(i)); - // If we're splitting the dwarf out now that we've got the entire - // CU then construct a skeleton CU based upon it. - if (useSplitDwarf()) { - // This should be a unique identifier when we want to build .dwp files. - CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, - dwarf::DW_FORM_data8, 0); - // Now construct the skeleton CU associated. - constructSkeletonCU(CUNode); - } + for (unsigned i = 0, e = ImportedEntities.getNumElements(); i != e; ++i) + constructImportedEntityDIE(CU, ImportedEntities.getElement(i)); } // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); // Prime section data. - SectionMap.insert(Asm->getObjFileLowering().getTextSection()); + SectionMap[Asm->getObjFileLowering().getTextSection()]; } // Attach DW_AT_inline attribute with inlined subprogram DIEs. @@ -885,21 +956,20 @@ void DwarfDebug::computeInlinedDIEs() { for (SmallPtrSet::iterator AI = InlinedSubprogramDIEs.begin(), AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { DIE *ISP = *AI; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); } for (DenseMap::iterator AI = AbstractSPDies.begin(), AE = AbstractSPDies.end(); AI != AE; ++AI) { DIE *ISP = AI->second; if (InlinedSubprogramDIEs.count(ISP)) continue; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, None, dwarf::DW_INL_inlined); } } // Collect info for variables that were optimized out. 
void DwarfDebug::collectDeadVariables() { const Module *M = MMI->getModule(); - DenseMap DeadFnScopeMap; if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { @@ -907,33 +977,70 @@ void DwarfDebug::collectDeadVariables() { DIArray Subprograms = TheCU.getSubprograms(); for (unsigned i = 0, e = Subprograms.getNumElements(); i != e; ++i) { DISubprogram SP(Subprograms.getElement(i)); - if (ProcessedSPNodes.count(SP) != 0) continue; - if (!SP.Verify()) continue; - if (!SP.isDefinition()) continue; + if (ProcessedSPNodes.count(SP) != 0) + continue; + if (!SP.isSubprogram()) + continue; + if (!SP.isDefinition()) + continue; DIArray Variables = SP.getVariables(); - if (Variables.getNumElements() == 0) continue; - - LexicalScope *Scope = - new LexicalScope(NULL, DIDescriptor(SP), NULL, false); - DeadFnScopeMap[SP] = Scope; + if (Variables.getNumElements() == 0) + continue; // Construct subprogram DIE and add variables DIEs. CompileUnit *SPCU = CUMap.lookup(TheCU); assert(SPCU && "Unable to find Compile Unit!"); + // FIXME: See the comment in constructSubprogramDIE about duplicate + // subprogram DIEs. constructSubprogramDIE(SPCU, SP); - DIE *ScopeDIE = SPCU->getDIE(SP); + DIE *SPDIE = SPCU->getDIE(SP); for (unsigned vi = 0, ve = Variables.getNumElements(); vi != ve; ++vi) { DIVariable DV(Variables.getElement(vi)); - if (!DV.Verify()) continue; - DbgVariable *NewVar = new DbgVariable(DV, NULL); + if (!DV.isVariable()) + continue; + DbgVariable NewVar(DV, NULL, this); if (DIE *VariableDIE = - SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope())) - ScopeDIE->addChild(VariableDIE); + SPCU->constructVariableDIE(NewVar, false)) + SPDIE->addChild(VariableDIE); } } } } - DeleteContainerSeconds(DeadFnScopeMap); +} + +// Type Signature [7.27] and ODR Hash code. + +/// \brief Grabs the string in whichever attribute is passed in and returns +/// a reference to it. Returns "" if the attribute doesn't exist. +static StringRef getDIEStringAttr(DIE *Die, unsigned Attr) { + DIEValue *V = Die->findAttribute(Attr); + + if (DIEString *S = dyn_cast_or_null(V)) + return S->getString(); + + return StringRef(""); +} + +/// Return true if the current DIE is contained within an anonymous namespace. +static bool isContainedInAnonNamespace(DIE *Die) { + DIE *Parent = Die->getParent(); + + while (Parent) { + if (Parent->getTag() == dwarf::DW_TAG_namespace && + getDIEStringAttr(Parent, dwarf::DW_AT_name) == "") + return true; + Parent = Parent->getParent(); + } + + return false; +} + +/// Test if the current CU language is C++ and that we have +/// a named type that is not contained in an anonymous namespace. +static bool shouldAddODRHash(CompileUnit *CU, DIE *Die) { + return CU->getLanguage() == dwarf::DW_LANG_C_plus_plus && + getDIEStringAttr(Die, dwarf::DW_AT_name) != "" && + !isContainedInAnonNamespace(Die); } void DwarfDebug::finalizeModuleInfo() { @@ -943,31 +1050,102 @@ void DwarfDebug::finalizeModuleInfo() { // Attach DW_AT_inline attribute with inlined subprogram DIEs. computeInlinedDIEs(); - // Emit DW_AT_containing_type attribute to connect types with their - // vtable holding type. + // Split out type units and conditionally add an ODR tag to the split + // out type. + // FIXME: Do type splitting. + for (unsigned i = 0, e = TypeUnits.size(); i != e; ++i) { + DIE *Die = TypeUnits[i]; + DIEHash Hash; + // If we've requested ODR hashes and it's applicable for an ODR hash then + // add the ODR signature now. 
+ // FIXME: This should be added onto the type unit, not the type, but this + // works as an intermediate stage. + if (GenerateODRHash && shouldAddODRHash(CUMap.begin()->second, Die)) + CUMap.begin()->second->addUInt(Die, dwarf::DW_AT_GNU_odr_signature, + dwarf::DW_FORM_data8, + Hash.computeDIEODRSignature(*Die)); + } + + // Handle anything that needs to be done on a per-cu basis. for (DenseMap::iterator CUI = CUMap.begin(), - CUE = CUMap.end(); CUI != CUE; ++CUI) { + CUE = CUMap.end(); + CUI != CUE; ++CUI) { CompileUnit *TheCU = CUI->second; + // Emit DW_AT_containing_type attribute to connect types with their + // vtable holding type. TheCU->constructContainingTypeDIEs(); + + // If we're splitting the dwarf out now that we've got the entire + // CU then construct a skeleton CU based upon it. + if (useSplitDwarf()) { + uint64_t ID = 0; + if (GenerateCUHash) { + DIEHash CUHash; + ID = CUHash.computeCUSignature(*TheCU->getCUDie()); + } + // This should be a unique identifier when we want to build .dwp files. + TheCU->addUInt(TheCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + // Now construct the skeleton CU associated. + CompileUnit *SkCU = constructSkeletonCU(TheCU); + // This should be a unique identifier when we want to build .dwp files. + SkCU->addUInt(SkCU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, + dwarf::DW_FORM_data8, ID); + } } - // Compute DIE offsets and sizes. + // Compute DIE offsets and sizes. InfoHolder.computeSizeAndOffsets(); if (useSplitDwarf()) SkeletonHolder.computeSizeAndOffsets(); } void DwarfDebug::endSections() { - // Standard sections final addresses. - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end")); - Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getDataSection()); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("data_end")); + // Filter labels by section. + for (size_t n = 0; n < ArangeLabels.size(); n++) { + const SymbolCU &SCU = ArangeLabels[n]; + if (SCU.Sym->isInSection()) { + // Make a note of this symbol and it's section. + const MCSection *Section = &SCU.Sym->getSection(); + if (!Section->getKind().isMetadata()) + SectionMap[Section].push_back(SCU); + } else { + // Some symbols (e.g. common/bss on mach-o) can have no section but still + // appear in the output. This sucks as we rely on sections to build + // arange spans. We can do it without, but it's icky. + SectionMap[NULL].push_back(SCU); + } + } - // End text sections. - for (unsigned I = 0, E = SectionMap.size(); I != E; ++I) { - Asm->OutStreamer.SwitchSection(SectionMap[I]); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1)); + // Build a list of sections used. + std::vector Sections; + for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); + it++) { + const MCSection *Section = it->first; + Sections.push_back(Section); + } + + // Sort the sections into order. + // This is only done to ensure consistent output order across different runs. + std::sort(Sections.begin(), Sections.end(), SectionSort); + + // Add terminating symbols for each section. + for (unsigned ID=0;IDGetTempSymbol("debug_end", ID); + Asm->OutStreamer.SwitchSection(Section); + Asm->OutStreamer.EmitLabel(Sym); + } + + // Insert a final terminator. + SectionMap[Section].push_back(SymbolCU(NULL, Sym)); } } @@ -984,6 +1162,8 @@ void DwarfDebug::endModule() { finalizeModuleInfo(); if (!useSplitDwarf()) { + emitDebugStr(); + // Emit all the DIEs into a debug info section. 
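// A standalone sketch (not part of the patch) of the split-DWARF pairing done
// above: whatever dwo_id is chosen (zero, or a CU hash under
// -generate-cu-hash), the same 8-byte value must be stamped on both the full
// unit and its skeleton so a consumer can match them up. std::hash is an
// arbitrary stand-in for DIEHash::computeCUSignature.
#include <cassert>
#include <cstdint>
#include <functional>
#include <string>

struct UnitStub { std::uint64_t DwoId; std::string Name; };

int main() {
  UnitStub Full = {0, "a.cpp"};
  UnitStub Skeleton = {0, "a.cpp"};
  std::uint64_t ID = std::hash<std::string>()(Full.Name); // pretend signature
  Full.DwoId = ID;     // DW_AT_GNU_dwo_id on the .dwo side
  Skeleton.DwoId = ID; // the identical value on the skeleton CU
  assert(Full.DwoId == Skeleton.DwoId);
  return 0;
}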
emitDebugInfo(); @@ -1002,15 +1182,12 @@ void DwarfDebug::endModule() { // Emit info into a debug macinfo section. emitDebugMacInfo(); - // Emit inline info. - // TODO: When we don't need the option anymore we - // can remove all of the code that this section - // depends upon. - if (useDarwinGDBCompat()) - emitDebugInlineInfo(); } else { // TODO: Fill this in for separated debug sections and separate // out information into new sections. + emitDebugStr(); + if (useSplitDwarf()) + emitDebugStrDWO(); // Emit the debug info section and compile units. emitDebugInfo(); @@ -1035,12 +1212,6 @@ void DwarfDebug::endModule() { // Emit DWO addresses. InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection()); - // Emit inline info. - // TODO: When we don't need the option anymore we - // can remove all of the code that this section - // depends upon. - if (useDarwinGDBCompat()) - emitDebugInlineInfo(); } // Emit info into the dwarf accelerator table sections. @@ -1051,20 +1222,11 @@ void DwarfDebug::endModule() { emitAccelTypes(); } - // Emit info into a debug pubnames section, if requested. - if (GenerateDwarfPubNamesSection) - emitDebugPubnames(); - - // Emit info into a debug pubtypes section. - // TODO: When we don't need the option anymore we can - // remove all of the code that adds to the table. - if (useDarwinGDBCompat()) - emitDebugPubTypes(); - - // Finally emit string information into a string table. - emitDebugStr(); - if (useSplitDwarf()) - emitDebugStrDWO(); + // Emit the pubnames and pubtypes sections if requested. + if (HasDwarfPubSections) { + emitDebugPubNames(GenerateGnuPubSections); + emitDebugPubTypes(GenerateGnuPubSections); + } // clean up. SPMap.clear(); @@ -1072,7 +1234,7 @@ void DwarfDebug::endModule() { E = CUMap.end(); I != E; ++I) delete I->second; - for (SmallVector::iterator I = SkeletonCUs.begin(), + for (SmallVectorImpl::iterator I = SkeletonCUs.begin(), E = SkeletonCUs.end(); I != E; ++I) delete *I; @@ -1094,7 +1256,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, if (!Scope) return NULL; - AbsDbgVariable = new DbgVariable(Var, NULL); + AbsDbgVariable = new DbgVariable(Var, NULL, this); addScopeVariable(Scope, AbsDbgVariable); AbstractVariables[Var] = AbsDbgVariable; return AbsDbgVariable; @@ -1143,7 +1305,7 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, continue; DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.second); - DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable); + DbgVariable *RegVar = new DbgVariable(DV, AbsDbgVariable, this); RegVar->setFrameIndex(VP.first); if (!addCurrentFnArgument(MF, RegVar, Scope)) addScopeVariable(Scope, RegVar); @@ -1158,7 +1320,8 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); return MI->getNumOperands() == 3 && MI->getOperand(0).isReg() && MI->getOperand(0).getReg() && - MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; + (MI->getOperand(1).isImm() || + (MI->getOperand(1).isReg() && MI->getOperand(1).getReg() == 0U)); } // Get .debug_loc entry for the instruction range starting at MI. 
@@ -1168,16 +1331,12 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, const MachineInstr *MI) { const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); - if (MI->getNumOperands() != 3) { - MachineLocation MLoc = Asm->getDebugValueLocation(MI); - return DotDebugLocEntry(FLabel, SLabel, MLoc, Var); - } - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) { + assert(MI->getNumOperands() == 3); + if (MI->getOperand(0).isReg()) { MachineLocation MLoc; - // TODO: Currently an offset of 0 in a DBG_VALUE means - // we need to generate a direct register value. - // There is no way to specify an indirect value with offset 0. - if (MI->getOperand(1).getImm() == 0) + // If the second operand is an immediate, this is a + // register-indirect address. + if (!MI->getOperand(1).isImm()) MLoc.set(MI->getOperand(0).getReg()); else MLoc.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); @@ -1198,7 +1357,7 @@ void DwarfDebug::collectVariableInfo(const MachineFunction *MF, SmallPtrSet &Processed) { - // collection info from MMI table. + // Grab the variable info that was squirreled away in the MMI side-table. collectVariableInfoFromMMITable(MF, Processed); for (SmallVectorImpl::const_iterator @@ -1231,7 +1390,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, Processed.insert(DV); assert(MInsn->isDebugValue() && "History must begin with debug value"); DbgVariable *AbsVar = findAbstractVariable(DV, MInsn->getDebugLoc()); - DbgVariable *RegVar = new DbgVariable(DV, AbsVar); + DbgVariable *RegVar = new DbgVariable(DV, AbsVar, this); if (!addCurrentFnArgument(MF, RegVar, Scope)) addScopeVariable(Scope, RegVar); if (AbsVar) @@ -1291,10 +1450,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, DIArray Variables = DISubprogram(FnScope->getScopeNode()).getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.Verify() || !Processed.insert(DV)) + if (!DV || !DV.isVariable() || !Processed.insert(DV)) continue; if (LexicalScope *Scope = LScopes.findLexicalScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, NULL)); + addScopeVariable(Scope, new DbgVariable(DV, NULL, this)); } } @@ -1388,19 +1547,19 @@ void DwarfDebug::identifyScopeMarkers() { while (!WorkList.empty()) { LexicalScope *S = WorkList.pop_back_val(); - const SmallVector &Children = S->getChildren(); + const SmallVectorImpl &Children = S->getChildren(); if (!Children.empty()) - for (SmallVector::const_iterator SI = Children.begin(), + for (SmallVectorImpl::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) WorkList.push_back(*SI); if (S->isAbstractScope()) continue; - const SmallVector &Ranges = S->getRanges(); + const SmallVectorImpl &Ranges = S->getRanges(); if (Ranges.empty()) continue; - for (SmallVector::const_iterator RI = Ranges.begin(), + for (SmallVectorImpl::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { assert(RI->first && "InsnRange does not have first instruction!"); assert(RI->second && "InsnRange does not have second instruction!"); @@ -1422,7 +1581,7 @@ static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); - if (SP.Verify()) { + if (SP.isSubprogram()) { // Check for number of operands since the compatibility is // cheap here. 
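// A standalone sketch (not part of the patch) of the revised DBG_VALUE
// decoding above: the first operand names the register, and the second picks
// the flavor; an immediate means register-indirect with that offset, a zero
// register means a direct register value. Operand and location types are
// simplified stand-ins.
#include <cassert>

struct Loc { unsigned Reg; long Offset; bool Indirect; };

static Loc decodeDbgValue(unsigned Reg, bool Op1IsImm, long Op1Imm) {
  Loc L = {Reg, 0, false};
  if (Op1IsImm) { // register-indirect address
    L.Offset = Op1Imm;
    L.Indirect = true;
  }
  return L;
}

int main() {
  Loc Direct = decodeDbgValue(5, false, 0); // second operand is reg 0
  Loc Indirect = decodeDbgValue(5, true, 16);
  assert(!Direct.Indirect);
  assert(Indirect.Indirect && Indirect.Offset == 16);
  return 0;
}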
if (SP->getNumOperands() > 19) @@ -1437,36 +1596,45 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { // Gather pre-function debug information. Assumes being called immediately // after the function entry point has been emitted. void DwarfDebug::beginFunction(const MachineFunction *MF) { - if (!MMI->hasDebugInfo()) return; + + // If there's no debug info for the function we're not going to do anything. + if (!MMI->hasDebugInfo()) + return; + + // Grab the lexical scopes for the function, if we don't have any of those + // then we're not going to be able to do anything. LScopes.initialize(*MF); - if (LScopes.empty()) return; + if (LScopes.empty()) + return; + + assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); + + // Make sure that each lexical scope will have a begin/end label. identifyScopeMarkers(); // Set DwarfCompileUnitID in MCContext to the Compile Unit this function - // belongs to. + // belongs to so that we add to the correct per-cu line table in the + // non-asm case. LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); - if (Asm->TM.hasMCUseLoc() && - Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + if (Asm->TM.hasMCUseLoc() && Asm->OutStreamer.hasRawTextSupport()) // Use a single line table if we are using .loc and generating assembly. Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); else Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); - FunctionBeginSym = Asm->GetTempSymbol("func_begin", - Asm->getFunctionNumber()); + // Emit a label for the function so that we have a beginning address. + FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionBeginSym); - assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); - const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); // LiveUserVar - Map physreg numbers to the MDNode they contain. - std::vector LiveUserVar(TRI->getNumRegs()); + std::vector LiveUserVar(TRI->getNumRegs()); - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { bool AtBlockEntry = true; for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end(); II != IE; ++II) { @@ -1477,22 +1645,21 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Keep track of user variables. const MDNode *Var = - MI->getOperand(MI->getNumOperands() - 1).getMetadata(); + MI->getOperand(MI->getNumOperands() - 1).getMetadata(); // Variable is in a register, we need to check for clobbers. if (isDbgValueInDefinedReg(MI)) LiveUserVar[MI->getOperand(0).getReg()] = Var; // Check the history of this variable. - SmallVectorImpl &History = DbgValues[Var]; + SmallVectorImpl &History = DbgValues[Var]; if (History.empty()) { UserVariables.push_back(Var); // The first mention of a function argument gets the FunctionBeginSym // label, so arguments are visible when breaking at function entry. 
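// A standalone sketch (not part of the patch) of the DBG_VALUE history
// coalescing above: when the newest entry is identical to the one before it,
// the duplicate is dropped so a variable's location range is not split
// needlessly. Entries reduced to ints standing in for instruction identity.
#include <cassert>
#include <vector>

static void pushCoalesced(std::vector<int> &History, int Entry) {
  History.push_back(Entry);
  if (History.size() >= 2 && History.back() == History[History.size() - 2])
    History.pop_back(); // identical to its predecessor: coalesce
}

int main() {
  std::vector<int> H;
  pushCoalesced(H, 1);
  pushCoalesced(H, 1); // coalesced away
  pushCoalesced(H, 2);
  assert(H.size() == 2 && H[0] == 1 && H[1] == 2);
  return 0;
}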
DIVariable DV(Var); - if (DV.Verify() && DV.getTag() == dwarf::DW_TAG_arg_variable && - DISubprogram(getDISubprogram(DV.getContext())) - .describes(MF->getFunction())) + if (DV.isVariable() && DV.getTag() == dwarf::DW_TAG_arg_variable && + getDISubprogram(DV.getContext()).describes(MF->getFunction())) LabelsBeforeInsn[MI] = FunctionBeginSym; } else { // We have seen this variable before. Try to coalesce DBG_VALUEs. @@ -1502,8 +1669,8 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (History.size() >= 2 && Prev->isIdenticalTo(History[History.size() - 2])) { DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << *Prev - << "\t" << *History[History.size() - 2] << "\n"); + << "\t" << *Prev << "\t" + << *History[History.size() - 2] << "\n"); History.pop_back(); } @@ -1514,17 +1681,15 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Previous register assignment needs to terminate at the end of // its basic block. MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); + PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) { // Drop DBG_VALUE for empty range. DEBUG(dbgs() << "Dropping DBG_VALUE for empty range:\n" - << "\t" << *Prev << "\n"); + << "\t" << *Prev << "\n"); History.pop_back(); - } - else { + } else if (llvm::next(PrevMBB) != PrevMBB->getParent()->end()) // Terminate after LastMI. History.push_back(LastMI); - } } } } @@ -1542,11 +1707,12 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Check if the instruction clobbers any registers with debug vars. for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { + MOE = MI->operands_end(); + MOI != MOE; ++MOI) { if (!MOI->isReg() || !MOI->isDef() || !MOI->getReg()) continue; - for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); - AI.isValid(); ++AI) { + for (MCRegAliasIterator AI(MOI->getReg(), TRI, true); AI.isValid(); + ++AI) { unsigned Reg = *AI; const MDNode *Var = LiveUserVar[Reg]; if (!Var) @@ -1558,7 +1724,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { DbgValueHistoryMap::iterator HistI = DbgValues.find(Var); if (HistI == DbgValues.end()) continue; - SmallVectorImpl &History = HistI->second; + SmallVectorImpl &History = HistI->second; if (History.empty()) continue; const MachineInstr *Prev = History.back(); @@ -1580,7 +1746,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { for (DbgValueHistoryMap::iterator I = DbgValues.begin(), E = DbgValues.end(); I != E; ++I) { - SmallVectorImpl &History = I->second; + SmallVectorImpl &History = I->second; if (History.empty()) continue; @@ -1589,11 +1755,11 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { const MachineBasicBlock *PrevMBB = Prev->getParent(); MachineBasicBlock::const_iterator LastMI = - PrevMBB->getLastNonDebugInstr(); + PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) // Drop DBG_VALUE for empty range. History.pop_back(); - else { + else if (PrevMBB != &PrevMBB->getParent()->back()) { // Terminate after LastMI. History.push_back(LastMI); } @@ -1613,45 +1779,43 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { // Record beginning of function. 
if (!PrologEndLoc.isUnknown()) { - DebugLoc FnStartDL = getFnDebugLoc(PrologEndLoc, - MF->getFunction()->getContext()); - recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), - FnStartDL.getScope(MF->getFunction()->getContext()), - // We'd like to list the prologue as "not statements" but GDB behaves - // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. - DWARF2_FLAG_IS_STMT); + DebugLoc FnStartDL = + getFnDebugLoc(PrologEndLoc, MF->getFunction()->getContext()); + recordSourceLine( + FnStartDL.getLine(), FnStartDL.getCol(), + FnStartDL.getScope(MF->getFunction()->getContext()), + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. + DWARF2_FLAG_IS_STMT); } } void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { SmallVectorImpl &Vars = ScopeVariables[LS]; DIVariable DV = Var->getVariable(); - if (DV.getTag() == dwarf::DW_TAG_arg_variable) { - DISubprogram Ctxt(DV.getContext()); - DIArray Variables = Ctxt.getVariables(); - // If the variable is a parameter (arg_variable) and this is an optimized - // build (the subprogram has a 'variables' list) make sure we keep the - // parameters in order. Otherwise we would produce an incorrect function - // type with parameters out of order if function parameters were used out of - // order or unused (see the call to addScopeVariable in endFunction where - // the remaining unused variables (including parameters) are added). - if (unsigned NumVariables = Variables.getNumElements()) { - // Keep the parameters at the start of the variables list. Search through - // current variable list (Vars) and the full function variable list in - // lock-step looking for this parameter in the full list to find the - // insertion point. - SmallVectorImpl::iterator I = Vars.begin(); - unsigned j = 0; - while (I != Vars.end() && j != NumVariables && - Variables.getElement(j) != DV && - (*I)->getVariable().getTag() == dwarf::DW_TAG_arg_variable) { - if (Variables.getElement(j) == (*I)->getVariable()) - ++I; - ++j; - } - Vars.insert(I, Var); - return; + // Variables with positive arg numbers are parameters. + if (unsigned ArgNum = DV.getArgNumber()) { + // Keep all parameters in order at the start of the variable list to ensure + // function types are correct (no out-of-order parameters) + // + // This could be improved by only doing it for optimized builds (unoptimized + // builds have the right order to begin with), searching from the back (this + // would catch the unoptimized case quickly), or doing a binary search + // rather than linear search. + SmallVectorImpl::iterator I = Vars.begin(); + while (I != Vars.end()) { + unsigned CurNum = (*I)->getVariable().getArgNumber(); + // A local (non-parameter) variable has been found, insert immediately + // before it. + if (CurNum == 0) + break; + // A later indexed parameter has been found, insert immediately before it. + if (CurNum > ArgNum) + break; + ++I; } + Vars.insert(I, Var); + return; } Vars.push_back(Var); @@ -1681,12 +1845,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { for (unsigned i = 0, e = AList.size(); i != e; ++i) { LexicalScope *AScope = AList[i]; DISubprogram SP(AScope->getScopeNode()); - if (SP.Verify()) { + if (SP.isSubprogram()) { // Collect info for variables that were optimized out. 
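// A standalone sketch (not part of the patch) of the parameter-ordering
// insertion in addScopeVariable above: parameters (argument number > 0) are
// kept sorted at the front of the list and locals (argument number 0) trail,
// so the emitted function type sees its parameters in order. Variables are
// reduced to their argument numbers.
#include <cassert>
#include <vector>

static void addScopeVar(std::vector<unsigned> &Vars, unsigned ArgNum) {
  if (ArgNum) {
    std::vector<unsigned>::iterator I = Vars.begin();
    while (I != Vars.end()) {
      if (*I == 0 || *I > ArgNum) // first local, or a later parameter
        break;
      ++I;
    }
    Vars.insert(I, ArgNum);
    return;
  }
  Vars.push_back(ArgNum); // locals simply append
}

int main() {
  std::vector<unsigned> Vars;
  addScopeVar(Vars, 2); // parameter #2
  addScopeVar(Vars, 0); // a local
  addScopeVar(Vars, 1); // parameter #1 lands before #2
  assert(Vars.size() == 3 && Vars[0] == 1 && Vars[1] == 2 && Vars[2] == 0);
  return 0;
}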
DIArray Variables = SP.getVariables(); for (unsigned i = 0, e = Variables.getNumElements(); i != e; ++i) { DIVariable DV(Variables.getElement(i)); - if (!DV || !DV.Verify() || !ProcessedVars.insert(DV)) + if (!DV || !DV.isVariable() || !ProcessedVars.insert(DV)) continue; // Check that DbgVariable for DV wasn't created earlier, when // findAbstractVariable() was called for inlined instance of DV. @@ -1695,7 +1859,7 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { if (AbstractVariables.lookup(CleanDV)) continue; if (LexicalScope *Scope = LScopes.findAbstractScope(DV.getContext())) - addScopeVariable(Scope, new DbgVariable(DV, NULL)); + addScopeVariable(Scope, new DbgVariable(DV, NULL, this)); } } if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0) @@ -1707,11 +1871,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); - DebugFrames.push_back(FunctionDebugFrameInfo(Asm->getFunctionNumber(), - MMI->getFrameMoves())); - // Clear debug info - for (DenseMap<LexicalScope *, SmallVector<DbgVariable *, 8> >::iterator + for (ScopeVariablesMap::iterator I = ScopeVariables.begin(), E = ScopeVariables.end(); I != E; ++I) DeleteContainerPointers(I->second); ScopeVariables.clear(); @@ -1767,7 +1928,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, // Emit Methods //===----------------------------------------------------------------------===// -// Compute the size and offset of a DIE. +// Compute the size and offset of a DIE. The offset is relative to the start of +// the CU. It returns the offset after laying out the DIE. unsigned DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Get the children. @@ -1778,7 +1940,7 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Get the abbreviation for this DIE. unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1); + const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1]; // Set DIE offset Die->setOffset(Offset); @@ -1810,21 +1972,25 @@ DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { return Offset; } -// Compute the size and offset of all the DIEs. +// Compute the size and offset for each DIE. void DwarfUnits::computeSizeAndOffsets() { - // Offset from the beginning of debug info section. - unsigned AccuOffset = 0; + // Offset from the first CU in the debug info section is 0 initially. + unsigned SecOffset = 0; + + // Iterate over each compile unit and set the size and offsets for each + // DIE within each compile unit. All offsets are CU relative. for (SmallVectorImpl<CompileUnit *>::iterator I = CUs.begin(), E = CUs.end(); I != E; ++I) { - (*I)->setDebugInfoOffset(AccuOffset); - unsigned Offset = - sizeof(int32_t) + // Length of Compilation Unit Info - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) + (*I)->setDebugInfoOffset(SecOffset); + // CU-relative offset is reset to 0 here. + unsigned Offset = sizeof(int32_t) + // Length of Unit Info + (*I)->getHeaderSize(); // Unit-specific headers + + // EndOffset here is CU-relative, after laying out + // all of the CU DIE.
unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); - AccuOffset += EndOffset; + SecOffset += EndOffset; } } @@ -1849,9 +2015,16 @@ void DwarfDebug::emitSectionLabels() { DwarfLineSectionSym = emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); emitSectionSym(Asm, TLOF.getDwarfLocSection()); - if (GenerateDwarfPubNamesSection) + if (GenerateGnuPubSections) { + DwarfGnuPubNamesSectionSym = + emitSectionSym(Asm, TLOF.getDwarfGnuPubNamesSection()); + DwarfGnuPubTypesSectionSym = + emitSectionSym(Asm, TLOF.getDwarfGnuPubTypesSection()); + } else if (HasDwarfPubSections) { emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); - emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); + emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); + } + DwarfStrSectionSym = emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); if (useSplitDwarf()) { @@ -1871,10 +2044,10 @@ void DwarfDebug::emitSectionLabels() { } // Recursively emits a debug information entry. -void DwarfDebug::emitDIE(DIE *Die, std::vector *Abbrevs) { +void DwarfDebug::emitDIE(DIE *Die, ArrayRef Abbrevs) { // Get the abbreviation for this DIE. unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1); + const DIEAbbrev *Abbrev = Abbrevs[AbbrevNumber - 1]; // Emit the code (index) for the abbreviation. if (Asm->isVerbose()) @@ -1889,26 +2062,44 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector *Abbrevs) { // Emit the DIE attribute values. for (unsigned i = 0, N = Values.size(); i < N; ++i) { - unsigned Attr = AbbrevData[i].getAttribute(); - unsigned Form = AbbrevData[i].getForm(); + dwarf::Attribute Attr = AbbrevData[i].getAttribute(); + dwarf::Form Form = AbbrevData[i].getForm(); assert(Form && "Too many attributes for DIE (check abbreviation)"); if (Asm->isVerbose()) Asm->OutStreamer.AddComment(dwarf::AttributeString(Attr)); switch (Attr) { - case dwarf::DW_AT_abstract_origin: { + case dwarf::DW_AT_abstract_origin: + case dwarf::DW_AT_type: + case dwarf::DW_AT_friend: + case dwarf::DW_AT_specification: + case dwarf::DW_AT_import: + case dwarf::DW_AT_containing_type: { DIEEntry *E = cast(Values[i]); DIE *Origin = E->getEntry(); unsigned Addr = Origin->getOffset(); if (Form == dwarf::DW_FORM_ref_addr) { + assert(!useSplitDwarf() && "TODO: dwo files can't have relocations."); // For DW_FORM_ref_addr, output the offset from beginning of debug info // section. Origin->getOffset() returns the offset from start of the // compile unit. - DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; - Addr += Holder.getCUOffset(Origin->getCompileUnit()); + CompileUnit *CU = CUDieMap.lookup(Origin->getCompileUnit()); + assert(CU && "CUDie should belong to a CU."); + Addr += CU->getDebugInfoOffset(); + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + Asm->EmitLabelPlusOffset(DwarfInfoSectionSym, Addr, + DIEEntry::getRefAddrSize(Asm)); + else + Asm->EmitLabelOffsetDifference(DwarfInfoSectionSym, Addr, + DwarfInfoSectionSym, + DIEEntry::getRefAddrSize(Asm)); + } else { + // Make sure Origin belong to the same CU. 
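// A standalone sketch (not part of the patch) of the DW_FORM_ref_addr
// arithmetic in emitDIE above: a DIE's stored offset is CU-relative, so a
// cross-CU reference adds the owning CU's own offset within .debug_info to
// obtain a section-relative address. Numbers are illustrative only.
#include <cassert>

int main() {
  unsigned CUDebugInfoOffset = 0x200; // where the origin's CU begins
  unsigned DieOffsetInCU = 0x34;      // Origin->getOffset()
  unsigned RefAddr = CUDebugInfoOffset + DieOffsetInCU;
  assert(RefAddr == 0x234);           // value emitted for DW_FORM_ref_addr
  return 0;
}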
+ assert(Die->getCompileUnit() == Origin->getCompileUnit() && + "The referenced DIE should belong to the same CU in ref4"); + Asm->EmitInt32(Addr); } - Asm->EmitInt32(Addr); break; } case dwarf::DW_AT_ranges: { @@ -1930,7 +2121,7 @@ void DwarfDebug::emitDIE(DIE *Die, std::vector *Abbrevs) { case dwarf::DW_AT_location: { if (DIELabel *L = dyn_cast(Values[i])) { if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - Asm->EmitLabelReference(L->getValue(), 4); + Asm->EmitSectionOffset(L->getValue(), DwarfDebugLocSectionSym); else Asm->EmitLabelDifference(L->getValue(), DwarfDebugLocSectionSym, 4); } else { @@ -1984,20 +2175,10 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, TheCU->getUniqueID())); // Emit size of content not including length itself - unsigned ContentSize = Die->getSize() + - sizeof(int16_t) + // DWARF version number - sizeof(int32_t) + // Offset Into Abbrev. Section - sizeof(int8_t); // Pointer Size (in bytes) + Asm->OutStreamer.AddComment("Length of Unit"); + Asm->EmitInt32(TheCU->getHeaderSize() + Die->getSize()); - Asm->OutStreamer.AddComment("Length of Compilation Unit Info"); - Asm->EmitInt32(ContentSize); - Asm->OutStreamer.AddComment("DWARF version number"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), - ASectionSym); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); + TheCU->emitHeader(ASection, ASectionSym); DD->emitDIE(Die, Abbreviations); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(), @@ -2005,19 +2186,6 @@ void DwarfUnits::emitUnits(DwarfDebug *DD, } } -/// For a given compile unit DIE, returns offset from beginning of debug info. -unsigned DwarfUnits::getCUOffset(DIE *Die) { - assert(Die->getTag() == dwarf::DW_TAG_compile_unit && - "Input DIE should be compile unit in getCUOffset."); - for (SmallVectorImpl::iterator I = CUs.begin(), - E = CUs.end(); I != E; ++I) { - CompileUnit *TheCU = *I; - if (TheCU->getCUDie() == Die) - return TheCU->getDebugInfoOffset(); - } - llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits."); -} - // Emit the debug info section. void DwarfDebug::emitDebugInfo() { DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; @@ -2091,7 +2259,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2099,7 +2267,7 @@ void DwarfDebug::emitAccelNames() { const StringMap > &Names = TheCU->getAccelNames(); for (StringMap >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector &Entities = GI->second; for (std::vector::const_iterator DI = Entities.begin(), DE = Entities.end(); DI != DE; ++DI) @@ -2120,7 +2288,7 @@ void DwarfDebug::emitAccelNames() { // Emit objective C classes and categories into a hashed accelerator table // section. 
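The Objective-C table below follows the same single-atom pattern as emitAccelNames above: each entry carries one atom, the DIE offset, encoded as 4-byte data. A minimal sketch of that (type, form) pairing, with AtomDesc as a hypothetical stand-in for the richer DwarfAccelTable::Atom:

    #include <cstdint>
    #include <vector>

    struct AtomDesc {
      uint16_t Type; // e.g. dwarf::DW_ATOM_die_offset (1)
      uint16_t Form; // e.g. dwarf::DW_FORM_data4 (0x06)
    };

    // Name tables use a single atom; emitAccelTypes below adds the DIE
    // tag (data2) and type flags (data1) as two further atoms.
    std::vector<AtomDesc> nameTableAtoms() {
      return {{1, 0x06}};
    }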
void DwarfDebug::emitAccelObjC() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2128,7 +2296,7 @@ void DwarfDebug::emitAccelObjC() { const StringMap > &Names = TheCU->getAccelObjC(); for (StringMap >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector &Entities = GI->second; for (std::vector::const_iterator DI = Entities.begin(), DE = Entities.end(); DI != DE; ++DI) @@ -2148,7 +2316,7 @@ void DwarfDebug::emitAccelObjC() { // Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { - DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + DwarfAccelTable AT(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { @@ -2156,7 +2324,7 @@ void DwarfDebug::emitAccelNamespaces() { const StringMap > &Names = TheCU->getAccelNamespace(); for (StringMap >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector &Entities = GI->second; for (std::vector::const_iterator DI = Entities.begin(), DE = Entities.end(); DI != DE; ++DI) @@ -2177,11 +2345,11 @@ void DwarfDebug::emitAccelNamespaces() { // Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { std::vector Atoms; - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_offset, dwarf::DW_FORM_data4)); - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTag, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_die_tag, dwarf::DW_FORM_data2)); - Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeTypeFlags, + Atoms.push_back(DwarfAccelTable::Atom(dwarf::DW_ATOM_type_flags, dwarf::DW_FORM_data1)); DwarfAccelTable AT(Atoms); for (DenseMap::iterator I = CUMap.begin(), @@ -2191,7 +2359,7 @@ void DwarfDebug::emitAccelTypes() { = TheCU->getAccelTypes(); for (StringMap > >::const_iterator GI = Names.begin(), GE = Names.end(); GI != GE; ++GI) { - const char *Name = GI->getKeyData(); + StringRef Name = GI->getKey(); const std::vector > &Entities = GI->second; for (std::vector >::const_iterator DI = Entities.begin(), DE = Entities.end(); DI !=DE; ++DI) @@ -2209,23 +2377,85 @@ void DwarfDebug::emitAccelTypes() { AT.Emit(Asm, SectionBegin, &InfoHolder); } -/// emitDebugPubnames - Emit visible names into a debug pubnames section. +// Public name handling. +// The format for the various pubnames: +// +// dwarf pubnames - offset/name pairs where the offset is the offset into the CU +// for the DIE that is named. +// +// gnu pubnames - offset/index value/name tuples where the offset is the offset +// into the CU and the index value is computed according to the type of value +// for the DIE that is named. +// +// For type units the offset is the offset of the skeleton DIE. For split dwarf +// it's the offset within the debug_info/debug_types dwo section, however, the +// reference in the pubname header doesn't change. + +/// computeIndexValue - Compute the gdb index value for the DIE and CU. 
+static dwarf::PubIndexEntryDescriptor computeIndexValue(CompileUnit *CU, + DIE *Die) { + dwarf::GDBIndexEntryLinkage Linkage = dwarf::GIEL_STATIC; + + // We could have a specification DIE that has most of our knowledge, + // look for that now. + DIEValue *SpecVal = Die->findAttribute(dwarf::DW_AT_specification); + if (SpecVal) { + DIE *SpecDIE = cast<DIEEntry>(SpecVal)->getEntry(); + if (SpecDIE->findAttribute(dwarf::DW_AT_external)) + Linkage = dwarf::GIEL_EXTERNAL; + } else if (Die->findAttribute(dwarf::DW_AT_external)) + Linkage = dwarf::GIEL_EXTERNAL; + + switch (Die->getTag()) { + case dwarf::DW_TAG_class_type: + case dwarf::DW_TAG_structure_type: + case dwarf::DW_TAG_union_type: + case dwarf::DW_TAG_enumeration_type: + return dwarf::PubIndexEntryDescriptor( + dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus + ? dwarf::GIEL_STATIC + : dwarf::GIEL_EXTERNAL); + case dwarf::DW_TAG_typedef: + case dwarf::DW_TAG_base_type: + case dwarf::DW_TAG_subrange_type: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_TYPE, dwarf::GIEL_STATIC); + case dwarf::DW_TAG_namespace: + return dwarf::GIEK_TYPE; + case dwarf::DW_TAG_subprogram: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_FUNCTION, Linkage); + case dwarf::DW_TAG_constant: + case dwarf::DW_TAG_variable: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, Linkage); + case dwarf::DW_TAG_enumerator: + return dwarf::PubIndexEntryDescriptor(dwarf::GIEK_VARIABLE, + dwarf::GIEL_STATIC); + default: + return dwarf::GIEK_NONE; + } +} + +/// emitDebugPubNames - Emit visible names into a debug pubnames section. /// -void DwarfDebug::emitDebugPubnames() { +void DwarfDebug::emitDebugPubNames(bool GnuStyle) { const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + const MCSection *PSec = + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubNamesSection() : Asm->getObjFileLowering().getDwarfPubNamesSection(); typedef DenseMap<const MDNode *, CompileUnit *> CUMapType; for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { CompileUnit *TheCU = I->second; unsigned ID = TheCU->getUniqueID(); - if (TheCU->getGlobalNames().empty()) - continue; - // Start the dwarf pubnames section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubNamesSection()); + Asm->OutStreamer.SwitchSection(PSec); + + // Emit a label so we can reference the beginning of this pubname section. + if (GnuStyle) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubnames", + TheCU->getUniqueID())); + + // Emit the header. Asm->OutStreamer.AddComment("Length of Public Names Info"); Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID), Asm->GetTempSymbol("pubnames_begin", ID), 4); @@ -2233,7 +2463,7 @@ void DwarfDebug::emitDebugPubnames() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID)); Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + Asm->EmitInt16(dwarf::DW_PUBNAMES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), @@ -2244,18 +2474,27 @@ void DwarfDebug::emitDebugPubnames() { Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), 4); + // Emit the pubnames for this compilation unit.
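In the loop that follows, the GnuStyle branch emits a one-byte descriptor per name by packing the kind and linkage computed above. A hedged sketch of the packing, assuming the gdb-index layout (3-bit kind at bit offset 4, linkage flag at bit 7) that PubIndexEntryDescriptor::toBits() encodes:

    #include <cstdint>

    // Kind occupies bits 4-6, the static/external flag bit 7; the low
    // four bits are reserved and stay zero.
    uint8_t packIndexEntry(uint8_t Kind, uint8_t Linkage) {
      const unsigned KindOffset = 4, LinkageOffset = 7;
      return (Linkage << LinkageOffset) | (Kind << KindOffset);
    }

Assuming GIEK_FUNCTION = 3 and GIEL_EXTERNAL = 0, a function with external linkage packs to 0x30.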
const StringMap &Globals = TheCU->getGlobalNames(); for (StringMap::const_iterator GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { const char *Name = GI->getKeyData(); - const DIE *Entity = GI->second; + DIE *Entity = GI->second; Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); + Asm->OutStreamer.AddComment( + Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->EmitInt8(Desc.toBits()); + } + if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); - Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); } Asm->OutStreamer.AddComment("End Mark"); @@ -2264,55 +2503,78 @@ void DwarfDebug::emitDebugPubnames() { } } -void DwarfDebug::emitDebugPubTypes() { +void DwarfDebug::emitDebugPubTypes(bool GnuStyle) { + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + const MCSection *PSec = + GnuStyle ? Asm->getObjFileLowering().getDwarfGnuPubTypesSection() + : Asm->getObjFileLowering().getDwarfPubTypesSection(); + for (DenseMap::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { + E = CUMap.end(); + I != E; ++I) { CompileUnit *TheCU = I->second; // Start the dwarf pubtypes section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfPubTypesSection()); + Asm->OutStreamer.SwitchSection(PSec); + + // Emit a label so we can reference the beginning of this pubtype section. + if (GnuStyle) + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("gnu_pubtypes", + TheCU->getUniqueID())); + + // Emit the header. Asm->OutStreamer.AddComment("Length of Public Types Info"); Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), - Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); + Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), + Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin", - TheCU->getUniqueID())); + Asm->OutStreamer.EmitLabel( + Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID())); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DW_PUBTYPES_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); - Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), - TheCU->getUniqueID()), - DwarfInfoSectionSym); + Asm->EmitSectionOffset( + Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), + DwarfInfoSectionSym); Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), - TheCU->getUniqueID()), - Asm->GetTempSymbol(ISec->getLabelBeginName(), - TheCU->getUniqueID()), - 4); - - const StringMap &Globals = TheCU->getGlobalTypes(); - for (StringMap::const_iterator - GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + Asm->EmitLabelDifference( + Asm->GetTempSymbol(ISec->getLabelEndName(), TheCU->getUniqueID()), + Asm->GetTempSymbol(ISec->getLabelBeginName(), TheCU->getUniqueID()), 4); + + // Emit the pubtypes. 
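Both tables size their contribution with the begin/end temp-label trick visible in the headers above: the 4-byte length field is a label difference the assembler resolves, so the emitter never counts bytes by hand. Condensed into a sketch against the AsmPrinter interface used throughout this patch (the table body is elided):

    // ID is the compile unit's unique id, as in the surrounding code.
    void emitBracketedTable(AsmPrinter *Asm, unsigned ID) {
      MCSymbol *Begin = Asm->GetTempSymbol("pubtypes_begin", ID);
      MCSymbol *End = Asm->GetTempSymbol("pubtypes_end", ID);
      Asm->EmitLabelDifference(End, Begin, 4); // length, excluding itself
      Asm->OutStreamer.EmitLabel(Begin);
      // ... version, CU offset, name/offset pairs ...
      Asm->EmitInt32(0);                       // end mark
      Asm->OutStreamer.EmitLabel(End);
    }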
+ const StringMap &Globals = TheCU->getGlobalTypes(); + for (StringMap::const_iterator GI = Globals.begin(), + GE = Globals.end(); + GI != GE; ++GI) { const char *Name = GI->getKeyData(); DIE *Entity = GI->second; - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(Entity->getOffset()); - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); + if (GnuStyle) { + dwarf::PubIndexEntryDescriptor Desc = computeIndexValue(TheCU, Entity); + Asm->OutStreamer.AddComment( + Twine("Kind: ") + dwarf::GDBIndexEntryKindString(Desc.Kind) + ", " + + dwarf::GDBIndexEntryLinkageString(Desc.Linkage)); + Asm->EmitInt8(Desc.toBits()); + } + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("External Name"); + // Emit the name with a terminating null byte. - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength() + 1)); } Asm->OutStreamer.AddComment("End Mark"); Asm->EmitInt32(0); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end", - TheCU->getUniqueID())); + Asm->OutStreamer.EmitLabel( + Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID())); } } @@ -2367,24 +2629,18 @@ void DwarfUnits::emitAddresses(const MCSection *AddrSection) { // Start the dwarf addr section. Asm->OutStreamer.SwitchSection(AddrSection); - // Get all of the string pool entries and put them in an array by their ID so - // we can sort them. - SmallVector* >, 64> Entries; + // Order the address pool entries by ID + SmallVector Entries(AddressPool.size()); - for (DenseMap >::iterator - I = AddressPool.begin(), E = AddressPool.end(); + for (DenseMap::iterator I = AddressPool.begin(), + E = AddressPool.end(); I != E; ++I) - Entries.push_back(std::make_pair(I->second.second, &(I->second))); - - array_pod_sort(Entries.begin(), Entries.end()); + Entries[I->second] = I->first; for (unsigned i = 0, e = Entries.size(); i != e; ++i) { - // Emit a label for reference from debug information entries. - MCSymbol *Sym = Entries[i].second->first; - if (Sym) - Asm->EmitLabelReference(Entries[i].second->first, - Asm->getDataLayout().getPointerSize()); + // Emit an expression for reference from debug information entries. + if (const MCExpr *Expr = Entries[i]) + Asm->OutStreamer.EmitValue(Expr, Asm->getDataLayout().getPointerSize()); else Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize()); } @@ -2397,7 +2653,7 @@ void DwarfDebug::emitDebugStr() { Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); } -// Emit visible names into a debug loc section. +// Emit locations into the debug loc section. 
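The function below walks DotDebugLocEntries and serializes each one in the standard .debug_loc shape: begin and end address, a 2-byte size, then the DWARF expression bytes, with a (0, 0) address pair closing each list. A self-contained sketch of one record:

    #include <cstdint>
    #include <vector>

    // Mirrors what emitDebugLoc writes per entry; addresses are
    // pointer-size in the real emitter, fixed to 64-bit here.
    struct LocListRecord {
      uint64_t Begin = 0, End = 0;   // a (0, 0) pair terminates a list
      std::vector<uint8_t> Expr;     // DW_OP_reg*/DW_OP_breg*/constants
    };
    // The size field emitted between End and Expr is Expr.size(); the
    // patch measures it with a temp-label pair around the expression.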
void DwarfDebug::emitDebugLoc() { if (DotDebugLocEntries.empty()) return; @@ -2426,9 +2682,9 @@ void DwarfDebug::emitDebugLoc() { Asm->OutStreamer.EmitIntValue(0, Size); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); } else { - Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size); - Asm->OutStreamer.EmitSymbolValue(Entry.End, Size); - DIVariable DV(Entry.Variable); + Asm->OutStreamer.EmitSymbolValue(Entry.getBeginSym(), Size); + Asm->OutStreamer.EmitSymbolValue(Entry.getEndSym(), Size); + DIVariable DV(Entry.getVariable()); Asm->OutStreamer.AddComment("Loc expr size"); MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); MCSymbol *end = Asm->OutStreamer.getContext().CreateTempSymbol(); @@ -2448,17 +2704,18 @@ void DwarfDebug::emitDebugLoc() { Asm->EmitULEB128(Entry.getInt()); } } else if (Entry.isLocation()) { + MachineLocation Loc = Entry.getLoc(); if (!DV.hasComplexAddress()) // Regular entry. - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); else { // Complex address entry. unsigned N = DV.getNumAddrElements(); unsigned i = 0; if (N >= 2 && DV.getAddrElement(0) == DIBuilder::OpPlus) { - if (Entry.Loc.getOffset()) { + if (Loc.getOffset()) { i = 2; - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); Asm->OutStreamer.AddComment("DW_OP_deref"); Asm->EmitInt8(dwarf::DW_OP_deref); Asm->OutStreamer.AddComment("DW_OP_plus_uconst"); @@ -2467,12 +2724,12 @@ void DwarfDebug::emitDebugLoc() { } else { // If first address element is OpPlus then emit // DW_OP_breg + Offset instead of DW_OP_reg + Offset. - MachineLocation Loc(Entry.Loc.getReg(), DV.getAddrElement(1)); - Asm->EmitDwarfRegOp(Loc); + MachineLocation TLoc(Loc.getReg(), DV.getAddrElement(1)); + Asm->EmitDwarfRegOp(TLoc, DV.isIndirect()); i = 2; } } else { - Asm->EmitDwarfRegOp(Entry.Loc); + Asm->EmitDwarfRegOp(Loc, DV.isIndirect()); } // Emit remaining complex address elements. @@ -2482,7 +2739,7 @@ void DwarfDebug::emitDebugLoc() { Asm->EmitInt8(dwarf::DW_OP_plus_uconst); Asm->EmitULEB128(DV.getAddrElement(++i)); } else if (Element == DIBuilder::OpDeref) { - if (!Entry.Loc.isReg()) + if (!Loc.isReg()) Asm->EmitInt8(dwarf::DW_OP_deref); } else llvm_unreachable("unknown Opcode found in complex address"); @@ -2496,18 +2753,178 @@ void DwarfDebug::emitDebugLoc() { } } -// Emit visible names into a debug aranges section. +struct SymbolCUSorter { + SymbolCUSorter(const MCStreamer &s) : Streamer(s) {} + const MCStreamer &Streamer; + + bool operator() (const SymbolCU &A, const SymbolCU &B) { + unsigned IA = A.Sym ? Streamer.GetSymbolOrder(A.Sym) : 0; + unsigned IB = B.Sym ? Streamer.GetSymbolOrder(B.Sym) : 0; + + // Symbols with no order assigned should be placed at the end. + // (e.g. section end labels) + if (IA == 0) + IA = (unsigned)(-1); + if (IB == 0) + IB = (unsigned)(-1); + return IA < IB; + } +}; + +static bool CUSort(const CompileUnit *A, const CompileUnit *B) { + return (A->getUniqueID() < B->getUniqueID()); +} + +struct ArangeSpan { + const MCSymbol *Start, *End; +}; + +// Emit a debug aranges section, containing a CU lookup for any +// address we can tie back to a CU. void DwarfDebug::emitDebugARanges() { // Start the dwarf aranges section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfARangesSection()); + Asm->OutStreamer + .SwitchSection(Asm->getObjFileLowering().getDwarfARangesSection()); + + typedef DenseMap > SpansType; + + SpansType Spans; + + // Build a list of sections used. 
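Further down, each arange set is padded so its first address/length tuple lands on a two-pointer boundary, per section 7.20 of the DWARF spec. The while-loop the patch uses is equivalent to this closed form, shown as a sketch:

    // Padding after a header of HeaderSize bytes (including the 4-byte
    // length field) so TupleSize-byte tuples start aligned.
    unsigned arangePadding(unsigned HeaderSize, unsigned TupleSize) {
      unsigned Rem = HeaderSize % TupleSize;
      return Rem ? TupleSize - Rem : 0;
    }

With 8-byte pointers the header is 4 + 2 + 4 + 1 + 1 = 12 bytes and the tuple 16, so 4 bytes of 0xff padding are emitted, matching the loop below.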
+ std::vector<const MCSection *> Sections; + for (SectionMapType::iterator it = SectionMap.begin(); it != SectionMap.end(); + it++) { + const MCSection *Section = it->first; + Sections.push_back(Section); + } + + // Sort the sections into order. + // This is only done to ensure consistent output order across different runs. + std::sort(Sections.begin(), Sections.end(), SectionSort); + + // Build a set of address spans, sorted by CU. + for (size_t SecIdx = 0; SecIdx < Sections.size(); SecIdx++) { + const MCSection *Section = Sections[SecIdx]; + SmallVector<SymbolCU, 8> &List = SectionMap[Section]; + if (List.size() < 2) + continue; + + // Sort the symbols by offset within the section. + SymbolCUSorter sorter(Asm->OutStreamer); + std::sort(List.begin(), List.end(), sorter); + + // If we have no section (e.g. common), just write out + // individual spans for each symbol. + if (Section == NULL) { + for (size_t n = 0; n < List.size(); n++) { + const SymbolCU &Cur = List[n]; + + ArangeSpan Span; + Span.Start = Cur.Sym; + Span.End = NULL; + if (Cur.CU) + Spans[Cur.CU].push_back(Span); + } + } else { + // Build spans between each label. + const MCSymbol *StartSym = List[0].Sym; + for (size_t n = 1; n < List.size(); n++) { + const SymbolCU &Prev = List[n - 1]; + const SymbolCU &Cur = List[n]; + + // Try and build the longest span we can within the same CU. + if (Cur.CU != Prev.CU) { + ArangeSpan Span; + Span.Start = StartSym; + Span.End = Cur.Sym; + Spans[Prev.CU].push_back(Span); + StartSym = Cur.Sym; + } + } + } + } + + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + unsigned PtrSize = Asm->getDataLayout().getPointerSize(); + + // Build a list of CUs used. + std::vector<CompileUnit *> CUs; + for (SpansType::iterator it = Spans.begin(); it != Spans.end(); it++) { + CompileUnit *CU = it->first; + CUs.push_back(CU); + } + + // Sort the CU list (again, to ensure consistent output order). + std::sort(CUs.begin(), CUs.end(), CUSort); + + // Emit an arange table for each CU we used. + for (size_t CUIdx = 0; CUIdx < CUs.size(); CUIdx++) { + CompileUnit *CU = CUs[CUIdx]; + std::vector<ArangeSpan> &List = Spans[CU]; + + // Emit size of content not including length itself. + unsigned ContentSize + = sizeof(int16_t) // DWARF ARange version number + + sizeof(int32_t) // Offset of CU in the .debug_info section + + sizeof(int8_t) // Pointer Size (in bytes) + + sizeof(int8_t); // Segment Size (in bytes) + + unsigned TupleSize = PtrSize * 2; + + // 7.20 in the Dwarf specs requires the table to be aligned to a tuple. + unsigned Padding = 0; + while (((sizeof(int32_t) + ContentSize + Padding) % TupleSize) != 0) + Padding++; + + ContentSize += Padding; + ContentSize += (List.size() + 1) * TupleSize; + + // For each compile unit, write the list of spans it covers. + Asm->OutStreamer.AddComment("Length of ARange Set"); + Asm->EmitInt32(ContentSize); + Asm->OutStreamer.AddComment("DWARF Arange version number"); + Asm->EmitInt16(dwarf::DW_ARANGES_VERSION); + Asm->OutStreamer.AddComment("Offset Into Debug Info Section"); + Asm->EmitSectionOffset( + Asm->GetTempSymbol(ISec->getLabelBeginName(), CU->getUniqueID()), + DwarfInfoSectionSym); + Asm->OutStreamer.AddComment("Address Size (in bytes)"); + Asm->EmitInt8(PtrSize); + Asm->OutStreamer.AddComment("Segment Size (in bytes)"); + Asm->EmitInt8(0); + + for (unsigned n = 0; n < Padding; n++) + Asm->EmitInt8(0xff); + + for (unsigned n = 0; n < List.size(); n++) { + const ArangeSpan &Span = List[n]; + Asm->EmitLabelReference(Span.Start, PtrSize); + + // Calculate the size as being from the span start to its end. + if (Span.End) { + Asm->EmitLabelDifference(Span.End, Span.Start, PtrSize); + } else { + // For symbols without an end marker (e.g.
common), we + // write a single arange entry containing just that one symbol. + uint64_t Size = SymSize[Span.Start]; + if (Size == 0) + Size = 1; + + Asm->OutStreamer.EmitIntValue(Size, PtrSize); + } + } + + Asm->OutStreamer.AddComment("ARange terminator"); + Asm->OutStreamer.EmitIntValue(0, PtrSize); + Asm->OutStreamer.EmitIntValue(0, PtrSize); + } } // Emit visible names into a debug ranges section. void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfRangesSection()); + Asm->OutStreamer + .SwitchSection(Asm->getObjFileLowering().getDwarfRangesSection()); unsigned char Size = Asm->getDataLayout().getPointerSize(); for (SmallVectorImpl::iterator I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); @@ -2528,113 +2945,27 @@ void DwarfDebug::emitDebugMacInfo() { } } -// Emit inline info using following format. -// Section Header: -// 1. length of section -// 2. Dwarf version number -// 3. address size. -// -// Entries (one "entry" for each function that was inlined): -// -// 1. offset into __debug_str section for MIPS linkage name, if exists; -// otherwise offset into __debug_str for regular function name. -// 2. offset into __debug_str section for regular function name. -// 3. an unsigned LEB128 number indicating the number of distinct inlining -// instances for the function. -// -// The rest of the entry consists of a {die_offset, low_pc} pair for each -// inlined instance; the die_offset points to the inlined_subroutine die in the -// __debug_info section, and the low_pc is the starting address for the -// inlining instance. -void DwarfDebug::emitDebugInlineInfo() { - if (!Asm->MAI->doesDwarfUseInlineInfoSection()) - return; - - if (!FirstCU) - return; - - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfDebugInlineSection()); - - Asm->OutStreamer.AddComment("Length of Debug Inlined Information Entry"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("debug_inlined_end", 1), - Asm->GetTempSymbol("debug_inlined_begin", 1), 4); - - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_begin", 1)); - - Asm->OutStreamer.AddComment("Dwarf Version"); - Asm->EmitInt16(dwarf::DWARF_VERSION); - Asm->OutStreamer.AddComment("Address Size (in bytes)"); - Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); - - for (SmallVectorImpl::iterator I = InlinedSPNodes.begin(), - E = InlinedSPNodes.end(); I != E; ++I) { - - const MDNode *Node = *I; - DenseMap >::iterator II - = InlineInfo.find(Node); - SmallVectorImpl &Labels = II->second; - DISubprogram SP(Node); - StringRef LName = SP.getLinkageName(); - StringRef Name = SP.getName(); - - Asm->OutStreamer.AddComment("MIPS linkage name"); - if (LName.empty()) - Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name), - DwarfStrSectionSym); - else - Asm->EmitSectionOffset(InfoHolder - .getStringPoolEntry(getRealLinkageName(LName)), - DwarfStrSectionSym); - - Asm->OutStreamer.AddComment("Function name"); - Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name), - DwarfStrSectionSym); - Asm->EmitULEB128(Labels.size(), "Inline count"); - - for (SmallVectorImpl::iterator LI = Labels.begin(), - LE = Labels.end(); LI != LE; ++LI) { - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); - Asm->EmitInt32(LI->second->getOffset()); - - if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); - Asm->OutStreamer.EmitSymbolValue(LI->first, - Asm->getDataLayout().getPointerSize()); - } - } - - 
Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1)); -} - // DWARF5 Experimental Separate Dwarf emitters. // This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, // DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, -// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present, -// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa. -CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { - DICompileUnit DIUnit(N); - CompilationDir = DIUnit.getDirectory(); +// DW_AT_ranges_base, DW_AT_addr_base. +CompileUnit *DwarfDebug::constructSkeletonCU(const CompileUnit *CU) { DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, - DIUnit.getLanguage(), Die, Asm, - this, &SkeletonHolder); + CompileUnit *NewCU = new CompileUnit(CU->getUniqueID(), Die, CU->getNode(), + Asm, this, &SkeletonHolder); NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, - DIUnit.getSplitDebugFilename()); - - // This should be a unique identifier when we want to build .dwp files. - NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + CU->getNode().getSplitDebugFilename()); // Relocate to the beginning of the addr_base section, else 0 for the // beginning of the one for this compile unit. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, - DwarfAddrSectionSym); + NewCU->addSectionLabel(Die, dwarf::DW_AT_GNU_addr_base, + DwarfAddrSectionSym); else - NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, - dwarf::DW_FORM_sec_offset, 0); + NewCU->addSectionOffset(Die, dwarf::DW_AT_GNU_addr_base, 0); // 2.17.1 requires that we use DW_AT_low_pc for a single entry point // into an entity. We're using 0, or a NULL label for this. @@ -2644,14 +2975,47 @@ CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { // compile unit in debug_line section. // FIXME: Should handle multiple compile units. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) - NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, - DwarfLineSectionSym); + NewCU->addSectionLabel(Die, dwarf::DW_AT_stmt_list, + DwarfLineSectionSym); else - NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0); + NewCU->addSectionOffset(Die, dwarf::DW_AT_stmt_list, 0); if (!CompilationDir.empty()) NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + // Flags to let the linker know we have emitted new style pubnames. + if (GenerateGnuPubSections) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubnames, + Asm->GetTempSymbol("gnu_pubnames", NewCU->getUniqueID()), + DwarfGnuPubNamesSectionSym); + + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID())); + else + NewCU->addSectionDelta( + Die, dwarf::DW_AT_GNU_pubtypes, + Asm->GetTempSymbol("gnu_pubtypes", NewCU->getUniqueID()), + DwarfGnuPubTypesSectionSym); + } + + // Flag if we've emitted any ranges and their location for the compile unit. 
+ if (DebugRangeSymbols.size()) { + if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addSectionLabel(Die, dwarf::DW_AT_GNU_ranges_base, + DwarfDebugRangeSectionSym); + else + NewCU->addUInt(Die, dwarf::DW_AT_GNU_ranges_base, dwarf::DW_FORM_data4, + 0); + } + SkeletonHolder.addUnit(NewCU); SkeletonCUs.push_back(NewCU); diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h index 24f758d..cebac39 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -41,7 +41,6 @@ class DIEAbbrev; class DIE; class DIEBlock; class DIEEntry; -class DwarfDebug; //===----------------------------------------------------------------------===// /// \brief This class is used to record source line correspondence. @@ -63,13 +62,12 @@ public: /// \brief This struct describes location entries emitted in the .debug_loc /// section. -typedef struct DotDebugLocEntry { +class DotDebugLocEntry { + // Begin and end symbols for the address range that this location is valid. const MCSymbol *Begin; const MCSymbol *End; - MachineLocation Loc; - const MDNode *Variable; - bool Merged; - bool Constant; + + // Type of entry that this represents. enum EntryType { E_Location, E_Integer, @@ -83,23 +81,42 @@ typedef struct DotDebugLocEntry { const ConstantFP *CFP; const ConstantInt *CIP; } Constants; - DotDebugLocEntry() - : Begin(0), End(0), Variable(0), Merged(false), - Constant(false) { Constants.Int = 0;} + + // The location in the machine frame. + MachineLocation Loc; + + // The variable to which this location entry corresponds. + const MDNode *Variable; + + // Whether this location has been merged. + bool Merged; + +public: + DotDebugLocEntry() : Begin(0), End(0), Variable(0), Merged(false) { + Constants.Int = 0; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, const MDNode *V) - : Begin(B), End(E), Loc(L), Variable(V), Merged(false), - Constant(false) { Constants.Int = 0; EntryKind = E_Location; } + : Begin(B), End(E), Loc(L), Variable(V), Merged(false) { + Constants.Int = 0; + EntryKind = E_Location; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i) - : Begin(B), End(E), Variable(0), Merged(false), - Constant(true) { Constants.Int = i; EntryKind = E_Integer; } + : Begin(B), End(E), Variable(0), Merged(false) { + Constants.Int = i; + EntryKind = E_Integer; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr) - : Begin(B), End(E), Variable(0), Merged(false), - Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; } + : Begin(B), End(E), Variable(0), Merged(false) { + Constants.CFP = FPtr; + EntryKind = E_ConstantFP; + } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr) - : Begin(B), End(E), Variable(0), Merged(false), - Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; } + : Begin(B), End(E), Variable(0), Merged(false) { + Constants.CIP = IPtr; + EntryKind = E_ConstantInt; + } /// \brief Empty entries are also used as a trigger to emit temp label. Such /// labels are referenced is used to find debug_loc offset for a given DIE. 
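The class reshaped above is in effect a tagged union: EntryKind discriminates which of the Constants members (or Loc) is meaningful, which is why the old standalone Constant flag could be dropped. A stripped-down sketch of the shape, with the LLVM types reduced to builtins so it stands alone:

    #include <cstdint>

    class LocEntry {
      enum EntryType { E_Location, E_Integer } EntryKind;
      union {
        int64_t Int;  // valid only when EntryKind == E_Integer
        // ConstantFP* / ConstantInt* occupy this slot in the real class
      } Constants;
      // MachineLocation Loc; // valid only when EntryKind == E_Location
    public:
      LocEntry() : EntryKind(E_Location) { Constants.Int = 0; }
      bool isInt() const { return EntryKind == E_Integer; }
      int64_t getInt() const { return Constants.Int; }
    };

The const accessors added in the patch (getInt, getBeginSym, and so on) keep the fields private while the emitter reads them.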
@@ -115,10 +132,14 @@ typedef struct DotDebugLocEntry { bool isInt() const { return EntryKind == E_Integer; } bool isConstantFP() const { return EntryKind == E_ConstantFP; } bool isConstantInt() const { return EntryKind == E_ConstantInt; } - int64_t getInt() { return Constants.Int; } - const ConstantFP *getConstantFP() { return Constants.CFP; } - const ConstantInt *getConstantInt() { return Constants.CIP; } -} DotDebugLocEntry; + int64_t getInt() const { return Constants.Int; } + const ConstantFP *getConstantFP() const { return Constants.CFP; } + const ConstantInt *getConstantInt() const { return Constants.CIP; } + const MDNode *getVariable() const { return Variable; } + const MCSymbol *getBeginSym() const { return Begin; } + const MCSymbol *getEndSym() const { return End; } + MachineLocation getLoc() const { return Loc; } +}; //===----------------------------------------------------------------------===// /// \brief This class is used to track local variable information. @@ -129,11 +150,12 @@ class DbgVariable { DbgVariable *AbsVar; // Corresponding Abstract variable, if any. const MachineInstr *MInsn; // DBG_VALUE instruction of the variable. int FrameIndex; + DwarfDebug *DD; public: // AbsVar may be NULL. - DbgVariable(DIVariable V, DbgVariable *AV) + DbgVariable(DIVariable V, DbgVariable *AV, DwarfDebug *DD) : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0), - FrameIndex(~0) {} + FrameIndex(~0), DD(DD) {} // Accessors. DIVariable getVariable() const { return Var; } @@ -148,7 +170,7 @@ public: int getFrameIndex() const { return FrameIndex; } void setFrameIndex(int FI) { FrameIndex = FI; } // Translate tag to proper Dwarf tag. - unsigned getTag() const { + uint16_t getTag() const { if (Var.getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; @@ -172,32 +194,27 @@ public: } bool variableHasComplexAddress() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); + assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.hasComplexAddress(); } bool isBlockByrefVariable() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); + assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.isBlockByrefVariable(); } unsigned getNumAddrElements() const { - assert(Var.Verify() && "Invalid complex DbgVariable!"); + assert(Var.isVariable() && "Invalid complex DbgVariable!"); return Var.getNumAddrElements(); } uint64_t getAddrElement(unsigned i) const { return Var.getAddrElement(i); } DIType getType() const; -}; - -// A String->Symbol mapping of strings used by indirect -// references. -typedef StringMap, - BumpPtrAllocator&> StrPool; - -// A Symbol->pair mapping of addresses used by indirect -// references. -typedef DenseMap > AddrPool; +private: + /// resolve - Look in the DwarfDebug map for the MDNode that + /// corresponds to the reference. + template T resolve(DIRef Ref) const; +}; /// \brief Collects and handles information specific to a particular /// collection of units. @@ -209,27 +226,34 @@ class DwarfUnits { FoldingSet *AbbreviationsSet; // A list of all the unique abbreviations in use. - std::vector *Abbreviations; + std::vector &Abbreviations; // A pointer to all units in the section. SmallVector CUs; // Collection of strings for this unit and assorted symbols. + // A String->Symbol mapping of strings used by indirect + // references. 
+ typedef StringMap<std::pair<MCSymbol *, unsigned>, + BumpPtrAllocator &> StrPool; StrPool StringPool; unsigned NextStringPoolNumber; std::string StringPref; // Collection of addresses for this unit and assorted labels. + // A Symbol->unsigned mapping of addresses used by indirect + // references. + typedef DenseMap<const MCExpr *, unsigned> AddrPool; AddrPool AddressPool; unsigned NextAddrPoolNumber; public: DwarfUnits(AsmPrinter *AP, FoldingSet<DIEAbbrev> *AS, - std::vector<DIEAbbrev *> *A, const char *Pref, - BumpPtrAllocator &DA) : - Asm(AP), AbbreviationsSet(AS), Abbreviations(A), - StringPool(DA), NextStringPoolNumber(0), StringPref(Pref), - AddressPool(), NextAddrPoolNumber(0) {} + std::vector<DIEAbbrev *> &A, const char *Pref, + BumpPtrAllocator &DA) + : Asm(AP), AbbreviationsSet(AS), Abbreviations(A), StringPool(DA), + NextStringPoolNumber(0), StringPref(Pref), AddressPool(), + NextAddrPoolNumber(0) {} /// \brief Compute the size and offset of a DIE given an incoming Offset. unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); @@ -245,14 +269,15 @@ public: /// \brief Emit all of the units to the section listed with the given /// abbreviation section. - void emitUnits(DwarfDebug *, const MCSection *, const MCSection *, - const MCSymbol *); + void emitUnits(DwarfDebug *DD, const MCSection *USection, + const MCSection *ASection, const MCSymbol *ASectionSym); /// \brief Emit all of the strings to the section given. - void emitStrings(const MCSection *, const MCSection *, const MCSymbol *); + void emitStrings(const MCSection *StrSection, const MCSection *OffsetSection, + const MCSymbol *StrSecSym); /// \brief Emit all of the addresses to the section given. - void emitAddresses(const MCSection *); + void emitAddresses(const MCSection *AddrSection); /// \brief Returns the entry into the start of the pool. MCSymbol *getStringPoolSym(); @@ -270,14 +295,18 @@ public: /// \brief Returns the index into the address pool with the given /// label/symbol. - unsigned getAddrPoolIndex(MCSymbol *); + unsigned getAddrPoolIndex(const MCExpr *Sym); + unsigned getAddrPoolIndex(const MCSymbol *Sym); /// \brief Returns the address pool. AddrPool *getAddrPool() { return &AddressPool; } +}; - /// \brief for a given compile unit DIE, returns offset from beginning of - /// debug info. - unsigned getCUOffset(DIE *Die); +/// \brief Helper used to pair up a symbol and its DWARF compile unit. +struct SymbolCU { + SymbolCU(CompileUnit *CU, const MCSymbol *Sym) : Sym(Sym), CU(CU) {} + const MCSymbol *Sym; + CompileUnit *CU; }; /// \brief Collects and handles dwarf debug information. @@ -291,10 +320,7 @@ class DwarfDebug { // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; - //===--------------------------------------------------------------------===// - // Attribute used to construct specific Dwarf sections. - // - + // Handle to a compile unit used for the inline extension handling. CompileUnit *FirstCU; // Maps MDNode with its corresponding CompileUnit. @@ -303,6 +329,14 @@ class DwarfDebug { // Maps subprogram MDNode with its corresponding CompileUnit. DenseMap<const MDNode *, CompileUnit *> SPMap; + // Maps a CU DIE with its corresponding CompileUnit. + DenseMap<const DIE *, CompileUnit *> CUDieMap; + + /// Maps MDNodes for type system with the corresponding DIEs. These DIEs can + /// be shared across CUs, that is why we keep the map here instead + /// of in CompileUnit. + DenseMap<const MDNode *, DIE *> MDTypeNodeToDieMap; + // Used to uniquely define abbreviations. FoldingSet<DIEAbbrev> AbbreviationsSet; @@ -315,10 +349,17 @@ class DwarfDebug { // separated by a zero byte, mapped to a unique id.
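That zero-byte separator lets one flat StringMap key on the directory and file name at once, with no pair struct and no lifetime questions. A sketch of building such a key (std::string is 8-bit clean, so the embedded NUL is safe):

    #include <string>

    std::string makeSourceIdKey(const std::string &Dir,
                                const std::string &File) {
      std::string Key = Dir;
      Key.push_back('\0'); // separator that cannot occur in either part
      Key += File;
      return Key;
    }

Lookup and insertion then behave exactly like any other string key; only code that prints the key needs to know about the separator.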
StringMap SourceIdMap; + // List of all labels used in aranges generation. + std::vector ArangeLabels; + + // Size of each symbol emitted (for those symbols that have a specific size). + DenseMap SymSize; + // Provides a unique id per text section. - SetVector SectionMap; + typedef DenseMap > SectionMapType; + SectionMapType SectionMap; - // List of Arguments (DbgValues) for current function. + // List of arguments for current function. SmallVector CurrentFnArguments; LexicalScopes LScopes; @@ -327,7 +368,9 @@ class DwarfDebug { DenseMap AbstractSPDies; // Collection of dbg variables of a scope. - DenseMap > ScopeVariables; + typedef DenseMap > ScopeVariablesMap; + ScopeVariablesMap ScopeVariables; // Collection of abstract variables. DenseMap AbstractVariables; @@ -339,12 +382,6 @@ class DwarfDebug { // as DW_AT_inline. SmallPtrSet InlinedSubprogramDIEs; - // Keep track of inlined functions and their location. This - // information is used to populate the debug_inlined section. - typedef std::pair InlineInfoLabels; - DenseMap > InlineInfo; - SmallVector InlinedSPNodes; - // This is a collection of subprogram MDNodes that are processed to // create DIEs. SmallPtrSet ProcessedSPNodes; @@ -377,16 +414,6 @@ class DwarfDebug { // body. DebugLoc PrologEndLoc; - struct FunctionDebugFrameInfo { - unsigned Number; - std::vector Moves; - - FunctionDebugFrameInfo(unsigned Num, const std::vector &M) - : Number(Num), Moves(M) {} - }; - - std::vector DebugFrames; - // Section Symbols: these are assembler temporary labels that are emitted at // the beginning of each supported dwarf section. These are used to form // section offsets and are created by EmitSectionLabels. @@ -395,9 +422,10 @@ class DwarfDebug { MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym, *DwarfAddrSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; + MCSymbol *DwarfGnuPubNamesSectionSym, *DwarfGnuPubTypesSectionSym; // As an optimization, there is no need to emit an entry in the directory - // table for the same directory as DW_at_comp_dir. + // table for the same directory as DW_AT_comp_dir. StringRef CompilationDir; // Counter for assigning globally unique IDs for CUs. @@ -409,8 +437,19 @@ class DwarfDebug { // Holders for the various debug information flags that we might need to // have exposed. See accessor functions below for description. - // Whether or not we're emitting info for older versions of gdb on darwin. - bool IsDarwinGDBCompat; + // Holder for imported entities. + typedef SmallVector, 32> + ImportedEntityMap; + ImportedEntityMap ScopesWithImportedEntities; + + // Holder for types that are going to be extracted out into a type unit. + std::vector TypeUnits; + + // Whether to emit the pubnames/pubtypes sections. + bool HasDwarfPubSections; + + // Version of dwarf we're emitting. + unsigned DwarfVersion; // DWARF5 Experimental Options bool HasDwarfAccelTables; @@ -433,9 +472,8 @@ class DwarfDebug { // Holder for the skeleton information. DwarfUnits SkeletonHolder; - typedef SmallVector, 32> - ImportedEntityMap; - ImportedEntityMap ScopesWithImportedEntities; + // Maps from a type identifier to the actual MDNode. + DITypeIdentifierMap TypeIdentifierMap; private: @@ -448,11 +486,14 @@ private: /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global /// variables in this scope then create and insert DIEs for these /// variables. 
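Two of the members added earlier in this hunk, ArangeLabels and SymSize, are the inputs to emitDebugARanges: every label of interest is remembered with its owning CU, and symbols with a known extent (commons, for instance) record an explicit size. A hedged usage sketch, with the caller and values hypothetical but the two methods taken from this patch:

    void recordForAranges(DwarfDebug *DD, CompileUnit *CU,
                          const MCSymbol *Sym, uint64_t Size) {
      DD->addArangeLabel(SymbolCU(CU, Sym)); // label -> CU for span building
      if (Size)
        DD->setSymbolSize(Sym, Size);        // gives end-less spans a length
    }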
- DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode); + DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, DISubprogram SP); /// \brief Construct new DW_TAG_lexical_block for this scope and /// attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + /// A helper function to check whether the DIE for a given Scope is going + /// to be null. + bool isLexicalScopeDIENull(LexicalScope *Scope); /// \brief This scope represents inlined body of a function. Construct /// DIE to represent this concrete inlined copy of the function. @@ -460,6 +501,9 @@ private: /// \brief Construct a DIE for this scope. DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); + /// A helper function to create children of a Scope DIE. + DIE *createScopeChildrenDIE(CompileUnit *TheCU, LexicalScope *Scope, + SmallVectorImpl &Children); /// \brief Emit initial Dwarf sections with a label at the start of each one. void emitSectionLabels(); @@ -511,10 +555,16 @@ private: void emitAccelTypes(); /// \brief Emit visible names into a debug pubnames section. - void emitDebugPubnames(); + /// \param GnuStyle determines whether or not we want to emit + /// additional information into the table ala newer gcc for gdb + /// index. + void emitDebugPubNames(bool GnuStyle = false); /// \brief Emit visible types into a debug pubtypes section. - void emitDebugPubTypes(); + /// \param GnuStyle determines whether or not we want to emit + /// additional information into the table ala newer gcc for gdb + /// index. + void emitDebugPubTypes(bool GnuStyle = false); /// \brief Emit visible names into a debug str section. void emitDebugStr(); @@ -538,7 +588,7 @@ private: /// \brief Construct the split debug info compile unit for the debug info /// section. - CompileUnit *constructSkeletonCU(const MDNode *); + CompileUnit *constructSkeletonCU(const CompileUnit *CU); /// \brief Emit the local split abbreviations. void emitSkeletonAbbrevs(const MCSection *); @@ -554,21 +604,21 @@ private: /// \brief Create new CompileUnit for the given metadata node with tag /// DW_TAG_compile_unit. - CompileUnit *constructCompileUnit(const MDNode *N); + CompileUnit *constructCompileUnit(DICompileUnit DIUnit); /// \brief Construct subprogram DIE. void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N); - /// \brief Construct import_module DIE. - void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N); + /// \brief Construct imported_module or imported_declaration DIE. + void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N); /// \brief Construct import_module DIE. - void constructImportedModuleDIE(CompileUnit *TheCU, const MDNode *N, + void constructImportedEntityDIE(CompileUnit *TheCU, const MDNode *N, DIE *Context); /// \brief Construct import_module DIE. - void constructImportedModuleDIE(CompileUnit *TheCU, - const DIImportedModule &Module, + void constructImportedEntityDIE(CompileUnit *TheCU, + const DIImportedEntity &Module, DIE *Context); /// \brief Register a source line with debug info. Returns the unique @@ -616,7 +666,13 @@ public: // Main entry points. // DwarfDebug(AsmPrinter *A, Module *M); - ~DwarfDebug(); + + void insertDIE(const MDNode *TypeMD, DIE *Die) { + MDTypeNodeToDieMap.insert(std::make_pair(TypeMD, Die)); + } + DIE *getDIE(const MDNode *TypeMD) { + return MDTypeNodeToDieMap.lookup(TypeMD); + } /// \brief Emit all Dwarf sections that should come prior to the /// content. 
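The resolve<T> helper declared in this hunk exists because, with type units, a DI node may refer to a type by string identifier rather than by direct MDNode pointer, and something has to own the identifier-to-node map. Reduced to a plain map probe, with all the DIRef machinery stripped out as an assumption-laden stand-in:

    #include <map>
    #include <string>

    // TypeIdentifierMap plays the role of DITypeIdentifierMap here.
    template <typename T>
    T *resolveRef(const std::map<std::string, T *> &TypeIdentifierMap,
                  const std::string &Id) {
      typename std::map<std::string, T *>::const_iterator It =
          TypeIdentifierMap.find(Id);
      return It == TypeIdentifierMap.end() ? 0 : It->second;
    }

DbgVariable::resolve and DwarfDebug::resolve both bottom out in the same lookup; the map itself (TypeIdentifierMap) lives on DwarfDebug, as the member list above shows.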
@@ -637,6 +693,17 @@ public: /// \brief Process end of an instruction. void endInstruction(const MachineInstr *MI); + /// \brief Add a DIE to the set of types that we're going to pull into + /// type units. + void addTypeUnitType(DIE *Die) { TypeUnits.push_back(Die); } + + /// \brief Add a label so that arange data can be generated for it. + void addArangeLabel(SymbolCU SCU) { ArangeLabels.push_back(SCU); } + + /// \brief For symbols that have a size designated (e.g. common symbols), + /// this tracks that size. + void setSymbolSize(const MCSymbol *Sym, uint64_t Size) { SymSize[Sym] = Size;} + /// \brief Look up the source id with the given directory and source file /// names. If none currently exists, create a new id and insert it in the /// SourceIds map. @@ -644,11 +711,7 @@ public: unsigned CUID); /// \brief Recursively Emits a debug information entry. - void emitDIE(DIE *Die, std::vector *Abbrevs); - - /// \brief Returns whether or not to limit some of our debug - /// output to the limitations of darwin gdb. - bool useDarwinGDBCompat() { return IsDarwinGDBCompat; } + void emitDIE(DIE *Die, ArrayRef Abbrevs); // Experimental DWARF5 features. @@ -659,6 +722,19 @@ public: /// \brief Returns whether or not to change the current debug info for the /// split dwarf proposal support. bool useSplitDwarf() { return HasSplitDwarf; } + + /// Returns the Dwarf Version. + unsigned getDwarfVersion() const { return DwarfVersion; } + + /// Find the MDNode for the given reference. + template T resolve(DIRef Ref) const { + return Ref.resolve(TypeIdentifierMap); + } + + /// isSubprogramContext - Return true if Context is either a subprogram + /// or another context nested inside a subprogram. + bool isSubprogramContext(const MDNode *Context); + }; } // End of namespace llvm diff --git a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h index 74b1b13..1575161 100644 --- a/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/contrib/llvm/lib/CodeGen/AsmPrinter/DwarfException.h @@ -23,13 +23,13 @@ namespace llvm { template class SmallVectorImpl; struct LandingPadInfo; class MachineModuleInfo; -class MachineMove; class MachineInstr; class MachineFunction; class MCAsmInfo; class MCExpr; class MCSymbol; class Function; +class ARMTargetStreamer; class AsmPrinter; //===----------------------------------------------------------------------===// @@ -178,6 +178,8 @@ public: class ARMException : public DwarfException { void EmitTypeInfos(unsigned TTypeEncoding); + ARMTargetStreamer &getTargetStreamer(); + public: //===--------------------------------------------------------------------===// // Main entry points. diff --git a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp index 4a99184..24aa1ab 100644 --- a/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp +++ b/contrib/llvm/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -26,18 +26,20 @@ using namespace llvm; namespace { class BasicTTI : public ImmutablePass, public TargetTransformInfo { - const TargetLoweringBase *TLI; + const TargetMachine *TM; /// Estimate the overhead of scalarizing an instruction. Insert and Extract /// are set if the result needs to be inserted and/or extracted from vectors. 
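The declaration that follows models the cost of moving values between vector and scalar form: one extract per operand element, one insert per result element. Under the common simplification of unit cost per insertelement/extractelement, the estimate reduces to:

    // NumElts is the vector width; the real code sums per-element
    // getVectorInstrCost results instead of assuming cost 1.
    unsigned scalarizationOverhead(unsigned NumElts, bool Insert,
                                   bool Extract) {
      unsigned Cost = 0;
      if (Insert)
        Cost += NumElts;  // rebuild the result vector element by element
      if (Extract)
        Cost += NumElts;  // pull each operand element out of its vector
      return Cost;
    }

getReductionCost at the end of this file reuses the Extract half of this estimate for the final horizontal step.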
unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + const TargetLoweringBase *getTLI() const { return TM->getTargetLowering(); } + public: - BasicTTI() : ImmutablePass(ID), TLI(0) { + BasicTTI() : ImmutablePass(ID), TM(0) { llvm_unreachable("This pass cannot be directly constructed"); } - BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) { + BasicTTI(const TargetMachine *TM) : ImmutablePass(ID), TM(TM) { initializeBasicTTIPass(*PassRegistry::getPassRegistry()); } @@ -63,6 +65,8 @@ public: return this; } + virtual bool hasBranchDivergence() const; + /// \name Scalar TTI Implementations /// @{ @@ -71,11 +75,16 @@ public: virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, bool HasBaseReg, int64_t Scale) const; + virtual int getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const; virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; virtual bool isTypeLegal(Type *Ty) const; virtual unsigned getJumpBufAlignment() const; virtual unsigned getJumpBufSize() const; virtual bool shouldBuildLookupTables() const; + virtual bool haveFastSqrt(Type *Ty) const; + virtual void getUnrollingPreferences(Loop *L, UnrollingPreferences &UP) const; /// @} @@ -103,7 +112,8 @@ public: virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, ArrayRef Tys) const; virtual unsigned getNumberOfParts(Type *Tp) const; - virtual unsigned getAddressComputationCost(Type *Ty) const; + virtual unsigned getAddressComputationCost(Type *Ty, bool IsComplex) const; + virtual unsigned getReductionCost(unsigned Opcode, Type *Ty, bool IsPairwise) const; /// @} }; @@ -115,17 +125,18 @@ INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", char BasicTTI::ID = 0; ImmutablePass * -llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) { - return new BasicTTI(TLI); +llvm::createBasicTargetTransformInfoPass(const TargetMachine *TM) { + return new BasicTTI(TM); } +bool BasicTTI::hasBranchDivergence() const { return false; } bool BasicTTI::isLegalAddImmediate(int64_t imm) const { - return TLI->isLegalAddImmediate(imm); + return getTLI()->isLegalAddImmediate(imm); } bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { - return TLI->isLegalICmpImmediate(imm); + return getTLI()->isLegalICmpImmediate(imm); } bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, @@ -136,32 +147,52 @@ bool BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, AM.BaseOffs = BaseOffset; AM.HasBaseReg = HasBaseReg; AM.Scale = Scale; - return TLI->isLegalAddressingMode(AM, Ty); + return getTLI()->isLegalAddressingMode(AM, Ty); +} + +int BasicTTI::getScalingFactorCost(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const { + TargetLoweringBase::AddrMode AM; + AM.BaseGV = BaseGV; + AM.BaseOffs = BaseOffset; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Scale; + return getTLI()->getScalingFactorCost(AM, Ty); } bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { - return TLI->isTruncateFree(Ty1, Ty2); + return getTLI()->isTruncateFree(Ty1, Ty2); } bool BasicTTI::isTypeLegal(Type *Ty) const { - EVT T = TLI->getValueType(Ty); - return TLI->isTypeLegal(T); + EVT T = getTLI()->getValueType(Ty); + return getTLI()->isTypeLegal(T); } unsigned BasicTTI::getJumpBufAlignment() const { - return TLI->getJumpBufAlignment(); + return getTLI()->getJumpBufAlignment(); } unsigned BasicTTI::getJumpBufSize() const { - return 
TLI->getJumpBufSize(); + return getTLI()->getJumpBufSize(); } bool BasicTTI::shouldBuildLookupTables() const { + const TargetLoweringBase *TLI = getTLI(); return TLI->supportJumpTables() && (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); } +bool BasicTTI::haveFastSqrt(Type *Ty) const { + const TargetLoweringBase *TLI = getTLI(); + EVT VT = TLI->getValueType(Ty); + return TLI->isTypeLegal(VT) && TLI->isOperationLegalOrCustom(ISD::FSQRT, VT); +} + +void BasicTTI::getUnrollingPreferences(Loop *, UnrollingPreferences &) const { } + //===----------------------------------------------------------------------===// // // Calls used by the vectorizers. @@ -199,6 +230,7 @@ unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, OperandValueKind) const { // Check if any of the operands are vector operands. + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -245,6 +277,7 @@ unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) const { + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -338,6 +371,7 @@ unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, Type *CondTy) const { + const TargetLoweringBase *TLI = getTLI(); int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); @@ -382,7 +416,7 @@ unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, unsigned Alignment, unsigned AddressSpace) const { assert(!Src->isVoidTy() && "Invalid type"); - std::pair LT = TLI->getTypeLegalizationCost(Src); + std::pair LT = getTLI()->getTypeLegalizationCost(Src); // Assume that all loads of legal types cost 1. return LT.first; @@ -420,15 +454,23 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, case Intrinsic::log10: ISD = ISD::FLOG10; break; case Intrinsic::log2: ISD = ISD::FLOG2; break; case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::copysign: ISD = ISD::FCOPYSIGN; break; case Intrinsic::floor: ISD = ISD::FFLOOR; break; case Intrinsic::ceil: ISD = ISD::FCEIL; break; case Intrinsic::trunc: ISD = ISD::FTRUNC; break; + case Intrinsic::nearbyint: + ISD = ISD::FNEARBYINT; break; case Intrinsic::rint: ISD = ISD::FRINT; break; + case Intrinsic::round: ISD = ISD::FROUND; break; case Intrinsic::pow: ISD = ISD::FPOW; break; case Intrinsic::fma: ISD = ISD::FMA; break; case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? 
+ case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + return 0; } + const TargetLoweringBase *TLI = getTLI(); std::pair LT = TLI->getTypeLegalizationCost(RetTy); if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { @@ -462,10 +504,24 @@ unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, } unsigned BasicTTI::getNumberOfParts(Type *Tp) const { - std::pair LT = TLI->getTypeLegalizationCost(Tp); + std::pair LT = getTLI()->getTypeLegalizationCost(Tp); return LT.first; } -unsigned BasicTTI::getAddressComputationCost(Type *Ty) const { +unsigned BasicTTI::getAddressComputationCost(Type *Ty, bool IsComplex) const { return 0; } + +unsigned BasicTTI::getReductionCost(unsigned Opcode, Type *Ty, + bool IsPairwise) const { + assert(Ty->isVectorTy() && "Expect a vector type"); + unsigned NumVecElts = Ty->getVectorNumElements(); + unsigned NumReduxLevels = Log2_32(NumVecElts); + unsigned ArithCost = NumReduxLevels * + TopTTI->getArithmeticInstrCost(Opcode, Ty); + // Assume the pairwise shuffles add a cost. + unsigned ShuffleCost = + NumReduxLevels * (IsPairwise + 1) * + TopTTI->getShuffleCost(SK_ExtractSubvector, Ty, NumVecElts / 2, Ty); + return ShuffleCost + ArithCost + getScalarizationOverhead(Ty, false, true); +} diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.cpp b/contrib/llvm/lib/CodeGen/BranchFolding.cpp index f8cc3b3..9cd4208 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.cpp +++ b/contrib/llvm/lib/CodeGen/BranchFolding.cpp @@ -135,8 +135,8 @@ bool BranchFolder::OptimizeImpDefsBlock(MachineBasicBlock *MBB) { if (!I->isImplicitDef()) break; unsigned Reg = I->getOperand(0).getReg(); - ImpDefRegs.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) ImpDefRegs.insert(*SubRegs); ++I; } @@ -406,7 +406,8 @@ void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, /// MBB so that the part before the iterator falls into the part starting at the /// iterator. This returns the new MBB. MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1) { + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB) { if (!TII->isLegalToSplitMBBAt(CurMBB, BBI1)) return 0; @@ -414,7 +415,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB, // Create the fall-through block. MachineFunction::iterator MBBI = &CurMBB; - MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(CurMBB.getBasicBlock()); + MachineBasicBlock *NewMBB =MF.CreateMachineBasicBlock(BB); CurMBB.getParent()->insert(++MBBI, NewMBB); // Move all the successors of this block to the specified block. @@ -647,6 +648,7 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash, /// CreateCommonTailOnlyBlock - None of the blocks to be tail-merged consist /// only of the common tail. Create a block that does by splitting one. bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex) { commonTailIndex = 0; @@ -676,7 +678,12 @@ bool BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, DEBUG(dbgs() << "\nSplitting BB#" << MBB->getNumber() << ", size " << maxCommonTailLength); - MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI); + // If the split block unconditionally falls-thru to SuccBB, it will be + // merged. In control flow terms it should then take SuccBB's name. e.g. 
If + // SuccBB is an inner loop, the common tail is still part of the inner loop. + const BasicBlock *BB = (SuccBB && MBB->succ_size() == 1) ? + SuccBB->getBasicBlock() : MBB->getBasicBlock(); + MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI, BB); if (!newMBB) { DEBUG(dbgs() << "... failed!"); return false; @@ -784,7 +791,7 @@ bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB, !SameTails[commonTailIndex].tailIsWholeBlock())) { // None of the blocks consist entirely of the common tail. // Split a block so that one does. - if (!CreateCommonTailOnlyBlock(PredBB, + if (!CreateCommonTailOnlyBlock(PredBB, SuccBB, maxCommonTailLength, commonTailIndex)) { RemoveBlocksWithHash(CurHash, SuccBB, PredBB); continue; diff --git a/contrib/llvm/lib/CodeGen/BranchFolding.h b/contrib/llvm/lib/CodeGen/BranchFolding.h index df795df..0d15ed7 100644 --- a/contrib/llvm/lib/CodeGen/BranchFolding.h +++ b/contrib/llvm/lib/CodeGen/BranchFolding.h @@ -1,4 +1,4 @@ -//===-- BranchFolding.h - Fold machine code branch instructions --*- C++ -*===// +//===-- BranchFolding.h - Fold machine code branch instructions -*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -100,13 +100,15 @@ namespace llvm { void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst, MachineBasicBlock *NewDest); MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB, - MachineBasicBlock::iterator BBI1); + MachineBasicBlock::iterator BBI1, + const BasicBlock *BB); unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength, MachineBasicBlock *SuccBB, MachineBasicBlock *PredBB); void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB, MachineBasicBlock* PredBB); bool CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB, + MachineBasicBlock *SuccBB, unsigned maxCommonTailLength, unsigned &commonTailIndex); diff --git a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp index 38ae17d..4925c4d 100644 --- a/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp +++ b/contrib/llvm/lib/CodeGen/CalcSpillWeights.cpp @@ -9,13 +9,12 @@ #define DEBUG_TYPE "calcspillweights" -#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/SlotIndexes.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -23,36 +22,22 @@ #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; -char CalculateSpillWeights::ID = 0; -INITIALIZE_PASS_BEGIN(CalculateSpillWeights, "calcspillweights", - "Calculate spill weights", false, false) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(CalculateSpillWeights, "calcspillweights", - "Calculate spill weights", false, false) - -void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const { - au.addRequired(); - au.addRequired(); - au.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(au); -} - -bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &MF) { - +void llvm::calculateSpillWeightsAndHints(LiveIntervals &LIS, + MachineFunction &MF, + const MachineLoopInfo &MLI, + const MachineBlockFrequencyInfo &MBFI, + VirtRegAuxInfo::NormalizingFn norm) { DEBUG(dbgs() << "********** Compute Spill Weights **********\n" << "********** 
Function: " << MF.getName() << '\n'); - LiveIntervals &LIS = getAnalysis(); MachineRegisterInfo &MRI = MF.getRegInfo(); - VirtRegAuxInfo VRAI(MF, LIS, getAnalysis()); + VirtRegAuxInfo VRAI(MF, LIS, MLI, MBFI, norm); for (unsigned i = 0, e = MRI.getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI.reg_nodbg_empty(Reg)) continue; - VRAI.CalculateWeightAndHint(LIS.getInterval(Reg)); + VRAI.calculateSpillWeightAndHint(LIS.getInterval(Reg)); } - return false; } // Return the preferred allocation register for reg, given a COPY instruction. @@ -107,12 +92,12 @@ static bool isRematerializable(const LiveInterval &LI, return true; } -void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { +void +VirtRegAuxInfo::calculateSpillWeightAndHint(LiveInterval &li) { MachineRegisterInfo &mri = MF.getRegInfo(); const TargetRegisterInfo &tri = *MF.getTarget().getRegisterInfo(); MachineBasicBlock *mbb = 0; MachineLoop *loop = 0; - unsigned loopDepth = 0; bool isExiting = false; float totalWeight = 0; SmallPtrSet visited; @@ -140,14 +125,14 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (mi->getParent() != mbb) { mbb = mi->getParent(); loop = Loops.getLoopFor(mbb); - loopDepth = loop ? loop->getLoopDepth() : 0; isExiting = loop ? loop->isLoopExiting(mbb) : false; } // Calculate instr weight. bool reads, writes; tie(reads, writes) = mi->readsWritesVirtualRegister(li.reg); - weight = LiveIntervals::getSpillWeight(writes, reads, loopDepth); + weight = LiveIntervals::getSpillWeight( + writes, reads, MBFI.getBlockFreq(mi->getParent())); // Give extra weight to what looks like a loop induction variable update. if (writes && isExiting && LIS.isLiveOutOfMBB(li, mbb)) @@ -198,5 +183,5 @@ void VirtRegAuxInfo::CalculateWeightAndHint(LiveInterval &li) { if (isRematerializable(li, LIS, *MF.getTarget().getInstrInfo())) totalWeight *= 0.5F; - li.weight = normalizeSpillWeight(totalWeight, li.getSize()); + li.weight = normalize(totalWeight, li.getSize()); } diff --git a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp index 75f4b96..fcfc9dc 100644 --- a/contrib/llvm/lib/CodeGen/CallingConvLower.cpp +++ b/contrib/llvm/lib/CodeGen/CallingConvLower.cpp @@ -24,7 +24,7 @@ using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, - const TargetMachine &tm, SmallVector &locs, + const TargetMachine &tm, SmallVectorImpl &locs, LLVMContext &C) : CallingConv(CC), IsVarArg(isVarArg), MF(mf), TM(tm), TRI(*TM.getRegisterInfo()), Locs(locs), Context(C), diff --git a/contrib/llvm/lib/CodeGen/CodeGen.cpp b/contrib/llvm/lib/CodeGen/CodeGen.cpp index c641991..7430c53 100644 --- a/contrib/llvm/lib/CodeGen/CodeGen.cpp +++ b/contrib/llvm/lib/CodeGen/CodeGen.cpp @@ -22,7 +22,6 @@ using namespace llvm; void llvm::initializeCodeGen(PassRegistry &Registry) { initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); - initializeCalculateSpillWeightsPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); @@ -60,7 +59,6 @@ void llvm::initializeCodeGen(PassRegistry &Registry) { initializeStackProtectorPass(Registry); initializeStackColoringPass(Registry); initializeStackSlotColoringPass(Registry); - initializeStrongPHIEliminationPass(Registry); initializeTailDuplicatePassPass(Registry); initializeTargetPassConfigPass(Registry); initializeTwoAddressInstructionPassPass(Registry); diff --git 
a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp index 0eb74a4..18c8e0a 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -201,8 +201,8 @@ void CriticalAntiDepBreaker::PrescanInstruction(MachineInstr *MI) { if (MO.isUse() && Special) { if (!KeepRegs.test(Reg)) { - KeepRegs.set(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) KeepRegs.set(*SubRegs); } } @@ -361,7 +361,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVector &Forbid) + SmallVectorImpl &Forbid) { ArrayRef Order = RegClassInfo.getOrder(RC); for (unsigned i = 0; i != Order.size(); ++i) { @@ -388,7 +388,7 @@ findSuitableFreeRegister(RegRefIter RegRefBegin, continue; // If NewReg overlaps any of the forbidden registers, we can't use it. bool Forbidden = false; - for (SmallVector::iterator it = Forbid.begin(), + for (SmallVectorImpl::iterator it = Forbid.begin(), ite = Forbid.end(); it != ite; ++it) if (TRI->regsOverlap(NewReg, *it)) { Forbidden = true; diff --git a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h index df13dd3..565d20b 100644 --- a/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/contrib/llvm/lib/CodeGen/CriticalAntiDepBreaker.h @@ -103,7 +103,7 @@ class TargetRegisterInfo; unsigned AntiDepReg, unsigned LastNewReg, const TargetRegisterClass *RC, - SmallVector &Forbid); + SmallVectorImpl &Forbid); }; } diff --git a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp index 840a101..6619bcf 100644 --- a/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp +++ b/contrib/llvm/lib/CodeGen/DFAPacketizer.cpp @@ -160,7 +160,8 @@ void VLIWPacketizerList::PacketizeMIs(MachineBasicBlock *MBB, MachineBasicBlock::iterator EndItr) { assert(VLIWScheduler && "VLIW Scheduler is not initialized!"); VLIWScheduler->startBlock(MBB); - VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, MBB->size()); + VLIWScheduler->enterRegion(MBB, BeginItr, EndItr, + std::distance(BeginItr, EndItr)); VLIWScheduler->schedule(); // Generate MI -> SU map. diff --git a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp index a54217f..5efe1ff 100644 --- a/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/contrib/llvm/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -154,11 +154,11 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { if (MO.isReg() && MO.isDef()) { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - LivePhysRegs.reset(Reg); // Check the subreg set, not the alias set, because a def // of a super-register may still be partially live after // this def. 
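A recurring cleanup in several of these files (BranchFolder above, CriticalAntiDepBreaker here, DeadMachineInstructionElim and IfConverter below) replaces the two-step "insert Reg, then loop over its sub-registers" pattern with a single MCSubRegIterator loop passing /*IncludeSelf=*/true, which visits Reg itself first. A self-contained toy model of the two shapes, for illustration only (plain C++; the subRegs() table is invented, the real code walks TargetRegisterInfo):

    #include <cstdio>
    #include <set>
    #include <vector>

    // Invented stand-in for a target's sub-register table.
    static std::vector<int> subRegs(int Reg, bool IncludeSelf) {
      std::vector<int> Out;
      if (IncludeSelf)
        Out.push_back(Reg);            // IncludeSelf yields Reg itself first
      if (Reg == 100) {                // toy: r100 has sub-registers r101, r102
        Out.push_back(101);
        Out.push_back(102);
      }
      return Out;
    }

    int main() {
      // Old shape: seed the set with Reg, then add each sub-register.
      std::set<int> Old;
      Old.insert(100);
      for (int R : subRegs(100, false))
        Old.insert(R);

      // New shape: one loop covers Reg and its sub-registers.
      std::set<int> New;
      for (int R : subRegs(100, true))
        New.insert(R);

      std::printf("%s\n", Old == New ? "identical sets" : "bug");
      return 0;
    }

Both shapes build the same set; the new one simply drops the special case, which is why these hunks are mechanical.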
- for (MCSubRegIterator SR(Reg, TRI); SR.isValid(); ++SR) + for (MCSubRegIterator SR(Reg, TRI,/*IncludeSelf=*/true); + SR.isValid(); ++SR) LivePhysRegs.reset(*SR); } } else if (MO.isRegMask()) { diff --git a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp index f27ec77..c7c1752 100644 --- a/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/DwarfEHPrepare.cpp @@ -33,7 +33,6 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; - const TargetLoweringBase *TLI; // RewindFunction - _Unwind_Resume or the target equivalent. Constant *RewindFunction; @@ -43,9 +42,8 @@ namespace { public: static char ID; // Pass identification, replacement for typeid. - DwarfEHPrepare(const TargetMachine *tm) : - FunctionPass(ID), TM(tm), TLI(TM->getTargetLowering()), - RewindFunction(0) { + DwarfEHPrepare(const TargetMachine *TM) : + FunctionPass(ID), TM(TM), RewindFunction(0) { initializeDominatorTreePass(*PassRegistry::getPassRegistry()); } @@ -61,8 +59,8 @@ namespace { char DwarfEHPrepare::ID = 0; -FunctionPass *llvm::createDwarfEHPass(const TargetMachine *tm) { - return new DwarfEHPrepare(tm); +FunctionPass *llvm::createDwarfEHPass(const TargetMachine *TM) { + return new DwarfEHPrepare(TM); } /// GetExceptionObject - Return the exception object from the value passed into @@ -108,20 +106,18 @@ Value *DwarfEHPrepare::GetExceptionObject(ResumeInst *RI) { /// InsertUnwindResumeCalls - Convert the ResumeInsts that are still present /// into calls to the appropriate _Unwind_Resume function. bool DwarfEHPrepare::InsertUnwindResumeCalls(Function &Fn) { - bool UsesNewEH = false; SmallVector Resumes; for (Function::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I) { TerminatorInst *TI = I->getTerminator(); if (ResumeInst *RI = dyn_cast(TI)) Resumes.push_back(RI); - else if (InvokeInst *II = dyn_cast(TI)) - UsesNewEH = II->getUnwindDest()->isLandingPad(); } if (Resumes.empty()) - return UsesNewEH; + return false; // Find the rewind function if we didn't already. + const TargetLowering *TLI = TM->getTargetLowering(); if (!RewindFunction) { LLVMContext &Ctx = Resumes[0]->getContext(); FunctionType *FTy = FunctionType::get(Type::getVoidTy(Ctx), diff --git a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp index 9b0e76f..031f19c 100644 --- a/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp +++ b/contrib/llvm/lib/CodeGen/ExecutionDepsFix.cpp @@ -23,6 +23,7 @@ #define DEBUG_TYPE "execution-fix" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Allocator.h" @@ -91,7 +92,7 @@ struct DomainValue { // First domain available. unsigned getFirstDomain() const { - return CountTrailingZeros_32(AvailableDomains); + return countTrailingZeros(AvailableDomains); } DomainValue() : Refs(0) { clear(); } @@ -136,6 +137,12 @@ class ExeDepsFix : public MachineFunctionPass { typedef DenseMap LiveOutMap; LiveOutMap LiveOuts; + /// List of undefined register reads in this block in forward order. + std::vector > UndefReads; + + /// Storage for register unit liveness. + LiveRegUnits LiveUnits; + /// Current instruction number. /// The first instruction in each basic block is 0. 
int CurInstr; @@ -185,6 +192,8 @@ private: void processDefs(MachineInstr*, bool Kill); void visitSoftInstr(MachineInstr*, unsigned mask); void visitHardInstr(MachineInstr*, unsigned domain); + bool shouldBreakDependence(MachineInstr*, unsigned OpIdx, unsigned Pref); + void processUndefReads(MachineBasicBlock*); }; } @@ -341,6 +350,10 @@ void ExeDepsFix::enterBasicBlock(MachineBasicBlock *MBB) { // Reset instruction counter in each basic block. CurInstr = 0; + // Set up UndefReads to track undefined register reads. + UndefReads.clear(); + LiveUnits.clear(); + // Set up LiveRegs to represent registers entering MBB. if (!LiveRegs) LiveRegs = new LiveReg[NumRegs]; @@ -448,10 +461,46 @@ void ExeDepsFix::visitInstr(MachineInstr *MI) { processDefs(MI, !DomP.first); } +/// \brief Return true if it makes sense to break dependence on a partial def +/// or undef use. +bool ExeDepsFix::shouldBreakDependence(MachineInstr *MI, unsigned OpIdx, + unsigned Pref) { + int rx = regIndex(MI->getOperand(OpIdx).getReg()); + if (rx < 0) + return false; + + unsigned Clearance = CurInstr - LiveRegs[rx].Def; + DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); + + if (Pref > Clearance) { + DEBUG(dbgs() << ": Break dependency.\n"); + return true; + } + // The current clearance seems OK, but we may be ignoring a def from a + // back-edge. + if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { + DEBUG(dbgs() << ": OK.\n"); + return false; + } + // A def from an unprocessed back-edge may make us break this dependency. + DEBUG(dbgs() << ": Wait for back-edge to resolve.\n"); + return false; +} + // Update def-ages for registers defined by MI. // If Kill is set, also kill off DomainValues clobbered by the defs. +// +// Also break dependencies on partial defs and undef uses. void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { assert(!MI->isDebugValue() && "Won't process debug values"); + + // Break dependence on undef uses. Do this before updating LiveRegs below. + unsigned OpNum; + unsigned Pref = TII->getUndefRegClearance(MI, OpNum, TRI); + if (Pref) { + if (shouldBreakDependence(MI, OpNum, Pref)) + UndefReads.push_back(std::make_pair(MI, OpNum)); + } const MCInstrDesc &MCID = MI->getDesc(); for (unsigned i = 0, e = MI->isVariadic() ? MI->getNumOperands() : MCID.getNumDefs(); @@ -471,37 +520,58 @@ void ExeDepsFix::processDefs(MachineInstr *MI, bool Kill) { DEBUG(dbgs() << TRI->getName(RC->getRegister(rx)) << ":\t" << CurInstr << '\t' << *MI); + // Check clearance before partial register updates. + // Call breakDependence before setting LiveRegs[rx].Def. + unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); + if (Pref && shouldBreakDependence(MI, i, Pref)) + TII->breakPartialRegDependency(MI, i, TRI); + // How many instructions since rx was last written? - unsigned Clearance = CurInstr - LiveRegs[rx].Def; LiveRegs[rx].Def = CurInstr; // Kill off domains redefined by generic instructions. if (Kill) kill(rx); + } + ++CurInstr; +} - // Verify clearance before partial register updates. - unsigned Pref = TII->getPartialRegUpdateClearance(MI, i, TRI); - if (!Pref) - continue; - DEBUG(dbgs() << "Clearance: " << Clearance << ", want " << Pref); - if (Pref > Clearance) { - DEBUG(dbgs() << ": Break dependency.\n"); - TII->breakPartialRegDependency(MI, i, TRI); - continue; - } - - // The current clearance seems OK, but we may be ignoring a def from a - // back-edge.
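The new shouldBreakDependence() above factors out the clearance test that previously sat inline in processDefs(): the clearance is the number of instructions executed since the register was last written, and when the target asks for more distance (via getPartialRegUpdateClearance or the new getUndefRegClearance hook) the false dependence gets broken, on x86 typically by inserting a zeroing idiom such as xorps. A minimal self-contained model of just that arithmetic, with invented numbers (this is a sketch, not the pass itself):

    #include <cstdio>

    // LiveRegsDef[r] holds the instruction number of r's last def; CurInstr
    // is the current position. Names mirror the patch loosely.
    struct ClearanceModel {
      int LiveRegsDef[4] = {-10, -2, -40, 0};
      int CurInstr = 5;

      // True if the distance to the last def is below the preferred
      // clearance, i.e. the false dependence should be broken.
      bool shouldBreak(int Reg, unsigned Pref) const {
        unsigned Clearance = CurInstr - LiveRegsDef[Reg];
        return Pref > Clearance;
      }
    };

    int main() {
      ClearanceModel M;
      // r1 was defined 7 instructions ago: a preferred clearance of 16
      // breaks the dependence, a preference of 4 leaves it alone.
      std::printf("%d %d\n", M.shouldBreak(1, 16), M.shouldBreak(1, 4));
      return 0;
    }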
- if (!SeenUnknownBackEdge || Pref <= unsigned(CurInstr)) { - DEBUG(dbgs() << ": OK.\n"); - continue; - } +/// \brief Break false dependencies on undefined register reads. +/// +/// Walk the block backward computing precise liveness. This is expensive, so we +/// only do it on demand. Note that the occurrence of undefined register reads +/// that should be broken is very rare, but when they occur we may have many in +/// a single block. +void ExeDepsFix::processUndefReads(MachineBasicBlock *MBB) { + if (UndefReads.empty()) + return; - // Collect this block's live out register units. + LiveUnits.init(TRI); + for (MachineBasicBlock::const_succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + LiveUnits.addLiveIns(*SI, *TRI); } + MachineInstr *UndefMI = UndefReads.back().first; + unsigned OpIdx = UndefReads.back().second; - ++CurInstr; + for (MachineBasicBlock::reverse_iterator I = MBB->rbegin(), E = MBB->rend(); + I != E; ++I) { + // Update liveness, including the current instruction's defs. + LiveUnits.stepBackward(*I, *TRI); + + if (UndefMI == &*I) { + if (!LiveUnits.contains(UndefMI->getOperand(OpIdx).getReg(), *TRI)) + TII->breakPartialRegDependency(UndefMI, OpIdx, TRI); + + UndefReads.pop_back(); + if (UndefReads.empty()) + return; + + UndefMI = UndefReads.back().first; + OpIdx = UndefReads.back().second; + } + } } // A hard instruction only works in one domain. All input registers will be @@ -549,7 +619,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Is it possible to use this collapsed register for free? if (dv->isCollapsed()) { // Restrict available domains to the ones in common with the operand. - // If there are no common domains, we must pay the cross-domain + // If there are no common domains, we must pay the cross-domain // penalty for this operand. if (common) available = common; } else if (common) @@ -564,7 +634,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // If the collapsed operands force a single domain, propagate the collapse. if (isPowerOf2_32(available)) { - unsigned domain = CountTrailingZeros_32(available); + unsigned domain = countTrailingZeros(available); TII->setExecutionDomain(mi, domain); visitHardInstr(mi, domain); return; @@ -573,7 +643,7 @@ void ExeDepsFix::visitSoftInstr(MachineInstr *mi, unsigned mask) { // Kill off any remaining uses that don't match available, and build a list of // incoming DomainValues that we want to merge. SmallVector Regs; - for (SmallVector::iterator i=used.begin(), e=used.end(); i!=e; ++i) { + for (SmallVectorImpl::iterator i=used.begin(), e=used.end(); i!=e; ++i) { int rx = *i; const LiveReg &LR = LiveRegs[rx]; // This useless DomainValue could have been missed above. @@ -583,7 +653,7 @@ } // Sorted insertion. bool Inserted = false; - for (SmallVector::iterator i = Regs.begin(), e = Regs.end(); + for (SmallVectorImpl::iterator i = Regs.begin(), e = Regs.end(); i != e && !Inserted; ++i) { if (LR.Def < i->Def) { Inserted = true; @@ -614,7 +684,7 @@ continue; // If latest didn't merge, it is useless now. Kill all registers using it.
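processUndefReads() defers the rewrite until the whole block has been scanned, then recomputes precise liveness by walking the block backward from the union of the successors' live-ins; the dependence is only broken if the undef-read register is genuinely dead at that point. A toy backward walk over a three-instruction block, for illustration (ints for registers, a bitset for register units, all data invented):

    #include <bitset>
    #include <cstdio>
    #include <utility>
    #include <vector>

    // Toy instruction: at most one def and one use (-1 = none).
    struct Inst { int Def, Use; };

    int main() {
      std::vector<Inst> Block = {{0, -1}, {1, 0}, {2, -1}};
      std::bitset<8> Live;               // seeded from successor live-ins
      Live.set(1);                       // r1 is live out of the block

      // Candidate recorded on the forward pass: (instruction index,
      // register read as undef).
      std::pair<int, int> UndefRead = {2, 3};

      for (int i = (int)Block.size() - 1; i >= 0; --i) {
        // Step backward over the instruction: defs die, uses become live.
        if (Block[i].Def >= 0) Live.reset(Block[i].Def);
        if (Block[i].Use >= 0) Live.set(Block[i].Use);
        if (i == UndefRead.first && !Live.test(UndefRead.second))
          std::printf("break false dependence on r%d\n", UndefRead.second);
      }
      return 0;
    }

The expense is the backward scan itself, which is why the pass only performs it when UndefReads is non-empty.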
- for (SmallVector::iterator i=used.begin(), e=used.end(); i != e; ++i) + for (SmallVectorImpl::iterator i=used.begin(), e=used.end(); i!=e; ++i) if (LiveRegs[*i].Value == Latest) kill(*i); } @@ -686,6 +756,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) visitInstr(I); + processUndefReads(MBB); leaveBasicBlock(MBB); } @@ -698,6 +769,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { ++I) if (!I->isDebugValue()) processDefs(I, false); + processUndefReads(MBB); leaveBasicBlock(MBB); } @@ -713,6 +785,7 @@ bool ExeDepsFix::runOnMachineFunction(MachineFunction &mf) { delete[] FI->second; } LiveOuts.clear(); + UndefReads.clear(); Avail.clear(); Allocator.DestroyAll(); diff --git a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp index 1611db8..6c73fff 100644 --- a/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/contrib/llvm/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -104,7 +104,7 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } if (DstSubReg == InsReg) { - // No need to insert an identify copy instruction. + // No need to insert an identity copy instruction. // Watch out for case like this: // %RAX = SUBREG_TO_REG 0, %EAX, 3 // We must leave %RAX live. diff --git a/contrib/llvm/lib/CodeGen/IfConversion.cpp b/contrib/llvm/lib/CodeGen/IfConversion.cpp index 8264d6d..e2d0eb4 100644 --- a/contrib/llvm/lib/CodeGen/IfConversion.cpp +++ b/contrib/llvm/lib/CodeGen/IfConversion.cpp @@ -22,6 +22,8 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/CodeGen/LiveRegUnits.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" @@ -31,6 +33,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" + using namespace llvm; // Hidden options for help debugging. @@ -150,14 +154,17 @@ namespace { /// BBAnalysis - Results of if-conversion feasibility analysis indexed by /// basic block number. 
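Many prototypes in these hunks (Forbid, locs, Children, InsnRanges, used, Regs, and more below) change parameter types from a concrete SmallVector to SmallVectorImpl. SmallVectorImpl<T> is the size-erased base class of SmallVector<T, N>, so a single signature accepts callers regardless of which inline capacity N they chose. A rough standard-library analogue of the idiom (toy types only; llvm::SmallVector's real layout is more involved):

    #include <cstdio>
    #include <vector>

    // Toy: the base class erases the inline-capacity parameter N.
    template <typename T> struct SmallVectorImplToy : std::vector<T> {};
    template <typename T, unsigned N>
    struct SmallVectorToy : SmallVectorImplToy<T> {};

    // Taking the base class keeps N out of the interface, as the patch
    // does for e.g. findSuitableFreeRegister's Forbid parameter.
    static unsigned count(const SmallVectorImplToy<unsigned> &Forbid) {
      return (unsigned)Forbid.size();
    }

    int main() {
      SmallVectorToy<unsigned, 4> A;
      A.push_back(1);
      SmallVectorToy<unsigned, 16> B;
      B.push_back(1);
      B.push_back(2);
      std::printf("%u %u\n", count(A), count(B)); // one signature, two sizes
      return 0;
    }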
std::vector BBAnalysis; + TargetSchedModel SchedModel; const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; - const InstrItineraryData *InstrItins; const MachineBranchProbabilityInfo *MBPI; MachineRegisterInfo *MRI; + LiveRegUnits Redefs; + LiveRegUnits DontKill; + bool PreRegAlloc; bool MadeChange; int FnNum; @@ -198,11 +205,9 @@ namespace { void PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl &Cond, - SmallSet &Redefs, SmallSet *LaterRedefs = 0); void CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl &Cond, - SmallSet &Redefs, bool IgnoreBr = false); void MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges = true); @@ -267,7 +272,11 @@ bool IfConverter::runOnMachineFunction(MachineFunction &MF) { TRI = MF.getTarget().getRegisterInfo(); MBPI = &getAnalysis(); MRI = &MF.getRegInfo(); - InstrItins = MF.getTarget().getInstrItineraryData(); + + const TargetSubtargetInfo &ST = + MF.getTarget().getSubtarget(); + SchedModel.init(*ST.getSchedModel(), &ST, TII); + if (!TII) return false; PreRegAlloc = MRI->isSSA(); @@ -666,32 +675,28 @@ void IfConverter::ScanInstructions(BBInfo &BBI) { bool isPredicated = TII->isPredicated(I); bool isCondBr = BBI.IsBrAnalyzable && I->isConditionalBranch(); - if (!isCondBr) { - if (!isPredicated) { - BBI.NonPredSize++; - unsigned ExtraPredCost = 0; - unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, - &ExtraPredCost); - if (NumCycles > 1) - BBI.ExtraCost += NumCycles-1; - BBI.ExtraCost2 += ExtraPredCost; - } else if (!AlreadyPredicated) { - // FIXME: This instruction is already predicated before the - // if-conversion pass. It's probably something like a conditional move. - // Mark this block unpredicable for now. - BBI.IsUnpredicable = true; - return; - } + // A conditional branch is not predicable, but it may be eliminated. + if (isCondBr) + continue; + + if (!isPredicated) { + BBI.NonPredSize++; + unsigned ExtraPredCost = TII->getPredicationCost(&*I); + unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); + if (NumCycles > 1) + BBI.ExtraCost += NumCycles-1; + BBI.ExtraCost2 += ExtraPredCost; + } else if (!AlreadyPredicated) { + // FIXME: This instruction is already predicated before the + // if-conversion pass. It's probably something like a conditional move. + // Mark this block unpredicable for now. + BBI.IsUnpredicable = true; + return; } if (BBI.ClobbersPred && !isPredicated) { // Predicate modification instruction should end the block (except for // already predicated instructions and end of block branches). - if (isCondBr) { - // A conditional branch is not predicable, but it may be eliminated. - continue; - } - // Predicate may have been modified, the subsequent (currently) // unpredicated instructions cannot be correctly predicated. BBI.IsUnpredicable = true; @@ -720,9 +725,9 @@ bool IfConverter::FeasibilityAnalysis(BBInfo &BBI, if (BBI.IsDone || BBI.IsUnpredicable) return false; - // If it is already predicated, check if its predicate subsumes the new - // predicate. - if (BBI.Predicate.size() && !TII->SubsumesPredicate(BBI.Predicate, Pred)) + // If it is already predicated, check if the new predicate subsumes + // its predicate. 
+ if (BBI.Predicate.size() && !TII->SubsumesPredicate(Pred, BBI.Predicate)) return false; if (BBI.BrCond.size()) { @@ -961,64 +966,58 @@ void IfConverter::RemoveExtraEdges(BBInfo &BBI) { BBI.BB->CorrectExtraCFGEdges(TBB, FBB, !Cond.empty()); } -/// InitPredRedefs / UpdatePredRedefs - Defs by predicated instructions are -/// modeled as read + write (sort like two-address instructions). These -/// routines track register liveness and add implicit uses to if-converted -/// instructions to conform to the model. -static void InitPredRedefs(MachineBasicBlock *BB, SmallSet &Redefs, - const TargetRegisterInfo *TRI) { - for (MachineBasicBlock::livein_iterator I = BB->livein_begin(), - E = BB->livein_end(); I != E; ++I) { - unsigned Reg = *I; - Redefs.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Redefs.insert(*SubRegs); - } -} - -static void UpdatePredRedefs(MachineInstr *MI, SmallSet &Redefs, - const TargetRegisterInfo *TRI, - bool AddImpUse = false) { - SmallVector Defs; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) +/// Behaves like LiveRegUnits::StepForward() but also adds implicit uses to all +/// values defined in MI which are not live/used by MI. +static void UpdatePredRedefs(MachineInstr *MI, LiveRegUnits &Redefs, + const TargetRegisterInfo *TRI) { + for (ConstMIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { + if (!Ops->isReg() || !Ops->isKill()) continue; - unsigned Reg = MO.getReg(); - if (!Reg) + unsigned Reg = Ops->getReg(); + if (Reg == 0) continue; - if (MO.isDef()) - Defs.push_back(Reg); - else if (MO.isKill()) { - Redefs.erase(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Redefs.erase(*SubRegs); - } + Redefs.removeReg(Reg, *TRI); } - MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); - for (unsigned i = 0, e = Defs.size(); i != e; ++i) { - unsigned Reg = Defs[i]; - if (!Redefs.insert(Reg)) { - if (AddImpUse) - // Treat predicated update as read + write. - MIB.addReg(Reg, RegState::Implicit | RegState::Undef); - } else { - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - Redefs.insert(*SubRegs); - } + for (MIBundleOperands Ops(MI); Ops.isValid(); ++Ops) { + if (!Ops->isReg() || !Ops->isDef()) + continue; + unsigned Reg = Ops->getReg(); + if (Reg == 0 || Redefs.contains(Reg, *TRI)) + continue; + Redefs.addReg(Reg, *TRI); + + MachineOperand &Op = *Ops; + MachineInstr *MI = Op.getParent(); + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); + MIB.addReg(Reg, RegState::Implicit | RegState::Undef); } } -static void UpdatePredRedefs(MachineBasicBlock::iterator I, - MachineBasicBlock::iterator E, - SmallSet &Redefs, - const TargetRegisterInfo *TRI) { - while (I != E) { - UpdatePredRedefs(I, Redefs, TRI); - ++I; +/** + * Remove kill flags from operands with a registers in the @p DontKill set. + */ +static void RemoveKills(MachineInstr &MI, const LiveRegUnits &DontKill, + const MCRegisterInfo &MCRI) { + for (MIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->isKill()) + continue; + if (DontKill.contains(O->getReg(), MCRI)) + O->setIsKill(false); } } +/** + * Walks a range of machine instructions and removes kill flags for registers + * in the @p DontKill set. 
+ */ +static void RemoveKills(MachineBasicBlock::iterator I, + MachineBasicBlock::iterator E, + const LiveRegUnits &DontKill, + const MCRegisterInfo &MCRI) { + for ( ; I != E; ++I) + RemoveKills(*I, DontKill, MCRI); +} + /// IfConvertSimple - If convert a simple (split, no rejoin) sub-CFG. /// bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { @@ -1049,21 +1048,27 @@ bool IfConverter::IfConvertSimple(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentiall redefined by // predicated instructions. - SmallSet Redefs; - InitPredRedefs(CvtBBI->BB, Redefs, TRI); - InitPredRedefs(NextBBI->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(CvtBBI->BB, *TRI); + Redefs.addLiveIns(NextBBI->BB, *TRI); + + // Compute a set of registers which must not be killed by instructions in + // BB1: This is everything live-in to BB2. + DontKill.init(TRI); + DontKill.addLiveIns(NextBBI->BB, *TRI); if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. BBI.BB->removeSuccessor(CvtBBI->BB); } else { - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + RemoveKills(CvtBBI->BB->begin(), CvtBBI->BB->end(), DontKill, *TRI); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); // Merge converted block into entry block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1148,16 +1153,18 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - SmallSet Redefs; - InitPredRedefs(CvtBBI->BB, Redefs, TRI); - InitPredRedefs(NextBBI->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(CvtBBI->BB, *TRI); + Redefs.addLiveIns(NextBBI->BB, *TRI); + + DontKill.clear(); bool HasEarlyExit = CvtBBI->FalseBB != NULL; if (CvtBBI->BB->pred_size() > 1) { BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); // Copy instructions in the true block, predicate them, and add them to // the entry block. - CopyAndPredicateBlock(BBI, *CvtBBI, Cond, Redefs, true); + CopyAndPredicateBlock(BBI, *CvtBBI, Cond, true); // RemoveExtraEdges won't work if the block has an unanalyzable branch, so // explicitly remove CvtBBI as a successor. @@ -1165,7 +1172,7 @@ bool IfConverter::IfConvertTriangle(BBInfo &BBI, IfcvtKind Kind) { } else { // Predicate the 'true' block after removing its branch. CvtBBI->NonPredSize -= TII->RemoveBranch(*CvtBBI->BB); - PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond, Redefs); + PredicateBlock(*CvtBBI, CvtBBI->BB->end(), Cond); // Now merge the entry of the triangle with the true block. BBI.NonPredSize -= TII->RemoveBranch(*BBI.BB); @@ -1276,8 +1283,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, // Initialize liveins to the first BB. These are potentially redefined by // predicated instructions. - SmallSet Redefs; - InitPredRedefs(BBI1->BB, Redefs, TRI); + Redefs.init(TRI); + Redefs.addLiveIns(BBI1->BB, *TRI); // Remove the duplicated instructions at the beginnings of both paths. 
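The RemoveKills() helpers introduced above clear kill flags on registers the if-converted code still needs: DontKill holds everything live into the block that executes afterwards, and any operand that claims to kill such a register loses its flag. A toy version over plain int registers (Op and Inst are invented types, not MachineOperand/MachineInstr):

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Op { int Reg; bool IsKill; };
    using Inst = std::vector<Op>;

    // Drop kill flags on registers in the DontKill set.
    static void removeKills(Inst &MI, const std::set<int> &DontKill) {
      for (Op &O : MI)
        if (O.IsKill && DontKill.count(O.Reg))
          O.IsKill = false;
    }

    int main() {
      std::set<int> DontKill = {7};     // r7 is live into the merged block
      Inst MI = {{7, true}, {3, true}}; // claims to kill r7 and r3
      removeKills(MI, DontKill);
      for (const Op &O : MI)
        std::printf("r%d kill=%d\n", O.Reg, (int)O.IsKill); // r7 0, r3 1
      return 0;
    }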
MachineBasicBlock::iterator DI1 = BBI1->BB->begin(); @@ -1304,7 +1311,19 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, --NumDups1; } - UpdatePredRedefs(BBI1->BB->begin(), DI1, Redefs, TRI); + // Compute a set of registers which must not be killed by instructions in BB1: + // This is everything used+live in BB2 after the duplicated instructions. We + // can compute this set by simulating liveness backwards from the end of BB2. + DontKill.init(TRI); + for (MachineBasicBlock::reverse_iterator I = BBI2->BB->rbegin(), + E = MachineBasicBlock::reverse_iterator(DI2); I != E; ++I) { + DontKill.stepBackward(*I, *TRI); + } + + for (MachineBasicBlock::const_iterator I = BBI1->BB->begin(), E = DI1; I != E; + ++I) { + Redefs.stepForward(*I, *TRI); + } BBI.BB->splice(BBI.BB->end(), BBI1->BB, BBI1->BB->begin(), DI1); BBI2->BB->erase(BBI2->BB->begin(), DI2); @@ -1322,6 +1341,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } BBI1->BB->erase(DI1, BBI1->BB->end()); + // Kill flags in the true block for registers living into the false block + // must be removed. + RemoveKills(BBI1->BB->begin(), BBI1->BB->end(), DontKill, *TRI); + // Remove 'false' block branch and find the last instruction to predicate. BBI2->NonPredSize -= TII->RemoveBranch(*BBI2->BB); DI2 = BBI2->BB->end(); @@ -1362,8 +1385,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } else if (!RedefsByFalse.count(Reg)) { // These are defined before ctrl flow reach the 'false' instructions. // They cannot be modified by the 'true' instructions. - ExtUses.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) ExtUses.insert(*SubRegs); } } @@ -1371,8 +1394,8 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Reg = Defs[i]; if (!ExtUses.count(Reg)) { - RedefsByFalse.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) RedefsByFalse.insert(*SubRegs); } } @@ -1380,10 +1403,10 @@ bool IfConverter::IfConvertDiamond(BBInfo &BBI, IfcvtKind Kind, } // Predicate the 'true' block. - PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, Redefs, &RedefsByFalse); + PredicateBlock(*BBI1, BBI1->BB->end(), *Cond1, &RedefsByFalse); // Predicate the 'false' block. - PredicateBlock(*BBI2, DI2, *Cond2, Redefs); + PredicateBlock(*BBI2, DI2, *Cond2); // Merge the true block into the entry of the diamond. MergeBlocks(BBI, *BBI1, TailBB == 0); @@ -1458,7 +1481,6 @@ static bool MaySpeculate(const MachineInstr *MI, void IfConverter::PredicateBlock(BBInfo &BBI, MachineBasicBlock::iterator E, SmallVectorImpl &Cond, - SmallSet &Redefs, SmallSet *LaterRedefs) { bool AnyUnpred = false; bool MaySpec = LaterRedefs != 0; @@ -1484,7 +1506,7 @@ void IfConverter::PredicateBlock(BBInfo &BBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(I, Redefs, TRI, true); + UpdatePredRedefs(I, Redefs, TRI); } std::copy(Cond.begin(), Cond.end(), std::back_inserter(BBI.Predicate)); @@ -1501,7 +1523,6 @@ void IfConverter::PredicateBlock(BBInfo &BBI, /// the destination block. Skip end of block branches if IgnoreBr is true. 
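For the diamond case the patch derives both sets by simulating liveness: Redefs steps forward across the duplicated prefix (defs become live), while DontKill steps backward from the end of the 'false' block (defs die, uses become live), approximating what LiveRegUnits::stepForward and stepBackward do over register units. A compact sketch of the two walks using sets instead of register units (toy operand encoding, invented data):

    #include <cstdio>
    #include <set>
    #include <vector>

    struct Op { int Reg; bool IsDef; };
    using Inst = std::vector<Op>;

    int main() {
      std::vector<Inst> Prefix = {{{1, true}}};                // def r1
      std::vector<Inst> FalseTail = {{{2, false}, {3, true}}}; // use r2, def r3

      std::set<int> Redefs;                  // forward: collect defs
      for (const Inst &MI : Prefix)
        for (const Op &O : MI)
          if (O.IsDef) Redefs.insert(O.Reg);

      std::set<int> DontKill;                // backward: defs die, uses live
      for (auto I = FalseTail.rbegin(); I != FalseTail.rend(); ++I) {
        for (const Op &O : *I) if (O.IsDef) DontKill.erase(O.Reg);
        for (const Op &O : *I) if (!O.IsDef) DontKill.insert(O.Reg);
      }

      std::printf("redefs=%zu, r2 in DontKill: %d\n", Redefs.size(),
                  (int)DontKill.count(2)); // r2 must not be killed in BB1
      return 0;
    }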
void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, SmallVectorImpl &Cond, - SmallSet &Redefs, bool IgnoreBr) { MachineFunction &MF = *ToBBI.BB->getParent(); @@ -1514,8 +1535,8 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, MachineInstr *MI = MF.CloneMachineInstr(I); ToBBI.BB->insert(ToBBI.BB->end(), MI); ToBBI.NonPredSize++; - unsigned ExtraPredCost = 0; - unsigned NumCycles = TII->getInstrLatency(InstrItins, &*I, &ExtraPredCost); + unsigned ExtraPredCost = TII->getPredicationCost(&*I); + unsigned NumCycles = SchedModel.computeInstrLatency(&*I, false); if (NumCycles > 1) ToBBI.ExtraCost += NumCycles-1; ToBBI.ExtraCost2 += ExtraPredCost; @@ -1531,7 +1552,11 @@ void IfConverter::CopyAndPredicateBlock(BBInfo &ToBBI, BBInfo &FromBBI, // If the predicated instruction now redefines a register as the result of // if-conversion, add an implicit kill. - UpdatePredRedefs(MI, Redefs, TRI, true); + UpdatePredRedefs(MI, Redefs, TRI); + + // Some kill flags may not be correct anymore. + if (!DontKill.empty()) + RemoveKills(*MI, DontKill, *TRI); } if (!IgnoreBr) { diff --git a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp index 35295fe..bb0e642 100644 --- a/contrib/llvm/lib/CodeGen/InlineSpiller.cpp +++ b/contrib/llvm/lib/CodeGen/InlineSpiller.cpp @@ -14,6 +14,7 @@ #define DEBUG_TYPE "regalloc" #include "Spiller.h" +#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -21,8 +22,10 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" @@ -63,6 +66,7 @@ class InlineSpiller : public Spiller { MachineRegisterInfo &MRI; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; // Variables that are valid during spill(), but used by multiple methods. LiveRangeEdit *Edit; @@ -146,7 +150,8 @@ public: MFI(*mf.getFrameInfo()), MRI(mf.getRegInfo()), TII(*mf.getTarget().getInstrInfo()), - TRI(*mf.getTarget().getRegisterInfo()) {} + TRI(*mf.getTarget().getRegisterInfo()), + MBFI(pass.getAnalysis()) {} void spill(LiveRangeEdit &); @@ -174,10 +179,8 @@ private: bool coalesceStackAccess(MachineInstr *MI, unsigned Reg); bool foldMemoryOperand(ArrayRef >, MachineInstr *LoadMI = 0); - void insertReload(LiveInterval &NewLI, SlotIndex, - MachineBasicBlock::iterator MI); - void insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - SlotIndex, MachineBasicBlock::iterator MI); + void insertReload(unsigned VReg, SlotIndex, MachineBasicBlock::iterator MI); + void insertSpill(unsigned VReg, bool isKill, MachineBasicBlock::iterator MI); void spillAroundUses(unsigned Reg); void spillAll(); @@ -337,10 +340,12 @@ static raw_ostream &operator<<(raw_ostream &OS, /// propagateSiblingValue - Propagate the value in SVI to dependents if it is /// known. Otherwise remember the dependency for later. /// -/// @param SVI SibValues entry to propagate. +/// @param SVIIter SibValues entry to propagate. /// @param VNI Dependent value, or NULL to propagate to all saved dependents. 
-void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, +void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVIIter, VNInfo *VNI) { + SibValueMap::value_type *SVI = &*SVIIter; + // When VNI is non-NULL, add it to SVI's deps, and only propagate to that. TinyPtrVector FirstDeps; if (VNI) { @@ -352,14 +357,12 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, if (!SVI->second.hasDef()) return; - // Work list of values to propagate. It would be nice to use a SetVector - // here, but then we would be forced to use a SmallSet. - SmallVector WorkList(1, SVI); - SmallPtrSet WorkSet; + // Work list of values to propagate. + SmallSetVector WorkList; + WorkList.insert(SVI); do { SVI = WorkList.pop_back_val(); - WorkSet.erase(SVI->first); TinyPtrVector *Deps = VNI ? &FirstDeps : &SVI->second.Deps; VNI = 0; @@ -450,8 +453,7 @@ void InlineSpiller::propagateSiblingValue(SibValueMap::iterator SVI, continue; // Something changed in DepSVI. Propagate to dependents. - if (WorkSet.insert(DepSVI->first)) - WorkList.push_back(DepSVI); + WorkList.insert(&*DepSVI); DEBUG(dbgs() << " update " << DepSVI->first->id << '@' << DepSVI->first->def << " to:\t" << DepSV); @@ -576,7 +578,7 @@ MachineInstr *InlineSpiller::traceSiblingValue(unsigned UseReg, VNInfo *UseVNI, if (unsigned SrcReg = isFullCopyOf(MI, Reg)) { if (isSibling(SrcReg)) { LiveInterval &SrcLI = LIS.getInterval(SrcReg); - LiveRangeQuery SrcQ(SrcLI, VNI->def); + LiveQueryResult SrcQ = SrcLI.Query(VNI->def); assert(SrcQ.valueIn() && "Copy from non-existing value"); // Check if this COPY kills its source. SVI->second.KillsSource = SrcQ.isKill(); @@ -881,12 +883,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, } // Alocate a new register for the remat. - LiveInterval &NewLI = Edit->createFrom(Original); - NewLI.markNotSpillable(); + unsigned NewVReg = Edit->createFrom(Original); // Finally we can rematerialize OrigMI before MI. - SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewLI.reg, RM, + SlotIndex DefIdx = Edit->rematerializeAt(*MI->getParent(), MI, NewVReg, RM, TRI); + (void)DefIdx; DEBUG(dbgs() << "\tremat: " << DefIdx << '\t' << *LIS.getInstructionFromIndex(DefIdx)); @@ -894,15 +896,12 @@ bool InlineSpiller::reMaterializeFor(LiveInterval &VirtReg, for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = MI->getOperand(Ops[i].second); if (MO.isReg() && MO.isUse() && MO.getReg() == VirtReg.reg) { - MO.setReg(NewLI.reg); + MO.setReg(NewVReg); MO.setIsKill(); } } - DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI); + DEBUG(dbgs() << "\t " << UseIdx << '\t' << *MI << '\n'); - VNInfo *DefVNI = NewLI.getNextValue(DefIdx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(DefIdx, UseIdx.getRegSlot(), DefVNI)); - DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); ++NumRemats; return true; } @@ -1005,6 +1004,40 @@ bool InlineSpiller::coalesceStackAccess(MachineInstr *MI, unsigned Reg) { return true; } +#if !defined(NDEBUG) +// Dump the range of instructions from B to E with their slot indexes. 
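propagateSiblingValue() now keeps its worklist in a SmallSetVector where it previously paired a SmallVector with a SmallPtrSet for deduplication. A SetVector provides both behaviours in one structure; here is a toy equivalent built from std containers (not the real class; note that LLVM's pop_back also erases from the set, so an element can be queued again later):

    #include <cstdio>
    #include <set>
    #include <vector>

    // Toy llvm::SetVector: deduplicated insertion, LIFO pop.
    struct SetVectorToy {
      std::vector<int> Vec;
      std::set<int> Seen;
      bool insert(int V) {
        if (!Seen.insert(V).second)
          return false;                  // already queued, keep order
        Vec.push_back(V);
        return true;
      }
      int pop_back_val() {
        int V = Vec.back();
        Vec.pop_back();
        Seen.erase(V);                   // may be re-queued later
        return V;
      }
      bool empty() const { return Vec.empty(); }
    };

    int main() {
      SetVectorToy WorkList;
      WorkList.insert(1);
      WorkList.insert(2);
      WorkList.insert(1);                // duplicate: ignored
      while (!WorkList.empty())
        std::printf("visit %d\n", WorkList.pop_back_val()); // 2, then 1
      return 0;
    }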
+static void dumpMachineInstrRangeWithSlotIndex(MachineBasicBlock::iterator B, + MachineBasicBlock::iterator E, + LiveIntervals const &LIS, + const char *const header, + unsigned VReg =0) { + char NextLine = '\n'; + char SlotIndent = '\t'; + + if (llvm::next(B) == E) { + NextLine = ' '; + SlotIndent = ' '; + } + + dbgs() << '\t' << header << ": " << NextLine; + + for (MachineBasicBlock::iterator I = B; I != E; ++I) { + SlotIndex Idx = LIS.getInstructionIndex(I).getRegSlot(); + + // If a register was passed in and this instruction has it as a + // destination that is marked as an early clobber, print the + // early-clobber slot index. + if (VReg) { + MachineOperand *MO = I->findRegisterDefOperand(VReg); + if (MO && MO->isEarlyClobber()) + Idx = Idx.getRegSlot(true); + } + + dbgs() << SlotIndent << Idx << '\t' << *I; + } +} +#endif + /// foldMemoryOperand - Try folding stack slot references in Ops into their /// instructions. /// @@ -1024,6 +1057,9 @@ foldMemoryOperand(ArrayRef > Ops, bool WasCopy = MI->isCopy(); unsigned ImpReg = 0; + bool SpillSubRegs = (MI->getOpcode() == TargetOpcode::PATCHPOINT || + MI->getOpcode() == TargetOpcode::STACKMAP); + // TargetInstrInfo::foldMemoryOperand only expects explicit, non-tied // operands. SmallVector FoldOps; @@ -1035,7 +1071,7 @@ foldMemoryOperand(ArrayRef > Ops, continue; } // FIXME: Teach targets to deal with subregs. - if (MO.getSubReg()) + if (!SpillSubRegs && MO.getSubReg()) return false; // We cannot fold a load instruction into a def. if (LoadMI && MO.isDef()) @@ -1045,14 +1081,52 @@ foldMemoryOperand(ArrayRef > Ops, FoldOps.push_back(Idx); } + MachineInstrSpan MIS(MI); + MachineInstr *FoldMI = LoadMI ? TII.foldMemoryOperand(MI, FoldOps, LoadMI) : TII.foldMemoryOperand(MI, FoldOps, StackSlot); if (!FoldMI) return false; + + // Remove LIS for any dead defs in the original MI not in FoldMI. + for (MIBundleOperands MO(MI); MO.isValid(); ++MO) { + if (!MO->isReg()) + continue; + unsigned Reg = MO->getReg(); + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || + MRI.isReserved(Reg)) { + continue; + } + MIBundleOperands::PhysRegInfo RI = + MIBundleOperands(FoldMI).analyzePhysReg(Reg, &TRI); + if (MO->readsReg()) { + assert(RI.Reads && "Cannot fold physreg reader"); + continue; + } + if (RI.Defines) + continue; + // FoldMI does not define this physreg. Remove the LI segment. + assert(MO->isDead() && "Cannot fold physreg def"); + for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) { + if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); + if (VNInfo *VNI = LR->getVNInfoAt(Idx)) + LR->removeValNo(VNI); + } + } + } + LIS.ReplaceMachineInstrInMaps(MI, FoldMI); MI->eraseFromParent(); + // Insert any new instructions other than FoldMI into the LIS maps. + assert(!MIS.empty() && "Unexpected empty span of instructions!"); + for (MachineBasicBlock::iterator MII = MIS.begin(), End = MIS.end(); + MII != End; ++MII) + if (&*MII != FoldMI) + LIS.InsertMachineInstrInMaps(&*MII); + // TII.foldMemoryOperand may have left some implicit operands on the // instruction. Strip them. 
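foldMemoryOperand(), insertReload() and insertSpill() now bracket the insertion point with a MachineInstrSpan so that every instruction a target hook happens to create can be found and registered with the SlotIndexes maps afterwards. The trick is to remember the neighbours of the insertion point in the intrusive list; modelled here on a std::list (toy only, and unlike the real helper it assumes the position is not at the very front of the block):

    #include <cstdio>
    #include <list>
    #include <string>

    int main() {
      std::list<std::string> MBB = {"a", "mi", "b"};
      auto MI = std::next(MBB.begin());  // insertion point, at "mi"

      auto Before = std::prev(MI);       // span begins just after this
      auto End = std::next(MI);          // span ends here
      MBB.insert(MI, "reload");          // a hook inserts before MI...
      MBB.insert(End, "spill");          // ...and after it

      // Walk exactly the instructions that now form the span.
      for (auto I = std::next(Before); I != End; ++I)
        std::printf("in span: %s\n", I->c_str()); // reload, mi, spill
      return 0;
    }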
if (ImpReg) @@ -1064,8 +1138,9 @@ foldMemoryOperand(ArrayRef > Ops, FoldMI->RemoveOperand(i - 1); } - DEBUG(dbgs() << "\tfolded: " << LIS.getInstructionIndex(FoldMI) << '\t' - << *FoldMI); + DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MIS.end(), LIS, + "folded")); + if (!WasCopy) ++NumFolded; else if (Ops.front().second == 0) @@ -1075,36 +1150,35 @@ foldMemoryOperand(ArrayRef > Ops, return true; } -/// insertReload - Insert a reload of NewLI.reg before MI. -void InlineSpiller::insertReload(LiveInterval &NewLI, +void InlineSpiller::insertReload(unsigned NewVReg, SlotIndex Idx, MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - TII.loadRegFromStackSlot(MBB, MI, NewLI.reg, StackSlot, - MRI.getRegClass(NewLI.reg), &TRI); - --MI; // Point to load instruction. - SlotIndex LoadIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); - // Some (out-of-tree) targets have EC reload instructions. - if (MachineOperand *MO = MI->findRegisterDefOperand(NewLI.reg)) - if (MO->isEarlyClobber()) - LoadIdx = LoadIdx.getRegSlot(true); - DEBUG(dbgs() << "\treload: " << LoadIdx << '\t' << *MI); - VNInfo *LoadVNI = NewLI.getNextValue(LoadIdx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(LoadIdx, Idx, LoadVNI)); + + MachineInstrSpan MIS(MI); + TII.loadRegFromStackSlot(MBB, MI, NewVReg, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + + LIS.InsertMachineInstrRangeInMaps(MIS.begin(), MI); + + DEBUG(dumpMachineInstrRangeWithSlotIndex(MIS.begin(), MI, LIS, "reload", + NewVReg)); ++NumReloads; } -/// insertSpill - Insert a spill of NewLI.reg after MI. -void InlineSpiller::insertSpill(LiveInterval &NewLI, const LiveInterval &OldLI, - SlotIndex Idx, MachineBasicBlock::iterator MI) { +/// insertSpill - Insert a spill of NewVReg after MI. +void InlineSpiller::insertSpill(unsigned NewVReg, bool isKill, + MachineBasicBlock::iterator MI) { MachineBasicBlock &MBB = *MI->getParent(); - TII.storeRegToStackSlot(MBB, ++MI, NewLI.reg, true, StackSlot, - MRI.getRegClass(NewLI.reg), &TRI); - --MI; // Point to store instruction. - SlotIndex StoreIdx = LIS.InsertMachineInstrInMaps(MI).getRegSlot(); - DEBUG(dbgs() << "\tspilled: " << StoreIdx << '\t' << *MI); - VNInfo *StoreVNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, StoreIdx, StoreVNI)); + + MachineInstrSpan MIS(MI); + TII.storeRegToStackSlot(MBB, llvm::next(MI), NewVReg, isKill, StackSlot, + MRI.getRegClass(NewVReg), &TRI); + + LIS.InsertMachineInstrRangeInMaps(llvm::next(MI), MIS.end()); + + DEBUG(dumpMachineInstrRangeWithSlotIndex(llvm::next(MI), MIS.end(), LIS, + "spill")); ++NumSpills; } @@ -1120,18 +1194,14 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { // Debug values are not allowed to affect codegen. if (MI->isDebugValue()) { // Modify DBG_VALUE now that the value is in a spill slot. - uint64_t Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? 
MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(2).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = TII.emitFrameIndexDebugValue(MF, StackSlot, - Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - } else { - DEBUG(dbgs() << "Removing debug info due to spill:" << "\t" << *MI); - MI->eraseFromParent(); - } + DEBUG(dbgs() << "Modifying debug info due to spill:" << "\t" << *MI); + MachineBasicBlock *MBB = MI->getParent(); + BuildMI(*MBB, MBB->erase(MI), DL, TII.get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(StackSlot).addImm(Offset).addMetadata(MDPtr); continue; } @@ -1184,19 +1254,18 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { if (foldMemoryOperand(Ops)) continue; - // Allocate interval around instruction. + // Create a new virtual register for spill/fill. // FIXME: Infer regclass from instruction alone. - LiveInterval &NewLI = Edit->createFrom(Reg); - NewLI.markNotSpillable(); + unsigned NewVReg = Edit->createFrom(Reg); if (RI.Reads) - insertReload(NewLI, Idx, MI); + insertReload(NewVReg, Idx, MI); // Rewrite instruction operands. bool hasLiveDef = false; for (unsigned i = 0, e = Ops.size(); i != e; ++i) { MachineOperand &MO = Ops[i].first->getOperand(Ops[i].second); - MO.setReg(NewLI.reg); + MO.setReg(NewVReg); if (MO.isUse()) { if (!Ops[i].first->isRegTiedToDefOperand(Ops[i].second)) MO.setIsKill(); @@ -1205,21 +1274,12 @@ void InlineSpiller::spillAroundUses(unsigned Reg) { hasLiveDef = true; } } - DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI); + DEBUG(dbgs() << "\trewrite: " << Idx << '\t' << *MI << '\n'); // FIXME: Use a second vreg if instruction has no tied ops. - if (RI.Writes) { + if (RI.Writes) if (hasLiveDef) - insertSpill(NewLI, OldLI, Idx, MI); - else { - // This instruction defines a dead value. We don't need to spill it, - // but do create a live range for the dead value. - VNInfo *VNI = NewLI.getNextValue(Idx, LIS.getVNInfoAllocator()); - NewLI.addRange(LiveRange(Idx, Idx.getDeadSlot(), VNI)); - } - } - - DEBUG(dbgs() << "\tinterval: " << NewLI << '\n'); + insertSpill(NewVReg, true, MI); } } @@ -1238,8 +1298,8 @@ void InlineSpiller::spillAll() { assert(StackInt->getNumValNums() == 1 && "Bad stack interval values"); for (unsigned i = 0, e = RegsToSpill.size(); i != e; ++i) - StackInt->MergeRangesInAsValue(LIS.getInterval(RegsToSpill[i]), - StackInt->getValNumInfo(0)); + StackInt->MergeSegmentsInAsValue(LIS.getInterval(RegsToSpill[i]), + StackInt->getValNumInfo(0)); DEBUG(dbgs() << "Merged spilled regs: " << *StackInt << '\n'); // Spill around uses of all RegsToSpill. 
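In spillAroundUses() the DBG_VALUE rewrite above no longer goes through emitFrameIndexDebugValue: the instruction is rebuilt directly with BuildMI, the register location becomes a frame-index location, and the offset operand is only trusted when the old DBG_VALUE was already register-indirect (MI->isIndirectDebugValue()). A toy model of that decision (invented struct, not MachineInstr):

    #include <cstdio>

    struct DbgValue {
      bool IsIndirect;
      int RegOrFI;   // register before the spill, frame index after
      long Offset;
    };

    // A spilled variable now lives at "frame index + offset"; a direct
    // (register) DBG_VALUE carries no meaningful offset, so use 0.
    static DbgValue rewriteForSpill(const DbgValue &Old, int StackSlot) {
      long Offset = Old.IsIndirect ? Old.Offset : 0;
      return {true, StackSlot, Offset}; // memory locations are indirect
    }

    int main() {
      DbgValue Direct = {false, /*vreg*/ 42, 99};
      DbgValue NewDV = rewriteForSpill(Direct, /*StackSlot=*/3);
      std::printf("fi#%d + %ld\n", NewDV.RegOrFI, NewDV.Offset); // fi#3 + 0
      return 0;
    }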
@@ -1280,8 +1340,8 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { DEBUG(dbgs() << "Inline spilling " << MRI.getRegClass(edit.getReg())->getName() - << ':' << PrintReg(edit.getReg()) << ' ' << edit.getParent() - << "\nFrom original " << LIS.getInterval(Original) << '\n'); + << ':' << edit.getParent() + << "\nFrom original " << PrintReg(Original) << '\n'); assert(edit.getParent().isSpillable() && "Attempting to spill already spilled value."); assert(DeadDefs.empty() && "Previous spill didn't remove dead defs"); @@ -1294,5 +1354,5 @@ void InlineSpiller::spill(LiveRangeEdit &edit) { if (!RegsToSpill.empty()) spillAll(); - Edit->calculateRegClassAndHint(MF, Loops); + Edit->calculateRegClassAndHint(MF, Loops, MBFI); } diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp index a8e711e..427225d 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.cpp +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.cpp @@ -204,11 +204,11 @@ void InterferenceCache::Entry::update(unsigned MBBNum) { // Fixed interference. for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { LiveInterval::iterator &I = RegUnits[i].FixedI; - LiveInterval *LI = RegUnits[i].Fixed; - if (I == LI->end() || I->start >= Stop) + LiveRange *LR = RegUnits[i].Fixed; + if (I == LR->end() || I->start >= Stop) continue; - I = LI->advanceTo(I, Stop); - bool Backup = I == LI->end() || I->start >= Stop; + I = LR->advanceTo(I, Stop); + bool Backup = I == LR->end() || I->start >= Stop; if (Backup) --I; SlotIndex StopI = I->end; diff --git a/contrib/llvm/lib/CodeGen/InterferenceCache.h b/contrib/llvm/lib/CodeGen/InterferenceCache.h index c02fb9a..800f705 100644 --- a/contrib/llvm/lib/CodeGen/InterferenceCache.h +++ b/contrib/llvm/lib/CodeGen/InterferenceCache.h @@ -72,7 +72,7 @@ class InterferenceCache { unsigned VirtTag; /// Fixed interference in RegUnit. - LiveInterval *Fixed; + LiveRange *Fixed; /// Iterator pointing into the fixed RegUnit interference. LiveInterval::iterator FixedI; diff --git a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp index d894f66..c38d4fb 100644 --- a/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp +++ b/contrib/llvm/lib/CodeGen/IntrinsicLowering.cpp @@ -485,11 +485,12 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) { break; } case Intrinsic::memset: { - Type *IntPtr = TD.getIntPtrType(Context); + Value *Op0 = CI->getArgOperand(0); + Type *IntPtr = TD.getIntPtrType(Op0->getType()); Value *Size = Builder.CreateIntCast(CI->getArgOperand(2), IntPtr, /* isSigned */ false); Value *Ops[3]; - Ops[0] = CI->getArgOperand(0); + Ops[0] = Op0; // Extend the amount to i32. Ops[1] = Builder.CreateIntCast(CI->getArgOperand(1), Type::getInt32Ty(Context), diff --git a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp index 1a09837..ad2c553 100644 --- a/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp +++ b/contrib/llvm/lib/CodeGen/LLVMTargetMachine.cpp @@ -62,6 +62,17 @@ static bool getVerboseAsm() { llvm_unreachable("Invalid verbose asm state"); } +void LLVMTargetMachine::initAsmInfo() { + AsmInfo = TheTarget.createMCAsmInfo(*getRegisterInfo(), TargetTriple); + // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, + // and if the old one gets included then MCAsmInfo will be NULL and + // we'll crash later. + // Provide the user with a useful error message about what's wrong. + assert(AsmInfo && "MCAsmInfo not initialized. 
" + "Make sure you include the correct TargetSelect.h" + "and that InitializeAllTargetMCs() is being invoked!"); +} + LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, StringRef CPU, StringRef FS, TargetOptions Options, @@ -69,18 +80,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, CodeGenOpt::Level OL) : TargetMachine(T, Triple, CPU, FS, Options) { CodeGenInfo = T.createMCCodeGenInfo(Triple, RM, CM, OL); - AsmInfo = T.createMCAsmInfo(Triple); - // TargetSelect.h moved to a different directory between LLVM 2.9 and 3.0, - // and if the old one gets included then MCAsmInfo will be NULL and - // we'll crash later. - // Provide the user with a useful error message about what's wrong. - assert(AsmInfo && "MCAsmInfo not initialized." - "Make sure you include the correct TargetSelect.h" - "and that InitializeAllTargetMCs() is being invoked!"); } void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { - PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); + PM.add(createBasicTargetTransformInfoPass(this)); } /// addPassesToX helper drives creation and initialization of TargetPassConfig. @@ -112,7 +115,6 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, new MachineModuleInfo(*TM->getMCAsmInfo(), *TM->getRegisterInfo(), &TM->getTargetLowering()->getObjFileLowering()); PM.add(MMI); - MCContext *Context = &MMI->getContext(); // Return the MCContext by-ref. // Set up a MachineFunction for the rest of CodeGen to work on. PM.add(new MachineFunctionAnalysis(*TM)); @@ -131,7 +133,7 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassConfig->setInitialized(); - return Context; + return &MMI->getContext(); } bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, @@ -161,6 +163,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, const MCAsmInfo &MAI = *getMCAsmInfo(); const MCRegisterInfo &MRI = *getRegisterInfo(); + const MCInstrInfo &MII = *getInstrInfo(); const MCSubtargetInfo &STI = getSubtarget(); OwningPtr AsmStreamer; @@ -168,19 +171,15 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_AssemblyFile: { MCInstPrinter *InstPrinter = getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, - *getInstrInfo(), - Context->getRegisterInfo(), STI); + MII, MRI, STI); // Create a code emitter if asked to show the encoding. MCCodeEmitter *MCE = 0; - MCAsmBackend *MAB = 0; - if (ShowMCEncoding) { - const MCSubtargetInfo &STI = getSubtarget(); - MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, - *Context); - MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); - } + if (ShowMCEncoding) + MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, *Context); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), + TargetCPU); MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, getVerboseAsm(), hasMCUseLoc(), @@ -195,9 +194,9 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, case CGFT_ObjectFile: { // Create the code emitter for the target if it exists. If not, .o file // emission fails. 
- MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, - STI, *Context); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), + MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(MII, MRI, STI, + *Context); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), TargetCPU); if (MCE == 0 || MAB == 0) return true; @@ -232,7 +231,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, /// addPassesToEmitMachineCode - Add passes to the specified pass manager to /// get machine code emitted. This uses a JITCodeEmitter object to handle /// actually outputting the machine code and resolving things like the address -/// of functions. This method should returns true if machine code emission is +/// of functions. This method should return true if machine code emission is /// not supported. /// bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, @@ -271,7 +270,8 @@ bool LLVMTargetMachine::addPassesToEmitMC(PassManagerBase &PM, const MCSubtargetInfo &STI = getSubtarget(); MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Ctx); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(MRI, getTargetTriple(), + TargetCPU); if (MCE == 0 || MAB == 0) return true; diff --git a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp index 8172154..ffe407a 100644 --- a/contrib/llvm/lib/CodeGen/LexicalScopes.cpp +++ b/contrib/llvm/lib/CodeGen/LexicalScopes.cpp @@ -212,15 +212,15 @@ LexicalScope *LexicalScopes::getOrCreateAbstractScope(const MDNode *N) { /// constructScopeNest void LexicalScopes::constructScopeNest(LexicalScope *Scope) { - assert (Scope && "Unable to calculate scop edominance graph!"); + assert (Scope && "Unable to calculate scope dominance graph!"); SmallVector WorkStack; WorkStack.push_back(Scope); unsigned Counter = 0; while (!WorkStack.empty()) { LexicalScope *WS = WorkStack.back(); - const SmallVector &Children = WS->getChildren(); + const SmallVectorImpl &Children = WS->getChildren(); bool visitedChildren = false; - for (SmallVector::const_iterator SI = Children.begin(), + for (SmallVectorImpl::const_iterator SI = Children.begin(), SE = Children.end(); SI != SE; ++SI) { LexicalScope *ChildScope = *SI; if (!ChildScope->getDFSOut()) { @@ -279,8 +279,8 @@ getMachineBasicBlocks(DebugLoc DL, return; } - SmallVector &InsnRanges = Scope->getRanges(); - for (SmallVector::iterator I = InsnRanges.begin(), + SmallVectorImpl &InsnRanges = Scope->getRanges(); + for (SmallVectorImpl::iterator I = InsnRanges.begin(), E = InsnRanges.end(); I != E; ++I) { InsnRange &R = *I; MBBs.insert(R.first->getParent()); diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp index 0b117ac..25645e0 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.cpp @@ -108,6 +108,7 @@ class LDVImpl; class UserValue { const MDNode *variable; ///< The debug info variable we are part of. unsigned offset; ///< Byte offset into variable. + bool IsIndirect; ///< true if this is a register-indirect+offset value. DebugLoc dl; ///< The debug location for the variable. This is ///< used by dwarf writer to find lexical scope. UserValue *leader; ///< Equivalence class leader. 
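Stepping back to the LLVMTargetMachine hunks above: createMCAsmBackend now takes the MCRegisterInfo up front, and the assembly-streamer path creates the backend unconditionally instead of only under -show-mc-encoding. Roughly how a client assembles the pair after this change, assuming LLVM 3.4's Target registry interface (the wrapper function is illustrative; error handling is the caller's):

    #include "llvm/ADT/StringRef.h"
    #include "llvm/Support/TargetRegistry.h"
    using namespace llvm;

    // Sketch: build the emitter/backend pair the way addPassesToEmitFile
    // now does. Returns false if the target provides neither.
    static bool buildMCPieces(const Target &T, const MCInstrInfo &MII,
                              const MCRegisterInfo &MRI,
                              const MCSubtargetInfo &STI, MCContext &Ctx,
                              StringRef Triple, StringRef CPU,
                              MCCodeEmitter *&MCE, MCAsmBackend *&MAB) {
      MCE = T.createMCCodeEmitter(MII, MRI, STI, Ctx);
      // New in this import: the backend factory also receives MRI.
      MAB = T.createMCAsmBackend(MRI, Triple, CPU);
      return MCE != 0 && MAB != 0;
    }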
@@ -130,13 +131,15 @@ class UserValue { /// splitLocation - Replace OldLocNo ranges with NewRegs ranges where NewRegs /// is live. Returns true if any changes were made. - bool splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + bool splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); public: /// UserValue - Create a new UserValue. - UserValue(const MDNode *var, unsigned o, DebugLoc L, + UserValue(const MDNode *var, unsigned o, bool i, DebugLoc L, LocMap::Allocator &alloc) - : variable(var), offset(o), dl(L), leader(this), next(0), locInts(alloc) + : variable(var), offset(o), IsIndirect(i), dl(L), leader(this), + next(0), locInts(alloc) {} /// getLeader - Get the leader of this value's equivalence class. @@ -217,13 +220,13 @@ public: /// End points where VNI is no longer live are added to Kills. /// @param Idx Starting point for the definition. /// @param LocNo Location number to propagate. - /// @param LI Restrict liveness to where LI has the value VNI. May be null. - /// @param VNI When LI is not null, this is the value to restrict to. + /// @param LR Restrict liveness to where LR has the value VNI. May be null. + /// @param VNI When LR is not null, this is the value to restrict to. /// @param Kills Append end points of VNI's live range to Kills. /// @param LIS Live intervals analysis. /// @param MDT Dominator tree. void extendDef(SlotIndex Idx, unsigned LocNo, - LiveInterval *LI, const VNInfo *VNI, + LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS); @@ -249,7 +252,8 @@ public: /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. - bool splitRegister(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs); + bool splitRegister(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); /// rewriteLocations - Rewrite virtual register locations according to the /// provided virtual register map. @@ -299,7 +303,8 @@ class LDVImpl { UVMap userVarMap; /// getUserValue - Find or create a UserValue. - UserValue *getUserValue(const MDNode *Var, unsigned Offset, DebugLoc DL); + UserValue *getUserValue(const MDNode *Var, unsigned Offset, + bool IsIndirect, DebugLoc DL); /// lookupVirtReg - Find the EC leader for VirtReg or null. UserValue *lookupVirtReg(unsigned VirtReg); @@ -342,7 +347,7 @@ public: void mapVirtReg(unsigned VirtReg, UserValue *EC); /// splitRegister - Replace all references to OldReg with NewRegs. - void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs); /// emitDebugValues - Recreate DBG_VALUE instruction from data structures.
void emitDebugValues(VirtRegMap *VRM); @@ -414,7 +419,7 @@ void UserValue::mapVirtRegs(LDVImpl *LDV) { } UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, - DebugLoc DL) { + bool IsIndirect, DebugLoc DL) { UserValue *&Leader = userVarMap[Var]; if (Leader) { UserValue *UV = Leader->getLeader(); @@ -424,7 +429,7 @@ UserValue *LDVImpl::getUserValue(const MDNode *Var, unsigned Offset, return UV; } - UserValue *UV = new UserValue(Var, Offset, DL, allocator); + UserValue *UV = new UserValue(Var, Offset, IsIndirect, DL, allocator); userValues.push_back(UV); Leader = UserValue::merge(Leader, UV); return UV; @@ -445,15 +450,18 @@ UserValue *LDVImpl::lookupVirtReg(unsigned VirtReg) { bool LDVImpl::handleDebugValue(MachineInstr *MI, SlotIndex Idx) { // DBG_VALUE loc, offset, variable if (MI->getNumOperands() != 3 || - !MI->getOperand(1).isImm() || !MI->getOperand(2).isMetadata()) { + !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) || + !MI->getOperand(2).isMetadata()) { DEBUG(dbgs() << "Can't handle " << *MI); return false; } // Get or create the UserValue for (variable,offset). - unsigned Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; const MDNode *Var = MI->getOperand(2).getMetadata(); - UserValue *UV = getUserValue(Var, Offset, MI->getDebugLoc()); + UserValue *UV = getUserValue(Var, Offset, IsIndirect, MI->getDebugLoc()); UV->addDef(Idx, MI->getOperand(0)); return true; } @@ -487,7 +495,7 @@ bool LDVImpl::collectDebugValues(MachineFunction &mf) { } void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, - LiveInterval *LI, const VNInfo *VNI, + LiveRange *LR, const VNInfo *VNI, SmallVectorImpl<SlotIndex> *Kills, LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS) { @@ -501,15 +509,15 @@ void UserValue::extendDef(SlotIndex Idx, unsigned LocNo, // Limit to VNI's live range. bool ToEnd = true; - if (LI && VNI) { - LiveRange *Range = LI->getLiveRangeContaining(Start); - if (!Range || Range->valno != VNI) { + if (LR && VNI) { + LiveInterval::Segment *Segment = LR->getSegmentContaining(Start); + if (!Segment || Segment->valno != VNI) { if (Kills) Kills->push_back(Start); continue; } - if (Range->end < Stop) - Stop = Range->end, ToEnd = false; + if (Segment->end < Stop) + Stop = Segment->end, ToEnd = false; } // There could already be a short def at Start. @@ -661,10 +669,10 @@ UserValue::computeIntervals(MachineRegisterInfo &MRI, // For physregs, use the live range of the first regunit as a guide. unsigned Unit = *MCRegUnitIterator(Loc.getReg(), &TRI); - LiveInterval *LI = &LIS.getRegUnit(Unit); - const VNInfo *VNI = LI->getVNInfoAt(Idx); + LiveRange *LR = &LIS.getRegUnit(Unit); + const VNInfo *VNI = LR->getVNInfoAt(Idx); // Don't track copies from physregs, it is too expensive. - extendDef(Idx, LocNo, LI, VNI, 0, LIS, MDT, UVS); + extendDef(Idx, LocNo, LR, VNI, 0, LIS, MDT, UVS); } // Finally, erase all the undefs.
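The handleDebugValue hunk above accepts both DBG_VALUE shapes: the indirect (register+offset) form carries an immediate in operand 1, the direct form carries a register there, and the offset is only meaningful when the value is indirect. The decoding rule as a sketch, assuming LLVM 3.4's MachineInstr API (the function name is illustrative):

    #include "llvm/CodeGen/MachineInstr.h"
    using namespace llvm;

    // Hypothetical decoder mirroring LDVImpl::handleDebugValue's checks.
    static bool decodeDbgValue(const MachineInstr *MI,
                               unsigned &Offset, const MDNode *&Var) {
      if (MI->getNumOperands() != 3 ||
          !(MI->getOperand(1).isReg() || MI->getOperand(1).isImm()) ||
          !MI->getOperand(2).isMetadata())
        return false; // not a DBG_VALUE shape we understand
      // isIndirectDebugValue() is true exactly when operand 1 is an
      // immediate, i.e. the register+offset (memory) form.
      Offset = MI->isIndirectDebugValue() ? MI->getOperand(1).getImm() : 0;
      Var = MI->getOperand(2).getMetadata();
      return true;
    }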
@@ -724,7 +732,8 @@ LiveDebugVariables::~LiveDebugVariables() { //===----------------------------------------------------------------------===// bool -UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { +UserValue::splitLocation(unsigned OldLocNo, ArrayRef<unsigned> NewRegs, + LiveIntervals& LIS) { DEBUG({ dbgs() << "Splitting Loc" << OldLocNo << '\t'; print(dbgs(), 0); @@ -733,7 +742,7 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { LocMap::iterator LocMapI; LocMapI.setMap(locInts); for (unsigned i = 0; i != NewRegs.size(); ++i) { - LiveInterval *LI = NewRegs[i]; + LiveInterval *LI = &LIS.getInterval(NewRegs[i]); if (LI->empty()) continue; @@ -822,7 +831,8 @@ UserValue::splitLocation(unsigned OldLocNo, ArrayRef<LiveInterval*> NewRegs) { } bool -UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +UserValue::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS) { bool DidChange = false; // Split locations referring to OldReg. Iterate backwards so splitLocation can // safely erase unused locations. @@ -831,15 +841,15 @@ UserValue::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { const MachineOperand *Loc = &locations[LocNo]; if (!Loc->isReg() || Loc->getReg() != OldReg) continue; - DidChange |= splitLocation(LocNo, NewRegs); + DidChange |= splitLocation(LocNo, NewRegs, LIS); } return DidChange; } -void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs) { bool DidChange = false; for (UserValue *UV = lookupVirtReg(OldReg); UV; UV = UV->getNext()) - DidChange |= UV->splitRegister(OldReg, NewRegs); + DidChange |= UV->splitRegister(OldReg, NewRegs, *LIS); if (!DidChange) return; @@ -847,11 +857,11 @@ void LDVImpl::splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { // Map all of the new virtual registers. UserValue *UV = lookupVirtReg(OldReg); for (unsigned i = 0; i != NewRegs.size(); ++i) - mapVirtReg(NewRegs[i]->reg, UV); + mapVirtReg(NewRegs[i], UV); } void LiveDebugVariables:: -splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs) { +splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, LiveIntervals &LIS) { if (pImpl) static_cast<LDVImpl*>(pImpl)->splitRegister(OldReg, NewRegs); } @@ -921,19 +931,12 @@ void UserValue::insertDebugValue(MachineBasicBlock *MBB, SlotIndex Idx, MachineOperand &Loc = locations[LocNo]; ++NumInsertedDebugValues; - // Frame index locations may require a target callback. - if (Loc.isFI()) { - MachineInstr *MI = TII.emitFrameIndexDebugValue(*MBB->getParent(), - Loc.getIndex(), offset, variable, - findDebugLoc()); - if (MI) { - MBB->insert(I, MI); - return; - } - } - // This is not a frame index, or the target is happy with a standard FI.
- BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) - .addOperand(Loc).addImm(offset).addMetadata(variable); + if (Loc.isReg()) + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, Loc.getReg(), offset, variable); + else + BuildMI(*MBB, I, findDebugLoc(), TII.get(TargetOpcode::DBG_VALUE)) + .addOperand(Loc).addImm(offset).addMetadata(variable); } void UserValue::emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, @@ -992,4 +995,3 @@ void LiveDebugVariables::dump() { static_cast<LDVImpl*>(pImpl)->print(dbgs()); } #endif - diff --git a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h index 3ce3c39..58a3f0f 100644 --- a/contrib/llvm/lib/CodeGen/LiveDebugVariables.h +++ b/contrib/llvm/lib/CodeGen/LiveDebugVariables.h @@ -27,6 +27,7 @@ namespace llvm { class LiveInterval; +class LiveIntervals; class VirtRegMap; class LiveDebugVariables : public MachineFunctionPass { @@ -47,7 +48,8 @@ public: /// splitRegister - Move any user variables in OldReg to the live ranges in /// NewRegs where they are live. Mark the values as unavailable where no new /// register is live. - void splitRegister(unsigned OldReg, ArrayRef<LiveInterval*> NewRegs); + void splitRegister(unsigned OldReg, ArrayRef<unsigned> NewRegs, + LiveIntervals &LIS); /// emitDebugValues - Emit new DBG_VALUE instructions reflecting the changes /// that happened during register allocation. diff --git a/contrib/llvm/lib/CodeGen/LiveInterval.cpp b/contrib/llvm/lib/CodeGen/LiveInterval.cpp index dccd847..2b8feb8 100644 --- a/contrib/llvm/lib/CodeGen/LiveInterval.cpp +++ b/contrib/llvm/lib/CodeGen/LiveInterval.cpp @@ -9,12 +9,12 @@ // // This file implements the LiveRange and LiveInterval classes. Given some // numbering of each the machine instructions an interval [i, j) is said to be a -// live interval for register v if there is no instruction with number j' >= j +// live range for register v if there is no instruction with number j' >= j // such that v is live at j' and there is no instruction with number i' < i such -// that v is live at i'. In this implementation intervals can have holes, -// i.e. an interval might look like [1,20), [50,65), [1000,1001). Each -// individual range is represented as an instance of LiveRange, and the whole -// interval is represented as an instance of LiveInterval. +// that v is live at i'. In this implementation ranges can have holes, +// i.e. a range might look like [1,20), [50,65), [1000,1001). Each +// individual segment is represented as an instance of LiveRange::Segment, +// and the whole range is represented as an instance of LiveRange. // //===----------------------------------------------------------------------===// @@ -31,14 +31,14 @@ #include <algorithm> using namespace llvm; -LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { +LiveRange::iterator LiveRange::find(SlotIndex Pos) { // This algorithm is basically std::upper_bound. // Unfortunately, std::upper_bound cannot be used with mixed types until we // adopt C++0x. Many libraries can do it, but not all.
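The splitRegister/splitLocation hunks above change the hand-off from the register allocator: callers now pass the new virtual registers by number, and the debug-variable side resolves them through LiveIntervals itself. That resolution step, reduced to a sketch against LLVM 3.4's API (the counting function is illustrative):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/LiveIntervalAnalysis.h"
    using namespace llvm;

    // What splitLocation now does per new register: map the vreg number
    // back to its interval, then skip registers with no live segments.
    static unsigned countLiveNewRegs(ArrayRef<unsigned> NewRegs,
                                     LiveIntervals &LIS) {
      unsigned N = 0;
      for (unsigned i = 0, e = NewRegs.size(); i != e; ++i)
        if (!LIS.getInterval(NewRegs[i]).empty())
          ++N;
      return N;
    }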
if (empty() || Pos >= endIndex()) return end(); iterator I = begin(); - size_t Len = ranges.size(); + size_t Len = size(); do { size_t Mid = Len >> 1; if (Pos < I[Mid].end) @@ -49,13 +49,13 @@ LiveInterval::iterator LiveInterval::find(SlotIndex Pos) { return I; } -VNInfo *LiveInterval::createDeadDef(SlotIndex Def, - VNInfo::Allocator &VNInfoAllocator) { +VNInfo *LiveRange::createDeadDef(SlotIndex Def, + VNInfo::Allocator &VNInfoAllocator) { assert(!Def.isDead() && "Cannot define a value at the dead slot"); iterator I = find(Def); if (I == end()) { VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - ranges.push_back(LiveRange(Def, Def.getDeadSlot(), VNI)); + segments.push_back(Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } if (SlotIndex::isSameInstr(Def, I->start)) { @@ -73,11 +73,11 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, } assert(SlotIndex::isEarlierInstr(Def, I->start) && "Already live at def"); VNInfo *VNI = getNextValue(Def, VNInfoAllocator); - ranges.insert(I, LiveRange(Def, Def.getDeadSlot(), VNI)); + segments.insert(I, Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } -// overlaps - Return true if the intersection of the two live intervals is +// overlaps - Return true if the intersection of the two live ranges is // not empty. // // An example for overlaps(): @@ -86,7 +86,7 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, // 4: B = ... // 8: C = A + B ;; last use of A // -// The live intervals should look like: +// The live ranges should look like: // // A = [3, 11) // B = [7, x) @@ -95,9 +95,9 @@ VNInfo *LiveInterval::createDeadDef(SlotIndex Def, // A->overlaps(C) should return false since we want to be able to join // A and C. // -bool LiveInterval::overlapsFrom(const LiveInterval& other, - const_iterator StartPos) const { - assert(!empty() && "empty interval"); +bool LiveRange::overlapsFrom(const LiveRange& other, + const_iterator StartPos) const { + assert(!empty() && "empty range"); const_iterator i = begin(); const_iterator ie = end(); const_iterator j = StartPos; @@ -108,13 +108,13 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, if (i->start < j->start) { i = std::upper_bound(i, ie, j->start); - if (i != ranges.begin()) --i; + if (i != begin()) --i; } else if (j->start < i->start) { ++StartPos; if (StartPos != other.end() && StartPos->start <= i->start) { assert(StartPos < other.end() && i < end()); j = std::upper_bound(j, je, i->start); - if (j != other.ranges.begin()) --j; + if (j != other.begin()) --j; } } else { return true; @@ -136,10 +136,9 @@ bool LiveInterval::overlapsFrom(const LiveInterval& other, return false; } -bool LiveInterval::overlaps(const LiveInterval &Other, - const CoalescerPair &CP, - const SlotIndexes &Indexes) const { - assert(!empty() && "empty interval"); +bool LiveRange::overlaps(const LiveRange &Other, const CoalescerPair &CP, + const SlotIndexes &Indexes) const { + assert(!empty() && "empty range"); if (Other.empty()) return false; @@ -178,9 +177,9 @@ bool LiveInterval::overlaps(const LiveInterval &Other, } } -/// overlaps - Return true if the live interval overlaps a range specified +/// overlaps - Return true if the live range overlaps an interval specified /// by [Start, End). 
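find() above keeps its hand-rolled binary search because, pre-C++11, std::upper_bound with a SlotIndex key against Segment elements was not portable across standard libraries. The same search as a self-contained model that can be tested in isolation (names are illustrative):

    #include <cstddef>
    #include <vector>

    struct Seg { unsigned start, end; }; // half-open [start,end), sorted

    // Return the index of the first segment whose end is after Pos, or
    // S.size() if there is none -- exactly find()'s contract.
    static std::size_t findSeg(const std::vector<Seg> &S, unsigned Pos) {
      if (S.empty() || Pos >= S.back().end)
        return S.size(); // mirrors the empty()/endIndex() early-out
      std::size_t I = 0, Len = S.size();
      do {
        std::size_t Mid = Len >> 1;
        if (Pos < S[I + Mid].end)
          Len = Mid;                    // answer is at or before I+Mid
        else
          I += Mid + 1, Len -= Mid + 1; // skip segments ending at/before Pos
      } while (Len);
      return I;
    }
    // e.g. S = {[0,4), [10,20)}: findSeg(S, 4) == 1 and findSeg(S, 20) == 2.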
-bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { +bool LiveRange::overlaps(SlotIndex Start, SlotIndex End) const { assert(Start < End && "Invalid range"); const_iterator I = std::lower_bound(begin(), end(), End); return I != begin() && (--I)->end > Start; @@ -190,7 +189,7 @@ bool LiveInterval::overlaps(SlotIndex Start, SlotIndex End) const { /// ValNo is dead, remove it. If it is the largest value number, just nuke it /// (and any other deleted values neighboring it), otherwise mark it as ~1U so /// it can be nuked later. -void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { +void LiveRange::markValNoForDeletion(VNInfo *ValNo) { if (ValNo->id == getNumValNums()-1) { do { valnos.pop_back(); @@ -202,137 +201,135 @@ void LiveInterval::markValNoForDeletion(VNInfo *ValNo) { /// RenumberValues - Renumber all values in order of appearance and delete the /// remaining unused values. -void LiveInterval::RenumberValues(LiveIntervals &lis) { +void LiveRange::RenumberValues() { SmallPtrSet Seen; valnos.clear(); for (const_iterator I = begin(), E = end(); I != E; ++I) { VNInfo *VNI = I->valno; if (!Seen.insert(VNI)) continue; - assert(!VNI->isUnused() && "Unused valno used by live range"); + assert(!VNI->isUnused() && "Unused valno used by live segment"); VNI->id = (unsigned)valnos.size(); valnos.push_back(VNI); } } -/// extendIntervalEndTo - This method is used when we want to extend the range -/// specified by I to end at the specified endpoint. To do this, we should -/// merge and eliminate all ranges that this will overlap with. The iterator is -/// not invalidated. -void LiveInterval::extendIntervalEndTo(Ranges::iterator I, SlotIndex NewEnd) { - assert(I != ranges.end() && "Not a valid interval!"); +/// This method is used when we want to extend the segment specified by I to end +/// at the specified endpoint. To do this, we should merge and eliminate all +/// segments that this will overlap with. The iterator is not invalidated. +void LiveRange::extendSegmentEndTo(iterator I, SlotIndex NewEnd) { + assert(I != end() && "Not a valid segment!"); VNInfo *ValNo = I->valno; - // Search for the first interval that we can't merge with. - Ranges::iterator MergeTo = llvm::next(I); - for (; MergeTo != ranges.end() && NewEnd >= MergeTo->end; ++MergeTo) { + // Search for the first segment that we can't merge with. + iterator MergeTo = llvm::next(I); + for (; MergeTo != end() && NewEnd >= MergeTo->end; ++MergeTo) { assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); } - // If NewEnd was in the middle of an interval, make sure to get its endpoint. + // If NewEnd was in the middle of a segment, make sure to get its endpoint. I->end = std::max(NewEnd, prior(MergeTo)->end); - // If the newly formed range now touches the range after it and if they have - // the same value number, merge the two ranges into one range. - if (MergeTo != ranges.end() && MergeTo->start <= I->end && + // If the newly formed segment now touches the segment after it and if they + // have the same value number, merge the two segments into one segment. + if (MergeTo != end() && MergeTo->start <= I->end && MergeTo->valno == ValNo) { I->end = MergeTo->end; ++MergeTo; } - // Erase any dead ranges. - ranges.erase(llvm::next(I), MergeTo); + // Erase any dead segments. + segments.erase(llvm::next(I), MergeTo); } -/// extendIntervalStartTo - This method is used when we want to extend the range -/// specified by I to start at the specified endpoint. 
To do this, we should -/// merge and eliminate all ranges that this will overlap with. -LiveInterval::Ranges::iterator -LiveInterval::extendIntervalStartTo(Ranges::iterator I, SlotIndex NewStart) { - assert(I != ranges.end() && "Not a valid interval!"); +/// This method is used when we want to extend the segment specified by I to +/// start at the specified endpoint. To do this, we should merge and eliminate +/// all segments that this will overlap with. +LiveRange::iterator +LiveRange::extendSegmentStartTo(iterator I, SlotIndex NewStart) { + assert(I != end() && "Not a valid segment!"); VNInfo *ValNo = I->valno; - // Search for the first interval that we can't merge with. - Ranges::iterator MergeTo = I; + // Search for the first segment that we can't merge with. + iterator MergeTo = I; do { - if (MergeTo == ranges.begin()) { + if (MergeTo == begin()) { I->start = NewStart; - ranges.erase(MergeTo, I); + segments.erase(MergeTo, I); return I; } assert(MergeTo->valno == ValNo && "Cannot merge with differing values!"); --MergeTo; } while (NewStart <= MergeTo->start); - // If we start in the middle of another interval, just delete a range and - // extend that interval. + // If we start in the middle of another segment, just delete a range and + // extend that segment. if (MergeTo->end >= NewStart && MergeTo->valno == ValNo) { MergeTo->end = I->end; } else { - // Otherwise, extend the interval right after. + // Otherwise, extend the segment right after. ++MergeTo; MergeTo->start = NewStart; MergeTo->end = I->end; } - ranges.erase(llvm::next(MergeTo), llvm::next(I)); + segments.erase(llvm::next(MergeTo), llvm::next(I)); return MergeTo; } -LiveInterval::iterator -LiveInterval::addRangeFrom(LiveRange LR, iterator From) { - SlotIndex Start = LR.start, End = LR.end; - iterator it = std::upper_bound(From, ranges.end(), Start); +LiveRange::iterator LiveRange::addSegmentFrom(Segment S, iterator From) { + SlotIndex Start = S.start, End = S.end; + iterator it = std::upper_bound(From, end(), Start); - // If the inserted interval starts in the middle or right at the end of - // another interval, just extend that interval to contain the range of LR. - if (it != ranges.begin()) { + // If the inserted segment starts in the middle or right at the end of + // another segment, just extend that segment to contain the segment of S. + if (it != begin()) { iterator B = prior(it); - if (LR.valno == B->valno) { + if (S.valno == B->valno) { if (B->start <= Start && B->end >= Start) { - extendIntervalEndTo(B, End); + extendSegmentEndTo(B, End); return B; } } else { - // Check to make sure that we are not overlapping two live ranges with + // Check to make sure that we are not overlapping two live segments with // different valno's. assert(B->end <= Start && - "Cannot overlap two LiveRanges with differing ValID's" + "Cannot overlap two segments with differing ValID's" " (did you def the same reg twice in a MachineInstr?)"); } } - // Otherwise, if this range ends in the middle of, or right next to, another - // interval, merge it into that interval. - if (it != ranges.end()) { - if (LR.valno == it->valno) { + // Otherwise, if this segment ends in the middle of, or right next to, another + // segment, merge it into that segment. 
+ if (it != end()) { + if (S.valno == it->valno) { if (it->start <= End) { - it = extendIntervalStartTo(it, Start); + it = extendSegmentStartTo(it, Start); - // If LR is a complete superset of an interval, we may need to grow its + // If S is a complete superset of a segment, we may need to grow its // endpoint as well. if (End > it->end) - extendIntervalEndTo(it, End); + extendSegmentEndTo(it, End); return it; } } else { - // Check to make sure that we are not overlapping two live ranges with + // Check to make sure that we are not overlapping two live segments with // different valno's. assert(it->start >= End && - "Cannot overlap two LiveRanges with differing ValID's"); + "Cannot overlap two segments with differing ValID's"); } } - // Otherwise, this is just a new range that doesn't interact with anything. + // Otherwise, this is just a new segment that doesn't interact with anything. // Insert it. - return ranges.insert(it, LR); + return segments.insert(it, S); } -/// extendInBlock - If this interval is live before Kill in the basic +/// extendInBlock - If this range is live before Kill in the basic /// block that starts at StartIdx, extend it to be live up to Kill and return /// the value. If there is no live range before Kill, return NULL. -VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { +VNInfo *LiveRange::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (empty()) return 0; iterator I = std::upper_bound(begin(), end(), Kill.getPrevSlot()); @@ -342,20 +339,21 @@ VNInfo *LiveInterval::extendInBlock(SlotIndex StartIdx, SlotIndex Kill) { if (I->end <= StartIdx) return 0; if (I->end < Kill) - extendIntervalEndTo(I, Kill); + extendSegmentEndTo(I, Kill); return I->valno; } -/// removeRange - Remove the specified range from this interval. Note that -/// the range must be in a single LiveRange in its entirety. -void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, - bool RemoveDeadValNo) { - // Find the LiveRange containing this span. - Ranges::iterator I = find(Start); - assert(I != ranges.end() && "Range is not in interval!"); - assert(I->containsRange(Start, End) && "Range is not entirely in interval!"); +/// Remove the specified segment from this range. Note that the segment must +/// be in a single Segment in its entirety. +void LiveRange::removeSegment(SlotIndex Start, SlotIndex End, + bool RemoveDeadValNo) { + // Find the Segment containing this span. + iterator I = find(Start); + assert(I != end() && "Segment is not in range!"); + assert(I->containsInterval(Start, End) + && "Segment is not entirely in range!"); - // If the span we are removing is at the start of the LiveRange, adjust it. + // If the span we are removing is at the start of the Segment, adjust it. VNInfo *ValNo = I->valno; if (I->start == Start) { if (I->end == End) { @@ -373,54 +371,50 @@ void LiveInterval::removeRange(SlotIndex Start, SlotIndex End, } } - ranges.erase(I); // Removed the whole LiveRange. + segments.erase(I); // Removed the whole Segment. } else I->start = End; return; } - // Otherwise if the span we are removing is at the end of the LiveRange, + // Otherwise if the span we are removing is at the end of the Segment, // adjust the other way. if (I->end == End) { I->end = Start; return; } - // Otherwise, we are splitting the LiveRange into two pieces. + // Otherwise, we are splitting the Segment into two pieces. SlotIndex OldEnd = I->end; - I->end = Start; // Trim the old interval. + I->end = Start; // Trim the old segment. // Insert the new one. 
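The addSegmentFrom logic above grows an existing same-valued segment rather than inserting overlapping duplicates; a toy model of just that merge rule (deliberately ignoring the sorted-insert and start-extension cases handled by the real code):

    #include <algorithm>
    #include <cassert>
    #include <cstddef>
    #include <vector>

    struct Seg { unsigned start, end, valno; };

    // Minimal model: a new segment that touches an existing segment with
    // the same value number extends it in place.
    static void addCoalesced(std::vector<Seg> &S, Seg N) {
      for (std::size_t i = 0; i != S.size(); ++i) {
        Seg &B = S[i];
        if (B.valno == N.valno && B.start <= N.start && N.start <= B.end) {
          B.end = std::max(B.end, N.end); // like extendSegmentEndTo
          return;
        }
        assert((B.end <= N.start || N.end <= B.start) &&
               "Cannot overlap two segments with differing ValID's");
      }
      S.push_back(N); // brand-new segment, no interaction
    }
    // e.g. {[0,4):0, [10,20):0} + [4,8):0  ->  {[0,8):0, [10,20):0}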
- ranges.insert(llvm::next(I), LiveRange(End, OldEnd, ValNo)); + segments.insert(llvm::next(I), Segment(End, OldEnd, ValNo)); } -/// removeValNo - Remove all the ranges defined by the specified value#. +/// removeValNo - Remove all the segments defined by the specified value#. /// Also remove the value# from value# list. -void LiveInterval::removeValNo(VNInfo *ValNo) { +void LiveRange::removeValNo(VNInfo *ValNo) { if (empty()) return; - Ranges::iterator I = ranges.end(); - Ranges::iterator E = ranges.begin(); + iterator I = end(); + iterator E = begin(); do { --I; if (I->valno == ValNo) - ranges.erase(I); + segments.erase(I); } while (I != E); // Now that ValNo is dead, remove it. markValNoForDeletion(ValNo); } -/// join - Join two live intervals (this, and other) together. This applies -/// mappings to the value numbers in the LHS/RHS intervals as specified. If -/// the intervals are not joinable, this aborts. -void LiveInterval::join(LiveInterval &Other, - const int *LHSValNoAssignments, - const int *RHSValNoAssignments, - SmallVector &NewVNInfo, - MachineRegisterInfo *MRI) { +void LiveRange::join(LiveRange &Other, + const int *LHSValNoAssignments, + const int *RHSValNoAssignments, + SmallVectorImpl &NewVNInfo) { verify(); - // Determine if any of our live range values are mapped. This is uncommon, so - // we want to avoid the interval scan if not. + // Determine if any of our values are mapped. This is uncommon, so we want + // to avoid the range scan if not. bool MustMapCurValNos = false; unsigned NumVals = getNumValNums(); unsigned NumNewVals = NewVNInfo.size(); @@ -433,8 +427,7 @@ void LiveInterval::join(LiveInterval &Other, } } - // If we have to apply a mapping to our base interval assignment, rewrite it - // now. + // If we have to apply a mapping to our base range assignment, rewrite it now. if (MustMapCurValNos && !empty()) { // Map the first live range. @@ -445,12 +438,12 @@ void LiveInterval::join(LiveInterval &Other, assert(nextValNo != 0 && "Huh?"); // If this live range has the same value # as its immediate predecessor, - // and if they are neighbors, remove one LiveRange. This happens when we + // and if they are neighbors, remove one Segment. This happens when we // have [0,4:0)[4,7:1) and map 0/1 onto the same value #. if (OutIt->valno == nextValNo && OutIt->end == I->start) { OutIt->end = I->end; } else { - // Didn't merge. Move OutIt to the next interval, + // Didn't merge. Move OutIt to the next segment, ++OutIt; OutIt->valno = nextValNo; if (OutIt != I) { @@ -459,9 +452,9 @@ void LiveInterval::join(LiveInterval &Other, } } } - // If we merge some live ranges, chop off the end. + // If we merge some segments, chop off the end. ++OutIt; - ranges.erase(OutIt, end()); + segments.erase(OutIt, end()); } // Rewrite Other values before changing the VNInfo ids. @@ -472,7 +465,7 @@ void LiveInterval::join(LiveInterval &Other, I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]]; // Update val# info. Renumber them and make sure they all belong to this - // LiveInterval now. Also remove dead val#'s. + // LiveRange now. Also remove dead val#'s. unsigned NumValNos = 0; for (unsigned i = 0; i < NumNewVals; ++i) { VNInfo *VNI = NewVNInfo[i]; @@ -487,31 +480,31 @@ void LiveInterval::join(LiveInterval &Other, if (NumNewVals < NumVals) valnos.resize(NumNewVals); // shrinkify - // Okay, now insert the RHS live ranges into the LHS. + // Okay, now insert the RHS live segments into the LHS. 
LiveRangeUpdater Updater(this); for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) Updater.add(*I); } -/// MergeRangesInAsValue - Merge all of the intervals in RHS into this live -/// interval as the specified value number. The LiveRanges in RHS are -/// allowed to overlap with LiveRanges in the current interval, but only if -/// the overlapping LiveRanges have the specified value number. -void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, - VNInfo *LHSValNo) { +/// Merge all of the segments in RHS into this live range as the specified +/// value number. The segments in RHS are allowed to overlap with segments in +/// the current range, but only if the overlapping segments have the +/// specified value number. +void LiveRange::MergeSegmentsInAsValue(const LiveRange &RHS, + VNInfo *LHSValNo) { LiveRangeUpdater Updater(this); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) Updater.add(I->start, I->end, LHSValNo); } -/// MergeValueInAsValue - Merge all of the live ranges of a specific val# -/// in RHS into this live interval as the specified value number. -/// The LiveRanges in RHS are allowed to overlap with LiveRanges in the -/// current interval, it will replace the value numbers of the overlaped -/// live ranges with the specified value number. -void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, - const VNInfo *RHSValNo, - VNInfo *LHSValNo) { +/// MergeValueInAsValue - Merge all of the live segments of a specific val# +/// in RHS into this live range as the specified value number. +/// The segments in RHS are allowed to overlap with segments in the +/// current range, it will replace the value numbers of the overlaped +/// segments with the specified value number. +void LiveRange::MergeValueInAsValue(const LiveRange &RHS, + const VNInfo *RHSValNo, + VNInfo *LHSValNo) { LiveRangeUpdater Updater(this); for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) if (I->valno == RHSValNo) @@ -520,9 +513,9 @@ void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, /// MergeValueNumberInto - This method is called when two value nubmers /// are found to be equivalent. This eliminates V1, replacing all -/// LiveRanges with the V1 value number with the V2 value number. This can +/// segments with the V1 value number with the V2 value number. This can /// cause merging of V1/V2 values numbers and compaction of the value space. -VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { +VNInfo *LiveRange::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { assert(V1 != V2 && "Identical value#'s are always equivalent!"); // This code actually merges the (numerically) larger value number into the @@ -536,37 +529,37 @@ VNInfo* LiveInterval::MergeValueNumberInto(VNInfo *V1, VNInfo *V2) { std::swap(V1, V2); } - // Merge V1 live ranges into V2. + // Merge V1 segments into V2. for (iterator I = begin(); I != end(); ) { - iterator LR = I++; - if (LR->valno != V1) continue; // Not a V1 LiveRange. + iterator S = I++; + if (S->valno != V1) continue; // Not a V1 Segment. // Okay, we found a V1 live range. If it had a previous, touching, V2 live // range, extend it. - if (LR != begin()) { - iterator Prev = LR-1; - if (Prev->valno == V2 && Prev->end == LR->start) { - Prev->end = LR->end; + if (S != begin()) { + iterator Prev = S-1; + if (Prev->valno == V2 && Prev->end == S->start) { + Prev->end = S->end; // Erase this live-range. 
- ranges.erase(LR); + segments.erase(S); I = Prev+1; - LR = Prev; + S = Prev; } } // Okay, now we have a V1 or V2 live range that is maximally merged forward. // Ensure that it is a V2 live-range. - LR->valno = V2; + S->valno = V2; - // If we can merge it into later V2 live ranges, do so now. We ignore any - // following V1 live ranges, as they will be merged in subsequent iterations + // If we can merge it into later V2 segments, do so now. We ignore any + // following V1 segments, as they will be merged in subsequent iterations // of the loop. if (I != end()) { - if (I->start == LR->end && I->valno == V2) { - LR->end = I->end; - ranges.erase(I); - I = LR+1; + if (I->start == S->end && I->valno == V2) { + S->end = I->end; + segments.erase(I); + I = S+1; } } } @@ -584,22 +577,21 @@ unsigned LiveInterval::getSize() const { return Sum; } -raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange &LR) { - return os << '[' << LR.start << ',' << LR.end << ':' << LR.valno->id << ")"; +raw_ostream& llvm::operator<<(raw_ostream& os, const LiveRange::Segment &S) { + return os << '[' << S.start << ',' << S.end << ':' << S.valno->id << ")"; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -void LiveRange::dump() const { +void LiveRange::Segment::dump() const { dbgs() << *this << "\n"; } #endif -void LiveInterval::print(raw_ostream &OS) const { +void LiveRange::print(raw_ostream &OS) const { if (empty()) OS << "EMPTY"; else { - for (LiveInterval::Ranges::const_iterator I = ranges.begin(), - E = ranges.end(); I != E; ++I) { + for (const_iterator I = begin(), E = end(); I != E; ++I) { OS << *I; assert(I->valno == getValNumInfo(I->valno->id) && "Bad VNInfo"); } @@ -625,19 +617,29 @@ void LiveInterval::print(raw_ostream &OS) const { } } +void LiveInterval::print(raw_ostream &OS) const { + OS << PrintReg(reg) << ' '; + super::print(OS); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +void LiveRange::dump() const { + dbgs() << *this << "\n"; +} + void LiveInterval::dump() const { dbgs() << *this << "\n"; } #endif #ifndef NDEBUG -void LiveInterval::verify() const { +void LiveRange::verify() const { for (const_iterator I = begin(), E = end(); I != E; ++I) { assert(I->start.isValid()); assert(I->end.isValid()); assert(I->start < I->end); assert(I->valno != 0); + assert(I->valno->id < valnos.size()); assert(I->valno == valnos[I->valno->id]); if (llvm::next(I) != E) { assert(I->end <= llvm::next(I)->start); @@ -649,10 +651,6 @@ void LiveInterval::verify() const { #endif -void LiveRange::print(raw_ostream &os) const { - os << *this; -} - //===----------------------------------------------------------------------===// // LiveRangeUpdater class //===----------------------------------------------------------------------===// @@ -665,11 +663,11 @@ void LiveRange::print(raw_ostream &os) const { // // Otherwise, segments are kept in three separate areas: // -// 1. [begin; WriteI) at the front of LI. -// 2. [ReadI; end) at the back of LI. +// 1. [begin; WriteI) at the front of LR. +// 2. [ReadI; end) at the back of LR. // 3. Spills. // -// - LI.begin() <= WriteI <= ReadI <= LI.end(). +// - LR.begin() <= WriteI <= ReadI <= LR.end(). // - Segments in all three areas are fully ordered and coalesced. // - Segments in area 1 precede and can't coalesce with segments in area 2. // - Segments in Spills precede and can't coalesce with segments in area 2. 
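Given those invariants, the intended calling pattern is to feed segments in order of non-decreasing start index so area 2 is consumed linearly; a start that moves backwards forces an early flush (see the add() hunk below). A usage sketch against LLVM 3.4's declarations in LiveInterval.h (the wrapper is illustrative):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/CodeGen/LiveInterval.h"
    using namespace llvm;

    // Sketch: batch-add pre-sorted segments, then restore the fully
    // coalesced invariant. The destructor would also flush, but being
    // explicit is clearer when LR is inspected right afterwards.
    static void addSorted(LiveRange &LR, ArrayRef<LiveRange::Segment> Segs) {
      LiveRangeUpdater Updater(&LR);
      for (unsigned i = 0, e = Segs.size(); i != e; ++i)
        Updater.add(Segs[i]);
      Updater.flush();
    }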
@@ -684,23 +682,23 @@ void LiveRange::print(raw_ostream &os) const { void LiveRangeUpdater::print(raw_ostream &OS) const { if (!isDirty()) { - if (LI) - OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n'; + if (LR) + OS << "Clean updater: " << *LR << '\n'; else OS << "Null updater.\n"; return; } - assert(LI && "Can't have null LI in dirty updater."); - OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI) + assert(LR && "Can't have null LR in dirty updater."); + OS << " updater with gap = " << (ReadI - WriteI) << ", last start = " << LastStart << ":\n Area 1:"; - for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I) + for (LiveRange::const_iterator I = LR->begin(); I != WriteI; ++I) OS << ' ' << *I; OS << "\n Spills:"; for (unsigned I = 0, E = Spills.size(); I != E; ++I) OS << ' ' << Spills[I]; OS << "\n Area 2:"; - for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I) + for (LiveRange::const_iterator I = ReadI, E = LR->end(); I != E; ++I) OS << ' ' << *I; OS << '\n'; } @@ -711,8 +709,9 @@ void LiveRangeUpdater::dump() const } // Determine if A and B should be coalesced. -static inline bool coalescable(const LiveRange &A, const LiveRange &B) { - assert(A.start <= B.start && "Unordered live ranges."); +static inline bool coalescable(const LiveRange::Segment &A, + const LiveRange::Segment &B) { + assert(A.start <= B.start && "Unordered live segments."); if (A.end == B.start) return A.valno == B.valno; if (A.end < B.start) @@ -721,8 +720,8 @@ static inline bool coalescable(const LiveRange &A, const LiveRange &B) { return true; } -void LiveRangeUpdater::add(LiveRange Seg) { - assert(LI && "Cannot add to a null destination"); +void LiveRangeUpdater::add(LiveRange::Segment Seg) { + assert(LR && "Cannot add to a null destination"); // Flush the state if Start moves backwards. if (!LastStart.isValid() || LastStart > Seg.start) { @@ -730,21 +729,21 @@ void LiveRangeUpdater::add(LiveRange Seg) { flush(); // This brings us to an uninitialized state. Reinitialize. assert(Spills.empty() && "Leftover spilled segments"); - WriteI = ReadI = LI->begin(); + WriteI = ReadI = LR->begin(); } // Remember start for next time. LastStart = Seg.start; // Advance ReadI until it ends after Seg.start. - LiveInterval::iterator E = LI->end(); + LiveRange::iterator E = LR->end(); if (ReadI != E && ReadI->end <= Seg.start) { // First try to close the gap between WriteI and ReadI with spills. if (ReadI != WriteI) mergeSpills(); // Then advance ReadI. if (ReadI == WriteI) - ReadI = WriteI = LI->find(Seg.start); + ReadI = WriteI = LR->find(Seg.start); else while (ReadI != E && ReadI->end <= Seg.start) *WriteI++ = *ReadI++; @@ -777,7 +776,7 @@ void LiveRangeUpdater::add(LiveRange Seg) { } // Try coalescing Seg into WriteI[-1]. - if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) { + if (WriteI != LR->begin() && coalescable(WriteI[-1], Seg)) { WriteI[-1].end = std::max(WriteI[-1].end, Seg.end); return; } @@ -788,10 +787,10 @@ void LiveRangeUpdater::add(LiveRange Seg) { return; } - // Finally, append to LI or Spills. + // Finally, append to LR or Spills. if (WriteI == E) { - LI->ranges.push_back(Seg); - WriteI = ReadI = LI->ranges.end(); + LR->segments.push_back(Seg); + WriteI = ReadI = LR->end(); } else Spills.push_back(Seg); } @@ -802,10 +801,10 @@ void LiveRangeUpdater::mergeSpills() { // Perform a backwards merge of Spills and [SpillI;WriteI). 
size_t GapSize = ReadI - WriteI; size_t NumMoved = std::min(Spills.size(), GapSize); - LiveInterval::iterator Src = WriteI; - LiveInterval::iterator Dst = Src + NumMoved; - LiveInterval::iterator SpillSrc = Spills.end(); - LiveInterval::iterator B = LI->begin(); + LiveRange::iterator Src = WriteI; + LiveRange::iterator Dst = Src + NumMoved; + LiveRange::iterator SpillSrc = Spills.end(); + LiveRange::iterator B = LR->begin(); // This is the new WriteI position after merging spills. WriteI = Dst; @@ -827,12 +826,12 @@ void LiveRangeUpdater::flush() { // Clear the dirty state. LastStart = SlotIndex(); - assert(LI && "Cannot add to a null destination"); + assert(LR && "Cannot add to a null destination"); // Nothing to merge? if (Spills.empty()) { - LI->ranges.erase(WriteI, ReadI); - LI->verify(); + LR->segments.erase(WriteI, ReadI); + LR->verify(); return; } @@ -840,17 +839,17 @@ void LiveRangeUpdater::flush() { size_t GapSize = ReadI - WriteI; if (GapSize < Spills.size()) { // The gap is too small. Make some room. - size_t WritePos = WriteI - LI->begin(); - LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange()); + size_t WritePos = WriteI - LR->begin(); + LR->segments.insert(ReadI, Spills.size() - GapSize, LiveRange::Segment()); // This also invalidated ReadI, but it is recomputed below. - WriteI = LI->ranges.begin() + WritePos; + WriteI = LR->begin() + WritePos; } else { // Shrink the gap if necessary. - LI->ranges.erase(WriteI + Spills.size(), ReadI); + LR->segments.erase(WriteI + Spills.size(), ReadI); } ReadI = WriteI + Spills.size(); mergeSpills(); - LI->verify(); + LR->verify(); } unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { @@ -909,8 +908,16 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], MachineOperand &MO = RI.getOperand(); MachineInstr *MI = MO.getParent(); ++RI; - // DBG_VALUE instructions should have been eliminated earlier. - LiveRangeQuery LRQ(LI, LIS.getInstructionIndex(MI)); + // DBG_VALUE instructions don't have slot indexes, so get the index of the + // instruction before them. + // Normally, DBG_VALUE instructions are removed before this function is + // called, but it is not a requirement. + SlotIndex Idx; + if (MI->isDebugValue()) + Idx = LIS.getSlotIndexes()->getIndexBefore(MI); + else + Idx = LIS.getInstructionIndex(MI); + LiveQueryResult LRQ = LI.Query(Idx); const VNInfo *VNI = MO.readsReg() ? LRQ.valueIn() : LRQ.valueDefined(); // In the case of an use that isn't tied to any def, VNI will be // NULL. If the use is tied to a def, VNI will be the defined value. @@ -927,11 +934,11 @@ void ConnectedVNInfoEqClasses::Distribute(LiveInterval *LIV[], if (unsigned eq = EqClass[I->valno->id]) { assert((LIV[eq]->empty() || LIV[eq]->expiredAt(I->start)) && "New intervals should be empty"); - LIV[eq]->ranges.push_back(*I); + LIV[eq]->segments.push_back(*I); } else *J++ = *I; } - LI.ranges.erase(J, E); + LI.segments.erase(J, E); // Transfer VNInfos to their new owners and renumber them. 
unsigned j = 0, e = LI.getNumValNums(); diff --git a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp index f1b8394..e1c3217 100644 --- a/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/contrib/llvm/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -28,6 +28,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/VirtRegMap.h" #include "llvm/IR/Value.h" +#include "llvm/Support/BlockFrequency.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -51,6 +52,14 @@ INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_END(LiveIntervals, "liveintervals", "Live Interval Analysis", false, false) +#ifndef NDEBUG +static cl::opt<bool> EnablePrecomputePhysRegs( + "precompute-phys-liveness", cl::Hidden, + cl::desc("Eagerly compute live intervals for all physreg units.")); +#else +static bool EnablePrecomputePhysRegs = false; +#endif // NDEBUG + void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired<AliasAnalysis>(); @@ -86,15 +95,15 @@ void LiveIntervals::releaseMemory() { RegMaskBits.clear(); RegMaskBlocks.clear(); - for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) - delete RegUnitIntervals[i]; - RegUnitIntervals.clear(); + for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i) + delete RegUnitRanges[i]; + RegUnitRanges.clear(); // Release VNInfo memory regions, VNInfo objects don't need to be dtor'd. VNInfoAllocator.Reset(); } -/// runOnMachineFunction - Register allocate the whole function +/// runOnMachineFunction - calculates LiveIntervals /// bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { MF = &fn; @@ -115,6 +124,12 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { computeRegMasks(); computeLiveInRegUnits(); + if (EnablePrecomputePhysRegs) { + // For stress testing, precompute live ranges of all physical register + // units, including reserved registers. + for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) + getRegUnit(i); + } DEBUG(dump()); return true; } @@ -124,15 +139,15 @@ void LiveIntervals::print(raw_ostream &OS, const Module* ) const { OS << "********** INTERVALS **********\n"; // Dump the regunits. - for (unsigned i = 0, e = RegUnitIntervals.size(); i != e; ++i) - if (LiveInterval *LI = RegUnitIntervals[i]) - OS << PrintRegUnit(i, TRI) << " = " << *LI << '\n'; + for (unsigned i = 0, e = RegUnitRanges.size(); i != e; ++i) + if (LiveRange *LR = RegUnitRanges[i]) + OS << PrintRegUnit(i, TRI) << ' ' << *LR << '\n'; // Dump the virtregs. for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (hasInterval(Reg)) - OS << PrintReg(Reg) << " = " << getInterval(Reg) << '\n'; + OS << getInterval(Reg) << '\n'; } OS << "RegMasks:"; @@ -155,16 +170,17 @@ void LiveIntervals::dumpInstrs() const { #endif LiveInterval* LiveIntervals::createInterval(unsigned reg) { - float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F; + float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? + llvm::huge_valf : 0.0F; return new LiveInterval(reg, Weight); } /// computeVirtRegInterval - Compute the live interval of a virtual register, /// based on defs and uses.
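EnablePrecomputePhysRegs above is the standard trick for a debug-build-only knob: a real cl::opt in asserts builds, and a plain false constant that the optimizer folds away in release builds. The idiom in isolation, with an illustrative flag name:

    #include "llvm/Support/CommandLine.h"

    #ifndef NDEBUG
    static llvm::cl::opt<bool> StressFlag(
        "stress-flag", llvm::cl::Hidden,
        llvm::cl::desc("Illustrative debug-only stress-testing knob."));
    #else
    // Same name, so the same uses compile; branches on it are dead code.
    static bool StressFlag = false;
    #endif // NDEBUG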
-void LiveIntervals::computeVirtRegInterval(LiveInterval *LI) { +void LiveIntervals::computeVirtRegInterval(LiveInterval &LI) { assert(LRCalc && "LRCalc not initialized."); - assert(LI->empty() && "Should only compute empty intervals."); + assert(LI.empty() && "Should only compute empty intervals."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); LRCalc->createDeadDefs(LI); LRCalc->extendToUses(LI); @@ -175,9 +191,7 @@ void LiveIntervals::computeVirtRegs() { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); if (MRI->reg_nodbg_empty(Reg)) continue; - LiveInterval *LI = createInterval(Reg); - VirtRegIntervals[Reg] = LI; - computeVirtRegInterval(LI); + createAndComputeVirtRegInterval(Reg); } } @@ -214,12 +228,10 @@ void LiveIntervals::computeRegMasks() { // interference. // -/// computeRegUnitInterval - Compute the live interval of a register unit, based -/// on the uses and defs of aliasing registers. The interval should be empty, +/// computeRegUnitInterval - Compute the live range of a register unit, based +/// on the uses and defs of aliasing registers. The range should be empty, /// or contain only dead phi-defs from ABI blocks. -void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { - unsigned Unit = LI->reg; - +void LiveIntervals::computeRegUnitRange(LiveRange &LR, unsigned Unit) { assert(LRCalc && "LRCalc not initialized."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); @@ -229,25 +241,21 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { // idempotent. It is very rare for a register unit to have multiple roots, so // uniquing super-registers is probably not worthwhile. for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { - unsigned Root = *Roots; - if (!MRI->reg_empty(Root)) - LRCalc->createDeadDefs(LI, Root); - for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true); + Supers.isValid(); ++Supers) { if (!MRI->reg_empty(*Supers)) - LRCalc->createDeadDefs(LI, *Supers); + LRCalc->createDeadDefs(LR, *Supers); } } - // Now extend LI to reach all uses. + // Now extend LR to reach all uses. // Ignore uses of reserved registers. We only track defs of those. for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots) { - unsigned Root = *Roots; - if (!MRI->isReserved(Root) && !MRI->reg_empty(Root)) - LRCalc->extendToUses(LI, Root); - for (MCSuperRegIterator Supers(Root, TRI); Supers.isValid(); ++Supers) { + for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true); + Supers.isValid(); ++Supers) { unsigned Reg = *Supers; if (!MRI->isReserved(Reg) && !MRI->reg_empty(Reg)) - LRCalc->extendToUses(LI, Reg); + LRCalc->extendToUses(LR, Reg); } } } @@ -258,11 +266,11 @@ void LiveIntervals::computeRegUnitInterval(LiveInterval *LI) { /// without a corresponding def when entering the entry block or a landing pad. /// void LiveIntervals::computeLiveInRegUnits() { - RegUnitIntervals.resize(TRI->getNumRegUnits()); + RegUnitRanges.resize(TRI->getNumRegUnits()); DEBUG(dbgs() << "Computing live-in reg-units in ABI blocks.\n"); - // Keep track of the intervals allocated. - SmallVector NewIntvs; + // Keep track of the live range sets allocated. + SmallVector NewRanges; // Check all basic blocks for live-ins. 
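The computeRegUnitRange hunk above folds the old separate root-register handling into one loop by asking MCSuperRegIterator to include the root itself. The iteration pattern on its own, assuming LLVM 3.4's MC register-info iterators (the visitor wrapper is illustrative):

    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Visit every register that overlaps the given register unit: each
    // root of the unit plus all of that root's super-registers.
    template <class Visitor>
    static void forEachOverlappingReg(unsigned Unit,
                                      const TargetRegisterInfo *TRI,
                                      Visitor V) {
      for (MCRegUnitRootIterator Roots(Unit, TRI); Roots.isValid(); ++Roots)
        for (MCSuperRegIterator Supers(*Roots, TRI, /*IncludeSelf=*/true);
             Supers.isValid(); ++Supers)
          V(*Supers);
    }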
for (MachineFunction::const_iterator MFI = MF->begin(), MFE = MF->end(); @@ -280,23 +288,25 @@ void LiveIntervals::computeLiveInRegUnits() { LIE = MBB->livein_end(); LII != LIE; ++LII) { for (MCRegUnitIterator Units(*LII, TRI); Units.isValid(); ++Units) { unsigned Unit = *Units; - LiveInterval *Intv = RegUnitIntervals[Unit]; - if (!Intv) { - Intv = RegUnitIntervals[Unit] = new LiveInterval(Unit, HUGE_VALF); - NewIntvs.push_back(Intv); + LiveRange *LR = RegUnitRanges[Unit]; + if (!LR) { + LR = RegUnitRanges[Unit] = new LiveRange(); + NewRanges.push_back(Unit); } - VNInfo *VNI = Intv->createDeadDef(Begin, getVNInfoAllocator()); + VNInfo *VNI = LR->createDeadDef(Begin, getVNInfoAllocator()); (void)VNI; DEBUG(dbgs() << ' ' << PrintRegUnit(Unit, TRI) << '#' << VNI->id); } } DEBUG(dbgs() << '\n'); } - DEBUG(dbgs() << "Created " << NewIntvs.size() << " new intervals.\n"); + DEBUG(dbgs() << "Created " << NewRanges.size() << " new intervals.\n"); - // Compute the 'normal' part of the intervals. - for (unsigned i = 0, e = NewIntvs.size(); i != e; ++i) - computeRegUnitInterval(NewIntvs[i]); + // Compute the 'normal' part of the ranges. + for (unsigned i = 0, e = NewRanges.size(); i != e; ++i) { + unsigned Unit = NewRanges[i]; + computeRegUnitRange(*RegUnitRanges[Unit], Unit); + } } @@ -320,7 +330,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, if (UseMI->isDebugValue() || !UseMI->readsVirtualRegister(li->reg)) continue; SlotIndex Idx = getInstructionIndex(UseMI).getRegSlot(); - LiveRangeQuery LRQ(*li, Idx); + LiveQueryResult LRQ = li->Query(Idx); VNInfo *VNI = LRQ.valueIn(); if (!VNI) { // This shouldn't happen: readsVirtualRegister returns true, but there is @@ -339,14 +349,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, WorkList.push_back(std::make_pair(Idx, VNI)); } - // Create a new live interval with only minimal live segments per def. - LiveInterval NewLI(li->reg, 0); + // Create new live ranges with only minimal live segments per def. + LiveRange NewLR; for (LiveInterval::vni_iterator I = li->vni_begin(), E = li->vni_end(); I != E; ++I) { VNInfo *VNI = *I; if (VNI->isUnused()) continue; - NewLI.addRange(LiveRange(VNI->def, VNI->def.getDeadSlot(), VNI)); + NewLR.addSegment(LiveRange::Segment(VNI->def, VNI->def.getDeadSlot(), VNI)); } // Keep track of the PHIs that are in use. @@ -361,7 +371,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, SlotIndex BlockStart = getMBBStartIdx(MBB); // Extend the live range for VNI to be live at Idx. - if (VNInfo *ExtVNI = NewLI.extendInBlock(BlockStart, Idx)) { + if (VNInfo *ExtVNI = NewLR.extendInBlock(BlockStart, Idx)) { (void)ExtVNI; assert(ExtVNI == VNI && "Unexpected existing value number"); // Is this a PHIDef we haven't seen before? @@ -382,7 +392,7 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, // VNI is live-in to MBB. DEBUG(dbgs() << " live-in at " << BlockStart << '\n'); - NewLI.addRange(LiveRange(BlockStart, Idx, VNI)); + NewLR.addSegment(LiveRange::Segment(BlockStart, Idx, VNI)); // Make sure VNI is live-out from the predecessors. 
for (MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), @@ -403,14 +413,14 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, VNInfo *VNI = *I; if (VNI->isUnused()) continue; - LiveInterval::iterator LII = NewLI.FindLiveRangeContaining(VNI->def); - assert(LII != NewLI.end() && "Missing live range for PHI"); - if (LII->end != VNI->def.getDeadSlot()) + LiveRange::iterator LRI = NewLR.FindSegmentContaining(VNI->def); + assert(LRI != NewLR.end() && "Missing segment for PHI"); + if (LRI->end != VNI->def.getDeadSlot()) continue; if (VNI->isPHIDef()) { // This is a dead PHI. Remove it. VNI->markUnused(); - NewLI.removeRange(*LII); + NewLR.removeSegment(LRI->start, LRI->end); DEBUG(dbgs() << "Dead PHI at " << VNI->def << " may separate interval\n"); CanSeparate = true; } else { @@ -425,23 +435,23 @@ bool LiveIntervals::shrinkToUses(LiveInterval *li, } } - // Move the trimmed ranges back. - li->ranges.swap(NewLI.ranges); + // Move the trimmed segments back. + li->segments.swap(NewLR.segments); DEBUG(dbgs() << "Shrunk: " << *li << '\n'); return CanSeparate; } -void LiveIntervals::extendToIndices(LiveInterval *LI, +void LiveIntervals::extendToIndices(LiveRange &LR, ArrayRef Indices) { assert(LRCalc && "LRCalc not initialized."); LRCalc->reset(MF, getSlotIndexes(), DomTree, &getVNInfoAllocator()); for (unsigned i = 0, e = Indices.size(); i != e; ++i) - LRCalc->extend(LI, Indices[i]); + LRCalc->extend(LR, Indices[i]); } void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, SmallVectorImpl *EndPoints) { - LiveRangeQuery LRQ(*LI, Kill); + LiveQueryResult LRQ = LI->Query(Kill); VNInfo *VNI = LRQ.valueOut(); if (!VNI) return; @@ -452,13 +462,13 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, // If VNI isn't live out from KillMBB, the value is trivially pruned. if (LRQ.endPoint() < MBBEnd) { - LI->removeRange(Kill, LRQ.endPoint()); + LI->removeSegment(Kill, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); return; } // VNI is live out of KillMBB. - LI->removeRange(Kill, MBBEnd); + LI->removeSegment(Kill, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); // Find all blocks that are reachable from KillMBB without leaving VNI's live @@ -476,23 +486,23 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, // Check if VNI is live in to MBB. tie(MBBStart, MBBEnd) = Indexes->getMBBRange(MBB); - LiveRangeQuery LRQ(*LI, MBBStart); + LiveQueryResult LRQ = LI->Query(MBBStart); if (LRQ.valueIn() != VNI) { - // This block isn't part of the VNI live range. Prune the search. + // This block isn't part of the VNI segment. Prune the search. I.skipChildren(); continue; } // Prune the search if VNI is killed in MBB. if (LRQ.endPoint() < MBBEnd) { - LI->removeRange(MBBStart, LRQ.endPoint()); + LI->removeSegment(MBBStart, LRQ.endPoint()); if (EndPoints) EndPoints->push_back(LRQ.endPoint()); I.skipChildren(); continue; } // VNI is live through MBB. - LI->removeRange(MBBStart, MBBEnd); + LI->removeSegment(MBBStart, MBBEnd); if (EndPoints) EndPoints->push_back(MBBEnd); ++I; } @@ -505,7 +515,7 @@ void LiveIntervals::pruneValue(LiveInterval *LI, SlotIndex Kill, void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // Keep track of regunit ranges. 
- SmallVector, 8> RU; + SmallVector, 8> RU; for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { unsigned Reg = TargetRegisterInfo::index2VirtReg(i); @@ -520,13 +530,14 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { RU.clear(); for (MCRegUnitIterator Units(VRM->getPhys(Reg), TRI); Units.isValid(); ++Units) { - LiveInterval *RUInt = &getRegUnit(*Units); - if (RUInt->empty()) + LiveRange &RURanges = getRegUnit(*Units); + if (RURanges.empty()) continue; - RU.push_back(std::make_pair(RUInt, RUInt->find(LI->begin()->end))); + RU.push_back(std::make_pair(&RURanges, RURanges.find(LI->begin()->end))); } - // Every instruction that kills Reg corresponds to a live range end point. + // Every instruction that kills Reg corresponds to a segment range end + // point. for (LiveInterval::iterator RI = LI->begin(), RE = LI->end(); RI != RE; ++RI) { // A block index indicates an MBB edge. @@ -536,7 +547,7 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { if (!MI) continue; - // Check if any of the reguints are live beyond the end of RI. That could + // Check if any of the regunits are live beyond the end of RI. That could // happen when a physreg is defined as a copy of a virtreg: // // %EAX = COPY %vreg5 @@ -546,12 +557,12 @@ void LiveIntervals::addKillFlags(const VirtRegMap *VRM) { // There should be no kill flag on FOO when %vreg5 is rewritten as %EAX. bool CancelKill = false; for (unsigned u = 0, e = RU.size(); u != e; ++u) { - LiveInterval *RInt = RU[u].first; - LiveInterval::iterator &I = RU[u].second; - if (I == RInt->end()) + LiveRange &RRanges = *RU[u].first; + LiveRange::iterator &I = RU[u].second; + if (I == RRanges.end()) continue; - I = RInt->advanceTo(I, RI->end); - if (I == RInt->end() || I->start >= RI->end) + I = RRanges.advanceTo(I, RI->end); + if (I == RRanges.end() || I->start >= RI->end) continue; // I is overlapping RI. CancelKill = true; @@ -609,35 +620,23 @@ LiveIntervals::hasPHIKill(const LiveInterval &LI, const VNInfo *VNI) const { } float -LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) { - // Limit the loop depth ridiculousness. - if (loopDepth > 200) - loopDepth = 200; - - // The loop depth is used to roughly estimate the number of times the - // instruction is executed. Something like 10^d is simple, but will quickly - // overflow a float. This expression behaves like 10^d for small d, but is - // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of - // headroom before overflow. - // By the way, powf() might be unavailable here. For consistency, - // We may take pow(double,double). 
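// Illustrative sketch (not part of the patch): the addKillFlags hunks above
// keep one iterator per regunit range and advance it monotonically with
// advanceTo; the kill flag is cancelled when a regunit segment starts before
// the virtual register's segment ends. Simplified types; not the LLVM API.
#include <cassert>
#include <vector>

struct Segment { unsigned start, end; }; // half-open [start, end)
using Iter = std::vector<Segment>::const_iterator;

// Advance I to the first segment ending after Pos: the segment containing
// Pos, or the one just beyond the hole that Pos falls into.
static Iter advanceTo(Iter I, Iter End, unsigned Pos) {
  while (I != End && I->end <= Pos)
    ++I;
  return I;
}

int main() {
  std::vector<Segment> RegUnit = {{0, 10}, {20, 40}};
  unsigned SegEnd = 25; // the virtual register's segment ends here
  Iter I = advanceTo(RegUnit.begin(), RegUnit.end(), SegEnd);
  // [20,40) starts before 25, so the unit stays live past the end of the
  // virtual register's segment: the kill flag must be cancelled.
  bool CancelKill = (I != RegUnit.end() && I->start < SegEnd);
  assert(CancelKill);
  return 0;
}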
- float lc = std::pow(1 + (100.0 / (loopDepth + 10)), (double)loopDepth); - - return (isDef + isUse) * lc; +LiveIntervals::getSpillWeight(bool isDef, bool isUse, BlockFrequency freq) { + const float Scale = 1.0f / BlockFrequency::getEntryFrequency(); + return (isDef + isUse) * (freq.getFrequency() * Scale); } -LiveRange LiveIntervals::addLiveRangeToEndOfBlock(unsigned reg, - MachineInstr* startInst) { - LiveInterval& Interval = getOrCreateInterval(reg); +LiveRange::Segment +LiveIntervals::addSegmentToEndOfBlock(unsigned reg, MachineInstr* startInst) { + LiveInterval& Interval = createEmptyInterval(reg); VNInfo* VN = Interval.getNextValue( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getVNInfoAllocator()); - LiveRange LR( + LiveRange::Segment S( SlotIndex(getInstructionIndex(startInst).getRegSlot()), getMBBEndIdx(startInst->getParent()), VN); - Interval.addRange(LR); + Interval.addSegment(S); - return LR; + return S; } @@ -712,7 +711,7 @@ private: const TargetRegisterInfo& TRI; SlotIndex OldIdx; SlotIndex NewIdx; - SmallPtrSet Updated; + SmallPtrSet Updated; bool UpdateFlags; public: @@ -726,7 +725,7 @@ public: // physregs, even those that aren't needed for regalloc, in order to update // kill flags. This is wasteful. Eventually, LiveVariables will strip all kill // flags, and postRA passes will use a live register utility instead. - LiveInterval *getRegUnitLI(unsigned Unit) { + LiveRange *getRegUnitLI(unsigned Unit) { if (UpdateFlags) return &LIS.getRegUnit(Unit); return LIS.getCachedRegUnit(Unit); @@ -751,15 +750,16 @@ public: if (!Reg) continue; if (TargetRegisterInfo::isVirtualRegister(Reg)) { - updateRange(LIS.getInterval(Reg)); + LiveInterval &LI = LIS.getInterval(Reg); + updateRange(LI, Reg); continue; } // For physregs, only update the regunits that actually have a // precomputed live range. for (MCRegUnitIterator Units(Reg, &TRI); Units.isValid(); ++Units) - if (LiveInterval *LI = getRegUnitLI(*Units)) - updateRange(*LI); + if (LiveRange *LR = getRegUnitLI(*Units)) + updateRange(*LR, *Units); } if (hasRegMask) updateRegMaskSlots(); @@ -768,26 +768,26 @@ public: private: /// Update a single live range, assuming an instruction has been moved from /// OldIdx to NewIdx. - void updateRange(LiveInterval &LI) { - if (!Updated.insert(&LI)) + void updateRange(LiveRange &LR, unsigned Reg) { + if (!Updated.insert(&LR)) return; DEBUG({ dbgs() << " "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - dbgs() << PrintReg(LI.reg); + if (TargetRegisterInfo::isVirtualRegister(Reg)) + dbgs() << PrintReg(Reg); else - dbgs() << PrintRegUnit(LI.reg, &TRI); - dbgs() << ":\t" << LI << '\n'; + dbgs() << PrintRegUnit(Reg, &TRI); + dbgs() << ":\t" << LR << '\n'; }); if (SlotIndex::isEarlierInstr(OldIdx, NewIdx)) - handleMoveDown(LI); + handleMoveDown(LR); else - handleMoveUp(LI); - DEBUG(dbgs() << " -->\t" << LI << '\n'); - LI.verify(); + handleMoveUp(LR, Reg); + DEBUG(dbgs() << " -->\t" << LR << '\n'); + LR.verify(); } - /// Update LI to reflect an instruction has been moved downwards from OldIdx + /// Update LR to reflect an instruction has been moved downwards from OldIdx /// to NewIdx. /// /// 1. Live def at OldIdx: @@ -801,17 +801,17 @@ private: /// Move def to NewIdx, possibly across another live value. /// /// 4. Def at OldIdx AND at NewIdx: - /// Remove live range [OldIdx;NewIdx) and value defined at OldIdx. + /// Remove segment [OldIdx;NewIdx) and value defined at OldIdx. /// (Happens when bundling multiple defs together). /// /// 5. 
Value read at OldIdx, killed before NewIdx: /// Extend kill to NewIdx. /// - void handleMoveDown(LiveInterval &LI) { + void handleMoveDown(LiveRange &LR) { // First look for a kill at OldIdx. - LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); - LiveInterval::iterator E = LI.end(); - // Is LI even live at OldIdx? + LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); + LiveRange::iterator E = LR.end(); + // Is LR even live at OldIdx? if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) return; @@ -828,7 +828,7 @@ private: for (MIBundleOperands MO(KillMI); MO.isValid(); ++MO) if (MO->isReg() && MO->isUse()) MO->setIsKill(false); - // Adjust I->end to reach NewIdx. This may temporarily make LI invalid by + // Adjust I->end to reach NewIdx. This may temporarily make LR invalid by // overlapping ranges. Case 5 above. I->end = NewIdx.getRegSlot(I->end.isEarlyClobber()); // If this was a kill, there may also be a def. Otherwise we're done. @@ -857,24 +857,25 @@ private: assert((I->end == OldIdx.getDeadSlot() || SlotIndex::isSameInstr(I->end, NewIdx)) && "Cannot move def below kill"); - LiveInterval::iterator NewI = LI.advanceTo(I, NewIdx.getRegSlot()); + LiveRange::iterator NewI = LR.advanceTo(I, NewIdx.getRegSlot()); if (NewI != E && SlotIndex::isSameInstr(NewI->start, NewIdx)) { // There is an existing def at NewIdx, case 4 above. The def at OldIdx is // coalesced into that value. assert(NewI->valno != DefVNI && "Multiple defs of value?"); - LI.removeValNo(DefVNI); + LR.removeValNo(DefVNI); return; } // There was no existing def at NewIdx. Turn *I into a dead def at NewIdx. - // If the def at OldIdx was dead, we allow it to be moved across other LI + // If the def at OldIdx was dead, we allow it to be moved across other LR // values. The new range should be placed immediately before NewI, move any // intermediate ranges up. assert(NewI != I && "Inconsistent iterators"); std::copy(llvm::next(I), NewI, I); - *llvm::prior(NewI) = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + *llvm::prior(NewI) + = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } - /// Update LI to reflect an instruction has been moved upwards from OldIdx + /// Update LR to reflect an instruction has been moved upwards from OldIdx /// to NewIdx. /// /// 1. Live def at OldIdx: @@ -894,11 +895,11 @@ private: /// Hoist kill to NewIdx, then scan for last kill between NewIdx and /// OldIdx. /// - void handleMoveUp(LiveInterval &LI) { + void handleMoveUp(LiveRange &LR, unsigned Reg) { // First look for a kill at OldIdx. - LiveInterval::iterator I = LI.find(OldIdx.getBaseIndex()); - LiveInterval::iterator E = LI.end(); - // Is LI even live at OldIdx? + LiveRange::iterator I = LR.find(OldIdx.getBaseIndex()); + LiveRange::iterator E = LR.end(); + // Is LR even live at OldIdx? if (I == E || SlotIndex::isEarlierInstr(OldIdx, I->start)) return; @@ -915,7 +916,7 @@ private: if (I == E || !SlotIndex::isSameInstr(I->start, OldIdx)) { // No def, search for the new kill. // This can never be an early clobber kill since there is no def. - llvm::prior(I)->end = findLastUseBefore(LI.reg).getRegSlot(); + llvm::prior(I)->end = findLastUseBefore(Reg).getRegSlot(); return; } } @@ -927,18 +928,18 @@ private: DefVNI->def = NewIdx.getRegSlot(I->start.isEarlyClobber()); // Check for an existing def at NewIdx. 
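// Illustrative sketch (not part of the patch): moving a dead def downwards
// in handleMoveDown slides the intermediate segments one position toward
// the front with std::copy, then rewrites the slot in front of NewI as the
// relocated dead def. Toy segment vector; not the LLVM API.
#include <algorithm>
#include <cassert>
#include <iterator>
#include <vector>

struct Segment { unsigned start, end; int valno; };

int main() {
  // A dead def at 10 moves down past values defined at 20 and 30 to 40.
  std::vector<Segment> Seg = {{10, 11, 0}, {20, 25, 1}, {30, 35, 2}};
  auto I = Seg.begin();              // segment of the def being moved
  auto NewI = Seg.end();             // first segment starting after NewIdx=40
  std::copy(std::next(I), NewI, I);  // shift {20,25} and {30,35} up
  *std::prev(NewI) = {40, 41, 0};    // re-insert the def as [40,41)
  assert(Seg[0].valno == 1 && Seg[1].valno == 2 && Seg[2].start == 40);
  return 0;
}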
- LiveInterval::iterator NewI = LI.find(NewIdx.getRegSlot()); + LiveRange::iterator NewI = LR.find(NewIdx.getRegSlot()); if (SlotIndex::isSameInstr(NewI->start, NewIdx)) { assert(NewI->valno != DefVNI && "Same value defined more than once?"); // There is an existing def at NewIdx. if (I->end.isDead()) { // Case 3: Remove the dead def at OldIdx. - LI.removeValNo(DefVNI); + LR.removeValNo(DefVNI); return; } // Case 4: Replace def at NewIdx with live def at OldIdx. I->start = DefVNI->def; - LI.removeValNo(NewI->valno); + LR.removeValNo(NewI->valno); return; } @@ -949,10 +950,10 @@ private: return; } - // DefVNI is a dead def. It may have been moved across other values in LI, + // DefVNI is a dead def. It may have been moved across other values in LR, // so move I up to NewI. Slide [NewI;I) down one position. std::copy_backward(NewI, I, llvm::next(I)); - *NewI = LiveRange(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); + *NewI = LiveRange::Segment(DefVNI->def, NewIdx.getDeadSlot(), DefVNI); } void updateRegMaskSlots() { @@ -1075,8 +1076,7 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (MOI->isReg() && TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && !hasInterval(MOI->getReg())) { - LiveInterval &LI = getOrCreateInterval(MOI->getReg()); - computeVirtRegInterval(&LI); + createAndComputeVirtRegInterval(MOI->getReg()); } } } @@ -1123,9 +1123,9 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (LII != LI.begin()) prevStart = llvm::prior(LII)->start; - // FIXME: This could be more efficient if there was a removeRange - // method that returned an iterator. - LI.removeRange(*LII, true); + // FIXME: This could be more efficient if there was a + // removeSegment method that returned an iterator. + LI.removeSegment(*LII, true); if (prevStart.isValid()) LII = LI.find(prevStart); else @@ -1144,13 +1144,14 @@ LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, if (!lastUseIdx.isValid()) { VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); - LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI); - LII = LI.addRange(LR); + LiveRange::Segment S(instrIdx.getRegSlot(), + instrIdx.getDeadSlot(), VNI); + LII = LI.addSegment(S); } else if (LII->start != instrIdx.getRegSlot()) { VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), VNInfoAllocator); - LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI); - LII = LI.addRange(LR); + LiveRange::Segment S(instrIdx.getRegSlot(), lastUseIdx, VNI); + LII = LI.addSegment(S); } if (MO.getSubReg() && !MO.isUndef()) diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp index dede490..ae086bc 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.cpp @@ -36,11 +36,11 @@ void LiveRangeCalc::reset(const MachineFunction *mf, } -void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { +void LiveRangeCalc::createDeadDefs(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); // Visit all def operands. If the same instruction has multiple defs of Reg, - // LI->createDeadDef() will deduplicate. + // LR.createDeadDef() will deduplicate. for (MachineRegisterInfo::def_iterator I = MRI->def_begin(Reg), E = MRI->def_end(); I != E; ++I) { const MachineInstr *MI = &*I; @@ -54,13 +54,13 @@ void LiveRangeCalc::createDeadDefs(LiveInterval *LI, unsigned Reg) { Idx = Indexes->getInstructionIndex(MI) .getRegSlot(I.getOperand().isEarlyClobber()); - // Create the def in LI. 
This may find an existing def. - LI->createDeadDef(Idx, *Alloc); + // Create the def in LR. This may find an existing def. + LR.createDeadDef(Idx, *Alloc); } } -void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { +void LiveRangeCalc::extendToUses(LiveRange &LR, unsigned Reg) { assert(MRI && Indexes && "call reset() first"); // Visit all operands that read Reg. This may include partial defs. @@ -99,7 +99,7 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { Idx = Idx.getRegSlot(true); } } - extend(LI, Idx, Reg); + extend(LR, Idx, Reg); } } @@ -125,17 +125,14 @@ void LiveRangeCalc::updateLiveIns() { assert(Seen.test(MBB->getNumber())); LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0); } - Updater.setDest(I->LI); + Updater.setDest(&I->LR); Updater.add(Start, End, I->Value); } LiveIn.clear(); } -void LiveRangeCalc::extend(LiveInterval *LI, - SlotIndex Kill, - unsigned PhysReg) { - assert(LI && "Missing live range"); +void LiveRangeCalc::extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg) { assert(Kill.isValid() && "Invalid SlotIndex"); assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); @@ -144,14 +141,14 @@ void LiveRangeCalc::extend(LiveInterval *LI, assert(KillMBB && "No MBB at Kill"); // Is there a def in the same MBB we can extend? - if (LI->extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) + if (LR.extendInBlock(Indexes->getMBBStartIdx(KillMBB), Kill)) return; // Find the single reaching def, or determine if Kill is jointly dominated by // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - if (findReachingDefs(LI, KillMBB, Kill, PhysReg)) + if (findReachingDefs(LR, *KillMBB, Kill, PhysReg)) return; // When there were multiple different values, we may need new PHIs. @@ -170,13 +167,11 @@ void LiveRangeCalc::calculateValues() { } -bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg) { - unsigned KillMBBNum = KillMBB->getNumber(); +bool LiveRangeCalc::findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + SlotIndex Kill, unsigned PhysReg) { + unsigned KillMBBNum = KillMBB.getNumber(); - // Block numbers where LI should be live-in. + // Block numbers where LR should be live-in. SmallVector WorkList(1, KillMBBNum); // Remember if we have seen more than one value. @@ -203,7 +198,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, #endif for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { + PE = MBB->pred_end(); PI != PE; ++PI) { MachineBasicBlock *Pred = *PI; // Is this a known live-out block? @@ -221,7 +216,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, // First time we see Pred. Try to determine the live-out value, but set // it as null if Pred is live-through with an unknown value. - VNInfo *VNI = LI->extendInBlock(Start, End); + VNInfo *VNI = LR.extendInBlock(Start, End); setLiveOutValue(Pred, VNI); if (VNI) { if (TheVNI && TheVNI != VNI) @@ -231,7 +226,7 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, } // No, we need a live-in value for Pred as well - if (Pred != KillMBB) + if (Pred != &KillMBB) WorkList.push_back(Pred->getNumber()); else // Loopback to KillMBB, so value is really live through. 
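// Illustrative sketch (not part of the patch): findReachingDefs walks back
// from the kill block over predecessors, records each block's live-out
// value where known, and detects whether a single unique value reaches the
// use (otherwise PHI-defs are needed). Toy CFG and value map; not the
// LLVM API.
#include <cassert>
#include <map>
#include <vector>

int main() {
  // Block -> predecessors.
  std::map<int, std::vector<int>> Preds = {{3, {1, 2}}, {1, {0}}, {2, {0}}};
  // Blocks whose live-out value is already known (block -> value number).
  std::map<int, int> LiveOut = {{1, 7}, {2, 7}};

  int KillMBB = 3, TheVNI = -1;
  bool UniqueVNI = true;
  std::vector<int> WorkList = {KillMBB};
  for (size_t i = 0; i != WorkList.size(); ++i) {
    for (int Pred : Preds[WorkList[i]]) {
      auto It = LiveOut.find(Pred);
      if (It != LiveOut.end()) {          // known live-out value
        if (TheVNI != -1 && TheVNI != It->second)
          UniqueVNI = false;
        TheVNI = It->second;
        continue;
      }
      if (Pred != KillMBB)                // need a live-in value here too
        WorkList.push_back(Pred);
    }
  }
  assert(UniqueVNI && TheVNI == 7); // both paths carry value 7: no PHI
  return 0;
}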
@@ -248,9 +243,9 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, // If a unique reaching def was found, blit in the live ranges immediately. if (UniqueVNI) { - LiveRangeUpdater Updater(LI); - for (SmallVectorImpl::const_iterator - I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + LiveRangeUpdater Updater(&LR); + for (SmallVectorImpl::const_iterator I = WorkList.begin(), + E = WorkList.end(); I != E; ++I) { SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(*I); // Trim the live range in KillMBB. @@ -270,8 +265,8 @@ bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, for (SmallVectorImpl::const_iterator I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { MachineBasicBlock *MBB = MF->getBlockNumbered(*I); - addLiveInBlock(LI, DomTree->getNode(MBB)); - if (MBB == KillMBB) + addLiveInBlock(LR, DomTree->getNode(MBB)); + if (MBB == &KillMBB) LiveIn.back().Kill = Kill; } @@ -348,16 +343,17 @@ void LiveRangeCalc::updateSSA() { assert(Alloc && "Need VNInfo allocator to create PHI-defs"); SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); - VNInfo *VNI = I->LI->getNextValue(Start, *Alloc); + LiveRange &LR = I->LR; + VNInfo *VNI = LR.getNextValue(Start, *Alloc); I->Value = VNI; // This block is done, we know the final value. I->DomNode = 0; // Add liveness since updateLiveIns now skips this node. if (I->Kill.isValid()) - I->LI->addRange(LiveRange(Start, I->Kill, VNI)); + LR.addSegment(LiveInterval::Segment(Start, I->Kill, VNI)); else { - I->LI->addRange(LiveRange(Start, End, VNI)); + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); LOP = LiveOutPair(VNI, Node); } } else if (IDomValue.first) { diff --git a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h index 57cab7b..a3a3fbb 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeCalc.h +++ b/contrib/llvm/lib/CodeGen/LiveRangeCalc.h @@ -75,9 +75,9 @@ class LiveRangeCalc { /// LiveInBlock - Information about a basic block where a live range is known /// to be live-in, but the value has not yet been determined. struct LiveInBlock { - // LI - The live range that is live-in to this block. The algorithms can + // The live range set that is live-in to this block. The algorithms can // handle multiple non-overlapping live ranges simultaneously. - LiveInterval *LI; + LiveRange &LR; // DomNode - Dominator tree node for the block. // Cleared when the final value has been determined and LI has been updated. @@ -91,8 +91,8 @@ class LiveRangeCalc { // Live-in value filled in by updateSSA once it is known. VNInfo *Value; - LiveInBlock(LiveInterval *li, MachineDomTreeNode *node, SlotIndex kill) - : LI(li), DomNode(node), Kill(kill), Value(0) {} + LiveInBlock(LiveRange &LR, MachineDomTreeNode *node, SlotIndex kill) + : LR(LR), DomNode(node), Kill(kill), Value(0) {} }; /// LiveIn - Work list of blocks where the live-in value has yet to be @@ -111,10 +111,8 @@ class LiveRangeCalc { /// are added to the LiveIn array, and the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - bool findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg); + bool findReachingDefs(LiveRange &LR, MachineBasicBlock &KillMBB, + SlotIndex Kill, unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. 
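// Illustrative sketch (not part of the patch): the LiveRangeCalc interface
// above migrates from LiveInterval* parameters to LiveRange& ones, with
// thin LiveInterval overloads forwarding the interval's own register; the
// reference member in LiveInBlock likewise documents that the range can
// never be null. Toy types; not the LLVM API.
struct LiveRange {};
struct LiveInterval : LiveRange {
  unsigned reg = 0;
};

struct Calc {
  void createDeadDefs(LiveRange &LR, unsigned Reg) { (void)LR; (void)Reg; }
  void createDeadDefs(LiveInterval &LI) { createDeadDefs(LI, LI.reg); }
  void extendToUses(LiveRange &LR, unsigned Reg) { (void)LR; (void)Reg; }
  void extendToUses(LiveInterval &LI) { extendToUses(LI, LI.reg); }
};

int main() {
  Calc C;
  LiveInterval LI;
  LI.reg = 42;
  C.createDeadDefs(LI); // resolves to the LiveInterval overload
  C.extendToUses(LI);
  return 0;
}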
@@ -146,10 +144,6 @@ public: MachineDominatorTree*, VNInfo::Allocator*); - /// calculate - Calculate the live range of a virtual register from its defs - /// and uses. LI must be empty with no values. - void calculate(LiveInterval *LI); - //===--------------------------------------------------------------------===// // Mid-level interface. //===--------------------------------------------------------------------===// @@ -165,27 +159,27 @@ public: /// single existing value, Alloc may be null. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - void extend(LiveInterval *LI, SlotIndex Kill, unsigned PhysReg = 0); + void extend(LiveRange &LR, SlotIndex Kill, unsigned PhysReg = 0); /// createDeadDefs - Create a dead def in LI for every def operand of Reg. /// Each instruction defining Reg gets a new VNInfo with a corresponding /// minimal live range. - void createDeadDefs(LiveInterval *LI, unsigned Reg); + void createDeadDefs(LiveRange &LR, unsigned Reg); /// createDeadDefs - Create a dead def in LI for every def of LI->reg. - void createDeadDefs(LiveInterval *LI) { - createDeadDefs(LI, LI->reg); + void createDeadDefs(LiveInterval &LI) { + createDeadDefs(LI, LI.reg); } /// extendToUses - Extend the live range of LI to reach all uses of Reg. /// /// All uses must be jointly dominated by existing liveness. PHI-defs are /// inserted as needed to preserve SSA form. - void extendToUses(LiveInterval *LI, unsigned Reg); + void extendToUses(LiveRange &LR, unsigned Reg); /// extendToUses - Extend the live range of LI to reach all uses of LI->reg. - void extendToUses(LiveInterval *LI) { - extendToUses(LI, LI->reg); + void extendToUses(LiveInterval &LI) { + extendToUses(LI, LI.reg); } //===--------------------------------------------------------------------===// @@ -216,15 +210,15 @@ public: /// function can only be called once per basic block. Once the live-in value /// has been determined, calculateValues() will add liveness to LI. /// - /// @param LI The live range that is live-in to the block. + /// @param LR The live range that is live-in to the block. /// @param DomNode The domtree node for the block. /// @param Kill Index in block where LI is killed. If the value is /// live-through, set Kill = SLotIndex() and also call /// setLiveOutValue(MBB, 0). 
- void addLiveInBlock(LiveInterval *LI, + void addLiveInBlock(LiveRange &LR, MachineDomTreeNode *DomNode, SlotIndex Kill = SlotIndex()) { - LiveIn.push_back(LiveInBlock(LI, DomNode, Kill)); + LiveIn.push_back(LiveInBlock(LR, DomNode, Kill)); } /// calculateValues - Calculate the value that will be live-in to each block diff --git a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp index 7793e96..cb70c43 100644 --- a/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRangeEdit.cpp @@ -13,7 +13,6 @@ #define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveRangeEdit.h" -#include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -31,17 +30,23 @@ STATISTIC(NumFracRanges, "Number of live ranges fractured by DCE"); void LiveRangeEdit::Delegate::anchor() { } -LiveInterval &LiveRangeEdit::createFrom(unsigned OldReg) { +LiveInterval &LiveRangeEdit::createEmptyIntervalFrom(unsigned OldReg) { unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); if (VRM) { - VRM->grow(); VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); } - LiveInterval &LI = LIS.getOrCreateInterval(VReg); - NewRegs.push_back(&LI); + LiveInterval &LI = LIS.createEmptyInterval(VReg); return LI; } +unsigned LiveRangeEdit::createFrom(unsigned OldReg) { + unsigned VReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); + if (VRM) { + VRM->setIsSplitFromReg(VReg, VRM->getOriginal(OldReg)); + } + return VReg; +} + bool LiveRangeEdit::checkRematerializable(VNInfo *VNI, const MachineInstr *DefMI, AliasAnalysis *aa) { @@ -216,108 +221,122 @@ bool LiveRangeEdit::foldAsLoad(LiveInterval *LI, return true; } -void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, - ArrayRef RegsBeingSpilled) { - SetVector, - SmallPtrSet > ToShrink; +/// Find all live intervals that need to shrink, then remove the instruction. +void LiveRangeEdit::eliminateDeadDef(MachineInstr *MI, ToShrinkSet &ToShrink) { + assert(MI->allDefsAreDead() && "Def isn't really dead"); + SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - for (;;) { - // Erase all dead defs. - while (!Dead.empty()) { - MachineInstr *MI = Dead.pop_back_val(); - assert(MI->allDefsAreDead() && "Def isn't really dead"); - SlotIndex Idx = LIS.getInstructionIndex(MI).getRegSlot(); - - // Never delete inline asm. - if (MI->isInlineAsm()) { - DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); - continue; - } + // Never delete a bundled instruction. + if (MI->isBundled()) { + return; + } + // Never delete inline asm. + if (MI->isInlineAsm()) { + DEBUG(dbgs() << "Won't delete: " << Idx << '\t' << *MI); + return; + } - // Use the same criteria as DeadMachineInstructionElim. - bool SawStore = false; - if (!MI->isSafeToMove(&TII, 0, SawStore)) { - DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); - continue; - } + // Use the same criteria as DeadMachineInstructionElim. + bool SawStore = false; + if (!MI->isSafeToMove(&TII, 0, SawStore)) { + DEBUG(dbgs() << "Can't delete: " << Idx << '\t' << *MI); + return; + } - DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); - - // Collect virtual registers to be erased after MI is gone. 
- SmallVector RegsToErase; - bool ReadsPhysRegs = false; - - // Check for live intervals that may shrink - for (MachineInstr::mop_iterator MOI = MI->operands_begin(), - MOE = MI->operands_end(); MOI != MOE; ++MOI) { - if (!MOI->isReg()) - continue; - unsigned Reg = MOI->getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) { - // Check if MI reads any unreserved physregs. - if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) - ReadsPhysRegs = true; - continue; - } - LiveInterval &LI = LIS.getInterval(Reg); - - // Shrink read registers, unless it is likely to be expensive and - // unlikely to change anything. We typically don't want to shrink the - // PIC base register that has lots of uses everywhere. - // Always shrink COPY uses that probably come from live range splitting. - if (MI->readsVirtualRegister(Reg) && - (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || - LI.killedAt(Idx))) - ToShrink.insert(&LI); - - // Remove defined value. - if (MOI->isDef()) { - if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { - if (TheDelegate) - TheDelegate->LRE_WillShrinkVirtReg(LI.reg); - LI.removeValNo(VNI); - if (LI.empty()) - RegsToErase.push_back(Reg); + DEBUG(dbgs() << "Deleting dead def " << Idx << '\t' << *MI); + + // Collect virtual registers to be erased after MI is gone. + SmallVector RegsToErase; + bool ReadsPhysRegs = false; + + // Check for live intervals that may shrink + for (MachineInstr::mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (!MOI->isReg()) + continue; + unsigned Reg = MOI->getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Check if MI reads any unreserved physregs. + if (Reg && MOI->readsReg() && !MRI.isReserved(Reg)) + ReadsPhysRegs = true; + else if (MOI->isDef()) { + for (MCRegUnitIterator Units(Reg, MRI.getTargetRegisterInfo()); + Units.isValid(); ++Units) { + if (LiveRange *LR = LIS.getCachedRegUnit(*Units)) { + if (VNInfo *VNI = LR->getVNInfoAt(Idx)) + LR->removeValNo(VNI); } } } - - // Currently, we don't support DCE of physreg live ranges. If MI reads - // any unreserved physregs, don't erase the instruction, but turn it into - // a KILL instead. This way, the physreg live ranges don't end up - // dangling. - // FIXME: It would be better to have something like shrinkToUses() for - // physregs. That could potentially enable more DCE and it would free up - // the physreg. It would not happen often, though. - if (ReadsPhysRegs) { - MI->setDesc(TII.get(TargetOpcode::KILL)); - // Remove all operands that aren't physregs. - for (unsigned i = MI->getNumOperands(); i; --i) { - const MachineOperand &MO = MI->getOperand(i-1); - if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) - continue; - MI->RemoveOperand(i-1); - } - DEBUG(dbgs() << "Converted physregs to:\t" << *MI); - } else { + continue; + } + LiveInterval &LI = LIS.getInterval(Reg); + + // Shrink read registers, unless it is likely to be expensive and + // unlikely to change anything. We typically don't want to shrink the + // PIC base register that has lots of uses everywhere. + // Always shrink COPY uses that probably come from live range splitting. + if (MI->readsVirtualRegister(Reg) && + (MI->isCopy() || MOI->isDef() || MRI.hasOneNonDBGUse(Reg) || + LI.Query(Idx).isKill())) + ToShrink.insert(&LI); + + // Remove defined value. 
+ if (MOI->isDef()) { + if (VNInfo *VNI = LI.getVNInfoAt(Idx)) { if (TheDelegate) - TheDelegate->LRE_WillEraseInstruction(MI); - LIS.RemoveMachineInstrFromMaps(MI); - MI->eraseFromParent(); - ++NumDCEDeleted; + TheDelegate->LRE_WillShrinkVirtReg(LI.reg); + LI.removeValNo(VNI); + if (LI.empty()) + RegsToErase.push_back(Reg); } + } + } - // Erase any virtregs that are now empty and unused. There may be - // uses around. Keep the empty live range in that case. - for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { - unsigned Reg = RegsToErase[i]; - if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { - ToShrink.remove(&LIS.getInterval(Reg)); - eraseVirtReg(Reg); - } - } + // Currently, we don't support DCE of physreg live ranges. If MI reads + // any unreserved physregs, don't erase the instruction, but turn it into + // a KILL instead. This way, the physreg live ranges don't end up + // dangling. + // FIXME: It would be better to have something like shrinkToUses() for + // physregs. That could potentially enable more DCE and it would free up + // the physreg. It would not happen often, though. + if (ReadsPhysRegs) { + MI->setDesc(TII.get(TargetOpcode::KILL)); + // Remove all operands that aren't physregs. + for (unsigned i = MI->getNumOperands(); i; --i) { + const MachineOperand &MO = MI->getOperand(i-1); + if (MO.isReg() && TargetRegisterInfo::isPhysicalRegister(MO.getReg())) + continue; + MI->RemoveOperand(i-1); } + DEBUG(dbgs() << "Converted physregs to:\t" << *MI); + } else { + if (TheDelegate) + TheDelegate->LRE_WillEraseInstruction(MI); + LIS.RemoveMachineInstrFromMaps(MI); + MI->eraseFromParent(); + ++NumDCEDeleted; + } + + // Erase any virtregs that are now empty and unused. There may be + // uses around. Keep the empty live range in that case. + for (unsigned i = 0, e = RegsToErase.size(); i != e; ++i) { + unsigned Reg = RegsToErase[i]; + if (LIS.hasInterval(Reg) && MRI.reg_nodbg_empty(Reg)) { + ToShrink.remove(&LIS.getInterval(Reg)); + eraseVirtReg(Reg); + } + } +} + +void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, + ArrayRef RegsBeingSpilled) { + ToShrinkSet ToShrink; + + for (;;) { + // Erase all dead defs. + while (!Dead.empty()) + eliminateDeadDef(Dead.pop_back_val(), ToShrink); if (ToShrink.empty()) break; @@ -331,7 +350,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, TheDelegate->LRE_WillShrinkVirtReg(LI->reg); if (!LIS.shrinkToUses(LI, &Dead)) continue; - + // Don't create new intervals for a register being spilled. // The new intervals would have to be spilled anyway so its not worth it. // Also they currently aren't spilled so creating them and not spilling @@ -343,11 +362,11 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, break; } } - + if (BeingSpilled) continue; // LI may have been separated, create new intervals. - LI->RenumberValues(LIS); + LI->RenumberValues(); ConnectedVNInfoEqClasses ConEQ(LIS); unsigned NumComp = ConEQ.Classify(LI); if (NumComp <= 1) @@ -357,7 +376,7 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, DEBUG(dbgs() << NumComp << " components: " << *LI << '\n'); SmallVector Dups(1, LI); for (unsigned i = 1; i != NumComp; ++i) { - Dups.push_back(&createFrom(LI->reg)); + Dups.push_back(&createEmptyIntervalFrom(LI->reg)); // If LI is an original interval that hasn't been split yet, make the new // intervals their own originals instead of referring to LI. The original // interval must contain all the split products, and LI doesn't. 
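// Illustrative sketch (not part of the patch): the refactored
// eliminateDeadDefs drives a fixed point. Deleting one dead def (now
// factored into eliminateDeadDef) can queue registers to shrink, and
// shrinking can in turn expose new dead defs, so both worklists drain
// alternately until empty. Toy stand-ins; not the LLVM API.
#include <cassert>
#include <set>
#include <vector>

int main() {
  std::vector<int> Dead = {3};     // instruction ids with all-dead defs
  std::set<int> ToShrink;          // registers whose ranges may shrink
  int Deleted = 0;

  // Scenario: deleting instr 3 makes reg 7 shrinkable; shrinking reg 7
  // reveals that instr 2 is dead as well.
  for (;;) {
    while (!Dead.empty()) {
      int MI = Dead.back(); Dead.pop_back();
      ++Deleted;
      if (MI == 3) ToShrink.insert(7);  // stands in for eliminateDeadDef()
    }
    if (ToShrink.empty())
      break;
    int Reg = *ToShrink.begin(); ToShrink.erase(ToShrink.begin());
    if (Reg == 7) Dead.push_back(2);    // shrinkToUses found a new dead def
  }
  assert(Deleted == 2);
  return 0;
}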
@@ -374,14 +393,27 @@ void LiveRangeEdit::eliminateDeadDefs(SmallVectorImpl &Dead, } } -void LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, - const MachineLoopInfo &Loops) { - VirtRegAuxInfo VRAI(MF, LIS, Loops); - for (iterator I = begin(), E = end(); I != E; ++I) { - LiveInterval &LI = **I; +// Keep track of new virtual registers created via +// MachineRegisterInfo::createVirtualRegister. +void +LiveRangeEdit::MRI_NoteNewVirtualRegister(unsigned VReg) +{ + if (VRM) + VRM->grow(); + + NewRegs.push_back(VReg); +} + +void +LiveRangeEdit::calculateRegClassAndHint(MachineFunction &MF, + const MachineLoopInfo &Loops, + const MachineBlockFrequencyInfo &MBFI) { + VirtRegAuxInfo VRAI(MF, LIS, Loops, MBFI); + for (unsigned I = 0, Size = size(); I < Size; ++I) { + LiveInterval &LI = LIS.getInterval(get(I)); if (MRI.recomputeRegClass(LI.reg, MF.getTarget())) DEBUG(dbgs() << "Inflated " << PrintReg(LI.reg) << " to " << MRI.getRegClass(LI.reg)->getName() << '\n'); - VRAI.CalculateWeightAndHint(LI); + VRAI.calculateSpillWeightAndHint(LI); } } diff --git a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp index 0ef069f..1d801ac 100644 --- a/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp +++ b/contrib/llvm/lib/CodeGen/LiveRegMatrix.cpp @@ -119,9 +119,11 @@ bool LiveRegMatrix::checkRegUnitInterference(LiveInterval &VirtReg, if (VirtReg.empty()) return false; CoalescerPair CP(VirtReg.reg, PhysReg, *TRI); - for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) - if (VirtReg.overlaps(LIS->getRegUnit(*Units), CP, *LIS->getSlotIndexes())) + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { + const LiveRange &UnitRange = LIS->getRegUnit(*Units); + if (VirtReg.overlaps(UnitRange, CP, *LIS->getSlotIndexes())) return true; + } return false; } diff --git a/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp new file mode 100644 index 0000000..6221ca2 --- /dev/null +++ b/contrib/llvm/lib/CodeGen/LiveRegUnits.cpp @@ -0,0 +1,111 @@ +//===-- LiveInterval.cpp - Live Interval Representation -------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the LiveRegUnits utility for tracking liveness of +// physical register units across machine instructions in forward or backward +// order. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/LiveRegUnits.h" +#include "llvm/CodeGen/MachineInstrBundle.h" +using namespace llvm; + +/// Return true if the given MachineOperand clobbers the given register unit. +/// A register unit is only clobbered if all its super-registers are clobbered. +static bool operClobbersUnit(const MachineOperand *MO, unsigned Unit, + const MCRegisterInfo *MCRI) { + for (MCRegUnitRootIterator RI(Unit, MCRI); RI.isValid(); ++RI) { + for (MCSuperRegIterator SI(*RI, MCRI, true); SI.isValid(); ++SI) { + if (!MO->clobbersPhysReg(*SI)) + return false; + } + } + return true; +} + +/// We assume the high bits of a physical super register are not preserved +/// unless the instruction has an implicit-use operand reading the +/// super-register or a register unit for the upper bits is available. 
+void LiveRegUnits::removeRegsInMask(const MachineOperand &Op, + const MCRegisterInfo &MCRI) { + SparseSet::iterator LUI = LiveUnits.begin(); + while (LUI != LiveUnits.end()) { + if (operClobbersUnit(&Op, *LUI, &MCRI)) + LUI = LiveUnits.erase(LUI); + else + ++LUI; + } +} + +void LiveRegUnits::stepBackward(const MachineInstr &MI, + const MCRegisterInfo &MCRI) { + // Remove defined registers and regmask kills from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + if (!O->isDef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + removeReg(Reg, MCRI); + } else if (O->isRegMask()) { + removeRegsInMask(*O, MCRI); + } + } + // Add uses to the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (!O->isReg() || !O->readsReg() || O->isUndef()) + continue; + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + addReg(Reg, MCRI); + } +} + +/// Uses with kill flag get removed from the set, defs added. If possible +/// use StepBackward() instead of this function because some kill flags may +/// be missing. +void LiveRegUnits::stepForward(const MachineInstr &MI, + const MCRegisterInfo &MCRI) { + SmallVector Defs; + // Remove killed registers from the set. + for (ConstMIBundleOperands O(&MI); O.isValid(); ++O) { + if (O->isReg()) { + unsigned Reg = O->getReg(); + if (Reg == 0) + continue; + if (O->isDef()) { + if (!O->isDead()) + Defs.push_back(Reg); + } else { + if (!O->isKill()) + continue; + assert(O->isUse()); + removeReg(Reg, MCRI); + } + } else if (O->isRegMask()) { + removeRegsInMask(*O, MCRI); + } + } + // Add defs to the set. + for (unsigned i = 0, e = Defs.size(); i != e; ++i) { + addReg(Defs[i], MCRI); + } +} + +/// Adds all registers in the live-in list of block @p BB. +void LiveRegUnits::addLiveIns(const MachineBasicBlock *MBB, + const MCRegisterInfo &MCRI) { + for (MachineBasicBlock::livein_iterator L = MBB->livein_begin(), + LE = MBB->livein_end(); L != LE; ++L) { + addReg(*L, MCRI); + } +} diff --git a/contrib/llvm/lib/CodeGen/LiveVariables.cpp b/contrib/llvm/lib/CodeGen/LiveVariables.cpp index 789eddc..ed55d7a 100644 --- a/contrib/llvm/lib/CodeGen/LiveVariables.cpp +++ b/contrib/llvm/lib/CodeGen/LiveVariables.cpp @@ -217,8 +217,8 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg, continue; unsigned DefReg = MO.getReg(); if (TRI->isSubRegister(Reg, DefReg)) { - PartDefRegs.insert(DefReg); - for (MCSubRegIterator SubRegs(DefReg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(DefReg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) PartDefRegs.insert(*SubRegs); } } @@ -271,8 +271,8 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) { true/*IsImp*/)); // Remember this use. 
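// Illustrative sketch (not part of the patch): LiveRegUnits::stepBackward
// above scans an instruction bottom-up; its defs end liveness above the
// instruction and its uses begin it. Register units are plain unsigneds
// here; not the LLVM API.
#include <cassert>
#include <set>
#include <vector>

struct Inst {
  std::vector<unsigned> defs, uses;
};

static void stepBackward(std::set<unsigned> &Live, const Inst &MI) {
  for (unsigned R : MI.defs) Live.erase(R);  // defined here: dead above
  for (unsigned R : MI.uses) Live.insert(R); // used here: live above
}

int main() {
  std::set<unsigned> Live = {1};   // unit 1 is live below this point
  Inst MI = {{1}, {2, 3}};         // defines unit 1, uses units 2 and 3
  stepBackward(Live, MI);
  assert(Live == std::set<unsigned>({2, 3}));
  return 0;
}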
- PhysRegUse[Reg] = MI; - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) PhysRegUse[*SubRegs] = MI; } @@ -350,8 +350,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { continue; } if (MachineInstr *Use = PhysRegUse[SubReg]) { - PartUses.insert(SubReg); - for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); SS.isValid(); + ++SS) PartUses.insert(*SS); unsigned Dist = DistanceMap[Use]; if (Dist > LastRefOrPartRefDist) { @@ -387,8 +387,8 @@ bool LiveVariables::HandlePhysRegKill(unsigned Reg, MachineInstr *MI) { LastSubRef->addRegisterKilled(SubReg, TRI, true); else { LastRefOrPartRef->addRegisterKilled(SubReg, TRI, true); - PhysRegUse[SubReg] = LastRefOrPartRef; - for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); + SS.isValid(); ++SS) PhysRegUse[*SS] = LastRefOrPartRef; } for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) @@ -441,12 +441,12 @@ void LiveVariables::HandleRegMask(const MachineOperand &MO) { } void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, - SmallVector &Defs) { + SmallVectorImpl &Defs) { // What parts of the register are previously defined? SmallSet Live; if (PhysRegDef[Reg] || PhysRegUse[Reg]) { - Live.insert(Reg); - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) Live.insert(*SubRegs); } else { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { @@ -460,8 +460,8 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, if (Live.count(SubReg)) continue; if (PhysRegDef[SubReg] || PhysRegUse[SubReg]) { - Live.insert(SubReg); - for (MCSubRegIterator SS(SubReg, TRI); SS.isValid(); ++SS) + for (MCSubRegIterator SS(SubReg, TRI, /*IncludeSelf=*/true); + SS.isValid(); ++SS) Live.insert(*SS); } } @@ -484,13 +484,12 @@ void LiveVariables::HandlePhysRegDef(unsigned Reg, MachineInstr *MI, } void LiveVariables::UpdatePhysRegDefs(MachineInstr *MI, - SmallVector &Defs) { + SmallVectorImpl &Defs) { while (!Defs.empty()) { unsigned Reg = Defs.back(); Defs.pop_back(); - PhysRegDef[Reg] = MI; - PhysRegUse[Reg] = NULL; - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) { + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) { unsigned SubReg = *SubRegs; PhysRegDef[SubReg] = MI; PhysRegUse[SubReg] = NULL; @@ -610,9 +609,9 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { // if they have PHI nodes, and if so, we simulate an assignment at the end // of the current block. if (!PHIVarInfo[MBB->getNumber()].empty()) { - SmallVector& VarInfoVec = PHIVarInfo[MBB->getNumber()]; + SmallVectorImpl &VarInfoVec = PHIVarInfo[MBB->getNumber()]; - for (SmallVector::iterator I = VarInfoVec.begin(), + for (SmallVectorImpl::iterator I = VarInfoVec.begin(), E = VarInfoVec.end(); I != E; ++I) // Mark it alive only in the block we are representing. 
MarkVirtRegAliveInBlock(getVarInfo(*I),MRI->getVRegDef(*I)->getParent(), diff --git a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp index 91810bd..ca71e3b 100644 --- a/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBasicBlock.cpp @@ -52,7 +52,7 @@ MCSymbol *MachineBasicBlock::getSymbol() const { if (!CachedMCSymbol) { const MachineFunction *MF = getParent(); MCContext &Ctx = MF->getContext(); - const char *Prefix = Ctx.getAsmInfo().getPrivateGlobalPrefix(); + const char *Prefix = Ctx.getAsmInfo()->getPrivateGlobalPrefix(); CachedMCSymbol = Ctx.GetOrCreateSymbol(Twine(Prefix) + "BB" + Twine(MF->getFunctionNumber()) + "_" + Twine(getNumber())); @@ -861,7 +861,7 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LiveInterval &LI = LIS->getInterval(Reg); VNInfo *VNI = LI.getVNInfoAt(PrevIndex); assert(VNI && "PHI sources should be live out of their predecessors."); - LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); } } } @@ -880,9 +880,9 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { if (isLiveOut && isLastMBB) { VNInfo *VNI = LI.getVNInfoAt(PrevIndex); assert(VNI && "LiveInterval should have VNInfo where it is live."); - LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + LI.addSegment(LiveInterval::Segment(StartIndex, EndIndex, VNI)); } else if (!isLiveOut && !isLastMBB) { - LI.removeRange(StartIndex, EndIndex); + LI.removeSegment(StartIndex, EndIndex); } } diff --git a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp index 070daf2..e269d24 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -50,11 +50,6 @@ bool MachineBlockFrequencyInfo::runOnMachineFunction(MachineFunction &F) { return false; } -/// getblockFreq - Return block frequency. Return 0 if we don't have the -/// information. Please note that initial frequency is equal to 1024. It means -/// that we should not rely on the value itself, but only on the comparison to -/// the other block frequencies. We do this to avoid using of floating points. -/// BlockFrequency MachineBlockFrequencyInfo:: getBlockFreq(const MachineBasicBlock *MBB) const { return MBFI->getBlockFreq(MBB); diff --git a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp index bfba503..4b0f7f3 100644 --- a/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/MachineBlockPlacement.cpp @@ -991,6 +991,28 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { Cond.clear(); MachineBasicBlock *TBB = 0, *FBB = 0; // For AnalyzeBranch. if (!TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // The "PrevBB" is not yet updated to reflect current code layout, so, + // o. it may fall-through to a block without explict "goto" instruction + // before layout, and no longer fall-through it after layout; or + // o. just opposite. + // + // AnalyzeBranch() may return erroneous value for FBB when these two + // situations take place. For the first scenario FBB is mistakenly set + // NULL; for the 2nd scenario, the FBB, which is expected to be NULL, + // is mistakenly pointing to "*BI". 
+ // + bool needUpdateBr = true; + if (!Cond.empty() && (!FBB || FBB == *BI)) { + PrevBB->updateTerminator(); + needUpdateBr = false; + Cond.clear(); + TBB = FBB = 0; + if (TII->AnalyzeBranch(*PrevBB, TBB, FBB, Cond)) { + // FIXME: This should never take place. + TBB = FBB = 0; + } + } + // If PrevBB has a two-way branch, try to re-order the branches // such that we branch to the successor with higher weight first. if (TBB && !Cond.empty() && FBB && @@ -1003,8 +1025,10 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { DebugLoc dl; // FIXME: this is nowhere TII->RemoveBranch(*PrevBB); TII->InsertBranch(*PrevBB, FBB, TBB, Cond, dl); + needUpdateBr = true; } - PrevBB->updateTerminator(); + if (needUpdateBr) + PrevBB->updateTerminator(); } } diff --git a/contrib/llvm/lib/CodeGen/MachineCSE.cpp b/contrib/llvm/lib/CodeGen/MachineCSE.cpp index 61d8d38..d228286 100644 --- a/contrib/llvm/lib/CodeGen/MachineCSE.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCSE.cpp @@ -84,11 +84,11 @@ namespace { bool hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet &PhysRefs, - SmallVector &PhysDefs, + SmallVectorImpl &PhysDefs, bool &PhysUseDef) const; bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet &PhysRefs, - SmallVector &PhysDefs, + SmallVectorImpl &PhysDefs, bool &NonLocal) const; bool isCSECandidate(MachineInstr *MI); bool isProfitableToCSE(unsigned CSReg, unsigned Reg, @@ -193,7 +193,7 @@ MachineCSE::isPhysDefTriviallyDead(unsigned Reg, bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, const MachineBasicBlock *MBB, SmallSet &PhysRefs, - SmallVector &PhysDefs, + SmallVectorImpl &PhysDefs, bool &PhysUseDef) const{ // First, add all uses to PhysRefs. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { @@ -244,7 +244,7 @@ bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI, bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI, SmallSet &PhysRefs, - SmallVector &PhysDefs, + SmallVectorImpl &PhysDefs, bool &NonLocal) const { // For now conservatively returns false if the common subexpression is // not in the same basic block as the given instruction. 
The only exception diff --git a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp index dc8a224..4f48e2c 100644 --- a/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp +++ b/contrib/llvm/lib/CodeGen/MachineCopyPropagation.cpp @@ -213,9 +213,8 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { CopyMap.erase(*AI); AvailCopyMap.erase(*AI); } - CopyMap[Def] = MI; - AvailCopyMap[Def] = MI; - for (MCSubRegIterator SR(Def, TRI); SR.isValid(); ++SR) { + for (MCSubRegIterator SR(Def, TRI, /*IncludeSelf=*/true); SR.isValid(); + ++SR) { CopyMap[*SR] = MI; AvailCopyMap[*SR] = MI; } diff --git a/contrib/llvm/lib/CodeGen/MachineFunction.cpp b/contrib/llvm/lib/CodeGen/MachineFunction.cpp index 04321f3..0703df0 100644 --- a/contrib/llvm/lib/CodeGen/MachineFunction.cpp +++ b/contrib/llvm/lib/CodeGen/MachineFunction.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallString.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" @@ -54,23 +55,28 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, GCModuleInfo* gmi) : Fn(F), Target(TM), Ctx(mmi.getContext()), MMI(mmi), GMI(gmi) { if (TM.getRegisterInfo()) - RegInfo = new (Allocator) MachineRegisterInfo(*TM.getRegisterInfo()); + RegInfo = new (Allocator) MachineRegisterInfo(TM); else RegInfo = 0; + MFInfo = 0; - FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(), - TM.Options.RealignStack); + FrameInfo = + new (Allocator) MachineFrameInfo(TM,!F->hasFnAttribute("no-realign-stack")); + if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getAttributes(). getStackAlignment(AttributeSet::FunctionIndex)); - ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout()); + + ConstantPool = new (Allocator) MachineConstantPool(TM); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); + // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) Alignment = std::max(Alignment, TM.getTargetLowering()->getPrefFunctionAlignment()); + FunctionNumber = FunctionNum; JumpTableInfo = 0; } @@ -456,11 +462,15 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { // MachineFrameInfo implementation //===----------------------------------------------------------------------===// +const TargetFrameLowering *MachineFrameInfo::getFrameLowering() const { + return TM.getFrameLowering(); +} + /// ensureMaxAlignment - Make sure the function is at least Align bytes /// aligned. 
void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { - if (!TFI.isStackRealignable() || !RealignOption) - assert(Align <= TFI.getStackAlignment() && + if (!getFrameLowering()->isStackRealignable() || !RealignOption) + assert(Align <= getFrameLowering()->getStackAlignment() && "For targets without stack realignment, Align is out of limit!"); if (MaxAlignment < Align) MaxAlignment = Align; } @@ -482,8 +492,10 @@ static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { assert(Size != 0 && "Cannot allocate zero size stack objects!"); - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, Alloca)); int Index = (int)Objects.size() - NumFixedObjects - 1; @@ -498,8 +510,10 @@ int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, /// int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, unsigned Alignment) { - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); CreateStackObject(Size, Alignment, true, false); int Index = (int)Objects.size() - NumFixedObjects - 1; ensureMaxAlignment(Alignment); @@ -513,8 +527,10 @@ int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, /// int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { HasVarSizedObjects = true; - Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Alignment, TFI.getStackAlignment()); + Alignment = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Alignment, getFrameLowering()->getStackAlignment()); Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); ensureMaxAlignment(Alignment); return (int)Objects.size()-NumFixedObjects-1; @@ -532,10 +548,12 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // the incoming frame position. If the frame object is at offset 32 and // the stack is guaranteed to be 16-byte aligned, then we know that the // object is 16-byte aligned. 
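// Illustrative sketch (not part of the patch): a minimal version of the
// clampStackAlignment helper threaded through the MachineFrameInfo call
// sites above, assuming its body simply caps the requested alignment at
// the target's stack alignment when realignment is unavailable or
// disabled. Not the LLVM API.
#include <cassert>

static unsigned clampStackAlignment(bool ShouldClamp, unsigned Align,
                                    unsigned StackAlign) {
  return (ShouldClamp && Align > StackAlign) ? StackAlign : Align;
}

int main() {
  // A 32-byte request on a 16-byte-aligned, non-realignable stack is capped.
  assert(clampStackAlignment(true, 32, 16) == 16);
  // With stack realignment available, the request passes through.
  assert(clampStackAlignment(false, 32, 16) == 32);
  return 0;
}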
- unsigned StackAlign = TFI.getStackAlignment(); + unsigned StackAlign = getFrameLowering()->getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); - Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, - Align, TFI.getStackAlignment()); + Align = + clampStackAlignment(!getFrameLowering()->isStackRealignable() || + !RealignOption, + Align, getFrameLowering()->getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*NeedSP*/ false, @@ -769,6 +787,10 @@ void MachineJumpTableInfo::dump() const { print(dbgs()); } void MachineConstantPoolValue::anchor() { } +const DataLayout *MachineConstantPool::getDataLayout() const { + return TM.getDataLayout(); +} + Type *MachineConstantPoolEntry::getType() const { if (isMachineConstantPoolEntry()) return Val.MachineCPVal->getType(); @@ -850,7 +872,8 @@ unsigned MachineConstantPool::getConstantPoolIndex(const Constant *C, // FIXME, this could be made much more efficient for large constant pools. for (unsigned i = 0, e = Constants.size(); i != e; ++i) if (!Constants[i].isMachineConstantPoolEntry() && - CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, TD)) { + CanShareConstantPoolEntry(Constants[i].Val.ConstVal, C, + getDataLayout())) { if ((unsigned)Constants[i].getAlignment() < Alignment) Constants[i].Alignment = Alignment; return i; @@ -887,7 +910,7 @@ void MachineConstantPool::print(raw_ostream &OS) const { if (Constants[i].isMachineConstantPoolEntry()) Constants[i].Val.MachineCPVal->print(OS); else - OS << *(const Value*)Constants[i].Val.ConstVal; + WriteAsOperand(OS, Constants[i].Val.ConstVal, /*PrintType=*/false); OS << ", align=" << Constants[i].getAlignment(); OS << "\n"; } diff --git a/contrib/llvm/lib/CodeGen/MachineInstr.cpp b/contrib/llvm/lib/CodeGen/MachineInstr.cpp index 32d0668..295b450 100644 --- a/contrib/llvm/lib/CodeGen/MachineInstr.cpp +++ b/contrib/llvm/lib/CodeGen/MachineInstr.cpp @@ -647,12 +647,15 @@ void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { } } +#ifndef NDEBUG + bool isMetaDataOp = Op.getType() == MachineOperand::MO_Metadata; // OpNo now points as the desired insertion point. Unless this is a variadic // instruction, only implicit regs are allowed beyond MCID->getNumOperands(). // RegMask operands go between the explicit and implicit operands. assert((isImpReg || Op.isRegMask() || MCID->isVariadic() || - OpNo < MCID->getNumOperands()) && + OpNo < MCID->getNumOperands() || isMetaDataOp) && "Trying to add an operand to a machine instr that is already done!"); +#endif MachineRegisterInfo *MRI = getRegInfo(); @@ -1253,32 +1256,6 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII, return true; } -/// isSafeToReMat - Return true if it's safe to rematerialize the specified -/// instruction which defined the specified register instead of copying it. -bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII, - AliasAnalysis *AA, - unsigned DstReg) const { - bool SawStore = false; - if (!TII->isTriviallyReMaterializable(this, AA) || - !isSafeToMove(TII, AA, SawStore)) - return false; - for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { - const MachineOperand &MO = getOperand(i); - if (!MO.isReg()) - continue; - // FIXME: For now, do not remat any instruction with register operands. - // Later on, we can loosen the restriction is the register operands have - // not been modified between the def and use. 
Note, this is different from - // MachineSink because the code is no longer in two-address form (at least - // partially). - if (MO.isUse()) - return false; - else if (!MO.isDead() && MO.getReg() != DstReg) - return false; - } - return true; -} - /// hasOrderedMemoryRef - Return true if this instruction may have an ordered /// or volatile memory reference, or if the information describing the memory /// reference is not available. Return false if it is known to have no ordered @@ -1411,8 +1388,10 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, const LLVMContext &Ctx = MF->getFunction()->getContext(); if (!DL.isUnknown()) { // Print source line info. DIScope Scope(DL.getScope(Ctx)); + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); // Omit the directory, because it's likely to be long and uninteresting. - if (Scope.Verify()) + if (Scope) CommentOS << Scope.getFilename(); else CommentOS << ""; @@ -1726,31 +1705,31 @@ void MachineInstr::clearRegisterKills(unsigned Reg, } } -bool MachineInstr::addRegisterDead(unsigned IncomingReg, +bool MachineInstr::addRegisterDead(unsigned Reg, const TargetRegisterInfo *RegInfo, bool AddIfNotFound) { - bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(IncomingReg); + bool isPhysReg = TargetRegisterInfo::isPhysicalRegister(Reg); bool hasAliases = isPhysReg && - MCRegAliasIterator(IncomingReg, RegInfo, false).isValid(); + MCRegAliasIterator(Reg, RegInfo, false).isValid(); bool Found = false; SmallVector DeadOps; for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { MachineOperand &MO = getOperand(i); if (!MO.isReg() || !MO.isDef()) continue; - unsigned Reg = MO.getReg(); - if (!Reg) + unsigned MOReg = MO.getReg(); + if (!MOReg) continue; - if (Reg == IncomingReg) { + if (MOReg == Reg) { MO.setIsDead(); Found = true; } else if (hasAliases && MO.isDead() && - TargetRegisterInfo::isPhysicalRegister(Reg)) { + TargetRegisterInfo::isPhysicalRegister(MOReg)) { // There exists a super-register that's marked dead. 
- if (RegInfo->isSuperRegister(IncomingReg, Reg)) + if (RegInfo->isSuperRegister(Reg, MOReg)) return true; - if (RegInfo->isSubRegister(IncomingReg, Reg)) + if (RegInfo->isSubRegister(Reg, MOReg)) DeadOps.push_back(i); } } @@ -1770,7 +1749,7 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, if (Found || !AddIfNotFound) return Found; - addOperand(MachineOperand::CreateReg(IncomingReg, + addOperand(MachineOperand::CreateReg(Reg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/, @@ -1778,21 +1757,21 @@ bool MachineInstr::addRegisterDead(unsigned IncomingReg, return true; } -void MachineInstr::addRegisterDefined(unsigned IncomingReg, +void MachineInstr::addRegisterDefined(unsigned Reg, const TargetRegisterInfo *RegInfo) { - if (TargetRegisterInfo::isPhysicalRegister(IncomingReg)) { - MachineOperand *MO = findRegisterDefOperand(IncomingReg, false, RegInfo); + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + MachineOperand *MO = findRegisterDefOperand(Reg, false, RegInfo); if (MO) return; } else { for (unsigned i = 0, e = getNumOperands(); i != e; ++i) { const MachineOperand &MO = getOperand(i); - if (MO.isReg() && MO.getReg() == IncomingReg && MO.isDef() && + if (MO.isReg() && MO.getReg() == Reg && MO.isDef() && MO.getSubReg() == 0) return; } } - addOperand(MachineOperand::CreateReg(IncomingReg, + addOperand(MachineOperand::CreateReg(Reg, true /*IsDef*/, true /*IsImp*/)); } diff --git a/contrib/llvm/lib/CodeGen/MachineLICM.cpp b/contrib/llvm/lib/CodeGen/MachineLICM.cpp index ed3ed4d..104eacd 100644 --- a/contrib/llvm/lib/CodeGen/MachineLICM.cpp +++ b/contrib/llvm/lib/CodeGen/MachineLICM.cpp @@ -172,7 +172,7 @@ namespace { BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet &StoredFIs, - SmallVector &Candidates); + SmallVectorImpl &Candidates); /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the /// current loop. @@ -404,7 +404,7 @@ void MachineLICM::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, BitVector &PhysRegClobbers, SmallSet &StoredFIs, - SmallVector &Candidates) { + SmallVectorImpl &Candidates) { bool RuledOut = false; bool HasNonInvariantUse = false; unsigned Def = 0; @@ -468,12 +468,12 @@ void MachineLICM::ProcessMI(MachineInstr *MI, for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) { if (PhysRegDefs.test(*AS)) PhysRegClobbers.set(*AS); - if (PhysRegClobbers.test(*AS)) - // MI defined register is seen defined by another instruction in - // the loop, it cannot be a LICM candidate. - RuledOut = true; PhysRegDefs.set(*AS); } + if (PhysRegClobbers.test(Reg)) + // MI defined register is seen defined by another instruction in + // the loop, it cannot be a LICM candidate. + RuledOut = true; } // Only consider reloads for now and remats which do not have register @@ -502,7 +502,7 @@ void MachineLICM::HoistRegionPostRA() { // Walk the entire region, count number of defs for each register, and // collect potential LICM candidates. - const std::vector Blocks = CurLoop->getBlocks(); + const std::vector &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; @@ -584,7 +584,7 @@ void MachineLICM::HoistRegionPostRA() { /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current /// loop, and make sure it is not killed by any instructions in the loop. 
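The SmallVector-to-SmallVectorImpl signature change in the MachineLICM hunk above (and in several later hunks) is a stock LLVM idiom rather than new logic. A minimal standalone sketch of why it matters, assuming only that LLVM's ADT headers are on the include path:

    // Passing SmallVectorImpl<T>& decouples a callee from each caller's
    // choice of inline capacity N; all callers share one instantiation and
    // no copy happens at the call site. Illustrative code only.
    #include "llvm/ADT/SmallVector.h"
    #include <cstdio>

    static void collect(llvm::SmallVectorImpl<unsigned> &Out) {
      for (unsigned i = 0; i != 4; ++i)
        Out.push_back(i * i);
    }

    int main() {
      llvm::SmallVector<unsigned, 8> A; // callers may pick any capacity...
      llvm::SmallVector<unsigned, 2> B; // ...and still call the same function.
      collect(A);
      collect(B);
      std::printf("%u %u\n", A.back(), B.back());
      return 0;
    }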
void MachineLICM::AddToLiveIns(unsigned Reg) { - const std::vector Blocks = CurLoop->getBlocks(); + const std::vector &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { MachineBasicBlock *BB = Blocks[i]; if (!BB->isLiveIn(Reg)) @@ -1084,7 +1084,7 @@ bool MachineLICM::CanCauseHighRegPressure(DenseMap &Cost, return true; for (unsigned i = BackTrace.size(); i != 0; --i) { - SmallVector &RP = BackTrace[i-1]; + SmallVectorImpl &RP = BackTrace[i-1]; if (RP[RCId] + Cost >= Limit) return true; } @@ -1130,7 +1130,7 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { // Update register pressure of blocks from loop header to current block. for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) { - SmallVector &RP = BackTrace[i]; + SmallVectorImpl &RP = BackTrace[i]; for (DenseMap::iterator CI = Cost.begin(), CE = Cost.end(); CI != CE; ++CI) { unsigned RCId = CI->first; diff --git a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp index 8af9d05..bb54284 100644 --- a/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineModuleInfo.cpp @@ -253,13 +253,12 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, const MCObjectFileInfo *MOFI) - : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) { + : ImmutablePass(ID), Context(&MAI, &MRI, MOFI, 0, false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); } MachineModuleInfo::MachineModuleInfo() - : ImmutablePass(ID), - Context(*(MCAsmInfo*)0, *(MCRegisterInfo*)0, (MCObjectFileInfo*)0) { + : ImmutablePass(ID), Context(0, 0, 0) { llvm_unreachable("This MachineModuleInfo constructor should never be called, " "MMI should always be explicitly constructed by " "LLVMTargetMachine"); @@ -303,7 +302,7 @@ bool MachineModuleInfo::doFinalization(Module &M) { /// void MachineModuleInfo::EndFunction() { // Clean up frame info. - FrameMoves.clear(); + FrameInstructions.clear(); // Clean up exception info. LandingPads.clear(); diff --git a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp index 68372f6..f8b8796 100644 --- a/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/MachineRegisterInfo.cpp @@ -19,16 +19,21 @@ using namespace llvm; -MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) - : TRI(&TRI), IsSSA(true), TracksLiveness(true) { +// Pin the vtable to this file. +void MachineRegisterInfo::Delegate::anchor() {} + +MachineRegisterInfo::MachineRegisterInfo(const TargetMachine &TM) + : TM(TM), TheDelegate(0), IsSSA(true), TracksLiveness(true) { VRegInfo.reserve(256); RegAllocHints.reserve(256); - UsedRegUnits.resize(TRI.getNumRegUnits()); - UsedPhysRegMask.resize(TRI.getNumRegs()); + UsedRegUnits.resize(getTargetRegisterInfo()->getNumRegUnits()); + UsedPhysRegMask.resize(getTargetRegisterInfo()->getNumRegs()); // Create the physreg use/def lists. 
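The "Pin the vtable to this file" comment above refers to the anchor idiom: giving a class one out-of-line virtual member keys its vtable to a single object file instead of letting every translation unit emit a weak copy. A self-contained sketch with invented names:

    // Hypothetical example of the anchor idiom; Widget is not an LLVM class.
    struct Widget {
      virtual void anchor();               // sole out-of-line virtual member
      virtual int id() const { return 7; } // inline virtuals are fine
      virtual ~Widget() {}
    };

    // In exactly one .cpp file: the vtable (and RTTI) are emitted here only,
    // which is what "pinning the vtable" means.
    void Widget::anchor() {}

    int main() { Widget W; return W.id() == 7 ? 0 : 1; }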
- PhysRegUseDefLists = new MachineOperand*[TRI.getNumRegs()]; - memset(PhysRegUseDefLists, 0, sizeof(MachineOperand*)*TRI.getNumRegs()); + PhysRegUseDefLists = + new MachineOperand*[getTargetRegisterInfo()->getNumRegs()]; + memset(PhysRegUseDefLists, 0, + sizeof(MachineOperand*)*getTargetRegisterInfo()->getNumRegs()); } MachineRegisterInfo::~MachineRegisterInfo() { @@ -50,7 +55,8 @@ MachineRegisterInfo::constrainRegClass(unsigned Reg, const TargetRegisterClass *OldRC = getRegClass(Reg); if (OldRC == RC) return RC; - const TargetRegisterClass *NewRC = TRI->getCommonSubClass(OldRC, RC); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getCommonSubClass(OldRC, RC); if (!NewRC || NewRC == OldRC) return NewRC; if (NewRC->getNumRegs() < MinNumRegs) @@ -63,7 +69,8 @@ bool MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterClass *OldRC = getRegClass(Reg); - const TargetRegisterClass *NewRC = TRI->getLargestLegalSuperClass(OldRC); + const TargetRegisterClass *NewRC = + getTargetRegisterInfo()->getLargestLegalSuperClass(OldRC); // Stop early if there is no room to grow. if (NewRC == OldRC) @@ -73,14 +80,16 @@ MachineRegisterInfo::recomputeRegClass(unsigned Reg, const TargetMachine &TM) { for (reg_nodbg_iterator I = reg_nodbg_begin(Reg), E = reg_nodbg_end(); I != E; ++I) { const TargetRegisterClass *OpRC = - I->getRegClassConstraint(I.getOperandNo(), TII, TRI); + I->getRegClassConstraint(I.getOperandNo(), TII, + getTargetRegisterInfo()); if (unsigned SubIdx = I.getOperand().getSubReg()) { if (OpRC) - NewRC = TRI->getMatchingSuperRegClass(NewRC, OpRC, SubIdx); + NewRC = getTargetRegisterInfo()->getMatchingSuperRegClass(NewRC, OpRC, + SubIdx); else - NewRC = TRI->getSubClassWithSubReg(NewRC, SubIdx); + NewRC = getTargetRegisterInfo()->getSubClassWithSubReg(NewRC, SubIdx); } else if (OpRC) - NewRC = TRI->getCommonSubClass(NewRC, OpRC); + NewRC = getTargetRegisterInfo()->getCommonSubClass(NewRC, OpRC); if (!NewRC || NewRC == OldRC) return false; } @@ -102,6 +111,8 @@ MachineRegisterInfo::createVirtualRegister(const TargetRegisterClass *RegClass){ VRegInfo.grow(Reg); VRegInfo[Reg].first = RegClass; RegAllocHints.grow(Reg); + if (TheDelegate) + TheDelegate->MRI_NoteNewVirtualRegister(Reg); return Reg; } @@ -126,24 +137,28 @@ void MachineRegisterInfo::verifyUseList(unsigned Reg) const { MachineOperand *MO = &I.getOperand(); MachineInstr *MI = MO->getParent(); if (!MI) { - errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO << " has no parent instruction.\n"; Valid = false; } MachineOperand *MO0 = &MI->getOperand(0); unsigned NumOps = MI->getNumOperands(); if (!(MO >= MO0 && MO < MO0+NumOps)) { - errs() << PrintReg(Reg, TRI) << " use list MachineOperand " << MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use list MachineOperand " << MO << " doesn't belong to parent MI: " << *MI; Valid = false; } if (!MO->isReg()) { - errs() << PrintReg(Reg, TRI) << " MachineOperand " << MO << ": " << *MO + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " MachineOperand " << MO << ": " << *MO << " is not a register\n"; Valid = false; } if (MO->getReg() != Reg) { - errs() << PrintReg(Reg, TRI) << " use-list MachineOperand " << MO << ": " + errs() << PrintReg(Reg, getTargetRegisterInfo()) + << " use-list MachineOperand " << MO << ": " << *MO << " is the wrong register\n"; Valid = false; } @@ 
-156,7 +171,7 @@ void MachineRegisterInfo::verifyUseLists() const { #ifndef NDEBUG for (unsigned i = 0, e = getNumVirtRegs(); i != e; ++i) verifyUseList(TargetRegisterInfo::index2VirtReg(i)); - for (unsigned i = 1, e = TRI->getNumRegs(); i != e; ++i) + for (unsigned i = 1, e = getTargetRegisterInfo()->getNumRegs(); i != e; ++i) verifyUseList(i); #endif } @@ -390,8 +405,8 @@ void MachineRegisterInfo::dumpUses(unsigned Reg) const { #endif void MachineRegisterInfo::freezeReservedRegs(const MachineFunction &MF) { - ReservedRegs = TRI->getReservedRegs(MF); - assert(ReservedRegs.size() == TRI->getNumRegs() && + ReservedRegs = getTargetRegisterInfo()->getReservedRegs(MF); + assert(ReservedRegs.size() == getTargetRegisterInfo()->getNumRegs() && "Invalid ReservedRegs vector from target"); } @@ -401,7 +416,8 @@ bool MachineRegisterInfo::isConstantPhysReg(unsigned PhysReg, // Check if any overlapping register is modified, or allocatable so it may be // used later. - for (MCRegAliasIterator AI(PhysReg, TRI, true); AI.isValid(); ++AI) + for (MCRegAliasIterator AI(PhysReg, getTargetRegisterInfo(), true); + AI.isValid(); ++AI) if (!def_empty(*AI) || isAllocatable(*AI)) return false; return true; diff --git a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp index bb6aad7..17f0af8 100644 --- a/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp +++ b/contrib/llvm/lib/CodeGen/MachineSSAUpdater.cpp @@ -77,7 +77,7 @@ unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) { static unsigned LookForIdenticalPHI(MachineBasicBlock *BB, - SmallVector, 8> &PredValues) { + SmallVectorImpl > &PredValues) { if (BB->empty()) return 0; diff --git a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp index fff6b2b..e71c4df 100644 --- a/contrib/llvm/lib/CodeGen/MachineScheduler.cpp +++ b/contrib/llvm/lib/CodeGen/MachineScheduler.cpp @@ -21,6 +21,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDFS.h" @@ -30,6 +31,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" #include using namespace llvm; @@ -51,10 +53,11 @@ static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// FIXME: remove this flag after initial testing. It should always be a good -// thing. -static cl::opt EnableCopyConstrain("misched-vcopy", cl::Hidden, - cl::desc("Constrain vreg copies."), cl::init(true)); +static cl::opt EnableRegPressure("misched-regpressure", cl::Hidden, + cl::desc("Enable register pressure scheduling."), cl::init(true)); + +static cl::opt EnableCyclicPath("misched-cyclicpath", cl::Hidden, + cl::desc("Enable cyclic critical path analysis."), cl::init(true)); static cl::opt EnableLoadCluster("misched-cluster", cl::Hidden, cl::desc("Enable load clustering."), cl::init(true)); @@ -69,6 +72,10 @@ static cl::opt VerifyScheduling("verify-misched", cl::Hidden, // DAG subtrees must have at least this many nodes. static const unsigned MinSubtreeSize = 8; +// Pin the vtables to this file. 
+void MachineSchedStrategy::anchor() {} +void ScheduleDAGMutation::anchor() {} + //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry //===----------------------------------------------------------------------===// @@ -98,6 +105,9 @@ public: virtual void print(raw_ostream &O, const Module* = 0) const; static char ID; // Class identification, replacement for typeinfo + +protected: + ScheduleDAGInstrs *createMachineScheduler(); }; } // namespace @@ -152,12 +162,13 @@ DefaultSchedRegistry("default", "Use the target's default scheduler choice.", /// Forward declare the standard machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C); +static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C); /// Decrement this iterator until reaching the top or a non-debug instr. -static MachineBasicBlock::iterator -priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { +static MachineBasicBlock::const_iterator +priorNonDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator Beg) { assert(I != Beg && "reached the top of the region, cannot decrement"); while (--I != Beg) { if (!I->isDebugValue()) @@ -166,10 +177,19 @@ priorNonDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator Beg) { return I; } +/// Non-const version. +static MachineBasicBlock::iterator +priorNonDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator Beg) { + return const_cast( + &*priorNonDebug(MachineBasicBlock::const_iterator(I), Beg)); +} + /// If this iterator is a debug value, increment until reaching the End or a /// non-debug instruction. -static MachineBasicBlock::iterator -nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { +static MachineBasicBlock::const_iterator +nextIfDebug(MachineBasicBlock::const_iterator I, + MachineBasicBlock::const_iterator End) { for(; I != End; ++I) { if (!I->isDebugValue()) break; @@ -177,6 +197,34 @@ nextIfDebug(MachineBasicBlock::iterator I, MachineBasicBlock::iterator End) { return I; } +/// Non-const version. +static MachineBasicBlock::iterator +nextIfDebug(MachineBasicBlock::iterator I, + MachineBasicBlock::const_iterator End) { + // Cast the return value to nonconst MachineInstr, then cast to an + // instr_iterator, which does not check for null, finally return a + // bundle_iterator. + return MachineBasicBlock::instr_iterator( + const_cast( + &*nextIfDebug(MachineBasicBlock::const_iterator(I), End))); +} + +/// Instantiate a ScheduleDAGInstrs that will be owned by the caller. +ScheduleDAGInstrs *MachineScheduler::createMachineScheduler() { + // Select the scheduler, or set the default. + MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; + if (Ctor != useDefaultMachineSched) + return Ctor(this); + + // Get the default scheduler set by the target for this function. + ScheduleDAGInstrs *Scheduler = PassConfig->createMachineScheduler(this); + if (Scheduler) + return Scheduler; + + // Default to GenericScheduler. + return createGenericSched(this); +} + /// Top-level MachineScheduler pass driver. /// /// Visit blocks in function order. 
Divide each block into scheduling regions @@ -207,23 +255,14 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); if (VerifyScheduling) { - DEBUG(LIS->print(dbgs())); + DEBUG(LIS->dump()); MF->verify(this, "Before machine scheduling."); } RegClassInfo->runOnMachineFunction(*MF); - // Select the scheduler, or set the default. - MachineSchedRegistry::ScheduleDAGCtor Ctor = MachineSchedOpt; - if (Ctor == useDefaultMachineSched) { - // Get the default scheduler set by the target. - Ctor = MachineSchedRegistry::getDefault(); - if (!Ctor) { - Ctor = createConvergingSched; - MachineSchedRegistry::setDefault(Ctor); - } - } - // Instantiate the selected scheduler. - OwningPtr Scheduler(Ctor(this)); + // Instantiate the selected scheduler for this target, function, and + // optimization level. + OwningPtr Scheduler(createMachineScheduler()); // Visit all machine basic blocks. // @@ -258,14 +297,15 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { // The next region starts above the previous region. Look backward in the // instruction stream until we find the nearest boundary. + unsigned NumRegionInstrs = 0; MachineBasicBlock::iterator I = RegionEnd; - for(;I != MBB->begin(); --I, --RemainingInstrs) { + for(;I != MBB->begin(); --I, --RemainingInstrs, ++NumRegionInstrs) { if (TII->isSchedulingBoundary(llvm::prior(I), MBB, *MF)) break; } // Notify the scheduler of the region, even if we may skip scheduling // it. Perhaps it still needs to be bundled. - Scheduler->enterRegion(MBB, I, RegionEnd, RemainingInstrs); + Scheduler->enterRegion(MBB, I, RegionEnd, NumRegionInstrs); // Skip empty scheduling regions (0 or 1 schedulable instructions). if (I == RegionEnd || I == llvm::prior(RegionEnd)) { @@ -280,7 +320,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; - dbgs() << " Remaining: " << RemainingInstrs << "\n"); + dbgs() << " RegionInstrs: " << NumRegionInstrs + << " Remaining: " << RemainingInstrs << "\n"); // Schedule a region: possibly reorder instructions. // This invalidates 'RegionEnd' and 'I'. @@ -297,7 +338,7 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { Scheduler->finishBlock(); } Scheduler->finalizeSchedule(); - DEBUG(LIS->print(dbgs())); + DEBUG(LIS->dump()); if (VerifyScheduling) MF->verify(this, "After machine scheduling."); return true; @@ -309,7 +350,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ReadyQueue::dump() { - dbgs() << " " << Name << ": "; + dbgs() << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -449,13 +490,19 @@ bool ScheduleDAGMI::checkSchedLimit() { void ScheduleDAGMI::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) + unsigned regioninstrs) { - ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); // For convenience remember the end of the liveness region. LiveRegionEnd = (RegionEnd == bb->end()) ? 
RegionEnd : llvm::next(RegionEnd); + + SUPressureDiffs.clear(); + + SchedImpl->initPolicy(begin, end, regioninstrs); + + ShouldTrackPressure = SchedImpl->shouldTrackPressure(); } // Setup the register pressure trackers for the top scheduled top and bottom @@ -467,7 +514,7 @@ void ScheduleDAGMI::initRegPressure() { // Close the RPTracker to finalize live ins. RPTracker.closeRegion(); - DEBUG(RPTracker.getPressure().dump(TRI)); + DEBUG(RPTracker.dump()); // Initialize the live ins and live outs. TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs); @@ -479,9 +526,23 @@ void ScheduleDAGMI::initRegPressure() { TopRPTracker.closeTop(); BotRPTracker.closeBottom(); + BotRPTracker.initLiveThru(RPTracker); + if (!BotRPTracker.getLiveThru().empty()) { + TopRPTracker.initLiveThru(BotRPTracker.getLiveThru()); + DEBUG(dbgs() << "Live Thru: "; + dumpRegSetPressure(BotRPTracker.getLiveThru(), TRI)); + }; + + // For each live out vreg reduce the pressure change associated with other + // uses of the same vreg below the live-out reaching def. + updatePressureDiffs(RPTracker.getPressure().LiveOutRegs); + // Account for liveness generated by the region boundary. - if (LiveRegionEnd != RegionEnd) - BotRPTracker.recede(); + if (LiveRegionEnd != RegionEnd) { + SmallVector LiveUses; + BotRPTracker.recede(&LiveUses); + updatePressureDiffs(LiveUses); + } assert(BotRPTracker.getPos() == RegionEnd && "Can't find the region bottom"); @@ -491,38 +552,88 @@ void ScheduleDAGMI::initRegPressure() { const std::vector &RegionPressure = RPTracker.getPressure().MaxSetPressure; for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { - unsigned Limit = TRI->getRegPressureSetLimit(i); - DEBUG(dbgs() << TRI->getRegPressureSetName(i) - << "Limit " << Limit - << " Actual " << RegionPressure[i] << "\n"); - if (RegionPressure[i] > Limit) - RegionCriticalPSets.push_back(PressureElement(i, 0)); + unsigned Limit = RegClassInfo->getRegPressureSetLimit(i); + if (RegionPressure[i] > Limit) { + DEBUG(dbgs() << TRI->getRegPressureSetName(i) + << " Limit " << Limit + << " Actual " << RegionPressure[i] << "\n"); + RegionCriticalPSets.push_back(PressureChange(i)); + } } DEBUG(dbgs() << "Excess PSets: "; for (unsigned i = 0, e = RegionCriticalPSets.size(); i != e; ++i) dbgs() << TRI->getRegPressureSetName( - RegionCriticalPSets[i].PSetID) << " "; + RegionCriticalPSets[i].getPSet()) << " "; dbgs() << "\n"); } -// FIXME: When the pressure tracker deals in pressure differences then we won't -// iterate over all RegionCriticalPSets[i]. 
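In initRegPressure() above, RegionCriticalPSets records only the pressure sets whose measured maximum exceeds the target's limit, and the scan leaves them sorted by ascending pressure-set ID. A toy restatement of that filter, with invented numbers:

    // Plain arrays stand in for the tracker state; the filter is the point.
    #include <cstdio>
    #include <vector>

    int main() {
      unsigned Limit[] = {12, 20, 8};       // per-set limits from the target
      unsigned MaxPressure[] = {10, 23, 9}; // measured region maxima
      std::vector<unsigned> RegionCritical;
      for (unsigned i = 0; i != 3; ++i)
        if (MaxPressure[i] > Limit[i])
          RegionCritical.push_back(i);      // IDs come out sorted
      for (unsigned ID : RegionCritical)
        std::printf("PSet %u exceeds its limit\n", ID); // prints 1 and 2
      return 0;
    }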
void ScheduleDAGMI:: -updateScheduledPressure(const std::vector<unsigned> &NewMaxPressure) { - for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { - unsigned ID = RegionCriticalPSets[i].PSetID; - int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; - if ((int)NewMaxPressure[ID] > MaxUnits) - MaxUnits = NewMaxPressure[ID]; +updateScheduledPressure(const SUnit *SU, + const std::vector<unsigned> &NewMaxPressure) { + const PressureDiff &PDiff = getPressureDiff(SU); + unsigned CritIdx = 0, CritEnd = RegionCriticalPSets.size(); + for (PressureDiff::const_iterator I = PDiff.begin(), E = PDiff.end(); + I != E; ++I) { + if (!I->isValid()) + break; + unsigned ID = I->getPSet(); + while (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() < ID) + ++CritIdx; + if (CritIdx != CritEnd && RegionCriticalPSets[CritIdx].getPSet() == ID) { + if ((int)NewMaxPressure[ID] > RegionCriticalPSets[CritIdx].getUnitInc() + && NewMaxPressure[ID] <= INT16_MAX) + RegionCriticalPSets[CritIdx].setUnitInc(NewMaxPressure[ID]); + } + unsigned Limit = RegClassInfo->getRegPressureSetLimit(ID); + if (NewMaxPressure[ID] >= Limit - 2) { + DEBUG(dbgs() << " " << TRI->getRegPressureSetName(ID) << ": " + << NewMaxPressure[ID] << " > " << Limit << "(+ " + << BotRPTracker.getLiveThru()[ID] << " livethru)\n"); + } } - DEBUG( - for (unsigned i = 0, e = NewMaxPressure.size(); i < e; ++i) { - unsigned Limit = TRI->getRegPressureSetLimit(i); - if (NewMaxPressure[i] > Limit ) { - dbgs() << " " << TRI->getRegPressureSetName(i) << ": " - << NewMaxPressure[i] << " > " << Limit << "\n"; +} + +/// Update the PressureDiff array for liveness after scheduling this +/// instruction. +void ScheduleDAGMI::updatePressureDiffs(ArrayRef<unsigned> LiveUses) { + for (unsigned LUIdx = 0, LUEnd = LiveUses.size(); LUIdx != LUEnd; ++LUIdx) { + /// FIXME: Currently assuming single-use physregs. + unsigned Reg = LiveUses[LUIdx]; + DEBUG(dbgs() << " LiveReg: " << PrintVRegOrUnit(Reg, TRI) << "\n"); + if (!TRI->isVirtualRegister(Reg)) + continue; + + // This may be called before CurrentBottom has been initialized. However, + // BotRPTracker must have a valid position. We want the value live into the + // instruction or live out of the block, so ask for the previous + // instruction's live-out. + const LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI; + MachineBasicBlock::const_iterator I = + nextIfDebug(BotRPTracker.getPos(), BB->end()); + if (I == BB->end()) + VNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + else { + LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(I)); + VNI = LRQ.valueIn(); + } + // RegisterPressureTracker guarantees that readsReg is true for LiveUses. + assert(VNI && "No live value at use."); + for (VReg2UseMap::iterator + UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { + SUnit *SU = UI->SU; + DEBUG(dbgs() << " UpdateRegP: SU(" << SU->NodeNum << ") " + << *SU->getInstr()); + // If this use comes before the reaching def, it cannot be a last use, so + // decrease its pressure change. + if (!SU->isScheduled && SU != &ExitSU) { + LiveQueryResult LRQ + = LI.Query(LIS->getInstructionIndex(SU->getInstr())); + if (LRQ.valueIn() == VNI) + getPressureDiff(SU).addPressureChange(Reg, true, &MRI); } - }); + } + } } /// schedule - Called back from MachineScheduler::runOnMachineFunction @@ -580,15 +691,23 @@ void ScheduleDAGMI::schedule() { /// Build the DAG and setup three register pressure trackers.
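The new updateScheduledPressure() above leans on that ordering: the candidate's PressureDiff and RegionCriticalPSets are both sorted by pressure-set ID, so one forward-only cursor (CritIdx) suffices and no entry is ever rescanned. The merge step in isolation, with made-up IDs:

    #include <cstdio>
    #include <vector>

    int main() {
      std::vector<unsigned> DiffIDs = {1, 4, 7};    // PSets the SU touches
      std::vector<unsigned> CritIDs = {0, 4, 7, 9}; // region-critical PSets
      unsigned CritIdx = 0, CritEnd = CritIDs.size();
      for (unsigned ID : DiffIDs) {
        while (CritIdx != CritEnd && CritIDs[CritIdx] < ID)
          ++CritIdx;                                // advance, never rewind
        if (CritIdx != CritEnd && CritIDs[CritIdx] == ID)
          std::printf("PSet %u is region-critical\n", ID); // prints 4 and 7
      }
      return 0;
    }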
void ScheduleDAGMI::buildDAGWithRegPressure() { + if (!ShouldTrackPressure) { + RPTracker.reset(); + RegionCriticalPSets.clear(); + buildSchedGraph(AA); + return; + } + // Initialize the register pressure tracker used by buildSchedGraph. - RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd); + RPTracker.init(&MF, RegClassInfo, LIS, BB, LiveRegionEnd, + /*TrackUntiedDefs=*/true); // Account for liveness generated by the region boundary. if (LiveRegionEnd != RegionEnd) RPTracker.recede(); // Build the DAG, and compute current register pressure. - buildSchedGraph(AA, &RPTracker); + buildSchedGraph(AA, &RPTracker, &SUPressureDiffs); // Initialize top/bottom trackers after computing region pressure. initRegPressure(); @@ -631,6 +750,91 @@ void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl<SUnit*> &TopRoots, ExitSU.biasCriticalPath(); } +/// Compute the max cyclic critical path through the DAG. The scheduling DAG +/// only provides the critical path for single-block loops. To handle loops that +/// span blocks, we could use the vreg path latencies provided by +/// MachineTraceMetrics instead. However, MachineTraceMetrics is not currently +/// available for use in the scheduler. +/// +/// The cyclic path estimation identifies a def-use pair that crosses the back +/// edge and considers the depth and height of the nodes. For example, consider +/// the following instruction sequence where each instruction has unit latency +/// and defines an eponymous virtual register: +/// +/// a->b(a,c)->c(b)->d(c)->exit +/// +/// The cyclic critical path is two cycles: b->c->b +/// The acyclic critical path is four cycles: a->b->c->d->exit +/// LiveOutHeight = height(c) = len(c->d->exit) = 2 +/// LiveOutDepth = depth(c) + 1 = len(a->b->c) + 1 = 3 +/// LiveInHeight = height(b) + 1 = len(b->c->d->exit) + 1 = 4 +/// LiveInDepth = depth(b) = len(a->b) = 1 +/// +/// LiveOutDepth - LiveInDepth = 3 - 1 = 2 +/// LiveInHeight - LiveOutHeight = 4 - 2 = 2 +/// CyclicCriticalPath = min(2, 2) = 2 +unsigned ScheduleDAGMI::computeCyclicCriticalPath() { + // This only applies to single-block loops. + if (!BB->isSuccessor(BB)) + return 0; + + unsigned MaxCyclicLatency = 0; + // Visit each live out vreg def to find def/use pairs that cross iterations. + ArrayRef<unsigned> LiveOuts = RPTracker.getPressure().LiveOutRegs; + for (ArrayRef<unsigned>::iterator RI = LiveOuts.begin(), RE = LiveOuts.end(); + RI != RE; ++RI) { + unsigned Reg = *RI; + if (!TRI->isVirtualRegister(Reg)) + continue; + const LiveInterval &LI = LIS->getInterval(Reg); + const VNInfo *DefVNI = LI.getVNInfoBefore(LIS->getMBBEndIdx(BB)); + if (!DefVNI) + continue; + + MachineInstr *DefMI = LIS->getInstructionFromIndex(DefVNI->def); + const SUnit *DefSU = getSUnit(DefMI); + if (!DefSU) + continue; + + unsigned LiveOutHeight = DefSU->getHeight(); + unsigned LiveOutDepth = DefSU->getDepth() + DefSU->Latency; + // Visit all local users of the vreg def. + for (VReg2UseMap::iterator + UI = VRegUses.find(Reg); UI != VRegUses.end(); ++UI) { + if (UI->SU == &ExitSU) + continue; + + // Only consider uses of the phi. + LiveQueryResult LRQ = + LI.Query(LIS->getInstructionIndex(UI->SU->getInstr())); + if (!LRQ.valueIn()->isPHIDef()) + continue; + + // Assume that a path spanning two iterations is a cycle, which could + // overestimate in strange cases. This allows cyclic latency to be + // estimated as the minimum slack of the vreg's depth or height.
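The slack computation that the doc comment above works through continues just below. As a numeric cross-check, here is the a->b(a,c)->c(b)->d(c)->exit example evaluated standalone, with unit latencies and the values copied from the comment:

    #include <algorithm>
    #include <cstdio>

    int main() {
      unsigned LiveOutHeight = 2, LiveOutDepth = 3; // node c
      unsigned LiveInHeight = 4, LiveInDepth = 1;   // node b
      unsigned Cyclic = std::min(LiveOutDepth - LiveInDepth,
                                 LiveInHeight - LiveOutHeight);
      std::printf("cyclic critical path = %uc\n", Cyclic); // prints 2c
      return 0;
    }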
+ unsigned CyclicLatency = 0; + if (LiveOutDepth > UI->SU->getDepth()) + CyclicLatency = LiveOutDepth - UI->SU->getDepth(); + + unsigned LiveInHeight = UI->SU->getHeight() + DefSU->Latency; + if (LiveInHeight > LiveOutHeight) { + if (LiveInHeight - LiveOutHeight < CyclicLatency) + CyclicLatency = LiveInHeight - LiveOutHeight; + } + else + CyclicLatency = 0; + + DEBUG(dbgs() << "Cyclic Path: SU(" << DefSU->NodeNum << ") -> SU(" + << UI->SU->NodeNum << ") = " << CyclicLatency << "c\n"); + if (CyclicLatency > MaxCyclicLatency) + MaxCyclicLatency = CyclicLatency; + } + } + DEBUG(dbgs() << "Cyclic Critical Path: " << MaxCyclicLatency << "c\n"); + return MaxCyclicLatency; +} + /// Identify DAG roots and setup scheduler queues. void ScheduleDAGMI::initQueues(ArrayRef TopRoots, ArrayRef BotRoots) { @@ -658,11 +862,13 @@ void ScheduleDAGMI::initQueues(ArrayRef TopRoots, SchedImpl->registerRoots(); // Advance past initial DebugValues. - assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); CurrentTop = nextIfDebug(RegionBegin, RegionEnd); - TopRPTracker.setPos(CurrentTop); - CurrentBottom = RegionEnd; + + if (ShouldTrackPressure) { + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); + TopRPTracker.setPos(CurrentTop); + } } /// Move an instruction and update register pressure. @@ -679,10 +885,12 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { TopRPTracker.setPos(MI); } - // Update top scheduled pressure. - TopRPTracker.advance(); - assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); - updateScheduledPressure(TopRPTracker.getPressure().MaxSetPressure); + if (ShouldTrackPressure) { + // Update top scheduled pressure. + TopRPTracker.advance(); + assert(TopRPTracker.getPos() == CurrentTop && "out of sync"); + updateScheduledPressure(SU, TopRPTracker.getPressure().MaxSetPressure); + } } else { assert(SU->isBottomReady() && "node still has unscheduled dependencies"); @@ -698,10 +906,14 @@ void ScheduleDAGMI::scheduleMI(SUnit *SU, bool IsTopNode) { moveInstruction(MI, CurrentBottom); CurrentBottom = MI; } - // Update bottom scheduled pressure. - BotRPTracker.recede(); - assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); - updateScheduledPressure(BotRPTracker.getPressure().MaxSetPressure); + if (ShouldTrackPressure) { + // Update bottom scheduled pressure. + SmallVector LiveUses; + BotRPTracker.recede(&LiveUses); + assert(BotRPTracker.getPos() == CurrentBottom && "out of sync"); + updateScheduledPressure(SU, BotRPTracker.getPressure().MaxSetPressure); + updatePressureDiffs(LiveUses); + } } } @@ -1019,6 +1231,12 @@ void CopyConstrain::constrainLocalCopy(SUnit *CopySU, ScheduleDAGMI *DAG) { GlobalSegment->start)) { return; } + // If the prior global segment may be defined by the same two-address + // instruction that also defines LocalLI, then can't make a hole here. + if (SlotIndex::isSameInstr(llvm::prior(GlobalSegment)->start, + LocalLI->beginIndex())) { + return; + } // If GlobalLI has a prior segment, it must be live into the EBB. Otherwise // it would be a disconnected component in the live range. assert(llvm::prior(GlobalSegment)->start < LocalLI->beginIndex() && @@ -1101,24 +1319,23 @@ void CopyConstrain::apply(ScheduleDAGMI *DAG) { } //===----------------------------------------------------------------------===// -// ConvergingScheduler - Implementation of the standard MachineSchedStrategy. +// GenericScheduler - Implementation of the generic MachineSchedStrategy. 
//===----------------------------------------------------------------------===// namespace { -/// ConvergingScheduler shrinks the unscheduled zone using heuristics to balance +/// GenericScheduler shrinks the unscheduled zone using heuristics to balance /// the schedule. -class ConvergingScheduler : public MachineSchedStrategy { +class GenericScheduler : public MachineSchedStrategy { public: /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. enum CandReason { - NoCand, PhysRegCopy, SingleExcess, SingleCritical, Cluster, Weak, + NoCand, PhysRegCopy, RegExcess, RegCritical, Cluster, Weak, RegMax, ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, - TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, - NodeOrder}; + TopDepthReduce, TopPathReduce, NextDefUse, NodeOrder}; #ifndef NDEBUG - static const char *getReasonStr(ConvergingScheduler::CandReason Reason); + static const char *getReasonStr(GenericScheduler::CandReason Reason); #endif /// Policy for scheduling the next instruction in the candidate's zone. @@ -1149,7 +1366,7 @@ public: } }; - /// Store the state used by ConvergingScheduler heuristics, required for the + /// Store the state used by GenericScheduler heuristics, required for the /// lifetime of one invocation of pickNode(). struct SchedCandidate { CandPolicy Policy; @@ -1160,6 +1377,9 @@ public: // The reason for this candidate. CandReason Reason; + // Set of reasons that apply to multiple candidates. + uint32_t RepeatReasonSet; + // Register pressure values for the best candidate. RegPressureDelta RPDelta; @@ -1167,7 +1387,7 @@ public: SchedResourceDelta ResDelta; SchedCandidate(const CandPolicy &policy) - : Policy(policy), SU(NULL), Reason(NoCand) {} + : Policy(policy), SU(NULL), Reason(NoCand), RepeatReasonSet(0) {} bool isValid() const { return SU; } @@ -1180,6 +1400,9 @@ public: ResDelta = Best.ResDelta; } + bool isRepeat(CandReason R) { return RepeatReasonSet & (1 << R); } + void setRepeat(CandReason R) { RepeatReasonSet |= (1 << R); } + void initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); }; @@ -1188,33 +1411,27 @@ public: struct SchedRemainder { // Critical path through the DAG in expected latency. unsigned CriticalPath; + unsigned CyclicCritPath; + + // Scaled count of micro-ops left to schedule. + unsigned RemIssueCount; + + bool IsAcyclicLatencyLimited; // Unscheduled resources SmallVector RemainingCounts; - // Critical resource for the unscheduled zone. - unsigned CritResIdx; - // Number of micro-ops left to schedule. - unsigned RemainingMicroOps; void reset() { CriticalPath = 0; + CyclicCritPath = 0; + RemIssueCount = 0; + IsAcyclicLatencyLimited = false; RemainingCounts.clear(); - CritResIdx = 0; - RemainingMicroOps = 0; } SchedRemainder() { reset(); } void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); - - unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { - if (!SchedModel->hasInstrSchedModel()) - return 0; - - return std::max( - RemainingMicroOps * SchedModel->getMicroOpFactor(), - RemainingCounts[CritResIdx]); - } }; /// Each Scheduling boundary is associated with ready queues. It tracks the @@ -1235,8 +1452,13 @@ public: ScheduleHazardRecognizer *HazardRec; + /// Number of cycles it takes to issue the instructions scheduled in this + /// zone. It is defined as: scheduled-micro-ops / issue-width + stalls. + /// See getStalls(). 
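SchedCandidate::RepeatReasonSet above packs one bit per CandReason value, so "was this reason already applied?" is a single mask test. The same pattern reduced to a few lines, with enum values invented to match the shape rather than the exact list:

    #include <cstdio>

    enum CandReason { NoCand, PhysRegCopy, RegExcess, RegCritical };

    int main() {
      unsigned RepeatReasonSet = 0;
      RepeatReasonSet |= (1u << RegExcess);            // setRepeat(RegExcess)
      bool Seen = RepeatReasonSet & (1u << RegExcess); // isRepeat(RegExcess)
      std::printf("RegExcess repeated: %d\n", Seen);   // prints 1
      return 0;
    }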
unsigned CurrCycle; - unsigned IssueCount; + + /// Micro-ops issued in the current cycle + unsigned CurrMOps; /// MinReadyCycle - Cycle of the soonest available instruction. unsigned MinReadyCycle; @@ -1244,52 +1466,71 @@ public: // The expected latency of the critical path in this scheduled zone. unsigned ExpectedLatency; - // Resources used in the scheduled zone beyond this boundary. - SmallVector ResourceCounts; + // The latency of dependence chains leading into this zone. + // For each node scheduled bottom-up: DLat = max DLat, N.Depth. + // For each cycle scheduled: DLat -= 1. + unsigned DependentLatency; + + /// Count the scheduled (issued) micro-ops that can be retired by + /// time=CurrCycle assuming the first scheduled instr is retired at time=0. + unsigned RetiredMOps; + + // Count scheduled resources that have been executed. Resources are + // considered executed if they become ready in the time that it takes to + // saturate any resource including the one in question. Counts are scaled + // for direct comparison with other resources. Counts can be compared with + // MOps * getMicroOpFactor and Latency * getLatencyFactor. + SmallVector ExecutedResCounts; + + /// Cache the max count for a single resource. + unsigned MaxExecutedResCount; // Cache the critical resources ID in this scheduled zone. - unsigned CritResIdx; + unsigned ZoneCritResIdx; // Is the scheduled region resource limited vs. latency limited. bool IsResourceLimited; - unsigned ExpectedCount; - #ifndef NDEBUG - // Remember the greatest min operand latency. - unsigned MaxMinLatency; + // Remember the greatest operand latency as an upper bound on the number of + // times we should retry the pending queue because of a hazard. + unsigned MaxObservedLatency; #endif void reset() { // A new HazardRec is created for each DAG and owned by SchedBoundary. - delete HazardRec; - + // Destroying and reconstructing it is very expensive though. So keep + // invalid, placeholder HazardRecs. + if (HazardRec && HazardRec->isEnabled()) { + delete HazardRec; + HazardRec = 0; + } Available.clear(); Pending.clear(); CheckPending = false; NextSUs.clear(); - HazardRec = 0; CurrCycle = 0; - IssueCount = 0; + CurrMOps = 0; MinReadyCycle = UINT_MAX; ExpectedLatency = 0; - ResourceCounts.resize(1); - assert(!ResourceCounts[0] && "nonzero count for bad resource"); - CritResIdx = 0; + DependentLatency = 0; + RetiredMOps = 0; + MaxExecutedResCount = 0; + ZoneCritResIdx = 0; IsResourceLimited = false; - ExpectedCount = 0; #ifndef NDEBUG - MaxMinLatency = 0; + MaxObservedLatency = 0; #endif // Reserve a zero-count for invalid CritResIdx. - ResourceCounts.resize(1); + ExecutedResCounts.resize(1); + assert(!ExecutedResCounts[0] && "nonzero count for bad resource"); } /// Pending queues extend the ready queues with the same ID and the /// PendingFlag set. SchedBoundary(unsigned ID, const Twine &Name): DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), - Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), + Pending(ID << GenericScheduler::LogMaxQID, Name+".P"), HazardRec(0) { reset(); } @@ -1300,28 +1541,63 @@ public: SchedRemainder *rem); bool isTop() const { - return Available.getID() == ConvergingScheduler::TopQID; + return Available.getID() == GenericScheduler::TopQID; + } + +#ifndef NDEBUG + const char *getResourceName(unsigned PIdx) { + if (!PIdx) + return "MOps"; + return SchedModel->getProcResource(PIdx)->Name; + } +#endif + + /// Get the number of latency cycles "covered" by the scheduled + /// instructions. 
This is the larger of the critical path within the zone + /// and the number of cycles required to issue the instructions. + unsigned getScheduledLatency() const { + return std::max(ExpectedLatency, CurrCycle); } unsigned getUnscheduledLatency(SUnit *SU) const { - if (isTop()) - return SU->getHeight(); - return SU->getDepth() + SU->Latency; + return isTop() ? SU->getHeight() : SU->getDepth(); + } + + unsigned getResourceCount(unsigned ResIdx) const { + return ExecutedResCounts[ResIdx]; } + /// Get the scaled count of scheduled micro-ops and resources, including + /// executed resources. unsigned getCriticalCount() const { - return ResourceCounts[CritResIdx]; + if (!ZoneCritResIdx) + return RetiredMOps * SchedModel->getMicroOpFactor(); + return getResourceCount(ZoneCritResIdx); + } + + /// Get a scaled count for the minimum execution time of the scheduled + /// micro-ops that are ready to execute by getExecutedCount. Notice the + /// feedback loop. + unsigned getExecutedCount() const { + return std::max(CurrCycle * SchedModel->getLatencyFactor(), + MaxExecutedResCount); } bool checkHazard(SUnit *SU); - void setLatencyPolicy(CandPolicy &Policy); + unsigned findMaxLatency(ArrayRef ReadySUs); + + unsigned getOtherResourceCount(unsigned &OtherCritIdx); + + void setPolicy(CandPolicy &Policy, SchedBoundary &OtherZone); void releaseNode(SUnit *SU, unsigned ReadyCycle); - void bumpCycle(); + void bumpCycle(unsigned NextCycle); - void countResource(unsigned PIdx, unsigned Cycles); + void incExecutedResources(unsigned PIdx, unsigned Count); + + unsigned countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle); void bumpNode(SUnit *SU); @@ -1330,9 +1606,14 @@ public: void removeReady(SUnit *SU); SUnit *pickOnlyChoice(); + +#ifndef NDEBUG + void dumpScheduledState(); +#endif }; private: + const MachineSchedContext *Context; ScheduleDAGMI *DAG; const TargetSchedModel *SchedModel; const TargetRegisterInfo *TRI; @@ -1342,6 +1623,7 @@ private: SchedBoundary Top; SchedBoundary Bot; + MachineSchedPolicy RegionPolicy; public: /// SUnit::NodeQueueId: 0 (none), 1 (top), 2 (bot), 3 (both) enum { @@ -1350,8 +1632,15 @@ public: LogMaxQID = 2 }; - ConvergingScheduler(): - DAG(0), SchedModel(0), TRI(0), Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + GenericScheduler(const MachineSchedContext *C): + Context(C), DAG(0), SchedModel(0), TRI(0), + Top(TopQID, "TopQ"), Bot(BotQID, "BotQ") {} + + virtual void initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs); + + bool shouldTrackPressure() const { return RegionPolicy.ShouldTrackPressure; } virtual void initialize(ScheduleDAGMI *dag); @@ -1366,14 +1655,7 @@ public: virtual void registerRoots(); protected: - void balanceZones( - ConvergingScheduler::SchedBoundary &CriticalZone, - ConvergingScheduler::SchedCandidate &CriticalCand, - ConvergingScheduler::SchedBoundary &OppositeZone, - ConvergingScheduler::SchedCandidate &OppositeCand); - - void checkResourceLimits(ConvergingScheduler::SchedCandidate &TopCand, - ConvergingScheduler::SchedCandidate &BotCand); + void checkAcyclicLatency(); void tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, @@ -1395,7 +1677,7 @@ protected: }; } // namespace -void ConvergingScheduler::SchedRemainder:: +void GenericScheduler::SchedRemainder:: init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { reset(); if (!SchedModel->hasInstrSchedModel()) @@ -1404,7 +1686,8 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { for (std::vector::iterator I = 
DAG->SUnits.begin(), E = DAG->SUnits.end(); I != E; ++I) { const MCSchedClassDesc *SC = DAG->getSchedClass(&*I); - RemainingMicroOps += SchedModel->getNumMicroOps(I->getInstr(), SC); + RemIssueCount += SchedModel->getNumMicroOps(I->getInstr(), SC) + * SchedModel->getMicroOpFactor(); for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { @@ -1413,26 +1696,61 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { RemainingCounts[PIdx] += (Factor * PI->Cycles); } } - for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); - PIdx != PEnd; ++PIdx) { - if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) - >= (int)SchedModel->getLatencyFactor()) { - CritResIdx = PIdx; - } - } } -void ConvergingScheduler::SchedBoundary:: +void GenericScheduler::SchedBoundary:: init(ScheduleDAGMI *dag, const TargetSchedModel *smodel, SchedRemainder *rem) { reset(); DAG = dag; SchedModel = smodel; Rem = rem; if (SchedModel->hasInstrSchedModel()) - ResourceCounts.resize(SchedModel->getNumProcResourceKinds()); + ExecutedResCounts.resize(SchedModel->getNumProcResourceKinds()); +} + +/// Initialize the per-region scheduling policy. +void GenericScheduler::initPolicy(MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + unsigned NumRegionInstrs) { + const TargetMachine &TM = Context->MF->getTarget(); + + // Avoid setting up the register pressure tracker for small regions to save + // compile time. As a rough heuristic, only track pressure when the number of + // schedulable instructions exceeds half the integer register file. + unsigned NIntRegs = Context->RegClassInfo->getNumAllocatableRegs( + TM.getTargetLowering()->getRegClassFor(MVT::i32)); + + RegionPolicy.ShouldTrackPressure = NumRegionInstrs > (NIntRegs / 2); + + // For generic targets, we default to bottom-up, because it's simpler and more + // compile-time optimizations have been implemented in that direction. + RegionPolicy.OnlyBottomUp = true; + + // Allow the subtarget to override default policy. + const TargetSubtargetInfo &ST = TM.getSubtarget(); + ST.overrideSchedPolicy(RegionPolicy, Begin, End, NumRegionInstrs); + + // After subtarget overrides, apply command line options. + if (!EnableRegPressure) + RegionPolicy.ShouldTrackPressure = false; + + // Check -misched-topdown/bottomup can force or unforce scheduling direction. + // e.g. -misched-bottomup=false allows scheduling in both directions. + assert((!ForceTopDown || !ForceBottomUp) && + "-misched-topdown incompatible with -misched-bottomup"); + if (ForceBottomUp.getNumOccurrences() > 0) { + RegionPolicy.OnlyBottomUp = ForceBottomUp; + if (RegionPolicy.OnlyBottomUp) + RegionPolicy.OnlyTopDown = false; + } + if (ForceTopDown.getNumOccurrences() > 0) { + RegionPolicy.OnlyTopDown = ForceTopDown; + if (RegionPolicy.OnlyTopDown) + RegionPolicy.OnlyBottomUp = false; + } } -void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { +void GenericScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; @@ -1447,31 +1765,36 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { // are disabled, then these HazardRecs will be disabled. 
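The initPolicy() heuristic above enables per-region pressure tracking only when a region holds more schedulable instructions than half the allocatable integer register file. With an invented register count, the cutover looks like this:

    #include <cstdio>

    int main() {
      unsigned NIntRegs = 32; // e.g. allocatable registers for MVT::i32
      unsigned Sizes[] = {5, 16, 17, 100};
      for (unsigned N : Sizes) // track only when N > NIntRegs / 2
        std::printf("%3u instrs -> track pressure: %d\n", N, N > NIntRegs / 2);
      return 0;
    }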
const InstrItineraryData *Itin = SchedModel->getInstrItineraries(); const TargetMachine &TM = DAG->MF.getTarget(); - Top.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - Bot.HazardRec = TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); - - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); + if (!Top.HazardRec) { + Top.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } + if (!Bot.HazardRec) { + Bot.HazardRec = + TM.getInstrInfo()->CreateTargetMIHazardRecognizer(Itin, DAG); + } } -void ConvergingScheduler::releaseTopNode(SUnit *SU) { +void GenericScheduler::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { + if (I->isWeak()) + continue; unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned Latency = I->getLatency(); #ifndef NDEBUG - Top.MaxMinLatency = std::max(MinLatency, Top.MaxMinLatency); + Top.MaxObservedLatency = std::max(Latency, Top.MaxObservedLatency); #endif - if (SU->TopReadyCycle < PredReadyCycle + MinLatency) - SU->TopReadyCycle = PredReadyCycle + MinLatency; + if (SU->TopReadyCycle < PredReadyCycle + Latency) + SU->TopReadyCycle = PredReadyCycle + Latency; } Top.releaseNode(SU, SU->TopReadyCycle); } -void ConvergingScheduler::releaseBottomNode(SUnit *SU) { +void GenericScheduler::releaseBottomNode(SUnit *SU) { if (SU->isScheduled) return; @@ -1482,18 +1805,56 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { if (I->isWeak()) continue; unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; - unsigned MinLatency = I->getMinLatency(); + unsigned Latency = I->getLatency(); #ifndef NDEBUG - Bot.MaxMinLatency = std::max(MinLatency, Bot.MaxMinLatency); + Bot.MaxObservedLatency = std::max(Latency, Bot.MaxObservedLatency); #endif - if (SU->BotReadyCycle < SuccReadyCycle + MinLatency) - SU->BotReadyCycle = SuccReadyCycle + MinLatency; + if (SU->BotReadyCycle < SuccReadyCycle + Latency) + SU->BotReadyCycle = SuccReadyCycle + Latency; } Bot.releaseNode(SU, SU->BotReadyCycle); } -void ConvergingScheduler::registerRoots() { +/// Set IsAcyclicLatencyLimited if the acyclic path is longer than the cyclic +/// critical path by more cycles than it takes to drain the instruction buffer. +/// We estimate an upper bound on in-flight instructions as: +/// +/// CyclesPerIteration = max( CyclicPath, Loop-Resource-Height ) +/// InFlightIterations = AcyclicPath / CyclesPerIteration +/// InFlightResources = InFlightIterations * LoopResources +/// +/// TODO: Check execution resources in addition to IssueCount. +void GenericScheduler::checkAcyclicLatency() { + if (Rem.CyclicCritPath == 0 || Rem.CyclicCritPath >= Rem.CriticalPath) + return; + + // Scaled number of cycles per loop iteration. + unsigned IterCount = + std::max(Rem.CyclicCritPath * SchedModel->getLatencyFactor(), + Rem.RemIssueCount); + // Scaled acyclic critical path.
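releaseTopNode()/releaseBottomNode() above relax a node's ready cycle over all non-weak edges: ready = max(pred.ready + edge latency). The recurrence by itself, with toy numbers:

    #include <algorithm>
    #include <cstdio>

    int main() {
      struct { unsigned PredReadyCycle, Latency; } Preds[] =
          {{0, 3}, {2, 1}, {1, 4}};
      unsigned TopReadyCycle = 0;
      for (auto &P : Preds)
        TopReadyCycle = std::max(TopReadyCycle, P.PredReadyCycle + P.Latency);
      std::printf("ready at cycle %u\n", TopReadyCycle); // prints 5
      return 0;
    }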
+ unsigned AcyclicCount = Rem.CriticalPath * SchedModel->getLatencyFactor(); + // InFlightCount = (AcyclicPath / IterCycles) * InstrPerLoop + unsigned InFlightCount = + (AcyclicCount * Rem.RemIssueCount + IterCount-1) / IterCount; + unsigned BufferLimit = + SchedModel->getMicroOpBufferSize() * SchedModel->getMicroOpFactor(); + + Rem.IsAcyclicLatencyLimited = InFlightCount > BufferLimit; + + DEBUG(dbgs() << "IssueCycles=" + << Rem.RemIssueCount / SchedModel->getLatencyFactor() << "c " + << "IterCycles=" << IterCount / SchedModel->getLatencyFactor() + << "c NumIters=" << (AcyclicCount + IterCount-1) / IterCount + << " InFlight=" << InFlightCount / SchedModel->getMicroOpFactor() + << "m BufferLim=" << SchedModel->getMicroOpBufferSize() << "m\n"; + if (Rem.IsAcyclicLatencyLimited) + dbgs() << " ACYCLIC LATENCY LIMIT\n"); +} + +void GenericScheduler::registerRoots() { Rem.CriticalPath = DAG->ExitSU.getDepth(); + // Some roots may not feed into ExitSU. Check all of them in case. for (std::vector::const_iterator I = Bot.Available.begin(), E = Bot.Available.end(); I != E; ++I) { @@ -1501,6 +1862,11 @@ void ConvergingScheduler::registerRoots() { Rem.CriticalPath = (*I)->getDepth(); } DEBUG(dbgs() << "Critical Path: " << Rem.CriticalPath << '\n'); + + if (EnableCyclicPath) { + Rem.CyclicCritPath = DAG->computeCyclicCriticalPath(); + checkAcyclicLatency(); + } } /// Does this SU have a hazard within the current instruction group. @@ -1516,12 +1882,12 @@ void ConvergingScheduler::registerRoots() { /// can dispatch per cycle. /// /// TODO: Also check whether the SU must start a new group. -bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { +bool GenericScheduler::SchedBoundary::checkHazard(SUnit *SU) { if (HazardRec->isEnabled()) return HazardRec->getHazardType(SU) != ScheduleHazardRecognizer::NoHazard; unsigned uops = SchedModel->getNumMicroOps(SU->getInstr()); - if ((IssueCount > 0) && (IssueCount + uops > SchedModel->getIssueWidth())) { + if ((CurrMOps > 0) && (CurrMOps + uops > SchedModel->getIssueWidth())) { DEBUG(dbgs() << " SU(" << SU->NodeNum << ") uops=" << SchedModel->getNumMicroOps(SU->getInstr()) << '\n'); return true; @@ -1529,45 +1895,125 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return false; } -/// Compute the remaining latency to determine whether ILP should be increased. -void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { - // FIXME: compile time. In all, we visit four queues here one we should only - // need to visit the one that was last popped if we cache the result. +// Find the unscheduled node in ReadySUs with the highest latency. 
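checkAcyclicLatency() above compares the estimated number of in-flight micro-ops against the micro-op buffer. One worked instance, with all factors set to 1 so scaled and raw counts coincide (all numbers invented):

    #include <algorithm>
    #include <cstdio>

    int main() {
      unsigned LatencyFactor = 1, MicroOpFactor = 1;
      unsigned CyclicCritPath = 2, CriticalPath = 12; // cycles
      unsigned RemIssueCount = 8;                     // scaled micro-ops left
      unsigned BufferSize = 4;                        // micro-op buffer
      unsigned IterCount =
          std::max(CyclicCritPath * LatencyFactor, RemIssueCount);    // 8
      unsigned AcyclicCount = CriticalPath * LatencyFactor;           // 12
      unsigned InFlight =
          (AcyclicCount * RemIssueCount + IterCount - 1) / IterCount; // 12
      unsigned BufferLimit = BufferSize * MicroOpFactor;              // 4
      std::printf("in flight %u vs buffer %u -> limited: %d\n", InFlight,
                  BufferLimit, InFlight > BufferLimit);               // 1
      return 0;
    }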
+unsigned GenericScheduler::SchedBoundary:: +findMaxLatency(ArrayRef<SUnit*> ReadySUs) { + SUnit *LateSU = 0; unsigned RemLatency = 0; - for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); + for (ArrayRef<SUnit*>::iterator I = ReadySUs.begin(), E = ReadySUs.end(); I != E; ++I) { unsigned L = getUnscheduledLatency(*I); - DEBUG(dbgs() << " " << Available.getName() - << " RemLatency SU(" << (*I)->NodeNum << ") " << L << '\n'); - if (L > RemLatency) + if (L > RemLatency) { RemLatency = L; + LateSU = *I; + } } - for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end(); - I != E; ++I) { - unsigned L = getUnscheduledLatency(*I); - if (L > RemLatency) - RemLatency = L; + if (LateSU) { + DEBUG(dbgs() << Available.getName() << " RemLatency SU(" + << LateSU->NodeNum << ") " << RemLatency << "c\n"); } - unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); - DEBUG(dbgs() << " " << Available.getName() - << " ExpectedLatency " << ExpectedLatency - << " CP Limit " << CriticalPathLimit << '\n'); - if (RemLatency + ExpectedLatency >= CriticalPathLimit - && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { - Policy.ReduceLatency = true; - DEBUG(dbgs() << " Increase ILP: " << Available.getName() << '\n'); + return RemLatency; +} + +// Count resources in this zone and the remaining unscheduled +// instructions. Return the max count, scaled. Set OtherCritIdx to the critical +// resource index, or zero if the zone is issue limited. +unsigned GenericScheduler::SchedBoundary:: +getOtherResourceCount(unsigned &OtherCritIdx) { + OtherCritIdx = 0; + if (!SchedModel->hasInstrSchedModel()) + return 0; + + unsigned OtherCritCount = Rem->RemIssueCount + + (RetiredMOps * SchedModel->getMicroOpFactor()); + DEBUG(dbgs() << " " << Available.getName() << " + Remain MOps: " + << OtherCritCount / SchedModel->getMicroOpFactor() << '\n'); + for (unsigned PIdx = 1, PEnd = SchedModel->getNumProcResourceKinds(); + PIdx != PEnd; ++PIdx) { + unsigned OtherCount = getResourceCount(PIdx) + Rem->RemainingCounts[PIdx]; + if (OtherCount > OtherCritCount) { + OtherCritCount = OtherCount; + OtherCritIdx = PIdx; + } + } + if (OtherCritIdx) { + DEBUG(dbgs() << " " << Available.getName() << " + Remain CritRes: " + << OtherCritCount / SchedModel->getResourceFactor(OtherCritIdx) + << " " << getResourceName(OtherCritIdx) << "\n"); } + return OtherCritCount; } -void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, - unsigned ReadyCycle) { +/// Set the CandPolicy for this zone given the current resources and latencies +/// inside and outside the zone. +void GenericScheduler::SchedBoundary::setPolicy(CandPolicy &Policy, + SchedBoundary &OtherZone) { + // Now that potential stalls have been considered, apply preemptive heuristics + // based on the total latency and resources inside and outside this + // zone. + + // Compute remaining latency. We need this both to determine whether the + // overall schedule has become latency-limited and whether the instructions + // outside this zone are resource or latency limited. + // + // The "dependent" latency is updated incrementally during scheduling as the + // max height/depth of scheduled nodes minus the cycles since it was + // scheduled: + // DLat = max (N.depth - (CurrCycle - N.ReadyCycle) for N in Zone + // + // The "independent" latency is the max ready queue depth: + // ILat = max N.depth for N in Available|Pending + // + // RemainingLatency is the greater of independent and dependent latency.
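getOtherResourceCount() above is a seeded max-scan: start from the scaled micro-op count, then let any processor resource with a larger combined (executed + remaining) count take over as the critical one. Its skeleton, with invented counts:

    #include <cstdio>

    int main() {
      unsigned Counts[] = {0, 9, 14, 6};    // index 0 reserved for micro-ops
      unsigned CritCount = 11, CritIdx = 0; // seed: scaled micro-op count
      for (unsigned i = 1; i != 4; ++i) {
        if (Counts[i] > CritCount) {
          CritCount = Counts[i];
          CritIdx = i;
        }
      }
      std::printf("critical resource %u with %u units\n", CritIdx, CritCount);
      return 0; // prints: critical resource 2 with 14 units
    }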
+ unsigned RemLatency = DependentLatency; + RemLatency = std::max(RemLatency, findMaxLatency(Available.elements())); + RemLatency = std::max(RemLatency, findMaxLatency(Pending.elements())); + + // Compute the critical resource outside the zone. + unsigned OtherCritIdx; + unsigned OtherCount = OtherZone.getOtherResourceCount(OtherCritIdx); + + bool OtherResLimited = false; + if (SchedModel->hasInstrSchedModel()) { + unsigned LFactor = SchedModel->getLatencyFactor(); + OtherResLimited = (int)(OtherCount - (RemLatency * LFactor)) > (int)LFactor; + } + if (!OtherResLimited && (RemLatency + CurrCycle > Rem->CriticalPath)) { + Policy.ReduceLatency |= true; + DEBUG(dbgs() << " " << Available.getName() << " RemainingLatency " + << RemLatency << " + " << CurrCycle << "c > CritPath " + << Rem->CriticalPath << "\n"); + } + // If the same resource is limiting inside and outside the zone, do nothing. + if (ZoneCritResIdx == OtherCritIdx) + return; + DEBUG( + if (IsResourceLimited) { + dbgs() << " " << Available.getName() << " ResourceLimited: " + << getResourceName(ZoneCritResIdx) << "\n"; + } + if (OtherResLimited) + dbgs() << " RemainingLimit: " << getResourceName(OtherCritIdx) << "\n"; + if (!IsResourceLimited && !OtherResLimited) + dbgs() << " Latency limited both directions.\n"); + + if (IsResourceLimited && !Policy.ReduceResIdx) + Policy.ReduceResIdx = ZoneCritResIdx; + + if (OtherResLimited) + Policy.DemandResIdx = OtherCritIdx; +} + +void GenericScheduler::SchedBoundary::releaseNode(SUnit *SU, + unsigned ReadyCycle) { if (ReadyCycle < MinReadyCycle) MinReadyCycle = ReadyCycle; // Check for interlocks first. For the purpose of other heuristics, an // instruction that cannot issue appears as if it's not in the ReadyQueue. - if (ReadyCycle > CurrCycle || checkHazard(SU)) + bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0; + if ((!IsBuffered && ReadyCycle > CurrCycle) || checkHazard(SU)) Pending.push(SU); else Available.push(SU); @@ -1577,16 +2023,21 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, } /// Move the boundary of scheduled code by one cycle. -void ConvergingScheduler::SchedBoundary::bumpCycle() { - unsigned Width = SchedModel->getIssueWidth(); - IssueCount = (IssueCount <= Width) ? 0 : IssueCount - Width; - - unsigned NextCycle = CurrCycle + 1; - assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); - if (MinReadyCycle > NextCycle) { - IssueCount = 0; - NextCycle = MinReadyCycle; - } +void GenericScheduler::SchedBoundary::bumpCycle(unsigned NextCycle) { + if (SchedModel->getMicroOpBufferSize() == 0) { + assert(MinReadyCycle < UINT_MAX && "MinReadyCycle uninitialized"); + if (MinReadyCycle > NextCycle) + NextCycle = MinReadyCycle; + } + // Update the current micro-ops, which will issue in the next cycle. + unsigned DecMOps = SchedModel->getIssueWidth() * (NextCycle - CurrCycle); + CurrMOps = (CurrMOps <= DecMOps) ? 0 : CurrMOps - DecMOps; + + // Decrement DependentLatency based on the next cycle. + if ((NextCycle - CurrCycle) > DependentLatency) + DependentLatency = 0; + else + DependentLatency -= (NextCycle - CurrCycle); if (!HazardRec->isEnabled()) { // Bypass HazardRec virtual calls. 
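The OtherResLimited test just above (and the IsResourceLimited test in bumpCycle() further down) share one comparison: a zone counts as resource limited when the critical resource's scaled count exceeds the latency-implied count by more than one cycle's worth of units. In isolation, with invented values:

    #include <cstdio>

    int main() {
      unsigned LFactor = 2;        // units per cycle (latency factor)
      unsigned CriticalCount = 14; // scaled units on the critical resource
      unsigned Latency = 5;        // cycles covered so far
      bool Limited = (int)(CriticalCount - Latency * LFactor) > (int)LFactor;
      std::printf("resource limited: %d\n", Limited); // 14 - 10 = 4 > 2 -> 1
      return 0;
    }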
@@ -1602,38 +2053,54 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() { } } CheckPending = true; - IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); + unsigned LFactor = SchedModel->getLatencyFactor(); + IsResourceLimited = + (int)(getCriticalCount() - (getScheduledLatency() * LFactor)) + > (int)LFactor; + + DEBUG(dbgs() << "Cycle: " << CurrCycle << ' ' << Available.getName() << '\n'); +} - DEBUG(dbgs() << " " << Available.getName() - << " Cycle: " << CurrCycle << '\n'); +void GenericScheduler::SchedBoundary::incExecutedResources(unsigned PIdx, + unsigned Count) { + ExecutedResCounts[PIdx] += Count; + if (ExecutedResCounts[PIdx] > MaxExecutedResCount) + MaxExecutedResCount = ExecutedResCounts[PIdx]; } /// Add the given processor resource to this scheduled zone. -void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, - unsigned Cycles) { +/// +/// \param Cycles indicates the number of consecutive (non-pipelined) cycles +/// during which this resource is consumed. +/// +/// \return the next cycle at which the instruction may execute without +/// oversubscribing resources. +unsigned GenericScheduler::SchedBoundary:: +countResource(unsigned PIdx, unsigned Cycles, unsigned ReadyCycle) { unsigned Factor = SchedModel->getResourceFactor(PIdx); - DEBUG(dbgs() << " " << SchedModel->getProcResource(PIdx)->Name - << " +(" << Cycles << "x" << Factor - << ") / " << SchedModel->getLatencyFactor() << '\n'); - unsigned Count = Factor * Cycles; - ResourceCounts[PIdx] += Count; + DEBUG(dbgs() << " " << getResourceName(PIdx) + << " +" << Cycles << "x" << Factor << "u\n"); + + // Update Executed resources counts. + incExecutedResources(PIdx, Count); assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); Rem->RemainingCounts[PIdx] -= Count; - // Check if this resource exceeds the current critical resource by a full - // cycle. If so, it becomes the critical resource. - if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) - >= (int)SchedModel->getLatencyFactor()) { - CritResIdx = PIdx; + // Check if this resource exceeds the current critical resource. If so, it + // becomes the critical resource. + if (ZoneCritResIdx != PIdx && (getResourceCount(PIdx) > getCriticalCount())) { + ZoneCritResIdx = PIdx; DEBUG(dbgs() << " *** Critical resource " - << SchedModel->getProcResource(PIdx)->Name << " x" - << ResourceCounts[PIdx] << '\n'); + << getResourceName(PIdx) << ": " + << getResourceCount(PIdx) / SchedModel->getLatencyFactor() << "c\n"); } + // TODO: We don't yet model reserved resources. It's not hard though. + return CurrCycle; } /// Move the boundary of scheduled code by one SUnit. -void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { +void GenericScheduler::SchedBoundary::bumpNode(SUnit *SU) { // Update the reservation table. if (HazardRec->isEnabled()) { if (!isTop() && SU->isCall) { @@ -1643,51 +2110,108 @@ void ConvergingScheduler::SchedBoundary::bumpNode(SUnit *SU) { } HazardRec->EmitInstruction(SU); } + const MCSchedClassDesc *SC = DAG->getSchedClass(SU); + unsigned IncMOps = SchedModel->getNumMicroOps(SU->getInstr()); + CurrMOps += IncMOps; + // checkHazard prevents scheduling multiple instructions per cycle that exceed + // issue width. However, we commonly reach the maximum. In this case + // opportunistically bump the cycle to avoid uselessly checking everything in + // the readyQ. Furthermore, a single instruction may produce more than one + // cycle's worth of micro-ops. 
+ // + // TODO: Also check if this SU must end a dispatch group. + unsigned NextCycle = CurrCycle; + if (CurrMOps >= SchedModel->getIssueWidth()) { + ++NextCycle; + DEBUG(dbgs() << " *** Max MOps " << CurrMOps + << " at cycle " << CurrCycle << '\n'); + } + unsigned ReadyCycle = (isTop() ? SU->TopReadyCycle : SU->BotReadyCycle); + DEBUG(dbgs() << " Ready @" << ReadyCycle << "c\n"); + + switch (SchedModel->getMicroOpBufferSize()) { + case 0: + assert(ReadyCycle <= CurrCycle && "Broken PendingQueue"); + break; + case 1: + if (ReadyCycle > NextCycle) { + NextCycle = ReadyCycle; + DEBUG(dbgs() << " *** Stall until: " << ReadyCycle << "\n"); + } + break; + default: + // We don't currently model the OOO reorder buffer, so consider all + // scheduled MOps to be "retired". + break; + } + RetiredMOps += IncMOps; + // Update resource counts and critical resource. if (SchedModel->hasInstrSchedModel()) { - const MCSchedClassDesc *SC = DAG->getSchedClass(SU); - Rem->RemainingMicroOps -= SchedModel->getNumMicroOps(SU->getInstr(), SC); + unsigned DecRemIssue = IncMOps * SchedModel->getMicroOpFactor(); + assert(Rem->RemIssueCount >= DecRemIssue && "MOps double counted"); + Rem->RemIssueCount -= DecRemIssue; + if (ZoneCritResIdx) { + // Scale scheduled micro-ops for comparing with the critical resource. + unsigned ScaledMOps = + RetiredMOps * SchedModel->getMicroOpFactor(); + + // If scaled micro-ops are now more than the previous critical resource by + // a full cycle, then micro-ops issue becomes critical. + if ((int)(ScaledMOps - getResourceCount(ZoneCritResIdx)) + >= (int)SchedModel->getLatencyFactor()) { + ZoneCritResIdx = 0; + DEBUG(dbgs() << " *** Critical resource NumMicroOps: " + << ScaledMOps / SchedModel->getLatencyFactor() << "c\n"); + } + } for (TargetSchedModel::ProcResIter PI = SchedModel->getWriteProcResBegin(SC), PE = SchedModel->getWriteProcResEnd(SC); PI != PE; ++PI) { - countResource(PI->ProcResourceIdx, PI->Cycles); + unsigned RCycle = + countResource(PI->ProcResourceIdx, PI->Cycles, ReadyCycle); + if (RCycle > NextCycle) + NextCycle = RCycle; } } - if (isTop()) { - if (SU->getDepth() > ExpectedLatency) - ExpectedLatency = SU->getDepth(); + // Update ExpectedLatency and DependentLatency. + unsigned &TopLatency = isTop() ? ExpectedLatency : DependentLatency; + unsigned &BotLatency = isTop() ? DependentLatency : ExpectedLatency; + if (SU->getDepth() > TopLatency) { + TopLatency = SU->getDepth(); + DEBUG(dbgs() << " " << Available.getName() + << " TopLatency SU(" << SU->NodeNum << ") " << TopLatency << "c\n"); } - else { - if (SU->getHeight() > ExpectedLatency) - ExpectedLatency = SU->getHeight(); + if (SU->getHeight() > BotLatency) { + BotLatency = SU->getHeight(); + DEBUG(dbgs() << " " << Available.getName() + << " BotLatency SU(" << SU->NodeNum << ") " << BotLatency << "c\n"); } - - IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); - - // Check the instruction group dispatch limit. - // TODO: Check if this SU must end a dispatch group. - IssueCount += SchedModel->getNumMicroOps(SU->getInstr()); - - // checkHazard prevents scheduling multiple instructions per cycle that exceed - // issue width. However, we commonly reach the maximum. In this case - // opportunistically bump the cycle to avoid uselessly checking everything in - // the readyQ. Furthermore, a single instruction may produce more than one - // cycle's worth of micro-ops. 
-  if (IssueCount >= SchedModel->getIssueWidth()) {
-    DEBUG(dbgs() << " *** Max instrs at cycle " << CurrCycle << '\n');
-    bumpCycle();
+  // If we stall for any reason, bump the cycle.
+  if (NextCycle > CurrCycle) {
+    bumpCycle(NextCycle);
+  }
+  else {
+    // After updating ZoneCritResIdx and ExpectedLatency, check if we're
+    // resource limited. If a stall occurred, bumpCycle does this.
+    unsigned LFactor = SchedModel->getLatencyFactor();
+    IsResourceLimited =
+      (int)(getCriticalCount() - (getScheduledLatency() * LFactor))
+      > (int)LFactor;
   }
+  DEBUG(dumpScheduledState());
 }
 
 /// Release pending ready nodes into the available queue. This makes them
 /// visible to heuristics.
-void ConvergingScheduler::SchedBoundary::releasePending() {
+void GenericScheduler::SchedBoundary::releasePending() {
   // If the available queue is empty, it is safe to reset MinReadyCycle.
   if (Available.empty())
     MinReadyCycle = UINT_MAX;
 
   // Check to see if any of the pending instructions are ready to issue. If
   // so, add them to the available queue.
+  bool IsBuffered = SchedModel->getMicroOpBufferSize() != 0;
   for (unsigned i = 0, e = Pending.size(); i != e; ++i) {
     SUnit *SU = *(Pending.begin()+i);
     unsigned ReadyCycle = isTop() ? SU->TopReadyCycle : SU->BotReadyCycle;
@@ -1695,7 +2219,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
     if (ReadyCycle < MinReadyCycle)
       MinReadyCycle = ReadyCycle;
 
-    if (ReadyCycle > CurrCycle)
+    if (!IsBuffered && ReadyCycle > CurrCycle)
       continue;
 
     if (checkHazard(SU))
@@ -1710,7 +2234,7 @@ void ConvergingScheduler::SchedBoundary::releasePending() {
 }
 
 /// Remove SU from the ready set for this boundary.
-void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
+void GenericScheduler::SchedBoundary::removeReady(SUnit *SU) {
   if (Available.isInQueue(SU))
     Available.remove(Available.find(SU));
   else {
@@ -1722,11 +2246,11 @@ void ConvergingScheduler::SchedBoundary::removeReady(SUnit *SU) {
 /// If this queue only has one ready candidate, return it. As a side effect,
 /// defer any nodes that now hit a hazard, and advance the cycle until at least
 /// one node is ready. If multiple instructions are ready, return NULL.
-SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
+SUnit *GenericScheduler::SchedBoundary::pickOnlyChoice() {
   if (CheckPending)
     releasePending();
 
-  if (IssueCount > 0) {
+  if (CurrMOps > 0) {
     // Defer any ready instrs that now have a hazard.
     for (ReadyQueue::iterator I = Available.begin(); I != Available.end();) {
       if (checkHazard(*I)) {
@@ -1738,9 +2262,9 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
     }
   }
   for (unsigned i = 0; Available.empty(); ++i) {
-    assert(i <= (HazardRec->getMaxLookAhead() + MaxMinLatency) &&
+    assert(i <= (HazardRec->getMaxLookAhead() + MaxObservedLatency) &&
           "permanent hazard"); (void)i;
-    bumpCycle();
+    bumpCycle(CurrCycle + 1);
     releasePending();
   }
   if (Available.size() == 1)
@@ -1748,106 +2272,33 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() {
   return NULL;
 }
 
-/// Record the candidate policy for opposite zones with different critical
-/// resources.
-///
-/// If the CriticalZone is latency limited, don't force a policy for the
-/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed.
-void ConvergingScheduler::balanceZones( - ConvergingScheduler::SchedBoundary &CriticalZone, - ConvergingScheduler::SchedCandidate &CriticalCand, - ConvergingScheduler::SchedBoundary &OppositeZone, - ConvergingScheduler::SchedCandidate &OppositeCand) { - - if (!CriticalZone.IsResourceLimited) - return; - assert(SchedModel->hasInstrSchedModel() && "required schedmodel"); - - SchedRemainder *Rem = CriticalZone.Rem; - - // If the critical zone is overconsuming a resource relative to the - // remainder, try to reduce it. - unsigned RemainingCritCount = - Rem->RemainingCounts[CriticalZone.CritResIdx]; - if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) - > (int)SchedModel->getLatencyFactor()) { - CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << " Balance " << CriticalZone.Available.getName() - << " reduce " - << SchedModel->getProcResource(CriticalZone.CritResIdx)->Name - << '\n'); - } - // If the other zone is underconsuming a resource relative to the full zone, - // try to increase it. - unsigned OppositeCount = - OppositeZone.ResourceCounts[CriticalZone.CritResIdx]; - if ((int)(OppositeZone.ExpectedCount - OppositeCount) - > (int)SchedModel->getLatencyFactor()) { - OppositeCand.Policy.DemandResIdx = CriticalZone.CritResIdx; - DEBUG(dbgs() << " Balance " << OppositeZone.Available.getName() - << " demand " - << SchedModel->getProcResource(OppositeZone.CritResIdx)->Name - << '\n'); - } -} - -/// Determine if the scheduled zones exceed resource limits or critical path and -/// set each candidate's ReduceHeight policy accordingly. -void ConvergingScheduler::checkResourceLimits( - ConvergingScheduler::SchedCandidate &TopCand, - ConvergingScheduler::SchedCandidate &BotCand) { - - // Set ReduceLatency to true if needed. - Bot.setLatencyPolicy(BotCand.Policy); - Top.setLatencyPolicy(TopCand.Policy); - - // Handle resource-limited regions. - if (Top.IsResourceLimited && Bot.IsResourceLimited - && Top.CritResIdx == Bot.CritResIdx) { - // If the scheduled critical resource in both zones is no longer the - // critical remaining resource, attempt to reduce resource height both ways. - if (Top.CritResIdx != Rem.CritResIdx) { - TopCand.Policy.ReduceResIdx = Top.CritResIdx; - BotCand.Policy.ReduceResIdx = Bot.CritResIdx; - DEBUG(dbgs() << " Reduce scheduled " - << SchedModel->getProcResource(Top.CritResIdx)->Name << '\n'); - } - return; - } - // Handle latency-limited regions. - if (!Top.IsResourceLimited && !Bot.IsResourceLimited) { - // If the total scheduled expected latency exceeds the region's critical - // path then reduce latency both ways. - // - // Just because a zone is not resource limited does not mean it is latency - // limited. Unbuffered resource, such as max micro-ops may cause CurrCycle - // to exceed expected latency. - if ((Top.ExpectedLatency + Bot.ExpectedLatency >= Rem.CriticalPath) - && (Rem.CriticalPath > Top.CurrCycle + Bot.CurrCycle)) { - TopCand.Policy.ReduceLatency = true; - BotCand.Policy.ReduceLatency = true; - DEBUG(dbgs() << " Reduce scheduled latency " << Top.ExpectedLatency - << " + " << Bot.ExpectedLatency << '\n'); - } - return; +#ifndef NDEBUG +// This is useful information to dump after bumpNode. +// Note that the Queue contents are more useful before pickNodeFromQueue. 
+void GenericScheduler::SchedBoundary::dumpScheduledState() { + unsigned ResFactor; + unsigned ResCount; + if (ZoneCritResIdx) { + ResFactor = SchedModel->getResourceFactor(ZoneCritResIdx); + ResCount = getResourceCount(ZoneCritResIdx); } - // The critical resource is different in each zone, so request balancing. - - // Compute the cost of each zone. - Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); - Top.ExpectedCount = std::max( - Top.getCriticalCount(), - Top.ExpectedCount * SchedModel->getLatencyFactor()); - Bot.ExpectedCount = std::max(Bot.ExpectedLatency, Bot.CurrCycle); - Bot.ExpectedCount = std::max( - Bot.getCriticalCount(), - Bot.ExpectedCount * SchedModel->getLatencyFactor()); - - balanceZones(Top, TopCand, Bot, BotCand); - balanceZones(Bot, BotCand, Top, TopCand); + else { + ResFactor = SchedModel->getMicroOpFactor(); + ResCount = RetiredMOps * SchedModel->getMicroOpFactor(); + } + unsigned LFactor = SchedModel->getLatencyFactor(); + dbgs() << Available.getName() << " @" << CurrCycle << "c\n" + << " Retired: " << RetiredMOps; + dbgs() << "\n Executed: " << getExecutedCount() / LFactor << "c"; + dbgs() << "\n Critical: " << ResCount / LFactor << "c, " + << ResCount / ResFactor << " " << getResourceName(ZoneCritResIdx) + << "\n ExpectedLatency: " << ExpectedLatency << "c\n" + << (IsResourceLimited ? " - Resource" : " - Latency") + << " limited.\n"; } +#endif -void ConvergingScheduler::SchedCandidate:: +void GenericScheduler::SchedCandidate:: initResourceDelta(const ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { if (!Policy.ReduceResIdx && !Policy.DemandResIdx) @@ -1864,11 +2315,12 @@ initResourceDelta(const ScheduleDAGMI *DAG, } } + /// Return true if this heuristic determines order. static bool tryLess(int TryVal, int CandVal, - ConvergingScheduler::SchedCandidate &TryCand, - ConvergingScheduler::SchedCandidate &Cand, - ConvergingScheduler::CandReason Reason) { + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { if (TryVal < CandVal) { TryCand.Reason = Reason; return true; @@ -1878,13 +2330,14 @@ static bool tryLess(int TryVal, int CandVal, Cand.Reason = Reason; return true; } + Cand.setRepeat(Reason); return false; } static bool tryGreater(int TryVal, int CandVal, - ConvergingScheduler::SchedCandidate &TryCand, - ConvergingScheduler::SchedCandidate &Cand, - ConvergingScheduler::CandReason Reason) { + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { if (TryVal > CandVal) { TryCand.Reason = Reason; return true; @@ -1894,9 +2347,34 @@ static bool tryGreater(int TryVal, int CandVal, Cand.Reason = Reason; return true; } + Cand.setRepeat(Reason); return false; } +static bool tryPressure(const PressureChange &TryP, + const PressureChange &CandP, + GenericScheduler::SchedCandidate &TryCand, + GenericScheduler::SchedCandidate &Cand, + GenericScheduler::CandReason Reason) { + int TryRank = TryP.getPSetOrMax(); + int CandRank = CandP.getPSetOrMax(); + // If both candidates affect the same set, go with the smallest increase. + if (TryRank == CandRank) { + return tryLess(TryP.getUnitInc(), CandP.getUnitInc(), TryCand, Cand, + Reason); + } + // If one candidate decreases and the other increases, go with it. + // Invalid candidates have UnitInc==0. 
+  if (tryLess(TryP.getUnitInc() < 0, CandP.getUnitInc() < 0, TryCand, Cand,
+              Reason)) {
+    return true;
+  }
+  // If the candidates are decreasing pressure, reverse priority.
+  if (TryP.getUnitInc() < 0)
+    std::swap(TryRank, CandRank);
+  return tryGreater(TryRank, CandRank, TryCand, Cand, Reason);
+}
+
 static unsigned getWeakLeft(const SUnit *SU, bool isTop) {
   return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft;
 }
@@ -1929,6 +2407,32 @@ static int biasPhysRegCopy(const SUnit *SU, bool isTop) {
   return 0;
 }
 
+static bool tryLatency(GenericScheduler::SchedCandidate &TryCand,
+                       GenericScheduler::SchedCandidate &Cand,
+                       GenericScheduler::SchedBoundary &Zone) {
+  if (Zone.isTop()) {
+    if (Cand.SU->getDepth() > Zone.getScheduledLatency()) {
+      if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                  TryCand, Cand, GenericScheduler::TopDepthReduce))
+        return true;
+    }
+    if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                   TryCand, Cand, GenericScheduler::TopPathReduce))
+      return true;
+  }
+  else {
+    if (Cand.SU->getHeight() > Zone.getScheduledLatency()) {
+      if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(),
+                  TryCand, Cand, GenericScheduler::BotHeightReduce))
+        return true;
+    }
+    if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(),
+                   TryCand, Cand, GenericScheduler::BotPathReduce))
+      return true;
+  }
+  return false;
+}
+
 /// Apply a set of heuristics to a new candidate. Heuristics are currently
 /// hierarchical. This may be more efficient than a graduated cost model because
 /// we don't need to evaluate all aspects of the model for each node in the
 /// queue.
 ///
 /// \param Cand provides the policy and current best candidate.
 /// \param TryCand refers to the next SUnit candidate, otherwise uninitialized.
 /// \param Zone describes the scheduled zone that we are extending.
 /// \param RPTracker describes reg pressure within the scheduled zone.
 /// \param TempTracker is a scratch pressure tracker to reuse in queries.
-void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
+void GenericScheduler::tryCandidate(SchedCandidate &Cand,
                                     SchedCandidate &TryCand,
                                     SchedBoundary &Zone,
                                     const RegPressureTracker &RPTracker,
                                     RegPressureTracker &TempTracker) {
-  // Always initialize TryCand's RPDelta.
-  TempTracker.getMaxPressureDelta(TryCand.SU->getInstr(), TryCand.RPDelta,
-                                  DAG->getRegionCriticalPSets(),
-                                  DAG->getRegPressure().MaxSetPressure);
+  if (DAG->isTrackingPressure()) {
+    // Always initialize TryCand's RPDelta.
+    if (Zone.isTop()) {
+      TempTracker.getMaxDownwardPressureDelta(
+        TryCand.SU->getInstr(),
+        TryCand.RPDelta,
+        DAG->getRegionCriticalPSets(),
+        DAG->getRegPressure().MaxSetPressure);
+    }
+    else {
+      if (VerifyScheduling) {
+        TempTracker.getMaxUpwardPressureDelta(
+          TryCand.SU->getInstr(),
+          &DAG->getPressureDiff(TryCand.SU),
+          TryCand.RPDelta,
+          DAG->getRegionCriticalPSets(),
+          DAG->getRegPressure().MaxSetPressure);
+      }
+      else {
+        RPTracker.getUpwardPressureDelta(
+          TryCand.SU->getInstr(),
+          DAG->getPressureDiff(TryCand.SU),
+          TryCand.RPDelta,
+          DAG->getRegionCriticalPSets(),
+          DAG->getRegPressure().MaxSetPressure);
+      }
+    }
+  }
+  DEBUG(if (TryCand.RPDelta.Excess.isValid())
+          dbgs() << "  SU(" << TryCand.SU->NodeNum << ") "
+                 << TRI->getRegPressureSetName(TryCand.RPDelta.Excess.getPSet())
+                 << ":" << TryCand.RPDelta.Excess.getUnitInc() << "\n");
 
   // Initialize the candidate if needed.
   if (!Cand.isValid()) {
@@ -1962,20 +2494,25 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand,
                  TryCand, Cand, PhysRegCopy))
     return;
 
-  // Avoid exceeding the target's limit.
- if (tryLess(TryCand.RPDelta.Excess.UnitIncrease, - Cand.RPDelta.Excess.UnitIncrease, TryCand, Cand, SingleExcess)) + // Avoid exceeding the target's limit. If signed PSetID is negative, it is + // invalid; convert it to INT_MAX to give it lowest priority. + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.Excess, + Cand.RPDelta.Excess, + TryCand, Cand, RegExcess)) return; - if (Cand.Reason == SingleExcess) - Cand.Reason = MultiPressure; // Avoid increasing the max critical pressure in the scheduled region. - if (tryLess(TryCand.RPDelta.CriticalMax.UnitIncrease, - Cand.RPDelta.CriticalMax.UnitIncrease, - TryCand, Cand, SingleCritical)) + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CriticalMax, + Cand.RPDelta.CriticalMax, + TryCand, Cand, RegCritical)) + return; + + // For loops that are acyclic path limited, aggressively schedule for latency. + // This can result in very long dependence chains scheduled in sequence, so + // once every cycle (when CurrMOps == 0), switch to normal heuristics. + if (Rem.IsAcyclicLatencyLimited && !Zone.CurrMOps + && tryLatency(TryCand, Cand, Zone)) return; - if (Cand.Reason == SingleCritical) - Cand.Reason = MultiPressure; // Keep clustered nodes together to encourage downstream peephole // optimizations which may reduce resource requirements. @@ -1990,17 +2527,17 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; // Weak edges are for clustering and other constraints. - // - // Deferring TryCand here does not change Cand's reason. This is good in the - // sense that a bad candidate shouldn't affect a previous candidate's - // goodness, but bad in that it is assymetric and depends on queue order. - CandReason OrigReason = Cand.Reason; if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), getWeakLeft(Cand.SU, Zone.isTop()), TryCand, Cand, Weak)) { - Cand.Reason = OrigReason; return; } + // Avoid increasing the max pressure of the entire region. + if (DAG->isTrackingPressure() && tryPressure(TryCand.RPDelta.CurrentMax, + Cand.RPDelta.CurrentMax, + TryCand, Cand, RegMax)) + return; + // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, @@ -2012,41 +2549,15 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, return; // Avoid serializing long latency dependence chains. - if (Cand.Policy.ReduceLatency) { - if (Zone.isTop()) { - if (Cand.SU->getDepth() * SchedModel->getLatencyFactor() - > Zone.ExpectedCount) { - if (tryLess(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, TopDepthReduce)) - return; - } - if (tryGreater(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, TopPathReduce)) - return; - } - else { - if (Cand.SU->getHeight() * SchedModel->getLatencyFactor() - > Zone.ExpectedCount) { - if (tryLess(TryCand.SU->getHeight(), Cand.SU->getHeight(), - TryCand, Cand, BotHeightReduce)) - return; - } - if (tryGreater(TryCand.SU->getDepth(), Cand.SU->getDepth(), - TryCand, Cand, BotPathReduce)) - return; - } - } - - // Avoid increasing the max pressure of the entire region. - if (tryLess(TryCand.RPDelta.CurrentMax.UnitIncrease, - Cand.RPDelta.CurrentMax.UnitIncrease, TryCand, Cand, SingleMax)) + // For acyclic path limited loops, latency was already checked above. 
+ if (Cand.Policy.ReduceLatency && !Rem.IsAcyclicLatencyLimited + && tryLatency(TryCand, Cand, Zone)) { return; - if (Cand.Reason == SingleMax) - Cand.Reason = MultiPressure; + } // Prefer immediate defs/users of the last scheduled instruction. This is a - // nice pressure avoidance strategy that also conserves the processor's - // register renaming resources and keeps the machine code readable. + // local pressure avoidance strategy that also makes the machine code + // readable. if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), TryCand, Cand, NextDefUse)) return; @@ -2058,49 +2569,17 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, } } -/// pickNodeFromQueue helper that returns true if the LHS reg pressure effect is -/// more desirable than RHS from scheduling standpoint. -static bool compareRPDelta(const RegPressureDelta &LHS, - const RegPressureDelta &RHS) { - // Compare each component of pressure in decreasing order of importance - // without checking if any are valid. Invalid PressureElements are assumed to - // have UnitIncrease==0, so are neutral. - - // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.Excess.UnitIncrease != RHS.Excess.UnitIncrease) { - DEBUG(dbgs() << " RP excess top - bot: " - << (LHS.Excess.UnitIncrease - RHS.Excess.UnitIncrease) << '\n'); - return LHS.Excess.UnitIncrease < RHS.Excess.UnitIncrease; - } - // Avoid increasing the max critical pressure in the scheduled region. - if (LHS.CriticalMax.UnitIncrease != RHS.CriticalMax.UnitIncrease) { - DEBUG(dbgs() << " RP critical top - bot: " - << (LHS.CriticalMax.UnitIncrease - RHS.CriticalMax.UnitIncrease) - << '\n'); - return LHS.CriticalMax.UnitIncrease < RHS.CriticalMax.UnitIncrease; - } - // Avoid increasing the max pressure of the entire region. 
- if (LHS.CurrentMax.UnitIncrease != RHS.CurrentMax.UnitIncrease) { - DEBUG(dbgs() << " RP current top - bot: " - << (LHS.CurrentMax.UnitIncrease - RHS.CurrentMax.UnitIncrease) - << '\n'); - return LHS.CurrentMax.UnitIncrease < RHS.CurrentMax.UnitIncrease; - } - return false; -} - #ifndef NDEBUG -const char *ConvergingScheduler::getReasonStr( - ConvergingScheduler::CandReason Reason) { +const char *GenericScheduler::getReasonStr( + GenericScheduler::CandReason Reason) { switch (Reason) { case NoCand: return "NOCAND "; case PhysRegCopy: return "PREG-COPY"; - case SingleExcess: return "REG-EXCESS"; - case SingleCritical: return "REG-CRIT "; + case RegExcess: return "REG-EXCESS"; + case RegCritical: return "REG-CRIT "; case Cluster: return "CLUSTER "; case Weak: return "WEAK "; - case SingleMax: return "REG-MAX "; - case MultiPressure: return "REG-MULTI "; + case RegMax: return "REG-MAX "; case ResourceReduce: return "RES-REDUCE"; case ResourceDemand: return "RES-DEMAND"; case TopDepthReduce: return "TOP-DEPTH "; @@ -2113,20 +2592,20 @@ const char *ConvergingScheduler::getReasonStr( llvm_unreachable("Unknown reason!"); } -void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { - PressureElement P; +void GenericScheduler::traceCandidate(const SchedCandidate &Cand) { + PressureChange P; unsigned ResIdx = 0; unsigned Latency = 0; switch (Cand.Reason) { default: break; - case SingleExcess: + case RegExcess: P = Cand.RPDelta.Excess; break; - case SingleCritical: + case RegCritical: P = Cand.RPDelta.CriticalMax; break; - case SingleMax: + case RegMax: P = Cand.RPDelta.CurrentMax; break; case ResourceReduce: @@ -2150,8 +2629,8 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { } dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) - dbgs() << " " << TRI->getRegPressureSetName(P.PSetID) - << ":" << P.UnitIncrease << " "; + dbgs() << " " << TRI->getRegPressureSetName(P.getPSet()) + << ":" << P.getUnitInc() << " "; else dbgs() << " "; if (ResIdx) @@ -2166,12 +2645,12 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { } #endif -/// Pick the best candidate from the top queue. +/// Pick the best candidate from the queue. /// /// TODO: getMaxPressureDelta results can be mostly cached for each SUnit during /// DAG building. To adjust for the current scheduling location we need to /// maintain the number of vreg uses remaining to be top-scheduled. -void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, +void GenericScheduler::pickNodeFromQueue(SchedBoundary &Zone, const RegPressureTracker &RPTracker, SchedCandidate &Cand) { ReadyQueue &Q = Zone.Available; @@ -2196,30 +2675,31 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, } } -static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, +static void tracePick(const GenericScheduler::SchedCandidate &Cand, bool IsTop) { DEBUG(dbgs() << "Pick " << (IsTop ? "Top " : "Bot ") - << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); + << GenericScheduler::getReasonStr(Cand.Reason) << '\n'); } /// Pick the best candidate node from either the top or bottom queue. -SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { +SUnit *GenericScheduler::pickNodeBidirectional(bool &IsTopNode) { // Schedule as far as possible in the direction of no choice. This is most // efficient, but also provides the best heuristics for CriticalPSets. 
if (SUnit *SU = Bot.pickOnlyChoice()) { IsTopNode = false; - DEBUG(dbgs() << "Pick Top NOCAND\n"); + DEBUG(dbgs() << "Pick Bot NOCAND\n"); return SU; } if (SUnit *SU = Top.pickOnlyChoice()) { IsTopNode = true; - DEBUG(dbgs() << "Pick Bot NOCAND\n"); + DEBUG(dbgs() << "Pick Top NOCAND\n"); return SU; } CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); SchedCandidate TopCand(NoPolicy); - checkResourceLimits(TopCand, BotCand); + Bot.setPolicy(BotCand.Policy, Top); + Top.setPolicy(TopCand.Policy, Bot); // Prefer bottom scheduling when heuristics are silent. pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); @@ -2232,7 +2712,10 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { // affects picking from either Q. If scheduling in one direction must // increase pressure for one of the excess PSets, then schedule in that // direction first to provide more freedom in the other direction. - if (BotCand.Reason == SingleExcess || BotCand.Reason == SingleCritical) { + if ((BotCand.Reason == RegExcess && !BotCand.isRepeat(RegExcess)) + || (BotCand.Reason == RegCritical + && !BotCand.isRepeat(RegCritical))) + { IsTopNode = false; tracePick(BotCand, IsTopNode); return BotCand.SU; @@ -2241,37 +2724,20 @@ SUnit *ConvergingScheduler::pickNodeBidirectional(bool &IsTopNode) { pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); assert(TopCand.Reason != NoCand && "failed to find the first candidate"); - // If either Q has a single candidate that minimizes pressure above the - // original region's pressure pick it. - if (TopCand.Reason <= SingleMax || BotCand.Reason <= SingleMax) { - if (TopCand.Reason < BotCand.Reason) { - IsTopNode = true; - tracePick(TopCand, IsTopNode); - return TopCand.SU; - } - IsTopNode = false; - tracePick(BotCand, IsTopNode); - return BotCand.SU; - } - // Check for a salient pressure difference and pick the best from either side. - if (compareRPDelta(TopCand.RPDelta, BotCand.RPDelta)) { - IsTopNode = true; - tracePick(TopCand, IsTopNode); - return TopCand.SU; - } - // Otherwise prefer the bottom candidate, in node order if all else failed. + // Choose the queue with the most important (lowest enum) reason. if (TopCand.Reason < BotCand.Reason) { IsTopNode = true; tracePick(TopCand, IsTopNode); return TopCand.SU; } + // Otherwise prefer the bottom candidate, in node order if all else failed. IsTopNode = false; tracePick(BotCand, IsTopNode); return BotCand.SU; } /// Pick the best node to balance the schedule. Implements MachineSchedStrategy. 
-SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { +SUnit *GenericScheduler::pickNode(bool &IsTopNode) { if (DAG->top() == DAG->bottom()) { assert(Top.Available.empty() && Top.Pending.empty() && Bot.Available.empty() && Bot.Pending.empty() && "ReadyQ garbage"); @@ -2279,24 +2745,26 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { } SUnit *SU; do { - if (ForceTopDown) { + if (RegionPolicy.OnlyTopDown) { SU = Top.pickOnlyChoice(); if (!SU) { CandPolicy NoPolicy; SchedCandidate TopCand(NoPolicy); pickNodeFromQueue(Top, DAG->getTopRPTracker(), TopCand); - assert(TopCand.Reason != NoCand && "failed to find the first candidate"); + assert(TopCand.Reason != NoCand && "failed to find a candidate"); + tracePick(TopCand, true); SU = TopCand.SU; } IsTopNode = true; } - else if (ForceBottomUp) { + else if (RegionPolicy.OnlyBottomUp) { SU = Bot.pickOnlyChoice(); if (!SU) { CandPolicy NoPolicy; SchedCandidate BotCand(NoPolicy); pickNodeFromQueue(Bot, DAG->getBotRPTracker(), BotCand); - assert(BotCand.Reason != NoCand && "failed to find the first candidate"); + assert(BotCand.Reason != NoCand && "failed to find a candidate"); + tracePick(BotCand, false); SU = BotCand.SU; } IsTopNode = false; @@ -2315,7 +2783,7 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { return SU; } -void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { +void GenericScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { MachineBasicBlock::iterator InsertPos = SU->getInstr(); if (!isTop) @@ -2346,15 +2814,15 @@ void ConvergingScheduler::reschedulePhysRegCopies(SUnit *SU, bool isTop) { /// /// FIXME: Eventually, we may bundle physreg copies rather than rescheduling /// them here. See comments in biasPhysRegCopy. -void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { +void GenericScheduler::schedNode(SUnit *SU, bool IsTopNode) { if (IsTopNode) { - SU->TopReadyCycle = Top.CurrCycle; + SU->TopReadyCycle = std::max(SU->TopReadyCycle, Top.CurrCycle); Top.bumpNode(SU); if (SU->hasPhysRegUses) reschedulePhysRegCopies(SU, true); } else { - SU->BotReadyCycle = Bot.CurrCycle; + SU->BotReadyCycle = std::max(SU->BotReadyCycle, Bot.CurrCycle); Bot.bumpNode(SU); if (SU->hasPhysRegDefs) reschedulePhysRegCopies(SU, false); @@ -2363,26 +2831,23 @@ void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { /// Create the standard converging machine scheduler. This will be used as the /// default scheduler if the target does not set a default. -static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { - assert((!ForceTopDown || !ForceBottomUp) && - "-misched-topdown incompatible with -misched-bottomup"); - ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler()); +static ScheduleDAGInstrs *createGenericSched(MachineSchedContext *C) { + ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new GenericScheduler(C)); // Register DAG post-processors. // // FIXME: extend the mutation API to allow earlier mutations to instantiate // data and pass it to later mutations. Have a single mutation that gathers // the interesting nodes in one pass. 
-  if (EnableCopyConstrain)
-    DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
-  if (EnableLoadCluster)
+  DAG->addMutation(new CopyConstrain(DAG->TII, DAG->TRI));
+  if (EnableLoadCluster && DAG->TII->enableClusterLoads())
     DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI));
   if (EnableMacroFusion)
     DAG->addMutation(new MacroFusion(DAG->TII));
   return DAG;
 }
 static MachineSchedRegistry
-ConvergingSchedRegistry("converge", "Standard converging scheduler.",
-                        createConvergingSched);
+GenericSchedRegistry("converge", "Standard converging scheduler.",
+                     createGenericSched);
 
 //===----------------------------------------------------------------------===//
 // ILP Scheduler. Currently for experimental analysis of heuristics.
@@ -2424,15 +2889,6 @@ struct ILPOrder {
 
 /// \brief Schedule based on the ILP metric.
 class ILPScheduler : public MachineSchedStrategy {
-  /// In case all subtrees are eventually connected to a common root through
-  /// data dependence (e.g. reduction), place an upper limit on their size.
-  ///
-  /// FIXME: A subtree limit is generally good, but in the situation commented
-  /// above, where multiple similar subtrees feed a common root, we should
-  /// only split at a point where the resulting subtrees will be balanced.
-  /// (a motivating test case must be found).
-  static const unsigned SubtreeLimit = 16;
-
   ScheduleDAGMI *DAG;
   ILPOrder Cmp;
@@ -2616,7 +3072,7 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits {
   }
 
   static bool isNodeHidden(const SUnit *Node) {
-    return (Node->NumPreds > 10 || Node->NumSuccs > 10);
+    return (Node->Preds.size() > 10 || Node->Succs.size() > 10);
  }
 
  static bool hasNodeAddressLabel(const SUnit *Node,
@@ -2639,7 +3095,11 @@ struct DOTGraphTraits : public DefaultDOTGraphTraits {
   static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) {
     std::string Str;
     raw_string_ostream SS(Str);
-    SS << "SU(" << SU->NodeNum << ')';
+    const SchedDFSResult *DFS =
+      static_cast<const ScheduleDAGMI*>(G)->getDFSResult();
+    SS << "SU:" << SU->NodeNum;
+    if (DFS)
+      SS << " I:" << DFS->getNumInstrs(SU);
     return SS.str();
   }
   static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) {
diff --git a/contrib/llvm/lib/CodeGen/MachineSink.cpp b/contrib/llvm/lib/CodeGen/MachineSink.cpp
index 4dafbe5..105d7c2 100644
--- a/contrib/llvm/lib/CodeGen/MachineSink.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineSink.cpp
@@ -308,12 +308,29 @@ bool MachineSinking::isWorthBreakingCriticalEdge(MachineInstr *MI,
   // to be sunk then it's probably worth it.
   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
     const MachineOperand &MO = MI->getOperand(i);
-    if (!MO.isReg()) continue;
+    if (!MO.isReg() || !MO.isUse())
+      continue;
     unsigned Reg = MO.getReg();
-    if (Reg == 0 || !TargetRegisterInfo::isPhysicalRegister(Reg))
+    if (Reg == 0)
       continue;
-    if (MRI->hasOneNonDBGUse(Reg))
-      return true;
+
+    // We don't move live definitions of physical registers,
+    // so sinking their uses won't enable any opportunities.
+    if (TargetRegisterInfo::isPhysicalRegister(Reg))
+      continue;
+
+    // If this instruction is the only user of a virtual register,
+    // check if breaking the edge will enable sinking
+    // both this instruction and the defining instruction.
+    if (MRI->hasOneNonDBGUse(Reg)) {
+      // If the definition resides in the same MBB,
+      // claim it's likely we can sink these together.
+      // If the definition resides elsewhere, we aren't
+      // blocking it from being sunk so don't break the edge.
+      MachineInstr *DefMI = MRI->getVRegDef(Reg);
+      if (DefMI->getParent() == MI->getParent())
+        return true;
+    }
   }
 
   return false;
@@ -394,7 +411,7 @@ static bool AvoidsSinking(MachineInstr *MI, MachineRegisterInfo *MRI) {
 /// collectDebugValues - Scan instructions following MI and collect any
 /// matching DBG_VALUEs.
 static void collectDebugValues(MachineInstr *MI,
-                               SmallVector<MachineInstr *, 2> & DbgValues) {
+                               SmallVectorImpl<MachineInstr *> &DbgValues) {
   DbgValues.clear();
   if (!MI->getOperand(0).isReg())
     return;
@@ -537,8 +554,8 @@ MachineBasicBlock *MachineSinking::FindSuccToSinkTo(MachineInstr *MI,
   // We give successors with smaller loop depth higher priority.
   SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
   std::stable_sort(Succs.begin(), Succs.end(), SuccessorSorter(LI));
-  for (SmallVector<MachineBasicBlock*, 4>::iterator SI = Succs.begin(),
-       E = Succs.end(); SI != E; ++SI) {
+  for (SmallVectorImpl<MachineBasicBlock*>::iterator SI = Succs.begin(),
+       E = Succs.end(); SI != E; ++SI) {
     MachineBasicBlock *SuccBlock = *SI;
     bool LocalUse = false;
     if (AllUsesDominatedByBlock(Reg, SuccBlock, MBB,
@@ -615,9 +632,8 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
   DEBUG(dbgs() << "Sink instr " << *MI << "\tinto block " << *SuccToSinkTo);
 
-  // If the block has multiple predecessors, this would introduce computation on
-  // a path that it doesn't already exist. We could split the critical edge,
-  // but for now we just punt.
+  // If the block has multiple predecessors, this is a critical edge.
+  // Decide if we can sink along it or need to break the edge.
   if (SuccToSinkTo->pred_size() > 1) {
     // We cannot sink a load across a critical edge - there may be stores in
     // other code paths.
@@ -697,7 +713,7 @@ bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
                  ++MachineBasicBlock::iterator(MI));
 
   // Move debug values.
-  for (SmallVector<MachineInstr *, 2>::iterator DBI = DbgValuesToSink.begin(),
+  for (SmallVectorImpl<MachineInstr *>::iterator DBI = DbgValuesToSink.begin(),
        DBE = DbgValuesToSink.end(); DBI != DBE; ++DBI) {
     MachineInstr *DbgMI = *DBI;
     SuccToSinkTo->splice(InsertPos, ParentBlock, DbgMI,
diff --git a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
index 00f702c..6aa3f67 100644
--- a/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
+++ b/contrib/llvm/lib/CodeGen/MachineTraceMetrics.cpp
@@ -853,8 +853,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) {
       // Add latency if DefMI is a real instruction. Transients get latency 0.
       if (!Dep.DefMI->isTransient())
         DepCycle += MTM.SchedModel
-          .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp,
-                                 /* FindMin = */ false);
+          .computeOperandLatency(Dep.DefMI, Dep.DefOp, UseMI, Dep.UseOp);
       Cycle = std::max(Cycle, DepCycle);
     }
     // Remember the instruction depth.
@@ -902,8 +901,7 @@ static unsigned updatePhysDepsUpwards(const MachineInstr *MI, unsigned Height,
       // We may not know the UseMI of this dependency, if it came from the
       // live-in list. SchedModel can handle a NULL UseMI.
       DepHeight += SchedModel
-        .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op,
-                               /* FindMin = */ false);
+        .computeOperandLatency(MI, MO.getOperandNo(), I->MI, I->Op);
     }
     Height = std::max(Height, DepHeight);
     // This regunit is dead above MI.
@@ -941,7 +939,7 @@ static bool pushDepHeight(const DataDep &Dep,
   // Adjust height by Dep.DefMI latency.
   if (!Dep.DefMI->isTransient())
     UseHeight += SchedModel.computeOperandLatency(Dep.DefMI, Dep.DefOp,
-                                                  UseMI, Dep.UseOp, false);
+                                                  UseMI, Dep.UseOp);
 
   // Update Heights[DefMI] to be the maximum height seen.
MIHeightMap::iterator I; @@ -1171,7 +1169,7 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { // Add latency if DefMI is a real instruction. Transients get latency 0. if (!Dep.DefMI->isTransient()) DepCycle += TE.MTM.SchedModel - .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp, false); + .computeOperandLatency(Dep.DefMI, Dep.DefOp, PHI, Dep.UseOp); return DepCycle; } diff --git a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp index 037043f..d61470c 100644 --- a/contrib/llvm/lib/CodeGen/MachineVerifier.cpp +++ b/contrib/llvm/lib/CodeGen/MachineVerifier.cpp @@ -25,6 +25,7 @@ #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -212,6 +213,10 @@ namespace { const LiveInterval &LI); void report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI); + void report(const char *msg, const MachineFunction *MF, + const LiveRange &LR); + void report(const char *msg, const MachineBasicBlock *MBB, + const LiveRange &LR); void verifyInlineAsm(const MachineInstr *MI); @@ -224,9 +229,12 @@ namespace { void verifyLiveVariables(); void verifyLiveIntervals(); void verifyLiveInterval(const LiveInterval&); - void verifyLiveIntervalValue(const LiveInterval&, VNInfo*); - void verifyLiveIntervalSegment(const LiveInterval&, - LiveInterval::const_iterator); + void verifyLiveRangeValue(const LiveRange&, const VNInfo*, unsigned); + void verifyLiveRangeSegment(const LiveRange&, + const LiveRange::const_iterator I, unsigned); + void verifyLiveRange(const LiveRange&, unsigned); + + void verifyStackFrame(); }; struct MachineVerifierPass : public MachineFunctionPass { @@ -268,8 +276,7 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { raw_ostream *OutFile = 0; if (OutFileName) { std::string ErrorInfo; - OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, - raw_fd_ostream::F_Append); + OutFile = new raw_fd_ostream(OutFileName, ErrorInfo, sys::fs::F_Append); if (!ErrorInfo.empty()) { errs() << "Error opening '" << OutFileName << "': " << ErrorInfo << '\n'; exit(1); @@ -412,23 +419,25 @@ void MachineVerifier::report(const char *msg, void MachineVerifier::report(const char *msg, const MachineFunction *MF, const LiveInterval &LI) { report(msg, MF); - *OS << "- interval: "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - *OS << PrintReg(LI.reg, TRI); - else - *OS << PrintRegUnit(LI.reg, TRI); - *OS << ' ' << LI << '\n'; + *OS << "- interval: " << LI << '\n'; } void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, const LiveInterval &LI) { report(msg, MBB); - *OS << "- interval: "; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) - *OS << PrintReg(LI.reg, TRI); - else - *OS << PrintRegUnit(LI.reg, TRI); - *OS << ' ' << LI << '\n'; + *OS << "- interval: " << LI << '\n'; +} + +void MachineVerifier::report(const char *msg, const MachineBasicBlock *MBB, + const LiveRange &LR) { + report(msg, MBB); + *OS << "- liverange: " << LR << "\n"; +} + +void MachineVerifier::report(const char *msg, const MachineFunction *MF, + const LiveRange &LR) { + report(msg, MF); + *OS << "- liverange: " << LR << "\n"; } void MachineVerifier::markReachable(const MachineBasicBlock *MBB) { @@ -475,6 +484,8 @@ void MachineVerifier::visitMachineFunctionBefore() { // Check that the register use lists are sane. 
MRI->verifyUseLists(); + + verifyStackFrame(); } // Does iterator point to a and b as the first two elements? @@ -669,8 +680,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { report("MBB live-in list contains non-physical register", MBB); continue; } - regsLive.insert(*I); - for (MCSubRegIterator SubRegs(*I, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(*I, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) regsLive.insert(*SubRegs); } regsLiveInButUnused = regsLive; @@ -679,8 +690,8 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { assert(MFI && "Function has no frame info"); BitVector PR = MFI->getPristineRegs(MBB); for (int I = PR.find_first(); I>0; I = PR.find_next(I)) { - regsLive.insert(I); - for (MCSubRegIterator SubRegs(I, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(I, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) regsLive.insert(*SubRegs); } @@ -764,7 +775,7 @@ void MachineVerifier::visitMachineInstrBefore(const MachineInstr *MI) { if (MI->getNumOperands() < MCID.getNumOperands()) { report("Too few operands", MI); *OS << MCID.getNumOperands() << " operands expected, but " - << MI->getNumExplicitOperands() << " given.\n"; + << MI->getNumOperands() << " given.\n"; } // Check the tied operands. @@ -822,7 +833,7 @@ MachineVerifier::visitMachineOperand(const MachineOperand *MO, unsigned MONum) { if (MO->isReg() && !(MI->isVariadic() && MONum == MCID.getNumOperands()-1)) { if (MO->isDef() && !MCOI.isOptionalDef()) - report("Explicit operand marked as def", MO, MONum); + report("Explicit operand marked as def", MO, MONum); if (MO->isImplicit()) report("Explicit operand marked as implicit", MO, MONum); } @@ -997,16 +1008,16 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { // Check the cached regunit intervals. if (TargetRegisterInfo::isPhysicalRegister(Reg) && !isReserved(Reg)) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { - if (const LiveInterval *LI = LiveInts->getCachedRegUnit(*Units)) { - LiveRangeQuery LRQ(*LI, UseIdx); + if (const LiveRange *LR = LiveInts->getCachedRegUnit(*Units)) { + LiveQueryResult LRQ = LR->Query(UseIdx); if (!LRQ.valueIn()) { - report("No live range at use", MO, MONum); + report("No live segment at use", MO, MONum); *OS << UseIdx << " is not live in " << PrintRegUnit(*Units, TRI) - << ' ' << *LI << '\n'; + << ' ' << *LR << '\n'; } if (MO->isKill() && !LRQ.isKill()) { report("Live range continues after kill flag", MO, MONum); - *OS << PrintRegUnit(*Units, TRI) << ' ' << *LI << '\n'; + *OS << PrintRegUnit(*Units, TRI) << ' ' << *LR << '\n'; } } } @@ -1016,9 +1027,9 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { if (LiveInts->hasInterval(Reg)) { // This is a virtual register interval. const LiveInterval &LI = LiveInts->getInterval(Reg); - LiveRangeQuery LRQ(LI, UseIdx); + LiveQueryResult LRQ = LI.Query(UseIdx); if (!LRQ.valueIn()) { - report("No live range at use", MO, MONum); + report("No live segment at use", MO, MONum); *OS << UseIdx << " is not live in " << LI << '\n'; } // Check for extra kill flags. @@ -1067,7 +1078,7 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { llvm::next(MRI->def_begin(Reg)) != MRI->def_end()) report("Multiple virtual register defs in SSA form", MO, MONum); - // Check LiveInts for a live range, but only for virtual registers. 
+ // Check LiveInts for a live segment, but only for virtual registers. if (LiveInts && TargetRegisterInfo::isVirtualRegister(Reg) && !LiveInts->isNotInMIMap(MI)) { SlotIndex DefIdx = LiveInts->getInstructionIndex(MI); @@ -1082,9 +1093,17 @@ void MachineVerifier::checkLiveness(const MachineOperand *MO, unsigned MONum) { << DefIdx << " in " << LI << '\n'; } } else { - report("No live range at def", MO, MONum); + report("No live segment at def", MO, MONum); *OS << DefIdx << " is not live in " << LI << '\n'; } + // Check that, if the dead def flag is present, LiveInts agree. + if (MO->isDead()) { + LiveQueryResult LRQ = LI.Query(DefIdx); + if (!LRQ.isDeadDef()) { + report("Live range continues after dead def flag", MO, MONum); + *OS << "Live range: " << LI << '\n'; + } + } } else { report("Virtual register has no Live interval", MO, MONum); } @@ -1331,25 +1350,26 @@ void MachineVerifier::verifyLiveIntervals() { // Verify all the cached regunit intervals. for (unsigned i = 0, e = TRI->getNumRegUnits(); i != e; ++i) - if (const LiveInterval *LI = LiveInts->getCachedRegUnit(i)) - verifyLiveInterval(*LI); + if (const LiveRange *LR = LiveInts->getCachedRegUnit(i)) + verifyLiveRange(*LR, i); } -void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, - VNInfo *VNI) { +void MachineVerifier::verifyLiveRangeValue(const LiveRange &LR, + const VNInfo *VNI, + unsigned Reg) { if (VNI->isUnused()) return; - const VNInfo *DefVNI = LI.getVNInfoAt(VNI->def); + const VNInfo *DefVNI = LR.getVNInfoAt(VNI->def); if (!DefVNI) { - report("Valno not live at def and not marked unused", MF, LI); + report("Valno not live at def and not marked unused", MF, LR); *OS << "Valno #" << VNI->id << '\n'; return; } if (DefVNI != VNI) { - report("Live range at def has different valno", MF, LI); + report("Live segment at def has different valno", MF, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << " where valno #" << DefVNI->id << " is live\n"; return; @@ -1357,15 +1377,15 @@ void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(VNI->def); if (!MBB) { - report("Invalid definition index", MF, LI); + report("Invalid definition index", MF, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def - << " in " << LI << '\n'; + << " in " << LR << '\n'; return; } if (VNI->isPHIDef()) { if (VNI->def != LiveInts->getMBBStartIdx(MBB)) { - report("PHIDef value is not defined at MBB start", MBB, LI); + report("PHIDef value is not defined at MBB start", MBB, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << ", not at the beginning of BB#" << MBB->getNumber() << '\n'; } @@ -1375,161 +1395,154 @@ void MachineVerifier::verifyLiveIntervalValue(const LiveInterval &LI, // Non-PHI def. 
const MachineInstr *MI = LiveInts->getInstructionFromIndex(VNI->def); if (!MI) { - report("No instruction at def index", MBB, LI); + report("No instruction at def index", MBB, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; return; } - bool hasDef = false; - bool isEarlyClobber = false; - for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || !MOI->isDef()) - continue; - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - if (MOI->getReg() != LI.reg) - continue; - } else { - if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || - !TRI->hasRegUnit(MOI->getReg(), LI.reg)) + if (Reg != 0) { + bool hasDef = false; + bool isEarlyClobber = false; + for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { + if (!MOI->isReg() || !MOI->isDef()) continue; + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (MOI->getReg() != Reg) + continue; + } else { + if (!TargetRegisterInfo::isPhysicalRegister(MOI->getReg()) || + !TRI->hasRegUnit(MOI->getReg(), Reg)) + continue; + } + hasDef = true; + if (MOI->isEarlyClobber()) + isEarlyClobber = true; } - hasDef = true; - if (MOI->isEarlyClobber()) - isEarlyClobber = true; - } - if (!hasDef) { - report("Defining instruction does not modify register", MI); - *OS << "Valno #" << VNI->id << " in " << LI << '\n'; - } + if (!hasDef) { + report("Defining instruction does not modify register", MI); + *OS << "Valno #" << VNI->id << " in " << LR << '\n'; + } - // Early clobber defs begin at USE slots, but other defs must begin at - // DEF slots. - if (isEarlyClobber) { - if (!VNI->def.isEarlyClobber()) { - report("Early clobber def must be at an early-clobber slot", MBB, LI); + // Early clobber defs begin at USE slots, but other defs must begin at + // DEF slots. 
+ if (isEarlyClobber) { + if (!VNI->def.isEarlyClobber()) { + report("Early clobber def must be at an early-clobber slot", MBB, LR); + *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; + } + } else if (!VNI->def.isRegister()) { + report("Non-PHI, non-early clobber def must be at a register slot", + MBB, LR); *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } - } else if (!VNI->def.isRegister()) { - report("Non-PHI, non-early clobber def must be at a register slot", - MBB, LI); - *OS << "Valno #" << VNI->id << " is defined at " << VNI->def << '\n'; } } -void -MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, - LiveInterval::const_iterator I) { - const VNInfo *VNI = I->valno; - assert(VNI && "Live range has no valno"); - - if (VNI->id >= LI.getNumValNums() || VNI != LI.getValNumInfo(VNI->id)) { - report("Foreign valno in live range", MF, LI); - *OS << *I << " has a bad valno\n"; +void MachineVerifier::verifyLiveRangeSegment(const LiveRange &LR, + const LiveRange::const_iterator I, + unsigned Reg) { + const LiveRange::Segment &S = *I; + const VNInfo *VNI = S.valno; + assert(VNI && "Live segment has no valno"); + + if (VNI->id >= LR.getNumValNums() || VNI != LR.getValNumInfo(VNI->id)) { + report("Foreign valno in live segment", MF, LR); + *OS << S << " has a bad valno\n"; } if (VNI->isUnused()) { - report("Live range valno is marked unused", MF, LI); - *OS << *I << '\n'; + report("Live segment valno is marked unused", MF, LR); + *OS << S << '\n'; } - const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(I->start); + const MachineBasicBlock *MBB = LiveInts->getMBBFromIndex(S.start); if (!MBB) { - report("Bad start of live segment, no basic block", MF, LI); - *OS << *I << '\n'; + report("Bad start of live segment, no basic block", MF, LR); + *OS << S << '\n'; return; } SlotIndex MBBStartIdx = LiveInts->getMBBStartIdx(MBB); - if (I->start != MBBStartIdx && I->start != VNI->def) { - report("Live segment must begin at MBB entry or valno def", MBB, LI); - *OS << *I << '\n'; + if (S.start != MBBStartIdx && S.start != VNI->def) { + report("Live segment must begin at MBB entry or valno def", MBB, LR); + *OS << S << '\n'; } const MachineBasicBlock *EndMBB = - LiveInts->getMBBFromIndex(I->end.getPrevSlot()); + LiveInts->getMBBFromIndex(S.end.getPrevSlot()); if (!EndMBB) { - report("Bad end of live segment, no basic block", MF, LI); - *OS << *I << '\n'; + report("Bad end of live segment, no basic block", MF, LR); + *OS << S << '\n'; return; } // No more checks for live-out segments. - if (I->end == LiveInts->getMBBEndIdx(EndMBB)) + if (S.end == LiveInts->getMBBEndIdx(EndMBB)) return; // RegUnit intervals are allowed dead phis. - if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && VNI->isPHIDef() && - I->start == VNI->def && I->end == VNI->def.getDeadSlot()) + if (!TargetRegisterInfo::isVirtualRegister(Reg) && VNI->isPHIDef() && + S.start == VNI->def && S.end == VNI->def.getDeadSlot()) return; // The live segment is ending inside EndMBB const MachineInstr *MI = - LiveInts->getInstructionFromIndex(I->end.getPrevSlot()); + LiveInts->getInstructionFromIndex(S.end.getPrevSlot()); if (!MI) { - report("Live segment doesn't end at a valid instruction", EndMBB, LI); - *OS << *I << '\n'; + report("Live segment doesn't end at a valid instruction", EndMBB, LR); + *OS << S << '\n'; return; } // The block slot must refer to a basic block boundary. 
- if (I->end.isBlock()) { - report("Live segment ends at B slot of an instruction", EndMBB, LI); - *OS << *I << '\n'; + if (S.end.isBlock()) { + report("Live segment ends at B slot of an instruction", EndMBB, LR); + *OS << S << '\n'; } - if (I->end.isDead()) { + if (S.end.isDead()) { // Segment ends on the dead slot. // That means there must be a dead def. - if (!SlotIndex::isSameInstr(I->start, I->end)) { - report("Live segment ending at dead slot spans instructions", EndMBB, LI); - *OS << *I << '\n'; + if (!SlotIndex::isSameInstr(S.start, S.end)) { + report("Live segment ending at dead slot spans instructions", EndMBB, LR); + *OS << S << '\n'; } } // A live segment can only end at an early-clobber slot if it is being // redefined by an early-clobber def. - if (I->end.isEarlyClobber()) { - if (I+1 == LI.end() || (I+1)->start != I->end) { + if (S.end.isEarlyClobber()) { + if (I+1 == LR.end() || (I+1)->start != S.end) { report("Live segment ending at early clobber slot must be " - "redefined by an EC def in the same instruction", EndMBB, LI); - *OS << *I << '\n'; + "redefined by an EC def in the same instruction", EndMBB, LR); + *OS << S << '\n'; } } // The following checks only apply to virtual registers. Physreg liveness // is too weird to check. - if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { - // A live range can end with either a redefinition, a kill flag on a + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + // A live segment can end with either a redefinition, a kill flag on a // use, or a dead flag on a def. bool hasRead = false; - bool hasDeadDef = false; for (ConstMIBundleOperands MOI(MI); MOI.isValid(); ++MOI) { - if (!MOI->isReg() || MOI->getReg() != LI.reg) + if (!MOI->isReg() || MOI->getReg() != Reg) continue; if (MOI->readsReg()) hasRead = true; - if (MOI->isDef() && MOI->isDead()) - hasDeadDef = true; } - - if (I->end.isDead()) { - if (!hasDeadDef) { - report("Instruction doesn't have a dead def operand", MI); - I->print(*OS); - *OS << " in " << LI << '\n'; - } - } else { + if (!S.end.isDead()) { if (!hasRead) { - report("Instruction ending live range doesn't read the register", MI); - *OS << *I << " in " << LI << '\n'; + report("Instruction ending live segment doesn't read the register", MI); + *OS << S << " in " << LR << '\n'; } } } // Now check all the basic blocks in this live segment. MachineFunction::const_iterator MFI = MBB; - // Is this live range the beginning of a non-PHIDef VN? - if (I->start == VNI->def && !VNI->isPHIDef()) { + // Is this live segment the beginning of a non-PHIDef VN? + if (S.start == VNI->def && !VNI->isPHIDef()) { // Not live-in to any blocks. if (MBB == EndMBB) return; @@ -1537,9 +1550,9 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, ++MFI; } for (;;) { - assert(LiveInts->isLiveInToMBB(LI, MFI)); + assert(LiveInts->isLiveInToMBB(LR, MFI)); // We don't know how to track physregs into a landing pad. - if (!TargetRegisterInfo::isVirtualRegister(LI.reg) && + if (!TargetRegisterInfo::isVirtualRegister(Reg) && MFI->isLandingPad()) { if (&*MFI == EndMBB) break; @@ -1555,11 +1568,11 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, for (MachineBasicBlock::const_pred_iterator PI = MFI->pred_begin(), PE = MFI->pred_end(); PI != PE; ++PI) { SlotIndex PEnd = LiveInts->getMBBEndIdx(*PI); - const VNInfo *PVNI = LI.getVNInfoBefore(PEnd); + const VNInfo *PVNI = LR.getVNInfoBefore(PEnd); // All predecessors must have a live-out value. 
if (!PVNI) { - report("Register not marked live out of predecessor", *PI, LI); + report("Register not marked live out of predecessor", *PI, LR); *OS << "Valno #" << VNI->id << " live into BB#" << MFI->getNumber() << '@' << LiveInts->getMBBStartIdx(MFI) << ", not live before " << PEnd << '\n'; @@ -1568,7 +1581,7 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, // Only PHI-defs can take different predecessor values. if (!IsPHI && PVNI != VNI) { - report("Different value live out of predecessor", *PI, LI); + report("Different value live out of predecessor", *PI, LR); *OS << "Valno #" << PVNI->id << " live out of BB#" << (*PI)->getNumber() << '@' << PEnd << "\nValno #" << VNI->id << " live into BB#" << MFI->getNumber() @@ -1581,13 +1594,17 @@ MachineVerifier::verifyLiveIntervalSegment(const LiveInterval &LI, } } -void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { - for (LiveInterval::const_vni_iterator I = LI.vni_begin(), E = LI.vni_end(); - I!=E; ++I) - verifyLiveIntervalValue(LI, *I); +void MachineVerifier::verifyLiveRange(const LiveRange &LR, unsigned Reg) { + for (LiveRange::const_vni_iterator I = LR.vni_begin(), E = LR.vni_end(); + I != E; ++I) + verifyLiveRangeValue(LR, *I, Reg); + + for (LiveRange::const_iterator I = LR.begin(), E = LR.end(); I != E; ++I) + verifyLiveRangeSegment(LR, I, Reg); +} - for (LiveInterval::const_iterator I = LI.begin(), E = LI.end(); I!=E; ++I) - verifyLiveIntervalSegment(LI, I); +void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { + verifyLiveRange(LI, LI.reg); // Check the LI only has one connected component. if (TargetRegisterInfo::isVirtualRegister(LI.reg)) { @@ -1606,3 +1623,130 @@ void MachineVerifier::verifyLiveInterval(const LiveInterval &LI) { } } } + +namespace { + // FrameSetup and FrameDestroy can have zero adjustment, so using a single + // integer, we can't tell whether it is a FrameSetup or FrameDestroy if the + // value is zero. + // We use a bool plus an integer to capture the stack state. + struct StackStateOfBB { + StackStateOfBB() : EntryValue(0), ExitValue(0), EntryIsSetup(false), + ExitIsSetup(false) { } + StackStateOfBB(int EntryVal, int ExitVal, bool EntrySetup, bool ExitSetup) : + EntryValue(EntryVal), ExitValue(ExitVal), EntryIsSetup(EntrySetup), + ExitIsSetup(ExitSetup) { } + // Can be negative, which means we are setting up a frame. + int EntryValue; + int ExitValue; + bool EntryIsSetup; + bool ExitIsSetup; + }; +} + +/// Make sure on every path through the CFG, a FrameSetup is always followed +/// by a FrameDestroy , stack adjustments are identical on all +/// CFG edges to a merge point, and frame is destroyed at end of a return block. +void MachineVerifier::verifyStackFrame() { + int FrameSetupOpcode = TII->getCallFrameSetupOpcode(); + int FrameDestroyOpcode = TII->getCallFrameDestroyOpcode(); + + SmallVector SPState; + SPState.resize(MF->getNumBlockIDs()); + SmallPtrSet Reachable; + + // Visit the MBBs in DFS order. + for (df_ext_iterator > + DFI = df_ext_begin(MF, Reachable), DFE = df_ext_end(MF, Reachable); + DFI != DFE; ++DFI) { + const MachineBasicBlock *MBB = *DFI; + + StackStateOfBB BBState; + // Check the exit state of the DFS stack predecessor. 
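As the comment in the new verifyStackFrame code notes, a zero adjustment alone cannot distinguish a FrameSetup from a FrameDestroy, so the verifier carries a (value, is-setup) pair per block boundary. A toy version of the edge-consistency rule it applies across the CFG (StackState and edgeConsistent are hypothetical names):

    #include <cassert>

    // Stack state at a block boundary: the pending SP adjustment plus
    // whether a call-frame setup is currently open. The bool is needed
    // because a zero adjustment is ambiguous on its own.
    struct StackState {
      int value;
      bool isSetup;
    };

    // On every CFG edge the predecessor's exit state must equal the
    // successor's entry state; otherwise two paths disagree on the stack.
    static bool edgeConsistent(StackState predExit, StackState succEntry) {
      return predExit.value == succEntry.value &&
             predExit.isSetup == succEntry.isSetup;
    }

    int main() {
      StackState a = {-16, true}, b = {-16, true}, c = {0, false};
      assert(edgeConsistent(a, b));
      assert(!edgeConsistent(a, c));   // one path still has a frame open
    }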
+ if (DFI.getPathLength() >= 2) { + const MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + BBState.EntryValue = SPState[StackPred->getNumber()].ExitValue; + BBState.EntryIsSetup = SPState[StackPred->getNumber()].ExitIsSetup; + BBState.ExitValue = BBState.EntryValue; + BBState.ExitIsSetup = BBState.EntryIsSetup; + } + + // Update stack state by checking contents of MBB. + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); + I != E; ++I) { + if (I->getOpcode() == FrameSetupOpcode) { + // The first operand of a FrameOpcode should be i32. + int Size = I->getOperand(0).getImm(); + assert(Size >= 0 && + "Value should be non-negative in FrameSetup and FrameDestroy.\n"); + + if (BBState.ExitIsSetup) + report("FrameSetup is after another FrameSetup", I); + BBState.ExitValue -= Size; + BBState.ExitIsSetup = true; + } + + if (I->getOpcode() == FrameDestroyOpcode) { + // The first operand of a FrameOpcode should be i32. + int Size = I->getOperand(0).getImm(); + assert(Size >= 0 && + "Value should be non-negative in FrameSetup and FrameDestroy.\n"); + + if (!BBState.ExitIsSetup) + report("FrameDestroy is not after a FrameSetup", I); + int AbsSPAdj = BBState.ExitValue < 0 ? -BBState.ExitValue : + BBState.ExitValue; + if (BBState.ExitIsSetup && AbsSPAdj != Size) { + report("FrameDestroy is after FrameSetup ", I); + *OS << "FrameDestroy <" << Size << "> is after FrameSetup <" + << AbsSPAdj << ">.\n"; + } + BBState.ExitValue += Size; + BBState.ExitIsSetup = false; + } + } + SPState[MBB->getNumber()] = BBState; + + // Make sure the exit state of any predecessor is consistent with the entry + // state. + for (MachineBasicBlock::const_pred_iterator I = MBB->pred_begin(), + E = MBB->pred_end(); I != E; ++I) { + if (Reachable.count(*I) && + (SPState[(*I)->getNumber()].ExitValue != BBState.EntryValue || + SPState[(*I)->getNumber()].ExitIsSetup != BBState.EntryIsSetup)) { + report("The exit stack state of a predecessor is inconsistent.", MBB); + *OS << "Predecessor BB#" << (*I)->getNumber() << " has exit state (" + << SPState[(*I)->getNumber()].ExitValue << ", " + << SPState[(*I)->getNumber()].ExitIsSetup + << "), while BB#" << MBB->getNumber() << " has entry state (" + << BBState.EntryValue << ", " << BBState.EntryIsSetup << ").\n"; + } + } + + // Make sure the entry state of any successor is consistent with the exit + // state. + for (MachineBasicBlock::const_succ_iterator I = MBB->succ_begin(), + E = MBB->succ_end(); I != E; ++I) { + if (Reachable.count(*I) && + (SPState[(*I)->getNumber()].EntryValue != BBState.ExitValue || + SPState[(*I)->getNumber()].EntryIsSetup != BBState.ExitIsSetup)) { + report("The entry stack state of a successor is inconsistent.", MBB); + *OS << "Successor BB#" << (*I)->getNumber() << " has entry state (" + << SPState[(*I)->getNumber()].EntryValue << ", " + << SPState[(*I)->getNumber()].EntryIsSetup + << "), while BB#" << MBB->getNumber() << " has exit state (" + << BBState.ExitValue << ", " << BBState.ExitIsSetup << ").\n"; + } + } + + // Make sure a basic block with return ends with zero stack adjustment. 
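Within a block, the walk above lowers the state at each FrameSetup and requires each FrameDestroy to match the absolute pending adjustment before closing it. A self-contained replay of that bookkeeping (FrameOp and replayBlock are illustrative stand-ins):

    #include <cassert>
    #include <cstdlib>
    #include <vector>

    struct FrameOp { bool isSetup; int size; };   // one call-frame pseudo

    // Replay a block's call-frame pseudos: a setup opens a frame and lowers
    // the state; a destroy must match the absolute pending adjustment and
    // closes it. Returns false on the violations the verifier reports.
    static bool replayBlock(const std::vector<FrameOp> &ops) {
      int value = 0;
      bool isSetup = false;
      for (const FrameOp &op : ops) {
        if (op.isSetup) {
          if (isSetup) return false;                    // setup after setup
          value -= op.size;
          isSetup = true;
        } else {
          if (!isSetup) return false;                   // destroy w/o setup
          if (std::abs(value) != op.size) return false; // sizes disagree
          value += op.size;
          isSetup = false;
        }
      }
      return true;
    }

    int main() {
      assert(replayBlock({{true, 16}, {false, 16}}));   // balanced pair
      assert(!replayBlock({{true, 16}, {false, 8}}));   // mismatched destroy
    }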
+ if (!MBB->empty() && MBB->back().isReturn()) { + if (BBState.ExitIsSetup) + report("A return block ends with a FrameSetup.", MBB); + if (BBState.ExitValue) + report("A return block ends with a nonzero stack adjustment.", MBB); + } + } +} diff --git a/contrib/llvm/lib/CodeGen/PHIElimination.cpp b/contrib/llvm/lib/CodeGen/PHIElimination.cpp index 5584708..dcd9072 100644 --- a/contrib/llvm/lib/CodeGen/PHIElimination.cpp +++ b/contrib/llvm/lib/CodeGen/PHIElimination.cpp @@ -66,7 +66,7 @@ namespace { /// bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); void LowerPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); + MachineBasicBlock::iterator LastPHIIt); /// analyzePHINodes - Gather information about the PHI nodes in /// here. In particular, we want to map the number of uses of a virtual @@ -185,10 +185,11 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, // Get an iterator to the first instruction after the last PHI node (this may // also be the end of the basic block). - MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin()); + MachineBasicBlock::iterator LastPHIIt = + prior(MBB.SkipPHIsAndLabels(MBB.begin())); while (MBB.front().isPHI()) - LowerPHINode(MBB, AfterPHIsIt); + LowerPHINode(MBB, LastPHIIt); return true; } @@ -218,8 +219,11 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, /// LowerPHINode - Lower the PHI node at the top of the specified block, /// void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt) { + MachineBasicBlock::iterator LastPHIIt) { ++NumLowered; + + MachineBasicBlock::iterator AfterPHIsIt = llvm::next(LastPHIIt); + // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); @@ -309,14 +313,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (IncomingReg) { // Add the region from the beginning of MBB to the copy instruction to // IncomingReg's live interval. - LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg); + LiveInterval &IncomingLI = LIS->createEmptyInterval(IncomingReg); VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex); if (!IncomingVNI) IncomingVNI = IncomingLI.getNextValue(MBBStartIndex, LIS->getVNInfoAllocator()); - IncomingLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getRegSlot(), - IncomingVNI)); + IncomingLI.addSegment(LiveInterval::Segment(MBBStartIndex, + DestCopyIndex.getRegSlot(), + IncomingVNI)); } LiveInterval &DestLI = LIS->getInterval(DestReg); @@ -328,14 +332,14 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, // the copy instruction. VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex); assert(OrigDestVNI && "PHI destination should be live at block entry."); - DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot()); + DestLI.removeSegment(MBBStartIndex, MBBStartIndex.getDeadSlot()); DestLI.createDeadDef(DestCopyIndex.getRegSlot(), LIS->getVNInfoAllocator()); DestLI.removeValNo(OrigDestVNI); } else { // Otherwise, remove the region from the beginning of MBB to the copy // instruction from DestReg's live interval. 
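The PHIElimination hunk above caches an iterator to the last PHI and recomputes the insertion point with llvm::next() on every lowering, instead of capturing the first non-PHI instruction once up front. A toy std::list illustration of why the recomputed iterator behaves differently (this is not the MachineBasicBlock API; note how later inserts land closer to the PHIs):

    #include <cassert>
    #include <iterator>
    #include <list>

    int main() {
      std::list<int> block = {1, 2, 100};        // 1 and 2 play the PHIs
      auto lastPHI = std::next(block.begin());   // iterator to the last PHI
      const int copies[] = {10, 20};
      for (int copy : copies) {
        auto afterPHIs = std::next(lastPHI);     // recomputed per lowering
        block.insert(afterPHIs, copy);           // insert before afterPHIs
      }
      // Each recomputed insertion point sits right after the PHIs, so the
      // latest copy ends up first.
      assert((block == std::list<int>{1, 2, 20, 10, 100}));
    }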
- DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot()); + DestLI.removeSegment(MBBStartIndex, DestCopyIndex.getRegSlot()); VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); assert(DestVNI && "PHI destination should be live at its definition."); DestVNI->def = DestCopyIndex.getRegSlot(); @@ -456,7 +460,7 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, if (LIS) { if (NewSrcInstr) { LIS->InsertMachineInstrInMaps(NewSrcInstr); - LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr); + LIS->addSegmentToEndOfBlock(IncomingReg, NewSrcInstr); } if (!SrcUndef && @@ -507,8 +511,8 @@ void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, "Cannot find kill instruction"); SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst); - SrcLI.removeRange(LastUseIndex.getRegSlot(), - LIS->getMBBEndIdx(&opBlock)); + SrcLI.removeSegment(LastUseIndex.getRegSlot(), + LIS->getMBBEndIdx(&opBlock)); } } } diff --git a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h index 9ac47fb4..48234ae 100644 --- a/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h +++ b/contrib/llvm/lib/CodeGen/PHIEliminationUtils.h @@ -1,4 +1,4 @@ -//=- PHIEliminationUtils.h - Helper functions for PHI elimination *- C++ -*--=// +//=- PHIEliminationUtils.h - Helper functions for PHI elimination -*- C++ -*-=// // // The LLVM Compiler Infrastructure // diff --git a/contrib/llvm/lib/CodeGen/Passes.cpp b/contrib/llvm/lib/CodeGen/Passes.cpp index bfbc062..f4ffd03 100644 --- a/contrib/llvm/lib/CodeGen/Passes.cpp +++ b/contrib/llvm/lib/CodeGen/Passes.cpp @@ -58,8 +58,6 @@ OptimizeRegAlloc("optimize-regalloc", cl::Hidden, static cl::opt EnableMachineSched("enable-misched", cl::Hidden, cl::desc("Enable the machine instruction scheduling pass.")); -static cl::opt EnableStrongPHIElim("strong-phi-elim", cl::Hidden, - cl::desc("Use strong PHI elimination.")); static cl::opt DisablePostRAMachineLICM("disable-postra-machine-licm", cl::Hidden, cl::desc("Disable Machine LICM")); @@ -236,7 +234,7 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) // Temporarily disable experimental passes. const TargetSubtargetInfo &ST = TM->getSubtarget(); - if (!ST.enableMachineScheduler()) + if (!ST.useMachineScheduler()) disablePass(&MachineSchedulerID); } @@ -300,6 +298,8 @@ void TargetPassConfig::addPass(Pass *P) { if (Started && !Stopped) PM->add(P); + else + delete P; if (StopAfter == PassID) Stopped = true; if (StartAfter == PassID) @@ -331,7 +331,7 @@ AnalysisID TargetPassConfig::addPass(AnalysisID PassID) { addPass(P); // Ends the lifetime of P. // Add the passes after the pass P if there is any. - for (SmallVector, 4>::iterator + for (SmallVectorImpl >::iterator I = Impl->InsertedPasses.begin(), E = Impl->InsertedPasses.end(); I != E; ++I) { if ((*I).first == PassID) { @@ -396,7 +396,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { // removed from the parent invoke(s). This could happen when a landing // pad is shared by multiple invokes and is also a target of a normal // edge from elsewhere. 
- addPass(createSjLjEHPreparePass(TM->getTargetLowering())); + addPass(createSjLjEHPreparePass(TM)); // FALLTHROUGH case ExceptionHandling::DwarfCFI: case ExceptionHandling::ARM: @@ -404,7 +404,7 @@ void TargetPassConfig::addPassesToHandleExceptions() { addPass(createDwarfEHPass(TM)); break; case ExceptionHandling::None: - addPass(createLowerInvokePass(TM->getTargetLowering())); + addPass(createLowerInvokePass(TM)); // The lower invoke pass may create unreachable code. Remove it. addPass(createUnreachableBlockEliminationPass()); @@ -416,13 +416,13 @@ void TargetPassConfig::addPassesToHandleExceptions() { /// before exception handling preparation passes. void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) - addPass(createCodeGenPreparePass(getTargetLowering())); + addPass(createCodeGenPreparePass(TM)); } /// Add common passes that perform LLVM IR to IR transforms in preparation for /// instruction selection. void TargetPassConfig::addISelPrepare() { - addPass(createStackProtectorPass(getTargetLowering())); + addPass(createStackProtectorPass(TM)); addPreISel(); @@ -673,24 +673,15 @@ void TargetPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { // preferably fix the scavenger to not depend on them). addPass(&LiveVariablesID); - // Add passes that move from transformed SSA into conventional SSA. This is a - // "copy coalescing" problem. - // - if (!EnableStrongPHIElim) { - // Edge splitting is smarter with machine loop info. - addPass(&MachineLoopInfoID); - addPass(&PHIEliminationID); - } + // Edge splitting is smarter with machine loop info. + addPass(&MachineLoopInfoID); + addPass(&PHIEliminationID); // Eventually, we want to run LiveIntervals before PHI elimination. if (EarlyLiveIntervals) addPass(&LiveIntervalsID); addPass(&TwoAddressInstructionPassID); - - if (EnableStrongPHIElim) - addPass(&StrongPHIEliminationID); - addPass(&RegisterCoalescerID); // PreRA instruction scheduling. diff --git a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp index a7439b5..28f2d2f 100644 --- a/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp +++ b/contrib/llvm/lib/CodeGen/PeepholeOptimizer.cpp @@ -40,20 +40,30 @@ // If the branch instruction can use flag from "sub", then we can replace // "sub" with "subs" and eliminate the "cmp" instruction. // -// - Optimize Bitcast pairs: -// -// v1 = bitcast v0 -// v2 = bitcast v1 -// = v2 -// => -// v1 = bitcast v0 -// = v0 -// // - Optimize Loads: // // Loads that can be folded into a later instruction. A load is foldable // if it loads to virtual registers and the virtual register defined has // a single use. +// +// - Optimize Copies and Bitcast: +// +// Rewrite copies and bitcasts to avoid cross register bank copies +// when possible. 
+// E.g., Consider the following example, where capital and lower +// letters denote different register file: +// b = copy A <-- cross-bank copy +// C = copy b <-- cross-bank copy +// => +// b = copy A <-- cross-bank copy +// C = copy A <-- same-bank copy +// +// E.g., for bitcast: +// b = bitcast A <-- cross-bank copy +// C = bitcast b <-- cross-bank copy +// => +// b = bitcast A <-- cross-bank copy +// C = copy A <-- same-bank copy //===----------------------------------------------------------------------===// #define DEBUG_TYPE "peephole-opt" @@ -81,11 +91,11 @@ DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), cl::desc("Disable the peephole optimizer")); STATISTIC(NumReuse, "Number of extension results reused"); -STATISTIC(NumBitcasts, "Number of bitcasts eliminated"); STATISTIC(NumCmps, "Number of compares eliminated"); STATISTIC(NumImmFold, "Number of move immediate folded"); STATISTIC(NumLoadFold, "Number of loads folded"); STATISTIC(NumSelects, "Number of selects optimized"); +STATISTIC(NumCopiesBitcasts, "Number of copies/bitcasts optimized"); namespace { class PeepholeOptimizer : public MachineFunctionPass { @@ -112,11 +122,11 @@ namespace { } private: - bool optimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, SmallPtrSet &LocalMIs); bool optimizeSelect(MachineInstr *MI); + bool optimizeCopyOrBitcast(MachineInstr *MI); bool isMoveImmediate(MachineInstr *MI, SmallSet &ImmDefRegs, DenseMap &ImmDefMIs); @@ -298,78 +308,6 @@ optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, return Changed; } -/// optimizeBitcastInstr - If the instruction is a bitcast instruction A that -/// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast -/// a value cross register classes), and the source is defined by another -/// bitcast instruction B. And if the register class of source of B matches -/// the register class of instruction A, then it is legal to replace all uses -/// of the def of A with source of B. e.g. -/// %vreg0 = VMOVSR %vreg1 -/// %vreg3 = VMOVRS %vreg0 -/// Replace all uses of vreg3 with vreg1. - -bool PeepholeOptimizer::optimizeBitcastInstr(MachineInstr *MI, - MachineBasicBlock *MBB) { - unsigned NumDefs = MI->getDesc().getNumDefs(); - unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs; - if (NumDefs != 1) - return false; - - unsigned Def = 0; - unsigned Src = 0; - for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (MO.isDef()) - Def = Reg; - else if (Src) - // Multiple sources? - return false; - else - Src = Reg; - } - - assert(Def && Src && "Malformed bitcast instruction!"); - - MachineInstr *DefMI = MRI->getVRegDef(Src); - if (!DefMI || !DefMI->isBitcast()) - return false; - - unsigned SrcSrc = 0; - NumDefs = DefMI->getDesc().getNumDefs(); - NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs; - if (NumDefs != 1) - return false; - for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) { - const MachineOperand &MO = DefMI->getOperand(i); - if (!MO.isReg() || MO.isDef()) - continue; - unsigned Reg = MO.getReg(); - if (!Reg) - continue; - if (!MO.isDef()) { - if (SrcSrc) - // Multiple sources? 
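The new comment block above describes the rewrite in terms of register files. A toy model of the chain walk it implies, following copy definitions upward until a source sharing the destination's bank is found, might look like this (the banks, the def map, and findSameBankSource are all illustrative):

    #include <cassert>
    #include <map>

    // Toy model: every virtual register lives in a bank, and copies form a
    // def chain. Walk up the chain looking for a source whose bank matches
    // the destination, so the final copy stays inside one register file.
    struct Copy { int src; };

    static int findSameBankSource(int def, int src,
                                  const std::map<int, Copy> &defs,
                                  const std::map<int, char> &bank) {
      while (bank.at(src) != bank.at(def)) {
        auto it = defs.find(src);
        if (it == defs.end())
          return -1;                 // reached the top without a match
        src = it->second.src;
      }
      return src;
    }

    int main() {
      // b = copy A; C = copy b  (capital = bank 'X', lower = bank 'y')
      std::map<int, char> bank = {{1, 'X'}, {2, 'y'}, {3, 'X'}};
      std::map<int, Copy> defs = {{2, {1}}, {3, {2}}};  // 2=copy 1, 3=copy 2
      assert(findSameBankSource(/*def=*/3, /*src=*/2, defs, bank) == 1);
    }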
- return false; - else - SrcSrc = Reg; - } - } - - if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def)) - return false; - - MRI->replaceRegWith(Def, SrcSrc); - MRI->clearKillFlags(SrcSrc); - MI->eraseFromParent(); - ++NumBitcasts; - return true; -} - /// optimizeCmpInstr - If the instruction is a compare and the previous /// instruction it's comparing against all ready sets (or could be modified to /// set) the same flag as the compare, then we can remove the comparison and use @@ -411,6 +349,150 @@ bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI) { return true; } +/// \brief Check if the registers defined by the pair (RegisterClass, SubReg) +/// share the same register file. +static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, + const TargetRegisterClass *DefRC, + unsigned DefSubReg, + const TargetRegisterClass *SrcRC, + unsigned SrcSubReg) { + // Same register class. + if (DefRC == SrcRC) + return true; + + // Both operands are sub registers. Check if they share a register class. + unsigned SrcIdx, DefIdx; + if (SrcSubReg && DefSubReg) + return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, + SrcIdx, DefIdx) != NULL; + // At most one of the register is a sub register, make it Src to avoid + // duplicating the test. + if (!SrcSubReg) { + std::swap(DefSubReg, SrcSubReg); + std::swap(DefRC, SrcRC); + } + + // One of the register is a sub register, check if we can get a superclass. + if (SrcSubReg) + return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != NULL; + // Plain copy. + return TRI.getCommonSubClass(DefRC, SrcRC) != NULL; +} + +/// \brief Get the index of the definition and source for \p Copy +/// instruction. +/// \pre Copy.isCopy() or Copy.isBitcast(). +/// \return True if the Copy instruction has only one register source +/// and one register definition. Otherwise, \p DefIdx and \p SrcIdx +/// are invalid. +static bool getCopyOrBitcastDefUseIdx(const MachineInstr &Copy, + unsigned &DefIdx, unsigned &SrcIdx) { + assert((Copy.isCopy() || Copy.isBitcast()) && "Wrong operation type."); + if (Copy.isCopy()) { + // Copy instruction are supposed to be: Def = Src. + if (Copy.getDesc().getNumOperands() != 2) + return false; + DefIdx = 0; + SrcIdx = 1; + assert(Copy.getOperand(DefIdx).isDef() && "Use comes before def!"); + return true; + } + // Bitcast case. + // Bitcasts with more than one def are not supported. + if (Copy.getDesc().getNumDefs() != 1) + return false; + // Initialize SrcIdx to an undefined operand. + SrcIdx = Copy.getDesc().getNumOperands(); + for (unsigned OpIdx = 0, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; ++OpIdx) { + const MachineOperand &MO = Copy.getOperand(OpIdx); + if (!MO.isReg() || !MO.getReg()) + continue; + if (MO.isDef()) + DefIdx = OpIdx; + else if (SrcIdx != EndOpIdx) + // Multiple sources? + return false; + SrcIdx = OpIdx; + } + return true; +} + +/// \brief Optimize a copy or bitcast instruction to avoid cross +/// register bank copy. The optimization looks through a chain of +/// copies and try to find a source that has a compatible register +/// class. +/// Two register classes are considered to be compatible if they share +/// the same register bank. +/// New copies issued by this optimization are register allocator +/// friendly. This optimization does not remove any copy as it may +/// overconstraint the register allocator, but replaces some when +/// possible. +/// \pre \p MI is a Copy (MI->isCopy() is true) +/// \return True, when \p MI has been optimized. 
In that case, \p MI has +/// been removed from its parent. +bool PeepholeOptimizer::optimizeCopyOrBitcast(MachineInstr *MI) { + unsigned DefIdx, SrcIdx; + if (!MI || !getCopyOrBitcastDefUseIdx(*MI, DefIdx, SrcIdx)) + return false; + + const MachineOperand &MODef = MI->getOperand(DefIdx); + assert(MODef.isReg() && "Copies must be between registers."); + unsigned Def = MODef.getReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Def)) + return false; + + const TargetRegisterClass *DefRC = MRI->getRegClass(Def); + unsigned DefSubReg = MODef.getSubReg(); + + unsigned Src; + unsigned SrcSubReg; + bool ShouldRewrite = false; + MachineInstr *Copy = MI; + const TargetRegisterInfo &TRI = *TM->getRegisterInfo(); + + // Follow the chain of copies until we reach the top or find a + // more suitable source. + do { + unsigned CopyDefIdx, CopySrcIdx; + if (!getCopyOrBitcastDefUseIdx(*Copy, CopyDefIdx, CopySrcIdx)) + break; + const MachineOperand &MO = Copy->getOperand(CopySrcIdx); + assert(MO.isReg() && "Copies must be between registers."); + Src = MO.getReg(); + + if (TargetRegisterInfo::isPhysicalRegister(Src)) + break; + + const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); + SrcSubReg = MO.getSubReg(); + + // If this source does not incur a cross register bank copy, use it. + ShouldRewrite = shareSameRegisterFile(TRI, DefRC, DefSubReg, SrcRC, + SrcSubReg); + // Follow the chain of copies: get the definition of Src. + Copy = MRI->getVRegDef(Src); + } while (!ShouldRewrite && Copy && (Copy->isCopy() || Copy->isBitcast())); + + // If we did not find a more suitable source, there is nothing to optimize. + if (!ShouldRewrite || Src == MI->getOperand(SrcIdx).getReg()) + return false; + + // Rewrite the copy to avoid a cross register bank penalty. + unsigned NewVR = TargetRegisterInfo::isPhysicalRegister(Def) ? Def : + MRI->createVirtualRegister(DefRC); + MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY), NewVR) + .addReg(Src, 0, SrcSubReg); + NewCopy->getOperand(0).setSubReg(DefSubReg); + + MRI->replaceRegWith(Def, NewVR); + MRI->clearKillFlags(NewVR); + MI->eraseFromParent(); + ++NumCopiesBitcasts; + return true; +} + /// isLoadFoldable - Check whether MI is a candidate for folding into a later /// instruction. We only fold loads to virtual registers and the virtual /// register defined has a single use. @@ -523,7 +605,7 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->mayStore() || MI->isCall()) FoldAsLoadDefReg = 0; - if ((MI->isBitcast() && optimizeBitcastInstr(MI, MBB)) || + if (((MI->isBitcast() || MI->isCopy()) && optimizeCopyOrBitcast(MI)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || (MI->isSelect() && optimizeSelect(MI))) { // MI is deleted. diff --git a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp index 53fe273..1afc1ec 100644 --- a/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp +++ b/contrib/llvm/lib/CodeGen/PostRASchedulerList.cpp @@ -127,6 +127,12 @@ namespace { /// The schedule. Null SUnit*'s represent noop instructions. std::vector Sequence; + /// The index in BB of RegionEnd. + /// + /// This is the instruction number from the top of the current block, not + /// the SlotIndex. It is only used by the AntiDepBreaker. 
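EndIndex is a plain instruction number counted from the top of the block, not a SlotIndex. In the scheduling loop shown in the next hunk, a region's size becomes CurrentCount - Count and its end index CurrentCount. A sketch mirroring that arithmetic over a flat block (Region and splitRegions are illustrative; as in the patch, the count recorded at a boundary spans the boundary instruction itself, which is observed rather than scheduled):

    #include <cassert>
    #include <vector>

    struct Region { unsigned numInstrs, endIndex; }; // endIndex: from block top

    // Walk a block bottom-up, closing a region at every scheduling boundary.
    // --count happens before the boundary test, matching the fixed ordering
    // in the patch.
    static std::vector<Region> splitRegions(const std::vector<bool> &boundary) {
      std::vector<Region> out;
      unsigned count = boundary.size(), currentCount = count;
      for (size_t i = boundary.size(); i-- > 0;) {
        --count;
        if (boundary[i]) {
          out.push_back({currentCount - count, currentCount});
          currentCount = count;
        }
      }
      out.push_back({currentCount, currentCount});  // the region at block top
      return out;
    }

    int main() {
      // Five instructions with a boundary (e.g. a call) at index 2.
      std::vector<Region> r = splitRegions({false, false, true, false, false});
      assert(r.size() == 2);
      assert(r[0].numInstrs == 3 && r[0].endIndex == 5);
      assert(r[1].numInstrs == 2 && r[1].endIndex == 2);
    }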
+ unsigned EndIndex; + public: SchedulePostRATDList( MachineFunction &MF, MachineLoopInfo &MLI, MachineDominatorTree &MDT, @@ -141,11 +147,14 @@ namespace { /// void startBlock(MachineBasicBlock *BB); + // Set the index of RegionEnd within the current BB. + void setEndIndex(unsigned EndIdx) { EndIndex = EndIdx; } + /// Initialize the scheduler state for the next scheduling region. virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount); + unsigned regioninstrs); /// Notify that the scheduler has finished scheduling the current region. virtual void exitRegion(); @@ -197,7 +206,7 @@ SchedulePostRATDList::SchedulePostRATDList( TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl &CriticalPathRCs) : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), - LiveRegs(TRI->getNumRegs()) + LiveRegs(TRI->getNumRegs()), EndIndex(0) { const TargetMachine &TM = MF.getTarget(); const InstrItineraryData *InstrItins = TM.getInstrItineraryData(); @@ -223,8 +232,8 @@ SchedulePostRATDList::~SchedulePostRATDList() { void SchedulePostRATDList::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) { - ScheduleDAGInstrs::enterRegion(bb, begin, end, endcount); + unsigned regioninstrs) { + ScheduleDAGInstrs::enterRegion(bb, begin, end, regioninstrs); Sequence.clear(); } @@ -312,20 +321,21 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { unsigned Count = MBB->size(), CurrentCount = Count; for (MachineBasicBlock::iterator I = Current; I != MBB->begin(); ) { MachineInstr *MI = llvm::prior(I); + --Count; // Calls are not scheduling boundaries before register allocation, but // post-ra we don't gain anything by scheduling across calls since we // don't need to worry about register pressure. if (MI->isCall() || TII->isSchedulingBoundary(MI, MBB, Fn)) { - Scheduler.enterRegion(MBB, I, Current, CurrentCount); + Scheduler.enterRegion(MBB, I, Current, CurrentCount - Count); + Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); Scheduler.EmitSchedule(); Current = MI; - CurrentCount = Count - 1; + CurrentCount = Count; Scheduler.Observe(MI, CurrentCount); } I = MI; - --Count; if (MI->isBundle()) Count -= MI->getBundleSize(); } @@ -333,6 +343,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) { assert((MBB->begin() == Current || CurrentCount != 0) && "Instruction count mismatch!"); Scheduler.enterRegion(MBB, MBB->begin(), Current, CurrentCount); + Scheduler.setEndIndex(CurrentCount); Scheduler.schedule(); Scheduler.exitRegion(); Scheduler.EmitSchedule(); @@ -424,9 +435,9 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; - LiveRegs.set(Reg); - // Repeat, for all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + // Repeat, for reg and all subregs. + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) LiveRegs.set(*SubRegs); } } @@ -496,20 +507,19 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { // Ignore two-addr defs. if (MI->isRegTiedToUseOperand(i)) continue; - LiveRegs.reset(Reg); - - // Repeat for all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + // Repeat for reg and all subregs. 
+ for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) LiveRegs.reset(*SubRegs); } // Examine all used registers and set/clear kill flag. When a // register is used multiple times we only set the kill flag on - // the first use. + // the first use. Don't set kill flags on undef operands. killedRegs.reset(); for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) continue; + if (!MO.isReg() || !MO.isUse() || MO.isUndef()) continue; unsigned Reg = MO.getReg(); if ((Reg == 0) || MRI.isReserved(Reg)) continue; @@ -548,9 +558,8 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { unsigned Reg = MO.getReg(); if ((Reg == 0) || MRI.isReserved(Reg)) continue; - LiveRegs.set(Reg); - - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) + for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true); + SubRegs.isValid(); ++SubRegs) LiveRegs.set(*SubRegs); } } diff --git a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp index e4e18c3..0c5173a 100644 --- a/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp +++ b/contrib/llvm/lib/CodeGen/ProcessImplicitDefs.cpp @@ -78,7 +78,7 @@ void ProcessImplicitDefs::processImplicitDef(MachineInstr *MI) { unsigned Reg = MI->getOperand(0).getReg(); if (TargetRegisterInfo::isVirtualRegister(Reg)) { - // For virtual regiusters, mark all uses as , and convert users to + // For virtual registers, mark all uses as , and convert users to // implicit-def when possible. for (MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(Reg), diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp index 9487cbd..b0e494f 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.cpp @@ -14,9 +14,6 @@ // This pass must be run after register allocation. After this pass is // executed, it is illegal to construct MO_FrameIndex operands. // -// This pass provides an optional shrink wrapping variant of prolog/epilog -// insertion, enabled via --shrink-wrap. See ShrinkWrapping.cpp. 
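The FixupKills change above stops putting kill flags on undef operands while keeping the first-use-kills rule for repeated reads of the same register. A toy model of that rule (the bitsets stand in for the LiveRegs set; all names are hypothetical):

    #include <bitset>
    #include <cassert>
    #include <vector>

    struct Use { unsigned reg; bool isUndef; };

    // For the uses of one instruction: the first use of a register that is
    // not live afterwards gets the kill flag; undef reads never kill.
    static std::vector<bool> computeKills(const std::bitset<32> &liveRegs,
                                          const std::vector<Use> &uses) {
      std::vector<bool> kill(uses.size(), false);
      std::bitset<32> killed;
      for (size_t i = 0; i < uses.size(); ++i) {
        if (uses[i].isUndef)
          continue;                      // never mark undef reads as kills
        unsigned r = uses[i].reg;
        if (!liveRegs.test(r) && !killed.test(r)) {
          kill[i] = true;                // only the first use kills
          killed.set(r);
        }
      }
      return kill;
    }

    int main() {
      std::bitset<32> live;              // nothing live after the instruction
      std::vector<bool> k =
          computeKills(live, {{5, false}, {5, false}, {6, true}});
      assert(k[0] && !k[1] && !k[2]);    // first read of %5 kills; undef never
    }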
-// //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pei" @@ -36,6 +33,7 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" @@ -47,6 +45,11 @@ using namespace llvm; char PEI::ID = 0; char &llvm::PrologEpilogCodeInserterID = PEI::ID; +static cl::opt +WarnStackSize("warn-stack-size", cl::Hidden, cl::init((unsigned)-1), + cl::desc("Warn for stack size bigger than the given" + " number")); + INITIALIZE_PASS_BEGIN(PEI, "prologepilog", "Prologue/Epilogue Insertion", false, false) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) @@ -60,6 +63,38 @@ STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); +void PEI::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesCFG(); + AU.addPreserved(); + AU.addPreserved(); + AU.addRequired(); + MachineFunctionPass::getAnalysisUsage(AU); +} + +bool PEI::isReturnBlock(MachineBasicBlock* MBB) { + return (MBB && !MBB->empty() && MBB->back().isReturn()); +} + +/// Compute the set of return blocks +void PEI::calculateSets(MachineFunction &Fn) { + // Sets used to compute spill, restore placement sets. + const std::vector &CSI = + Fn.getFrameInfo()->getCalleeSavedInfo(); + + // If no CSRs used, we are done. + if (CSI.empty()) + return; + + // Save refs to entry and return blocks. + EntryBlock = Fn.begin(); + for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end(); + MBB != E; ++MBB) + if (isReturnBlock(MBB)) + ReturnBlocks.push_back(MBB); + + return; +} + /// runOnMachineFunction - Insert prolog/epilog code and replace abstract /// frame indexes with appropriate references. /// @@ -87,16 +122,11 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { calculateCalleeSavedRegisters(Fn); // Determine placement of CSR spill/restore code: - // - With shrink wrapping, place spills and restores to tightly - // enclose regions in the Machine CFG of the function where - // they are used. - // - Without shink wrapping (default), place all spills in the - // entry block, all restores in return blocks. - placeCSRSpillsAndRestores(Fn); + // place all spills in the entry block, all restores in return blocks. + calculateSets(Fn); // Add the code to save and restore the callee saved registers - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (!F->hasFnAttribute(Attribute::Naked)) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function @@ -111,8 +141,7 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. - if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (!F->hasFnAttribute(Attribute::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -129,8 +158,15 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // Clear any vregs created by virtual scavenging. Fn.getRegInfo().clearVirtRegs(); + // Warn on stack size when we exceeds the given limit. 
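The new -warn-stack-size option defaults to (unsigned)-1 and only fires when the flag actually occurs on the command line, checked via getNumOccurrences(). A minimal standalone tool using the same cl::opt pattern (the option name and sizes here are hypothetical):

    #include "llvm/Support/CommandLine.h"
    #include "llvm/Support/raw_ostream.h"

    using namespace llvm;

    // Hypothetical threshold option; (unsigned)-1 means "effectively off".
    static cl::opt<unsigned>
    WarnLimit("warn-limit", cl::Hidden, cl::init((unsigned)-1),
              cl::desc("Warn when the measured size exceeds the given number"));

    static void maybeWarn(unsigned Size, const char *Name) {
      // Fire only if the user explicitly passed the flag.
      if (WarnLimit.getNumOccurrences() > 0 && WarnLimit < Size)
        errs() << "warning: size limit exceeded (" << Size << ") in "
               << Name << ".\n";
    }

    int main(int argc, char **argv) {
      cl::ParseCommandLineOptions(argc, argv);
      maybeWarn(4096, "example");
      return 0;
    }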
+ MachineFrameInfo *MFI = Fn.getFrameInfo(); + if (WarnStackSize.getNumOccurrences() > 0 && + WarnStackSize < MFI->getStackSize()) + errs() << "warning: Stack size limit exceeded (" << MFI->getStackSize() + << ") in " << Fn.getName() << ".\n"; + delete RS; - clearAllSets(); + ReturnBlocks.clear(); return true; } @@ -208,8 +244,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { return; // In Naked functions we aren't going to save any registers. - if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::Naked)) + if (F.getFunction()->hasFnAttribute(Attribute::Naked)) return; std::vector CSI; @@ -273,7 +308,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { } /// insertCSRSpillsAndRestores - Insert spill and restore code for -/// callee saved registers used in the function, handling shrink wrapping. +/// callee saved registers used in the function. /// void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Get callee saved register information. @@ -291,133 +326,33 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); MachineBasicBlock::iterator I; - if (!ShrinkWrapThisFunction) { - // Spill using target interface. - I = EntryBlock->begin(); - if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - // Add the callee-saved register as live-in. - // It's killed at the spill. - EntryBlock->addLiveIn(CSI[i].getReg()); - - // Insert the spill to the stack frame. - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, - CSI[i].getFrameIdx(), RC, TRI); - } - } - - // Restore using target interface. - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { - MachineBasicBlock* MBB = ReturnBlocks[ri]; - I = MBB->end(); --I; - - // Skip over all terminator instructions, which are part of the return - // sequence. - MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->isTerminator()) - I = I2; - - bool AtStart = I == MBB->begin(); - MachineBasicBlock::iterator BeforeI = I; - if (!AtStart) - --BeforeI; - - // Restore all registers immediately before the return and any - // terminators that precede it. - if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { - for (unsigned i = 0, e = CSI.size(); i != e; ++i) { - unsigned Reg = CSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(*MBB, I, Reg, - CSI[i].getFrameIdx(), - RC, TRI); - assert(I != MBB->begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert - // multiple instructions. - if (AtStart) - I = MBB->begin(); - else { - I = BeforeI; - ++I; - } - } - } - } - return; - } - - // Insert spills. - std::vector blockCSI; - for (CSRegBlockMap::iterator BI = CSRSave.begin(), - BE = CSRSave.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet save = BI->second; - - if (save.empty()) - continue; - - blockCSI.clear(); - for (CSRegSet::iterator RI = save.begin(), - RE = save.end(); RI != RE; ++RI) { - blockCSI.push_back(CSI[*RI]); - } - assert(blockCSI.size() > 0 && - "Could not collect callee saved register info"); - - I = MBB->begin(); - - // When shrink wrapping, use stack slot stores/loads. 
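Restores are placed immediately before the return sequence: starting at the last instruction, the loop above walks backwards over consecutive terminators to find the first of them. The same scan over a flat block (the isTerminator flags and restorePoint are illustrative):

    #include <cassert>
    #include <vector>

    // Given a block whose trailing instructions form the return sequence,
    // find the index at which restores must be inserted: just before the
    // first terminator of that trailing run.
    static size_t restorePoint(const std::vector<bool> &isTerminator) {
      size_t i = isTerminator.size() - 1;     // last instruction (the return)
      while (i > 0 && isTerminator[i - 1])
        --i;                                  // skip preceding terminators
      return i;
    }

    int main() {
      // Block: op, op, conditional terminator, return.
      assert(restorePoint({false, false, true, true}) == 2);
      // Block: op, return only.
      assert(restorePoint({false, true}) == 1);
    }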
- for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { + // Spill using target interface. + I = EntryBlock->begin(); + if (!TFI->spillCalleeSavedRegisters(*EntryBlock, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { // Add the callee-saved register as live-in. // It's killed at the spill. - MBB->addLiveIn(blockCSI[i].getReg()); + EntryBlock->addLiveIn(CSI[i].getReg()); // Insert the spill to the stack frame. - unsigned Reg = blockCSI[i].getReg(); + unsigned Reg = CSI[i].getReg(); const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.storeRegToStackSlot(*MBB, I, Reg, - true, - blockCSI[i].getFrameIdx(), + TII.storeRegToStackSlot(*EntryBlock, I, Reg, true, CSI[i].getFrameIdx(), RC, TRI); } } - for (CSRegBlockMap::iterator BI = CSRRestore.begin(), - BE = CSRRestore.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet restore = BI->second; - - if (restore.empty()) - continue; + // Restore using target interface. + for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) { + MachineBasicBlock *MBB = ReturnBlocks[ri]; + I = MBB->end(); + --I; - blockCSI.clear(); - for (CSRegSet::iterator RI = restore.begin(), - RE = restore.end(); RI != RE; ++RI) { - blockCSI.push_back(CSI[*RI]); - } - assert(blockCSI.size() > 0 && - "Could not find callee saved register info"); - - // If MBB is empty and needs restores, insert at the _beginning_. - if (MBB->empty()) { - I = MBB->begin(); - } else { - I = MBB->end(); - --I; - - // Skip over all terminator instructions, which are part of the - // return sequence. - if (! I->isTerminator()) { - ++I; - } else { - MachineBasicBlock::iterator I2 = I; - while (I2 != MBB->begin() && (--I2)->isTerminator()) - I = I2; - } - } + // Skip over all terminator instructions, which are part of the return + // sequence. + MachineBasicBlock::iterator I2 = I; + while (I2 != MBB->begin() && (--I2)->isTerminator()) + I = I2; bool AtStart = I == MBB->begin(); MachineBasicBlock::iterator BeforeI = I; @@ -426,21 +361,21 @@ void PEI::insertCSRSpillsAndRestores(MachineFunction &Fn) { // Restore all registers immediately before the return and any // terminators that precede it. - for (unsigned i = 0, e = blockCSI.size(); i != e; ++i) { - unsigned Reg = blockCSI[i].getReg(); - const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); - TII.loadRegFromStackSlot(*MBB, I, Reg, - blockCSI[i].getFrameIdx(), - RC, TRI); - assert(I != MBB->begin() && - "loadRegFromStackSlot didn't insert any code!"); - // Insert in reverse order. loadRegFromStackSlot can insert - // multiple instructions. - if (AtStart) - I = MBB->begin(); - else { - I = BeforeI; - ++I; + if (!TFI->restoreCalleeSavedRegisters(*MBB, I, CSI, TRI)) { + for (unsigned i = 0, e = CSI.size(); i != e; ++i) { + unsigned Reg = CSI[i].getReg(); + const TargetRegisterClass *RC = TRI->getMinimalPhysRegClass(Reg); + TII.loadRegFromStackSlot(*MBB, I, Reg, CSI[i].getFrameIdx(), RC, TRI); + assert(I != MBB->begin() && + "loadRegFromStackSlot didn't insert any code!"); + // Insert in reverse order. loadRegFromStackSlot can insert + // multiple instructions. + if (AtStart) + I = MBB->begin(); + else { + I = BeforeI; + ++I; + } } } } @@ -545,14 +480,18 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { unsigned MaxAlign = MFI->getMaxAlignment(); // Make sure the special register scavenging spill slot is closest to the - // frame pointer if a frame pointer is required. 
+ // incoming stack pointer if a frame pointer is required and is closer + // to the incoming rather than the final stack pointer. const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && - !RegInfo->needsStackRealignment(Fn)) { + bool EarlyScavengingSlots = (TFI.hasFP(Fn) && + TFI.isFPCloseToIncomingSP() && + RegInfo->useFPForScavengingIndex(Fn) && + !RegInfo->needsStackRealignment(Fn)); + if (RS && EarlyScavengingSlots) { SmallVector SFIs; RS->getScavengingFrameIndices(SFIs); - for (SmallVector::iterator I = SFIs.begin(), - IE = SFIs.end(); I != IE; ++I) + for (SmallVectorImpl::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } @@ -632,12 +571,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // Make sure the special register scavenging spill slot is closest to the // stack pointer. - if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || - !RegInfo->useFPForScavengingIndex(Fn))) { + if (RS && !EarlyScavengingSlots) { SmallVector SFIs; RS->getScavengingFrameIndices(SFIs); - for (SmallVector::iterator I = SFIs.begin(), - IE = SFIs.end(); I != IE; ++I) + for (SmallVectorImpl::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } @@ -712,6 +650,40 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { void PEI::replaceFrameIndices(MachineFunction &Fn) { if (!Fn.getFrameInfo()->hasStackObjects()) return; // Nothing to do? + // Store SPAdj at exit of a basic block. + SmallVector SPState; + SPState.resize(Fn.getNumBlockIDs()); + SmallPtrSet Reachable; + + // Iterate over the reachable blocks in DFS order. + for (df_ext_iterator > + DFI = df_ext_begin(&Fn, Reachable), DFE = df_ext_end(&Fn, Reachable); + DFI != DFE; ++DFI) { + int SPAdj = 0; + // Check the exit state of the DFS stack predecessor. + if (DFI.getPathLength() >= 2) { + MachineBasicBlock *StackPred = DFI.getPath(DFI.getPathLength() - 2); + assert(Reachable.count(StackPred) && + "DFS stack predecessor is already visited.\n"); + SPAdj = SPState[StackPred->getNumber()]; + } + MachineBasicBlock *BB = *DFI; + replaceFrameIndices(BB, Fn, SPAdj); + SPState[BB->getNumber()] = SPAdj; + } + + // Handle the unreachable blocks. + for (MachineFunction::iterator BB = Fn.begin(), E = Fn.end(); BB != E; ++BB) { + if (Reachable.count(BB)) + // Already handled in DFS traversal. + continue; + int SPAdj = 0; + replaceFrameIndices(BB, Fn, SPAdj); + } +} + +void PEI::replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj) { const TargetMachine &TM = Fn.getTarget(); assert(TM.getRegisterInfo() && "TM::getRegisterInfo() must be implemented!"); const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); @@ -722,89 +694,85 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { int FrameSetupOpcode = TII.getCallFrameSetupOpcode(); int FrameDestroyOpcode = TII.getCallFrameDestroyOpcode(); - for (MachineFunction::iterator BB = Fn.begin(), - E = Fn.end(); BB != E; ++BB) { -#ifndef NDEBUG - int SPAdjCount = 0; // frame setup / destroy count. -#endif - int SPAdj = 0; // SP offset due to call frame setup / destroy. 
- if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); + if (RS && !FrameIndexVirtualScavenging) RS->enterBasicBlock(BB); - for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { + for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { - if (I->getOpcode() == FrameSetupOpcode || - I->getOpcode() == FrameDestroyOpcode) { -#ifndef NDEBUG - // Track whether we see even pairs of them - SPAdjCount += I->getOpcode() == FrameSetupOpcode ? 1 : -1; -#endif - // Remember how much SP has been adjusted to create the call - // frame. - int Size = I->getOperand(0).getImm(); - - if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || - (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) - Size = -Size; - - SPAdj += Size; - - MachineBasicBlock::iterator PrevI = BB->end(); - if (I != BB->begin()) PrevI = prior(I); - TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); - - // Visit the instructions created by eliminateCallFramePseudoInstr(). - if (PrevI == BB->end()) - I = BB->begin(); // The replaced instr was the first in the block. - else - I = llvm::next(PrevI); - continue; - } + if (I->getOpcode() == FrameSetupOpcode || + I->getOpcode() == FrameDestroyOpcode) { + // Remember how much SP has been adjusted to create the call + // frame. + int Size = I->getOperand(0).getImm(); - MachineInstr *MI = I; - bool DoIncr = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (!MI->getOperand(i).isFI()) - continue; + if ((!StackGrowsDown && I->getOpcode() == FrameSetupOpcode) || + (StackGrowsDown && I->getOpcode() == FrameDestroyOpcode)) + Size = -Size; - // Some instructions (e.g. inline asm instructions) can have - // multiple frame indices and/or cause eliminateFrameIndex - // to insert more than one instruction. We need the register - // scavenger to go through all of these instructions so that - // it can update its register information. We keep the - // iterator at the point before insertion so that we can - // revisit them in full. - bool AtBeginning = (I == BB->begin()); - if (!AtBeginning) --I; - - // If this instruction has a FrameIndex operand, we need to - // use that target machine register info object to eliminate - // it. - TRI.eliminateFrameIndex(MI, SPAdj, i, - FrameIndexVirtualScavenging ? NULL : RS); - - // Reset the iterator if we were at the beginning of the BB. - if (AtBeginning) { - I = BB->begin(); - DoIncr = false; - } + SPAdj += Size; + + MachineBasicBlock::iterator PrevI = BB->end(); + if (I != BB->begin()) PrevI = prior(I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); - MI = 0; - break; + // Visit the instructions created by eliminateCallFramePseudoInstr(). + if (PrevI == BB->end()) + I = BB->begin(); // The replaced instr was the first in the block. + else + I = llvm::next(PrevI); + continue; + } + + MachineInstr *MI = I; + bool DoIncr = true; + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (!MI->getOperand(i).isFI()) + continue; + + // Frame indicies in debug values are encoded in a target independent + // way with simply the frame index and offset rather than any + // target-specific addressing mode. 
+ if (MI->isDebugValue()) { + assert(i == 0 && "Frame indicies can only appear as the first " + "operand of a DBG_VALUE machine instruction"); + unsigned Reg; + MachineOperand &Offset = MI->getOperand(1); + Offset.setImm(Offset.getImm() + + TFI->getFrameIndexReference( + Fn, MI->getOperand(0).getIndex(), Reg)); + MI->getOperand(0).ChangeToRegister(Reg, false /*isDef*/); + continue; } - if (DoIncr && I != BB->end()) ++I; + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; + } - // Update register states. - if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); + MI = 0; + break; } - // If we have evenly matched pairs of frame setup / destroy instructions, - // make sure the adjustments come out to zero. If we don't have matched - // pairs, we can't be sure the missing bit isn't in another basic block - // due to a custom inserter playing tricks, so just asserting SPAdj==0 - // isn't sufficient. See tMOVCC on Thumb1, for example. - assert((SPAdjCount || SPAdj == 0) && - "Unbalanced call frame setup / destroy pairs?"); + if (DoIncr && I != BB->end()) ++I; + + // Update register states. + if (RS && !FrameIndexVirtualScavenging && MI) RS->forward(MI); } } diff --git a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h index 87fff9a..77cfa2b 100644 --- a/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h +++ b/contrib/llvm/lib/CodeGen/PrologEpilogInserter.h @@ -1,4 +1,4 @@ -//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -* --===// +//===-- PrologEpilogInserter.h - Prolog/Epilog code insertion -*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -14,9 +14,6 @@ // This pass must be run after register allocation. After this pass is // executed, it is illegal to construct MO_FrameIndex operands. // -// This pass also implements a shrink wrapping variant of prolog/epilog -// insertion. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CODEGEN_PEI_H @@ -54,120 +51,28 @@ namespace llvm { // stack frame indexes. unsigned MinCSFrameIndex, MaxCSFrameIndex; - // Analysis info for spill/restore placement. - // "CSR": "callee saved register". - - // CSRegSet contains indices into the Callee Saved Register Info - // vector built by calculateCalleeSavedRegisters() and accessed - // via MF.getFrameInfo()->getCalleeSavedInfo(). - typedef SparseBitVector<> CSRegSet; - - // CSRegBlockMap maps MachineBasicBlocks to sets of callee - // saved register indices. 
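For DBG_VALUE instructions the frame index is eliminated target-independently: the frame-register/offset pair returned by getFrameIndexReference is folded into the existing offset operand and operand 0 is changed to a register. A toy version of that fold (the slot layout in frameIndexReference and all names are assumed, purely for illustration):

    #include <cassert>

    // Toy debug-value location: a frame index + offset before rewriting,
    // a register + offset afterwards.
    struct DbgValueLoc {
      bool isFrameIndex;
      int baseOrFI;     // frame index before rewriting, register after
      long offset;
    };

    // Stand-in for getFrameIndexReference: reports the slot's offset from
    // a hypothetical fixed frame register 7.
    static long frameIndexReference(int fi, int &frameReg) {
      frameReg = 7;
      return -8L * (fi + 1);     // assumed layout, purely illustrative
    }

    static void rewriteDebugValue(DbgValueLoc &dv) {
      int frameReg;
      long slotOffset = frameIndexReference(dv.baseOrFI, frameReg);
      dv.offset += slotOffset;   // fold the slot offset into the DBG_VALUE
      dv.baseOrFI = frameReg;    // and point it at the frame register
      dv.isFrameIndex = false;
    }

    int main() {
      DbgValueLoc dv = {true, /*fi=*/0, /*offset=*/4};
      rewriteDebugValue(dv);
      assert(!dv.isFrameIndex && dv.baseOrFI == 7 && dv.offset == -4);
    }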
- typedef DenseMap CSRegBlockMap; - - // Set and maps for computing CSR spill/restore placement: - // used in function (UsedCSRegs) - // used in a basic block (CSRUsed) - // anticipatable in a basic block (Antic{In,Out}) - // available in a basic block (Avail{In,Out}) - // to be spilled at the entry to a basic block (CSRSave) - // to be restored at the end of a basic block (CSRRestore) - CSRegSet UsedCSRegs; - CSRegBlockMap CSRUsed; - CSRegBlockMap AnticIn, AnticOut; - CSRegBlockMap AvailIn, AvailOut; - CSRegBlockMap CSRSave; - CSRegBlockMap CSRRestore; - // Entry and return blocks of the current function. MachineBasicBlock* EntryBlock; SmallVector ReturnBlocks; - // Map of MBBs to top level MachineLoops. - DenseMap TLLoops; - - // Flag to control shrink wrapping per-function: - // may choose to skip shrink wrapping for certain - // functions. - bool ShrinkWrapThisFunction; - // Flag to control whether to use the register scavenger to resolve // frame index materialization registers. Set according to // TRI->requiresFrameIndexScavenging() for the curren function. bool FrameIndexVirtualScavenging; -#ifndef NDEBUG - // Machine function handle. - MachineFunction* MF; - - // Flag indicating that the current function - // has at least one "short" path in the machine - // CFG from the entry block to an exit block. - bool HasFastExitPath; -#endif - - bool calculateSets(MachineFunction &Fn); - bool calcAnticInOut(MachineBasicBlock* MBB); - bool calcAvailInOut(MachineBasicBlock* MBB); - void calculateAnticAvail(MachineFunction &Fn); - bool addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector& blks); - bool addUsesForTopLevelLoops(SmallVector& blks); - bool calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector &blks, - CSRegBlockMap &prevSpills); - bool calcRestorePlacements(MachineBasicBlock* MBB, - SmallVector &blks, - CSRegBlockMap &prevRestores); - void placeSpillsAndRestores(MachineFunction &Fn); - void placeCSRSpillsAndRestores(MachineFunction &Fn); + void calculateSets(MachineFunction &Fn); void calculateCallsInformation(MachineFunction &Fn); void calculateCalleeSavedRegisters(MachineFunction &Fn); void insertCSRSpillsAndRestores(MachineFunction &Fn); void calculateFrameObjectOffsets(MachineFunction &Fn); void replaceFrameIndices(MachineFunction &Fn); + void replaceFrameIndices(MachineBasicBlock *BB, MachineFunction &Fn, + int &SPAdj); void scavengeFrameVirtualRegs(MachineFunction &Fn); void insertPrologEpilogCode(MachineFunction &Fn); - // Initialize DFA sets, called before iterations. - void clearAnticAvailSets(); - // Clear all sets constructed by shrink wrapping. - void clearAllSets(); - - // Initialize all shrink wrapping data. - void initShrinkWrappingInfo(); - - // Convienences for dealing with machine loops. - MachineBasicBlock* getTopLevelLoopPreheader(MachineLoop* LP); - MachineLoop* getTopLevelLoopParent(MachineLoop *LP); - - // Propgate CSRs used in MBB to all MBBs of loop LP. - void propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP); - // Convenience for recognizing return blocks. bool isReturnBlock(MachineBasicBlock* MBB); - -#ifndef NDEBUG - // Debugging methods. - - // Mark this function as having fast exit paths. - void findFastExitPath(); - - // Verify placement of spills/restores. 
- void verifySpillRestorePlacement(); - - std::string getBasicBlockName(const MachineBasicBlock* MBB); - std::string stringifyCSRegSet(const CSRegSet& s); - void dumpSet(const CSRegSet& s); - void dumpUsed(MachineBasicBlock* MBB); - void dumpAllUsed(); - void dumpSets(MachineBasicBlock* MBB); - void dumpSets1(MachineBasicBlock* MBB); - void dumpAllSets(); - void dumpSRSets(); -#endif - }; } // End llvm namespace #endif diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp index c035590..293e306 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.cpp @@ -43,13 +43,16 @@ static cl::opt VerifyRegAlloc("verify-regalloc", cl::location(RegAllocBase::VerifyEnabled), cl::desc("Verify during register allocation")); -const char *RegAllocBase::TimerGroupName = "Register Allocation"; +const char RegAllocBase::TimerGroupName[] = "Register Allocation"; bool RegAllocBase::VerifyEnabled = false; //===----------------------------------------------------------------------===// // RegAllocBase Implementation //===----------------------------------------------------------------------===// +// Pin the vtable to this file. +void RegAllocBase::anchor() {} + void RegAllocBase::init(VirtRegMap &vrm, LiveIntervals &lis, LiveRegMatrix &mat) { @@ -99,14 +102,13 @@ void RegAllocBase::allocatePhysRegs() { // result from splitting. DEBUG(dbgs() << "\nselectOrSplit " << MRI->getRegClass(VirtReg->reg)->getName() - << ':' << PrintReg(VirtReg->reg) << ' ' << *VirtReg << '\n'); - typedef SmallVector VirtRegVec; + << ':' << *VirtReg << '\n'); + typedef SmallVector VirtRegVec; VirtRegVec SplitVRegs; unsigned AvailablePhysReg = selectOrSplit(*VirtReg, SplitVRegs); if (AvailablePhysReg == ~0u) { // selectOrSplit failed to find a register! - const char *Msg = "ran out of registers during register allocation"; // Probably caused by an inline asm. MachineInstr *MI; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(VirtReg->reg); @@ -114,9 +116,9 @@ void RegAllocBase::allocatePhysRegs() { if (MI->isInlineAsm()) break; if (MI) - MI->emitError(Msg); + MI->emitError("inline assembly requires more registers than available"); else - report_fatal_error(Msg); + report_fatal_error("ran out of registers during register allocation"); // Keep going after reporting the error. VRM->assignVirt2Phys(VirtReg->reg, RegClassInfo.getOrder(MRI->getRegClass(VirtReg->reg)).front()); @@ -128,7 +130,7 @@ void RegAllocBase::allocatePhysRegs() { for (VirtRegVec::iterator I = SplitVRegs.begin(), E = SplitVRegs.end(); I != E; ++I) { - LiveInterval *SplitVirtReg = *I; + LiveInterval *SplitVirtReg = &LIS->getInterval(*I); assert(!VRM->hasPhys(SplitVirtReg->reg) && "Register already assigned"); if (MRI->reg_nodbg_empty(SplitVirtReg->reg)) { DEBUG(dbgs() << "not queueing unused " << *SplitVirtReg << '\n'); diff --git a/contrib/llvm/lib/CodeGen/RegAllocBase.h b/contrib/llvm/lib/CodeGen/RegAllocBase.h index 064e40f..c17a8d9 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBase.h +++ b/contrib/llvm/lib/CodeGen/RegAllocBase.h @@ -38,7 +38,7 @@ #define LLVM_CODEGEN_REGALLOCBASE #include "llvm/ADT/OwningPtr.h" -#include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/RegisterClassInfo.h" namespace llvm { @@ -57,6 +57,7 @@ class Spiller; /// live range splitting. They must also override enqueue/dequeue to provide an /// assignment order. 
class RegAllocBase { + virtual void anchor(); protected: const TargetRegisterInfo *TRI; MachineRegisterInfo *MRI; @@ -90,10 +91,10 @@ protected: // or new set of split live virtual registers. It is up to the splitter to // converge quickly toward fully spilled live ranges. virtual unsigned selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &splitLVRs) = 0; + SmallVectorImpl &splitLVRs) = 0; // Use this group name for NamedRegionTimer. - static const char *TimerGroupName; + static const char TimerGroupName[]; public: /// VerifyEnabled - True when -verify-regalloc is given. diff --git a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp index 7fcfe9e..6768e45 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocBasic.cpp @@ -24,6 +24,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -101,7 +102,7 @@ public: } virtual unsigned selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &SplitVRegs); + SmallVectorImpl &SplitVRegs); /// Perform register allocation. virtual bool runOnMachineFunction(MachineFunction &mf); @@ -110,7 +111,7 @@ public: // that interfere with the most recently queried lvr. Return true if spilling // was successful, and append any new spilled/split intervals to splitLVRs. bool spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl &SplitVRegs); + SmallVectorImpl &SplitVRegs); static char ID; }; @@ -125,7 +126,6 @@ RABasic::RABasic(): MachineFunctionPass(ID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); @@ -142,9 +142,10 @@ void RABasic::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); AU.addRequired(); AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addRequiredID(MachineDominatorsID); AU.addPreservedID(MachineDominatorsID); AU.addRequired(); @@ -165,7 +166,7 @@ void RABasic::releaseMemory() { // that interfere with VirtReg. The newly spilled or split live intervals are // returned by appending them to SplitVRegs. bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl &SplitVRegs) { + SmallVectorImpl &SplitVRegs) { // Record each interference and determine if all are spillable before mutating // either the union or live intervals. SmallVector Intfs; @@ -219,7 +220,7 @@ bool RABasic::spillInterferences(LiveInterval &VirtReg, unsigned PhysReg, // minimal, there is no value in caching them outside the scope of // selectOrSplit(). unsigned RABasic::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &SplitVRegs) { + SmallVectorImpl &SplitVRegs) { // Populate a list of physical register spill candidates. 
SmallVector PhysRegSpillCands; @@ -276,6 +277,11 @@ bool RABasic::runOnMachineFunction(MachineFunction &mf) { RegAllocBase::init(getAnalysis(), getAnalysis(), getAnalysis()); + + calculateSpillWeightsAndHints(*LIS, *MF, + getAnalysis(), + getAnalysis()); + SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); allocatePhysRegs(); diff --git a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp index bb9c05c..e92dbd2 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocFast.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocFast.cpp @@ -144,7 +144,7 @@ namespace { // not be erased. bool isBulkSpilling; - enum { + enum LLVM_ENUM_INT_TYPE(unsigned) { spillClean = 1, spillDirty = 100, spillImpossible = ~0u @@ -293,29 +293,26 @@ void RAFast::spillVirtReg(MachineBasicBlock::iterator MI, // If this register is used by DBG_VALUE then insert new DBG_VALUE to // identify spilled location as the place to find corresponding variable's // value. - SmallVector &LRIDbgValues = + SmallVectorImpl &LRIDbgValues = LiveDbgValueMap[LRI->VirtReg]; for (unsigned li = 0, le = LRIDbgValues.size(); li != le; ++li) { MachineInstr *DBG = LRIDbgValues[li]; - const MDNode *MDPtr = - DBG->getOperand(DBG->getNumOperands()-1).getMetadata(); - int64_t Offset = 0; - if (DBG->getOperand(1).isImm()) - Offset = DBG->getOperand(1).getImm(); + const MDNode *MDPtr = DBG->getOperand(2).getMetadata(); + bool IsIndirect = DBG->isIndirectDebugValue(); + uint64_t Offset = IsIndirect ? DBG->getOperand(1).getImm() : 0; DebugLoc DL; if (MI == MBB->end()) { // If MI is at basic block end then use last instruction's location. MachineBasicBlock::iterator EI = MI; DL = (--EI)->getDebugLoc(); - } - else + } else DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = - TII->emitFrameIndexDebugValue(*MF, FI, Offset, MDPtr, DL)) { - MachineBasicBlock *MBB = DBG->getParent(); - MBB->insert(MI, NewDV); - DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); - } + MachineBasicBlock *MBB = DBG->getParent(); + MachineInstr *NewDV = + BuildMI(*MBB, MI, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(FI).addImm(Offset).addMetadata(MDPtr); + (void)NewDV; + DEBUG(dbgs() << "Inserting debug info due to spill:" << "\n" << *NewDV); } // Now that this register is spilled, there should not be any DBG_VALUE // pointing to this register because they are all pointing to the spilled value @@ -572,7 +569,10 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, } // Nothing we can do. Report an error and keep going with a bad allocation. - MI->emitError("ran out of registers during register allocation"); + if (MI->isInlineAsm()) + MI->emitError("inline assembly requires more registers than available"); + else + MI->emitError("ran out of registers during register allocation"); definePhysReg(MI, *AO.begin(), regFree); return assignVirtToPhysReg(VirtReg, *AO.begin()); } @@ -859,25 +859,21 @@ void RAFast::AllocateBasicBlock() { } else { // Modify DBG_VALUE now that the value is in a spill slot. - int64_t Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + uint64_t Offset = IsIndirect ?
MI->getOperand(1).getImm() : 0; const MDNode *MDPtr = MI->getOperand(MI->getNumOperands()-1).getMetadata(); DebugLoc DL = MI->getDebugLoc(); - if (MachineInstr *NewDV = - TII->emitFrameIndexDebugValue(*MF, SS, Offset, MDPtr, DL)) { - DEBUG(dbgs() << "Modifying debug info due to spill:" << - "\t" << *MI); - MachineBasicBlock *MBB = MI->getParent(); - MBB->insert(MBB->erase(MI), NewDV); - // Scan NewDV operands from the beginning. - MI = NewDV; - ScanDbgValue = true; - break; - } else { - // We can't allocate a physreg for a DebugValue; sorry! - DEBUG(dbgs() << "Unable to allocate vreg used by DBG_VALUE"); - MO.setReg(0); - } + MachineBasicBlock *MBB = MI->getParent(); + MachineInstr *NewDV = BuildMI(*MBB, MBB->erase(MI), DL, + TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SS).addImm(Offset).addMetadata(MDPtr); + DEBUG(dbgs() << "Modifying debug info due to spill:" + << "\t" << *NewDV); + // Scan NewDV operands from the beginning. + MI = NewDV; + ScanDbgValue = true; + break; } } LiveDbgValueMap[Reg].push_back(MI); diff --git a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp index 9eed1fc..c08d955 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocGreedy.cpp @@ -29,6 +29,7 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -71,6 +72,7 @@ class RAGreedy : public MachineFunctionPass, // analyses SlotIndexes *Indexes; + MachineBlockFrequencyInfo *MBFI; MachineDominatorTree *DomTree; MachineLoopInfo *Loops; EdgeBundles *Bundles; @@ -118,7 +120,9 @@ class RAGreedy : public MachineFunctionPass, RS_Done }; +#ifndef NDEBUG static const char *const StageName[]; +#endif // RegInfo - Keep additional information about each live range. struct RegInfo { @@ -145,7 +149,7 @@ class RAGreedy : public MachineFunctionPass, void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { ExtraRegInfo.resize(MRI->getNumVirtRegs()); for (;Begin != End; ++Begin) { - unsigned Reg = (*Begin)->reg; + unsigned Reg = *Begin; if (ExtraRegInfo[Reg].Stage == RS_New) ExtraRegInfo[Reg].Stage = NewStage; } @@ -158,6 +162,8 @@ class RAGreedy : public MachineFunctionPass, EvictionCost(unsigned B = 0) : BrokenHints(B), MaxWeight(0) {} + bool isMax() const { return BrokenHints == ~0u; } + bool operator<(const EvictionCost &O) const { if (BrokenHints != O.BrokenHints) return BrokenHints < O.BrokenHints; @@ -216,7 +222,7 @@ class RAGreedy : public MachineFunctionPass, /// class. SmallVector GlobalCand; - enum { NoCand = ~0u }; + enum LLVM_ENUM_INT_TYPE(unsigned) { NoCand = ~0u }; /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to /// NoCand which indicates the stack interval. @@ -237,7 +243,7 @@ public: virtual void enqueue(LiveInterval *LI); virtual LiveInterval *dequeue(); virtual unsigned selectOrSplit(LiveInterval&, - SmallVectorImpl&); + SmallVectorImpl&); /// Perform register allocation. 
virtual bool runOnMachineFunction(MachineFunction &mf); @@ -249,33 +255,34 @@ private: void LRE_WillShrinkVirtReg(unsigned); void LRE_DidCloneVirtReg(unsigned, unsigned); - float calcSpillCost(); - bool addSplitConstraints(InterferenceCache::Cursor, float&); + BlockFrequency calcSpillCost(); + bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); void addThroughConstraints(InterferenceCache::Cursor, ArrayRef); void growRegion(GlobalSplitCandidate &Cand); - float calcGlobalSplitCost(GlobalSplitCandidate&); + BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); bool calcCompactRegion(GlobalSplitCandidate&); void splitAroundRegion(LiveRangeEdit&, ArrayRef); void calcGapWeights(unsigned, SmallVectorImpl&); + unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); void evictInterference(LiveInterval&, unsigned, - SmallVectorImpl&); + SmallVectorImpl&); unsigned tryAssign(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); + SmallVectorImpl&); unsigned tryEvict(LiveInterval&, AllocationOrder&, - SmallVectorImpl&, unsigned = ~0u); + SmallVectorImpl&, unsigned = ~0u); unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); + SmallVectorImpl&); unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); + SmallVectorImpl&); unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); + SmallVectorImpl&); unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); + SmallVectorImpl&); unsigned trySplit(LiveInterval&, AllocationOrder&, - SmallVectorImpl&); + SmallVectorImpl&); }; } // end anonymous namespace @@ -308,7 +315,6 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); @@ -320,6 +326,8 @@ RAGreedy::RAGreedy(): MachineFunctionPass(ID) { void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); + AU.addRequired(); + AU.addPreserved(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -330,7 +338,6 @@ void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); AU.addRequired(); AU.addPreserved(); AU.addRequired(); @@ -407,15 +414,28 @@ void RAGreedy::enqueue(LiveInterval *LI) { // everything else has been allocated. Prio = Size; } else { - // Everything is allocated in long->short order. Long ranges that don't fit - // should be spilled (or split) ASAP so they don't create interference. - Prio = (1u << 31) + Size; + if (ExtraRegInfo[Reg].Stage == RS_Assign && !LI->empty() && + LIS->intervalIsInOneMBB(*LI)) { + // Allocate original local ranges in linear instruction order. Since they + // are singly defined, this produces optimal coloring in the absence of + // global interference and other constraints. + Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); + } + else { + // Allocate global and split ranges in long->short order. 
Long ranges that + // don't fit should be spilled (or split) ASAP so they don't create + // interference. Mark a bit to prioritize global above local ranges. + Prio = (1u << 29) + Size; + } + // Mark a higher bit to prioritize global and local above RS_Split. + Prio |= (1u << 31); // Boost ranges that have a physical register hint. if (VRM->hasKnownPreference(Reg)) Prio |= (1u << 30); } - + // The virtual register number is a tie breaker for same-sized ranges. + // Give lower vreg numbers higher priority to assign them first. Queue.push(std::make_pair(Prio, ~Reg)); } @@ -435,7 +455,7 @@ LiveInterval *RAGreedy::dequeue() { /// tryAssign - Try to assign VirtReg to an available register. unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl &NewVRegs) { + SmallVectorImpl &NewVRegs) { Order.rewind(); unsigned PhysReg; while ((PhysReg = Order.next())) @@ -476,6 +496,31 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, // Interference eviction //===----------------------------------------------------------------------===// +unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { + AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); + unsigned PhysReg; + while ((PhysReg = Order.next())) { + if (PhysReg == PrevReg) + continue; + + MCRegUnitIterator Units(PhysReg, TRI); + for (; Units.isValid(); ++Units) { + // Instantiate a "subquery", not to be confused with the Queries array. + LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]); + if (subQ.checkInterference()) + break; + } + // If no units have interference, break out with the current PhysReg. + if (!Units.isValid()) + break; + } + if (PhysReg) + DEBUG(dbgs() << "can reassign: " << VirtReg << " from " + << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) + << '\n'); + return PhysReg; +} + /// shouldEvict - determine if A should evict the assigned live range B. The /// eviction policy defined by this function together with the allocation order /// defined by enqueue() decides which registers ultimately end up being split @@ -516,6 +561,8 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) return false; + bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); + // Find VirtReg's cascade number. This will be unassigned if VirtReg was never // involved in an eviction before. If a cascade number was assigned, deny // evicting anything with the same or a newer cascade number. This prevents @@ -569,8 +616,17 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, // Abort if this would be too expensive. if (!(Cost < MaxCost)) return false; + if (Urgent) + continue; + // If !MaxCost.isMax(), then we're just looking for a cheap register. + // Evicting another local live range in this case could lead to suboptimal + // coloring. + if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && + !canReassign(*Intf, PhysReg)) { + return false; + } // Finally, apply the eviction policy for non-urgent evictions. - if (!Urgent && !shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) + if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) return false; } } @@ -582,7 +638,7 @@ bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, /// from being assigned to Physreg. This assumes that canEvictInterference /// returned true. 
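// [Editor's aside on the two RAGreedy policy changes above; illustrative.]
// enqueue() now packs its priority word as (reading the hunk):
//   bit 31: unsplit work outranks deferred RS_Split leftovers
//   bit 30: ranges with a known physreg hint get a boost
//   bit 29: global/split ranges outrank local (single-MBB) ranges
//   low bits: range size, or linear instruction distance for local ranges
//   Queue.push(std::make_pair(Prio, ~Reg)); // ~Reg: lower vregs dequeue first
// And canEvictInterference() grew a guard: when merely shopping for a cheap
// register (!MaxCost.isMax()), a local live range may evict another local
// live range only if canReassign() can park the evictee on some other
// physreg; otherwise local-vs-local eviction risks suboptimal coloring.
// Urgent evictions still bypass the policy checks entirely.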
void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, - SmallVectorImpl &NewVRegs) { + SmallVectorImpl &NewVRegs) { // Make sure that VirtReg has a cascade number, and assign that cascade // number to every evicted register. These live ranges can then only be // evicted by a newer cascade, preventing infinite loops. @@ -614,7 +670,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, "Cannot decrease cascade number, illegal eviction"); ExtraRegInfo[Intf->reg].Cascade = Cascade; ++NumEvicted; - NewVRegs.push_back(Intf); + NewVRegs.push_back(Intf->reg); } } @@ -624,7 +680,7 @@ void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, /// @return Physreg to assign VirtReg, or 0. unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl &NewVRegs, + SmallVectorImpl &NewVRegs, unsigned CostPerUseLimit) { NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); @@ -699,12 +755,12 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, /// that all preferences in SplitConstraints are met. /// Return false if there are no bundles with positive bias. bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, - float &Cost) { + BlockFrequency &Cost) { ArrayRef UseBlocks = SA->getUseBlocks(); // Reset interference dependent info. SplitConstraints.resize(UseBlocks.size()); - float StaticCost = 0; + BlockFrequency StaticCost = 0; for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; @@ -713,7 +769,7 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, Intf.moveToBlock(BC.Number); BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; - BC.ChangesValue = BI.FirstDef; + BC.ChangesValue = BI.FirstDef.isValid(); if (!Intf.hasInterference()) continue; @@ -742,8 +798,8 @@ bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, } // Accumulate the total frequency of inserted spill code. - if (Ins) - StaticCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + while (Ins--) + StaticCost += SpillPlacer->getBlockFrequency(BC.Number); } Cost = StaticCost; @@ -876,7 +932,7 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { SpillPlacer->prepare(Cand.LiveBundles); // The static split cost will be zero since Cand.Intf reports no interference. - float Cost; + BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { DEBUG(dbgs() << ", none.\n"); return false; @@ -901,8 +957,8 @@ bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { /// calcSpillCost - Compute how expensive it would be to split the live range in /// SA around all use blocks instead of forming bundle regions. -float RAGreedy::calcSpillCost() { - float Cost = 0; +BlockFrequency RAGreedy::calcSpillCost() { + BlockFrequency Cost = 0; ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; @@ -921,8 +977,8 @@ float RAGreedy::calcSpillCost() { /// pattern in LiveBundles. This cost should be added to the local cost of the /// interference pattern in SplitConstraints.
/// -float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { - float GlobalCost = 0; +BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { + BlockFrequency GlobalCost = 0; const BitVector &LiveBundles = Cand.LiveBundles; ArrayRef UseBlocks = SA->getUseBlocks(); for (unsigned i = 0; i != UseBlocks.size(); ++i) { @@ -936,8 +992,8 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); if (BI.LiveOut) Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); - if (Ins) - GlobalCost += Ins * SpillPlacer->getBlockFrequency(BC.Number); + while (Ins--) + GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); } for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { @@ -949,8 +1005,10 @@ float RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { if (RegIn && RegOut) { // We need double spill code if this block has interference. Cand.Intf.moveToBlock(Number); - if (Cand.Intf.hasInterference()) - GlobalCost += 2*SpillPlacer->getBlockFrequency(Number); + if (Cand.Intf.hasInterference()) { + GlobalCost += SpillPlacer->getBlockFrequency(Number); + GlobalCost += SpillPlacer->getBlockFrequency(Number); + } continue; } // live-in / stack-out or stack-in live-out. @@ -1067,7 +1125,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, SmallVector IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(Reg, LREdit.regs()); + DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); unsigned OrigBlocks = SA->getNumLiveBlocks(); @@ -1078,7 +1136,7 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, // - Block-local splits are candidates for local splitting. // - DCE leftovers should go back on the queue. for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { - LiveInterval &Reg = *LREdit.get(i); + LiveInterval &Reg = LIS->getInterval(LREdit.get(i)); // Ignore old intervals from DCE. if (getStage(Reg) != RS_New) @@ -1112,10 +1170,10 @@ void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, } unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl &NewVRegs) { + SmallVectorImpl &NewVRegs) { unsigned NumCands = 0; unsigned BestCand = NoCand; - float BestCost; + BlockFrequency BestCost; SmallVector UsedCands; // Check if we can split this live range around a compact region. @@ -1123,11 +1181,11 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, if (HasCompact) { // Yes, keep GlobalCand[0] as the compact region candidate. NumCands = 1; - BestCost = HUGE_VALF; + BestCost = BlockFrequency::getMaxFrequency(); } else { // No benefit from the compact region, our fallback will be per-block // splitting. Make sure we find a solution that is cheaper than spilling. - BestCost = Hysteresis * calcSpillCost(); + BestCost = calcSpillCost(); DEBUG(dbgs() << "Cost of isolating all blocks = " << BestCost << '\n'); } @@ -1157,7 +1215,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, Cand.reset(IntfCache, PhysReg); SpillPlacer->prepare(Cand.LiveBundles); - float Cost; + BlockFrequency Cost; if (!addSplitConstraints(Cand.Intf, Cost)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); continue; @@ -1193,7 +1251,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, }); if (Cost < BestCost) { BestCand = NumCands; - BestCost = Hysteresis * Cost; // Prevent rounding effects. 
+ BestCost = Cost; } ++NumCands; } @@ -1247,7 +1305,7 @@ unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// creates a lot of local live ranges, that will be split by tryLocalSplit if /// they don't allocate. unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl &NewVRegs) { + SmallVectorImpl &NewVRegs) { assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); unsigned Reg = VirtReg.reg; bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); @@ -1268,14 +1326,14 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, SE->finish(&IntvMap); // Tell LiveDebugVariables about the new ranges. - DebugVars->splitRegister(Reg, LREdit.regs()); + DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); // Sort out the new intervals created by splitting. The remainder interval // goes straight to spilling, the new local ranges get to stay RS_New. for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { - LiveInterval &LI = *LREdit.get(i); + LiveInterval &LI = LIS->getInterval(LREdit.get(i)); if (getStage(LI) == RS_New && IntvMap[i] == 0) setStage(LI, RS_Spill); } @@ -1299,7 +1357,7 @@ unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// This is similar to spilling to a larger register class. unsigned RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl &NewVRegs) { + SmallVectorImpl &NewVRegs) { // There is no point to this if there are no larger sub-classes. if (!RegClassInfo.isProperSubClass(MRI->getRegClass(VirtReg.reg))) return 0; @@ -1335,7 +1393,7 @@ RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, SmallVector IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); ExtraRegInfo.resize(MRI->getNumVirtRegs()); // Assign all new registers to RS_Spill. This was the last chance. @@ -1406,9 +1464,9 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, // Add fixed interference. for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { - const LiveInterval &LI = LIS->getRegUnit(*Units); - LiveInterval::const_iterator I = LI.find(StartIdx); - LiveInterval::const_iterator E = LI.end(); + const LiveRange &LR = LIS->getRegUnit(*Units); + LiveRange::const_iterator I = LR.find(StartIdx); + LiveRange::const_iterator E = LR.end(); // Same loop as above. Mark any overlapped gaps as HUGE_VALF. for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { @@ -1419,7 +1477,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, break; for (; Gap != NumGaps; ++Gap) { - GapWeight[Gap] = HUGE_VALF; + GapWeight[Gap] = llvm::huge_valf; if (Uses[Gap+1].getBaseIndex() >= I->end) break; } @@ -1433,7 +1491,7 @@ void RAGreedy::calcGapWeights(unsigned PhysReg, /// basic block. 
/// unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl &NewVRegs) { + SmallVectorImpl &NewVRegs) { assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); @@ -1511,7 +1569,9 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, unsigned BestAfter = 0; float BestDiff = 0; - const float blockFreq = SpillPlacer->getBlockFrequency(BI.MBB->getNumber()); + const float blockFreq = + SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * + (1.0f / BlockFrequency::getEntryFrequency()); SmallVector GapWeight; Order.rewind(); @@ -1523,7 +1583,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Remove any gaps with regmask clobbers. if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) - GapWeight[RegMaskGaps[i]] = HUGE_VALF; + GapWeight[RegMaskGaps[i]] = llvm::huge_valf; // Try to find the best sequence of gaps to close. // The new spill weight must be larger than any gap interference. @@ -1558,7 +1618,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, // Legally, without causing looping? bool Legal = !ProgressRequired || NewGaps < NumGaps; - if (Legal && MaxGap < HUGE_VALF) { + if (Legal && MaxGap < llvm::huge_valf) { // Estimate the new spill weight. Each instruction reads or writes the // register. Conservatively assume there are no read-modify-write // instructions. @@ -1625,7 +1685,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, SE->useIntv(SegStart, SegStop); SmallVector IntvMap; SE->finish(&IntvMap); - DebugVars->splitRegister(VirtReg.reg, LREdit.regs()); + DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); // If the new range has the same number of instructions as before, mark it as // RS_Split2 so the next split will be forced to make progress. Otherwise, @@ -1638,8 +1698,8 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, assert(!ProgressRequired && "Didn't make progress when it was required."); for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) if (IntvMap[i] == 1) { - setStage(*LREdit.get(i), RS_Split2); - DEBUG(dbgs() << PrintReg(LREdit.get(i)->reg)); + setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); + DEBUG(dbgs() << PrintReg(LREdit.get(i))); } DEBUG(dbgs() << '\n'); } @@ -1656,7 +1716,7 @@ unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, /// assignable. /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, - SmallVectorImpl&NewVRegs) { + SmallVectorImpl&NewVRegs) { // Ranges must be Split2 or less. if (getStage(VirtReg) >= RS_Spill) return 0; @@ -1705,7 +1765,7 @@ unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, //===----------------------------------------------------------------------===// unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, - SmallVectorImpl &NewVRegs) { + SmallVectorImpl &NewVRegs) { // First try assigning a free register. 
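// [Editor's aside on the float -> BlockFrequency migration in the split-cost
// hunks above; illustrative.] BlockFrequency is a saturating fixed-point
// integer type, which is presumably why `Ins * freq` became repeated
// addition:
//
//   while (Ins--)
//     StaticCost += SpillPlacer->getBlockFrequency(BC.Number);
//
// and why the Hysteresis factor ("prevent rounding effects") disappears from
// the BestCost comparisons: exact integer comparisons need no fudge. Where a
// float is still wanted, tryLocalSplit now normalizes explicitly:
//
//   const float blockFreq =
//       SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() *
//       (1.0f / BlockFrequency::getEntryFrequency());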
AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo); if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) @@ -1730,7 +1790,7 @@ unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, if (Stage < RS_Split) { setStage(VirtReg, RS_Split); DEBUG(dbgs() << "wait for second round\n"); - NewVRegs.push_back(&VirtReg); + NewVRegs.push_back(VirtReg.reg); return 0; } @@ -1770,6 +1830,7 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { getAnalysis(), getAnalysis()); Indexes = &getAnalysis(); + MBFI = &getAnalysis(); DomTree = &getAnalysis(); SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); Loops = &getAnalysis(); @@ -1777,8 +1838,12 @@ bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { SpillPlacer = &getAnalysis(); DebugVars = &getAnalysis(); + calculateSpillWeightsAndHints(*LIS, mf, *Loops, *MBFI); + + DEBUG(LIS->dump()); + SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); - SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree)); + SE.reset(new SplitEditor(*SA, *LIS, *VRM, *DomTree, *MBFI)); ExtraRegInfo.clear(); ExtraRegInfo.resize(MRI->getNumVirtRegs()); NextCascade = 1; diff --git a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp index 15a88e2..88c8201 100644 --- a/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp +++ b/contrib/llvm/lib/CodeGen/RegAllocPBQP.cpp @@ -40,6 +40,7 @@ #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -94,9 +95,7 @@ public: : MachineFunctionPass(ID), builder(b.take()), customPassID(cPassID) { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); - initializeCalculateSpillWeightsPass(*PassRegistry::getPassRegistry()); initializeLiveStacksPass(*PassRegistry::getPassRegistry()); - initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); } @@ -130,8 +129,8 @@ private: const TargetMachine *tm; const TargetRegisterInfo *tri; const TargetInstrInfo *tii; - const MachineLoopInfo *loopInfo; MachineRegisterInfo *mri; + const MachineBlockFrequencyInfo *mbfi; OwningPtr spiller; LiveIntervals *lis; @@ -158,13 +157,13 @@ char RegAllocPBQP::ID = 0; } // End anonymous namespace. -unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::ConstNodeItr node) const { +unsigned PBQPRAProblem::getVRegForNode(PBQP::Graph::NodeId node) const { Node2VReg::const_iterator vregItr = node2VReg.find(node); assert(vregItr != node2VReg.end() && "No vreg for node."); return vregItr->second; } -PBQP::Graph::NodeItr PBQPRAProblem::getNodeForVReg(unsigned vreg) const { +PBQP::Graph::NodeId PBQPRAProblem::getNodeForVReg(unsigned vreg) const { VReg2Node::const_iterator nodeItr = vreg2Node.find(vreg); assert(nodeItr != vreg2Node.end() && "No node for vreg."); return nodeItr->second; @@ -188,7 +187,7 @@ unsigned PBQPRAProblem::getPRegForOption(unsigned vreg, unsigned option) const { } PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, + const MachineBlockFrequencyInfo *mbfi, const RegSet &vregs) { LiveIntervals *LIS = const_cast(lis); @@ -247,7 +246,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, } // Construct the node. 
- PBQP::Graph::NodeItr node = + PBQP::Graph::NodeId node = g.addNode(PBQP::Vector(vrAllowed.size() + 1, 0)); // Record the mapping and allowed set in the problem. @@ -273,7 +272,7 @@ PBQPRAProblem *PBQPBuilder::build(MachineFunction *mf, const LiveIntervals *lis, assert(!l2.empty() && "Empty interval in vreg set?"); if (l1.overlaps(l2)) { - PBQP::Graph::EdgeItr edge = + PBQP::Graph::EdgeId edge = g.addEdge(p->getNodeForVReg(vr1), p->getNodeForVReg(vr2), PBQP::Matrix(vr1Allowed.size()+1, vr2Allowed.size()+1, 0)); @@ -313,10 +312,10 @@ void PBQPBuilder::addInterferenceCosts( PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, const LiveIntervals *lis, - const MachineLoopInfo *loopInfo, + const MachineBlockFrequencyInfo *mbfi, const RegSet &vregs) { - OwningPtr p(PBQPBuilder::build(mf, lis, loopInfo, vregs)); + OwningPtr p(PBQPBuilder::build(mf, lis, mbfi, vregs)); PBQP::Graph &g = p->getGraph(); const TargetMachine &tm = mf->getTarget(); @@ -350,7 +349,7 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, PBQP::PBQPNum cBenefit = copyFactor * LiveIntervals::getSpillWeight(false, true, - loopInfo->getLoopDepth(mbb)); + mbfi->getBlockFreq(mbb)); if (cp.isPhys()) { if (!mf->getRegInfo().isAllocatable(dst)) { @@ -364,16 +363,16 @@ PBQPRAProblem *PBQPBuilderWithCoalescing::build(MachineFunction *mf, } if (pregOpt < allowed.size()) { ++pregOpt; // +1 to account for spill option. - PBQP::Graph::NodeItr node = p->getNodeForVReg(src); + PBQP::Graph::NodeId node = p->getNodeForVReg(src); addPhysRegCoalesce(g.getNodeCosts(node), pregOpt, cBenefit); } } else { const PBQPRAProblem::AllowedSet *allowed1 = &p->getAllowedSet(dst); const PBQPRAProblem::AllowedSet *allowed2 = &p->getAllowedSet(src); - PBQP::Graph::NodeItr node1 = p->getNodeForVReg(dst); - PBQP::Graph::NodeItr node2 = p->getNodeForVReg(src); - PBQP::Graph::EdgeItr edge = g.findEdge(node1, node2); - if (edge == g.edgesEnd()) { + PBQP::Graph::NodeId node1 = p->getNodeForVReg(dst); + PBQP::Graph::NodeId node2 = p->getNodeForVReg(src); + PBQP::Graph::EdgeId edge = g.findEdge(node1, node2); + if (edge == g.invalidEdgeId()) { edge = g.addEdge(node1, node2, PBQP::Matrix(allowed1->size() + 1, allowed2->size() + 1, 0)); @@ -432,13 +431,14 @@ void RegAllocPBQP::getAnalysisUsage(AnalysisUsage &au) const { //au.addRequiredID(SplitCriticalEdgesID); if (customPassID) au.addRequiredID(*customPassID); - au.addRequired(); au.addRequired(); au.addPreserved(); - au.addRequired(); - au.addPreserved(); + au.addRequired(); + au.addPreserved(); au.addRequired(); au.addPreserved(); + au.addRequired(); + au.addPreserved(); au.addRequired(); au.addPreserved(); MachineFunctionPass::getAnalysisUsage(au); @@ -475,11 +475,11 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, const PBQP::Graph &g = problem.getGraph(); // Iterate over the nodes mapping the PBQP solution to a register // assignment. 
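// [Editor's aside, illustrative.] The PBQP hunks replace graph iterators
// (NodeItr/EdgeItr) with opaque handles (NodeId/EdgeId); a missing edge is
// now detected against a sentinel value instead of an end iterator:
//
//   PBQP::Graph::NodeId node1 = p->getNodeForVReg(dst);
//   PBQP::Graph::NodeId node2 = p->getNodeForVReg(src);
//   PBQP::Graph::EdgeId edge = g.findEdge(node1, node2);
//   if (edge == g.invalidEdgeId()) {
//     // No existing interference edge: add one before adding coalescing
//     // benefits to the cost matrix.
//   }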
- for (PBQP::Graph::ConstNodeItr node = g.nodesBegin(), - nodeEnd = g.nodesEnd(); - node != nodeEnd; ++node) { - unsigned vreg = problem.getVRegForNode(node); - unsigned alloc = solution.getSelection(node); + for (PBQP::Graph::NodeItr nodeItr = g.nodesBegin(), + nodeEnd = g.nodesEnd(); + nodeItr != nodeEnd; ++nodeItr) { + unsigned vreg = problem.getVRegForNode(*nodeItr); + unsigned alloc = solution.getSelection(*nodeItr); if (problem.isPRegOption(vreg, alloc)) { unsigned preg = problem.getPRegForOption(vreg, alloc); @@ -489,7 +489,7 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, vrm->assignVirt2Phys(vreg, preg); } else if (problem.isSpillOption(vreg, alloc)) { vregsToAlloc.erase(vreg); - SmallVector newSpills; + SmallVector newSpills; LiveRangeEdit LRE(&lis->getInterval(vreg), newSpills, *mf, *lis, vrm); spiller->spill(LRE); @@ -500,9 +500,10 @@ bool RegAllocPBQP::mapPBQPToRegAlloc(const PBQPRAProblem &problem, // allocate. for (LiveRangeEdit::iterator itr = LRE.begin(), end = LRE.end(); itr != end; ++itr) { - assert(!(*itr)->empty() && "Empty spill range."); - DEBUG(dbgs() << PrintReg((*itr)->reg, tri) << " "); - vregsToAlloc.insert((*itr)->reg); + LiveInterval &li = lis->getInterval(*itr); + assert(!li.empty() && "Empty spill range."); + DEBUG(dbgs() << PrintReg(li.reg, tri) << " "); + vregsToAlloc.insert(li.reg); } DEBUG(dbgs() << ")\n"); @@ -546,7 +547,10 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { lis = &getAnalysis(); lss = &getAnalysis(); - loopInfo = &getAnalysis(); + mbfi = &getAnalysis(); + + calculateSpillWeightsAndHints(*lis, MF, getAnalysis(), + *mbfi); vrm = &getAnalysis(); spiller.reset(createInlineSpiller(*this, MF, *vrm)); @@ -584,7 +588,7 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { DEBUG(dbgs() << " PBQP Regalloc round " << round << ":\n"); OwningPtr problem( - builder->build(mf, lis, loopInfo, vregsToAlloc)); + builder->build(mf, lis, mbfi, vregsToAlloc)); #ifndef NDEBUG if (pbqpDumpGraphs) { diff --git a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp index 87382d8..cacd7de 100644 --- a/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterClassInfo.cpp @@ -40,6 +40,9 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { if (MF->getTarget().getRegisterInfo() != TRI) { TRI = MF->getTarget().getRegisterInfo(); RegClass.reset(new RCInfo[TRI->getNumRegClasses()]); + unsigned NumPSets = TRI->getNumRegPressureSets(); + PSetLimits.reset(new unsigned[NumPSets]); + std::fill(&PSetLimits[0], &PSetLimits[NumPSets], 0); Update = true; } @@ -144,3 +147,32 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { RCI.Tag = Tag; } +/// This is not accurate because two overlapping register sets may have some +/// nonoverlapping reserved registers. However, computing the allocation order +/// for all register classes would be too expensive. +unsigned RegisterClassInfo::computePSetLimit(unsigned Idx) const { + const TargetRegisterClass *RC = 0; + unsigned NumRCUnits = 0; + for (TargetRegisterInfo::regclass_iterator + RI = TRI->regclass_begin(), RE = TRI->regclass_end(); RI != RE; ++RI) { + const int *PSetID = TRI->getRegClassPressureSets(*RI); + for (; *PSetID != -1; ++PSetID) { + if ((unsigned)*PSetID == Idx) + break; + } + if (*PSetID == -1) + continue; + + // Found a register class that counts against this pressure set. + // For efficiency, only compute the set order for the largest set. 
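// [Editor's aside with illustrative numbers.] computePSetLimit() nets the
// reserved registers of the chosen (largest contributing) class out of the
// target's static pressure-set limit:
//
//   NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC);
//   limit = TRI->getRegPressureSetLimit(Idx)
//         - TRI->getRegClassWeight(RC).RegWeight * NReserved;
//
// e.g. a static limit of 24 with 4 reserved registers of weight 2 leaves
// 24 - 2*4 = 16 allocatable pressure units (numbers invented for the example).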
+ unsigned NUnits = TRI->getRegClassWeight(*RI).WeightLimit; + if (!RC || NUnits > NumRCUnits) { + RC = *RI; + NumRCUnits = NUnits; + } + } + compute(RC); + unsigned NReserved = RC->getNumRegs() - getNumAllocatableRegs(RC); + return TRI->getRegPressureSetLimit(Idx) + - TRI->getRegClassWeight(RC).RegWeight * NReserved; +} diff --git a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp index d85646d..dd86c1f 100644 --- a/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterCoalescer.cpp @@ -166,7 +166,8 @@ namespace { /// reMaterializeTrivialDef - If the source of a copy is defined by a /// trivial computation, replace the copy by rematerialize the definition. - bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI); + bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI, + bool &IsDefCopy); /// canJoinPhys - Return true if a physreg copy should be joined. bool canJoinPhys(const CoalescerPair &CP); @@ -397,7 +398,7 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { } void RegisterCoalescer::eliminateDeadDefs() { - SmallVector NewRegs; + SmallVector NewRegs; LiveRangeEdit(0, NewRegs, *MF, *LIS, 0, this).eliminateDeadDefs(DeadDefs); } @@ -433,11 +434,11 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); - // BValNo is a value number in B that is defined by a copy from A. 'B3' in + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. - LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); - if (BLR == IntB.end()) return false; - VNInfo *BValNo = BLR->valno; + LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx); + if (BS == IntB.end()) return false; + VNInfo *BValNo = BS->valno; // Get the location that B is defined at. Two options: either this value has // an unknown definition point or it is defined at CopyIdx. If unknown, we @@ -446,10 +447,10 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, // AValNo is the value number in A that defines the copy, A3 in the example. SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); - LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); - // The live range might not exist after fun with physreg coalescing. - if (ALR == IntA.end()) return false; - VNInfo *AValNo = ALR->valno; + LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx); + // The live segment might not exist after fun with physreg coalescing. + if (AS == IntA.end()) return false; + VNInfo *AValNo = AS->valno; // If AValNo is defined as a copy from IntB, we can potentially process this. // Get the instruction that defines this value number. @@ -458,54 +459,54 @@ bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP, if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy()) return false; - // Get the LiveRange in IntB that this value number starts with. - LiveInterval::iterator ValLR = - IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); - if (ValLR == IntB.end()) + // Get the Segment in IntB that this value number starts with. 
+ LiveInterval::iterator ValS = + IntB.FindSegmentContaining(AValNo->def.getPrevSlot()); + if (ValS == IntB.end()) return false; - // Make sure that the end of the live range is inside the same block as + // Make sure that the end of the live segment is inside the same block as // CopyMI. - MachineInstr *ValLREndInst = - LIS->getInstructionFromIndex(ValLR->end.getPrevSlot()); - if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) + MachineInstr *ValSEndInst = + LIS->getInstructionFromIndex(ValS->end.getPrevSlot()); + if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent()) return false; - // Okay, we now know that ValLR ends in the same block that the CopyMI - // live-range starts. If there are no intervening live ranges between them in - // IntB, we can merge them. - if (ValLR+1 != BLR) return false; + // Okay, we now know that ValS ends in the same block that the CopyMI + // live-range starts. If there are no intervening live segments between them + // in IntB, we can merge them. + if (ValS+1 != BS) return false; DEBUG(dbgs() << "Extending: " << PrintReg(IntB.reg, TRI)); - SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; + SlotIndex FillerStart = ValS->end, FillerEnd = BS->start; // We are about to delete CopyMI, so need to remove it as the 'instruction // that defines this value #'. Update the valnum with the new defining // instruction #. BValNo->def = FillerStart; // Okay, we can merge them. We need to insert a new liverange: - // [ValLR.end, BLR.begin) of either value number, then we merge the + // [ValS.end, BS.begin) of either value number, then we merge the // two value numbers. - IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); + IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo)); // Okay, merge "B1" into the same value number as "B0". - if (BValNo != ValLR->valno) - IntB.MergeValueNumberInto(BValNo, ValLR->valno); + if (BValNo != ValS->valno) + IntB.MergeValueNumberInto(BValNo, ValS->valno); DEBUG(dbgs() << " result = " << IntB << '\n'); // If the source instruction was killing the source register before the // merge, unset the isKill marker given the live range has been extended. - int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); + int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true); if (UIdx != -1) { - ValLREndInst->getOperand(UIdx).setIsKill(false); + ValSEndInst->getOperand(UIdx).setIsKill(false); } // Rewrite the copy. If the copy instruction was killing the destination // register before the merge, find the last use and trim the live range. That // will also add the isKill marker. 
CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI); - if (ALR->end == CopyIdx) + if (AS->end == CopyIdx) LIS->shrinkToUses(&IntA); ++numExtends; @@ -526,11 +527,11 @@ bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA, for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - LiveInterval::Ranges::iterator BI = - std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); - if (BI != IntB.ranges.begin()) + LiveInterval::iterator BI = + std::upper_bound(IntB.begin(), IntB.end(), AI->start); + if (BI != IntB.begin()) --BI; - for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { + for (; BI != IntB.end() && AI->end >= BI->start; ++BI) { if (BI->valno == BValNo) continue; if (BI->start <= AI->start && BI->end > AI->start) @@ -576,14 +577,12 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, LiveInterval &IntB = LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); - // BValNo is a value number in B that is defined by a copy from A. 'B3' in + // BValNo is a value number in B that is defined by a copy from A. 'B1' in // the example above. VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); if (!BValNo || BValNo->def != CopyIdx) return false; - assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); - // AValNo is the value number in A that defines the copy, A3 in the example. VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); assert(AValNo && "COPY source not live"); @@ -613,7 +612,7 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); unsigned NewReg = NewDstMO.getReg(); - if (NewReg != IntB.reg || !LiveRangeQuery(IntB, AValNo->def).isKill()) + if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill()) return false; // Make sure there are no other definitions of IntB that would reach the @@ -628,8 +627,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, UE = MRI->use_nodbg_end(); UI != UE; ++UI) { MachineInstr *UseMI = &*UI; SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end() || ULR->valno != AValNo) + LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); + if (US == IntA.end() || US->valno != AValNo) continue; // If this use is tied to a def, we can't rewrite the register. if (UseMI->isRegTiedToDefOperand(UI.getOperandNo())) @@ -680,8 +679,8 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, continue; } SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); - LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); - if (ULR == IntA.end() || ULR->valno != AValNo) + LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx); + if (US == IntA.end() || US->valno != AValNo) continue; // Kill flags are no longer accurate. They are recomputed after RA. UseMO.setIsKill(false); @@ -711,14 +710,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, UseMI->eraseFromParent(); } - // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition + // Extend BValNo by merging in IntA live segments of AValNo. Val# definition // is updated. 
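// [Editor's aside, illustrative.] These coalescer hunks track the 3.4
// renaming of a LiveInterval's pieces from "live ranges" to "segments"; the
// query and mutation APIs change spelling, not behavior:
//
//   - LiveInterval::iterator BS = IntB.FindLiveRangeContaining(CopyIdx);
//   + LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
//
//   - IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo));
//   + IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));
//
// and the ad-hoc LiveRangeQuery(LI, Idx) helper becomes
// LiveQueryResult LRQ = LI.Query(Idx), as in the joinCopy hunks below.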
VNInfo *ValNo = BValNo; ValNo->def = AValNo->def; for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); AI != AE; ++AI) { if (AI->valno != AValNo) continue; - IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); + IntB.addSegment(LiveInterval::Segment(AI->start, AI->end, ValNo)); } DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); @@ -731,23 +730,29 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, /// reMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, - MachineInstr *CopyMI) { + MachineInstr *CopyMI, + bool &IsDefCopy) { + IsDefCopy = false; unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); + unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx(); unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); + unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx(); if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) return false; LiveInterval &SrcInt = LIS->getInterval(SrcReg); - SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); - LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); - assert(SrcLR != SrcInt.end() && "Live range not found!"); - VNInfo *ValNo = SrcLR->valno; + SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI); + VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn(); + assert(ValNo && "CopyMI input register not live"); if (ValNo->isPHIDef() || ValNo->isUnused()) return false; MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); if (!DefMI) return false; - assert(DefMI && "Defining instruction disappeared"); + if (DefMI->isCopyLike()) { + IsDefCopy = true; + return false; + } if (!DefMI->isAsCheapAsAMove()) return false; if (!TII->isTriviallyReMaterializable(DefMI, AA)) @@ -760,31 +765,41 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, return false; // Only support subregister destinations when the def is read-undef. MachineOperand &DstOperand = CopyMI->getOperand(0); + unsigned CopyDstReg = DstOperand.getReg(); if (DstOperand.getSubReg() && !DstOperand.isUndef()) return false; + + const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF); if (!DefMI->isImplicitDef()) { - // Make sure the copy destination register class fits the instruction - // definition register class. The mismatch can happen as a result of earlier - // extract_subreg, insert_subreg, subreg_to_reg coalescing. - const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF); - if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - if (!MRI->constrainRegClass(DstReg, RC)) + if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { + unsigned NewDstReg = DstReg; + + unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(), + DefMI->getOperand(0).getSubReg()); + if (NewDstIdx) + NewDstReg = TRI->getSubReg(DstReg, NewDstIdx); + + // Finally, make sure that the physical subregister that will be + // constructed later is permitted for the instruction. + if (!DefRC->contains(NewDstReg)) return false; - } else if (!RC->contains(DstReg)) - return false; + } else { + // Theoretically, some stack frame reference could exist. Just make sure + // it hasn't actually happened. 
+ assert(TargetRegisterInfo::isVirtualRegister(DstReg) && + "Only expect to deal with virtual or physical registers"); + } } MachineBasicBlock *MBB = CopyMI->getParent(); MachineBasicBlock::iterator MII = llvm::next(MachineBasicBlock::iterator(CopyMI)); - TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); + TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, DefMI, *TRI); MachineInstr *NewMI = prior(MII); - // The original DefMI may have been a subregister def, but the full register - // class of its destination matches the destination of CopyMI, and CopyMI is - // either a full register def or is read-undef. Therefore we can clear the - // subregister index on the rematerialized instruction. - NewMI->getOperand(0).setSubReg(0); + LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); + CopyMI->eraseFromParent(); + ErasedInstrs.insert(CopyMI); // NewMI may have dead implicit defs (E.g. EFLAGS for MOVr0 on X86). // We need to remember these so we can add intervals once we insert @@ -800,6 +815,47 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, } } + if (TargetRegisterInfo::isVirtualRegister(DstReg)) { + unsigned NewIdx = NewMI->getOperand(0).getSubReg(); + const TargetRegisterClass *RCForInst; + if (NewIdx) + RCForInst = TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), DefRC, + NewIdx); + + if (MRI->constrainRegClass(DstReg, DefRC)) { + // The materialized instruction is quite capable of setting DstReg + // directly, but it may still have a now-trivial subregister index which + // we should clear. + NewMI->getOperand(0).setSubReg(0); + } else if (NewIdx && RCForInst) { + // The subreg index on NewMI is essential; we still have to make sure + // DstReg:idx is in a class that NewMI can use. + MRI->constrainRegClass(DstReg, RCForInst); + } else { + // DstReg is actually incompatible with NewMI, we have to move to a + // super-reg's class. This could come from a sequence like: + // GR32 = MOV32r0 + // GR8 = COPY GR32:sub_8 + MRI->setRegClass(DstReg, CP.getNewRC()); + updateRegDefsUses(DstReg, DstReg, DstIdx); + NewMI->getOperand(0).setSubReg( + TRI->composeSubRegIndices(SrcIdx, DefMI->getOperand(0).getSubReg())); + } + } else if (NewMI->getOperand(0).getReg() != CopyDstReg) { + // The New instruction may be defining a sub-register of what's actually + // been asked for. If so it must implicitly define the whole thing. + assert(TargetRegisterInfo::isPhysicalRegister(DstReg) && + "Only expect virtual or physical registers in remat"); + NewMI->getOperand(0).setIsDead(true); + NewMI->addOperand(MachineOperand::CreateReg(CopyDstReg, + true /*IsDef*/, + true /*IsImp*/, + false /*IsKill*/)); + } + + if (NewMI->getOperand(0).getSubReg()) + NewMI->getOperand(0).setIsUndef(); + // CopyMI may have implicit operands, transfer them over to the newly // rematerialized instruction. And update implicit def interval valnos. 
for (unsigned i = CopyMI->getDesc().getNumOperands(), @@ -814,18 +870,14 @@ bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, } } - LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); - SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { unsigned Reg = NewMIImplDefs[i]; for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) - if (LiveInterval *LI = LIS->getCachedRegUnit(*Units)) - LI->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); + if (LiveRange *LR = LIS->getCachedRegUnit(*Units)) + LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator()); } - CopyMI->eraseFromParent(); - ErasedInstrs.insert(CopyMI); DEBUG(dbgs() << "Remat: " << *NewMI); ++NumReMats; @@ -994,7 +1046,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (CP.getSrcReg() == CP.getDstReg()) { LiveInterval &LI = LIS->getInterval(CP.getSrcReg()); DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n'); - LiveRangeQuery LRQ(LI, LIS->getInstructionIndex(CopyMI)); + LiveQueryResult LRQ = LI.Query(LIS->getInstructionIndex(CopyMI)); if (VNInfo *DefVNI = LRQ.valueDefined()) { VNInfo *ReadVNI = LRQ.valueIn(); assert(ReadVNI && "No value before copy and no flag."); @@ -1015,8 +1067,11 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. - if (reMaterializeTrivialDef(CP, CopyMI)) + bool IsDefCopy; + if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; + if (IsDefCopy) + Again = true; // May be possible to coalesce later. return false; } } else { @@ -1034,8 +1089,8 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { }); // When possible, let DstReg be the larger interval. - if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).ranges.size() > - LIS->getInterval(CP.getDstReg()).ranges.size()) + if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() > + LIS->getInterval(CP.getDstReg()).size()) CP.flip(); } @@ -1048,10 +1103,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. - if (reMaterializeTrivialDef(CP, CopyMI)) + bool IsDefCopy; + if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy)) return true; - // If we can eliminate the copy without merging the live ranges, do so now. + // If we can eliminate the copy without merging the live segments, do so + // now. if (!CP.isPartial() && !CP.isPhys()) { if (adjustCopiesBackFrom(CP, CopyMI) || removeCopyByCommutingDef(CP, CopyMI)) { @@ -1099,10 +1156,12 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); DEBUG({ - dbgs() << "\tJoined. Result = " << PrintReg(CP.getDstReg(), TRI); - if (!CP.isPhys()) + dbgs() << "\tJoined. 
Result = "; + if (CP.isPhys()) + dbgs() << PrintReg(CP.getDstReg(), TRI); + else dbgs() << LIS->getInterval(CP.getDstReg()); - dbgs() << '\n'; + dbgs() << '\n'; }); ++numJoins; @@ -1114,8 +1173,7 @@ bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) { assert(CP.isPhys() && "Must be a physreg copy"); assert(MRI->isReserved(CP.getDstReg()) && "Not a reserved register"); LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); - DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS - << '\n'); + DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n'); assert(CP.isFlipped() && RHS.containsOneValue() && "Invalid join with reserved register"); @@ -1384,7 +1442,7 @@ VNInfo *JoinVals::stripCopies(VNInfo *VNI) { unsigned Reg = MI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) break; - LiveRangeQuery LRQ(LIS->getInterval(Reg), VNI->def); + LiveQueryResult LRQ = LIS->getInterval(Reg).Query(VNI->def); if (!LRQ.valueIn()) break; VNI = LRQ.valueIn(); @@ -1435,7 +1493,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // The flag on the def operand means that old lane values are // not important. if (Redef) { - V.RedefVNI = LiveRangeQuery(LI, VNI->def).valueIn(); + V.RedefVNI = LI.Query(VNI->def).valueIn(); assert(V.RedefVNI && "Instruction is reading nonexistent value"); computeAssignment(V.RedefVNI->id, Other); V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes; @@ -1452,7 +1510,7 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { } // Find the value in Other that overlaps VNI->def, if any. - LiveRangeQuery OtherLRQ(Other.LI, VNI->def); + LiveQueryResult OtherLRQ = Other.LI.Query(VNI->def); // It is possible that both values are defined by the same instruction, or // the values are PHIs defined in the same block. When that happens, the two @@ -1911,8 +1969,8 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { JoinVals RHSVals(RHS, CP.getSrcIdx(), NewVNInfo, CP, LIS, TRI); JoinVals LHSVals(LHS, CP.getDstIdx(), NewVNInfo, CP, LIS, TRI); - DEBUG(dbgs() << "\t\tRHS = " << PrintReg(CP.getSrcReg()) << ' ' << RHS - << "\n\t\tLHS = " << PrintReg(CP.getDstReg()) << ' ' << LHS + DEBUG(dbgs() << "\t\tRHS = " << RHS + << "\n\t\tLHS = " << LHS << '\n'); // First compute NewVNInfo and the simple value mappings. @@ -1943,8 +2001,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { LIS->shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val())); // Join RHS into LHS. - LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo, - MRI); + LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo); // Kill flags are going to be wrong if the live ranges were overlapping. // Eventually, we should simply clear all kill flags when computing live @@ -1959,7 +2016,7 @@ bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) { // CR_Replace conflicts. DEBUG(dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: " << LHS << '\n'); - LIS->extendToIndices(&LHS, EndPoints); + LIS->extendToIndices(LHS, EndPoints); return true; } @@ -1985,9 +2042,8 @@ struct MBBPriorityInfo { // block (the unsigned), and then on the MBB number. // // EnableGlobalCopies assumes that the primary sort key is loop depth. 
-static int compareMBBPriority(const void *L, const void *R) {
-  const MBBPriorityInfo *LHS = static_cast<const MBBPriorityInfo*>(L);
-  const MBBPriorityInfo *RHS = static_cast<const MBBPriorityInfo*>(R);
+static int compareMBBPriority(const MBBPriorityInfo *LHS,
+                              const MBBPriorityInfo *RHS) {
   // Deeper loops first
   if (LHS->Depth != RHS->Depth)
     return LHS->Depth > RHS->Depth ? -1 : 1;
@@ -2012,6 +2068,9 @@ static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
   if (!Copy->isCopy())
     return false;
 
+  if (Copy->getOperand(1).isUndef())
+    return false;
+
   unsigned SrcReg = Copy->getOperand(1).getReg();
   unsigned DstReg = Copy->getOperand(0).getReg();
   if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
@@ -2057,8 +2116,8 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
   // are not inherently easier to resolve, but slightly preferable until we
   // have local live range splitting. In particular this is required by
   // cmp+jmp macro fusion.
-  for (MachineBasicBlock::reverse_iterator
-         MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) {
+  for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
+       MII != E; ++MII) {
     if (!MII->isCopyLike())
       continue;
     if (isLocalCopy(&(*MII), LIS))
@@ -2142,7 +2201,7 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   const TargetSubtargetInfo &ST = TM->getSubtarget<TargetSubtargetInfo>();
 
   if (EnableGlobalCopies == cl::BOU_UNSET)
-    JoinGlobalCopies = ST.enableMachineScheduler();
+    JoinGlobalCopies = ST.useMachineScheduler();
   else
     JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
 
diff --git a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
index 97f22e1..092ecdd 100644
--- a/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
+++ b/contrib/llvm/lib/CodeGen/RegisterPressure.cpp
@@ -25,68 +25,39 @@ using namespace llvm;
 
 /// Increase pressure for each pressure set provided by TargetRegisterInfo.
 static void increaseSetPressure(std::vector<unsigned> &CurrSetPressure,
-                                std::vector<unsigned> &MaxSetPressure,
-                                const int *PSet, unsigned Weight) {
-  for (; *PSet != -1; ++PSet) {
-    CurrSetPressure[*PSet] += Weight;
-    if (&CurrSetPressure != &MaxSetPressure
-        && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) {
-      MaxSetPressure[*PSet] = CurrSetPressure[*PSet];
-    }
-  }
+                                PSetIterator PSetI) {
+  unsigned Weight = PSetI.getWeight();
+  for (; PSetI.isValid(); ++PSetI)
+    CurrSetPressure[*PSetI] += Weight;
 }
 
 /// Decrease pressure for each pressure set provided by TargetRegisterInfo.
 static void decreaseSetPressure(std::vector<unsigned> &CurrSetPressure,
-                                const int *PSet, unsigned Weight) {
-  for (; *PSet != -1; ++PSet) {
-    assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow");
-    CurrSetPressure[*PSet] -= Weight;
-  }
-}
-
-/// Directly increase pressure only within this RegisterPressure result.
-void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI,
-                                const MachineRegisterInfo *MRI) {
-  if (TargetRegisterInfo::isVirtualRegister(Reg)) {
-    const TargetRegisterClass *RC = MRI->getRegClass(Reg);
-    increaseSetPressure(MaxSetPressure, MaxSetPressure,
-                        TRI->getRegClassPressureSets(RC),
-                        TRI->getRegClassWeight(RC).RegWeight);
-  }
-  else {
-    increaseSetPressure(MaxSetPressure, MaxSetPressure,
-                        TRI->getRegUnitPressureSets(Reg),
-                        TRI->getRegUnitWeight(Reg));
-  }
-}
-
-/// Directly decrease pressure only within this RegisterPressure result.
-void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI, - const MachineRegisterInfo *MRI) { - if (TargetRegisterInfo::isVirtualRegister(Reg)) { - const TargetRegisterClass *RC = MRI->getRegClass(Reg); - decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg), - TRI->getRegUnitWeight(Reg)); + PSetIterator PSetI) { + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + assert(CurrSetPressure[*PSetI] >= Weight && "register pressure underflow"); + CurrSetPressure[*PSetI] -= Weight; } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -static void dumpSetPressure(const std::vector &SetPressure, - const TargetRegisterInfo *TRI) { +void llvm::dumpRegSetPressure(ArrayRef SetPressure, + const TargetRegisterInfo *TRI) { + bool Empty = true; for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) { - if (SetPressure[i] != 0) + if (SetPressure[i] != 0) { dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n'; + Empty = false; + } } + if (Empty) + dbgs() << "\n"; } void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { dbgs() << "Max Pressure: "; - dumpSetPressure(MaxSetPressure, TRI); + dumpRegSetPressure(MaxSetPressure, TRI); dbgs() << "Live In: "; for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) dbgs() << PrintReg(LiveInRegs[i], TRI) << " "; @@ -98,44 +69,33 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { } void RegPressureTracker::dump() const { - dbgs() << "Curr Pressure: "; - dumpSetPressure(CurrSetPressure, TRI); + if (!isTopClosed() || !isBottomClosed()) { + dbgs() << "Curr Pressure: "; + dumpRegSetPressure(CurrSetPressure, TRI); + } P.dump(TRI); } #endif /// Increase the current pressure as impacted by these registers and bump /// the high water mark if needed. -void RegPressureTracker::increaseRegPressure(ArrayRef Regs) { - for (unsigned I = 0, E = Regs.size(); I != E; ++I) { - if (TargetRegisterInfo::isVirtualRegister(Regs[I])) { - const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]); - increaseSetPressure(CurrSetPressure, P.MaxSetPressure, - TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - increaseSetPressure(CurrSetPressure, P.MaxSetPressure, - TRI->getRegUnitPressureSets(Regs[I]), - TRI->getRegUnitWeight(Regs[I])); +void RegPressureTracker::increaseRegPressure(ArrayRef RegUnits) { + for (unsigned i = 0, e = RegUnits.size(); i != e; ++i) { + PSetIterator PSetI = MRI->getPressureSets(RegUnits[i]); + unsigned Weight = PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + CurrSetPressure[*PSetI] += Weight; + if (CurrSetPressure[*PSetI] > P.MaxSetPressure[*PSetI]) { + P.MaxSetPressure[*PSetI] = CurrSetPressure[*PSetI]; + } } } } /// Simply decrease the current pressure as impacted by these registers. 
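Both helpers now take a single PSetIterator instead of a raw -1 terminated array plus a separate weight argument. Before the matching decrease path below, a rough model of the new API; only isValid(), getWeight(), and the dereference mirror calls visible in the patch, the iterator internals are assumptions:

    // Illustrative stand-in for llvm::PSetIterator: walks a -1 terminated
    // list of pressure-set IDs and carries one weight for the register.
    #include <vector>

    class PSetIteratorModel {
      const int *Cur;   // -1 terminated list of pressure-set IDs
      unsigned Weight;  // register weight applied to every set
    public:
      PSetIteratorModel(const int *Sets, unsigned W) : Cur(Sets), Weight(W) {}
      bool isValid() const { return *Cur != -1; }
      unsigned getWeight() const { return Weight; }
      unsigned operator*() const { return static_cast<unsigned>(*Cur); }
      void operator++() { ++Cur; }
    };

    static void increasePressure(std::vector<unsigned> &CurrSetPressure,
                                 PSetIteratorModel PSetI) {
      unsigned Weight = PSetI.getWeight();
      for (; PSetI.isValid(); ++PSetI)
        CurrSetPressure[*PSetI] += Weight; // one add per pressure set
    }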
-void RegPressureTracker::decreaseRegPressure(ArrayRef Regs) { - for (unsigned I = 0, E = Regs.size(); I != E; ++I) { - if (TargetRegisterInfo::isVirtualRegister(Regs[I])) { - const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]); - decreaseSetPressure(CurrSetPressure, - TRI->getRegClassPressureSets(RC), - TRI->getRegClassWeight(RC).RegWeight); - } - else { - decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]), - TRI->getRegUnitWeight(Regs[I])); - } - } +void RegPressureTracker::decreaseRegPressure(ArrayRef RegUnits) { + for (unsigned I = 0, E = RegUnits.size(); I != E; ++I) + decreaseSetPressure(CurrSetPressure, MRI->getPressureSets(RegUnits[I])); } /// Clear the result so it can be used for another round of pressure tracking. @@ -187,12 +147,30 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { LiveInRegs.clear(); } -const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const { +const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const { if (TargetRegisterInfo::isVirtualRegister(Reg)) return &LIS->getInterval(Reg); return LIS->getCachedRegUnit(Reg); } +void RegPressureTracker::reset() { + MBB = 0; + LIS = 0; + + CurrSetPressure.clear(); + LiveThruPressure.clear(); + P.MaxSetPressure.clear(); + + if (RequireIntervals) + static_cast(P).reset(); + else + static_cast(P).reset(); + + LiveRegs.PhysRegs.clear(); + LiveRegs.VirtRegs.clear(); + UntiedDefs.clear(); +} + /// Setup the RegPressureTracker. /// /// TODO: Add support for pressure without LiveIntervals. @@ -200,13 +178,17 @@ void RegPressureTracker::init(const MachineFunction *mf, const RegisterClassInfo *rci, const LiveIntervals *lis, const MachineBasicBlock *mbb, - MachineBasicBlock::const_iterator pos) + MachineBasicBlock::const_iterator pos, + bool ShouldTrackUntiedDefs) { + reset(); + MF = mf; TRI = MF->getTarget().getRegisterInfo(); RCI = rci; MRI = &MF->getRegInfo(); MBB = mbb; + TrackUntiedDefs = ShouldTrackUntiedDefs; if (RequireIntervals) { assert(lis && "IntervalPressure requires LiveIntervals"); @@ -216,16 +198,12 @@ void RegPressureTracker::init(const MachineFunction *mf, CurrPos = pos; CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0); - if (RequireIntervals) - static_cast(P).reset(); - else - static_cast(P).reset(); P.MaxSetPressure = CurrSetPressure; - LiveRegs.PhysRegs.clear(); LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs()); - LiveRegs.VirtRegs.clear(); LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs()); + if (TrackUntiedDefs) + UntiedDefs.setUniverse(MRI->getNumVirtRegs()); } /// Does this pressure result have a valid top position and live ins. @@ -304,16 +282,36 @@ void RegPressureTracker::closeRegion() { // If both top and bottom are closed, do nothing. } +/// The register tracker is unaware of global liveness so ignores normal +/// live-thru ranges. However, two-address or coalesced chains can also lead +/// to live ranges with no holes. Count these to inform heuristics that we +/// can never drop below this pressure. 
+void RegPressureTracker::initLiveThru(const RegPressureTracker &RPTracker) { + LiveThruPressure.assign(TRI->getNumRegPressureSets(), 0); + assert(isBottomClosed() && "need bottom-up tracking to intialize."); + for (unsigned i = 0, e = P.LiveOutRegs.size(); i < e; ++i) { + unsigned Reg = P.LiveOutRegs[i]; + if (TargetRegisterInfo::isVirtualRegister(Reg) + && !RPTracker.hasUntiedDef(Reg)) { + increaseSetPressure(LiveThruPressure, MRI->getPressureSets(Reg)); + } + } +} + /// \brief Convenient wrapper for checking membership in RegisterOperands. -static bool containsReg(ArrayRef Regs, unsigned Reg) { - return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end(); +/// (std::count() doesn't have an early exit). +static bool containsReg(ArrayRef RegUnits, unsigned RegUnit) { + return std::find(RegUnits.begin(), RegUnits.end(), RegUnit) != RegUnits.end(); } /// Collect this instruction's unique uses and defs into SmallVectors for /// processing defs and uses in order. +/// +/// FIXME: always ignore tied opers class RegisterOperands { const TargetRegisterInfo *TRI; const MachineRegisterInfo *MRI; + bool IgnoreDead; public: SmallVector Uses; @@ -321,7 +319,8 @@ public: SmallVector DeadDefs; RegisterOperands(const TargetRegisterInfo *tri, - const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {} + const MachineRegisterInfo *mri, bool ID = false): + TRI(tri), MRI(mri), IgnoreDead(ID) {} /// Push this operand's register onto the correct vector. void collect(const MachineOperand &MO) { @@ -330,25 +329,27 @@ public: if (MO.readsReg()) pushRegUnits(MO.getReg(), Uses); if (MO.isDef()) { - if (MO.isDead()) - pushRegUnits(MO.getReg(), DeadDefs); + if (MO.isDead()) { + if (!IgnoreDead) + pushRegUnits(MO.getReg(), DeadDefs); + } else pushRegUnits(MO.getReg(), Defs); } } protected: - void pushRegUnits(unsigned Reg, SmallVectorImpl &Regs) { + void pushRegUnits(unsigned Reg, SmallVectorImpl &RegUnits) { if (TargetRegisterInfo::isVirtualRegister(Reg)) { - if (containsReg(Regs, Reg)) + if (containsReg(RegUnits, Reg)) return; - Regs.push_back(Reg); + RegUnits.push_back(Reg); } else if (MRI->isAllocatable(Reg)) { for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { - if (containsReg(Regs, *Units)) + if (containsReg(RegUnits, *Units)) continue; - Regs.push_back(*Units); + RegUnits.push_back(*Units); } } } @@ -367,6 +368,56 @@ static void collectOperands(const MachineInstr *MI, RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); } +/// Initialize an array of N PressureDiffs. +void PressureDiffs::init(unsigned N) { + Size = N; + if (N <= Max) { + memset(PDiffArray, 0, N * sizeof(PressureDiff)); + return; + } + Max = Size; + free(PDiffArray); + PDiffArray = reinterpret_cast(calloc(N, sizeof(PressureDiff))); +} + +/// Add a change in pressure to the pressure diff of a given instruction. +void PressureDiff::addPressureChange(unsigned RegUnit, bool IsDec, + const MachineRegisterInfo *MRI) { + PSetIterator PSetI = MRI->getPressureSets(RegUnit); + int Weight = IsDec ? -PSetI.getWeight() : PSetI.getWeight(); + for (; PSetI.isValid(); ++PSetI) { + // Find an existing entry in the pressure diff for this PSet. + PressureDiff::iterator I = begin(), E = end(); + for (; I != E && I->isValid(); ++I) { + if (I->getPSet() >= *PSetI) + break; + } + // If all pressure sets are more constrained, skip the remaining PSets. + if (I == E) + break; + // Insert this PressureChange. 
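The insertion performed next keeps each PressureDiff sorted by pressure-set ID inside a small fixed-capacity array, shifting later entries right to make room. A standalone sketch of that bounded insertion, with the entry type simplified to a plain int ID:

    // Simplified model: insert Set into a fixed-capacity array kept in
    // ascending order; the last entry falls off when the array is full.
    #include <utility>

    const int Invalid = -1; // marks unused slots, like !isValid()

    void sortedInsert(int Entries[], int Cap, int Set) {
      int i = 0;
      while (i < Cap && Entries[i] != Invalid && Entries[i] < Set)
        ++i;              // find the first slot with an ID >= Set
      if (i == Cap || Entries[i] == Set)
        return;           // out of room, or the set is already present
      int Tmp = Set;
      for (int j = i; j < Cap && Tmp != Invalid; ++j)
        std::swap(Entries[j], Tmp); // shift the tail right by one
    }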
+ if (!I->isValid() || I->getPSet() != *PSetI) { + PressureChange PTmp = PressureChange(*PSetI); + for (PressureDiff::iterator J = I; J != E && PTmp.isValid(); ++J) + std::swap(*J,PTmp); + } + // Update the units for this pressure set. + I->setUnitInc(I->getUnitInc() + Weight); + } +} + +/// Record the pressure difference induced by the given operand list. +static void collectPDiff(PressureDiff &PDiff, RegisterOperands &RegOpers, + const MachineRegisterInfo *MRI) { + assert(!PDiff.begin()->isValid() && "stale PDiff"); + + for (unsigned i = 0, e = RegOpers.Defs.size(); i != e; ++i) + PDiff.addPressureChange(RegOpers.Defs[i], true, MRI); + + for (unsigned i = 0, e = RegOpers.Uses.size(); i != e; ++i) + PDiff.addPressureChange(RegOpers.Uses[i], false, MRI); +} + /// Force liveness of registers. void RegPressureTracker::addLiveRegs(ArrayRef Regs) { for (unsigned i = 0, e = Regs.size(); i != e; ++i) { @@ -383,7 +434,7 @@ void RegPressureTracker::discoverLiveIn(unsigned Reg) { // At live in discovery, unconditionally increase the high water mark. P.LiveInRegs.push_back(Reg); - P.increase(Reg, TRI, MRI); + increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); } /// Add Reg to the live out set and increase max pressure. @@ -394,11 +445,16 @@ void RegPressureTracker::discoverLiveOut(unsigned Reg) { // At live out discovery, unconditionally increase the high water mark. P.LiveOutRegs.push_back(Reg); - P.increase(Reg, TRI, MRI); + increaseSetPressure(P.MaxSetPressure, MRI->getPressureSets(Reg)); } -/// Recede across the previous instruction. -bool RegPressureTracker::recede() { +/// Recede across the previous instruction. If LiveUses is provided, record any +/// RegUnits that are made live by the current instruction's uses. This includes +/// registers that are both defined and used by the instruction. If a pressure +/// difference pointer is provided record the changes is pressure caused by this +/// instruction independent of liveness. +bool RegPressureTracker::recede(SmallVectorImpl *LiveUses, + PressureDiff *PDiff) { // Check for the top of the analyzable region. if (CurrPos == MBB->begin()) { closeRegion(); @@ -431,6 +487,9 @@ bool RegPressureTracker::recede() { RegisterOperands RegOpers(TRI, MRI); collectOperands(CurrPos, RegOpers); + if (PDiff) + collectPDiff(*PDiff, RegOpers, MRI); + // Boost pressure for all dead defs together. increaseRegPressure(RegOpers.DeadDefs); decreaseRegPressure(RegOpers.DeadDefs); @@ -439,10 +498,20 @@ bool RegPressureTracker::recede() { // TODO: consider earlyclobbers? for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { unsigned Reg = RegOpers.Defs[i]; - if (LiveRegs.erase(Reg)) - decreaseRegPressure(Reg); - else - discoverLiveOut(Reg); + bool DeadDef = false; + if (RequireIntervals) { + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + DeadDef = LRQ.isDeadDef(); + } + } + if (!DeadDef) { + if (LiveRegs.erase(Reg)) + decreaseRegPressure(Reg); + else + discoverLiveOut(Reg); + } } // Generate liveness for uses. @@ -451,12 +520,24 @@ bool RegPressureTracker::recede() { if (!LiveRegs.contains(Reg)) { // Adjust liveouts if LiveIntervals are available. 
if (RequireIntervals) { - const LiveInterval *LI = getInterval(Reg); - if (LI && !LI->killedAt(SlotIdx)) - discoverLiveOut(Reg); + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (!LRQ.isKill() && !LRQ.valueDefined()) + discoverLiveOut(Reg); + } } increaseRegPressure(Reg); LiveRegs.insert(Reg); + if (LiveUses && !containsReg(*LiveUses, Reg)) + LiveUses->push_back(Reg); + } + } + if (TrackUntiedDefs) { + for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = RegOpers.Defs[i]; + if (TargetRegisterInfo::isVirtualRegister(Reg) && !LiveRegs.contains(Reg)) + UntiedDefs.insert(Reg); } } return true; @@ -464,6 +545,8 @@ bool RegPressureTracker::recede() { /// Advance across the current instruction. bool RegPressureTracker::advance() { + assert(!TrackUntiedDefs && "unsupported mode"); + // Check for the bottom of the analyzable region. if (CurrPos == MBB->end()) { closeRegion(); @@ -496,8 +579,8 @@ bool RegPressureTracker::advance() { // Kill liveness at last uses. bool lastUse = false; if (RequireIntervals) { - const LiveInterval *LI = getInterval(Reg); - lastUse = LI && LI->killedAt(SlotIdx); + const LiveRange *LR = getLiveRange(Reg); + lastUse = LR && LR->Query(SlotIdx).isKill(); } else { // Allocatable physregs are always single-use before register rewriting. @@ -533,9 +616,9 @@ bool RegPressureTracker::advance() { static void computeExcessPressureDelta(ArrayRef OldPressureVec, ArrayRef NewPressureVec, RegPressureDelta &Delta, - const TargetRegisterInfo *TRI) { - int ExcessUnits = 0; - unsigned PSetID = ~0U; + const RegisterClassInfo *RCI, + ArrayRef LiveThruPressureVec) { + Delta.Excess = PressureChange(); for (unsigned i = 0, e = OldPressureVec.size(); i < e; ++i) { unsigned POld = OldPressureVec[i]; unsigned PNew = NewPressureVec[i]; @@ -543,7 +626,10 @@ static void computeExcessPressureDelta(ArrayRef OldPressureVec, if (!PDiff) // No change in this set in the common case. continue; // Only consider change beyond the limit. - unsigned Limit = TRI->getRegPressureSetLimit(i); + unsigned Limit = RCI->getRegPressureSetLimit(i); + if (!LiveThruPressureVec.empty()) + Limit += LiveThruPressureVec[i]; + if (Limit > POld) { if (Limit > PNew) PDiff = 0; // Under the limit @@ -553,13 +639,12 @@ static void computeExcessPressureDelta(ArrayRef OldPressureVec, else if (Limit > PNew) PDiff = Limit - POld; // Just obeyed limit. - if (std::abs(PDiff) > std::abs(ExcessUnits)) { - ExcessUnits = PDiff; - PSetID = i; + if (PDiff) { + Delta.Excess = PressureChange(i); + Delta.Excess.setUnitInc(PDiff); + break; } } - Delta.Excess.PSetID = PSetID; - Delta.Excess.UnitIncrease = ExcessUnits; } /// Find the max change in max pressure that either surpasses a critical PSet @@ -570,11 +655,11 @@ static void computeExcessPressureDelta(ArrayRef OldPressureVec, /// RegPressureTracker API change to work with pressure differences. static void computeMaxPressureDelta(ArrayRef OldMaxPressureVec, ArrayRef NewMaxPressureVec, - ArrayRef CriticalPSets, + ArrayRef CriticalPSets, ArrayRef MaxPressureLimit, RegPressureDelta &Delta) { - Delta.CriticalMax = PressureElement(); - Delta.CurrentMax = PressureElement(); + Delta.CriticalMax = PressureChange(); + Delta.CurrentMax = PressureChange(); unsigned CritIdx = 0, CritEnd = CriticalPSets.size(); for (unsigned i = 0, e = OldMaxPressureVec.size(); i < e; ++i) { @@ -583,23 +668,25 @@ static void computeMaxPressureDelta(ArrayRef OldMaxPressureVec, if (PNew == POld) // No change in this set in the common case. 
continue; - while (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID < i) - ++CritIdx; + if (!Delta.CriticalMax.isValid()) { + while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < i) + ++CritIdx; - if (CritIdx != CritEnd && CriticalPSets[CritIdx].PSetID == i) { - int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].UnitIncrease; - if (PDiff > Delta.CriticalMax.UnitIncrease) { - Delta.CriticalMax.PSetID = i; - Delta.CriticalMax.UnitIncrease = PDiff; + if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == i) { + int PDiff = (int)PNew - (int)CriticalPSets[CritIdx].getUnitInc(); + if (PDiff > 0) { + Delta.CriticalMax = PressureChange(i); + Delta.CriticalMax.setUnitInc(PDiff); + } } } - - // Find the greatest increase above MaxPressureLimit. + // Find the first increase above MaxPressureLimit. // (Ignores negative MDiff). - int MDiff = (int)PNew - (int)MaxPressureLimit[i]; - if (MDiff > Delta.CurrentMax.UnitIncrease) { - Delta.CurrentMax.PSetID = i; - Delta.CurrentMax.UnitIncrease = PNew; + if (!Delta.CurrentMax.isValid() && PNew > MaxPressureLimit[i]) { + Delta.CurrentMax = PressureChange(i); + Delta.CurrentMax.setUnitInc(PNew - POld); + if (CritIdx == CritEnd || Delta.CriticalMax.isValid()) + break; } } } @@ -614,7 +701,7 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { assert(!MI->isDebugValue() && "Expect a nondebug instruction."); // Account for register pressure similar to RegPressureTracker::recede(). - RegisterOperands RegOpers(TRI, MRI); + RegisterOperands RegOpers(TRI, MRI, /*IgnoreDead=*/true); collectOperands(MI, RegOpers); // Boost max pressure for all dead defs together. @@ -625,8 +712,19 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { // Kill liveness at live defs. for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { unsigned Reg = RegOpers.Defs[i]; - if (!containsReg(RegOpers.Uses, Reg)) - decreaseRegPressure(Reg); + bool DeadDef = false; + if (RequireIntervals) { + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + SlotIndex SlotIdx = LIS->getInstructionIndex(MI); + LiveQueryResult LRQ = LR->Query(SlotIdx); + DeadDef = LRQ.isDeadDef(); + } + } + if (!DeadDef) { + if (!containsReg(RegOpers.Uses, Reg)) + decreaseRegPressure(Reg); + } } // Generate liveness for uses. for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { @@ -648,8 +746,9 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { /// result per-SUnit with enough information to adjust for the current /// scheduling position. But this works as a proof of concept. void RegPressureTracker:: -getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, - ArrayRef CriticalPSets, +getMaxUpwardPressureDelta(const MachineInstr *MI, PressureDiff *PDiff, + RegPressureDelta &Delta, + ArrayRef CriticalPSets, ArrayRef MaxPressureLimit) { // Snapshot Pressure. // FIXME: The snapshot heap space should persist. 
But I'm planning to @@ -659,15 +758,117 @@ getMaxUpwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, bumpUpwardPressure(MI); - computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI, + LiveThruPressure); computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, MaxPressureLimit, Delta); - assert(Delta.CriticalMax.UnitIncrease >= 0 && - Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + assert(Delta.CriticalMax.getUnitInc() >= 0 && + Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure"); // Restore the tracker's state. P.MaxSetPressure.swap(SavedMaxPressure); CurrSetPressure.swap(SavedPressure); + +#ifndef NDEBUG + if (!PDiff) + return; + + // Check if the alternate algorithm yields the same result. + RegPressureDelta Delta2; + getUpwardPressureDelta(MI, *PDiff, Delta2, CriticalPSets, MaxPressureLimit); + if (Delta != Delta2) { + dbgs() << "DELTA: " << *MI; + if (Delta.Excess.isValid()) + dbgs() << "Excess1 " << TRI->getRegPressureSetName(Delta.Excess.getPSet()) + << " " << Delta.Excess.getUnitInc() << "\n"; + if (Delta.CriticalMax.isValid()) + dbgs() << "Critic1 " << TRI->getRegPressureSetName(Delta.CriticalMax.getPSet()) + << " " << Delta.CriticalMax.getUnitInc() << "\n"; + if (Delta.CurrentMax.isValid()) + dbgs() << "CurrMx1 " << TRI->getRegPressureSetName(Delta.CurrentMax.getPSet()) + << " " << Delta.CurrentMax.getUnitInc() << "\n"; + if (Delta2.Excess.isValid()) + dbgs() << "Excess2 " << TRI->getRegPressureSetName(Delta2.Excess.getPSet()) + << " " << Delta2.Excess.getUnitInc() << "\n"; + if (Delta2.CriticalMax.isValid()) + dbgs() << "Critic2 " << TRI->getRegPressureSetName(Delta2.CriticalMax.getPSet()) + << " " << Delta2.CriticalMax.getUnitInc() << "\n"; + if (Delta2.CurrentMax.isValid()) + dbgs() << "CurrMx2 " << TRI->getRegPressureSetName(Delta2.CurrentMax.getPSet()) + << " " << Delta2.CurrentMax.getUnitInc() << "\n"; + llvm_unreachable("RegP Delta Mismatch"); + } +#endif +} + +/// This is a prototype of the fast version of querying register pressure that +/// does not directly depend on current liveness. It's still slow because we +/// recompute pressure change on-the-fly. This implementation only exists to +/// prove correctness. +/// +/// @param Delta captures information needed for heuristics. +/// +/// @param CriticalPSets Are the pressure sets that are known to exceed some +/// limit within the region, not necessarily at the current position. +/// +/// @param MaxPressureLimit Is the max pressure within the region, not +/// necessarily at the current position. +void RegPressureTracker:: +getUpwardPressureDelta(const MachineInstr *MI, /*const*/ PressureDiff &PDiff, + RegPressureDelta &Delta, + ArrayRef CriticalPSets, + ArrayRef MaxPressureLimit) const { + unsigned CritIdx = 0, CritEnd = CriticalPSets.size(); + for (PressureDiff::const_iterator + PDiffI = PDiff.begin(), PDiffE = PDiff.end(); + PDiffI != PDiffE && PDiffI->isValid(); ++PDiffI) { + + unsigned PSetID = PDiffI->getPSet(); + unsigned Limit = RCI->getRegPressureSetLimit(PSetID); + if (!LiveThruPressure.empty()) + Limit += LiveThruPressure[PSetID]; + + unsigned POld = CurrSetPressure[PSetID]; + unsigned MOld = P.MaxSetPressure[PSetID]; + unsigned MNew = MOld; + // Ignore DeadDefs here because they aren't captured by PressureChange. 
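The excess computation that follows is simple interval arithmetic against the set limit. A worked example with made-up numbers, covering only the increasing case that the assertion below guards (the decreasing case is tracked separately in the patch):

    // Limit = 8 (set limit plus live-through), POld = 7, unit change = +2:
    //   PNew = 9, ExcessInc = PNew - Limit = 1   (just crossed the limit)
    // If POld were already 9, ExcessInc = PNew - POld = 2, i.e. only the
    // growth beyond the old excess is charged.
    int excessInc(int POld, int PNew, int Limit) {
      if (PNew > Limit)
        return POld > Limit ? PNew - POld : PNew - Limit;
      return 0;
    }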
+ unsigned PNew = POld + PDiffI->getUnitInc(); + assert((PDiffI->getUnitInc() >= 0) == (PNew >= POld) && "PSet overflow"); + if (PNew > MOld) + MNew = PNew; + // Check if current pressure has exceeded the limit. + if (!Delta.Excess.isValid()) { + unsigned ExcessInc = 0; + if (PNew > Limit) + ExcessInc = POld > Limit ? PNew - POld : PNew - Limit; + else if (POld > Limit) + ExcessInc = Limit - POld; + if (ExcessInc) { + Delta.Excess = PressureChange(PSetID); + Delta.Excess.setUnitInc(ExcessInc); + } + } + // Check if max pressure has exceeded a critical pressure set max. + if (MNew == MOld) + continue; + if (!Delta.CriticalMax.isValid()) { + while (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() < PSetID) + ++CritIdx; + + if (CritIdx != CritEnd && CriticalPSets[CritIdx].getPSet() == PSetID) { + int CritInc = (int)MNew - (int)CriticalPSets[CritIdx].getUnitInc(); + if (CritInc > 0 && CritInc <= INT16_MAX) { + Delta.CriticalMax = PressureChange(PSetID); + Delta.CriticalMax.setUnitInc(CritInc); + } + } + } + // Check if max pressure has exceeded the current max. + if (!Delta.CurrentMax.isValid() && MNew > MaxPressureLimit[PSetID]) { + Delta.CurrentMax = PressureChange(PSetID); + Delta.CurrentMax.setUnitInc(MNew - MOld); + } + } } /// Helper to find a vreg use between two indices [PriorUseIdx, NextUseIdx). @@ -713,10 +914,12 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { // FIXME: allow the caller to pass in the list of vreg uses that remain // to be bottom-scheduled to avoid searching uses at each query. SlotIndex CurrIdx = getCurrSlot(); - const LiveInterval *LI = getInterval(Reg); - if (LI && LI->killedAt(SlotIdx) - && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { - decreaseRegPressure(Reg); + const LiveRange *LR = getLiveRange(Reg); + if (LR) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { + decreaseRegPressure(Reg); + } } } else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { @@ -741,7 +944,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { /// This assumes that the current LiveIn set is sufficient. void RegPressureTracker:: getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, - ArrayRef CriticalPSets, + ArrayRef CriticalPSets, ArrayRef MaxPressureLimit) { // Snapshot Pressure. std::vector SavedPressure = CurrSetPressure; @@ -749,11 +952,12 @@ getMaxDownwardPressureDelta(const MachineInstr *MI, RegPressureDelta &Delta, bumpDownwardPressure(MI); - computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, TRI); + computeExcessPressureDelta(SavedPressure, CurrSetPressure, Delta, RCI, + LiveThruPressure); computeMaxPressureDelta(SavedMaxPressure, P.MaxSetPressure, CriticalPSets, MaxPressureLimit, Delta); - assert(Delta.CriticalMax.UnitIncrease >= 0 && - Delta.CurrentMax.UnitIncrease >= 0 && "cannot decrease max pressure"); + assert(Delta.CriticalMax.getUnitInc() >= 0 && + Delta.CurrentMax.getUnitInc() >= 0 && "cannot decrease max pressure"); // Restore the tracker's state. P.MaxSetPressure.swap(SavedMaxPressure); diff --git a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp index f82ccbe..75ebdaa 100644 --- a/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp +++ b/contrib/llvm/lib/CodeGen/RegisterScavenging.cpp @@ -31,9 +31,8 @@ using namespace llvm; /// setUsed - Set the register and its sub-registers as being used. 
 void RegScavenger::setUsed(unsigned Reg) {
-  RegsAvailable.reset(Reg);
-
-  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+  for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+       SubRegs.isValid(); ++SubRegs)
     RegsAvailable.reset(*SubRegs);
 }
 
@@ -45,8 +44,8 @@ bool RegScavenger::isAliasUsed(unsigned Reg) const {
 }
 
 void RegScavenger::initRegState() {
-  for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
-         IE = Scavenged.end(); I != IE; ++I) {
+  for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
+         IE = Scavenged.end(); I != IE; ++I) {
     I->Reg = 0;
     I->Restore = NULL;
   }
@@ -105,8 +104,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
 }
 
 void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) {
-  BV.set(Reg);
-  for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs)
+  for (MCSubRegIterator SubRegs(Reg, TRI, /*IncludeSelf=*/true);
+       SubRegs.isValid(); ++SubRegs)
     BV.set(*SubRegs);
 }
 
@@ -182,8 +181,8 @@ void RegScavenger::forward() {
   MachineInstr *MI = MBBI;
 
-  for (SmallVector<ScavengedInfo, 2>::iterator I = Scavenged.begin(),
-         IE = Scavenged.end(); I != IE; ++I) {
+  for (SmallVectorImpl<ScavengedInfo>::iterator I = Scavenged.begin(),
+         IE = Scavenged.end(); I != IE; ++I) {
     if (I->Restore != MI)
       continue;
 
@@ -369,7 +368,7 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC,
   // Exclude all the registers being used by the instruction.
   for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
     MachineOperand &MO = I->getOperand(i);
-    if (MO.isReg() && MO.getReg() != 0 &&
+    if (MO.isReg() && MO.getReg() != 0 && !(MO.isUse() && MO.isUndef()) &&
         !TargetRegisterInfo::isVirtualRegister(MO.getReg()))
       Candidates.reset(MO.getReg());
   }
 
diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
index 07e5b47..75e3790 100644
--- a/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/ScheduleDAG.cpp
@@ -64,8 +64,8 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const {
 /// specified node.
 bool SUnit::addPred(const SDep &D, bool Required) {
   // If this node already has this dependence, don't add a redundant one.
-  for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
-       I != E; ++I) {
+  for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
+       I != E; ++I) {
     // Zero-latency weak edges may be added purely for heuristic ordering. Don't
     // add them if another kind of edge already exists.
     if (!Required && I->getSUnit() == D.getSUnit())
@@ -77,7 +77,7 @@ bool SUnit::addPred(const SDep &D, bool Required) {
       // Find the corresponding successor in N.
       SDep ForwardD = *I;
       ForwardD.setSUnit(this);
-      for (SmallVector<SDep, 4>::iterator II = PredSU->Succs.begin(),
+      for (SmallVectorImpl<SDep>::iterator II = PredSU->Succs.begin(),
              EE = PredSU->Succs.end(); II != EE; ++II) {
         if (*II == ForwardD) {
           II->setLatency(D.getLatency());
@@ -132,8 +132,8 @@ bool SUnit::addPred(const SDep &D, bool Required) {
 /// the specified node.
 void SUnit::removePred(const SDep &D) {
   // Find the matching predecessor.
-  for (SmallVector<SDep, 4>::iterator I = Preds.begin(), E = Preds.end();
-       I != E; ++I)
+  for (SmallVectorImpl<SDep>::iterator I = Preds.begin(), E = Preds.end();
+       I != E; ++I)
     if (*I == D) {
       // Find the corresponding successor in N.
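Several hunks above and below make the same mechanical change: SmallVector gives way to SmallVectorImpl in iterator and parameter positions. The motivation in miniature (the function here is illustrative, not from the patch): SmallVectorImpl<T> is the size-erased base class, so code written against it works for any inline capacity and avoids one template instantiation per capacity.

    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Accepting SmallVectorImpl<int>& instead of SmallVector<int, 4>&
    // lets callers pick any inline capacity.
    static int sum(const SmallVectorImpl<int> &V) {
      int S = 0;
      for (SmallVectorImpl<int>::const_iterator I = V.begin(), E = V.end();
           I != E; ++I)
        S += *I;
      return S;
    }

    // Usage: both calls bind to the same function.
    //   SmallVector<int, 2> A;  sum(A);
    //   SmallVector<int, 8> B;  sum(B);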
SDep P = D; diff --git a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp index e4da6a4..7f1f9c4 100644 --- a/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/contrib/llvm/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -36,6 +36,8 @@ #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" +#include + using namespace llvm; static cl::opt EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, @@ -98,7 +100,7 @@ static void getUnderlyingObjects(const Value *V, SmallVector Objs; GetUnderlyingObjects(const_cast(V), Objs); - for (SmallVector::iterator I = Objs.begin(), IE = Objs.end(); + for (SmallVectorImpl::iterator I = Objs.begin(), IE = Objs.end(); I != IE; ++I) { V = *I; if (!Visited.insert(V)) @@ -116,12 +118,15 @@ static void getUnderlyingObjects(const Value *V, } while (!Working.empty()); } +typedef SmallVector, 4> +UnderlyingObjectsVector; + /// getUnderlyingObjectsForInstr - If this machine instr has memory reference /// information and it can be tracked to a normal reference to a known /// object, return the Value for that object. static void getUnderlyingObjectsForInstr(const MachineInstr *MI, - const MachineFrameInfo *MFI, - SmallVectorImpl > &Objects) { + const MachineFrameInfo *MFI, + UnderlyingObjectsVector &Objects) { if (!MI->hasOneMemOperand() || !(*MI->memoperands_begin())->getValue() || (*MI->memoperands_begin())->isVolatile()) @@ -134,8 +139,8 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, SmallVector Objs; getUnderlyingObjects(V, Objs); - for (SmallVector::iterator I = Objs.begin(), IE = Objs.end(); - I != IE; ++I) { + for (SmallVectorImpl::iterator I = Objs.begin(), IE = Objs.end(); + I != IE; ++I) { bool MayAlias = true; V = *I; @@ -155,7 +160,7 @@ static void getUnderlyingObjectsForInstr(const MachineInstr *MI, return; } - Objects.push_back(std::make_pair(V, MayAlias)); + Objects.push_back(UnderlyingObjectsVector::value_type(V, MayAlias)); } } @@ -175,14 +180,11 @@ void ScheduleDAGInstrs::finishBlock() { void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, - unsigned endcount) { + unsigned regioninstrs) { assert(bb == BB && "startBlock should set BB"); RegionBegin = begin; RegionEnd = end; - EndIndex = endcount; - MISUnitMap.clear(); - - ScheduleDAG::clearDAG(); + NumRegionInstrs = regioninstrs; } /// Close the current scheduling region. 
Don't clear any state in case the @@ -267,13 +269,10 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { SU->hasPhysRegDefs = true; Dep = SDep(SU, SDep::Data, *Alias); RegUse = UseSU->getInstr(); - Dep.setMinLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/true)); } Dep.setLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/false)); + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, + UseOp)); ST.adjustSchedDependency(SU, UseSU, Dep); UseSU->addPred(Dep); @@ -310,10 +309,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { DefSU->addPred(SDep(SU, Kind, /*Reg=*/*Alias)); else { SDep Dep(SU, Kind, /*Reg=*/*Alias); - unsigned OutLatency = - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); - Dep.setMinLatency(OutLatency); - Dep.setLatency(OutLatency); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); DefSU->addPred(Dep); } } @@ -389,10 +386,8 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { SUnit *DefSU = DefI->SU; if (DefSU != SU && DefSU != &ExitSU) { SDep Dep(SU, SDep::Output, Reg); - unsigned OutLatency = - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()); - Dep.setMinLatency(OutLatency); - Dep.setLatency(OutLatency); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); DefSU->addPred(Dep); } DefI->SU = SU; @@ -409,9 +404,19 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { MachineInstr *MI = SU->getInstr(); unsigned Reg = MI->getOperand(OperIdx).getReg(); + // Record this local VReg use. + VReg2UseMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, SU)); + // Lookup this operand's reaching definition. assert(LIS && "vreg dependencies requires LiveIntervals"); - LiveRangeQuery LRQ(LIS->getInterval(Reg), LIS->getInstructionIndex(MI)); + LiveQueryResult LRQ + = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); VNInfo *VNI = LRQ.valueIn(); // VNI will be valid because MachineOperand::readsReg() is checked by caller. @@ -427,10 +432,7 @@ void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { // Adjust the dependence latency using operand def/use information, then // allow the target to perform its own adjustments. int DefOp = Def->findRegisterDefOperandIdx(Reg); - dep.setLatency( - SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, false)); - dep.setMinLatency( - SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx, true)); + dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); const TargetSubtargetInfo &ST = TM.getSubtarget(); ST.adjustSchedDependency(DefSU, SU, const_cast(dep)); @@ -472,8 +474,8 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, SmallVector Objs; getUnderlyingObjects(V, Objs); - for (SmallVector::iterator I = Objs.begin(), - IE = Objs.end(); I != IE; ++I) { + for (SmallVectorImpl::iterator I = Objs.begin(), + IE = Objs.end(); I != IE; ++I) { V = *I; if (const PseudoSourceValue *PSV = dyn_cast(V)) { @@ -642,8 +644,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, bool isNormalMemory = false) { // If this is a false dependency, // do not add the edge, but rememeber the rejected node. 
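The test below is re-keyed from the global EnableAASchedMI flag to whether an AliasAnalysis pointer was supplied at all; further down, buildSchedGraph computes AAForDep = UseAA ? AA : 0, so a null pointer now means "no disambiguation, conservatively keep every may-alias chain edge". The convention in miniature (everything except AliasAnalysis is an illustrative name):

    class AliasAnalysis; // from llvm/Analysis/AliasAnalysis.h

    // UseAA comes from the command-line flag when it was given on the
    // command line, otherwise from the subtarget hook, mirroring the
    // buildSchedGraph hunk below.
    static AliasAnalysis *pickAAForDep(AliasAnalysis *AA, bool FlagGiven,
                                       bool FlagValue, bool SubtargetUseAA) {
      bool UseAA = FlagGiven ? FlagValue : SubtargetUseAA;
      return UseAA ? AA : 0; // null => every may-alias edge is kept
    }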
- if (!EnableAASchedMI || - MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { + if (!AA || MIsNeedChainEdge(AA, MFI, SUa->getInstr(), SUb->getInstr())) { SDep Dep(SUa, isNormalMemory ? SDep::MayAliasMem : SDep::Barrier); Dep.setLatency(TrueMemOrderLatency); SUb->addPred(Dep); @@ -671,7 +672,7 @@ void addChainDependency (AliasAnalysis *AA, const MachineFrameInfo *MFI, void ScheduleDAGInstrs::initSUnits() { // We'll be allocating one SUnit for each real instruction in the region, // which is contained within a basic block. - SUnits.reserve(BB->size()); + SUnits.reserve(NumRegionInstrs); for (MachineBasicBlock::iterator I = RegionBegin; I != RegionEnd; ++I) { MachineInstr *MI = I; @@ -693,10 +694,22 @@ void ScheduleDAGInstrs::initSUnits() { /// DAG builder is an efficient place to do it because it already visits /// operands. void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, - RegPressureTracker *RPTracker) { + RegPressureTracker *RPTracker, + PressureDiffs *PDiffs) { + const TargetSubtargetInfo &ST = TM.getSubtarget(); + bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI + : ST.useAA(); + AliasAnalysis *AAForDep = UseAA ? AA : 0; + + MISUnitMap.clear(); + ScheduleDAG::clearDAG(); + // Create an SUnit for each real instruction. initSUnits(); + if (PDiffs) + PDiffs->init(SUnits.size()); + // We build scheduling units by walking a block's instruction list from bottom // to top. @@ -722,10 +735,9 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Uses.setUniverse(TRI->getNumRegs()); assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); - // FIXME: Allow SparseSet to reserve space for the creation of virtual - // registers during scheduling. Don't artificially inflate the Universe - // because we want to assert that vregs are not created during DAG building. + VRegUses.clear(); VRegDefs.setUniverse(MRI.getNumVirtRegs()); + VRegUses.setUniverse(MRI.getNumVirtRegs()); // Model data dependencies between instructions being scheduled and the // ExitSU. @@ -745,17 +757,18 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, DbgMI = MI; continue; } + SUnit *SU = MISUnitMap[MI]; + assert(SU && "No SUnit mapped to this MI"); + if (RPTracker) { - RPTracker->recede(); + PressureDiff *PDiff = PDiffs ? &(*PDiffs)[SU->NodeNum] : 0; + RPTracker->recede(/*LiveUses=*/0, PDiff); assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); } assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && "Cannot schedule terminators or labels!"); - SUnit *SU = MISUnitMap[MI]; - assert(SU && "No SUnit mapped to this MI"); - // Add register-based dependencies (data, anti, and output). 
bool HasVRegDef = false; for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { @@ -833,20 +846,20 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, unsigned ChainLatency = 0; if (AliasChain->getInstr()->mayLoad()) ChainLatency = TrueMemOrderLatency; - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes, ChainLatency); } AliasChain = SU; for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); for (MapVector::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); for (MapVector >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) - addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, I->second[i], RejectMemNodes, TrueMemOrderLatency); } adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, @@ -855,7 +868,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, AliasMemDefs.clear(); AliasMemUses.clear(); } else if (MI->mayStore()) { - SmallVector, 4> Objs; + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); if (Objs.empty()) { @@ -864,10 +877,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, } bool MayAlias = false; - for (SmallVector, 4>::iterator - K = Objs.begin(), KE = Objs.end(); K != KE; ++K) { - const Value *V = K->first; - bool ThisMayAlias = K->second; + for (UnderlyingObjectsVector::iterator K = Objs.begin(), KE = Objs.end(); + K != KE; ++K) { + const Value *V = K->getPointer(); + bool ThisMayAlias = K->getInt(); if (ThisMayAlias) MayAlias = true; @@ -879,7 +892,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MapVector::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, + 0, true); I->second = SU; } else { if (ThisMayAlias) @@ -894,7 +908,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) - addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); } @@ -903,11 +917,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // Add dependencies from all the PendingLoads, i.e. loads // with no underlying object. for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + addChainDependency(AAForDep, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); // Add dependence on alias chain, if needed. if (AliasChain) - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); // But we also should check dependent instructions for the // SU in question. 
adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, @@ -929,7 +943,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { - SmallVector, 4> Objs; + UnderlyingObjectsVector Objs; getUnderlyingObjectsForInstr(MI, MFI, Objs); if (Objs.empty()) { @@ -937,7 +951,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // potentially aliasing stores. for (MapVector::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; @@ -945,10 +959,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MayAlias = false; } - for (SmallVector, 4>::iterator + for (UnderlyingObjectsVector::iterator J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { - const Value *V = J->first; - bool ThisMayAlias = J->second; + const Value *V = J->getPointer(); + bool ThisMayAlias = J->getInt(); if (ThisMayAlias) MayAlias = true; @@ -959,7 +973,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, MapVector::iterator IE = ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + addChainDependency(AAForDep, MFI, SU, I->second, RejectMemNodes, + 0, true); if (ThisMayAlias) AliasMemUses[V].push_back(SU); else @@ -969,7 +984,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); // Add dependencies on alias and barrier chains, if needed. if (MayAlias && AliasChain) - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + addChainDependency(AAForDep, MFI, SU, AliasChain, RejectMemNodes); if (BarrierChain) BarrierChain->addPred(SDep(SU, SDep::Barrier)); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2e09ec0..43f72c5 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -35,6 +35,8 @@ #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include using namespace llvm; @@ -43,6 +45,7 @@ STATISTIC(PreIndexedNodes , "Number of pre-indexed nodes created"); STATISTIC(PostIndexedNodes, "Number of post-indexed nodes created"); STATISTIC(OpsNarrowed , "Number of load/op/store narrowed"); STATISTIC(LdStFP2Int , "Number of fp load/store pairs transformed to int"); +STATISTIC(SlicedLoads, "Number of load sliced"); namespace { static cl::opt @@ -53,6 +56,14 @@ namespace { CombinerGlobalAA("combiner-global-alias-analysis", cl::Hidden, cl::desc("Include global information in alias analysis")); + /// Hidden option to stress test load slicing, i.e., when this option + /// is enabled, load slicing bypasses most of its profitability guards. 
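The declaration that follows uses LLVM's command-line library. For reference, the general shape of such a hidden, default-off boolean flag (the name and description here are placeholders, not part of the patch):

    #include "llvm/Support/CommandLine.h"

    // A hidden flag never shows up in -help output; cl::init(false)
    // makes it opt-in only.
    static llvm::cl::opt<bool>
    ExampleFlag("example-flag", llvm::cl::Hidden,
                llvm::cl::desc("Illustrative description"),
                llvm::cl::init(false));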
+ static cl::opt + StressLoadSlicing("combiner-stress-load-slicing", cl::Hidden, + cl::desc("Bypass the profitability model of load " + "slicing"), + cl::init(false)); + //------------------------------ DAGCombiner ---------------------------------// class DAGCombiner { @@ -62,6 +73,7 @@ namespace { CodeGenOpt::Level OptLevel; bool LegalOperations; bool LegalTypes; + bool ForCodeSize; // Worklist of all of the nodes that need to be simplified. // @@ -144,6 +156,7 @@ namespace { bool CombineToPreIndexedLoadStore(SDNode *N); bool CombineToPostIndexedLoadStore(SDNode *N); + bool SliceUpLoad(SDNode *N); void ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad); SDValue PromoteOperand(SDValue Op, EVT PVT, bool &Replace); @@ -154,8 +167,8 @@ namespace { SDValue PromoteExtend(SDValue Op); bool PromoteLoad(SDValue Op); - void ExtendSetCCUses(SmallVector SetCCs, - SDValue Trunc, SDValue ExtLoad, DebugLoc DL, + void ExtendSetCCUses(const SmallVectorImpl &SetCCs, + SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType); /// combine - call the node-specific routine that knows how to fold each @@ -246,18 +259,18 @@ namespace { SDValue visitVECTOR_SHUFFLE(SDNode *N); SDValue XformToShuffleWithZero(SDNode *N); - SDValue ReassociateOps(unsigned Opc, DebugLoc DL, SDValue LHS, SDValue RHS); + SDValue ReassociateOps(unsigned Opc, SDLoc DL, SDValue LHS, SDValue RHS); SDValue visitShiftByConstant(SDNode *N, unsigned Amt); bool SimplifySelectOps(SDNode *SELECT, SDValue LHS, SDValue RHS); SDValue SimplifyBinOpWithSameOpcodeHands(SDNode *N); - SDValue SimplifySelect(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2); - SDValue SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, SDValue N2, + SDValue SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2); + SDValue SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare = false); SDValue SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, - DebugLoc DL, bool foldBooleans = true); + SDLoc DL, bool foldBooleans = true); SDValue SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, unsigned HiOp); SDValue CombineConsecutiveLoads(SDNode *N, EVT VT); @@ -267,7 +280,7 @@ namespace { SDValue MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1, bool DemandHighBits = true); SDValue MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1); - SDNode *MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL); + SDNode *MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL); SDValue ReduceLoadWidth(SDNode *N); SDValue ReduceLoadOpStoreWidth(SDNode *N); SDValue TransformFPLoadStorePair(SDNode *N); @@ -279,15 +292,15 @@ namespace { /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes, /// looking for aliasing nodes and adding them to the Aliases vector. void GatherAllAliases(SDNode *N, SDValue OriginalChain, - SmallVector &Aliases); + SmallVectorImpl &Aliases); /// isAlias - Return true if there is any possibility that the two addresses /// overlap. - bool isAlias(SDValue Ptr1, int64_t Size1, + bool isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1, const Value *SrcValue1, int SrcValueOffset1, unsigned SrcValueAlign1, const MDNode *TBAAInfo1, - SDValue Ptr2, int64_t Size2, + SDValue Ptr2, int64_t Size2, bool IsVolatile2, const Value *SrcValue2, int SrcValueOffset2, unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const; @@ -299,7 +312,7 @@ namespace { /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. 
bool FindAliasInfo(SDNode *N, - SDValue &Ptr, int64_t &Size, + SDValue &Ptr, int64_t &Size, bool &IsVolatile, const Value *&SrcValue, int &SrcValueOffset, unsigned &SrcValueAlignment, const MDNode *&TBAAInfo) const; @@ -315,8 +328,15 @@ namespace { public: DAGCombiner(SelectionDAG &D, AliasAnalysis &A, CodeGenOpt::Level OL) - : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), - OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) {} + : DAG(D), TLI(D.getTargetLoweringInfo()), Level(BeforeLegalizeTypes), + OptLevel(OL), LegalOperations(false), LegalTypes(false), AA(A) { + AttributeSet FnAttrs = + DAG.getMachineFunction().getFunction()->getAttributes(); + ForCodeSize = + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || + FnAttrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::MinSize); + } /// Run - runs the dag combiner on all nodes in the work list void Run(CombineLevel AtLevel); @@ -326,7 +346,11 @@ namespace { /// getShiftAmountTy - Returns a type large enough to hold any valid /// shift amount - before type legalization these can be huge. EVT getShiftAmountTy(EVT LHSTy) { - return LegalTypes ? TLI.getShiftAmountTy(LHSTy) : TLI.getPointerTy(); + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + return LegalTypes ? TLI.getScalarShiftAmountTy(LHSTy) + : TLI.getPointerTy(); } /// isTypeLegal - This method returns true if we are running before type @@ -335,6 +359,12 @@ namespace { if (!LegalTypes) return true; return TLI.isTypeLegal(VT); } + + /// getSetCCResultType - Convenience wrapper around + /// TargetLowering::getSetCCResultType + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(*DAG.getContext(), VT); + } }; } @@ -482,12 +512,12 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); // fold (fneg (fadd A, B)) -> (fsub (fneg B), A) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, Depth+1), Op.getOperand(0)); @@ -501,7 +531,7 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, return Op.getOperand(1); // fold (fneg (fsub A, B)) -> (fsub B, A) - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(0)); case ISD::FMUL: @@ -512,24 +542,24 @@ static SDValue GetNegatedExpression(SDValue Op, SelectionDAG &DAG, if (isNegatibleForFree(Op.getOperand(0), LegalOperations, DAG.getTargetLoweringInfo(), &DAG.getTarget().Options, Depth+1)) - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); // fold (fneg (fmul X, Y)) -> (fmul X, (fneg Y)) - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), Op.getOperand(0), GetNegatedExpression(Op.getOperand(1), DAG, LegalOperations, 
Depth+1)); case ISD::FP_EXTEND: case ISD::FSIN: - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(Op.getOpcode(), SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1)); case ISD::FP_ROUND: - return DAG.getNode(ISD::FP_ROUND, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FP_ROUND, SDLoc(Op), Op.getValueType(), GetNegatedExpression(Op.getOperand(0), DAG, LegalOperations, Depth+1), Op.getOperand(1)); @@ -573,7 +603,7 @@ static bool isOneUseSetCC(SDValue N) { return false; } -SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, +SDValue DAGCombiner::ReassociateOps(unsigned Opc, SDLoc DL, SDValue N0, SDValue N1) { EVT VT = N0.getValueType(); if (N0.getOpcode() == Opc && isa(N0.getOperand(1))) { @@ -587,7 +617,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, } if (N0.hasOneUse()) { // reassoc. (op (op x, c1), y) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorkList(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N0.getOperand(1)); @@ -605,7 +635,7 @@ SDValue DAGCombiner::ReassociateOps(unsigned Opc, DebugLoc DL, } if (N1.hasOneUse()) { // reassoc. (op y, (op x, c1)) -> (op (op x, y), c1) iff x+c1 has one use - SDValue OpNode = DAG.getNode(Opc, N0.getDebugLoc(), VT, + SDValue OpNode = DAG.getNode(Opc, SDLoc(N0), VT, N1.getOperand(0), N0); AddToWorkList(OpNode.getNode()); return DAG.getNode(Opc, DL, VT, OpNode, N1.getOperand(1)); @@ -706,7 +736,7 @@ bool DAGCombiner::SimplifyDemandedBits(SDValue Op, const APInt &Demanded) { } void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { - DebugLoc dl = Load->getDebugLoc(); + SDLoc dl(Load); EVT VT = Load->getValueType(0); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, dl, VT, SDValue(ExtLoad, 0)); @@ -725,7 +755,7 @@ void DAGCombiner::ReplaceLoadWithPromotedLoad(SDNode *Load, SDNode *ExtLoad) { SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = false; - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (LoadSDNode *LD = dyn_cast(Op)) { EVT MemVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(LD) @@ -735,9 +765,7 @@ SDValue DAGCombiner::PromoteOperand(SDValue Op, EVT PVT, bool &Replace) { Replace = true; return DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); } unsigned Opc = Op.getOpcode(); @@ -767,7 +795,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { if (!TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, PVT)) return SDValue(); EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (NewOp.getNode() == 0) @@ -782,7 +810,7 @@ SDValue DAGCombiner::SExtPromoteOperand(SDValue Op, EVT PVT) { SDValue DAGCombiner::ZExtPromoteOperand(SDValue Op, EVT PVT) { EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool Replace = false; SDValue NewOp = PromoteOperand(Op, PVT, Replace); if (NewOp.getNode() == 0) @@ -845,7 +873,7 @@ SDValue DAGCombiner::PromoteIntBinOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ISD::TRUNCATE, dl, VT, 
DAG.getNode(Opc, dl, PVT, NN0, NN1)); } @@ -892,7 +920,7 @@ SDValue DAGCombiner::PromoteIntShiftOp(SDValue Op) { DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ISD::TRUNCATE, dl, VT, DAG.getNode(Opc, dl, PVT, N0, Op.getOperand(1))); } @@ -923,7 +951,7 @@ SDValue DAGCombiner::PromoteExtend(SDValue Op) { // fold (aext (sext x)) -> (sext x) DEBUG(dbgs() << "\nPromoting "; Op.getNode()->dump(&DAG)); - return DAG.getNode(Op.getOpcode(), Op.getDebugLoc(), VT, Op.getOperand(0)); + return DAG.getNode(Op.getOpcode(), SDLoc(Op), VT, Op.getOperand(0)); } return SDValue(); } @@ -948,7 +976,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { if (TLI.IsDesirableToPromoteOp(Op, PVT)) { assert(PVT != VT && "Don't know what type to promote to!"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDNode *N = Op.getNode(); LoadSDNode *LD = cast(N); EVT MemVT = LD->getMemoryVT(); @@ -958,9 +986,7 @@ bool DAGCombiner::PromoteLoad(SDValue Op) { : LD->getExtensionType(); SDValue NewLD = DAG.getExtLoad(ExtType, dl, PVT, LD->getChain(), LD->getBasePtr(), - LD->getPointerInfo(), - MemVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + MemVT, LD->getMemOperand()); SDValue Result = DAG.getNode(ISD::TRUNCATE, dl, VT, NewLD); DEBUG(dbgs() << "\nPromoting "; @@ -1008,7 +1034,8 @@ void DAGCombiner::Run(CombineLevel AtLevel) { // try and combine it. while (!WorkListContents.empty()) { SDNode *N; - // The WorkListOrder holds the SDNodes in order, but it may contain duplicates. + // The WorkListOrder holds the SDNodes in order, but it may contain + // duplicates. // In order to avoid a linear scan, we use a set (O(log N)) to hold what the // worklist *should* contain, and check the node we want to visit is should // actually be visited. @@ -1245,7 +1272,7 @@ static SDValue getInputChainForNode(SDNode *N) { if (unsigned NumOps = N->getNumOperands()) { if (N->getOperand(0).getValueType() == MVT::Other) return N->getOperand(0); - else if (N->getOperand(NumOps-1).getValueType() == MVT::Other) + if (N->getOperand(NumOps-1).getValueType() == MVT::Other) return N->getOperand(NumOps-1); for (unsigned i = 1; i < NumOps-1; ++i) if (N->getOperand(i).getValueType() == MVT::Other) @@ -1320,7 +1347,7 @@ SDValue DAGCombiner::visitTokenFactor(SDNode *N) { Result = DAG.getEntryNode(); } else { // New and improved token factor. 
- Result = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + Result = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, &Ops[0], Ops.size()); } @@ -1350,7 +1377,7 @@ SDValue DAGCombiner::visitMERGE_VALUES(SDNode *N) { } static -SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, +SDValue combineShlAddConstant(SDLoc DL, SDValue N0, SDValue N1, SelectionDAG &DAG) { EVT VT = N0.getValueType(); SDValue N00 = N0.getOperand(0); @@ -1360,10 +1387,10 @@ SDValue combineShlAddConstant(DebugLoc DL, SDValue N0, SDValue N1, if (N01C && N00.getOpcode() == ISD::ADD && N00.getNode()->hasOneUse() && isa(N00.getOperand(1))) { // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, N0); // fold (add x, 0) -> x if (N1C && N1C->isNullValue()) return N0; @@ -1408,28 +1435,28 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (GlobalAddressSDNode *GA = dyn_cast(N0)) if (!LegalOperations && TLI.isOffsetFoldingLegal(GA) && N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() + (uint64_t)N1C->getSExtValue()); // fold ((c1-A)+c2) -> (c1+c2)-A if (N1C && N0.getOpcode() == ISD::SUB) if (ConstantSDNode *N0C = dyn_cast(N0.getOperand(0))) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(N1C->getAPIntValue()+ N0C->getAPIntValue(), VT), N0.getOperand(1)); // reassociate add - SDValue RADD = ReassociateOps(ISD::ADD, N->getDebugLoc(), N0, N1); + SDValue RADD = ReassociateOps(ISD::ADD, SDLoc(N), N0, N1); if (RADD.getNode() != 0) return RADD; // fold ((0-A) + B) -> B-A if (N0.getOpcode() == ISD::SUB && isa(N0.getOperand(0)) && cast(N0.getOperand(0))->isNullValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, N0.getOperand(1)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, N0.getOperand(1)); // fold (A + (0-B)) -> A-B if (N1.getOpcode() == ISD::SUB && isa(N1.getOperand(0)) && cast(N1.getOperand(0))->isNullValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1.getOperand(1)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1.getOperand(1)); // fold (A+(B-A)) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(1)) return N1.getOperand(0); @@ -1439,18 +1466,18 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // fold (A+(B-(A+C))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(0)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1).getOperand(1)); // fold (A+(B-(C+A))) to (B-C) if (N1.getOpcode() == ISD::SUB && N1.getOperand(1).getOpcode() == ISD::ADD && N0 == N1.getOperand(1).getOperand(1)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1.getOperand(0), + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1).getOperand(0)); // fold (A+((B-A)+or-C)) to (B+or-C) if ((N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::ADD) && N1.getOperand(0).getOpcode() == ISD::SUB && N0 == N1.getOperand(0).getOperand(1)) - return DAG.getNode(N1.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N1.getOpcode(), SDLoc(N), VT, N1.getOperand(0).getOperand(0), N1.getOperand(1)); // fold (A-B)+(C-D) to (A+C)-(B+D) when A or C is constant @@ -1461,9 +1488,9 @@ SDValue DAGCombiner::visitADD(SDNode *N) { 
SDValue N11 = N1.getOperand(1); if (isa(N00) || isa(N10)) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, - DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, N00, N10), - DAG.getNode(ISD::ADD, N1.getDebugLoc(), VT, N01, N11)); + return DAG.getNode(ISD::SUB, SDLoc(N), VT, + DAG.getNode(ISD::ADD, SDLoc(N0), VT, N00, N10), + DAG.getNode(ISD::ADD, SDLoc(N1), VT, N01, N11)); } if (!VT.isVector() && SimplifyDemandedBits(SDValue(N, 0))) @@ -1481,17 +1508,17 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1); } } // fold (add (shl (add x, c1), c2), ) -> (add (add (shl x, c2), c1<hasOneUse()) { - SDValue Result = combineShlAddConstant(N->getDebugLoc(), N0, N1, DAG); + SDValue Result = combineShlAddConstant(SDLoc(N), N0, N1, DAG); if (Result.getNode()) return Result; } if (N1.getOpcode() == ISD::SHL && N1.getNode()->hasOneUse()) { - SDValue Result = combineShlAddConstant(N->getDebugLoc(), N1, N0, DAG); + SDValue Result = combineShlAddConstant(SDLoc(N), N1, N0, DAG); if (Result.getNode()) return Result; } @@ -1501,8 +1528,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (ConstantSDNode *C = dyn_cast(N1.getOperand(0).getOperand(0))) if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N1.getOperand(0).getOperand(1), N1.getOperand(1))); if (N0.getOpcode() == ISD::SHL && @@ -1510,8 +1537,8 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (ConstantSDNode *C = dyn_cast(N0.getOperand(0).getOperand(0))) if (C->getAPIntValue() == 0) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N1, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N1, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0).getOperand(1), N0.getOperand(1))); @@ -1524,7 +1551,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { // (add z, (and (sbbl x, x), 1)) -> (sub z, (sbbl x, x)) // and similar xforms where the inner op is either ~0 or 0. if (NumSignBits == DestBits && AndOp1 && AndOp1->isOne()) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); return DAG.getNode(ISD::SUB, DL, VT, N->getOperand(0), AndOp0); } } @@ -1533,7 +1560,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (N0.getOpcode() == ISD::SIGN_EXTEND && N0.getOperand(0).getValueType() == MVT::i1 && !TLI.isOperationLegal(ISD::SIGN_EXTEND, MVT::i1)) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue ZExt = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, N0.getOperand(0)); return DAG.getNode(ISD::SUB, DL, VT, N1, ZExt); } @@ -1550,18 +1577,18 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // If the flag result is dead, turn this into an ADD. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, N1), + return CombineTo(N, DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); // canonicalize constant to RHS. 
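For reference, the MaskedValueIsZero checks above are what make the (add x, y) -> (or x, y) rewrites in visitADD and visitADDC safe. A minimal standalone C++ check of the underlying identity (illustrative sketch, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  // When the operands share no set bits, no bit position can produce a
  // carry, so add and or compute the same value.
  for (uint32_t a = 0; a < 256; ++a)
    for (uint32_t b = 0; b < 256; ++b)
      if ((a & b) == 0)           // the MaskedValueIsZero precondition
        assert(a + b == (a | b)); // (add x, y) -> (or x, y)
  return 0;
}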
if (N0C && !N1C) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N1, N0); + return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N1, N0); // fold (addc x, 0) -> x + no carry out if (N1C && N1C->isNullValue()) return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); // fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits. APInt LHSZero, LHSOne; @@ -1574,9 +1601,9 @@ SDValue DAGCombiner::visitADDC(SDNode *N) { // If all possibly-set bits on the LHS are clear on the RHS, return an OR. // If all possibly-set bits on the RHS are clear on the LHS, return an OR. if ((RHSZero & ~LHSZero) == ~LHSZero || (LHSZero & ~RHSZero) == ~RHSZero) - return CombineTo(N, DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N1), + return CombineTo(N, DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N1), DAG.getNode(ISD::CARRY_FALSE, - N->getDebugLoc(), MVT::Glue)); + SDLoc(N), MVT::Glue)); } return SDValue(); @@ -1591,30 +1618,25 @@ SDValue DAGCombiner::visitADDE(SDNode *N) { // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::ADDE, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::ADDE, SDLoc(N), N->getVTList(), N1, N0, CarryIn); // fold (adde x, y, false) -> (addc x, y) if (CarryIn.getOpcode() == ISD::CARRY_FALSE) - return DAG.getNode(ISD::ADDC, N->getDebugLoc(), N->getVTList(), N0, N1); + return DAG.getNode(ISD::ADDC, SDLoc(N), N->getVTList(), N0, N1); return SDValue(); } // Since it may not be valid to emit a fold to zero for vector initializers // check if we can before folding. -static SDValue tryFoldToZero(DebugLoc DL, const TargetLowering &TLI, EVT VT, - SelectionDAG &DAG, bool LegalOperations) { - if (!VT.isVector()) { +static SDValue tryFoldToZero(SDLoc DL, const TargetLowering &TLI, EVT VT, + SelectionDAG &DAG, + bool LegalOperations, bool LegalTypes) { + if (!VT.isVector()) + return DAG.getConstant(0, VT); + if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) return DAG.getConstant(0, VT); - } - if (!LegalOperations || TLI.isOperationLegal(ISD::BUILD_VECTOR, VT)) { - // Produce a vector of zeros. - SDValue El = DAG.getConstant(0, VT.getVectorElementType()); - std::vector Ops(VT.getVectorNumElements(), El); - return DAG.getNode(ISD::BUILD_VECTOR, DL, VT, - &Ops[0], Ops.size()); - } return SDValue(); } @@ -1640,17 +1662,17 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { // fold (sub x, x) -> 0 // FIXME: Refactor this and xor and other similar operations together. if (N0 == N1) - return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // fold (sub c1, c2) -> c1-c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SUB, VT, N0C, N1C); // fold (sub x, c) -> (add x, -c) if (N1C) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, DAG.getConstant(-N1C->getAPIntValue(), VT)); // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) if (N0C && N0C->isAllOnesValue()) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold A-(A-B) -> B if (N1.getOpcode() == ISD::SUB && N0 == N1.getOperand(0)) return N1.getOperand(1); @@ -1664,7 +1686,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (N1.getOpcode() == ISD::ADD && N0C && N1C1) { SDValue NewC = DAG.getConstant(N0C->getAPIntValue() - N1C1->getAPIntValue(), VT); - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, NewC, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, NewC, N1.getOperand(0)); } // fold ((A+(B+or-C))-B) -> A+or-C @@ -1672,19 +1694,19 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { (N0.getOperand(1).getOpcode() == ISD::SUB || N0.getOperand(1).getOpcode() == ISD::ADD) && N0.getOperand(1).getOperand(0) == N1) - return DAG.getNode(N0.getOperand(1).getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOperand(1).getOpcode(), SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(1)); // fold ((A+(C+B))-B) -> A+C if (N0.getOpcode() == ISD::ADD && N0.getOperand(1).getOpcode() == ISD::ADD && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, + return DAG.getNode(ISD::ADD, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // fold ((A-(B-C))-C) -> A-B if (N0.getOpcode() == ISD::SUB && N0.getOperand(1).getOpcode() == ISD::SUB && N0.getOperand(1).getOperand(1) == N1) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1).getOperand(0)); // If either operand of a sub is undef, the result is undef @@ -1698,7 +1720,7 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (!LegalOperations && TLI.isOffsetFoldingLegal(GA)) { // fold (sub Sym, c) -> Sym-c if (N1C && GA->getOpcode() == ISD::GlobalAddress) - return DAG.getGlobalAddress(GA->getGlobal(), N1C->getDebugLoc(), VT, + return DAG.getGlobalAddress(GA->getGlobal(), SDLoc(N1C), VT, GA->getOffset() - (uint64_t)N1C->getSExtValue()); // fold (sub Sym+c1, Sym+c2) -> c1-c2 @@ -1720,25 +1742,25 @@ SDValue DAGCombiner::visitSUBC(SDNode *N) { // If the flag result is dead, turn this into an SUB. if (!N->hasAnyUseOfValue(1)) - return CombineTo(N, DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, N1), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, N1), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // fold (subc x, x) -> 0 + no borrow if (N0 == N1) return CombineTo(N, DAG.getConstant(0, VT), - DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // fold (subc x, 0) -> x + no borrow if (N1C && N1C->isNullValue()) - return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(), + return CombineTo(N, N0, DAG.getNode(ISD::CARRY_FALSE, SDLoc(N), MVT::Glue)); // Canonicalize (sub -1, x) -> ~x, i.e. 
(xor x, -1) + no borrow
   if (N0C && N0C->isAllOnesValue())
-    return CombineTo(N, DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0),
-                     DAG.getNode(ISD::CARRY_FALSE, N->getDebugLoc(),
+    return CombineTo(N, DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0),
+                     DAG.getNode(ISD::CARRY_FALSE, SDLoc(N),
                                  MVT::Glue));
 
   return SDValue();
@@ -1751,63 +1773,102 @@ SDValue DAGCombiner::visitSUBE(SDNode *N) {
 
   // fold (sube x, y, false) -> (subc x, y)
   if (CarryIn.getOpcode() == ISD::CARRY_FALSE)
-    return DAG.getNode(ISD::SUBC, N->getDebugLoc(), N->getVTList(), N0, N1);
+    return DAG.getNode(ISD::SUBC, SDLoc(N), N->getVTList(), N0, N1);
 
   return SDValue();
 }
 
+/// isConstantSplatVector - Returns true if N is a BUILD_VECTOR node whose
+/// elements are all the same constant or undefined.
+static bool isConstantSplatVector(SDNode *N, APInt& SplatValue) {
+  BuildVectorSDNode *C = dyn_cast<BuildVectorSDNode>(N);
+  if (!C)
+    return false;
+
+  APInt SplatUndef;
+  unsigned SplatBitSize;
+  bool HasAnyUndefs;
+  EVT EltVT = N->getValueType(0).getVectorElementType();
+  return (C->isConstantSplat(SplatValue, SplatUndef, SplatBitSize,
+                             HasAnyUndefs) &&
+          EltVT.getSizeInBits() >= SplatBitSize);
+}
+
 SDValue DAGCombiner::visitMUL(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
-  ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
-  ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
   EVT VT = N0.getValueType();
 
+  // fold (mul x, undef) -> 0
+  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
+    return DAG.getConstant(0, VT);
+
+  bool N0IsConst = false;
+  bool N1IsConst = false;
+  APInt ConstValue0, ConstValue1;
   // fold vector ops
   if (VT.isVector()) {
     SDValue FoldedVOp = SimplifyVBinOp(N);
     if (FoldedVOp.getNode()) return FoldedVOp;
+
+    N0IsConst = isConstantSplatVector(N0.getNode(), ConstValue0);
+    N1IsConst = isConstantSplatVector(N1.getNode(), ConstValue1);
+  } else {
+    N0IsConst = dyn_cast<ConstantSDNode>(N0) != 0;
+    ConstValue0 = N0IsConst ? (dyn_cast<ConstantSDNode>(N0))->getAPIntValue()
+                            : APInt();
+    N1IsConst = dyn_cast<ConstantSDNode>(N1) != 0;
+    ConstValue1 = N1IsConst ? (dyn_cast<ConstantSDNode>(N1))->getAPIntValue()
                             : APInt();
   }
 
-  // fold (mul x, undef) -> 0
-  if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
-    return DAG.getConstant(0, VT);
   // fold (mul c1, c2) -> c1*c2
-  if (N0C && N1C)
-    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0C, N1C);
+  if (N0IsConst && N1IsConst)
+    return DAG.FoldConstantArithmetic(ISD::MUL, VT, N0.getNode(), N1.getNode());
+
   // canonicalize constant to RHS
-  if (N0C && !N1C)
-    return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, N1, N0);
+  if (N0IsConst && !N1IsConst)
+    return DAG.getNode(ISD::MUL, SDLoc(N), VT, N1, N0);
   // fold (mul x, 0) -> 0
-  if (N1C && N1C->isNullValue())
+  if (N1IsConst && ConstValue1 == 0)
     return N1;
+  // We require a splat of the entire scalar bit width for non-contiguous
+  // bit patterns.
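The isConstantSplatVector helper above gates the new vector paths in visitMUL. As a rough standalone sketch of the "every lane holds the same constant" test it performs (plain arrays standing in for BUILD_VECTOR nodes, undef-lane handling omitted, helper name ad hoc):

#include <cassert>
#include <cstdint>
#include <vector>

// Returns true when every lane of the vector holds the same constant,
// which is the condition that lets the scalar mul folds apply lane-wise.
static bool isConstantSplat(const std::vector<uint32_t> &Lanes,
                            uint32_t &SplatValue) {
  if (Lanes.empty())
    return false;
  SplatValue = Lanes[0];
  for (uint32_t L : Lanes)
    if (L != SplatValue)
      return false;
  return true;
}

int main() {
  uint32_t S;
  assert(isConstantSplat({4, 4, 4, 4}, S) && S == 4);
  assert(!isConstantSplat({4, 4, 2, 4}, S));
  return 0;
}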
+ bool IsFullSplat = + ConstValue1.getBitWidth() == VT.getScalarType().getSizeInBits(); + // fold (mul x, 1) -> x + if (N1IsConst && ConstValue1 == 1 && IsFullSplat) + return N0; // fold (mul x, -1) -> 0-x - if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + if (N1IsConst && ConstValue1.isAllOnesValue()) + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // fold (mul x, (1 << c)) -> x << c - if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, - DAG.getConstant(N1C->getAPIntValue().logBase2(), + if (N1IsConst && ConstValue1.isPowerOf2() && IsFullSplat) + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, + DAG.getConstant(ConstValue1.logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (mul x, -(1 << c)) -> -(x << c) or (-x) << c - if (N1C && (-N1C->getAPIntValue()).isPowerOf2()) { - unsigned Log2Val = (-N1C->getAPIntValue()).logBase2(); + if (N1IsConst && (-ConstValue1).isPowerOf2() && IsFullSplat) { + unsigned Log2Val = (-ConstValue1).logBase2(); // FIXME: If the input is something that is easily negated (e.g. a // single-use add), we should put the negate there. - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, + DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, DAG.getConstant(Log2Val, getShiftAmountTy(N0.getValueType())))); } + + APInt Val; // (mul (shl X, c1), c2) -> (mul X, c2 << c1) - if (N1C && N0.getOpcode() == ISD::SHL && - isa(N0.getOperand(1))) { - SDValue C3 = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + if (N1IsConst && N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa(N0.getOperand(1)))) { + SDValue C3 = DAG.getNode(ISD::SHL, SDLoc(N), VT, N1, N0.getOperand(1)); AddToWorkList(C3.getNode()); - return DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::MUL, SDLoc(N), VT, N0.getOperand(0), C3); } @@ -1816,7 +1877,9 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { { SDValue Sh(0,0), Y(0,0); // Check for both (mul (shl X, C), Y) and (mul Y, (shl X, C)). 
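A standalone check of the multiply strength-reduction identities used in this hunk, on ordinary uint32_t values; wrapping unsigned arithmetic makes them exact (illustrative, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 1024; ++X)
    for (unsigned C = 0; C < 8; ++C) {
      assert(X * (1u << C) == X << C);        // (mul x, (1 << c)) -> (shl x, c)
      assert(X * -(1u << C) == -(X << C));    // (mul x, -(1 << c)) -> -(shl x, c)
      uint32_t C2 = 13;
      assert((X << C) * C2 == X * (C2 << C)); // (mul (shl x, c1), c2) -> (mul x, (c2 << c1))
    }
  return 0;
}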
- if (N0.getOpcode() == ISD::SHL && isa(N0.getOperand(1)) && + if (N0.getOpcode() == ISD::SHL && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa(N0.getOperand(1))) && N0.getNode()->hasOneUse()) { Sh = N0; Y = N1; } else if (N1.getOpcode() == ISD::SHL && @@ -1826,24 +1889,25 @@ SDValue DAGCombiner::visitMUL(SDNode *N) { } if (Sh.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, Sh.getOperand(0), Y); - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SHL, SDLoc(N), VT, Mul, Sh.getOperand(1)); } } // fold (mul (add x, c1), c2) -> (add (mul x, c2), c1*c2) - if (N1C && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && - isa(N0.getOperand(1))) - return DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, - DAG.getNode(ISD::MUL, N0.getDebugLoc(), VT, + if (N1IsConst && N0.getOpcode() == ISD::ADD && N0.getNode()->hasOneUse() && + (isConstantSplatVector(N0.getOperand(1).getNode(), Val) || + isa(N0.getOperand(1)))) + return DAG.getNode(ISD::ADD, SDLoc(N), VT, + DAG.getNode(ISD::MUL, SDLoc(N0), VT, N0.getOperand(0), N1), - DAG.getNode(ISD::MUL, N1.getDebugLoc(), VT, + DAG.getNode(ISD::MUL, SDLoc(N1), VT, N0.getOperand(1), N1)); // reassociate mul - SDValue RMUL = ReassociateOps(ISD::MUL, N->getDebugLoc(), N0, N1); + SDValue RMUL = ReassociateOps(ISD::MUL, SDLoc(N), N0, N1); if (RMUL.getNode() != 0) return RMUL; @@ -1871,13 +1935,13 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return N0; // fold (sdiv X, -1) -> 0-X if (N1C && N1C->isAllOnesValue()) - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), N0); // If we know the sign bits of both operands are zero, strength reduce to a // udiv instead. Handles (X&15) /s 4 -> X&15 >> 2 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UDIV, N->getDebugLoc(), N1.getValueType(), + return DAG.getNode(ISD::UDIV, SDLoc(N), N1.getValueType(), N0, N1); } // fold (sdiv X, pow2) -> simple ops after legalize @@ -1892,19 +1956,19 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { unsigned lg2 = N1C->getAPIntValue().countTrailingZeros(); // Splat the sign bit into the register - SDValue SGN = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, + SDValue SGN = DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, DAG.getConstant(VT.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); AddToWorkList(SGN.getNode()); // Add (N0 < 0) ? abs2 - 1 : 0; - SDValue SRL = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, SGN, + SDValue SRL = DAG.getNode(ISD::SRL, SDLoc(N), VT, SGN, DAG.getConstant(VT.getSizeInBits() - lg2, getShiftAmountTy(SGN.getValueType()))); - SDValue ADD = DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N0, SRL); + SDValue ADD = DAG.getNode(ISD::ADD, SDLoc(N), VT, N0, SRL); AddToWorkList(SRL.getNode()); AddToWorkList(ADD.getNode()); // Divide by pow2 - SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, ADD, + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), VT, ADD, DAG.getConstant(lg2, getShiftAmountTy(ADD.getValueType()))); // If we're dividing by a positive value, we're done. 
Otherwise, we must @@ -1913,7 +1977,7 @@ SDValue DAGCombiner::visitSDIV(SDNode *N) { return SRA; AddToWorkList(SRA.getNode()); - return DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SUB, SDLoc(N), VT, DAG.getConstant(0, VT), SRA); } @@ -1952,7 +2016,7 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::UDIV, VT, N0C, N1C); // fold (udiv x, (1 << c)) -> x >>u c if (N1C && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, DAG.getConstant(N1C->getAPIntValue().logBase2(), getShiftAmountTy(N0.getValueType()))); // fold (udiv x, (shl c, y)) -> x >>u (log2(c)+y) iff c is power of 2 @@ -1960,13 +2024,13 @@ SDValue DAGCombiner::visitUDIV(SDNode *N) { if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { EVT ADDVT = N1.getOperand(1).getValueType(); - SDValue Add = DAG.getNode(ISD::ADD, N->getDebugLoc(), ADDVT, + SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N), ADDVT, N1.getOperand(1), DAG.getConstant(SHC->getAPIntValue() .logBase2(), ADDVT)); AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, Add); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, Add); } } } @@ -2000,19 +2064,19 @@ SDValue DAGCombiner::visitSREM(SDNode *N) { // urem instead. Handles (X & 0x0FFFFFFF) %s 16 -> X&15 if (!VT.isVector()) { if (DAG.SignBitIsZero(N1) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UREM, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::UREM, SDLoc(N), VT, N0, N1); } // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::SDIV, N->getDebugLoc(), VT, N0, N1); + SDValue Div = DAG.getNode(ISD::SDIV, SDLoc(N), VT, N0, N1); AddToWorkList(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); AddToWorkList(Mul.getNode()); return Sub; } @@ -2040,18 +2104,18 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::UREM, VT, N0C, N1C); // fold (urem x, pow2) -> (and x, pow2-1) if (N1C && !N1C->isNullValue() && N1C->getAPIntValue().isPowerOf2()) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, DAG.getConstant(N1C->getAPIntValue()-1,VT)); // fold (urem x, (shl pow2, y)) -> (and x, (add (shl pow2, y), -1)) if (N1.getOpcode() == ISD::SHL) { if (ConstantSDNode *SHC = dyn_cast(N1.getOperand(0))) { if (SHC->getAPIntValue().isPowerOf2()) { SDValue Add = - DAG.getNode(ISD::ADD, N->getDebugLoc(), VT, N1, + DAG.getNode(ISD::ADD, SDLoc(N), VT, N1, DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT)); AddToWorkList(Add.getNode()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, Add); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, Add); } } } @@ -2059,13 +2123,13 @@ SDValue DAGCombiner::visitUREM(SDNode *N) { // If X/C can be simplified by the division-by-constant logic, lower // X%C to the equivalent of X-X/C*C. 
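The SRA/SRL/ADD/SRA sequence visitSDIV emits for a power-of-two divisor can be checked in isolation. A sketch on int32_t for a positive divisor 1 << Lg2, assuming arithmetic right shift of signed values (true on the targets LLVM supports, though implementation-defined in older C++); SDivPow2 is an ad hoc name:

#include <cassert>
#include <cstdint>

static int32_t SDivPow2(int32_t X, unsigned Lg2) {
  int32_t Sgn = X >> 31;                       // splat the sign bit
  uint32_t Srl = (uint32_t)Sgn >> (32 - Lg2);  // (X < 0) ? (1 << Lg2) - 1 : 0
  int32_t Add = X + (int32_t)Srl;              // bias negatives toward zero
  return Add >> Lg2;                           // divide by the power of two
}

int main() {
  for (int32_t X = -1024; X <= 1024; ++X)
    for (unsigned Lg2 = 1; Lg2 < 8; ++Lg2)
      assert(SDivPow2(X, Lg2) == X / (1 << Lg2)); // C division truncates, too
  return 0;
}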
if (N1C && !N1C->isNullValue()) { - SDValue Div = DAG.getNode(ISD::UDIV, N->getDebugLoc(), VT, N0, N1); + SDValue Div = DAG.getNode(ISD::UDIV, SDLoc(N), VT, N0, N1); AddToWorkList(Div.getNode()); SDValue OptimizedDiv = combine(Div.getNode()); if (OptimizedDiv.getNode() && OptimizedDiv.getNode() != Div.getNode()) { - SDValue Mul = DAG.getNode(ISD::MUL, N->getDebugLoc(), VT, + SDValue Mul = DAG.getNode(ISD::MUL, SDLoc(N), VT, OptimizedDiv, N1); - SDValue Sub = DAG.getNode(ISD::SUB, N->getDebugLoc(), VT, N0, Mul); + SDValue Sub = DAG.getNode(ISD::SUB, SDLoc(N), VT, N0, Mul); AddToWorkList(Mul.getNode()); return Sub; } @@ -2086,14 +2150,14 @@ SDValue DAGCombiner::visitMULHS(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // fold (mulhs x, 0) -> 0 if (N1C && N1C->isNullValue()) return N1; // fold (mulhs x, 1) -> (sra x, size(x)-1) if (N1C && N1C->getAPIntValue() == 1) - return DAG.getNode(ISD::SRA, N->getDebugLoc(), N0.getValueType(), N0, + return DAG.getNode(ISD::SRA, SDLoc(N), N0.getValueType(), N0, DAG.getConstant(N0.getValueType().getSizeInBits() - 1, getShiftAmountTy(N0.getValueType()))); // fold (mulhs x, undef) -> 0 @@ -2124,7 +2188,7 @@ SDValue DAGCombiner::visitMULHU(SDNode *N) { SDValue N1 = N->getOperand(1); ConstantSDNode *N1C = dyn_cast(N1); EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // fold (mulhu x, 0) -> 0 if (N1C && N1C->isNullValue()) @@ -2166,7 +2230,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!HiExists && (!LegalOperations || TLI.isOperationLegal(LoOp, N->getValueType(0)))) { - SDValue Res = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + SDValue Res = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->op_begin(), N->getNumOperands()); return CombineTo(N, Res, Res); } @@ -2176,7 +2240,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, if (!LoExists && (!LegalOperations || TLI.isOperationLegal(HiOp, N->getValueType(1)))) { - SDValue Res = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + SDValue Res = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->op_begin(), N->getNumOperands()); return CombineTo(N, Res, Res); } @@ -2187,7 +2251,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, // If the two computed results can be simplified separately, separate them. if (LoExists) { - SDValue Lo = DAG.getNode(LoOp, N->getDebugLoc(), N->getValueType(0), + SDValue Lo = DAG.getNode(LoOp, SDLoc(N), N->getValueType(0), N->op_begin(), N->getNumOperands()); AddToWorkList(Lo.getNode()); SDValue LoOpt = combine(Lo.getNode()); @@ -2198,7 +2262,7 @@ SDValue DAGCombiner::SimplifyNodeWithTwoResults(SDNode *N, unsigned LoOp, } if (HiExists) { - SDValue Hi = DAG.getNode(HiOp, N->getDebugLoc(), N->getValueType(1), + SDValue Hi = DAG.getNode(HiOp, SDLoc(N), N->getValueType(1), N->op_begin(), N->getNumOperands()); AddToWorkList(Hi.getNode()); SDValue HiOpt = combine(Hi.getNode()); @@ -2216,7 +2280,7 @@ SDValue DAGCombiner::visitSMUL_LOHI(SDNode *N) { if (Res.getNode()) return Res; EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. 
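A standalone check of the unsigned-remainder folds above, plus the "twice as wide multiply plus a shift" decomposition the MULHU comment refers to (values arbitrary; illustrative only):

#include <cassert>
#include <cstdint>

int main() {
  for (uint32_t X = 0; X < 100000; X += 7) {
    assert(X % 64u == (X & 63u));           // (urem x, pow2) -> (and x, pow2-1)
    assert(X % 24u == X - (X / 24u) * 24u); // X % C == X - (X / C) * C
  }
  // mulhu via a wider multiply: the high half of a 32x32 product is the
  // 64-bit product shifted right by 32.
  uint64_t Wide = (uint64_t)0xDEADBEEFu * 0xCAFEBABEu;
  uint32_t Hi = (uint32_t)(Wide >> 32);
  uint32_t Lo = (uint32_t)Wide;
  assert((((uint64_t)Hi << 32) | Lo) == Wide);
  return 0;
}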
@@ -2246,7 +2310,7 @@ SDValue DAGCombiner::visitUMUL_LOHI(SDNode *N) { if (Res.getNode()) return Res; EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If the type twice as wide is legal, transform the mulhu to a wider multiply // plus a shift. @@ -2275,7 +2339,7 @@ SDValue DAGCombiner::visitSMULO(SDNode *N) { // (smulo x, 2) -> (saddo x, x) if (ConstantSDNode *C2 = dyn_cast(N->getOperand(1))) if (C2->getAPIntValue() == 2) - return DAG.getNode(ISD::SADDO, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::SADDO, SDLoc(N), N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); @@ -2285,7 +2349,7 @@ SDValue DAGCombiner::visitUMULO(SDNode *N) { // (umulo x, 2) -> (uaddo x, x) if (ConstantSDNode *C2 = dyn_cast(N->getOperand(1))) if (C2->getAPIntValue() == 2) - return DAG.getNode(ISD::UADDO, N->getDebugLoc(), N->getVTList(), + return DAG.getNode(ISD::UADDO, SDLoc(N), N->getVTList(), N->getOperand(0), N->getOperand(0)); return SDValue(); @@ -2336,11 +2400,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { !VT.isVector() && Op0VT == N1.getOperand(0).getValueType() && (!LegalOperations || TLI.isOperationLegal(N->getOpcode(), Op0VT))) { - SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); AddToWorkList(ORNode.getNode()); - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, ORNode); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode); } // For each of OP in SHL/SRL/SRA/AND... @@ -2350,11 +2414,11 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL || N0.getOpcode() == ISD::SRA || N0.getOpcode() == ISD::AND) && N0.getOperand(1) == N1.getOperand(1)) { - SDValue ORNode = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(N->getOpcode(), SDLoc(N0), N0.getOperand(0).getValueType(), N0.getOperand(0), N1.getOperand(0)); AddToWorkList(ORNode.getNode()); - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ORNode, N0.getOperand(1)); } @@ -2372,7 +2436,7 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { SDValue In1 = N1.getOperand(0); EVT In0Ty = In0.getValueType(); EVT In1Ty = In1.getValueType(); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // If both incoming values are integers, and the original types are the // same. 
if (In0Ty.isInteger() && In1Ty.isInteger() && In0Ty == In1Ty) { @@ -2414,10 +2478,10 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) { } if (SameMask) { - SDValue Op = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + SDValue Op = DAG.getNode(N->getOpcode(), SDLoc(N), VT, N0.getOperand(0), N1.getOperand(0)); AddToWorkList(Op.getNode()); - return DAG.getVectorShuffle(VT, N->getDebugLoc(), Op, + return DAG.getVectorShuffle(VT, SDLoc(N), Op, DAG.getUNDEF(VT), &SVN0->getMask()[0]); } } @@ -2460,7 +2524,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::AND, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N1, N0); // fold (and x, -1) -> x if (N1C && N1C->isAllOnesValue()) return N0; @@ -2469,7 +2533,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { APInt::getAllOnesValue(BitWidth))) return DAG.getConstant(0, VT); // reassociate and - SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1); + SDValue RAND = ReassociateOps(ISD::AND, SDLoc(N), N0, N1); if (RAND.getNode() != 0) return RAND; // fold (and (or x, C), D) -> D if (C & D) == D @@ -2483,7 +2547,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { APInt Mask = ~N1C->getAPIntValue(); Mask = Mask.trunc(N0Op0.getValueSizeInBits()); if (DAG.MaskedValueIsZero(N0Op0, Mask)) { - SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), + SDValue Zext = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), N0.getValueType(), N0Op0); // Replace uses of the AND with uses of the Zero extend node. @@ -2496,7 +2560,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { return SDValue(N, 0); // Return N so it doesn't get rechecked! } } - // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> + // similarly fold (and (X (load ([non_ext|any_ext|zero_ext] V))), c) -> // (X (load ([non_ext|zero_ext] V))) if 'and' only clears top bits which must // already be zero by virtue of the width of the base type of the load. 
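The justification for dropping such an 'and' shows up on plain integers: masking bits that are already known zero is the identity. An illustrative sketch, with a uint8_t variable standing in for the zero-extending narrow load:

#include <cassert>
#include <cstdint>

int main() {
  // A zero-extending narrow load already has its high bits clear, so an
  // 'and' that only clears those bits can be removed outright.
  uint8_t Mem = 0xAB;
  uint32_t Loaded = Mem;               // stands in for (zextload i8 -> i32)
  assert((Loaded & 0xFFu) == Loaded);  // the mask is a no-op
  // Generally, (x & m) == x whenever the bits m clears are already zero,
  // which is what MaskedValueIsZero proves in the DAG.
  uint32_t X = 0x00001234u;            // high 16 bits known zero
  assert((X & 0x0000FFFFu) == X);
  return 0;
}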
// @@ -2573,7 +2637,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { SDValue NewLoad(Load, 0); if (Load->getExtensionType() == ISD::EXTLOAD) { NewLoad = DAG.getLoad(Load->getAddressingMode(), ISD::ZEXTLOAD, - Load->getValueType(0), Load->getDebugLoc(), + Load->getValueType(0), SDLoc(Load), Load->getChain(), Load->getBasePtr(), Load->getOffset(), Load->getMemoryVT(), Load->getMemOperand()); @@ -2604,26 +2668,39 @@ SDValue DAGCombiner::visitAND(SDNode *N) { LL.getValueType().isInteger()) { // fold (and (seteq X, 0), (seteq Y, 0)) -> (seteq (or X, Y), 0) if (cast(LR)->isNullValue() && Op1 == ISD::SETEQ) { - SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorkList(ORNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (and (seteq X, -1), (seteq Y, -1)) -> (seteq (and X, Y), -1) if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETEQ) { - SDValue ANDNode = DAG.getNode(ISD::AND, N0.getDebugLoc(), + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorkList(ANDNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } // fold (and (setgt X, -1), (setgt Y, -1)) -> (setgt (or X, Y), -1) if (cast(LR)->isAllOnesValue() && Op1 == ISD::SETGT) { - SDValue ORNode = DAG.getNode(ISD::OR, N0.getDebugLoc(), + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(N0), LR.getValueType(), LL, RL); AddToWorkList(ORNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } } + // Simplify (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2) + if (LL == RL && isa(LR) && isa(RR) && + Op0 == Op1 && LL.getValueType().isInteger() && + Op0 == ISD::SETNE && ((cast(LR)->isNullValue() && + cast(RR)->isAllOnesValue()) || + (cast(LR)->isAllOnesValue() && + cast(RR)->isNullValue()))) { + SDValue ADDNode = DAG.getNode(ISD::ADD, SDLoc(N0), LL.getValueType(), + LL, DAG.getConstant(1, LL.getValueType())); + AddToWorkList(ADDNode.getNode()); + return DAG.getSetCC(SDLoc(N), VT, ADDNode, + DAG.getConstant(2, LL.getValueType()), ISD::SETUGE); + } // canonicalize equivalent to ll == rl if (LL == RR && LR == RL) { Op1 = ISD::getSetCCSwappedOperands(Op1); @@ -2636,8 +2713,8 @@ SDValue DAGCombiner::visitAND(SDNode *N) { (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && TLI.isOperationLegal(ISD::SETCC, - TLI.getSetCCResultType(N0.getSimpleValueType()))))) - return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + getSetCCResultType(N0.getSimpleValueType()))))) + return DAG.getSetCC(SDLoc(N), N0.getValueType(), LL, LR, Result); } } @@ -2665,11 +2742,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
@@ -2687,12 +2762,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { BitWidth - MemVT.getScalarType().getSizeInBits())) && ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT))) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N0.getDebugLoc(), VT, - LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N0), VT, + LN0->getChain(), LN0->getBasePtr(), + MemVT, LN0->getMemOperand()); AddToWorkList(N); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2710,7 +2782,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ? cast(N0.getOperand(0)) : cast(N0); if (LN0->getExtensionType() != ISD::SEXTLOAD && - LN0->isUnindexed() && N0.hasOneUse() && LN0->hasOneUse()) { + LN0->isUnindexed() && N0.hasOneUse() && SDValue(LN0, 0).hasOneUse()) { uint32_t ActiveBits = N1C->getAPIntValue().getActiveBits(); if (ActiveBits > 0 && APIntOps::isMask(ActiveBits, N1C->getAPIntValue())){ EVT ExtVT = EVT::getIntegerVT(*DAG.getContext(), ActiveBits); @@ -2721,11 +2793,9 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue NewLoad = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, - LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), - ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, + LN0->getChain(), LN0->getBasePtr(), ExtVT, + LN0->getMemOperand()); AddToWorkList(N); CombineTo(LN0, NewLoad, NewLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2748,7 +2818,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { unsigned LVTStoreBytes = LoadedVT.getStoreSize(); unsigned EVTStoreBytes = ExtVT.getStoreSize(); unsigned PtrOff = LVTStoreBytes - EVTStoreBytes; - NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(), PtrType, + NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0), PtrType, NewPtr, DAG.getConstant(PtrOff, PtrType)); Alignment = MinAlign(Alignment, PtrOff); } @@ -2757,11 +2827,11 @@ SDValue DAGCombiner::visitAND(SDNode *N) { EVT LoadResultTy = HasAnyExt ? LN0->getValueType(0) : VT; SDValue Load = - DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), LoadResultTy, + DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), LoadResultTy, LN0->getChain(), NewPtr, LN0->getPointerInfo(), ExtVT, LN0->isVolatile(), LN0->isNonTemporal(), - Alignment); + Alignment, LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(LN0, Load, Load.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -2786,7 +2856,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) { ADDC |= Mask; if (TLI.isLegalAddImmediate(ADDC.getSExtValue())) { SDValue NewAdd = - DAG.getNode(ISD::ADD, N0.getDebugLoc(), VT, + DAG.getNode(ISD::ADD, SDLoc(N0), VT, N0.getOperand(0), DAG.getConstant(ADDC, VT)); CombineTo(N0.getNode(), NewAdd); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
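One of the combines introduced in this change, (and (setne X, 0), (setne X, -1)) -> (setuge (add X, 1), 2), works because adding 1 with wraparound maps the two excluded values, 0 and -1, to exactly the unsigned values below 2. A standalone verification over a sample range (illustrative, not LLVM code):

#include <cassert>
#include <cstdint>

int main() {
  for (int32_t X = -1000; X <= 1000; ++X) {
    bool Folded = (uint32_t)(X + 1) >= 2u;   // (setuge (add X, 1), 2)
    bool Original = (X != 0) && (X != -1);   // (and (setne X, 0), (setne X, -1))
    assert(Folded == Original);
  }
  return 0;
}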
@@ -2797,6 +2867,14 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
     }
   }
 
+  // fold (and (or (srl N, 8), (shl N, 8)), 0xffff) -> (srl (bswap N), const)
+  if (N1C && N1C->getAPIntValue() == 0xffff && N0.getOpcode() == ISD::OR) {
+    SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0),
+                                       N0.getOperand(1), false);
+    if (BSwap.getNode())
+      return BSwap;
+  }
+
   return SDValue();
 }
 
@@ -2881,17 +2959,27 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
   if (N00 != N10)
     return SDValue();
 
-  // Make sure everything beyond the low halfword is zero since the SRL 16
-  // will clear the top bits.
+  // Make sure everything beyond the low halfword gets set to zero since the SRL
+  // 16 will clear the top bits.
   unsigned OpSizeInBits = VT.getSizeInBits();
-  if (DemandHighBits && OpSizeInBits > 16 &&
-      (!LookPassAnd0 || !LookPassAnd1) &&
-      !DAG.MaskedValueIsZero(N10, APInt::getHighBitsSet(OpSizeInBits, 16)))
-    return SDValue();
+  if (DemandHighBits && OpSizeInBits > 16) {
+    // If the left-shift isn't masked out then the only way this is a bswap is
+    // if all bits beyond the low 8 are 0. In that case the entire pattern
+    // reduces to a left shift anyway: leave it for other parts of the combiner.
+    if (!LookPassAnd0)
+      return SDValue();
 
-  SDValue Res = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT, N00);
+    // However, if the right shift isn't masked out then it might be because
+    // it's not needed. See if we can spot that too.
+    if (!LookPassAnd1 &&
+        !DAG.MaskedValueIsZero(
+            N10, APInt::getHighBitsSet(OpSizeInBits, OpSizeInBits - 16)))
+      return SDValue();
+  }
+
+  SDValue Res = DAG.getNode(ISD::BSWAP, SDLoc(N), VT, N00);
   if (OpSizeInBits > 16)
-    Res = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, Res,
+    Res = DAG.getNode(ISD::SRL, SDLoc(N), VT, Res,
                       DAG.getConstant(OpSizeInBits-16, getShiftAmountTy(VT)));
   return Res;
 }
@@ -2899,7 +2987,7 @@ SDValue DAGCombiner::MatchBSwapHWordLow(SDNode *N, SDValue N0, SDValue N1,
 /// isBSwapHWordElement - Return true if the specified node is an element
 /// that makes up a 32-bit packed halfword byteswap. i.e.
 /// ((x&0xff)<<8)|((x&0xff00)>>8)|((x&0x00ff0000)<<8)|((x&0xff000000)>>8)
-static bool isBSwapHWordElement(SDValue N, SmallVector<SDNode*, 4> &Parts) {
+static bool isBSwapHWordElement(SDValue N, SmallVectorImpl<SDNode*> &Parts) {
   if (!N.getNode()->hasOneUse())
     return false;
 
@@ -3024,19 +3112,19 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) {
   if (Parts[0] != Parts[1] || Parts[0] != Parts[2] || Parts[0] != Parts[3])
     return SDValue();
 
-  SDValue BSwap = DAG.getNode(ISD::BSWAP, N->getDebugLoc(), VT,
+  SDValue BSwap = DAG.getNode(ISD::BSWAP, SDLoc(N), VT,
                               SDValue(Parts[0],0));
 
-  // Result of the bswap should be rotated by 16. If it's not legal, than
+  // Result of the bswap should be rotated by 16. If it's not legal, then
   // do (x << 16) | (x >> 16).
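The halfword-byteswap patterns matched here reduce to an ordinary bswap plus a shift or rotate. A standalone check, assuming the GCC/Clang __builtin_bswap32 intrinsic; rotl16 is an ad hoc helper:

#include <cassert>
#include <cstdint>

static uint32_t rotl16(uint32_t V) { return (V << 16) | (V >> 16); }

int main() {
  for (uint64_t I = 0; I <= 0xFFFF; ++I) {
    uint32_t X = (uint32_t)(I * 0x10001u ^ 0x5A5A1234u); // arbitrary samples
    // Packed halfword byteswap == full bswap rotated by 16:
    uint32_t HWord = ((X & 0xFFu) << 8) | ((X & 0xFF00u) >> 8) |
                     ((X & 0xFF0000u) << 8) | ((X & 0xFF000000u) >> 8);
    assert(HWord == rotl16(__builtin_bswap32(X)));
    // Low-halfword pattern == bswap followed by srl 16 when the high
    // bits are zero, which is what MatchBSwapHWordLow emits:
    uint32_t Lo = (uint32_t)I;
    uint32_t LowPat = ((Lo & 0xFFu) << 8) | ((Lo & 0xFF00u) >> 8);
    assert(LowPat == (__builtin_bswap32(Lo) >> 16));
  }
  return 0;
}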
SDValue ShAmt = DAG.getConstant(16, getShiftAmountTy(VT)); if (TLI.isOperationLegalOrCustom(ISD::ROTL, VT)) - return DAG.getNode(ISD::ROTL, N->getDebugLoc(), VT, BSwap, ShAmt); + return DAG.getNode(ISD::ROTL, SDLoc(N), VT, BSwap, ShAmt); if (TLI.isOperationLegalOrCustom(ISD::ROTR, VT)) - return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, BSwap, ShAmt); - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, - DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, BSwap, ShAmt), - DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, BSwap, ShAmt)); + return DAG.getNode(ISD::ROTR, SDLoc(N), VT, BSwap, ShAmt); + return DAG.getNode(ISD::OR, SDLoc(N), VT, + DAG.getNode(ISD::SHL, SDLoc(N), VT, BSwap, ShAmt), + DAG.getNode(ISD::SRL, SDLoc(N), VT, BSwap, ShAmt)); } SDValue DAGCombiner::visitOR(SDNode *N) { @@ -3076,7 +3164,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N1, N0); // fold (or x, 0) -> x if (N1C && N1C->isNullValue()) return N0; @@ -3096,7 +3184,7 @@ SDValue DAGCombiner::visitOR(SDNode *N) { return BSwap; // reassociate or - SDValue ROR = ReassociateOps(ISD::OR, N->getDebugLoc(), N0, N1); + SDValue ROR = ReassociateOps(ISD::OR, SDLoc(N), N0, N1); if (ROR.getNode() != 0) return ROR; // Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2) @@ -3105,8 +3193,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { isa(N0.getOperand(1))) { ConstantSDNode *C1 = cast(N0.getOperand(1)); if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1), DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1)); } @@ -3121,19 +3209,19 @@ SDValue DAGCombiner::visitOR(SDNode *N) { // fold (or (setlt X, 0), (setlt Y, 0)) -> (setne (or X, Y), 0) if (cast(LR)->isNullValue() && (Op1 == ISD::SETNE || Op1 == ISD::SETLT)) { - SDValue ORNode = DAG.getNode(ISD::OR, LR.getDebugLoc(), + SDValue ORNode = DAG.getNode(ISD::OR, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorkList(ORNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ORNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ORNode, LR, Op1); } // fold (or (setne X, -1), (setne Y, -1)) -> (setne (and X, Y), -1) // fold (or (setgt X, -1), (setgt Y -1)) -> (setgt (and X, Y), -1) if (cast(LR)->isAllOnesValue() && (Op1 == ISD::SETNE || Op1 == ISD::SETGT)) { - SDValue ANDNode = DAG.getNode(ISD::AND, LR.getDebugLoc(), + SDValue ANDNode = DAG.getNode(ISD::AND, SDLoc(LR), LR.getValueType(), LL, RL); AddToWorkList(ANDNode.getNode()); - return DAG.getSetCC(N->getDebugLoc(), VT, ANDNode, LR, Op1); + return DAG.getSetCC(SDLoc(N), VT, ANDNode, LR, Op1); } } // canonicalize equivalent to ll == rl @@ -3148,8 +3236,8 @@ SDValue DAGCombiner::visitOR(SDNode *N) { (!LegalOperations || (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && TLI.isOperationLegal(ISD::SETCC, - TLI.getSetCCResultType(N0.getValueType()))))) - return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), + getSetCCResultType(N0.getValueType()))))) + return DAG.getSetCC(SDLoc(N), N0.getValueType(), LL, LR, Result); } } @@ -3176,15 +3264,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (DAG.MaskedValueIsZero(N0.getOperand(0), RHSMask&~LHSMask) && DAG.MaskedValueIsZero(N1.getOperand(0), LHSMask&~RHSMask)) { - SDValue X = 
DAG.getNode(ISD::OR, N0.getDebugLoc(), VT, + SDValue X = DAG.getNode(ISD::OR, SDLoc(N0), VT, N0.getOperand(0), N1.getOperand(0)); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, X, + return DAG.getNode(ISD::AND, SDLoc(N), VT, X, DAG.getConstant(LHSMask | RHSMask, VT)); } } // See if this is some rotate idiom. - if (SDNode *Rot = MatchRotate(N0, N1, N->getDebugLoc())) + if (SDNode *Rot = MatchRotate(N0, N1, SDLoc(N))) return SDValue(Rot, 0); // Simplify the operands using demanded-bits information. @@ -3217,7 +3305,7 @@ static bool MatchRotateHalf(SDValue Op, SDValue &Shift, SDValue &Mask) { // MatchRotate - Handle an 'or' of two operands. If this is one of the many // idioms for rotate, and if the target supports rotation instructions, generate // a rot[lr]. -SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { +SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, SDLoc DL) { // Must be a legal type. Expanded 'n promoted things won't work with rotates. EVT VT = LHS.getValueType(); if (!TLI.isTypeLegal(VT)) return 0; @@ -3292,33 +3380,9 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { if (LHSMask.getNode() || RHSMask.getNode()) return 0; - // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotl x, y) - // fold (or (shl x, y), (srl x, (sub 32, y))) -> (rotr x, (sub 32, y)) - if (RHSShiftAmt.getOpcode() == ISD::SUB && - LHSShiftAmt == RHSShiftAmt.getOperand(1)) { - if (ConstantSDNode *SUBC = - dyn_cast(RHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, - HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); - } - } - } - - // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotr x, y) - // fold (or (shl x, (sub 32, y)), (srl x, r)) -> (rotl x, (sub 32, y)) - if (LHSShiftAmt.getOpcode() == ISD::SUB && - RHSShiftAmt == LHSShiftAmt.getOperand(1)) { - if (ConstantSDNode *SUBC = - dyn_cast(LHSShiftAmt.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - } - } - - // Look for sign/zext/any-extended or truncate cases: + // If the shift amount is sign/zext/any-extended just peel it off. + SDValue LExtOp0 = LHSShiftAmt; + SDValue RExtOp0 = RHSShiftAmt; if ((LHSShiftAmt.getOpcode() == ISD::SIGN_EXTEND || LHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || LHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || @@ -3327,37 +3391,31 @@ SDNode *DAGCombiner::MatchRotate(SDValue LHS, SDValue RHS, DebugLoc DL) { RHSShiftAmt.getOpcode() == ISD::ZERO_EXTEND || RHSShiftAmt.getOpcode() == ISD::ANY_EXTEND || RHSShiftAmt.getOpcode() == ISD::TRUNCATE)) { - SDValue LExtOp0 = LHSShiftAmt.getOperand(0); - SDValue RExtOp0 = RHSShiftAmt.getOperand(0); - if (RExtOp0.getOpcode() == ISD::SUB && - RExtOp0.getOperand(1) == LExtOp0) { - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotl x, y) - // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> - // (rotr x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast(RExtOp0.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, - LHSShiftArg, - HasROTL ? 
LHSShiftAmt : RHSShiftAmt).getNode(); - } - } - } else if (LExtOp0.getOpcode() == ISD::SUB && - RExtOp0 == LExtOp0.getOperand(1)) { - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotr x, y) - // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> - // (rotl x, (sub 32, y)) - if (ConstantSDNode *SUBC = - dyn_cast(LExtOp0.getOperand(0))) { - if (SUBC->getAPIntValue() == OpSizeInBits) { - return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, - LHSShiftArg, - HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); - } - } - } + LExtOp0 = LHSShiftAmt.getOperand(0); + RExtOp0 = RHSShiftAmt.getOperand(0); + } + + if (RExtOp0.getOpcode() == ISD::SUB && RExtOp0.getOperand(1) == LExtOp0) { + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotl x, y) + // fold (or (shl x, (*ext y)), (srl x, (*ext (sub 32, y)))) -> + // (rotr x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast(RExtOp0.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) + return DAG.getNode(HasROTL ? ISD::ROTL : ISD::ROTR, DL, VT, LHSShiftArg, + HasROTL ? LHSShiftAmt : RHSShiftAmt).getNode(); + } else if (LExtOp0.getOpcode() == ISD::SUB && + RExtOp0 == LExtOp0.getOperand(1)) { + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotr x, y) + // fold (or (shl x, (*ext (sub 32, y))), (srl x, (*ext y))) -> + // (rotl x, (sub 32, y)) + if (ConstantSDNode *SUBC = + dyn_cast(LExtOp0.getOperand(0))) + if (SUBC->getAPIntValue() == OpSizeInBits) + return DAG.getNode(HasROTR ? ISD::ROTR : ISD::ROTL, DL, VT, LHSShiftArg, + HasROTR ? RHSShiftAmt : LHSShiftAmt).getNode(); } return 0; @@ -3396,12 +3454,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { return DAG.FoldConstantArithmetic(ISD::XOR, VT, N0C, N1C); // canonicalize constant to RHS if (N0C && !N1C) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N1, N0); // fold (xor x, 0) -> x if (N1C && N1C->isNullValue()) return N0; // reassociate xor - SDValue RXOR = ReassociateOps(ISD::XOR, N->getDebugLoc(), N0, N1); + SDValue RXOR = ReassociateOps(ISD::XOR, SDLoc(N), N0, N1); if (RXOR.getNode() != 0) return RXOR; @@ -3417,9 +3475,9 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { default: llvm_unreachable("Unhandled SetCC Equivalent!"); case ISD::SETCC: - return DAG.getSetCC(N->getDebugLoc(), VT, LHS, RHS, NotCC); + return DAG.getSetCC(SDLoc(N), VT, LHS, RHS, NotCC); case ISD::SELECT_CC: - return DAG.getSelectCC(N->getDebugLoc(), LHS, RHS, N0.getOperand(2), + return DAG.getSelectCC(SDLoc(N), LHS, RHS, N0.getOperand(2), N0.getOperand(3), NotCC); } } @@ -3430,10 +3488,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { N0.getNode()->hasOneUse() && isSetCCEquivalent(N0.getOperand(0), LHS, RHS, CC)){ SDValue V = N0.getOperand(0); - V = DAG.getNode(ISD::XOR, N0.getDebugLoc(), V.getValueType(), V, + V = DAG.getNode(ISD::XOR, SDLoc(N0), V.getValueType(), V, DAG.getConstant(1, V.getValueType())); AddToWorkList(V.getNode()); - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, V); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, V); } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are setcc @@ -3442,10 +3500,10 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isOneUseSetCC(RHS) || isOneUseSetCC(LHS)) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? 
ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); - return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } // fold (not (or x, y)) -> (and (not x), (not y)) iff x or y are constants @@ -3454,28 +3512,36 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { SDValue LHS = N0.getOperand(0), RHS = N0.getOperand(1); if (isa(RHS) || isa(LHS)) { unsigned NewOpcode = N0.getOpcode() == ISD::AND ? ISD::OR : ISD::AND; - LHS = DAG.getNode(ISD::XOR, LHS.getDebugLoc(), VT, LHS, N1); // LHS = ~LHS - RHS = DAG.getNode(ISD::XOR, RHS.getDebugLoc(), VT, RHS, N1); // RHS = ~RHS + LHS = DAG.getNode(ISD::XOR, SDLoc(LHS), VT, LHS, N1); // LHS = ~LHS + RHS = DAG.getNode(ISD::XOR, SDLoc(RHS), VT, RHS, N1); // RHS = ~RHS AddToWorkList(LHS.getNode()); AddToWorkList(RHS.getNode()); - return DAG.getNode(NewOpcode, N->getDebugLoc(), VT, LHS, RHS); + return DAG.getNode(NewOpcode, SDLoc(N), VT, LHS, RHS); } } + // fold (xor (and x, y), y) -> (and (not x), y) + if (N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() && + N0->getOperand(1) == N1) { + SDValue X = N0->getOperand(0); + SDValue NotX = DAG.getNOT(SDLoc(X), X, VT); + AddToWorkList(NotX.getNode()); + return DAG.getNode(ISD::AND, SDLoc(N), VT, NotX, N1); + } // fold (xor (xor x, c1), c2) -> (xor x, (xor c1, c2)) if (N1C && N0.getOpcode() == ISD::XOR) { ConstantSDNode *N00C = dyn_cast(N0.getOperand(0)); ConstantSDNode *N01C = dyn_cast(N0.getOperand(1)); if (N00C) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(1), + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(1), DAG.getConstant(N1C->getAPIntValue() ^ N00C->getAPIntValue(), VT)); if (N01C) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::XOR, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(N1C->getAPIntValue() ^ N01C->getAPIntValue(), VT)); } // fold (xor x, x) -> 0 if (N0 == N1) - return tryFoldToZero(N->getDebugLoc(), TLI, VT, DAG, LegalOperations); + return tryFoldToZero(SDLoc(N), TLI, VT, DAG, LegalOperations, LegalTypes); // Simplify: xor (op x...), (op y...) -> (op (xor x, y)) if (N0.getOpcode() == N1.getOpcode()) { @@ -3548,17 +3614,17 @@ SDValue DAGCombiner::visitShiftByConstant(SDNode *N, unsigned Amt) { } // Fold the constants, shifting the binop RHS by the shift amount. - SDValue NewRHS = DAG.getNode(N->getOpcode(), LHS->getOperand(1).getDebugLoc(), + SDValue NewRHS = DAG.getNode(N->getOpcode(), SDLoc(LHS->getOperand(1)), N->getValueType(0), LHS->getOperand(1), N->getOperand(1)); // Create the new shift. SDValue NewShift = DAG.getNode(N->getOpcode(), - LHS->getOperand(0).getDebugLoc(), + SDLoc(LHS->getOperand(0)), VT, LHS->getOperand(0), N->getOperand(1)); // Create the new binop. 
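The rewritten MatchRotate above still rests on the same arithmetic fact: for 0 < y < 32, (or (shl x, y), (srl x, (sub 32, y))) is a left rotate by y. A standalone C++ check of that identity (not part of the patch; rotl32 is a local helper):

    #include <cassert>
    #include <cstdint>

    // Left rotate built from the two shifts MatchRotate pattern-matches.
    static uint32_t rotl32(uint32_t x, unsigned y) {
      return (x << y) | (x >> (32u - y));   // assumes 0 < y < 32
    }

    int main() {
      assert(rotl32(0x80000001u, 1) == 0x00000003u);
      assert(rotl32(0xDEADBEEFu, 8) == 0xADBEEFDEu);
      return 0;
    }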
- return DAG.getNode(LHS->getOpcode(), N->getDebugLoc(), VT, NewShift, NewRHS); + return DAG.getNode(LHS->getOpcode(), SDLoc(N), VT, NewShift, NewRHS); } SDValue DAGCombiner::visitSHL(SDNode *N) { @@ -3569,6 +3635,12 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (shl c1, c2) -> c1<getAPIntValue(); TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::AND, N->getDebugLoc(), TruncVT, + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0, + DAG.getNode(ISD::AND, SDLoc(N), TruncVT, DAG.getNode(ISD::TRUNCATE, - N->getDebugLoc(), + SDLoc(N), TruncVT, N100), DAG.getConstant(TruncC, TruncVT))); } @@ -3617,7 +3689,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { uint64_t c2 = N1C->getZExtValue(); if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } @@ -3639,13 +3711,34 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { if (c2 >= OpSizeInBits - InnerShiftSize) { if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SHL, N0->getDebugLoc(), VT, - DAG.getNode(N0.getOpcode(), N0->getDebugLoc(), VT, + return DAG.getNode(ISD::SHL, SDLoc(N0), VT, + DAG.getNode(N0.getOpcode(), SDLoc(N0), VT, N0.getOperand(0)->getOperand(0)), DAG.getConstant(c1 + c2, N1.getValueType())); } } + // fold (shl (zext (srl x, C)), C) -> (zext (shl (srl x, C), C)) + // Only fold this if the inner zext has no other uses to avoid increasing + // the total number of instructions. 
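visitShiftByConstant (above) pushes a shift through a bitwise binop whose right operand is a constant; that is sound because shl distributes over or/and/xor. A standalone check, independent of the patch:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x12345678u, c1 = 0x0000FF00u;
      unsigned c2 = 4;
      // ((x op c1) << c2) == ((x << c2) op (c1 << c2)) for op in {|, &, ^}.
      assert(((x | c1) << c2) == ((x << c2) | (c1 << c2)));
      assert(((x & c1) << c2) == ((x << c2) & (c1 << c2)));
      assert(((x ^ c1) << c2) == ((x << c2) ^ (c1 << c2)));
      return 0;
    }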
+ if (N1C && N0.getOpcode() == ISD::ZERO_EXTEND && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::SRL && + isa(N0.getOperand(0)->getOperand(1))) { + uint64_t c1 = + cast(N0.getOperand(0)->getOperand(1))->getZExtValue(); + if (c1 < VT.getSizeInBits()) { + uint64_t c2 = N1C->getZExtValue(); + if (c1 == c2) { + SDValue NewOp0 = N0.getOperand(0); + EVT CountVT = NewOp0.getOperand(1).getValueType(); + SDValue NewSHL = DAG.getNode(ISD::SHL, SDLoc(N), NewOp0.getValueType(), + NewOp0, DAG.getConstant(c2, CountVT)); + AddToWorkList(NewSHL.getNode()); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N0), VT, NewSHL); + } + } + } + // fold (shl (srl x, c1), c2) -> (and (shl x, (sub c2, c1), MASK) or // (and (srl x, (sub c1, c2), MASK) // Only fold this if the inner shift has no other uses -- if it does, folding @@ -3660,14 +3753,14 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { SDValue Shift; if (c2 > c1) { Mask = Mask.shl(c2-c1); - Shift = DAG.getNode(ISD::SHL, N->getDebugLoc(), VT, N0.getOperand(0), + Shift = DAG.getNode(ISD::SHL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c2-c1, N1.getValueType())); } else { Mask = Mask.lshr(c1-c2); - Shift = DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + Shift = DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c1-c2, N1.getValueType())); } - return DAG.getNode(ISD::AND, N0.getDebugLoc(), VT, Shift, + return DAG.getNode(ISD::AND, SDLoc(N0), VT, Shift, DAG.getConstant(Mask, VT)); } } @@ -3678,7 +3771,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) { VT.getSizeInBits() - N1C->getZExtValue()), VT); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), HiBitsMask); } @@ -3699,6 +3792,12 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (sra c1, c2) -> (sra c1, c2) if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRA, VT, N0C, N1C); @@ -3724,7 +3823,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { ExtVT, VT.getVectorNumElements()); if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, ExtVT))) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0.getOperand(0), DAG.getValueType(ExtVT)); } @@ -3733,7 +3832,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { if (ConstantSDNode *C1 = dyn_cast(N0.getOperand(1))) { unsigned Sum = N1C->getZExtValue() + C1->getZExtValue(); if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1; - return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(Sum, N1C->getValueType(0))); } } @@ -3765,11 +3864,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue Amt = DAG.getConstant(ShiftAmt, getShiftAmountTy(N0.getOperand(0).getValueType())); - SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, + SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0), VT, N0.getOperand(0), Amt); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), TruncVT, + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), TruncVT, Shift); - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), N->getValueType(0), Trunc); } } @@ -3785,11 +3884,11 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { 
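The (shl (srl x, c1), c2) -> (and ..., MASK) folds above rest on the fact that shifting right then left by the same amount only clears the low bits. A standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEFu;
      // (x >> c) << c == x & ~((1 << c) - 1) for every in-range c.
      for (unsigned c = 1; c < 32; ++c)
        assert(((x >> c) << c) == (x & ~((1u << c) - 1u)));
      return 0;
    }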
SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC = TruncC.trunc(TruncVT.getScalarType().getSizeInBits()); - return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::AND, N->getDebugLoc(), + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0, + DAG.getNode(ISD::AND, SDLoc(N), TruncVT, DAG.getNode(ISD::TRUNCATE, - N->getDebugLoc(), + SDLoc(N), TruncVT, N100), DAG.getConstant(TruncC, TruncVT))); } @@ -3812,9 +3911,9 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { SDValue Amt = DAG.getConstant(LargeShiftAmt->getZExtValue() + N1C->getZExtValue(), getShiftAmountTy(N0.getOperand(0).getOperand(0).getValueType())); - SDValue SRA = DAG.getNode(ISD::SRA, N->getDebugLoc(), LargeVT, + SDValue SRA = DAG.getNode(ISD::SRA, SDLoc(N), LargeVT, N0.getOperand(0).getOperand(0), Amt); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, SRA); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, SRA); } } @@ -3825,7 +3924,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) { // If the sign bit is known to be zero, switch this to a SRL. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, N1); if (N1C) { SDValue NewSRA = visitShiftByConstant(N, N1C->getZExtValue()); @@ -3844,6 +3943,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { EVT VT = N0.getValueType(); unsigned OpSizeInBits = VT.getScalarType().getSizeInBits(); + // fold vector ops + if (VT.isVector()) { + SDValue FoldedVOp = SimplifyVBinOp(N); + if (FoldedVOp.getNode()) return FoldedVOp; + } + // fold (srl c1, c2) -> c1 >>u c2 if (N0C && N1C) return DAG.FoldConstantArithmetic(ISD::SRL, VT, N0C, N1C); @@ -3868,7 +3973,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { uint64_t c2 = N1C->getZExtValue(); if (c1 + c2 >= OpSizeInBits) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(c1 + c2, N1.getValueType())); } @@ -3886,8 +3991,8 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (c1 + OpSizeInBits == InnerShiftSize) { if (c1 + c2 >= InnerShiftSize) return DAG.getConstant(0, VT); - return DAG.getNode(ISD::TRUNCATE, N0->getDebugLoc(), VT, - DAG.getNode(ISD::SRL, N0->getDebugLoc(), InnerShiftVT, + return DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, + DAG.getNode(ISD::SRL, SDLoc(N0), InnerShiftVT, N0.getOperand(0)->getOperand(0), DAG.getConstant(c1 + c2, ShiftCountVT))); } @@ -3897,12 +4002,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (N1C && N0.getOpcode() == ISD::SHL && N0.getOperand(1) == N1 && N0.getValueSizeInBits() <= 64) { uint64_t ShAmt = N1C->getZExtValue()+64-N0.getValueSizeInBits(); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0.getOperand(0), DAG.getConstant(~0ULL >> ShAmt, VT)); } - - // fold (srl (anyextend x), c) -> (anyextend (srl x, c)) + // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask) if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) { // Shifting in all undef bits? 
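Two facts used by visitSRA above: consecutive arithmetic shifts combine with the summed amount clamped to bitwidth-1, and sra agrees with srl once the sign bit is known zero. A standalone check (assumes arithmetic right shift of signed ints, which mainstream compilers provide and C++20 guarantees):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t v = -100000;
      assert(((v >> 7) >> 9) == (v >> 16));    // amounts add
      assert(((v >> 20) >> 20) == (v >> 31));  // 40 clamps to 31
      // Sign bit known zero: sra and srl produce the same bits.
      uint32_t u = 0x7FFFFFFFu;
      assert((int32_t(u) >> 5) == int32_t(u >> 5));
      return 0;
    }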
EVT SmallVT = N0.getOperand(0).getValueType(); @@ -3911,11 +4015,14 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { if (!LegalTypes || TLI.isTypeDesirableForOp(ISD::SRL, SmallVT)) { uint64_t ShiftAmt = N1C->getZExtValue(); - SDValue SmallShift = DAG.getNode(ISD::SRL, N0.getDebugLoc(), SmallVT, + SDValue SmallShift = DAG.getNode(ISD::SRL, SDLoc(N0), SmallVT, N0.getOperand(0), DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT))); AddToWorkList(SmallShift.getNode()); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift); + APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt); + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift), + DAG.getConstant(Mask, VT)); } } @@ -3923,7 +4030,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { // bit, which is unmodified by sra. if (N1C && N1C->getZExtValue() + 1 == VT.getSizeInBits()) { if (N0.getOpcode() == ISD::SRA) - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0.getOperand(0), N1); + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0.getOperand(0), N1); } // fold (srl (ctlz x), "5") -> x iff x has one bit set (the low bit). @@ -3951,12 +4058,12 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue Op = N0.getOperand(0); if (ShAmt) { - Op = DAG.getNode(ISD::SRL, N0.getDebugLoc(), VT, Op, + Op = DAG.getNode(ISD::SRL, SDLoc(N0), VT, Op, DAG.getConstant(ShAmt, getShiftAmountTy(Op.getValueType()))); AddToWorkList(Op.getNode()); } - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Op, DAG.getConstant(1, VT)); } } @@ -3971,11 +4078,11 @@ SDValue DAGCombiner::visitSRL(SDNode *N) { SDValue N100 = N1.getOperand(0).getOperand(0); APInt TruncC = N101C->getAPIntValue(); TruncC = TruncC.trunc(TruncVT.getSizeInBits()); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), VT, N0, - DAG.getNode(ISD::AND, N->getDebugLoc(), + return DAG.getNode(ISD::SRL, SDLoc(N), VT, N0, + DAG.getNode(ISD::AND, SDLoc(N), TruncVT, DAG.getNode(ISD::TRUNCATE, - N->getDebugLoc(), + SDLoc(N), TruncVT, N100), DAG.getConstant(TruncC, TruncVT))); } @@ -4035,7 +4142,7 @@ SDValue DAGCombiner::visitCTLZ(SDNode *N) { // fold (ctlz c1) -> c2 if (isa(N0)) - return DAG.getNode(ISD::CTLZ, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTLZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4045,7 +4152,7 @@ SDValue DAGCombiner::visitCTLZ_ZERO_UNDEF(SDNode *N) { // fold (ctlz_zero_undef c1) -> c2 if (isa(N0)) - return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTLZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4055,7 +4162,7 @@ SDValue DAGCombiner::visitCTTZ(SDNode *N) { // fold (cttz c1) -> c2 if (isa(N0)) - return DAG.getNode(ISD::CTTZ, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTTZ, SDLoc(N), VT, N0); return SDValue(); } @@ -4065,7 +4172,7 @@ SDValue DAGCombiner::visitCTTZ_ZERO_UNDEF(SDNode *N) { // fold (cttz_zero_undef c1) -> c2 if (isa(N0)) - return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTTZ_ZERO_UNDEF, SDLoc(N), VT, N0); return SDValue(); } @@ -4075,7 +4182,7 @@ SDValue DAGCombiner::visitCTPOP(SDNode *N) { // fold (ctpop c1) -> c2 if (isa(N0)) - return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::CTPOP, SDLoc(N), VT, N0); return SDValue(); } @@ -4100,7 +4207,7 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { return N2; // fold (select C, 1, X) -> (or C, X) if (VT == MVT::i1 && N1C && N1C->getAPIntValue() == 1) - return 
DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select C, 0, 1) -> (xor C, 1) if (VT.isInteger() && (VT0 == MVT::i1 || @@ -4110,38 +4217,38 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { N1C && N2C && N1C->isNullValue() && N2C->getAPIntValue() == 1) { SDValue XORNode; if (VT == VT0) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT0, + return DAG.getNode(ISD::XOR, SDLoc(N), VT0, N0, DAG.getConstant(1, VT0)); - XORNode = DAG.getNode(ISD::XOR, N0.getDebugLoc(), VT0, + XORNode = DAG.getNode(ISD::XOR, SDLoc(N0), VT0, N0, DAG.getConstant(1, VT0)); AddToWorkList(XORNode.getNode()); if (VT.bitsGT(VT0)) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, XORNode); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, XORNode); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, XORNode); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, XORNode); } // fold (select C, 0, X) -> (and (not C), X) if (VT == VT0 && VT == MVT::i1 && N1C && N1C->isNullValue()) { - SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorkList(NOTNode.getNode()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, NOTNode, N2); + return DAG.getNode(ISD::AND, SDLoc(N), VT, NOTNode, N2); } // fold (select C, X, 1) -> (or (not C), X) if (VT == VT0 && VT == MVT::i1 && N2C && N2C->getAPIntValue() == 1) { - SDValue NOTNode = DAG.getNOT(N0.getDebugLoc(), N0, VT); + SDValue NOTNode = DAG.getNOT(SDLoc(N0), N0, VT); AddToWorkList(NOTNode.getNode()); - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, NOTNode, N1); + return DAG.getNode(ISD::OR, SDLoc(N), VT, NOTNode, N1); } // fold (select C, X, 0) -> (and C, X) if (VT == MVT::i1 && N2C && N2C->isNullValue()) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // fold (select X, X, Y) -> (or X, Y) // fold (select X, 1, Y) -> (or X, Y) if (VT == MVT::i1 && (N0 == N1 || (N1C && N1C->getAPIntValue() == 1))) - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, N0, N2); + return DAG.getNode(ISD::OR, SDLoc(N), VT, N0, N2); // fold (select X, Y, X) -> (and X, Y) // fold (select X, Y, 0) -> (and X, Y) if (VT == MVT::i1 && (N0 == N2 || (N2C && N2C->getAPIntValue() == 0))) - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::AND, SDLoc(N), VT, N0, N1); // If we can fold this based on the true/false value, do so. if (SimplifySelectOps(N, N1, N2)) @@ -4155,20 +4262,37 @@ SDValue DAGCombiner::visitSELECT(SDNode *N) { // about, since there is no way to mark an opcode illegal at all value types if (TLI.isOperationLegalOrCustom(ISD::SELECT_CC, MVT::Other) && TLI.isOperationLegalOrCustom(ISD::SELECT_CC, VT)) - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1, N2, N0.getOperand(2)); - return SimplifySelect(N->getDebugLoc(), N0, N1, N2); + return SimplifySelect(SDLoc(N), N0, N1, N2); } return SDValue(); } +static +std::pair SplitVSETCC(const SDNode *N, SelectionDAG &DAG) { + SDLoc DL(N); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + + // Split the inputs. 
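The i1 select folds above are ordinary Boolean identities. An exhaustive standalone check over both values of each operand:

    #include <cassert>

    int main() {
      for (int C = 0; C <= 1; ++C)
        for (int X = 0; X <= 1; ++X) {
          assert((C ? 1 : X) == (C | X));    // select C, 1, X -> or
          assert((C ? 0 : X) == (!C & X));   // select C, 0, X -> and(not C, X)
          assert((C ? X : 1) == (!C | X));   // select C, X, 1 -> or(not C, X)
          assert((C ? X : 0) == (C & X));    // select C, X, 0 -> and
        }
      return 0;
    }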
+  SDValue Lo, Hi, LL, LH, RL, RH;
+  llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0);
+  llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1);
+
+  Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2));
+  Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2));
+
+  return std::make_pair(Lo, Hi);
+}
+
 SDValue DAGCombiner::visitVSELECT(SDNode *N) {
   SDValue N0 = N->getOperand(0);
   SDValue N1 = N->getOperand(1);
   SDValue N2 = N->getOperand(2);
-  DebugLoc DL = N->getDebugLoc();
+  SDLoc DL(N);
 
   // Canonicalize integer abs.
   // vselect (setg[te] X, 0), X, -X ->
@@ -4201,6 +4325,34 @@ SDValue DAGCombiner::visitVSELECT(SDNode *N) {
     }
   }
 
+  // If the VSELECT result requires splitting and the mask is provided by a
+  // SETCC, then split both nodes and its operands before legalization. This
+  // prevents the type legalizer from unrolling SETCC into scalar comparisons
+  // and enables future optimizations (e.g. min/max pattern matching on X86).
+  if (N0.getOpcode() == ISD::SETCC) {
+    EVT VT = N->getValueType(0);
+
+    // Check if any splitting is required.
+    if (TLI.getTypeAction(*DAG.getContext(), VT) !=
+        TargetLowering::TypeSplitVector)
+      return SDValue();
+
+    SDValue Lo, Hi, CCLo, CCHi, LL, LH, RL, RH;
+    llvm::tie(CCLo, CCHi) = SplitVSETCC(N0.getNode(), DAG);
+    llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 1);
+    llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 2);
+
+    Lo = DAG.getNode(N->getOpcode(), DL, LL.getValueType(), CCLo, LL, RL);
+    Hi = DAG.getNode(N->getOpcode(), DL, LH.getValueType(), CCHi, LH, RH);
+
+    // Add the new VSELECT nodes to the work list in case they need to be split
+    // again.
+    AddToWorkList(Lo.getNode());
+    AddToWorkList(Hi.getNode());
+
+    return DAG.getNode(ISD::CONCAT_VECTORS, DL, VT, Lo, Hi);
+  }
+
   return SDValue();
 }
 
@@ -4217,35 +4369,37 @@ SDValue DAGCombiner::visitSELECT_CC(SDNode *N) {
     return N2;
 
   // Determine if the condition we're dealing with is constant
-  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
-                              N0, N1, CC, N->getDebugLoc(), false);
-  if (SCC.getNode()) AddToWorkList(SCC.getNode());
+  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
+                              N0, N1, CC, SDLoc(N), false);
+  if (SCC.getNode()) {
+    AddToWorkList(SCC.getNode());
 
-  if (ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode())) {
-    if (!SCCC->isNullValue())
-      return N2;    // cond always true -> true val
-    else
-      return N3;    // cond always false -> false val
-  }
+    if (ConstantSDNode *SCCC = dyn_cast<ConstantSDNode>(SCC.getNode())) {
+      if (!SCCC->isNullValue())
+        return N2;    // cond always true -> true val
+      else
+        return N3;    // cond always false -> false val
+    }
 
-  // Fold to a simpler select_cc
-  if (SCC.getNode() && SCC.getOpcode() == ISD::SETCC)
-    return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), N2.getValueType(),
-                       SCC.getOperand(0), SCC.getOperand(1), N2, N3,
-                       SCC.getOperand(2));
+    // Fold to a simpler select_cc
+    if (SCC.getOpcode() == ISD::SETCC)
+      return DAG.getNode(ISD::SELECT_CC, SDLoc(N), N2.getValueType(),
+                         SCC.getOperand(0), SCC.getOperand(1), N2, N3,
+                         SCC.getOperand(2));
+  }
 
   // If we can fold this based on the true/false value, do so.
   if (SimplifySelectOps(N, N2, N3))
    return SDValue(N, 0);  // Don't revisit N.
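Splitting a wide VSELECT into halves and concatenating, as the new code above does, is sound because the select acts independently on each element. A standalone scalar model of that fact:

    #include <array>
    #include <cassert>

    int main() {
      std::array<int, 8> a{1,2,3,4,5,6,7,8}, b{8,7,6,5,4,3,2,1};
      std::array<int, 8> whole{}, halves{};
      std::array<bool, 8> m{true,false,true,false,false,true,false,true};
      for (int i = 0; i < 8; ++i) whole[i] = m[i] ? a[i] : b[i];  // one select
      for (int i = 0; i < 4; ++i) halves[i] = m[i] ? a[i] : b[i]; // lo half
      for (int i = 4; i < 8; ++i) halves[i] = m[i] ? a[i] : b[i]; // hi half
      assert(whole == halves);   // concat of half-selects == full select
      return 0;
    }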
// fold select_cc into other things, such as min/max/abs - return SimplifySelectCC(N->getDebugLoc(), N0, N1, N2, N3, CC); + return SimplifySelectCC(SDLoc(N), N0, N1, N2, N3, CC); } SDValue DAGCombiner::visitSETCC(SDNode *N) { return SimplifySetCC(N->getValueType(0), N->getOperand(0), N->getOperand(1), cast(N->getOperand(2))->get(), - N->getDebugLoc()); + SDLoc(N)); } // ExtendUsesToFormExtLoad - Trying to extend uses of a load to enable this: @@ -4254,7 +4408,7 @@ SDValue DAGCombiner::visitSETCC(SDNode *N) { // mentioned transformation is profitable. static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, unsigned ExtOpc, - SmallVector &ExtendNodes, + SmallVectorImpl &ExtendNodes, const TargetLowering &TLI) { bool HasCopyToRegUses = false; bool isTruncFree = TLI.isTruncateFree(N->getValueType(0), N0.getValueType()); @@ -4312,8 +4466,8 @@ static bool ExtendUsesToFormExtLoad(SDNode *N, SDValue N0, return true; } -void DAGCombiner::ExtendSetCCUses(SmallVector SetCCs, - SDValue Trunc, SDValue ExtLoad, DebugLoc DL, +void DAGCombiner::ExtendSetCCUses(const SmallVectorImpl &SetCCs, + SDValue Trunc, SDValue ExtLoad, SDLoc DL, ISD::NodeType ExtType) { // Extend SetCC uses if necessary. for (unsigned i = 0, e = SetCCs.size(); i != e; ++i) { @@ -4340,12 +4494,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // fold (sext c1) -> c1 if (isa(N0)) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0); // fold (sext (sext x)) -> (sext x) // fold (sext (aext x)) -> (sext x) if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N0.getOperand(0)); if (N0.getOpcode() == ISD::TRUNCATE) { @@ -4379,22 +4533,22 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // Op is i32, Mid is i8, and Dest is i64. If Op has more than 24 sign // bits, just sext from i32. if (NumSignBits > OpBits-MidBits) - return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, Op); } else { // Op is i64, Mid is i8, and Dest is i32. If Op has more than 56 sign // bits, just truncate to i32. if (NumSignBits > OpBits-MidBits) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); } // fold (sext (truncate x)) -> (sextinreg x). 
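The first folds of visitSIGN_EXTEND above collapse stacked extensions: sign-extending an already sign-extended value changes nothing. A standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (int v = -128; v <= 127; ++v) {
        int8_t x = int8_t(v);
        assert(int64_t(int32_t(x)) == int64_t(x));   // sext(sext x) == sext x
        assert(int64_t(int16_t(x)) == int64_t(x));
      }
      return 0;
    }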
if (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, N0.getValueType())) { if (OpBits < DestBits) - Op = DAG.getNode(ISD::ANY_EXTEND, N0.getDebugLoc(), VT, Op); + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N0), VT, Op); else if (OpBits > DestBits) - Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op, + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), VT, Op); + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, Op, DAG.getValueType(N0.getValueType())); } } @@ -4412,17 +4566,15 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4436,15 +4588,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4467,23 +4617,20 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::SIGN_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, LN0->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.sext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ExtLoad, DAG.getConstant(Mask, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - N0.getOperand(0).getDebugLoc(), + SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::SIGN_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
} @@ -4494,13 +4641,13 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // sext(setcc) -> sext_in_reg(vsetcc) for vectors. // Only do this before legalize for now. if (VT.isVector() && !LegalOperations && - TLI.getBooleanContents(true) == + TLI.getBooleanContents(true) == TargetLowering::ZeroOrNegativeOneBooleanContent) { EVT N0VT = N0.getOperand(0).getValueType(); // On some architectures (such as SSE/NEON/etc) the SETCC result type is // of the same size as the compared operands. Only optimize sext(setcc()) // if this is the case. - EVT SVT = TLI.getSetCCResultType(N0VT); + EVT SVT = getSetCCResultType(N0VT); // We know that the # elements of the results is the same as the // # elements of the compare (and the # elements of the compare result @@ -4508,24 +4655,19 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. if (VT.getSizeInBits() == SVT.getSizeInBits()) - return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); + // If the desired elements are smaller or larger than the source // elements we can use a matching integer vector type and then // truncate/sign extend - EVT MatchingElementType = - EVT::getIntegerVT(*DAG.getContext(), - N0VT.getScalarType().getSizeInBits()); - EVT MatchingVectorType = - EVT::getVectorVT(*DAG.getContext(), MatchingElementType, - N0VT.getVectorNumElements()); - + EVT MatchingVectorType = N0VT.changeVectorElementTypeToInteger(); if (SVT == MatchingVectorType) { - SDValue VsetCC = DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, + SDValue VsetCC = DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); } } @@ -4534,24 +4676,26 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { SDValue NegOne = DAG.getConstant(APInt::getAllOnesValue(ElementWidth), VT); SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), NegOne, DAG.getConstant(0, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!VT.isVector() && (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))) - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, - DAG.getSetCC(N->getDebugLoc(), - TLI.getSetCCResultType(VT), - N0.getOperand(0), N0.getOperand(1), - cast(N0.getOperand(2))->get()), - NegOne, DAG.getConstant(0, VT)); + if (!VT.isVector() && + (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(VT)))) { + return DAG.getSelect(SDLoc(N), VT, + DAG.getSetCC(SDLoc(N), + getSetCCResultType(VT), + N0.getOperand(0), N0.getOperand(1), + cast(N0.getOperand(2))->get()), + NegOne, DAG.getConstant(0, VT)); + } } // fold (sext x) -> (zext x) if the sign bit is known zero. 
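The SimplifySelectCC fallback above materializes sext(setcc) as a select between all-ones (NegOne) and zero; under zero-or-negative-one boolean contents those are the same mask. A standalone check (two's complement assumed):

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t a = 3, b = 9;
      int32_t mask = -int32_t(a < b);        // sext of the i1 compare result
      assert(mask == (a < b ? -1 : 0));      // select NegOne, 0
      assert(uint32_t(mask) == 0xFFFFFFFFu);
      return 0;
    }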
if ((!LegalOperations || TLI.isOperationLegal(ISD::ZERO_EXTEND, VT)) && DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); return SDValue(); } @@ -4600,11 +4744,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // fold (zext c1) -> c1 if (isa(N0)) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0); // fold (zext (zext x)) -> (zext x) // fold (zext (aext x)) -> (zext x) if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, N0.getOperand(0)); // fold (zext (truncate x)) -> (zext x) or @@ -4623,9 +4767,9 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { VT.getSizeInBits())); if (TruncatedBits == (KnownZero & TruncatedBits)) { if (VT.bitsGT(Op.getValueType())) - return DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N), VT, Op); if (VT.bitsLT(Op.getValueType())) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); return Op; } @@ -4665,13 +4809,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { SDValue Op = N0.getOperand(0); if (Op.getValueType().bitsLT(VT)) { - Op = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, Op); + Op = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, Op); AddToWorkList(Op.getNode()); } else if (Op.getValueType().bitsGT(VT)) { - Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op); + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Op); AddToWorkList(Op.getNode()); } - return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), + return DAG.getZeroExtendInReg(Op, SDLoc(N), N0.getValueType().getScalarType()); } @@ -4685,13 +4829,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { !TLI.isZExtFree(N0.getValueType(), VT))) { SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, X.getDebugLoc(), VT, X); + X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(X), VT, X); } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); } APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, X, DAG.getConstant(Mask, VT)); } @@ -4708,18 +4852,16 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
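The zext(truncate x) fold above drops the pair entirely when the truncated-away bits are known zero; in general the pair is just an and with a low-bit mask. A standalone check:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xDEADBEEFu;
      assert(uint32_t(uint8_t(x)) == (x & 0xFFu)); // zext(trunc x) == and x, 255
      uint32_t y = 0x000000EFu;                    // high bits already zero
      assert(uint32_t(uint8_t(y)) == y);           // the pair is a no-op
      return 0;
    }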
} @@ -4741,23 +4883,20 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0.getOperand(0), ISD::ZERO_EXTEND, SetCCs, TLI); if (DoXform) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, LN0->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(LN0), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), LN0->getMemoryVT(), - LN0->isVolatile(), - LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getMemOperand()); APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - SDValue And = DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + SDValue And = DAG.getNode(N0.getOpcode(), SDLoc(N), VT, ExtLoad, DAG.getConstant(Mask, VT)); SDValue Trunc = DAG.getNode(ISD::TRUNCATE, - N0.getOperand(0).getDebugLoc(), + SDLoc(N0.getOperand(0)), N0.getOperand(0).getValueType(), ExtLoad); CombineTo(N, And); CombineTo(N0.getOperand(0).getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ZERO_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4772,15 +4911,13 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT MemVT = LN0->getMemoryVT(); if ((!LegalOperations && !LN0->isVolatile()) || TLI.isLoadExtLegal(ISD::ZEXTLOAD, MemVT)) { - SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::ZEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), MemVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), N0.getValueType(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4801,11 +4938,11 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { // for that matter). Check to see that they are the same size. If so, // we know that the element size of the sext'd result matches the // element size of the compare operands. 
- return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()), - DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &OneOps[0], OneOps.size())); // If the desired elements are smaller or larger than the source @@ -4818,18 +4955,18 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, - DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT), - DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, + DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT), + DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &OneOps[0], OneOps.size())); } // zext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; @@ -4852,7 +4989,7 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) { return SDValue(); } - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Ensure that the shift amount is wide enough for the shifted value. if (VT.getSizeInBits() >= 256) @@ -4872,14 +5009,14 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // fold (aext c1) -> c1 if (isa(N0)) - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, N0); // fold (aext (aext x)) -> (aext x) // fold (aext (zext x)) -> (zext x) // fold (aext (sext x)) -> (sext x) if (N0.getOpcode() == ISD::ANY_EXTEND || N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND) - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); // fold (aext (truncate (load x))) -> (aext (smaller load x)) // fold (aext (truncate (srl (load x), c))) -> (aext (small load (x+c/n))) @@ -4902,8 +5039,8 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { if (TruncOp.getValueType() == VT) return TruncOp; // x iff x size == zext size. 
if (TruncOp.getValueType().bitsGT(VT)) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, TruncOp); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, TruncOp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, TruncOp); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, TruncOp); } // Fold (aext (and (trunc x), cst)) -> (and x, cst) @@ -4915,13 +5052,13 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.getValueType())) { SDValue X = N0.getOperand(0).getOperand(0); if (X.getValueType().bitsLT(VT)) { - X = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, X); } else if (X.getValueType().bitsGT(VT)) { - X = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, X); } APInt Mask = cast(N0.getOperand(1))->getAPIntValue(); Mask = Mask.zext(VT.getSizeInBits()); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, X, DAG.getConstant(Mask, VT)); } @@ -4938,17 +5075,15 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { DoXform = ExtendUsesToFormExtLoad(N, N0, ISD::ANY_EXTEND, SetCCs, TLI); if (DoXform) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); - SDValue Trunc = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + SDValue Trunc = DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad); CombineTo(N0.getNode(), Trunc, ExtLoad.getValue(1)); - ExtendSetCCUses(SetCCs, Trunc, ExtLoad, N->getDebugLoc(), + ExtendSetCCUses(SetCCs, Trunc, ExtLoad, SDLoc(N), ISD::ANY_EXTEND); return SDValue(N, 0); // Return N so it doesn't get rechecked! } @@ -4962,14 +5097,12 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { N0.hasOneUse()) { LoadSDNode *LN0 = cast(N0); EVT MemVT = LN0->getMemoryVT(); - SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), N->getDebugLoc(), + SDValue ExtLoad = DAG.getExtLoad(LN0->getExtensionType(), SDLoc(N), VT, LN0->getChain(), LN0->getBasePtr(), - LN0->getPointerInfo(), MemVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + MemVT, LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), + DAG.getNode(ISD::TRUNCATE, SDLoc(N0), N0.getValueType(), ExtLoad), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -4986,7 +5119,7 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { // we know that the element size of the sext'd result matches the // element size of the compare operands. 
if (VT.getSizeInBits() == N0VT.getSizeInBits()) - return DAG.getSetCC(N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getSetCC(SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); // If the desired elements are smaller or larger than the source @@ -5000,16 +5133,16 @@ SDValue DAGCombiner::visitANY_EXTEND(SDNode *N) { EVT::getVectorVT(*DAG.getContext(), MatchingElementType, N0VT.getVectorNumElements()); SDValue VsetCC = - DAG.getSetCC(N->getDebugLoc(), MatchingVectorType, N0.getOperand(0), + DAG.getSetCC(SDLoc(N), MatchingVectorType, N0.getOperand(0), N0.getOperand(1), cast(N0.getOperand(2))->get()); - return DAG.getSExtOrTrunc(VsetCC, N->getDebugLoc(), VT); + return DAG.getSExtOrTrunc(VsetCC, SDLoc(N), VT); } } // aext(setcc x,y,cc) -> select_cc x, y, 1, 0, cc SDValue SCC = - SimplifySelectCC(N->getDebugLoc(), N0.getOperand(0), N0.getOperand(1), + SimplifySelectCC(SDLoc(N), N0.getOperand(0), N0.getOperand(1), DAG.getConstant(1, VT), DAG.getConstant(0, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) @@ -5030,9 +5163,8 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { assert(CV != 0 && "Const value should be ConstSDNode."); const APInt &CVal = CV->getAPIntValue(); APInt NewVal = CVal & Mask; - if (NewVal != CVal) { + if (NewVal != CVal) return DAG.getConstant(NewVal, V.getValueType()); - } break; } case ISD::OR: @@ -5056,7 +5188,7 @@ SDValue DAGCombiner::GetDemandedBits(SDValue V, const APInt &Mask) { APInt NewMask = Mask << Amt; SDValue SimplifyLHS = GetDemandedBits(V.getOperand(0), NewMask); if (SimplifyLHS.getNode()) - return DAG.getNode(ISD::SRL, V.getDebugLoc(), V.getValueType(), + return DAG.getNode(ISD::SRL, SDLoc(V), V.getValueType(), SimplifyLHS, V.getOperand(1)); } } @@ -5160,12 +5292,19 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // For the transform to be legal, the load must produce only two values // (the value loaded and the chain). Don't transform a pre-increment - // load, for example, which produces an extra value. Otherwise the + // load, for example, which produces an extra value. Otherwise the // transformation is not equivalent, and the downstream logic to replace // uses gets things wrong. if (LN0->getNumValues() > 2) return SDValue(); + // If the load that we're shrinking is an extload and we're not just + // discarding the extension we can't simply shrink the load. Bail. + // TODO: It would be possible to merge the extensions in some cases. 
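GetDemandedBits' SRL case above shifts the demanded mask up by the shift amount before recursing, because the bits demanded from (x >> c) come from bits (mask << c) of x. A standalone check of that identity:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0xCAFEBABEu, mask = 0xFFu;
      unsigned c = 8;
      assert(((x >> c) & mask) == ((x & (mask << c)) >> c));
      return 0;
    }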
+  if (LN0->getExtensionType() != ISD::NON_EXTLOAD &&
+      LN0->getMemoryVT().getSizeInBits() < ExtVT.getSizeInBits() + ShAmt)
+    return SDValue();
+
   EVT PtrType = N0.getOperand(1).getValueType();
 
   if (PtrType == MVT::Untyped || PtrType.isExtended())
@@ -5182,22 +5321,22 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
   uint64_t PtrOff = ShAmt / 8;
   unsigned NewAlign = MinAlign(LN0->getAlignment(), PtrOff);
-  SDValue NewPtr = DAG.getNode(ISD::ADD, LN0->getDebugLoc(),
+  SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LN0),
                                PtrType, LN0->getBasePtr(),
                                DAG.getConstant(PtrOff, PtrType));
   AddToWorkList(NewPtr.getNode());
 
   SDValue Load;
   if (ExtType == ISD::NON_EXTLOAD)
-    Load =  DAG.getLoad(VT, N0.getDebugLoc(), LN0->getChain(), NewPtr,
+    Load =  DAG.getLoad(VT, SDLoc(N0), LN0->getChain(), NewPtr,
                         LN0->getPointerInfo().getWithOffset(PtrOff),
                         LN0->isVolatile(), LN0->isNonTemporal(),
-                        LN0->isInvariant(), NewAlign);
+                        LN0->isInvariant(), NewAlign, LN0->getTBAAInfo());
   else
-    Load = DAG.getExtLoad(ExtType, N0.getDebugLoc(), VT, LN0->getChain(),NewPtr,
+    Load = DAG.getExtLoad(ExtType, SDLoc(N0), VT, LN0->getChain(),NewPtr,
                           LN0->getPointerInfo().getWithOffset(PtrOff),
                           ExtVT, LN0->isVolatile(), LN0->isNonTemporal(),
-                          NewAlign);
+                          NewAlign, LN0->getTBAAInfo());
 
   // Replace the old load's chain with the new load's chain.
   WorkListRemover DeadNodes(*this);
@@ -5216,7 +5355,7 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) {
     if (ShLeftAmt >= VT.getSizeInBits())
       Result = DAG.getConstant(0, VT);
     else
-      Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT,
+      Result = DAG.getNode(ISD::SHL, SDLoc(N0), VT,
                           Result, DAG.getConstant(ShLeftAmt, ShImmTy));
   }
 
@@ -5234,7 +5373,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
 
   // fold (sext_in_reg c1) -> c1
   if (isa<ConstantSDNode>(N0) || N0.getOpcode() == ISD::UNDEF)
-    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, N0, N1);
 
   // If the input is already sign extended, just drop the extension.
   if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
@@ -5242,10 +5381,9 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
 
   // fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
   if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
-      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT())) {
-    return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
+      EVT.bitsLT(cast<VTSDNode>(N0.getOperand(1))->getVT()))
+    return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT,
                        N0.getOperand(0), N1);
-  }
 
   // fold (sext_in_reg (sext x)) -> (sext x)
   // fold (sext_in_reg (aext x)) -> (sext x)
@@ -5254,12 +5392,12 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
     SDValue N00 = N0.getOperand(0);
     if (N00.getValueType().getScalarType().getSizeInBits() <= EVTBits &&
         (!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND, VT)))
-      return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
+      return DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, N00, N1);
   }
 
   // fold (sext_in_reg x) -> (zext_in_reg x) if the sign bit is known zero.
   if (DAG.MaskedValueIsZero(N0, APInt::getBitsSet(VTBits, EVTBits-1, EVTBits)))
-    return DAG.getZeroExtendInReg(N0, N->getDebugLoc(), EVT);
+    return DAG.getZeroExtendInReg(N0, SDLoc(N), EVT);
 
   // fold operands of sext_in_reg based on knowledge that the top bits are not
   // demanded.
@@ -5282,7 +5420,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
     // extended enough.
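ReduceLoadWidth above turns (truncate (srl (load p), 8*k)) into a narrow load at byte offset k (the PtrOff = ShAmt / 8 computation). On a little-endian host the underlying fact can be checked directly; the endianness assumption is noted in the code:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      uint32_t word = 0x11223344u;
      unsigned char bytes[4];
      std::memcpy(bytes, &word, 4);   // assumes a little-endian host
      // Loading byte k equals shifting the wide value right by 8*k bits.
      for (unsigned k = 0; k < 4; ++k)
        assert(bytes[k] == uint8_t(word >> (8 * k)));
      return 0;
    }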
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0)); if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits) - return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SRA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1)); } } @@ -5294,12 +5432,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); AddToWorkList(ExtLoad.getNode()); @@ -5312,12 +5448,10 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::SEXTLOAD, EVT))) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::SEXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - EVT, - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), EVT, + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! @@ -5328,7 +5462,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { SDValue BSwap = MatchBSwapHWordLow(N0.getNode(), N0.getOperand(0), N0.getOperand(1), false); if (BSwap.getNode() != 0) - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), VT, BSwap, N1); } @@ -5345,21 +5479,21 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { return N0; // fold (truncate c1) -> c1 if (isa(N0)) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0); // fold (truncate (truncate x)) -> (truncate x) if (N0.getOpcode() == ISD::TRUNCATE) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // fold (truncate (ext x)) -> (ext x) or (truncate x) or x if (N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) { if (N0.getOperand(0).getValueType().bitsLT(VT)) // if the source is smaller than the dest, we still need an extend - return DAG.getNode(N0.getOpcode(), N->getDebugLoc(), VT, + return DAG.getNode(N0.getOpcode(), SDLoc(N), VT, N0.getOperand(0)); if (N0.getOperand(0).getValueType().bitsGT(VT)) // if the source is larger than the dest, than we just need the truncate - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, N0.getOperand(0)); // if the source and dest are the same type, we can drop both the extend // and the truncate. return N0.getOperand(0); @@ -5391,14 +5525,14 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { SDValue EltNo = N0->getOperand(1); if (isa(EltNo) && isTypeLegal(NVT)) { int Elt = cast(EltNo)->getZExtValue(); - EVT IndexTy = N0->getOperand(1).getValueType(); + EVT IndexTy = TLI.getVectorIdxTy(); int Index = isLE ? 
(Elt*SizeRatio) : (Elt*SizeRatio + (SizeRatio-1)); - SDValue V = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + SDValue V = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, N0.getOperand(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, - N->getDebugLoc(), TrTy, V, + SDLoc(N), TrTy, V, DAG.getConstant(Index, IndexTy)); } } @@ -5430,7 +5564,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) Opnds.push_back(BuildVect.getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0], + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), VT, &Opnds[0], Opnds.size()); } } @@ -5445,7 +5579,7 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { GetDemandedBits(N0, APInt::getLowBitsSet(N0.getValueSizeInBits(), VT.getSizeInBits())); if (Shorter.getNode()) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Shorter); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, Shorter); } // fold (truncate (load x)) -> (smaller load x) // fold (truncate (srl (load x), c)) -> (smaller load (x+c/evtbits)) @@ -5488,11 +5622,11 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { Opnds.push_back(DAG.getUNDEF(VTs[i])); continue; } - SDValue NV = DAG.getNode(ISD::TRUNCATE, V.getDebugLoc(), VTs[i], V); + SDValue NV = DAG.getNode(ISD::TRUNCATE, SDLoc(V), VTs[i], V); AddToWorkList(NV.getNode()); Opnds.push_back(NV); } - return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, &Opnds[0], Opnds.size()); } } @@ -5538,7 +5672,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { if (NewAlign <= Align && (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) - return DAG.getLoad(VT, N->getDebugLoc(), LD1->getChain(), + return DAG.getLoad(VT, SDLoc(N), LD1->getChain(), LD1->getBasePtr(), LD1->getPointerInfo(), false, false, false, Align); } @@ -5575,7 +5709,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // If the input is a constant, let getNode fold it. if (isa(N0) || isa(N0)) { - SDValue Res = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, N0); + SDValue Res = DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0); if (Res.getNode() != N) { if (!LegalOperations || TLI.isOperationLegal(Res.getNode()->getOpcode(), VT)) @@ -5592,7 +5726,7 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (conv (conv x, t1), t2) -> (conv x, t2) if (N0.getOpcode() == ISD::BITCAST) - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, N0.getOperand(0)); // fold (conv (load x)) -> (load (conv*)x) @@ -5600,20 +5734,22 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && // Do not change the width of a volatile load. 
!cast(N0)->isVolatile() && - (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT))) { + (!LegalOperations || TLI.isOperationLegal(ISD::LOAD, VT)) && + TLI.isLoadBitCastBeneficial(N0.getValueType(), VT)) { LoadSDNode *LN0 = cast(N0); unsigned Align = TLI.getDataLayout()-> getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext())); unsigned OrigAlign = LN0->getAlignment(); if (Align <= OrigAlign) { - SDValue Load = DAG.getLoad(VT, N->getDebugLoc(), LN0->getChain(), + SDValue Load = DAG.getLoad(VT, SDLoc(N), LN0->getChain(), LN0->getBasePtr(), LN0->getPointerInfo(), LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), OrigAlign); + LN0->isInvariant(), OrigAlign, + LN0->getTBAAInfo()); AddToWorkList(N); CombineTo(N0.getNode(), - DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + DAG.getNode(ISD::BITCAST, SDLoc(N0), N0.getValueType(), Load), Load.getValue(1)); return Load; @@ -5623,20 +5759,20 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) // This often reduces constant pool loads. - if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(VT)) || - (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(VT))) && + if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || + (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && N0.getNode()->hasOneUse() && VT.isInteger() && !VT.isVector() && !N0.getValueType().isVector()) { - SDValue NewConv = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), VT, + SDValue NewConv = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); AddToWorkList(NewConv.getNode()); APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) - return DAG.getNode(ISD::XOR, N->getDebugLoc(), VT, + return DAG.getNode(ISD::XOR, SDLoc(N), VT, NewConv, DAG.getConstant(SignBit, VT)); assert(N0.getOpcode() == ISD::FABS); - return DAG.getNode(ISD::AND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::AND, SDLoc(N), VT, NewConv, DAG.getConstant(~SignBit, VT)); } @@ -5650,38 +5786,38 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { unsigned OrigXWidth = N0.getOperand(1).getValueType().getSizeInBits(); EVT IntXVT = EVT::getIntegerVT(*DAG.getContext(), OrigXWidth); if (isTypeLegal(IntXVT)) { - SDValue X = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0), IntXVT, N0.getOperand(1)); AddToWorkList(X.getNode()); // If X has a different width than the result/lhs, sext it or truncate it. unsigned VTWidth = VT.getSizeInBits(); if (OrigXWidth < VTWidth) { - X = DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, X); + X = DAG.getNode(ISD::SIGN_EXTEND, SDLoc(N), VT, X); AddToWorkList(X.getNode()); } else if (OrigXWidth > VTWidth) { // To get the sign bit in the right place, we have to shift it right // before truncating. 
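The bitconvert folds above work because fneg only flips the IEEE-754 sign bit and fabs only clears it, so both become integer bit operations on the converted value. A standalone check (assumes IEEE-754 binary32 floats):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    // Reinterpret a float's bits as an integer, as the bitcast fold does.
    static uint32_t bits(float f) { uint32_t u; std::memcpy(&u, &f, 4); return u; }

    int main() {
      float x = -3.5f;
      assert((bits(x) ^ 0x80000000u) == bits(3.5f));  // xor signbit == fneg
      assert((bits(x) & 0x7FFFFFFFu) == bits(3.5f));  // and ~signbit == fabs
      return 0;
    }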
- X = DAG.getNode(ISD::SRL, X.getDebugLoc(), + X = DAG.getNode(ISD::SRL, SDLoc(X), X.getValueType(), X, DAG.getConstant(OrigXWidth-VTWidth, X.getValueType())); AddToWorkList(X.getNode()); - X = DAG.getNode(ISD::TRUNCATE, X.getDebugLoc(), VT, X); + X = DAG.getNode(ISD::TRUNCATE, SDLoc(X), VT, X); AddToWorkList(X.getNode()); } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); - X = DAG.getNode(ISD::AND, X.getDebugLoc(), VT, + X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, VT)); AddToWorkList(X.getNode()); - SDValue Cst = DAG.getNode(ISD::BITCAST, N0.getDebugLoc(), + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0), VT, N0.getOperand(0)); - Cst = DAG.getNode(ISD::AND, Cst.getDebugLoc(), VT, + Cst = DAG.getNode(ISD::AND, SDLoc(Cst), VT, Cst, DAG.getConstant(~SignBit, VT)); AddToWorkList(Cst.getNode()); - return DAG.getNode(ISD::OR, N->getDebugLoc(), VT, X, Cst); + return DAG.getNode(ISD::OR, SDLoc(N), VT, X, Cst); } } @@ -5722,8 +5858,8 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // Due to the FP element handling below calling this routine recursively, // we can end up with a scalar-to-vector node here. if (BV->getOpcode() == ISD::SCALAR_TO_VECTOR) - return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, - DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, + DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, BV->getOperand(0))); SmallVector Ops; @@ -5732,12 +5868,12 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { // If the vector element type is not legal, the BUILD_VECTOR operands // are promoted and implicitly truncated. Make that explicit here. if (Op.getValueType() != SrcEltVT) - Op = DAG.getNode(ISD::TRUNCATE, BV->getDebugLoc(), SrcEltVT, Op); - Ops.push_back(DAG.getNode(ISD::BITCAST, BV->getDebugLoc(), + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(BV), SrcEltVT, Op); + Ops.push_back(DAG.getNode(ISD::BITCAST, SDLoc(BV), DstEltVT, Op)); AddToWorkList(Ops.back().getNode()); } - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5794,7 +5930,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { } EVT VT = EVT::getVectorVT(*DAG.getContext(), DstEltVT, Ops.size()); - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5821,7 +5957,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { Ops.push_back(DAG.getConstant(ThisVal, DstEltVT)); if (isS2V && i == 0 && j == 0 && ThisVal.zext(SrcBitSize) == OpVal) // Simply turn this into a SCALAR_TO_VECTOR of the new type. 
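The fneg, fabs, and copysign rewrites above all lean on the same IEEE-754 representation trick: the sign lives in a single bit, so negation is an xor with the sign-bit mask, fabs is an and with its complement, and copysign splices the sign of one value onto the magnitude of another. A standalone check of those identities (single precision, plain C++, no LLVM types):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    static uint32_t bits(float F) { uint32_t B; std::memcpy(&B, &F, sizeof B); return B; }
    static float val(uint32_t B) { float F; std::memcpy(&F, &B, sizeof F); return F; }

    int main() {
      const uint32_t Sign = 0x80000000u;
      float X = 3.5f, Y = -1.25f;
      assert(val(bits(X) ^ Sign) == -X);                       // fneg  == xor signbit
      assert(val(bits(-X) & ~Sign) == X);                      // fabs  == and ~signbit
      assert(val((bits(Y) & Sign) | (bits(X) & ~Sign)) == -X); // copysign(x, y)
      return 0;
    }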
- return DAG.getNode(ISD::SCALAR_TO_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(BV), VT, Ops[0]); OpVal = OpVal.lshr(DstBitSize); } @@ -5831,7 +5967,7 @@ ConstantFoldBITCASTofBUILD_VECTOR(SDNode *BV, EVT DstEltVT) { std::reverse(Ops.end()-NumOutputsPerInput, Ops.end()); } - return DAG.getNode(ISD::BUILD_VECTOR, BV->getDebugLoc(), VT, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(BV), VT, &Ops[0], Ops.size()); } @@ -5850,10 +5986,10 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd c1, c2) -> c1 + c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N0); // fold (fadd A, 0) -> A if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -5861,20 +5997,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // fold (fadd A, (fneg B)) -> (fsub A, B) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options) == 2) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, GetNegatedExpression(N1, DAG, LegalOperations)); // fold (fadd (fneg A), B) -> (fsub B, A) if ((!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FSUB, VT)) && isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options) == 2) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N1, + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N1, GetNegatedExpression(N0, DAG, LegalOperations)); // If allowed, fold (fadd (fadd x, c1), c2) -> (fadd x, (fadd c1, c2)) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FADD && N0.getNode()->hasOneUse() && isa(N0.getOperand(1))) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FADD, SDLoc(N), VT, N0.getOperand(1), N1)); // No FP constant should be created after legalization as Instruction @@ -5883,22 +6019,20 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { // We don't need test this condition for transformation like following, as // the DAG being transformed implies it is legal to take FP constant as // operand. - // + // // (fadd (fmul c, x), x) -> (fmul c+1, x) - // + // bool AllowNewFpConst = (Level < AfterLegalizeDAG); // If allow, fold (fadd (fneg x), x) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { + N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) return DAG.getConstantFP(0.0, VT); - } // If allow, fold (fadd x, (fneg x)) -> 0.0 if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && - N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { + N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) return DAG.getConstantFP(0.0, VT); - } // In unsafe math mode, we can fold chains of FADD's of the same value // into multiplications. 
This transform is not safe in general because @@ -5910,43 +6044,43 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP00 = dyn_cast(N0.getOperand(0)); ConstantFPSDNode *CFP01 = dyn_cast(N0.getOperand(1)); - // (fadd (fmul c, x), x) -> (fmul c+1, x) + // (fadd (fmul c, x), x) -> (fmul x, c+1) if (CFP00 && !CFP01 && N0.getOperand(1) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP00, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); } - // (fadd (fmul x, c), x) -> (fmul c+1, x) + // (fadd (fmul x, c), x) -> (fmul x, c+1) if (CFP01 && !CFP00 && N0.getOperand(0) == N1) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP01, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, NewCFP); } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul c, x), (fadd x, x)) -> (fmul x, c+2) if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP00, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(1), NewCFP); } - // (fadd (fmul x, c), (fadd x, x)) -> (fmul c+2, x) + // (fadd (fmul x, c), (fadd x, x)) -> (fmul x, c+2) if (CFP01 && !CFP00 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP01, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), NewCFP); } } @@ -5955,98 +6089,93 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); ConstantFPSDNode *CFP11 = dyn_cast(N1.getOperand(1)); - // (fadd x, (fmul c, x)) -> (fmul c+1, x) + // (fadd x, (fmul c, x)) -> (fmul x, c+1) if (CFP10 && !CFP11 && N1.getOperand(1) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP10, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); } - // (fadd x, (fmul x, c)) -> (fmul c+1, x) + // (fadd x, (fmul x, c)) -> (fmul x, c+1) if (CFP11 && !CFP10 && N1.getOperand(0) == N0) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), DAG.getConstantFP(1.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, NewCFP); } - // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) - if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(1) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + // (fadd (fadd x, x), (fmul c, x)) -> (fmul x, c+2) + if (CFP10 && !CFP11 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == 
N0.getOperand(1) && + N1.getOperand(1) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP10, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0.getOperand(1), NewCFP); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1.getOperand(1), NewCFP); } - // (fadd (fadd x, x), (fmul x, c)) -> (fmul c+2, x) - if (CFP11 && !CFP10 && N1.getOpcode() == ISD::FADD && - N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - SDValue NewCFP = DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, + // (fadd (fadd x, x), (fmul x, c)) -> (fmul x, c+2) + if (CFP11 && !CFP10 && N0.getOpcode() == ISD::FADD && + N0.getOperand(0) == N0.getOperand(1) && + N1.getOperand(0) == N0.getOperand(0)) { + SDValue NewCFP = DAG.getNode(ISD::FADD, SDLoc(N), VT, SDValue(CFP11, 0), DAG.getConstantFP(2.0, VT)); - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0.getOperand(0), NewCFP); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, + N1.getOperand(0), NewCFP); } } if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP = dyn_cast(N0.getOperand(0)); - // (fadd (fadd x, x), x) -> (fmul 3.0, x) + // (fadd (fadd x, x), x) -> (fmul x, 3.0) if (!CFP && N0.getOperand(0) == N0.getOperand(1) && - (N0.getOperand(0) == N1)) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + (N0.getOperand(0) == N1)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, DAG.getConstantFP(3.0, VT)); - } } if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); - // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + // (fadd x, (fadd x, x)) -> (fmul x, 3.0) if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, DAG.getConstantFP(3.0, VT)); - } } - // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) + // (fadd (fadd x, x), (fadd x, x)) -> (fmul x, 4.0) if (AllowNewFpConst && N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && - N0.getOperand(0) == N1.getOperand(0)) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0.getOperand(0) == N1.getOperand(0)) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), DAG.getConstantFP(4.0, VT)); - } } // FADD -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fadd (fmul x, y), z) -> (fma x, y, z) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N0.getOperand(0), N0.getOperand(1), N1); - } // fold (fadd x, (fmul y, z)) -> (fma y, z, x) // Note: Commutes FADD operands. 
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1.getOperand(0), N1.getOperand(1), N0); - } } return SDValue(); @@ -6058,7 +6187,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // fold vector ops if (VT.isVector()) { @@ -6068,7 +6197,7 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // fold (fsub c1, c2) -> c1-c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FSUB, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FSUB, SDLoc(N), VT, N0, N1); // fold (fsub A, 0) -> A if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -6101,8 +6230,9 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { if (N10 == N0 && isNegatibleForFree(N11, LegalOperations, TLI, &DAG.getTarget().Options)) return GetNegatedExpression(N11, DAG, LegalOperations); - else if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, - &DAG.getTarget().Options)) + + if (N11 == N0 && isNegatibleForFree(N10, LegalOperations, TLI, + &DAG.getTarget().Options)) return GetNegatedExpression(N10, DAG, LegalOperations); } } @@ -6110,27 +6240,25 @@ SDValue DAGCombiner::visitFSUB(SDNode *N) { // FSUB -> FMA combines: if ((DAG.getTarget().Options.AllowFPOpFusion == FPOpFusion::Fast || DAG.getTarget().Options.UnsafeFPMath) && - DAG.getTarget().getTargetLowering()->isFMAFasterThanMulAndAdd(VT) && - TLI.isOperationLegalOrCustom(ISD::FMA, VT)) { + DAG.getTarget().getTargetLowering()->isFMAFasterThanFMulAndFAdd(VT) && + (!LegalOperations || TLI.isOperationLegalOrCustom(ISD::FMA, VT))) { // fold (fsub (fmul x, y), z) -> (fma x, y, (fneg z)) - if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) { + if (N0.getOpcode() == ISD::FMUL && N0->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, N0.getOperand(0), N0.getOperand(1), DAG.getNode(ISD::FNEG, dl, VT, N1)); - } // fold (fsub x, (fmul y, z)) -> (fma (fneg y), z, x) // Note: Commutes FSUB operands. 
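Both the FADD->FMA and FSUB->FMA combines above can change the numeric result: a fused multiply-add rounds once where multiply-then-add rounds twice, which is why the combine is gated on AllowFPOpFusion or UnsafeFPMath and on the isFMAFasterThanFMulAndFAdd hook renamed in these hunks. A self-contained demonstration of the single-rounding difference:

    #include <cmath>
    #include <cstdio>

    int main() {
      const double e = std::ldexp(1.0, -27);   // 2^-27
      double x = 1.0 + e, y = 1.0 - e, z = -1.0;
      double separate = x * y + z;             // product rounds, then add
      double fused = std::fma(x, y, z);        // one rounding at the end
      // x*y is exactly 1 - 2^-54, which rounds to 1.0 in double:
      // separate is 0, while fused keeps the -2^-54 the product dropped.
      std::printf("separate=%g fused=%g\n", separate, fused);
      return 0;
    }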
- if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) { + if (N1.getOpcode() == ISD::FMUL && N1->hasOneUse()) return DAG.getNode(ISD::FMA, dl, VT, DAG.getNode(ISD::FNEG, dl, VT, N1.getOperand(0)), N1.getOperand(1), N0); - } - // fold (fsub (-(fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) - if (N0.getOpcode() == ISD::FNEG && + // fold (fsub (fneg (fmul, x, y)), z) -> (fma (fneg x), y, (fneg z)) + if (N0.getOpcode() == ISD::FNEG && N0.getOperand(0).getOpcode() == ISD::FMUL && N0->hasOneUse() && N0.getOperand(0).hasOneUse()) { SDValue N00 = N0.getOperand(0).getOperand(0); @@ -6160,10 +6288,10 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { // fold (fmul c1, c2) -> c1*c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, N1); // canonicalize constant to RHS if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N1, N0); + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N1, N0); // fold (fmul A, 0) -> 0 if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N1CFP->getValueAPF().isZero()) @@ -6177,21 +6305,21 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { return N0; // fold (fmul X, 2.0) -> (fadd X, X) if (N1CFP && N1CFP->isExactlyValue(+2.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N0); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N0); // fold (fmul X, -1.0) -> (fneg X) if (N1CFP && N1CFP->isExactlyValue(-1.0)) if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0); // fold (fmul (fneg X), (fneg Y)) -> (fmul X, Y) if (char LHSNeg = isNegatibleForFree(N0, LegalOperations, TLI, &DAG.getTarget().Options)) { - if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, + if (char RHSNeg = isNegatibleForFree(N1, LegalOperations, TLI, &DAG.getTarget().Options)) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -6201,8 +6329,8 @@ SDValue DAGCombiner::visitFMUL(SDNode *N) { if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0.getOpcode() == ISD::FMUL && N0.getNode()->hasOneUse() && isa(N0.getOperand(1))) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0.getOperand(0), - DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), + DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(1), N1)); return SDValue(); @@ -6215,7 +6343,7 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { ConstantFPSDNode *N0CFP = dyn_cast(N0); ConstantFPSDNode *N1CFP = dyn_cast(N1); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (DAG.getTarget().Options.UnsafeFPMath) { if (N0CFP && N0CFP->isZero()) @@ -6224,13 +6352,13 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { return N2; } if (N0CFP && N0CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N1, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N1, N2); if (N1CFP && N1CFP->isExactlyValue(1.0)) - return DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0, N2); + return DAG.getNode(ISD::FADD, SDLoc(N), VT, N0, N2); // Canonicalize (fma c, x, y) -> (fma x, c, y) if (N0CFP && !N1CFP) - return DAG.getNode(ISD::FMA, N->getDebugLoc(), VT, N1, N0, N2); + return DAG.getNode(ISD::FMA, SDLoc(N), VT, N1, N0, N2); // (fma x, c1, (fmul x, c2)) -> (fmul x, c1+c2) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && @@ -6267,21 +6395,17 @@ SDValue DAGCombiner::visitFMA(SDNode *N) { } // (fma x, c, x) -> (fmul x, (c+1)) - if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && N0 == N2) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(1.0, VT))); - } // (fma x, c, (fneg x)) -> (fmul x, (c-1)) if (DAG.getTarget().Options.UnsafeFPMath && N1CFP && - N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, dl, VT, - N0, + N2.getOpcode() == ISD::FNEG && N2.getOperand(0) == N0) + return DAG.getNode(ISD::FMUL, dl, VT, N0, DAG.getNode(ISD::FADD, dl, VT, N1, DAG.getConstantFP(-1.0, VT))); - } return SDValue(); @@ -6303,7 +6427,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // fold (fdiv c1, c2) -> c1/c2 if (N0CFP && N1CFP) - return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, N0, N1); // fold (fdiv X, c2) -> fmul X, 1/c2 if losing precision is acceptable. if (N1CFP && DAG.getTarget().Options.UnsafeFPMath) { @@ -6320,7 +6444,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT) || TLI.isOperationLegal(llvm::ISD::ConstantFP, VT) || TLI.isFPImmLegal(Recip, VT))) - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, N0, + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0, DAG.getConstantFP(Recip, VT)); } @@ -6332,7 +6456,7 @@ SDValue DAGCombiner::visitFDIV(SDNode *N) { // Both can be negated for free, check to see if at least one is cheaper // negated. 
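The visitFDIV reciprocal rewrite above is explicitly allowed to lose precision (it only fires under UnsafeFPMath and when the reciprocal constant is materializable), but for divisors that are powers of two the reciprocal is exact and the rewrite preserves the value. A minimal check of that special case:

    #include <cassert>
    #include <cstdio>

    int main() {
      double X = 3.0, C = 8.0;        // 1/8 is exactly representable
      double Recip = 1.0 / C;
      assert(X / C == X * Recip);     // exact when C is a power of two
      std::printf("%g\n", X * Recip);
      return 0;
    }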
if (LHSNeg == 2 || RHSNeg == 2) - return DAG.getNode(ISD::FDIV, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FDIV, SDLoc(N), VT, GetNegatedExpression(N0, DAG, LegalOperations), GetNegatedExpression(N1, DAG, LegalOperations)); } @@ -6350,7 +6474,7 @@ SDValue DAGCombiner::visitFREM(SDNode *N) { // fold (frem c1, c2) -> fmod(c1,c2) if (N0CFP && N1CFP) - return DAG.getNode(ISD::FREM, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FREM, SDLoc(N), VT, N0, N1); return SDValue(); } @@ -6363,7 +6487,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { EVT VT = N->getValueType(0); if (N0CFP && N1CFP) // Constant fold - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1); if (N1CFP) { const APFloat& V = N1CFP->getValueAPF(); @@ -6371,11 +6495,11 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, c1) -> fneg(fabs(x)) iff isneg(c1) if (!V.isNegative()) { if (!LegalOperations || TLI.isOperationLegal(ISD::FABS, VT)) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); } else { if (!LegalOperations || TLI.isOperationLegal(ISD::FNEG, VT)) - return DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, - DAG.getNode(ISD::FABS, N0.getDebugLoc(), VT, N0)); + return DAG.getNode(ISD::FNEG, SDLoc(N), VT, + DAG.getNode(ISD::FABS, SDLoc(N0), VT, N0)); } } @@ -6384,22 +6508,22 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(copysign(x,z), y) -> copysign(x, y) if (N0.getOpcode() == ISD::FABS || N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0.getOperand(0), N1); // copysign(x, abs(y)) -> abs(x) if (N1.getOpcode() == ISD::FABS) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // copysign(x, copysign(y,z)) -> copysign(x, z) if (N1.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(1)); // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); return SDValue(); @@ -6416,7 +6540,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) - return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and SINT_TO_FP is not legal on this target, // but UINT_TO_FP is legal on this target, try to convert. @@ -6424,7 +6548,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::UINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to UINT_TO_FP. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desireable only if SELECT_CC can be lowered. 
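The visitSINT_TO_FP hunk above flips SINT_TO_FP to UINT_TO_FP (and visitUINT_TO_FP below does the reverse) when the sign bit of the input is known to be zero, because the two conversions agree on all non-negative inputs:

    #include <cassert>
    #include <cstdint>

    int main() {
      int32_t S = 12345;                        // sign bit known clear
      uint32_t U = static_cast<uint32_t>(S);    // same bit pattern
      assert(static_cast<double>(S) == static_cast<double>(U));
      return 0;
    }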
@@ -6442,7 +6566,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(-1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } // fold (sint_to_fp (zext (setcc x, y, cc))) -> @@ -6455,7 +6579,7 @@ SDValue DAGCombiner::visitSINT_TO_FP(SDNode *N) { { N0.getOperand(0).getOperand(0), N0.getOperand(0).getOperand(1), DAG.getConstantFP(1.0, VT) , DAG.getConstantFP(0.0, VT), N0.getOperand(0).getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } } @@ -6473,7 +6597,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { // ...but only if the target supports immediate floating-point values (!LegalOperations || TLI.isOperationLegalOrCustom(llvm::ISD::ConstantFP, VT))) - return DAG.getNode(ISD::UINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::UINT_TO_FP, SDLoc(N), VT, N0); // If the input is a legal type, and UINT_TO_FP is not legal on this target, // but SINT_TO_FP is legal on this target, try to convert. @@ -6481,7 +6605,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { TLI.isOperationLegalOrCustom(ISD::SINT_TO_FP, OpVT)) { // If the sign bit is known to be zero, we can change this to SINT_TO_FP. if (DAG.SignBitIsZero(N0)) - return DAG.getNode(ISD::SINT_TO_FP, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::SINT_TO_FP, SDLoc(N), VT, N0); } // The next optimizations are desireable only if SELECT_CC can be lowered. @@ -6499,7 +6623,7 @@ SDValue DAGCombiner::visitUINT_TO_FP(SDNode *N) { { N0.getOperand(0), N0.getOperand(1), DAG.getConstantFP(1.0, VT), DAG.getConstantFP(0.0, VT), N0.getOperand(2) }; - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), VT, Ops, 5); + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), VT, Ops, 5); } } @@ -6513,7 +6637,7 @@ SDValue DAGCombiner::visitFP_TO_SINT(SDNode *N) { // fold (fp_to_sint c1fp) -> c1 if (N0CFP) - return DAG.getNode(ISD::FP_TO_SINT, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_TO_SINT, SDLoc(N), VT, N0); return SDValue(); } @@ -6525,7 +6649,7 @@ SDValue DAGCombiner::visitFP_TO_UINT(SDNode *N) { // fold (fp_to_uint c1fp) -> c1 if (N0CFP) - return DAG.getNode(ISD::FP_TO_UINT, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_TO_UINT, SDLoc(N), VT, N0); return SDValue(); } @@ -6538,7 +6662,7 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // fold (fp_round c1fp) -> c1fp if (N0CFP) - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0, N1); // fold (fp_round (fp_extend x)) -> x if (N0.getOpcode() == ISD::FP_EXTEND && VT == N0.getOperand(0).getValueType()) @@ -6549,16 +6673,16 @@ SDValue DAGCombiner::visitFP_ROUND(SDNode *N) { // This is a value preserving truncation if both round's are. 
bool IsTrunc = N->getConstantOperandVal(1) == 1 && N0.getNode()->getConstantOperandVal(1) == 1; - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, N0.getOperand(0), + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, N0.getOperand(0), DAG.getIntPtrConstant(IsTrunc)); } // fold (fp_round (copysign X, Y)) -> (copysign (fp_round X), Y) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse()) { - SDValue Tmp = DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), VT, + SDValue Tmp = DAG.getNode(ISD::FP_ROUND, SDLoc(N0), VT, N0.getOperand(0), N1); AddToWorkList(Tmp.getNode()); - return DAG.getNode(ISD::FCOPYSIGN, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, Tmp, N0.getOperand(1)); } @@ -6574,7 +6698,7 @@ SDValue DAGCombiner::visitFP_ROUND_INREG(SDNode *N) { // fold (fp_round_inreg c1fp) -> c1fp if (N0CFP && isTypeLegal(EVT)) { SDValue Round = DAG.getConstantFP(*N0CFP->getConstantFPValue(), EVT); - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, Round); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, Round); } return SDValue(); @@ -6592,7 +6716,7 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { // fold (fp_extend c1fp) -> c1fp if (N0CFP) - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, N0); // Turn fp_extend(fp_round(X, 1)) -> x since the fp_round doesn't affect the // value of X. @@ -6601,25 +6725,23 @@ SDValue DAGCombiner::visitFP_EXTEND(SDNode *N) { SDValue In = N0.getOperand(0); if (In.getValueType() == VT) return In; if (VT.bitsLT(In.getValueType())) - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), VT, + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), VT, In, N0.getOperand(1)); - return DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), VT, In); + return DAG.getNode(ISD::FP_EXTEND, SDLoc(N), VT, In); } // fold (fpext (load x)) -> (fpext (fptrunc (extload x))) - if (ISD::isNON_EXTLoad(N0.getNode()) && N0.hasOneUse() && + if (ISD::isNormalLoad(N0.getNode()) && N0.hasOneUse() && ((!LegalOperations && !cast(N0)->isVolatile()) || TLI.isLoadExtLegal(ISD::EXTLOAD, N0.getValueType()))) { LoadSDNode *LN0 = cast(N0); - SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, N->getDebugLoc(), VT, + SDValue ExtLoad = DAG.getExtLoad(ISD::EXTLOAD, SDLoc(N), VT, LN0->getChain(), - LN0->getBasePtr(), LN0->getPointerInfo(), - N0.getValueType(), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->getAlignment()); + LN0->getBasePtr(), N0.getValueType(), + LN0->getMemOperand()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), - DAG.getNode(ISD::FP_ROUND, N0.getDebugLoc(), + DAG.getNode(ISD::FP_ROUND, SDLoc(N0), N0.getValueType(), ExtLoad, DAG.getIntPtrConstant(1)), ExtLoad.getValue(1)); return SDValue(N, 0); // Return N so it doesn't get rechecked! 
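The fp_round and fp_extend folds above are careful about direction: extending and then rounding back is the identity, while rounding first can discard bits that no later operation restores, which is why the round-of-round case needs the "value preserving" truncation flag. A standalone illustration:

    #include <cassert>

    int main() {
      float F = 0.1f;
      double D = 0.1;
      // (fp_round (fp_extend x)) -> x : always safe.
      assert(static_cast<float>(static_cast<double>(F)) == F);
      // Rounding first loses information; the reverse is not an identity.
      assert(static_cast<double>(static_cast<float>(D)) != D);
      return 0;
    }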
@@ -6650,10 +6772,10 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { - Int = DAG.getNode(ISD::XOR, N0.getDebugLoc(), IntVT, Int, + Int = DAG.getNode(ISD::XOR, SDLoc(N0), IntVT, Int, DAG.getConstant(APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Int); } } @@ -6661,12 +6783,11 @@ SDValue DAGCombiner::visitFNEG(SDNode *N) { // (fneg (fmul c, x)) -> (fmul -c, x) if (N0.getOpcode() == ISD::FMUL) { ConstantFPSDNode *CFP1 = dyn_cast(N0.getOperand(1)); - if (CFP1) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + if (CFP1) + return DAG.getNode(ISD::FMUL, SDLoc(N), VT, N0.getOperand(0), - DAG.getNode(ISD::FNEG, N->getDebugLoc(), VT, + DAG.getNode(ISD::FNEG, SDLoc(N), VT, N0.getOperand(1))); - } } return SDValue(); @@ -6679,7 +6800,7 @@ SDValue DAGCombiner::visitFCEIL(SDNode *N) { // fold (fceil c1) -> fceil(c1) if (N0CFP) - return DAG.getNode(ISD::FCEIL, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FCEIL, SDLoc(N), VT, N0); return SDValue(); } @@ -6691,7 +6812,7 @@ SDValue DAGCombiner::visitFTRUNC(SDNode *N) { // fold (ftrunc c1) -> ftrunc(c1) if (N0CFP) - return DAG.getNode(ISD::FTRUNC, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FTRUNC, SDLoc(N), VT, N0); return SDValue(); } @@ -6703,7 +6824,7 @@ SDValue DAGCombiner::visitFFLOOR(SDNode *N) { // fold (ffloor c1) -> ffloor(c1) if (N0CFP) - return DAG.getNode(ISD::FFLOOR, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FFLOOR, SDLoc(N), VT, N0); return SDValue(); } @@ -6720,28 +6841,28 @@ SDValue DAGCombiner::visitFABS(SDNode *N) { // fold (fabs c1) -> fabs(c1) if (N0CFP) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0); // fold (fabs (fabs x)) -> (fabs x) if (N0.getOpcode() == ISD::FABS) return N->getOperand(0); // fold (fabs (fneg x)) -> (fabs x) // fold (fabs (fcopysign x, y)) -> (fabs x) if (N0.getOpcode() == ISD::FNEG || N0.getOpcode() == ISD::FCOPYSIGN) - return DAG.getNode(ISD::FABS, N->getDebugLoc(), VT, N0.getOperand(0)); + return DAG.getNode(ISD::FABS, SDLoc(N), VT, N0.getOperand(0)); // Transform fabs(bitconvert(x)) -> bitconvert(x&~sign) to avoid loading // constant pool values. 
- if (!TLI.isFAbsFree(VT) && + if (!TLI.isFAbsFree(VT) && N0.getOpcode() == ISD::BITCAST && N0.getNode()->hasOneUse() && N0.getOperand(0).getValueType().isInteger() && !N0.getOperand(0).getValueType().isVector()) { SDValue Int = N0.getOperand(0); EVT IntVT = Int.getValueType(); if (IntVT.isInteger() && !IntVT.isVector()) { - Int = DAG.getNode(ISD::AND, N0.getDebugLoc(), IntVT, Int, + Int = DAG.getNode(ISD::AND, SDLoc(N0), IntVT, Int, DAG.getConstant(~APInt::getSignBit(IntVT.getSizeInBits()), IntVT)); AddToWorkList(Int.getNode()); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Int); } } @@ -6765,7 +6886,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (N1.getOpcode() == ISD::SETCC && TLI.isOperationLegalOrCustom(ISD::BR_CC, N1.getOperand(0).getValueType())) { - return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, Chain, N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2); } @@ -6811,12 +6932,12 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (AndConst.isPowerOf2() && cast(Op1)->getAPIntValue()==AndConst.logBase2()) { SDValue SetCC = - DAG.getSetCC(N->getDebugLoc(), - TLI.getSetCCResultType(Op0.getValueType()), + DAG.getSetCC(SDLoc(N), + getSetCCResultType(Op0.getValueType()), Op0, DAG.getConstant(0, Op0.getValueType()), ISD::SETNE); - SDValue NewBRCond = DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + SDValue NewBRCond = DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); // Don't add the new BRCond into the worklist or else SimplifySelectCC // will convert it back to (X & C1) >> C2. @@ -6861,7 +6982,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(N1, Tmp); removeFromWorkList(TheXor); DAG.DeleteNode(TheXor); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, Tmp, N2); } @@ -6882,8 +7003,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { EVT SetCCVT = N1.getValueType(); if (LegalTypes) - SetCCVT = TLI.getSetCCResultType(SetCCVT); - SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(), + SetCCVT = getSetCCResultType(SetCCVT); + SDValue SetCC = DAG.getSetCC(SDLoc(TheXor), SetCCVT, Op0, Op1, Equal ? ISD::SETEQ : ISD::SETNE); @@ -6892,7 +7013,7 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { DAG.ReplaceAllUsesOfValueWith(N1, SetCC); removeFromWorkList(N1.getNode()); DAG.DeleteNode(N1.getNode()); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + return DAG.getNode(ISD::BRCOND, SDLoc(N), MVT::Other, Chain, SetCC, N2); } } @@ -6913,14 +7034,14 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) { // MachineBasicBlock CFG, which is awkward. // Use SimplifySetCC to simplify SETCC's. 
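The visitBRCOND hunk above rewrites a branch on (srl (and x, C1), C2), with C1 a power of two and C2 == log2(C1), into a branch on setcc ((and x, C1), 0, ne). The two conditions are the same single-bit test, as this exhaustive-enough check shows:

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint32_t C1 = 1u << 5, C2 = 5;     // C1 power of 2, C2 = log2(C1)
      for (uint32_t x : {0u, 31u, 32u, 33u, 0xFFFFFFFFu})
        assert((((x & C1) >> C2) != 0) == ((x & C1) != 0));
      return 0;
    }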
- SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()), - CondLHS, CondRHS, CC->get(), N->getDebugLoc(), + SDValue Simp = SimplifySetCC(getSetCCResultType(CondLHS.getValueType()), + CondLHS, CondRHS, CC->get(), SDLoc(N), false); if (Simp.getNode()) AddToWorkList(Simp.getNode()); // fold to a simpler setcc if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC) - return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::BR_CC, SDLoc(N), MVT::Other, N->getOperand(0), Simp.getOperand(2), Simp.getOperand(0), Simp.getOperand(1), N->getOperand(4)); @@ -7118,10 +7239,10 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDValue Result; if (isLoad) - Result = DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + Result = DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); else - Result = DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + Result = DAG.getIndexedStore(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); ++PreIndexedNodes; ++NodesCombined; @@ -7156,7 +7277,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // x0 * offset0 + y0 * ptr0 = t0 // knowing that // x1 * offset1 + y1 * ptr0 = t1 (the indexed load/store) - // + // // where x0, x1, y0 and y1 in {-1, 1} are given by the types of the // indexed load/store and the expresion that needs to be re-written. // @@ -7186,7 +7307,7 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { SDValue NewOp2 = Result.getValue(isLoad ? 1 : 0); SDValue NewUse = DAG.getNode(Opcode, - OtherUses[i]->getDebugLoc(), + SDLoc(OtherUses[i]), OtherUses[i]->getValueType(0), NewOp1, NewOp2); DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); removeFromWorkList(OtherUses[i]); @@ -7278,7 +7399,7 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { for (SDNode::use_iterator III = Use->use_begin(), EEE = Use->use_end(); III != EEE; ++III) { SDNode *UseUse = *III; - if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) + if (!canFoldInAddressingMode(Use, UseUse, DAG, TLI)) RealUse = true; } @@ -7295,9 +7416,9 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) { // Check for #2 if (!Op->isPredecessorOf(N) && !N->isPredecessorOf(Op)) { SDValue Result = isLoad - ? DAG.getIndexedLoad(SDValue(N,0), N->getDebugLoc(), + ? DAG.getIndexedLoad(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM) - : DAG.getIndexedStore(SDValue(N,0), N->getDebugLoc(), + : DAG.getIndexedStore(SDValue(N,0), SDLoc(N), BasePtr, Offset, AM); ++PostIndexedNodes; ++NodesCombined; @@ -7403,17 +7524,20 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { if (Align > LD->getMemOperand()->getBaseAlignment()) { SDValue NewLoad = - DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + DAG.getExtLoad(LD->getExtensionType(), SDLoc(N), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), - LD->isVolatile(), LD->isNonTemporal(), Align); + LD->isVolatile(), LD->isNonTemporal(), Align, + LD->getTBAAInfo()); return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); } } } - if (CombinerAA) { + bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA : + TLI.getTargetMachine().getSubtarget().useAA(); + if (UseAA) { // Walk up chain skipping non-aliasing memory nodes. SDValue BetterChain = FindBetterChain(N, Chain); @@ -7423,22 +7547,17 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Replace the chain to void dependency. 
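The CombineToPreIndexedLoadStore changes above rewrite other users of the old pointer in terms of the new pre-incremented pointer, using the linear relation described in the x0/x1/y0/y1 comment. Stripped to integers, the arithmetic it relies on is: if t0 = ptr + a and the indexed access produces ptr' = ptr + b, then t0 can be rebuilt as ptr' + (a - b).

    #include <cassert>
    #include <cstdint>

    int main() {
      int64_t ptr = 0x1000, a = 24, b = 8;
      int64_t t0 = ptr + a;          // some other use of the old pointer
      int64_t ptrP = ptr + b;        // result of the pre-indexed load/store
      assert(t0 == ptrP + (a - b));  // the rewritten form of the other use
      return 0;
    }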
if (LD->getExtensionType() == ISD::NON_EXTLOAD) { - ReplLoad = DAG.getLoad(N->getValueType(0), LD->getDebugLoc(), - BetterChain, Ptr, LD->getPointerInfo(), - LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + ReplLoad = DAG.getLoad(N->getValueType(0), SDLoc(LD), + BetterChain, Ptr, LD->getMemOperand()); } else { - ReplLoad = DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), + ReplLoad = DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), LD->getValueType(0), - BetterChain, Ptr, LD->getPointerInfo(), - LD->getMemoryVT(), - LD->isVolatile(), - LD->isNonTemporal(), - LD->getAlignment()); + BetterChain, Ptr, LD->getMemoryVT(), + LD->getMemOperand()); } // Create token factor to keep old chain connected. - SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), + SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other, Chain, ReplLoad.getValue(1)); // Make sure the new and old chains are cleaned up. @@ -7454,9 +7573,562 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { if (CombineToPreIndexedLoadStore(N) || CombineToPostIndexedLoadStore(N)) return SDValue(N, 0); + // Try to slice up N to more direct loads if the slices are mapped to + // different register banks or pairing can take place. + if (SliceUpLoad(N)) + return SDValue(N, 0); + return SDValue(); } +namespace { +/// \brief Helper structure used to slice a load in smaller loads. +/// Basically a slice is obtained from the following sequence: +/// Origin = load Ty1, Base +/// Shift = srl Ty1 Origin, CstTy Amount +/// Inst = trunc Shift to Ty2 +/// +/// Then, it will be rewriten into: +/// Slice = load SliceTy, Base + SliceOffset +/// [Inst = zext Slice to Ty2], only if SliceTy <> Ty2 +/// +/// SliceTy is deduced from the number of bits that are actually used to +/// build Inst. +struct LoadedSlice { + /// \brief Helper structure used to compute the cost of a slice. + struct Cost { + /// Are we optimizing for code size. + bool ForCodeSize; + /// Various cost. + unsigned Loads; + unsigned Truncates; + unsigned CrossRegisterBanksCopies; + unsigned ZExts; + unsigned Shift; + + Cost(bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(0), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) {} + + /// \brief Get the cost of one isolated slice. + Cost(const LoadedSlice &LS, bool ForCodeSize = false) + : ForCodeSize(ForCodeSize), Loads(1), Truncates(0), + CrossRegisterBanksCopies(0), ZExts(0), Shift(0) { + EVT TruncType = LS.Inst->getValueType(0); + EVT LoadedType = LS.getLoadedType(); + if (TruncType != LoadedType && + !LS.DAG->getTargetLoweringInfo().isZExtFree(LoadedType, TruncType)) + ZExts = 1; + } + + /// \brief Account for slicing gain in the current cost. + /// Slicing provide a few gains like removing a shift or a + /// truncate. This method allows to grow the cost of the original + /// load with the gain from this slice. + void addSliceGain(const LoadedSlice &LS) { + // Each slice saves a truncate. + const TargetLowering &TLI = LS.DAG->getTargetLoweringInfo(); + if (!TLI.isTruncateFree(LS.Inst->getValueType(0), + LS.Inst->getOperand(0).getValueType())) + ++Truncates; + // If there is a shift amount, this slice gets rid of it. + if (LS.Shift) + ++Shift; + // If this slice can merge a cross register bank copy, account for it. 
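The pattern SliceUpLoad (introduced above) targets is one wide load whose only uses are trunc or trunc(lshr) chains, the shape SROA leaves behind when it merges a small struct into one integer. Slicing replaces the wide load with narrow loads at the matching offsets. A C-level picture of the before and after, with offsets shown for a little-endian host (getOffsetFromBase mirrors them on big-endian targets):

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      unsigned char Buf[8];
      const uint64_t Wide = 0x1122334455667788ull;
      std::memcpy(Buf, &Wide, 8);

      uint64_t Origin;                          // Origin = load i64, Base
      std::memcpy(&Origin, Buf, 8);
      uint32_t Lo = (uint32_t)Origin;           // trunc
      uint32_t Hi = (uint32_t)(Origin >> 32);   // trunc(lshr 32)

      uint32_t SliceLo, SliceHi;                // the sliced form:
      std::memcpy(&SliceLo, Buf + 0, 4);        //   load i32, Base
      std::memcpy(&SliceHi, Buf + 4, 4);        //   load i32, Base + 4
      assert(Lo == SliceLo && Hi == SliceHi);   // little-endian layout
      return 0;
    }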
+ if (LS.canMergeExpensiveCrossRegisterBankCopy()) + ++CrossRegisterBanksCopies; + } + + Cost &operator+=(const Cost &RHS) { + Loads += RHS.Loads; + Truncates += RHS.Truncates; + CrossRegisterBanksCopies += RHS.CrossRegisterBanksCopies; + ZExts += RHS.ZExts; + Shift += RHS.Shift; + return *this; + } + + bool operator==(const Cost &RHS) const { + return Loads == RHS.Loads && Truncates == RHS.Truncates && + CrossRegisterBanksCopies == RHS.CrossRegisterBanksCopies && + ZExts == RHS.ZExts && Shift == RHS.Shift; + } + + bool operator!=(const Cost &RHS) const { return !(*this == RHS); } + + bool operator<(const Cost &RHS) const { + // Assume cross register banks copies are as expensive as loads. + // FIXME: Do we want some more target hooks? + unsigned ExpensiveOpsLHS = Loads + CrossRegisterBanksCopies; + unsigned ExpensiveOpsRHS = RHS.Loads + RHS.CrossRegisterBanksCopies; + // Unless we are optimizing for code size, consider the + // expensive operation first. + if (!ForCodeSize && ExpensiveOpsLHS != ExpensiveOpsRHS) + return ExpensiveOpsLHS < ExpensiveOpsRHS; + return (Truncates + ZExts + Shift + ExpensiveOpsLHS) < + (RHS.Truncates + RHS.ZExts + RHS.Shift + ExpensiveOpsRHS); + } + + bool operator>(const Cost &RHS) const { return RHS < *this; } + + bool operator<=(const Cost &RHS) const { return !(RHS < *this); } + + bool operator>=(const Cost &RHS) const { return !(*this < RHS); } + }; + // The last instruction that represent the slice. This should be a + // truncate instruction. + SDNode *Inst; + // The original load instruction. + LoadSDNode *Origin; + // The right shift amount in bits from the original load. + unsigned Shift; + // The DAG from which Origin came from. + // This is used to get some contextual information about legal types, etc. + SelectionDAG *DAG; + + LoadedSlice(SDNode *Inst = NULL, LoadSDNode *Origin = NULL, + unsigned Shift = 0, SelectionDAG *DAG = NULL) + : Inst(Inst), Origin(Origin), Shift(Shift), DAG(DAG) {} + + LoadedSlice(const LoadedSlice &LS) + : Inst(LS.Inst), Origin(LS.Origin), Shift(LS.Shift), DAG(LS.DAG) {} + + /// \brief Get the bits used in a chunk of bits \p BitWidth large. + /// \return Result is \p BitWidth and has used bits set to 1 and + /// not used bits set to 0. + APInt getUsedBits() const { + // Reproduce the trunc(lshr) sequence: + // - Start from the truncated value. + // - Zero extend to the desired bit width. + // - Shift left. + assert(Origin && "No original load to compare against."); + unsigned BitWidth = Origin->getValueSizeInBits(0); + assert(Inst && "This slice is not bound to an instruction"); + assert(Inst->getValueSizeInBits(0) <= BitWidth && + "Extracted slice is bigger than the whole type!"); + APInt UsedBits(Inst->getValueSizeInBits(0), 0); + UsedBits.setAllBits(); + UsedBits = UsedBits.zext(BitWidth); + UsedBits <<= Shift; + return UsedBits; + } + + /// \brief Get the size of the slice to be loaded in bytes. + unsigned getLoadedSize() const { + unsigned SliceSize = getUsedBits().countPopulation(); + assert(!(SliceSize & 0x7) && "Size is not a multiple of a byte."); + return SliceSize / 8; + } + + /// \brief Get the type that will be loaded for this slice. + /// Note: This may not be the final type for the slice. + EVT getLoadedType() const { + assert(DAG && "Missing context"); + LLVMContext &Ctxt = *DAG->getContext(); + return EVT::getIntegerVT(Ctxt, getLoadedSize() * 8); + } + + /// \brief Get the alignment of the load used for this slice. 
+ unsigned getAlignment() const { + unsigned Alignment = Origin->getAlignment(); + unsigned Offset = getOffsetFromBase(); + if (Offset != 0) + Alignment = MinAlign(Alignment, Alignment + Offset); + return Alignment; + } + + /// \brief Check if this slice can be rewritten with legal operations. + bool isLegal() const { + // An invalid slice is not legal. + if (!Origin || !Inst || !DAG) + return false; + + // Offsets are for indexed load only, we do not handle that. + if (Origin->getOffset().getOpcode() != ISD::UNDEF) + return false; + + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + + // Check that the type is legal. + EVT SliceType = getLoadedType(); + if (!TLI.isTypeLegal(SliceType)) + return false; + + // Check that the load is legal for this type. + if (!TLI.isOperationLegal(ISD::LOAD, SliceType)) + return false; + + // Check that the offset can be computed. + // 1. Check its type. + EVT PtrType = Origin->getBasePtr().getValueType(); + if (PtrType == MVT::Untyped || PtrType.isExtended()) + return false; + + // 2. Check that it fits in the immediate. + if (!TLI.isLegalAddImmediate(getOffsetFromBase())) + return false; + + // 3. Check that the computation is legal. + if (!TLI.isOperationLegal(ISD::ADD, PtrType)) + return false; + + // Check that the zext is legal if it needs one. + EVT TruncateType = Inst->getValueType(0); + if (TruncateType != SliceType && + !TLI.isOperationLegal(ISD::ZERO_EXTEND, TruncateType)) + return false; + + return true; + } + + /// \brief Get the offset in bytes of this slice in the original chunk of + /// bits. + /// \pre DAG != NULL. + uint64_t getOffsetFromBase() const { + assert(DAG && "Missing context."); + bool IsBigEndian = + DAG->getTargetLoweringInfo().getDataLayout()->isBigEndian(); + assert(!(Shift & 0x7) && "Shifts not aligned on Bytes are not supported."); + uint64_t Offset = Shift / 8; + unsigned TySizeInBytes = Origin->getValueSizeInBits(0) / 8; + assert(!(Origin->getValueSizeInBits(0) & 0x7) && + "The size of the original loaded type is not a multiple of a" + " byte."); + // If Offset is bigger than TySizeInBytes, it means we are loading all + // zeros. This should have been optimized before in the process. + assert(TySizeInBytes > Offset && + "Invalid shift amount for given loaded size"); + if (IsBigEndian) + Offset = TySizeInBytes - Offset - getLoadedSize(); + return Offset; + } + + /// \brief Generate the sequence of instructions to load the slice + /// represented by this object and redirect the uses of this slice to + /// this new sequence of instructions. + /// \pre this->Inst && this->Origin are valid Instructions and this + /// object passed the legal check: LoadedSlice::isLegal returned true. + /// \return The last instruction of the sequence used to load the slice. + SDValue loadSlice() const { + assert(Inst && Origin && "Unable to replace a non-existing slice."); + const SDValue &OldBaseAddr = Origin->getBasePtr(); + SDValue BaseAddr = OldBaseAddr; + // Get the offset in that chunk of bytes w.r.t. the endianess. + int64_t Offset = static_cast(getOffsetFromBase()); + assert(Offset >= 0 && "Offset too big to fit in int64_t!"); + if (Offset) { + // BaseAddr = BaseAddr + Offset. + EVT ArithType = BaseAddr.getValueType(); + BaseAddr = DAG->getNode(ISD::ADD, SDLoc(Origin), ArithType, BaseAddr, + DAG->getConstant(Offset, ArithType)); + } + + // Create the type of the loaded slice according to its size. + EVT SliceType = getLoadedType(); + + // Create the load for the slice. 
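The two geometric helpers above reduce to simple mask arithmetic: getUsedBits builds a contiguous run of Width ones starting at Shift, and getOffsetFromBase converts the shift to a byte offset, mirrored on big-endian targets. A standalone version for 64-bit origins, under the same byte-aligned-shift precondition the assertions enforce:

    #include <cassert>
    #include <cstdint>

    static uint64_t usedBits(unsigned Width, unsigned Shift) {
      uint64_t Mask = (Width == 64) ? ~0ull : ((1ull << Width) - 1);
      return Mask << Shift;               // Width ones, starting at Shift
    }

    static uint64_t offsetFromBase(unsigned Width, unsigned Shift,
                                   unsigned OriginBytes, bool BigEndian) {
      uint64_t Offset = Shift / 8;        // shifts are byte-aligned here
      if (BigEndian)                      // mirror for big-endian layout
        Offset = OriginBytes - Offset - Width / 8;
      return Offset;
    }

    int main() {
      assert(usedBits(32, 32) == 0xFFFFFFFF00000000ull);
      assert(offsetFromBase(32, 32, 8, /*BigEndian=*/false) == 4);
      assert(offsetFromBase(32, 32, 8, /*BigEndian=*/true) == 0);
      return 0;
    }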
+ SDValue LastInst = DAG->getLoad( + SliceType, SDLoc(Origin), Origin->getChain(), BaseAddr, + Origin->getPointerInfo().getWithOffset(Offset), Origin->isVolatile(), + Origin->isNonTemporal(), Origin->isInvariant(), getAlignment()); + // If the final type is not the same as the loaded type, this means that + // we have to pad with zero. Create a zero extend for that. + EVT FinalType = Inst->getValueType(0); + if (SliceType != FinalType) + LastInst = + DAG->getNode(ISD::ZERO_EXTEND, SDLoc(LastInst), FinalType, LastInst); + return LastInst; + } + + /// \brief Check if this slice can be merged with an expensive cross register + /// bank copy. E.g., + /// i = load i32 + /// f = bitcast i32 i to float + bool canMergeExpensiveCrossRegisterBankCopy() const { + if (!Inst || !Inst->hasOneUse()) + return false; + SDNode *Use = *Inst->use_begin(); + if (Use->getOpcode() != ISD::BITCAST) + return false; + assert(DAG && "Missing context"); + const TargetLowering &TLI = DAG->getTargetLoweringInfo(); + EVT ResVT = Use->getValueType(0); + const TargetRegisterClass *ResRC = TLI.getRegClassFor(ResVT.getSimpleVT()); + const TargetRegisterClass *ArgRC = + TLI.getRegClassFor(Use->getOperand(0).getValueType().getSimpleVT()); + if (ArgRC == ResRC || !TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // At this point, we know that we perform a cross-register-bank copy. + // Check if it is expensive. + const TargetRegisterInfo *TRI = TLI.getTargetMachine().getRegisterInfo(); + // Assume bitcasts are cheap, unless both register classes do not + // explicitly share a common sub class. + if (!TRI || TRI->getCommonSubClass(ArgRC, ResRC)) + return false; + + // Check if it will be merged with the load. + // 1. Check the alignment constraint. + unsigned RequiredAlignment = TLI.getDataLayout()->getABITypeAlignment( + ResVT.getTypeForEVT(*DAG->getContext())); + + if (RequiredAlignment > getAlignment()) + return false; + + // 2. Check that the load is a legal operation for that type. + if (!TLI.isOperationLegal(ISD::LOAD, ResVT)) + return false; + + // 3. Check that we do not have a zext in the way. + if (Inst->getValueType(0) != getLoadedType()) + return false; + + return true; + } +}; +} + +/// \brief Sorts LoadedSlice according to their offset. +struct LoadedSliceSorter { + bool operator()(const LoadedSlice &LHS, const LoadedSlice &RHS) { + assert(LHS.Origin == RHS.Origin && "Different bases not implemented."); + return LHS.getOffsetFromBase() < RHS.getOffsetFromBase(); + } +}; + +/// \brief Check that all bits set in \p UsedBits form a dense region, i.e., +/// \p UsedBits looks like 0..0 1..1 0..0. +static bool areUsedBitsDense(const APInt &UsedBits) { + // If all the bits are one, this is dense! + if (UsedBits.isAllOnesValue()) + return true; + + // Get rid of the unused bits on the right. + APInt NarrowedUsedBits = UsedBits.lshr(UsedBits.countTrailingZeros()); + // Get rid of the unused bits on the left. + if (NarrowedUsedBits.countLeadingZeros()) + NarrowedUsedBits = NarrowedUsedBits.trunc(NarrowedUsedBits.getActiveBits()); + // Check that the chunk of bits is completely used. + return NarrowedUsedBits.isAllOnesValue(); +} + +/// \brief Check whether or not \p First and \p Second are next to each other +/// in memory. This means that there is no hole between the bits loaded +/// by \p First and the bits loaded by \p Second. 
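areUsedBitsDense (above) accepts a mask only if its set bits form a single contiguous run, i.e. the mask looks like 0..0 1..1 0..0, since only such a run corresponds to a loadable chunk of bytes. An equivalent standalone check: strip the trailing zeros, then verify the remainder is all ones from bit zero upward.

    #include <cassert>
    #include <cstdint>

    static bool isDense(uint64_t Bits) {
      if (Bits == 0)
        return false;
      while (!(Bits & 1))                // drop the unused low bits
        Bits >>= 1;
      return (Bits & (Bits + 1)) == 0;   // remaining bits one solid run?
    }

    int main() {
      assert(isDense(0x00FF0000));       // one run: dense
      assert(!isDense(0x00FF00FF));      // hole between runs: not dense
      return 0;
    }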
+static bool areSlicesNextToEachOther(const LoadedSlice &First, + const LoadedSlice &Second) { + assert(First.Origin == Second.Origin && First.Origin && + "Unable to match different memory origins."); + APInt UsedBits = First.getUsedBits(); + assert((UsedBits & Second.getUsedBits()) == 0 && + "Slices are not supposed to overlap."); + UsedBits |= Second.getUsedBits(); + return areUsedBitsDense(UsedBits); +} + +/// \brief Adjust the \p GlobalLSCost according to the target +/// paring capabilities and the layout of the slices. +/// \pre \p GlobalLSCost should account for at least as many loads as +/// there is in the slices in \p LoadedSlices. +static void adjustCostForPairing(SmallVectorImpl &LoadedSlices, + LoadedSlice::Cost &GlobalLSCost) { + unsigned NumberOfSlices = LoadedSlices.size(); + // If there is less than 2 elements, no pairing is possible. + if (NumberOfSlices < 2) + return; + + // Sort the slices so that elements that are likely to be next to each + // other in memory are next to each other in the list. + std::sort(LoadedSlices.begin(), LoadedSlices.end(), LoadedSliceSorter()); + const TargetLowering &TLI = LoadedSlices[0].DAG->getTargetLoweringInfo(); + // First (resp. Second) is the first (resp. Second) potentially candidate + // to be placed in a paired load. + const LoadedSlice *First = NULL; + const LoadedSlice *Second = NULL; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice, + // Set the beginning of the pair. + First = Second) { + + Second = &LoadedSlices[CurrSlice]; + + // If First is NULL, it means we start a new pair. + // Get to the next slice. + if (!First) + continue; + + EVT LoadedType = First->getLoadedType(); + + // If the types of the slices are different, we cannot pair them. + if (LoadedType != Second->getLoadedType()) + continue; + + // Check if the target supplies paired loads for this type. + unsigned RequiredAlignment = 0; + if (!TLI.hasPairedLoad(LoadedType, RequiredAlignment)) { + // move to the next pair, this type is hopeless. + Second = NULL; + continue; + } + // Check if we meet the alignment requirement. + if (RequiredAlignment > First->getAlignment()) + continue; + + // Check that both loads are next to each other in memory. + if (!areSlicesNextToEachOther(*First, *Second)) + continue; + + assert(GlobalLSCost.Loads > 0 && "We save more loads than we created!"); + --GlobalLSCost.Loads; + // Move to the next pair. + Second = NULL; + } +} + +/// \brief Check the profitability of all involved LoadedSlice. +/// Currently, it is considered profitable if there is exactly two +/// involved slices (1) which are (2) next to each other in memory, and +/// whose cost (\see LoadedSlice::Cost) is smaller than the original load (3). +/// +/// Note: The order of the elements in \p LoadedSlices may be modified, but not +/// the elements themselves. +/// +/// FIXME: When the cost model will be mature enough, we can relax +/// constraints (1) and (2). +static bool isSlicingProfitable(SmallVectorImpl &LoadedSlices, + const APInt &UsedBits, bool ForCodeSize) { + unsigned NumberOfSlices = LoadedSlices.size(); + if (StressLoadSlicing) + return NumberOfSlices > 1; + + // Check (1). + if (NumberOfSlices != 2) + return false; + + // Check (2). + if (!areUsedBitsDense(UsedBits)) + return false; + + // Check (3). + LoadedSlice::Cost OrigCost(ForCodeSize), GlobalSlicingCost(ForCodeSize); + // The original code has one big load. 
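isSlicingProfitable (above) charges the original configuration one wide load plus the truncates and shifts the slices would remove, charges the sliced configuration its loads, then lets adjustCostForPairing merge two adjacent same-type slices into one paired load. The toy comparison below collapses the cheap fields (truncates, zexts, shifts) into one counter; Cost::operator< in the patch keeps them separate and also weighs ForCodeSize, so this is only the shape of the decision:

    #include <cassert>

    struct Cost { unsigned Loads; unsigned Cheap; };

    // Loads (and cross-bank copies) dominate; cheap ops break ties.
    static bool cheaper(const Cost &A, const Cost &B) {
      if (A.Loads != B.Loads)
        return A.Loads < B.Loads;
      return A.Cheap < B.Cheap;
    }

    int main() {
      Cost Orig = {1, 3};    // one wide load, credited with the two
                             // truncates and one shift slicing removes
      Cost Sliced = {2, 0};  // two narrow loads, nothing else
      assert(!cheaper(Sliced, Orig));   // not profitable on its own
      Sliced.Loads -= 1;                // paired load merges the two
      assert(cheaper(Sliced, Orig));    // now slicing wins
      return 0;
    }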
+ OrigCost.Loads = 1; + for (unsigned CurrSlice = 0; CurrSlice < NumberOfSlices; ++CurrSlice) { + const LoadedSlice &LS = LoadedSlices[CurrSlice]; + // Accumulate the cost of all the slices. + LoadedSlice::Cost SliceCost(LS, ForCodeSize); + GlobalSlicingCost += SliceCost; + + // Account as cost in the original configuration the gain obtained + // with the current slices. + OrigCost.addSliceGain(LS); + } + + // If the target supports paired load, adjust the cost accordingly. + adjustCostForPairing(LoadedSlices, GlobalSlicingCost); + return OrigCost > GlobalSlicingCost; +} + +/// \brief If the given load, \p LI, is used only by trunc or trunc(lshr) +/// operations, split it in the various pieces being extracted. +/// +/// This sort of thing is introduced by SROA. +/// This slicing takes care not to insert overlapping loads. +/// \pre LI is a simple load (i.e., not an atomic or volatile load). +bool DAGCombiner::SliceUpLoad(SDNode *N) { + if (Level < AfterLegalizeDAG) + return false; + + LoadSDNode *LD = cast(N); + if (LD->isVolatile() || !ISD::isNormalLoad(LD) || + !LD->getValueType(0).isInteger()) + return false; + + // Keep track of already used bits to detect overlapping values. + // In that case, we will just abort the transformation. + APInt UsedBits(LD->getValueSizeInBits(0), 0); + + SmallVector LoadedSlices; + + // Check if this load is used as several smaller chunks of bits. + // Basically, look for uses in trunc or trunc(lshr) and record a new chain + // of computation for each trunc. + for (SDNode::use_iterator UI = LD->use_begin(), UIEnd = LD->use_end(); + UI != UIEnd; ++UI) { + // Skip the uses of the chain. + if (UI.getUse().getResNo() != 0) + continue; + + SDNode *User = *UI; + unsigned Shift = 0; + + // Check if this is a trunc(lshr). + if (User->getOpcode() == ISD::SRL && User->hasOneUse() && + isa(User->getOperand(1))) { + Shift = cast(User->getOperand(1))->getZExtValue(); + User = *User->use_begin(); + } + + // At this point, User is a Truncate, iff we encountered, trunc or + // trunc(lshr). + if (User->getOpcode() != ISD::TRUNCATE) + return false; + + // The width of the type must be a power of 2 and greater than 8-bits. + // Otherwise the load cannot be represented in LLVM IR. + // Moreover, if we shifted with a non 8-bits multiple, the slice + // will be accross several bytes. We do not support that. + unsigned Width = User->getValueSizeInBits(0); + if (Width < 8 || !isPowerOf2_32(Width) || (Shift & 0x7)) + return 0; + + // Build the slice for this chain of computations. + LoadedSlice LS(User, LD, Shift, &DAG); + APInt CurrentUsedBits = LS.getUsedBits(); + + // Check if this slice overlaps with another. + if ((CurrentUsedBits & UsedBits) != 0) + return false; + // Update the bits used globally. + UsedBits |= CurrentUsedBits; + + // Check if the new slice would be legal. + if (!LS.isLegal()) + return false; + + // Record the slice. + LoadedSlices.push_back(LS); + } + + // Abort slicing if it does not seem to be profitable. + if (!isSlicingProfitable(LoadedSlices, UsedBits, ForCodeSize)) + return false; + + ++SlicedLoads; + + // Rewrite each chain to use an independent load. + // By construction, each chain can be represented by a unique load. + + // Prepare the argument for the new token factor for all the slices. 
+
+  // Rewrite each chain to use an independent load.
+  // By construction, each chain can be represented by a unique load.
+
+  // Prepare the argument for the new token factor for all the slices.
+  SmallVector<SDValue, 8> ArgChains;
+  for (SmallVectorImpl<LoadedSlice>::const_iterator
+           LSIt = LoadedSlices.begin(),
+           LSItEnd = LoadedSlices.end();
+       LSIt != LSItEnd; ++LSIt) {
+    SDValue SliceInst = LSIt->loadSlice();
+    CombineTo(LSIt->Inst, SliceInst, true);
+    if (SliceInst.getNode()->getOpcode() != ISD::LOAD)
+      SliceInst = SliceInst.getOperand(0);
+    assert(SliceInst->getOpcode() == ISD::LOAD &&
+           "It takes more than a zext to get to the loaded slice!!");
+    ArgChains.push_back(SliceInst.getValue(1));
+  }
+
+  SDValue Chain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other,
+                              &ArgChains[0], ArgChains.size());
+  DAG.ReplaceAllUsesOfValueWith(SDValue(N, 1), Chain);
+  return true;
+}
+
 /// CheckForMaskedLoad - Check to see if V is (and load (ptr), imm), where the
 /// load has specific bytes cleared out.  If so, return the byte size
 /// being masked out and the shift amount.
@@ -7500,9 +8172,9 @@ CheckForMaskedLoad(SDValue V, SDValue Ptr, SDValue Chain) {
   // 0 and the bits being kept are 1.  Use getSExtValue so that leading bits
   // follow the sign bit for uniformity.
   uint64_t NotMask = ~cast<ConstantSDNode>(V->getOperand(1))->getSExtValue();
-  unsigned NotMaskLZ = CountLeadingZeros_64(NotMask);
+  unsigned NotMaskLZ = countLeadingZeros(NotMask);
   if (NotMaskLZ & 7) return Result;  // Must be multiple of a byte.
-  unsigned NotMaskTZ = CountTrailingZeros_64(NotMask);
+  unsigned NotMaskTZ = countTrailingZeros(NotMask);
   if (NotMaskTZ & 7) return Result;  // Must be multiple of a byte.
   if (NotMaskLZ == 64) return Result;  // All zero mask.
@@ -7559,7 +8231,7 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
   // Okay, we can do this!  Replace the 'St' store with a store of IVal that is
   // shifted by ByteShift and truncated down to NumBytes.
   if (ByteShift)
-    IVal = DAG.getNode(ISD::SRL, IVal->getDebugLoc(), IVal.getValueType(), IVal,
+    IVal = DAG.getNode(ISD::SRL, SDLoc(IVal), IVal.getValueType(), IVal,
                        DAG.getConstant(ByteShift*8,
                                     DC->getShiftAmountTy(IVal.getValueType())));
@@ -7574,16 +8246,16 @@ ShrinkLoadReplaceStoreWithStore(const std::pair<unsigned, unsigned> &MaskInfo,
   SDValue Ptr = St->getBasePtr();
   if (StOffset) {
-    Ptr = DAG.getNode(ISD::ADD, IVal->getDebugLoc(), Ptr.getValueType(),
+    Ptr = DAG.getNode(ISD::ADD, SDLoc(IVal), Ptr.getValueType(),
                       Ptr, DAG.getConstant(StOffset, Ptr.getValueType()));
     NewAlign = MinAlign(NewAlign, StOffset);
   }
 
   // Truncate down to the new size.
-  IVal = DAG.getNode(ISD::TRUNCATE, IVal->getDebugLoc(), VT, IVal);
+  IVal = DAG.getNode(ISD::TRUNCATE, SDLoc(IVal), VT, IVal);
 
   ++OpsNarrowed;
-  return DAG.getStore(St->getChain(), St->getDebugLoc(), IVal, Ptr,
+  return DAG.getStore(St->getChain(), SDLoc(St), IVal, Ptr,
                       St->getPointerInfo().getWithOffset(StOffset),
                       false, false, NewAlign).getNode();
 }
@@ -7684,17 +8356,18 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) {
     if (NewAlign < TLI.getDataLayout()->getABITypeAlignment(NewVTTy))
       return SDValue();
 
-    SDValue NewPtr = DAG.getNode(ISD::ADD, LD->getDebugLoc(),
+    SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(LD),
                                  Ptr.getValueType(), Ptr,
                                  DAG.getConstant(PtrOff, Ptr.getValueType()));
-    SDValue NewLD = DAG.getLoad(NewVT, N0.getDebugLoc(),
+    SDValue NewLD = DAG.getLoad(NewVT, SDLoc(N0),
                                 LD->getChain(), NewPtr,
                                 LD->getPointerInfo().getWithOffset(PtrOff),
                                 LD->isVolatile(), LD->isNonTemporal(),
-                                LD->isInvariant(), NewAlign);
-    SDValue NewVal = DAG.getNode(Opc, Value.getDebugLoc(), NewVT, NewLD,
+                                LD->isInvariant(), NewAlign,
+                                LD->getTBAAInfo());
+    SDValue NewVal = DAG.getNode(Opc, SDLoc(Value), NewVT, NewLD,
                                  DAG.getConstant(NewImm, NewVT));
-    SDValue NewST = DAG.getStore(Chain, N->getDebugLoc(),
+    SDValue NewST = DAG.getStore(Chain, SDLoc(N),
                                  NewVal, NewPtr,
                                  ST->getPointerInfo().getWithOffset(PtrOff),
                                  false, false, NewAlign);
@@ -7747,12 +8420,12 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) {
     if (LDAlign < ABIAlign || STAlign < ABIAlign)
       return SDValue();
 
-    SDValue NewLD = DAG.getLoad(IntVT, Value.getDebugLoc(),
+    SDValue NewLD = DAG.getLoad(IntVT, SDLoc(Value),
                                 LD->getChain(), LD->getBasePtr(),
                                 LD->getPointerInfo(),
                                 false, false, false, LDAlign);
 
-    SDValue NewST = DAG.getStore(NewLD.getValue(1), N->getDebugLoc(),
+    SDValue NewST = DAG.getStore(NewLD.getValue(1), SDLoc(N),
                                  NewLD, ST->getBasePtr(),
                                  ST->getPointerInfo(),
                                  false, false, STAlign);
@@ -7802,17 +8475,28 @@ struct BaseIndexOffset {
   static BaseIndexOffset match(SDValue Ptr) {
     bool IsIndexSignExt = false;
 
-    // Just Base or possibly anything else.
+    // We only can pattern match BASE + INDEX + OFFSET. If Ptr is not an ADD
+    // instruction, then it could be just the BASE or everything else we don't
+    // know how to handle. Just use Ptr as BASE and give up.
    if (Ptr->getOpcode() != ISD::ADD)
      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
 
-    // Base + offset.
+    // We know that we have at least an ADD instruction. Try to pattern match
+    // the simple case of BASE + OFFSET.
     if (isa<ConstantSDNode>(Ptr->getOperand(1))) {
       int64_t Offset = cast<ConstantSDNode>(Ptr->getOperand(1))->getSExtValue();
       return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset,
                              IsIndexSignExt);
     }
 
+    // Inside a loop the current BASE pointer is calculated using an ADD and a
+    // MUL instruction. In this case Ptr is the actual BASE pointer.
+    // (i64 add (i64 %array_ptr)
+    //          (i64 mul (i64 %induction_var)
+    //                   (i64 %element_size)))
+    if (Ptr->getOperand(1)->getOpcode() == ISD::MUL)
+      return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt);
+
     // Look at Base + Index + Offset cases.
     SDValue Base = Ptr->getOperand(0);
     SDValue IndexOffset = Ptr->getOperand(1);
@@ -7963,6 +8647,11 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
         Index = STn;
         break;
       } else if (LoadSDNode *Ldn = dyn_cast<LoadSDNode>(NextInChain)) {
+        if (Ldn->isVolatile()) {
+          Index = NULL;
+          break;
+        }
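+        // Note: a volatile load may not be reordered, widened, or merged
+        // with neighbouring accesses, so the merge-candidate scan must stop
+        // at the first volatile load it encounters.
+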
         // Save the load node for later. Continue the scan.
         AliasLoadNodes.push_back(Ldn);
         NextInChain = Ldn->getChain().getNode();
@@ -8080,7 +8769,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
 
   // The earliest Node in the DAG.
   LSBaseSDNode *EarliestOp = StoreNodes[EarliestNodeUsed].MemNode;
-  DebugLoc DL = StoreNodes[0].MemNode->getDebugLoc();
+  SDLoc DL(StoreNodes[0].MemNode);
 
   SDValue StoredVal;
   if (UseVector) {
@@ -8276,8 +8965,8 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) {
     JointMemOpVT = EVT::getIntegerVT(*DAG.getContext(), StoreBW);
   }
 
-  DebugLoc LoadDL = LoadNodes[0].MemNode->getDebugLoc();
-  DebugLoc StoreDL = StoreNodes[0].MemNode->getDebugLoc();
+  SDLoc LoadDL(LoadNodes[0].MemNode);
+  SDLoc StoreDL(StoreNodes[0].MemNode);
 
   LoadSDNode *FirstLoad = cast<LoadSDNode>(LoadNodes[0].MemNode);
   SDValue NewLoad = DAG.getLoad(JointMemOpVT, LoadDL,
@@ -8338,9 +9027,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     if (Align <= OrigAlign &&
         ((!LegalOperations && !ST->isVolatile()) ||
          TLI.isOperationLegalOrCustom(ISD::STORE, SVT)))
-      return DAG.getStore(Chain, N->getDebugLoc(), Value.getOperand(0),
+      return DAG.getStore(Chain, SDLoc(N), Value.getOperand(0),
                           Ptr, ST->getPointerInfo(), ST->isVolatile(),
-                          ST->isNonTemporal(), OrigAlign);
+                          ST->isNonTemporal(), OrigAlign,
+                          ST->getTBAAInfo());
   }
 
   // Turn 'store undef, Ptr' -> nothing.
@@ -8355,7 +9045,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
     // transform should not be done in this case.
     if (Value.getOpcode() != ISD::TargetConstantFP) {
       SDValue Tmp;
-      switch (CFP->getValueType(0).getSimpleVT().SimpleTy) {
+      switch (CFP->getSimpleValueType(0).SimpleTy) {
       default: llvm_unreachable("Unknown FP type");
       case MVT::f16:    // We don't do this for these yet.
       case MVT::f80:
@@ -8367,9 +9057,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
             TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i32)) {
           Tmp = DAG.getConstant((uint32_t)CFP->getValueAPF().
                                 bitcastToAPInt().getZExtValue(), MVT::i32);
-          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
-                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
-                              ST->isNonTemporal(), ST->getAlignment());
+          return DAG.getStore(Chain, SDLoc(N), Tmp,
+                              Ptr, ST->getMemOperand());
         }
         break;
       case MVT::f64:
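// A minimal sketch of the fold just above (hypothetical values): on targets
// where i32 stores are legal, "store float 1.0, %p" becomes
// "store i32 0x3F800000, %p" via bitcastToAPInt(); the MVT::f64 hunk below
// does the same through a single i64 store or, failing that, a pair of i32
// stores.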
@@ -8378,9 +9067,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
             TLI.isOperationLegalOrCustom(ISD::STORE, MVT::i64)) {
           Tmp = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                 getZExtValue(), MVT::i64);
-          return DAG.getStore(Chain, N->getDebugLoc(), Tmp,
-                              Ptr, ST->getPointerInfo(), ST->isVolatile(),
-                              ST->isNonTemporal(), ST->getAlignment());
+          return DAG.getStore(Chain, SDLoc(N), Tmp,
+                              Ptr, ST->getMemOperand());
         }
 
         if (!ST->isVolatile() &&
@@ -8396,19 +9084,20 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
           unsigned Alignment = ST->getAlignment();
           bool isVolatile = ST->isVolatile();
           bool isNonTemporal = ST->isNonTemporal();
+          const MDNode *TBAAInfo = ST->getTBAAInfo();
 
-          SDValue St0 = DAG.getStore(Chain, ST->getDebugLoc(), Lo,
+          SDValue St0 = DAG.getStore(Chain, SDLoc(ST), Lo,
                                      Ptr, ST->getPointerInfo(),
                                      isVolatile, isNonTemporal,
-                                     ST->getAlignment());
-          Ptr = DAG.getNode(ISD::ADD, N->getDebugLoc(), Ptr.getValueType(), Ptr,
+                                     ST->getAlignment(), TBAAInfo);
+          Ptr = DAG.getNode(ISD::ADD, SDLoc(N), Ptr.getValueType(), Ptr,
                             DAG.getConstant(4, Ptr.getValueType()));
           Alignment = MinAlign(Alignment, 4U);
-          SDValue St1 = DAG.getStore(Chain, ST->getDebugLoc(), Hi,
+          SDValue St1 = DAG.getStore(Chain, SDLoc(ST), Hi,
                                      Ptr, ST->getPointerInfo().getWithOffset(4),
                                      isVolatile, isNonTemporal,
-                                     Alignment);
-          return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+                                     Alignment, TBAAInfo);
+          return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
                              St0, St1);
         }
 
@@ -8421,9 +9110,10 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
     if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
       if (Align > ST->getAlignment())
-        return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
+        return DAG.getTruncStore(Chain, SDLoc(N), Value,
                                  Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
-                                 ST->isVolatile(), ST->isNonTemporal(), Align);
+                                 ST->isVolatile(), ST->isNonTemporal(), Align,
+                                 ST->getTBAAInfo());
     }
   }
 
@@ -8433,7 +9123,9 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
   if (NewST.getNode())
     return NewST;
 
-  if (CombinerAA) {
+  bool UseAA = CombinerAA.getNumOccurrences() > 0 ? CombinerAA :
+    TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+  if (UseAA) {
     // Walk up chain skipping non-aliasing memory nodes.
     SDValue BetterChain = FindBetterChain(N, Chain);
 
@@ -8443,19 +9135,15 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
 
       // Replace the chain to avoid dependency.
      if (ST->isTruncatingStore()) {
-        ReplStore = DAG.getTruncStore(BetterChain, N->getDebugLoc(), Value, Ptr,
-                                      ST->getPointerInfo(),
-                                      ST->getMemoryVT(), ST->isVolatile(),
-                                      ST->isNonTemporal(), ST->getAlignment());
+        ReplStore = DAG.getTruncStore(BetterChain, SDLoc(N), Value, Ptr,
+                                      ST->getMemoryVT(), ST->getMemOperand());
      } else {
-        ReplStore = DAG.getStore(BetterChain, N->getDebugLoc(), Value, Ptr,
-                                 ST->getPointerInfo(),
-                                 ST->isVolatile(), ST->isNonTemporal(),
-                                 ST->getAlignment());
+        ReplStore = DAG.getStore(BetterChain, SDLoc(N), Value, Ptr,
+                                 ST->getMemOperand());
      }
 
      // Create token to keep both nodes around.
-      SDValue Token = DAG.getNode(ISD::TokenFactor, N->getDebugLoc(),
+      SDValue Token = DAG.getNode(ISD::TokenFactor, SDLoc(N),
                                   MVT::Other, Chain, ReplStore);
 
      // Make sure the new and old chains are cleaned up.
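// The UseAA computation above is a three-state override: an explicit setting
// of the CombinerAA flag (-combiner-alias-analysis) on the command line wins
// (getNumOccurrences() > 0); otherwise the subtarget's useAA() hook decides.
// The same pattern is repeated for CombinerGlobalAA in isAlias() further
// down.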
@@ -8483,10 +9171,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
                             ST->getMemoryVT().getScalarType().getSizeInBits()));
     AddToWorkList(Value.getNode());
     if (Shorter.getNode())
-      return DAG.getTruncStore(Chain, N->getDebugLoc(), Shorter,
-                               Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
-                               ST->isVolatile(), ST->isNonTemporal(),
-                               ST->getAlignment());
+      return DAG.getTruncStore(Chain, SDLoc(N), Shorter,
+                               Ptr, ST->getMemoryVT(), ST->getMemOperand());
 
     // Otherwise, see if we can simplify the operation with
     // SimplifyDemandedBits, which only works if the value has a single use.
@@ -8516,10 +9202,8 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
       && Value.getNode()->hasOneUse() && ST->isUnindexed() &&
       TLI.isTruncStoreLegal(Value.getOperand(0).getValueType(),
                             ST->getMemoryVT())) {
-    return DAG.getTruncStore(Chain, N->getDebugLoc(), Value.getOperand(0),
-                             Ptr, ST->getPointerInfo(), ST->getMemoryVT(),
-                             ST->isVolatile(), ST->isNonTemporal(),
-                             ST->getAlignment());
+    return DAG.getTruncStore(Chain, SDLoc(N), Value.getOperand(0),
+                             Ptr, ST->getMemoryVT(), ST->getMemOperand());
   }
 
   // Only perform this optimization before the types are legal, because we
@@ -8547,7 +9231,7 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   SDValue InVec = N->getOperand(0);
   SDValue InVal = N->getOperand(1);
   SDValue EltNo = N->getOperand(2);
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
 
   // If the inserted element is an UNDEF, just use the input vector.
   if (InVal.getOpcode() == ISD::UNDEF)
@@ -8568,7 +9252,9 @@ SDValue DAGCombiner::visitINSERT_VECTOR_ELT(SDNode *N) {
   // be converted to a BUILD_VECTOR).  Fill in the Ops vector with the
   // vector elements.
   SmallVector<SDValue, 8> Ops;
-  if (InVec.getOpcode() == ISD::BUILD_VECTOR) {
+  // Do not combine these two vectors if the output vector will not replace
+  // the input vector.
+  if (InVec.getOpcode() == ISD::BUILD_VECTOR && InVec.hasOneUse()) {
     Ops.append(InVec.getNode()->op_begin(),
                InVec.getNode()->op_end());
   } else if (InVec.getOpcode() == ISD::UNDEF) {
@@ -8608,7 +9294,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       SDValue InOp = InVec.getOperand(0);
       if (InOp.getValueType() != NVT) {
         assert(InOp.getValueType().isInteger() && NVT.isInteger());
-        return DAG.getSExtOrTrunc(InOp, InVec.getDebugLoc(), NVT);
+        return DAG.getSExtOrTrunc(InOp, SDLoc(InVec), NVT);
       }
       return InOp;
     }
@@ -8641,8 +9327,8 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
         OrigElt -= NumElem;
       }
 
-      EVT IndexTy = N->getOperand(1).getValueType();
-      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), NVT,
+      EVT IndexTy = TLI.getVectorIdxTy();
+      return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NVT,
                          InVec, DAG.getConstant(OrigElt, IndexTy));
     }
 
@@ -8756,7 +9442,7 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       EVT PtrType = NewPtr.getValueType();
       if (TLI.isBigEndian())
         PtrOff = VT.getSizeInBits() / 8 - PtrOff;
-      NewPtr = DAG.getNode(ISD::ADD, N->getDebugLoc(), PtrType, NewPtr,
+      NewPtr = DAG.getNode(ISD::ADD, SDLoc(N), PtrType, NewPtr,
                            DAG.getConstant(PtrOff, PtrType));
     }
 
@@ -8773,20 +9459,21 @@ SDValue DAGCombiner::visitEXTRACT_VECTOR_ELT(SDNode *N) {
       // extending load instead.
       ISD::LoadExtType ExtType = TLI.isLoadExtLegal(ISD::ZEXTLOAD, LVT) ?
ISD::ZEXTLOAD : ISD::EXTLOAD; - Load = DAG.getExtLoad(ExtType, N->getDebugLoc(), NVT, LN0->getChain(), + Load = DAG.getExtLoad(ExtType, SDLoc(N), NVT, LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LVT, LN0->isVolatile(), LN0->isNonTemporal(),Align); + LVT, LN0->isVolatile(), LN0->isNonTemporal(), + Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); } else { - Load = DAG.getLoad(LVT, N->getDebugLoc(), LN0->getChain(), NewPtr, + Load = DAG.getLoad(LVT, SDLoc(N), LN0->getChain(), NewPtr, LN0->getPointerInfo().getWithOffset(PtrOff), - LN0->isVolatile(), LN0->isNonTemporal(), - LN0->isInvariant(), Align); + LN0->isVolatile(), LN0->isNonTemporal(), + LN0->isInvariant(), Align, LN0->getTBAAInfo()); Chain = Load.getValue(1); if (NVT.bitsLT(LVT)) - Load = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), NVT, Load); + Load = DAG.getNode(ISD::TRUNCATE, SDLoc(N), NVT, Load); else - Load = DAG.getNode(ISD::BITCAST, N->getDebugLoc(), NVT, Load); + Load = DAG.getNode(ISD::BITCAST, SDLoc(N), NVT, Load); } WorkListRemover DeadNodes(*this); SDValue From[] = { SDValue(N, 0), SDValue(LN0,1) }; @@ -8816,7 +9503,7 @@ SDValue DAGCombiner::reduceBuildVecExtToExtBuildVec(SDNode *N) { return SDValue(); unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); // Check to see if this is a BUILD_VECTOR of a bunch of values @@ -8918,7 +9605,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { EVT VT = N->getValueType(0); unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT SrcVT = MVT::Other; unsigned Opcode = ISD::DELETED_NODE; @@ -8983,7 +9670,7 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { SDValue DAGCombiner::visitBUILD_VECTOR(SDNode *N) { unsigned NumInScalars = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); // A vector built entirely of undefs is undef. @@ -9119,8 +9806,35 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { return N->getOperand(0); // Check if all of the operands are undefs. + EVT VT = N->getValueType(0); if (ISD::allOperandsUndef(N)) - return DAG.getUNDEF(N->getValueType(0)); + return DAG.getUNDEF(VT); + + // Optimize concat_vectors where one of the vectors is undef. + if (N->getNumOperands() == 2 && + N->getOperand(1)->getOpcode() == ISD::UNDEF) { + SDValue In = N->getOperand(0); + assert(In.getValueType().isVector() && "Must concat vectors"); + + // Transform: concat_vectors(scalar, undef) -> scalar_to_vector(sclr). + if (In->getOpcode() == ISD::BITCAST && + !In->getOperand(0)->getValueType(0).isVector()) { + SDValue Scalar = In->getOperand(0); + EVT SclTy = Scalar->getValueType(0); + + if (!SclTy.isFloatingPoint() && !SclTy.isInteger()) + return SDValue(); + + EVT NVT = EVT::getVectorVT(*DAG.getContext(), SclTy, + VT.getSizeInBits() / SclTy.getSizeInBits()); + if (!TLI.isTypeLegal(NVT) || !TLI.isTypeLegal(Scalar.getValueType())) + return SDValue(); + + SDLoc dl = SDLoc(N); + SDValue Res = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, NVT, Scalar); + return DAG.getNode(ISD::BITCAST, dl, VT, Res); + } + } // Type legalization of vectors and DAG canonicalization of SHUFFLE_VECTOR // nodes often generate nop CONCAT_VECTOR nodes. @@ -9158,7 +9872,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) { // The extract index must be constant. if (!CS) return SDValue(); - + // Check that we are reading from the identity index. 
     if (CS->getZExtValue() != IdentityIndex)
       return SDValue();
   }
@@ -9166,7 +9880,7 @@ SDValue DAGCombiner::visitCONCAT_VECTORS(SDNode *N) {
 
   if (SingleSource.getNode())
     return SingleSource;
-
+
   return SDValue();
 }
@@ -9179,7 +9893,8 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
     // (extract_subvec (concat V1, V2, ...), i)
     // Into:
     //   Vi if possible
-    // Only operand 0 is checked as 'concat' assumes all inputs of the same type.
+    // Only operand 0 is checked as 'concat' assumes all inputs of the same
+    // type.
     if (V->getOperand(0).getValueType() != NVT)
       return SDValue();
     unsigned Idx = dyn_cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
@@ -9194,7 +9909,7 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) {
     V = V.getOperand(0);
 
   if (V->getOpcode() == ISD::INSERT_SUBVECTOR) {
-    DebugLoc dl = N->getDebugLoc();
+    SDLoc dl(N);
     // Handle only simple case where vector being inserted and vector
     // being extracted are of same type, and are half size of larger vectors.
     EVT BigVT = V->getOperand(0).getValueType();
@@ -9246,22 +9961,36 @@ static SDValue partitionShuffleOfConcats(SDNode *N, SelectionDAG &DAG) {
   for (unsigned I = 0; I != NumConcats; ++I) {
     // Make sure we're dealing with a copy.
     unsigned Begin = I * NumElemsPerConcat;
-    if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
-      return SDValue();
+    bool AllUndef = true, NoUndef = true;
+    for (unsigned J = Begin; J != Begin + NumElemsPerConcat; ++J) {
+      if (SVN->getMaskElt(J) >= 0)
+        AllUndef = false;
+      else
+        NoUndef = false;
+    }
 
-    for (unsigned J = 1; J != NumElemsPerConcat; ++J) {
-      if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
+    if (NoUndef) {
+      if (SVN->getMaskElt(Begin) % NumElemsPerConcat != 0)
         return SDValue();
-    }
 
-    unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
-    if (FirstElt < N0.getNumOperands())
-      Ops.push_back(N0.getOperand(FirstElt));
-    else
-      Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
+      for (unsigned J = 1; J != NumElemsPerConcat; ++J)
+        if (SVN->getMaskElt(Begin + J - 1) + 1 != SVN->getMaskElt(Begin + J))
+          return SDValue();
+
+      unsigned FirstElt = SVN->getMaskElt(Begin) / NumElemsPerConcat;
+      if (FirstElt < N0.getNumOperands())
+        Ops.push_back(N0.getOperand(FirstElt));
+      else
+        Ops.push_back(N1.getOperand(FirstElt - N0.getNumOperands()));
+
+    } else if (AllUndef) {
+      Ops.push_back(DAG.getUNDEF(N0.getOperand(0).getValueType()));
+    } else { // Mixed with general masks and undefs, can't do optimization.
+      return SDValue();
+    }
+  }
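+
+  // Worked example (illustrative): with two v4i32 inputs each built as a
+  // concat of v2i32 halves, a mask of <0, 1, u, u> now produces
+  // concat_vectors(N0.getOperand(0), undef) instead of bailing out, while a
+  // group mixing defined and undef lanes, e.g. <0, u, 2, 3>, still returns
+  // SDValue().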
 
-  return DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, Ops.data(),
+  return DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Ops.data(),
                      Ops.size());
 }
@@ -9288,7 +10017,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
       if (Idx >= (int)NumElts) Idx -= NumElts;
       NewMask.push_back(Idx);
     }
-    return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, DAG.getUNDEF(VT),
+    return DAG.getVectorShuffle(VT, SDLoc(N), N0, DAG.getUNDEF(VT),
                                 &NewMask[0]);
   }
 
@@ -9298,14 +10027,14 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
     for (unsigned i = 0; i != NumElts; ++i) {
       int Idx = SVN->getMaskElt(i);
       if (Idx >= 0) {
-        if (Idx < (int)NumElts)
-          Idx += NumElts;
-        else
+        if (Idx >= (int)NumElts)
           Idx -= NumElts;
+        else
+          Idx = -1; // remove reference to lhs
       }
       NewMask.push_back(Idx);
     }
-    return DAG.getVectorShuffle(VT, N->getDebugLoc(), N1, DAG.getUNDEF(VT),
+    return DAG.getVectorShuffle(VT, SDLoc(N), N1, DAG.getUNDEF(VT),
                                 &NewMask[0]);
   }
 
@@ -9322,7 +10051,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
       NewMask.push_back(Idx);
     }
     if (Changed)
-      return DAG.getVectorShuffle(VT, N->getDebugLoc(), N0, N1, &NewMask[0]);
+      return DAG.getVectorShuffle(VT, SDLoc(N), N0, N1, &NewMask[0]);
   }
 
   // If it is a splat, check if the argument vector is another splat or a
@@ -9419,7 +10148,7 @@ SDValue DAGCombiner::visitVECTOR_SHUFFLE(SDNode *N) {
 ///      vector_shuffle V, Zero, <0, 4, 2, 4>
 SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
   EVT VT = N->getValueType(0);
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   SDValue LHS = N->getOperand(0);
   SDValue RHS = N->getOperand(1);
   if (N->getOpcode() == ISD::AND) {
@@ -9450,7 +10179,7 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) {
       EVT EltVT = RVT.getVectorElementType();
       SmallVector<SDValue, 8> ZeroOps(RVT.getVectorNumElements(),
                                       DAG.getConstant(0, EltVT));
-      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+      SDValue Zero = DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N),
                                  RVT, &ZeroOps[0], ZeroOps.size());
       LHS = DAG.getNode(ISD::BITCAST, dl, RVT, LHS);
       SDValue Shuf = DAG.getVectorShuffle(RVT, dl, LHS, Zero, &Indices[0]);
@@ -9506,13 +10235,13 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) {
         // legalization, the types may not match between the two BUILD_VECTORS.
         // Truncate one of the operands to make them match.
if (RVT.getSizeInBits() > VT.getSizeInBits()) { - RHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, RHSOp); + RHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, RHSOp); } else { - LHSOp = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), RVT, LHSOp); + LHSOp = DAG.getNode(ISD::TRUNCATE, SDLoc(N), RVT, LHSOp); VT = RVT; } } - SDValue FoldOp = DAG.getNode(N->getOpcode(), LHS.getDebugLoc(), VT, + SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(LHS), VT, LHSOp, RHSOp); if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::Constant && @@ -9523,7 +10252,7 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { } if (Ops.size() == LHS.getNumOperands()) - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), LHS.getValueType(), &Ops[0], Ops.size()); } @@ -9548,7 +10277,7 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { Op.getOpcode() != ISD::ConstantFP) break; EVT EltVT = Op.getValueType(); - SDValue FoldOp = DAG.getNode(N->getOpcode(), N0.getDebugLoc(), EltVT, Op); + SDValue FoldOp = DAG.getNode(N->getOpcode(), SDLoc(N0), EltVT, Op); if (FoldOp.getOpcode() != ISD::UNDEF && FoldOp.getOpcode() != ISD::ConstantFP) break; @@ -9559,11 +10288,11 @@ SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { if (Ops.size() != N0.getNumOperands()) return SDValue(); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N0.getValueType(), &Ops[0], Ops.size()); } -SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, +SDValue DAGCombiner::SimplifySelect(SDLoc DL, SDValue N0, SDValue N1, SDValue N2){ assert(N0.getOpcode() ==ISD::SETCC && "First argument must be a SetCC node!"); @@ -9577,13 +10306,13 @@ SDValue DAGCombiner::SimplifySelect(DebugLoc DL, SDValue N0, // Check to see if we got a select_cc back (to turn into setcc/select). // Otherwise, just return whatever node we got back, like fabs. 
if (SCC.getOpcode() == ISD::SELECT_CC) { - SDValue SETCC = DAG.getNode(ISD::SETCC, N0.getDebugLoc(), + SDValue SETCC = DAG.getNode(ISD::SETCC, SDLoc(N0), N0.getValueType(), SCC.getOperand(0), SCC.getOperand(1), SCC.getOperand(4)); AddToWorkList(SETCC.getNode()); - return DAG.getNode(ISD::SELECT, SCC.getDebugLoc(), SCC.getValueType(), - SCC.getOperand(2), SCC.getOperand(3), SETCC); + return DAG.getSelect(SDLoc(SCC), SCC.getValueType(), + SCC.getOperand(2), SCC.getOperand(3), SETCC); } return SCC; @@ -9652,10 +10381,10 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, if (LLD->isPredecessorOf(RLD) || RLD->isPredecessorOf(LLD)) return false; - Addr = DAG.getNode(ISD::SELECT, TheSelect->getDebugLoc(), - LLD->getBasePtr().getValueType(), - TheSelect->getOperand(0), LLD->getBasePtr(), - RLD->getBasePtr()); + Addr = DAG.getSelect(SDLoc(TheSelect), + LLD->getBasePtr().getValueType(), + TheSelect->getOperand(0), LLD->getBasePtr(), + RLD->getBasePtr()); } else { // Otherwise SELECT_CC SDNode *CondLHS = TheSelect->getOperand(0).getNode(); SDNode *CondRHS = TheSelect->getOperand(1).getNode(); @@ -9666,7 +10395,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, (RLD->isPredecessorOf(CondLHS) || RLD->isPredecessorOf(CondRHS)))) return false; - Addr = DAG.getNode(ISD::SELECT_CC, TheSelect->getDebugLoc(), + Addr = DAG.getNode(ISD::SELECT_CC, SDLoc(TheSelect), LLD->getBasePtr().getValueType(), TheSelect->getOperand(0), TheSelect->getOperand(1), @@ -9677,17 +10406,17 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, SDValue Load; if (LLD->getExtensionType() == ISD::NON_EXTLOAD) { Load = DAG.getLoad(TheSelect->getValueType(0), - TheSelect->getDebugLoc(), - // FIXME: Discards pointer info. + SDLoc(TheSelect), + // FIXME: Discards pointer and TBAA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->isInvariant(), LLD->getAlignment()); } else { Load = DAG.getExtLoad(LLD->getExtensionType() == ISD::EXTLOAD ? RLD->getExtensionType() : LLD->getExtensionType(), - TheSelect->getDebugLoc(), + SDLoc(TheSelect), TheSelect->getValueType(0), - // FIXME: Discards pointer info. + // FIXME: Discards pointer and TBAA info. LLD->getChain(), Addr, MachinePointerInfo(), LLD->getMemoryVT(), LLD->isVolatile(), LLD->isNonTemporal(), LLD->getAlignment()); @@ -9708,7 +10437,7 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, /// SimplifySelectCC - Simplify an expression of the form (N0 cond N1) ? N2 : N3 /// where 'cond' is the comparison specified by CC. -SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, +SDValue DAGCombiner::SimplifySelectCC(SDLoc DL, SDValue N0, SDValue N1, SDValue N2, SDValue N3, ISD::CondCode CC, bool NotExtCompare) { // (x ? y : y) -> y. 
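// For orientation in the SimplifySelectCC hunks that follow (an illustrative
// instance, not new code): the combiner turns patterns such as
//   (select_cc setlt X, 0, A, 0)
// into a branchless mask sequence like
//   (and (sra X, bitwidth(X)-1), A)
// where the arithmetic shift materializes the comparison result as an
// all-ones or all-zero value.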
@@ -9720,7 +10449,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
   ConstantSDNode *N3C = dyn_cast<ConstantSDNode>(N3.getNode());
 
   // Determine if the condition we're dealing with is constant
-  SDValue SCC = SimplifySetCC(TLI.getSetCCResultType(N0.getValueType()),
+  SDValue SCC = SimplifySetCC(getSetCCResultType(N0.getValueType()),
                               N0, N1, CC, DL, false);
   if (SCC.getNode()) AddToWorkList(SCC.getNode());
   ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(SCC.getNode());
@@ -9786,13 +10515,13 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
       SDValue One = DAG.getIntPtrConstant(EltSize);
 
       SDValue Cond = DAG.getSetCC(DL,
-                                  TLI.getSetCCResultType(N0.getValueType()),
+                                  getSetCCResultType(N0.getValueType()),
                                   N0, N1, CC);
       AddToWorkList(Cond.getNode());
-      SDValue CstOffset = DAG.getNode(ISD::SELECT, DL, Zero.getValueType(),
-                                      Cond, One, Zero);
+      SDValue CstOffset = DAG.getSelect(DL, Zero.getValueType(),
+                                        Cond, One, Zero);
       AddToWorkList(CstOffset.getNode());
-      CPIdx = DAG.getNode(ISD::ADD, DL, TLI.getPointerTy(), CPIdx,
+      CPIdx = DAG.getNode(ISD::ADD, DL, CPIdx.getValueType(), CPIdx,
                           CstOffset);
       AddToWorkList(CPIdx.getNode());
       return DAG.getLoad(TV->getValueType(0), DL, DAG.getEntryNode(), CPIdx,
@@ -9817,7 +10546,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
         ShCtV = XType.getSizeInBits()-ShCtV-1;
         SDValue ShCt = DAG.getConstant(ShCtV,
                                        getShiftAmountTy(N0.getValueType()));
-        SDValue Shift = DAG.getNode(ISD::SRL, N0.getDebugLoc(),
+        SDValue Shift = DAG.getNode(ISD::SRL, SDLoc(N0),
                                     XType, N0, ShCt);
         AddToWorkList(Shift.getNode());
 
@@ -9829,7 +10558,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
         return DAG.getNode(ISD::AND, DL, AType, Shift, N2);
       }
 
-      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(),
+      SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0),
                                   XType, N0,
                                   DAG.getConstant(XType.getSizeInBits()-1,
                                          getShiftAmountTy(N0.getValueType())));
@@ -9862,14 +10591,14 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
 
       SDValue ShlAmt =
         DAG.getConstant(AndMask.countLeadingZeros(),
                         getShiftAmountTy(AndLHS.getValueType()));
-      SDValue Shl = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, AndLHS, ShlAmt);
+      SDValue Shl = DAG.getNode(ISD::SHL, SDLoc(N0), VT, AndLHS, ShlAmt);
 
       // Now arithmetic right shift it all the way over, so the result is either
       // all-ones, or zero.
       SDValue ShrAmt =
        DAG.getConstant(AndMask.getBitWidth()-1,
                        getShiftAmountTy(Shl.getValueType()));
-      SDValue Shr = DAG.getNode(ISD::SRA, N0.getDebugLoc(), VT, Shl, ShrAmt);
+      SDValue Shr = DAG.getNode(ISD::SRA, SDLoc(N0), VT, Shl, ShrAmt);
 
       return DAG.getNode(ISD::AND, DL, VT, Shr, N3);
     }
@@ -9889,21 +10618,21 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
     // NOTE: Don't create a SETCC if it's not legal on this target.
     if (!LegalOperations ||
         TLI.isOperationLegal(ISD::SETCC,
-          LegalTypes ? TLI.getSetCCResultType(N0.getValueType()) : MVT::i1)) {
+          LegalTypes ?
getSetCCResultType(N0.getValueType()) : MVT::i1)) { SDValue Temp, SCC; // cast from setcc result type to select result type if (LegalTypes) { - SCC = DAG.getSetCC(DL, TLI.getSetCCResultType(N0.getValueType()), + SCC = DAG.getSetCC(DL, getSetCCResultType(N0.getValueType()), N0, N1, CC); if (N2.getValueType().bitsLT(SCC.getValueType())) - Temp = DAG.getZeroExtendInReg(SCC, N2.getDebugLoc(), + Temp = DAG.getZeroExtendInReg(SCC, SDLoc(N2), N2.getValueType()); else - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC); } else { - SCC = DAG.getSetCC(N0.getDebugLoc(), MVT::i1, N0, N1, CC); - Temp = DAG.getNode(ISD::ZERO_EXTEND, N2.getDebugLoc(), + SCC = DAG.getSetCC(SDLoc(N0), MVT::i1, N0, N1, CC); + Temp = DAG.getNode(ISD::ZERO_EXTEND, SDLoc(N2), N2.getValueType(), SCC); } @@ -9914,9 +10643,10 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, return Temp; // shl setcc result by log2 n2c - return DAG.getNode(ISD::SHL, DL, N2.getValueType(), Temp, - DAG.getConstant(N2C->getAPIntValue().logBase2(), - getShiftAmountTy(Temp.getValueType()))); + return DAG.getNode( + ISD::SHL, DL, N2.getValueType(), Temp, + DAG.getConstant(N2C->getAPIntValue().logBase2(), + getShiftAmountTy(Temp.getValueType()))); } } @@ -9926,8 +10656,8 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (0 && N3C && N3C->isNullValue() && N2C && (N2C->getAPIntValue() == 1ULL)) { EVT XType = N0.getValueType(); if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(XType))) { - SDValue Res = DAG.getSetCC(DL, TLI.getSetCCResultType(XType), N0, N1, CC); + TLI.isOperationLegal(ISD::SETCC, getSetCCResultType(XType))) { + SDValue Res = DAG.getSetCC(DL, getSetCCResultType(XType), N0, N1, CC); if (Res.getValueType() != VT) Res = DAG.getNode(ISD::ZERO_EXTEND, DL, VT, Res); return Res; @@ -9937,16 +10667,16 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, if (N1C && N1C->isNullValue() && CC == ISD::SETEQ && (!LegalOperations || TLI.isOperationLegal(ISD::CTLZ, XType))) { - SDValue Ctlz = DAG.getNode(ISD::CTLZ, N0.getDebugLoc(), XType, N0); + SDValue Ctlz = DAG.getNode(ISD::CTLZ, SDLoc(N0), XType, N0); return DAG.getNode(ISD::SRL, DL, XType, Ctlz, DAG.getConstant(Log2_32(XType.getSizeInBits()), getShiftAmountTy(Ctlz.getValueType()))); } // fold (setgt X, 0) -> (srl (and (-X, ~X), size(X)-1)) if (N1C && N1C->isNullValue() && CC == ISD::SETGT) { - SDValue NegN0 = DAG.getNode(ISD::SUB, N0.getDebugLoc(), + SDValue NegN0 = DAG.getNode(ISD::SUB, SDLoc(N0), XType, DAG.getConstant(0, XType), N0); - SDValue NotN0 = DAG.getNOT(N0.getDebugLoc(), N0, XType); + SDValue NotN0 = DAG.getNOT(SDLoc(N0), N0, XType); return DAG.getNode(ISD::SRL, DL, XType, DAG.getNode(ISD::AND, DL, XType, NegN0, NotN0), DAG.getConstant(XType.getSizeInBits()-1, @@ -9954,7 +10684,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, } // fold (setgt X, -1) -> (xor (srl (X, size(X)-1), 1)) if (N1C && N1C->isAllOnesValue() && CC == ISD::SETGT) { - SDValue Sign = DAG.getNode(ISD::SRL, N0.getDebugLoc(), XType, N0, + SDValue Sign = DAG.getNode(ISD::SRL, SDLoc(N0), XType, N0, DAG.getConstant(XType.getSizeInBits()-1, getShiftAmountTy(N0.getValueType()))); return DAG.getNode(ISD::XOR, DL, XType, Sign, DAG.getConstant(1, XType)); @@ -9980,11 +10710,11 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1, EVT XType = N0.getValueType(); if (SubC && 
         SubC->isNullValue() && XType.isInteger()) {
-      SDValue Shift = DAG.getNode(ISD::SRA, N0.getDebugLoc(), XType,
+      SDValue Shift = DAG.getNode(ISD::SRA, SDLoc(N0), XType,
                                   N0,
                                   DAG.getConstant(XType.getSizeInBits()-1,
                                          getShiftAmountTy(N0.getValueType())));
-      SDValue Add = DAG.getNode(ISD::ADD, N0.getDebugLoc(),
+      SDValue Add = DAG.getNode(ISD::ADD, SDLoc(N0),
                                 XType, N0, Shift);
       AddToWorkList(Shift.getNode());
       AddToWorkList(Add.getNode());
@@ -9998,7 +10728,7 @@ SDValue DAGCombiner::SimplifySelectCC(DebugLoc DL, SDValue N0, SDValue N1,
 /// SimplifySetCC - This is a stub for TargetLowering::SimplifySetCC.
 SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0,
                                    SDValue N1, ISD::CondCode Cond,
-                                   DebugLoc DL, bool foldBooleans) {
+                                   SDLoc DL, bool foldBooleans) {
   TargetLowering::DAGCombinerInfo
     DagCombineInfo(DAG, Level, false, this);
   return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL);
@@ -10072,17 +10802,20 @@ static bool FindBaseOffset(SDValue Ptr, SDValue &Base, int64_t &Offset,
 
 /// isAlias - Return true if there is any possibility that the two addresses
 /// overlap.
-bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
+bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, bool IsVolatile1,
                           const Value *SrcValue1, int SrcValueOffset1,
                           unsigned SrcValueAlign1,
                           const MDNode *TBAAInfo1,
-                          SDValue Ptr2, int64_t Size2,
+                          SDValue Ptr2, int64_t Size2, bool IsVolatile2,
                           const Value *SrcValue2, int SrcValueOffset2,
                           unsigned SrcValueAlign2,
                           const MDNode *TBAAInfo2) const {
   // If they are the same then they must be aliases.
   if (Ptr1 == Ptr2) return true;
 
+  // If they are both volatile then they cannot be reordered.
+  if (IsVolatile1 && IsVolatile2) return true;
+
   // Gather base node and offset information.
   SDValue Base1, Base2;
   int64_t Offset1, Offset2;
@@ -10127,7 +10860,9 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
     return false;
   }
 
-  if (CombinerGlobalAA) {
+  bool UseAA = CombinerGlobalAA.getNumOccurrences() > 0 ? CombinerGlobalAA :
+    TLI.getTargetMachine().getSubtarget<TargetSubtargetInfo>().useAA();
+  if (UseAA && SrcValue1 && SrcValue2) {
     // Use alias analysis information.
     int64_t MinOffset = std::min(SrcValueOffset1, SrcValueOffset2);
     int64_t Overlap1 = Size1 + SrcValueOffset1 - MinOffset;
@@ -10146,24 +10881,25 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1,
 bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) {
   SDValue Ptr0, Ptr1;
   int64_t Size0, Size1;
+  bool IsVolatile0, IsVolatile1;
   const Value *SrcValue0, *SrcValue1;
   int SrcValueOffset0, SrcValueOffset1;
   unsigned SrcValueAlign0, SrcValueAlign1;
   const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1;
-  FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0,
+  FindAliasInfo(Op0, Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
                 SrcValueAlign0, SrcTBAAInfo0);
-  FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1,
+  FindAliasInfo(Op1, Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
                 SrcValueAlign1, SrcTBAAInfo1);
-  return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0,
+  return isAlias(Ptr0, Size0, IsVolatile0, SrcValue0, SrcValueOffset0,
                  SrcValueAlign0, SrcTBAAInfo0,
-                 Ptr1, Size1, SrcValue1, SrcValueOffset1,
+                 Ptr1, Size1, IsVolatile1, SrcValue1, SrcValueOffset1,
                  SrcValueAlign1, SrcTBAAInfo1);
 }
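
// Note on the volatile handling introduced above: two volatile accesses must
// be reported as aliasing even when their addresses are provably disjoint,
// since volatile ordering itself (not just memory overlap) has to be
// preserved; returning true keeps the combiner from reordering them.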
 
 /// FindAliasInfo - Extracts the relevant alias information from the memory
-/// node.  Returns true if the operand was a load.
+/// node.  Returns true if the operand was a nonvolatile load.
 bool DAGCombiner::FindAliasInfo(SDNode *N,
-                                SDValue &Ptr, int64_t &Size,
+                                SDValue &Ptr, int64_t &Size, bool &IsVolatile,
                                 const Value *&SrcValue,
                                 int &SrcValueOffset,
                                 unsigned &SrcValueAlign,
@@ -10172,29 +10908,31 @@ bool DAGCombiner::FindAliasInfo(SDNode *N,
 
   Ptr = LS->getBasePtr();
   Size = LS->getMemoryVT().getSizeInBits() >> 3;
+  IsVolatile = LS->isVolatile();
   SrcValue = LS->getSrcValue();
   SrcValueOffset = LS->getSrcValueOffset();
   SrcValueAlign = LS->getOriginalAlignment();
   TBAAInfo = LS->getTBAAInfo();
-  return isa<LoadSDNode>(LS);
+  return isa<LoadSDNode>(LS) && !IsVolatile;
 }
 
 /// GatherAllAliases - Walk up chain skipping non-aliasing memory nodes,
 /// looking for aliasing nodes and adding them to the Aliases vector.
 void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
-                                   SmallVector<SDValue, 8> &Aliases) {
+                                   SmallVectorImpl<SDValue> &Aliases) {
   SmallVector<SDValue, 8> Chains;     // List of chains to visit.
   SmallPtrSet<SDNode *, 16> Visited;  // Visited node set.
 
   // Get alias information for node.
   SDValue Ptr;
   int64_t Size;
+  bool IsVolatile;
   const Value *SrcValue;
   int SrcValueOffset;
   unsigned SrcValueAlign;
   const MDNode *SrcTBAAInfo;
-  bool IsLoad = FindAliasInfo(N, Ptr, Size, SrcValue, SrcValueOffset,
-                              SrcValueAlign, SrcTBAAInfo);
+  bool IsLoad = FindAliasInfo(N, Ptr, Size, IsVolatile, SrcValue,
+                              SrcValueOffset, SrcValueAlign, SrcTBAAInfo);
 
   // Starting off.
   Chains.push_back(OriginalChain);
@@ -10235,20 +10973,21 @@ void DAGCombiner::GatherAllAliases(SDNode *N, SDValue OriginalChain,
       // Get alias information for Chain.
       SDValue OpPtr;
       int64_t OpSize;
+      bool OpIsVolatile;
       const Value *OpSrcValue;
       int OpSrcValueOffset;
       unsigned OpSrcValueAlign;
       const MDNode *OpSrcTBAAInfo;
       bool IsOpLoad = FindAliasInfo(Chain.getNode(), OpPtr, OpSize,
-                                    OpSrcValue, OpSrcValueOffset,
+                                    OpIsVolatile, OpSrcValue, OpSrcValueOffset,
                                     OpSrcValueAlign, OpSrcTBAAInfo);
 
       // If chain is alias then stop here.
       if (!(IsLoad && IsOpLoad) &&
-          isAlias(Ptr, Size, SrcValue, SrcValueOffset, SrcValueAlign,
-                  SrcTBAAInfo,
-                  OpPtr, OpSize, OpSrcValue, OpSrcValueOffset,
+          isAlias(Ptr, Size, IsVolatile, SrcValue, SrcValueOffset,
+                  SrcValueAlign, SrcTBAAInfo,
+                  OpPtr, OpSize, OpIsVolatile, OpSrcValue, OpSrcValueOffset,
                   OpSrcValueAlign, OpSrcTBAAInfo)) {
         Aliases.push_back(Chain);
       } else {
@@ -10298,7 +11037,7 @@ SDValue DAGCombiner::FindBetterChain(SDNode *N, SDValue OldChain) {
     return Aliases[0];
 
   // Construct a custom tailored token factor.
-  return DAG.getNode(ISD::TokenFactor, N->getDebugLoc(), MVT::Other,
+  return DAG.getNode(ISD::TokenFactor, SDLoc(N), MVT::Other,
                      &Aliases[0], Aliases.size());
 }
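
// A compressed sketch of what GatherAllAliases/FindBetterChain accomplish,
// using hypothetical nodes: if a load's chain is TokenFactor(store A,
// store B) and only store A can alias the load, the walk above rebuilds the
// chain as just store A (or a smaller TokenFactor), so the load no longer
// serializes behind the unrelated store B.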

diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index e096a23..a6f7461 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -41,6 +41,7 @@
 #define DEBUG_TYPE "isel"
 #include "llvm/CodeGen/FastISel.h"
+#include "llvm/ADT/Optional.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/Loads.h"
 #include "llvm/CodeGen/Analysis.h"
@@ -89,18 +90,16 @@ bool FastISel::LowerArguments() {
     // Fallback to SDISel argument lowering code to deal with sret pointer
     // parameter.
     return false;
-
+
   if (!FastLowerArguments())
     return false;
 
-  // Enter non-dead arguments into ValueMap for uses in non-entry BBs.
+  // Enter arguments into ValueMap for uses in non-entry BBs.
   for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(),
          E = FuncInfo.Fn->arg_end(); I != E; ++I) {
-    if (!I->use_empty()) {
-      DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
-      assert(VI != LocalValueMap.end() && "Missed an argument?");
-      FuncInfo.ValueMap[I] = VI->second;
-    }
+    DenseMap<const Value *, unsigned>::iterator VI = LocalValueMap.find(I);
+    assert(VI != LocalValueMap.end() && "Missed an argument?");
+    FuncInfo.ValueMap[I] = VI->second;
   }
   return true;
 }
@@ -598,7 +597,10 @@ bool FastISel::SelectCall(const User *I) {
 
   case Intrinsic::dbg_declare: {
     const DbgDeclareInst *DI = cast<DbgDeclareInst>(Call);
-    if (!DIVariable(DI->getVariable()).Verify() ||
+    DIVariable DIVar(DI->getVariable());
+    assert((!DIVar || DIVar.isVariable()) &&
+           "Variable in DbgDeclareInst should be either null or a DIVariable.");
+    if (!DIVar ||
        !FuncInfo.MF->getMMI().hasDebugInfo()) {
      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
      return true;
@@ -610,16 +612,16 @@ bool FastISel::SelectCall(const User *I) {
       return true;
     }
 
-    unsigned Reg = 0;
     unsigned Offset = 0;
-    if (const Argument *Arg = dyn_cast<Argument>(Address)) {
+    Optional<MachineOperand> Op;
+    if (const Argument *Arg = dyn_cast<Argument>(Address))
       // Some arguments' frame index is recorded during argument lowering.
       Offset = FuncInfo.getArgumentFrameIndex(Arg);
-      if (Offset)
-        Reg = TRI.getFrameRegister(*FuncInfo.MF);
-    }
-    if (!Reg)
-      Reg = lookUpRegForValue(Address);
+    if (Offset)
+      Op = MachineOperand::CreateFI(Offset);
+    if (!Op)
+      if (unsigned Reg = lookUpRegForValue(Address))
+        Op = MachineOperand::CreateReg(Reg, false);
 
     // If we have a VLA that has a "use" in a metadata node that's then used
     // here but it has no other uses, then we have a problem. E.g.,
    //
    // If we assign 'a' a vreg and fast isel later on has to use the selection
    // DAG isel, it will want to copy the value to the vreg. However, there are
    // no uses, which goes counter to what selection DAG isel expects.
-    if (!Reg && !Address->use_empty() && isa<Instruction>(Address) &&
+    if (!Op && !Address->use_empty() && isa<Instruction>(Address) &&
        (!isa<AllocaInst>(Address) ||
         !FuncInfo.StaticAllocaMap.count(cast<AllocaInst>(Address))))
-      Reg = FuncInfo.InitializeRegForValue(Address);
-
-    if (Reg)
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
-              TII.get(TargetOpcode::DBG_VALUE))
-        .addReg(Reg, RegState::Debug).addImm(Offset)
-        .addMetadata(DI->getVariable());
-    else
+      Op = MachineOperand::CreateReg(FuncInfo.InitializeRegForValue(Address),
+                                     false);
+
+    if (Op) {
+      if (Op->isReg()) {
+        Op->setIsDebug(true);
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                TII.get(TargetOpcode::DBG_VALUE), false, Op->getReg(), 0,
+                DI->getVariable());
+      } else
+        BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL,
+                TII.get(TargetOpcode::DBG_VALUE))
+            .addOperand(*Op)
+            .addImm(0)
+            .addMetadata(DI->getVariable());
+    } else {
      // We can't yet handle anything else here because it would require
      // generating code, thus altering codegen because of debug info.
-      DEBUG(dbgs() << "Dropping debug info for " << DI);
+      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
+    }
     return true;
   }
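  // The dbg_declare path above ends up emitting one of two DBG_VALUE forms
  // (sketched in rough MIR-like syntax, not exact output):
  //   DBG_VALUE <fi#N>, 0, !var      ; frame-index operand for arguments
  //   DBG_VALUE %vregR, ..., !var    ; register operand, marked IsDebug so
  //                                  ; it does not constrain the allocator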
   case Intrinsic::dbg_value: {
@@ -673,13 +684,14 @@ bool FastISel::SelectCall(const User *I) {
         .addFPImm(CF).addImm(DI->getOffset())
         .addMetadata(DI->getVariable());
     } else if (unsigned Reg = lookUpRegForValue(V)) {
-      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II)
-        .addReg(Reg, RegState::Debug).addImm(DI->getOffset())
-        .addMetadata(DI->getVariable());
+      // FIXME: This does not handle register-indirect values at offset 0.
+      bool IsIndirect = DI->getOffset() != 0;
+      BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, IsIndirect,
+              Reg, DI->getOffset(), DI->getVariable());
     } else {
       // We can't yet handle anything else here because it would require
       // generating code, thus altering codegen because of debug info.
-      DEBUG(dbgs() << "Dropping debug info for " << DI);
+      DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n");
     }
     return true;
   }
@@ -1559,4 +1571,19 @@ bool FastISel::tryToFoldLoad(const LoadInst *LI, const Instruction *FoldInst) {
   return tryToFoldLoadIntoMI(User, RI.getOperandNo(), LI);
 }
 
+bool FastISel::canFoldAddIntoGEP(const User *GEP, const Value *Add) {
+  // Must be an add.
+  if (!isa<AddOperator>(Add))
+    return false;
+  // Type size needs to match.
+  if (TD.getTypeSizeInBits(GEP->getType()) !=
+      TD.getTypeSizeInBits(Add->getType()))
+    return false;
+  // Must be in the same basic block.
+  if (isa<Instruction>(Add) &&
+      FuncInfo.MBBMap[cast<Instruction>(Add)->getParent()] != FuncInfo.MBB)
+    return false;
+  // Must have a constant operand.
+  return isa<ConstantInt>(cast<AddOperator>(Add)->getOperand(1));
+}
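+
+// Usage sketch for the helper above (hypothetical caller, modeled on the
+// target FastISel address-selection loops it is meant to serve):
+//
+//   if (canFoldAddIntoGEP(GEPUser, Add)) {
+//     // Fold the constant into the addressing-mode offset and keep the
+//     // variable part as the base.
+//     Offset += cast<ConstantInt>(cast<AddOperator>(Add)->getOperand(1))
+//                   ->getSExtValue();
+//     Base = cast<AddOperator>(Add)->getOperand(0);
+//   }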
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
index b46edad..4309dc1 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp
@@ -55,21 +55,19 @@ static bool isUsedOutsideOfDefiningBlock(const Instruction *I) {
   return false;
 }
 
-FunctionLoweringInfo::FunctionLoweringInfo(const TargetLowering &tli)
-  : TLI(tli) {
-}
-
 void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
+  const TargetLowering *TLI = TM.getTargetLowering();
+
   Fn = &fn;
   MF = &mf;
   RegInfo = &MF->getRegInfo();
 
   // Check whether the function can return without sret-demotion.
   SmallVector<ISD::OutputArg, 4> Outs;
-  GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, TLI);
-  CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF,
-                                      Fn->isVarArg(),
-                                      Outs, Fn->getContext());
+  GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, *TLI);
+  CanLowerReturn = TLI->CanLowerReturn(Fn->getCallingConv(), *MF,
+                                       Fn->isVarArg(),
+                                       Outs, Fn->getContext());
 
   // Initialize the mapping of values to registers.  This is only set up for
   // instruction values that are used outside of the block that defines
@@ -79,9 +77,9 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
     if (const AllocaInst *AI = dyn_cast<AllocaInst>(I))
       if (const ConstantInt *CUI = dyn_cast<ConstantInt>(AI->getArraySize())) {
         Type *Ty = AI->getAllocatedType();
-        uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty);
+        uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty);
         unsigned Align =
-          std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty),
+          std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty),
                    AI->getAlignment());
 
         TySize *= CUI->getZExtValue();   // Get total allocated size.
@@ -114,8 +112,11 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
         // in a predictable order.
         if (const DbgDeclareInst *DI = dyn_cast<DbgDeclareInst>(I)) {
           MachineModuleInfo &MMI = MF->getMMI();
+          DIVariable DIVar(DI->getVariable());
+          assert((!DIVar || DIVar.isVariable()) &&
+            "Variable in DbgDeclareInst should be either null or a DIVariable.");
           if (MMI.hasDebugInfo() &&
-              DIVariable(DI->getVariable()).Verify() &&
+              DIVar &&
              !DI->getDebugLoc().isUnknown()) {
            // Don't handle byval struct arguments or VLAs, for example.
            // Non-byval arguments are handled here (they refer to the stack
@@ -167,10 +168,10 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) {
       assert(PHIReg && "PHI node does not have an assigned virtual register!");
 
       SmallVector<EVT, 4> ValueVTs;
-      ComputeValueVTs(TLI, PN->getType(), ValueVTs);
+      ComputeValueVTs(*TLI, PN->getType(), ValueVTs);
       for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) {
         EVT VT = ValueVTs[vti];
-        unsigned NumRegisters = TLI.getNumRegisters(Fn->getContext(), VT);
+        unsigned NumRegisters = TLI->getNumRegisters(Fn->getContext(), VT);
         const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
         for (unsigned i = 0; i != NumRegisters; ++i)
           BuildMI(MBB, DL, TII->get(TargetOpcode::PHI), PHIReg + i);
@@ -208,7 +209,8 @@ void FunctionLoweringInfo::clear() {
 
 /// CreateReg - Allocate a single virtual register for the given type.
 unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
-  return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT));
+  return RegInfo->
+    createVirtualRegister(TM.getTargetLowering()->getRegClassFor(VT));
 }
 
 /// CreateRegs - Allocate the appropriate number of virtual registers of
@@ -219,15 +221,17 @@ unsigned FunctionLoweringInfo::CreateReg(MVT VT) {
 /// will assign registers for each member or element.
 ///
 unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) {
+  const TargetLowering *TLI = TM.getTargetLowering();
+
   SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, Ty, ValueVTs);
+  ComputeValueVTs(*TLI, Ty, ValueVTs);
 
   unsigned FirstReg = 0;
   for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) {
     EVT ValueVT = ValueVTs[Value];
-    MVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT);
+    MVT RegisterVT = TLI->getRegisterType(Ty->getContext(), ValueVT);
 
-    unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT);
+    unsigned NumRegs = TLI->getNumRegisters(Ty->getContext(), ValueVT);
     for (unsigned i = 0; i != NumRegs; ++i) {
       unsigned R = CreateReg(RegisterVT);
       if (!FirstReg) FirstReg = R;
@@ -266,15 +270,17 @@ void FunctionLoweringInfo::ComputePHILiveOutRegInfo(const PHINode *PN) {
   if (!Ty->isIntegerTy() || Ty->isVectorTy())
     return;
 
+  const TargetLowering *TLI = TM.getTargetLowering();
+
   SmallVector<EVT, 4> ValueVTs;
-  ComputeValueVTs(TLI, Ty, ValueVTs);
+  ComputeValueVTs(*TLI, Ty, ValueVTs);
   assert(ValueVTs.size() == 1 &&
         "PHIs with non-vector integer types should have a single VT.");
   EVT IntVT = ValueVTs[0];
 
-  if (TLI.getNumRegisters(PN->getContext(), IntVT) != 1)
+  if (TLI->getNumRegisters(PN->getContext(), IntVT) != 1)
     return;
-  IntVT = TLI.getTypeToTransformTo(PN->getContext(), IntVT);
+  IntVT = TLI->getTypeToTransformTo(PN->getContext(), IntVT);
   unsigned BitWidth = IntVT.getSizeInBits();
 
   unsigned DestReg = ValueMap[PN];
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index 3b1abd7..3a8fb85 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -21,6 +21,7 @@
 #include "llvm/CodeGen/MachineFunction.h"
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/StackMaps.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/ErrorHandling.h"
@@ -211,6 +212,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node,
   assert(Node->getMachineOpcode() != TargetOpcode::IMPLICIT_DEF &&
          "IMPLICIT_DEF should have been handled as a special case elsewhere!");
 
+  unsigned NumResults =
CountResults(Node); for (unsigned i = 0; i < II.getNumDefs(); ++i) { // If the specific node value is only used by a CopyToReg and the dest reg // is a vreg in the same register class, use the CopyToReg'd destination @@ -218,6 +220,10 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, unsigned VRBase = 0; const TargetRegisterClass *RC = TRI->getAllocatableClass(TII->getRegClass(II, i, TRI, *MF)); + // If the register class is unknown for the given definition, then try to + // infer one from the value type. + if (!RC && i < NumResults) + RC = TLI->getRegClassFor(Node->getSimpleValueType(i)); if (II.OpInfo[i].isOptionalDef()) { // Optional def must be a physical register. unsigned NumResults = CountResults(Node); @@ -639,8 +645,8 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, if (SD->getKind() == SDDbgValue::FRAMEIX) { // Stack address; this needs to be lowered in target-dependent fashion. // EmitTargetCodeForFrameDebugValue is responsible for allocation. - unsigned FrameIx = SD->getFrameIx(); - return TII->emitFrameIndexDebugValue(*MF, FrameIx, Offset, MDPtr, DL); + return BuildMI(*MF, DL, TII->get(TargetOpcode::DBG_VALUE)) + .addFrameIndex(SD->getFrameIx()).addImm(Offset).addMetadata(MDPtr); } // Otherwise, we're going to create an instruction here. const MCInstrDesc &II = TII->get(TargetOpcode::DBG_VALUE); @@ -678,7 +684,13 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, MIB.addReg(0U); } - MIB.addImm(Offset).addMetadata(MDPtr); + if (Offset != 0) // Indirect addressing. + MIB.addImm(Offset); + else + MIB.addReg(0U, RegState::Debug); + + MIB.addMetadata(MDPtr); + return &*MIB; } @@ -716,10 +728,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, const MCInstrDesc &II = TII->get(Opc); unsigned NumResults = CountResults(Node); + unsigned NumDefs = II.getNumDefs(); + const uint16_t *ScratchRegs = NULL; + + // Handle PATCHPOINT specially and then use the generic code. + if (Opc == TargetOpcode::PATCHPOINT) { + unsigned CC = Node->getConstantOperandVal(PatchPointOpers::CCPos); + NumDefs = NumResults; + ScratchRegs = TLI->getScratchRegisters((CallingConv::ID) CC); + } + unsigned NumImpUses = 0; unsigned NodeOperands = - countOperands(Node, II.getNumOperands() - II.getNumDefs(), NumImpUses); - bool HasPhysRegOuts = NumResults > II.getNumDefs() && II.getImplicitDefs()!=0; + countOperands(Node, II.getNumOperands() - NumDefs, NumImpUses); + bool HasPhysRegOuts = NumResults > NumDefs && II.getImplicitDefs()!=0; #ifndef NDEBUG unsigned NumMIOperands = NodeOperands + NumResults; if (II.isVariadic()) @@ -742,14 +764,20 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. - bool HasOptPRefs = II.getNumDefs() > NumResults; + bool HasOptPRefs = NumDefs > NumResults; assert((!HasOptPRefs || !HasPhysRegOuts) && "Unable to cope with optional defs and phys regs defs!"); - unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; + unsigned NumSkip = HasOptPRefs ? 
NumDefs - NumResults : 0;
   for (unsigned i = NumSkip; i != NodeOperands; ++i)
-    AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II,
+    AddOperand(MIB, Node->getOperand(i), i-NumSkip+NumDefs, &II,
                VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned);
 
+  // Add scratch registers as implicit def and early clobber
+  if (ScratchRegs)
+    for (unsigned i = 0; ScratchRegs[i]; ++i)
+      MIB.addReg(ScratchRegs[i], RegState::ImplicitDefine |
+                                 RegState::EarlyClobber);
+
   // Transfer all of the memory reference descriptions of this instruction.
   MIB.setMemRefs(cast<MachineSDNode>(Node)->memoperands_begin(),
                  cast<MachineSDNode>(Node)->memoperands_end());
@@ -778,8 +806,8 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned,
 
   // Additional results must be physical register defs.
   if (HasPhysRegOuts) {
-    for (unsigned i = II.getNumDefs(); i < NumResults; ++i) {
-      unsigned Reg = II.getImplicitDefs()[i - II.getNumDefs()];
+    for (unsigned i = NumDefs; i < NumResults; ++i) {
+      unsigned Reg = II.getImplicitDefs()[i - NumDefs];
       if (!Node->hasAnyUseOfValue(i))
         continue;
       // This implicitly defined physreg has a use.
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
index a9c2203..920dda8 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -1,4 +1,4 @@
-//===---- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG class ---==//
+//===- InstrEmitter.h - Emit MachineInstrs for the SelectionDAG -*- C++ -*--==//
 //
 //                     The LLVM Compiler Infrastructure
 //
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 2a1d8c2..9061ae9 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -58,6 +58,10 @@ class SelectionDAGLegalize : public SelectionDAG::DAGUpdateListener {
   /// LegalizedNodes - The set of nodes which have already been legalized.
   SmallPtrSet<SDNode *, 16> LegalizedNodes;
 
+  EVT getSetCCResultType(EVT VT) const {
+    return TLI.getSetCCResultType(*DAG.getContext(), VT);
+  }
+
   // Libcall insertion helpers.
 
 public:
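// Most of the LegalizeDAG changes below are the mechanical DebugLoc ->
// SDLoc migration seen throughout this import. A minimal sketch of the
// change at a typical call site (assumed shape, not a specific line):
//
//   // before:                        // after:
//   DebugLoc dl = N->getDebugLoc();   SDLoc dl(N);
//   DAG.getNode(Opc, dl, VT, Op);     DAG.getNode(Opc, dl, VT, Op);
//
// SDLoc bundles the debug location with the node's IR order, which is why
// every node-creation site switches type.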
   /// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
-  SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+  SDValue ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
                                      SDValue N1, SDValue N2,
                                      ArrayRef<int> Mask) const;

-  void LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
-                             DebugLoc dl);
+  bool LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC,
+                             bool &NeedInvert, SDLoc dl);

   SDValue ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned);
   SDValue ExpandLibCall(RTLIB::Libcall LC, EVT RetVT, const SDValue *Ops,
-                        unsigned NumOps, bool isSigned, DebugLoc dl);
+                        unsigned NumOps, bool isSigned, SDLoc dl);
   std::pair<SDValue, SDValue> ExpandChainLibCall(RTLIB::Libcall LC,
                                                  SDNode *Node, bool isSigned);
@@ -113,21 +117,21 @@ private:
   void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);
   void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl<SDValue> &Results);

-  SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
+  SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, SDLoc dl);
   SDValue ExpandBUILD_VECTOR(SDNode *Node);
   SDValue ExpandSCALAR_TO_VECTOR(SDNode *Node);
   void ExpandDYNAMIC_STACKALLOC(SDNode *Node,
                                 SmallVectorImpl<SDValue> &Results);
   SDValue ExpandFCOPYSIGN(SDNode *Node);
   SDValue ExpandLegalINT_TO_FP(bool isSigned, SDValue LegalOp, EVT DestVT,
-                               DebugLoc dl);
+                               SDLoc dl);
   SDValue PromoteLegalINT_TO_FP(SDValue LegalOp, EVT DestVT, bool isSigned,
-                                DebugLoc dl);
+                                SDLoc dl);
   SDValue PromoteLegalFP_TO_INT(SDValue LegalOp, EVT DestVT, bool isSigned,
-                                DebugLoc dl);
+                                SDLoc dl);

-  SDValue ExpandBSWAP(SDValue Op, DebugLoc dl);
-  SDValue ExpandBitCount(unsigned Opc, SDValue Op, DebugLoc dl);
+  SDValue ExpandBSWAP(SDValue Op, SDLoc dl);
+  SDValue ExpandBitCount(unsigned Opc, SDValue Op, SDLoc dl);

   SDValue ExpandExtractFromVectorThroughStack(SDValue Op);
   SDValue ExpandInsertToVectorThroughStack(SDValue Op);
@@ -181,7 +185,7 @@ public:
 /// whose vector element type is narrower than the original shuffle type.
 /// e.g. <0, 1, 0, 1> -> v8i16 <0, 1, 2, 3, 0, 1, 2, 3>
 SDValue
-SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, DebugLoc dl,
+SelectionDAGLegalize::ShuffleWithNarrowerEltType(EVT NVT, EVT VT, SDLoc dl,
                                                  SDValue N1, SDValue N2,
                                                  ArrayRef<int> Mask) const {
   unsigned NumMaskElts = VT.getVectorNumElements();
@@ -247,7 +251,7 @@ void SelectionDAGLegalize::LegalizeDAG() {
 SDValue SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP,
                                                bool UseCP) {
   bool Extend = false;
-  DebugLoc dl = CFP->getDebugLoc();
+  SDLoc dl(CFP);

   // If a FP immediate is precise when represented as a float and if the
   // target can do an extending load from float to double, we put it into
@@ -307,7 +311,9 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
   SDValue Val = ST->getValue();
   EVT VT = Val.getValueType();
   int Alignment = ST->getAlignment();
-  DebugLoc dl = ST->getDebugLoc();
+  unsigned AS = ST->getAddressSpace();
+
+  SDLoc dl(ST);
   if (ST->getMemoryVT().isFloatingPoint() ||
       ST->getMemoryVT().isVector()) {
     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), VT.getSizeInBits());
@@ -339,7 +345,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
       SDValue Store = DAG.getTruncStore(Chain, dl, Val, StackPtr,
                                         MachinePointerInfo(), StoredVT,
                                         false, false, 0);
-      SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy());
+      SDValue Increment = DAG.getConstant(RegBytes, TLI.getPointerTy(AS));
       SmallVector<SDValue, 8> Stores;
       unsigned Offset = 0;

@@ -377,7 +383,8 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
                                           .getWithOffset(Offset),
                                         MemVT, ST->isVolatile(),
                                         ST->isNonTemporal(),
-                                        MinAlign(ST->getAlignment(), Offset)));
+                                        MinAlign(ST->getAlignment(), Offset),
+                                        ST->getTBAAInfo()));

       // The order of the stores doesn't matter - say it with a TokenFactor.
       SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &Stores[0],
@@ -404,13 +411,14 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG,
   Store1 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Lo:Hi, Ptr,
                              ST->getPointerInfo(), NewStoredVT,
                              ST->isVolatile(), ST->isNonTemporal(), Alignment);
+
   Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                    DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+                    DAG.getConstant(IncrementSize, TLI.getPointerTy(AS)));
   Alignment = MinAlign(Alignment, IncrementSize);
   Store2 = DAG.getTruncStore(Chain, dl, TLI.isLittleEndian()?Hi:Lo, Ptr,
                              ST->getPointerInfo().getWithOffset(IncrementSize),
                              NewStoredVT, ST->isVolatile(), ST->isNonTemporal(),
-                             Alignment);
+                             Alignment, ST->getTBAAInfo());

   SDValue Result =
     DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Store1, Store2);
@@ -428,16 +436,14 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
   SDValue Ptr = LD->getBasePtr();
   EVT VT = LD->getValueType(0);
   EVT LoadedVT = LD->getMemoryVT();
-  DebugLoc dl = LD->getDebugLoc();
+  SDLoc dl(LD);
   if (VT.isFloatingPoint() || VT.isVector()) {
     EVT intVT = EVT::getIntegerVT(*DAG.getContext(), LoadedVT.getSizeInBits());
     if (TLI.isTypeLegal(intVT) && TLI.isTypeLegal(LoadedVT)) {
       // Expand to a (misaligned) integer load of the same size,
       // then bitconvert to floating point or vector.
-      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr, LD->getPointerInfo(),
-                                    LD->isVolatile(),
-                                    LD->isNonTemporal(),
-                                    LD->isInvariant(), LD->getAlignment());
+      SDValue newLoad = DAG.getLoad(intVT, dl, Chain, Ptr,
+                                    LD->getMemOperand());
       SDValue Result = DAG.getNode(ISD::BITCAST, dl, LoadedVT, newLoad);
       if (LoadedVT != VT)
         Result = DAG.getNode(VT.isFloatingPoint() ? ISD::FP_EXTEND :
@@ -470,7 +476,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
                               LD->getPointerInfo().getWithOffset(Offset),
                               LD->isVolatile(), LD->isNonTemporal(),
                               LD->isInvariant(),
-                              MinAlign(LD->getAlignment(), Offset));
+                              MinAlign(LD->getAlignment(), Offset),
+                              LD->getTBAAInfo());
       // Follow the load with a store to the stack slot.  Remember the store.
       Stores.push_back(DAG.getStore(Load.getValue(1), dl, Load, StackPtr,
                                     MachinePointerInfo(), false, false, 0));
@@ -488,7 +495,8 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
                             LD->getPointerInfo().getWithOffset(Offset),
                             MemVT, LD->isVolatile(),
                             LD->isNonTemporal(),
-                            MinAlign(LD->getAlignment(), Offset));
+                            MinAlign(LD->getAlignment(), Offset),
+                            LD->getTBAAInfo());
       // Follow the load with a store to the stack slot.  Remember the store.
       // On big-endian machines this requires a truncating store to ensure
       // that the bits end up in the right place.
@@ -532,23 +540,25 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
   if (TLI.isLittleEndian()) {
     Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                         LD->getPointerInfo(), NewLoadedVT, LD->isVolatile(),
-                        LD->isNonTemporal(), Alignment);
+                        LD->isNonTemporal(), Alignment, LD->getTBAAInfo());
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+                      DAG.getConstant(IncrementSize, Ptr.getValueType()));
     Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr,
                         LD->getPointerInfo().getWithOffset(IncrementSize),
                         NewLoadedVT, LD->isVolatile(),
-                        LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+                        LD->isNonTemporal(), MinAlign(Alignment, IncrementSize),
+                        LD->getTBAAInfo());
   } else {
     Hi = DAG.getExtLoad(HiExtType, dl, VT, Chain, Ptr, LD->getPointerInfo(),
                         NewLoadedVT, LD->isVolatile(),
-                        LD->isNonTemporal(), Alignment);
+                        LD->isNonTemporal(), Alignment, LD->getTBAAInfo());
     Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                      DAG.getConstant(IncrementSize, TLI.getPointerTy()));
+                      DAG.getConstant(IncrementSize, Ptr.getValueType()));
     Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, VT, Chain, Ptr,
                         LD->getPointerInfo().getWithOffset(IncrementSize),
                         NewLoadedVT, LD->isVolatile(),
-                        LD->isNonTemporal(), MinAlign(Alignment,IncrementSize));
+                        LD->isNonTemporal(), MinAlign(Alignment, IncrementSize),
+                        LD->getTBAAInfo());
   }

   // aggregate the two parts
@@ -570,7 +580,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG,
 /// the insert there, and then read the result back.
 SDValue SelectionDAGLegalize::
 PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
-                               DebugLoc dl) {
+                               SDLoc dl) {
   SDValue Tmp1 = Vec;
   SDValue Tmp2 = Val;
   SDValue Tmp3 = Idx;
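A note on the recurring MinAlign(...) and getTBAAInfo() arguments threaded through these expansions: each split piece of an unaligned access inherits the original operation's TBAA metadata, and its alignment drops to the best guarantee available at base + offset. If memory serves, LLVM's MinAlign helper computes that guarantee as the lowest set bit of (A | B); a self-contained sketch of the arithmetic, offered as an assumption rather than a quotation of the helper:

#include <cstdint>

// Sketch of the MinAlign arithmetic used above: the alignment provable for
// an access at `Align + Offset` is the lowest set bit of (Align | Offset),
// e.g. a 16-byte-aligned base plus offset 4 yields only 4-byte alignment.
static uint64_t MinAlignSketch(uint64_t A, uint64_t B) {
  return (A | B) & -(A | B);  // isolate the lowest set bit
}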
@@ -606,13 +616,13 @@ PerformInsertVectorEltInMemory(SDValue Vec, SDValue Val, SDValue Idx,
                                 false, false, 0);
   // Load the updated vector.
   return DAG.getLoad(VT, dl, Ch, StackPtr,
-                     MachinePointerInfo::getFixedStack(SPFI), false, false,
+                     MachinePointerInfo::getFixedStack(SPFI), false, false, false,
                      0);
 }

 SDValue SelectionDAGLegalize::
-ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, DebugLoc dl) {
+ExpandINSERT_VECTOR_ELT(SDValue Vec, SDValue Val, SDValue Idx, SDLoc dl) {
   if (ConstantSDNode *InsertPos = dyn_cast<ConstantSDNode>(Idx)) {
     // SCALAR_TO_VECTOR requires that the type of the value being inserted
     // match the element type of the vector being created, except for
@@ -651,7 +661,8 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
   unsigned Alignment = ST->getAlignment();
   bool isVolatile = ST->isVolatile();
   bool isNonTemporal = ST->isNonTemporal();
-  DebugLoc dl = ST->getDebugLoc();
+  const MDNode *TBAAInfo = ST->getTBAAInfo();
+  SDLoc dl(ST);

   if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(ST->getValue())) {
     if (CFP->getValueType(0) == MVT::f32 &&
         TLI.isTypeLegal(MVT::i32)) {
@@ -659,7 +670,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
                                       bitcastToAPInt().zextOrTrunc(32),
                                       MVT::i32);
       return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
-                          isVolatile, isNonTemporal, Alignment);
+                          isVolatile, isNonTemporal, Alignment, TBAAInfo);
     }

     if (CFP->getValueType(0) == MVT::f64) {
@@ -668,7 +679,7 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
       SDValue Con = DAG.getConstant(CFP->getValueAPF().bitcastToAPInt().
                                     zextOrTrunc(64), MVT::i64);
       return DAG.getStore(Chain, dl, Con, Ptr, ST->getPointerInfo(),
-                          isVolatile, isNonTemporal, Alignment);
+                          isVolatile, isNonTemporal, Alignment, TBAAInfo);
     }

     if (TLI.isTypeLegal(MVT::i32) && !ST->isVolatile()) {
@@ -681,12 +692,13 @@ SDValue SelectionDAGLegalize::OptimizeFloatStore(StoreSDNode* ST) {
       if (TLI.isBigEndian()) std::swap(Lo, Hi);

       Lo = DAG.getStore(Chain, dl, Lo, Ptr, ST->getPointerInfo(), isVolatile,
-                        isNonTemporal, Alignment);
+                        isNonTemporal, Alignment, TBAAInfo);
       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                        DAG.getIntPtrConstant(4));
+                        DAG.getConstant(4, Ptr.getValueType()));
       Hi = DAG.getStore(Chain, dl, Hi, Ptr,
                         ST->getPointerInfo().getWithOffset(4),
-                        isVolatile, isNonTemporal, MinAlign(Alignment, 4U));
+                        isVolatile, isNonTemporal, MinAlign(Alignment, 4U),
+                        TBAAInfo);

       return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi);
     }
@@ -699,11 +711,12 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
     StoreSDNode *ST = cast<StoreSDNode>(Node);
     SDValue Chain = ST->getChain();
     SDValue Ptr = ST->getBasePtr();
-    DebugLoc dl = Node->getDebugLoc();
+    SDLoc dl(Node);

     unsigned Alignment = ST->getAlignment();
     bool isVolatile = ST->isVolatile();
     bool isNonTemporal = ST->isNonTemporal();
+    const MDNode *TBAAInfo = ST->getTBAAInfo();

     if (!ST->isTruncatingStore()) {
       if (SDNode *OptStore = OptimizeFloatStore(ST).getNode()) {
@@ -741,7 +754,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
         SDValue Result =
           DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), isVolatile,
-                       isNonTemporal, Alignment);
+                       isNonTemporal, Alignment, TBAAInfo);
         ReplaceNode(SDValue(Node, 0), Result);
         break;
       }
@@ -763,7 +776,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
         Value = DAG.getZeroExtendInReg(Value, dl, StVT);
         SDValue Result =
           DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
-                            NVT, isVolatile, isNonTemporal, Alignment);
+                            NVT, isVolatile, isNonTemporal, Alignment,
+                            TBAAInfo);
         ReplaceNode(SDValue(Node, 0), Result);
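OptimizeFloatStore, touched above, rewrites a store of an f32/f64 constant as an integer store of the identical bit pattern, which is why the TBAA tag must be forwarded: the integer store still aliases exactly like the original float store. The scalar analogue of the transformation, as a sketch:

#include <cstdint>
#include <cstring>

// Sketch: store a float by reinterpreting its bits as an integer, the
// scalar picture of the i32 store node OptimizeFloatStore emits.
void StoreFloatAsInt(float V, uint32_t *Slot) {
  uint32_t Bits;
  std::memcpy(&Bits, &V, sizeof(Bits));  // bit-exact reinterpretation
  *Slot = Bits;                          // plain integer store
}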
     } else if (StWidth & (StWidth - 1)) {
       // If not storing a power-of-2 number of bits, expand as two stores.
@@ -784,19 +798,20 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
         // Store the bottom RoundWidth bits.
         Lo = DAG.getTruncStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
                                RoundVT,
-                               isVolatile, isNonTemporal, Alignment);
+                               isVolatile, isNonTemporal, Alignment,
+                               TBAAInfo);

         // Store the remaining ExtraWidth bits.
         IncrementSize = RoundWidth / 8;
         Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                          DAG.getIntPtrConstant(IncrementSize));
+                          DAG.getConstant(IncrementSize, Ptr.getValueType()));
         Hi = DAG.getNode(ISD::SRL, dl, Value.getValueType(), Value,
                          DAG.getConstant(RoundWidth,
                                    TLI.getShiftAmountTy(Value.getValueType())));
         Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr,
                              ST->getPointerInfo().getWithOffset(IncrementSize),
                                ExtraVT, isVolatile, isNonTemporal,
-                               MinAlign(Alignment, IncrementSize));
+                               MinAlign(Alignment, IncrementSize), TBAAInfo);
       } else {
         // Big endian - avoid unaligned stores.
         // TRUNCSTORE:i24 X -> TRUNCSTORE:i16 (srl X, 8), TRUNCSTORE@+2:i8 X
@@ -805,16 +820,17 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
                          DAG.getConstant(ExtraWidth,
                                    TLI.getShiftAmountTy(Value.getValueType())));
         Hi = DAG.getTruncStore(Chain, dl, Hi, Ptr, ST->getPointerInfo(),
-                               RoundVT, isVolatile, isNonTemporal, Alignment);
+                               RoundVT, isVolatile, isNonTemporal, Alignment,
+                               TBAAInfo);

         // Store the remaining ExtraWidth bits.
         IncrementSize = RoundWidth / 8;
         Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                          DAG.getIntPtrConstant(IncrementSize));
+                          DAG.getConstant(IncrementSize, Ptr.getValueType()));
         Lo = DAG.getTruncStore(Chain, dl, Value, Ptr,
                              ST->getPointerInfo().getWithOffset(IncrementSize),
                                ExtraVT, isVolatile, isNonTemporal,
-                               MinAlign(Alignment, IncrementSize));
+                               MinAlign(Alignment, IncrementSize), TBAAInfo);
       }

       // The order of the stores doesn't matter.
@@ -850,7 +866,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) {
           Value = DAG.getNode(ISD::TRUNCATE, dl, StVT, Value);
           SDValue Result =
             DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(),
-                         isVolatile, isNonTemporal, Alignment);
+                         isVolatile, isNonTemporal, Alignment, TBAAInfo);
           ReplaceNode(SDValue(Node, 0), Result);
           break;
         }
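The `StWidth & (StWidth - 1)` test above detects a non-power-of-2 store width, which is then split into a power-of-2 piece (RoundVT) and a remainder (ExtraVT). For a little-endian i24 truncating store that means 16 bits at offset 0 and the top 8 bits at offset IncrementSize == 2; a byte-level sketch:

#include <cstdint>

// Sketch: little-endian split of a 24-bit truncating store into a 16-bit
// piece (RoundWidth) and an 8-bit piece at IncrementSize == RoundWidth/8.
void StoreI24LE(uint32_t X, unsigned char *P) {
  P[0] = static_cast<unsigned char>(X);        // bits 0..7
  P[1] = static_cast<unsigned char>(X >> 8);   // bits 8..15 (RoundVT = i16)
  P[2] = static_cast<unsigned char>(X >> 16);  // bits 16..23 (ExtraVT = i8)
}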
@@ -863,7 +879,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
   SDValue Chain = LD->getChain();  // The chain.
   SDValue Ptr = LD->getBasePtr();  // The base pointer.
   SDValue Value;                   // The value returned by the load op.
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);

   ISD::LoadExtType ExtType = LD->getExtensionType();
   if (ExtType == ISD::NON_EXTLOAD) {
@@ -898,9 +914,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
       assert(NVT.getSizeInBits() == VT.getSizeInBits() &&
              "Can only promote loads to same size type");
-      SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(),
-                                LD->isVolatile(), LD->isNonTemporal(),
-                                LD->isInvariant(), LD->getAlignment());
+      SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getMemOperand());
       RVal = DAG.getNode(ISD::BITCAST, dl, VT, Res);
       RChain = Res.getValue(1);
       break;
@@ -920,6 +934,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
   unsigned Alignment = LD->getAlignment();
   bool isVolatile = LD->isVolatile();
   bool isNonTemporal = LD->isNonTemporal();
+  const MDNode *TBAAInfo = LD->getTBAAInfo();

   if (SrcWidth != SrcVT.getStoreSizeInBits() &&
       // Some targets pretend to have an i1 loading operation, and actually
@@ -946,7 +961,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
     SDValue Result =
       DAG.getExtLoad(NewExtType, dl, Node->getValueType(0),
                      Chain, Ptr, LD->getPointerInfo(),
-                     NVT, isVolatile, isNonTemporal, Alignment);
+                     NVT, isVolatile, isNonTemporal, Alignment, TBAAInfo);

     Ch = Result.getValue(1); // The chain.

@@ -983,16 +998,16 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
       Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0),
                           Chain, Ptr,
                           LD->getPointerInfo(), RoundVT, isVolatile,
-                          isNonTemporal, Alignment);
+                          isNonTemporal, Alignment, TBAAInfo);

       // Load the remaining ExtraWidth bits.
       IncrementSize = RoundWidth / 8;
       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                        DAG.getIntPtrConstant(IncrementSize));
+                        DAG.getConstant(IncrementSize, Ptr.getValueType()));
       Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
                           LD->getPointerInfo().getWithOffset(IncrementSize),
                           ExtraVT, isVolatile, isNonTemporal,
-                          MinAlign(Alignment, IncrementSize));
+                          MinAlign(Alignment, IncrementSize), TBAAInfo);

       // Build a factor node to remember that this load is independent of
       // the other one.
@@ -1012,17 +1027,17 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
       // Load the top RoundWidth bits.
       Hi = DAG.getExtLoad(ExtType, dl, Node->getValueType(0), Chain, Ptr,
                           LD->getPointerInfo(), RoundVT, isVolatile,
-                          isNonTemporal, Alignment);
+                          isNonTemporal, Alignment, TBAAInfo);

       // Load the remaining ExtraWidth bits.
       IncrementSize = RoundWidth / 8;
       Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr,
-                        DAG.getIntPtrConstant(IncrementSize));
+                        DAG.getConstant(IncrementSize, Ptr.getValueType()));
       Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, Node->getValueType(0), Chain, Ptr,
                           LD->getPointerInfo().getWithOffset(IncrementSize),
                           ExtraVT, isVolatile, isNonTemporal,
-                          MinAlign(Alignment, IncrementSize));
+                          MinAlign(Alignment, IncrementSize), TBAAInfo);

       // Build a factor node to remember that this load is independent of
       // the other one.
@@ -1075,9 +1090,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
     case TargetLowering::Expand:
       if (!TLI.isLoadExtLegal(ISD::EXTLOAD, SrcVT) && TLI.isTypeLegal(SrcVT)) {
         SDValue Load = DAG.getLoad(SrcVT, dl, Chain, Ptr,
-                                   LD->getPointerInfo(),
-                                   LD->isVolatile(), LD->isNonTemporal(),
-                                   LD->isInvariant(), LD->getAlignment());
+                                   LD->getMemOperand());
         unsigned ExtendOp;
         switch (ExtType) {
         case ISD::EXTLOAD:
@@ -1105,9 +1118,8 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) {
       // Turn the unsupported load into an EXTLOAD followed by an explicit
       // zero/sign extend inreg.
       SDValue Result = DAG.getExtLoad(ISD::EXTLOAD, dl, Node->getValueType(0),
-                                      Chain, Ptr, LD->getPointerInfo(), SrcVT,
-                                      LD->isVolatile(), LD->isNonTemporal(),
-                                      LD->getAlignment());
+                                      Chain, Ptr, SrcVT,
+                                      LD->getMemOperand());
       SDValue ValRes;
       if (ExtType == ISD::SEXTLOAD)
         ValRes = DAG.getNode(ISD::SIGN_EXTEND_INREG, dl,
@@ -1249,7 +1261,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
     if (Action == TargetLowering::Expand) {
       // replace ISD::DEBUGTRAP with ISD::TRAP
       SDValue NewVal;
-      NewVal = DAG.getNode(ISD::TRAP, Node->getDebugLoc(), Node->getVTList(),
+      NewVal = DAG.getNode(ISD::TRAP, SDLoc(Node), Node->getVTList(),
                            Node->getOperand(0));
       ReplaceNode(Node, NewVal.getNode());
       LegalizeOp(NewVal.getNode());
@@ -1370,7 +1382,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) {
 SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
   SDValue Vec = Op.getOperand(0);
   SDValue Idx = Op.getOperand(1);
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   // Store the value to a temporary stack slot, then LOAD the returned part.
   SDValue StackPtr = DAG.CreateStackTemporary(Vec.getValueType());
   SDValue Ch = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr,
@@ -1382,11 +1394,7 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) {
   Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
                     DAG.getConstant(EltSize, Idx.getValueType()));
-  if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
-    Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
-  else
-    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
-
+  Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());
   StackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, StackPtr);

   if (Op.getValueType().isVector())
@@ -1404,7 +1412,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
   SDValue Vec  = Op.getOperand(0);
   SDValue Part = Op.getOperand(1);
   SDValue Idx  = Op.getOperand(2);
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);

   // Store the value to a temporary stack slot, then LOAD the returned part.

@@ -1424,11 +1432,7 @@ SDValue SelectionDAGLegalize::ExpandInsertToVectorThroughStack(SDValue Op) {
   Idx = DAG.getNode(ISD::MUL, dl, Idx.getValueType(), Idx,
                     DAG.getConstant(EltSize, Idx.getValueType()));
-
-  if (Idx.getValueType().bitsGT(TLI.getPointerTy()))
-    Idx = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Idx);
-  else
-    Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx);
+  Idx = DAG.getZExtOrTrunc(Idx, dl, TLI.getPointerTy());

   SDValue SubStackPtr = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx,
                                     StackPtr);
@@ -1449,7 +1453,7 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
   // Create the stack frame object.
   EVT VT = Node->getValueType(0);
   EVT EltVT = VT.getVectorElementType();
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
   SDValue FIPtr = DAG.CreateStackTemporary(VT);
   int FI = cast<FrameIndexSDNode>(FIPtr.getNode())->getIndex();
   MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(FI);
@@ -1489,12 +1493,12 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
     StoreChain = DAG.getEntryNode();

   // Result is a load from the stack slot.
-  return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo,
+  return DAG.getLoad(VT, dl, StoreChain, FIPtr, PtrInfo,
                      false, false, false, 0);
 }

 SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
   SDValue Tmp1 = Node->getOperand(0);
   SDValue Tmp2 = Node->getOperand(1);
@@ -1527,7 +1531,8 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
       unsigned Strides = (FloatVT.getSizeInBits()-1)/LoadTy.getSizeInBits();
       unsigned ByteOffset = (Strides * LoadTy.getSizeInBits()) / 8;
       LoadPtr = DAG.getNode(ISD::ADD, dl, LoadPtr.getValueType(),
-                            LoadPtr, DAG.getIntPtrConstant(ByteOffset));
+                            LoadPtr,
+                            DAG.getConstant(ByteOffset, LoadPtr.getValueType()));
       // Load a legal integer containing the sign bit.
       SignBit = DAG.getLoad(LoadTy, dl, Ch, LoadPtr, MachinePointerInfo(),
                             false, false, false, 0);
@@ -1542,16 +1547,16 @@ SDValue SelectionDAGLegalize::ExpandFCOPYSIGN(SDNode* Node) {
     }
   }
   // Now get the sign bit proper, by seeing whether the value is negative.
-  SignBit = DAG.getSetCC(dl, TLI.getSetCCResultType(SignBit.getValueType()),
+  SignBit = DAG.getSetCC(dl, getSetCCResultType(SignBit.getValueType()),
                          SignBit, DAG.getConstant(0, SignBit.getValueType()),
                          ISD::SETLT);
   // Get the absolute value of the result.
   SDValue AbsVal = DAG.getNode(ISD::FABS, dl, Tmp1.getValueType(), Tmp1);
   // Select between the nabs and abs value based on the sign bit of
   // the input.
-  return DAG.getNode(ISD::SELECT, dl, AbsVal.getValueType(), SignBit,
-                     DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
-                     AbsVal);
+  return DAG.getSelect(dl, AbsVal.getValueType(), SignBit,
+                       DAG.getNode(ISD::FNEG, dl, AbsVal.getValueType(), AbsVal),
+                       AbsVal);
 }

 void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
@@ -1559,7 +1564,7 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,
   unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore();
   assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and"
          " not tell us which reg is the stack pointer!");
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
   EVT VT = Node->getValueType(0);
   SDValue Tmp1 = SDValue(Node, 0);
   SDValue Tmp2 = SDValue(Node, 1);
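ExpandFCOPYSIGN above reduces copysign to three legal pieces: obtain the sign bit of the second operand as an integer, test it with a SETLT against zero, and select between the absolute value and its negation. The same logic in scalar form, assuming IEEE-754 single precision:

#include <cmath>
#include <cstdint>
#include <cstring>

// Sketch of the FCOPYSIGN expansion: sign taken from Y, magnitude from X.
float CopySignSketch(float X, float Y) {
  uint32_t YBits;
  std::memcpy(&YBits, &Y, sizeof(YBits));
  bool SignSet = (YBits >> 31) != 0;   // the SETLT-against-zero sign test
  float AbsVal = std::fabs(X);         // ISD::FABS
  return SignSet ? -AbsVal : AbsVal;   // select between nabs and abs
}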
@@ -1568,52 +1573,76 @@ void SelectionDAGLegalize::ExpandDYNAMIC_STACKALLOC(SDNode* Node,

   // Chain the dynamic stack allocation so that it doesn't modify the stack
   // pointer when other instructions are using the stack.
-  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true));
+  Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true),
+                               SDLoc(Node));

   SDValue Size  = Tmp2.getOperand(1);
   SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT);
   Chain = SP.getValue(1);
   unsigned Align = cast<ConstantSDNode>(Tmp3)->getZExtValue();
   unsigned StackAlign = TM.getFrameLowering()->getStackAlignment();
-  if (Align > StackAlign)
-    SP = DAG.getNode(ISD::AND, dl, VT, SP,
-                     DAG.getConstant(-(uint64_t)Align, VT));
   Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size);       // Value
+  if (Align > StackAlign)
+    Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1,
+                       DAG.getConstant(-(uint64_t)Align, VT));
   Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1);     // Output chain

   Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true),
-                            DAG.getIntPtrConstant(0, true), SDValue());
+                            DAG.getIntPtrConstant(0, true), SDValue(),
+                            SDLoc(Node));

   Results.push_back(Tmp1);
   Results.push_back(Tmp2);
 }

 /// LegalizeSetCCCondCode - Legalize a SETCC with given LHS and RHS and
-/// condition code CC on the current target. This routine expands SETCC with
-/// illegal condition code into AND / OR of multiple SETCC values.
-void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
+/// condition code CC on the current target.
+///
+/// If the SETCC has been legalized using AND / OR, then the legalized node
+/// will be stored in LHS. RHS and CC will be set to SDValue(). NeedInvert
+/// will be set to false.
+///
+/// If the SETCC has been legalized by using getSetCCSwappedOperands(),
+/// then the values of LHS and RHS will be swapped, CC will be set to the
+/// new condition, and NeedInvert will be set to false.
+///
+/// If the SETCC has been legalized using the inverse condcode, then LHS and
+/// RHS will be unchanged, CC will set to the inverted condcode, and NeedInvert
+/// will be set to true. The caller must invert the result of the SETCC with
+/// SelectionDAG::getNOT() or take equivalent action to swap the effect of a
+/// true/false result.
+///
+/// \returns true if the SetCC has been legalized, false if it hasn't.
+bool SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
                                                  SDValue &LHS, SDValue &RHS,
                                                  SDValue &CC,
-                                                 DebugLoc dl) {
+                                                 bool &NeedInvert,
+                                                 SDLoc dl) {
   MVT OpVT = LHS.getSimpleValueType();
   ISD::CondCode CCCode = cast<CondCodeSDNode>(CC)->get();
+  NeedInvert = false;
   switch (TLI.getCondCodeAction(CCCode, OpVT)) {
   default: llvm_unreachable("Unknown condition code action!");
   case TargetLowering::Legal:
     // Nothing to do.
     break;
   case TargetLowering::Expand: {
+    ISD::CondCode InvCC = ISD::getSetCCSwappedOperands(CCCode);
+    if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+      std::swap(LHS, RHS);
+      CC = DAG.getCondCode(InvCC);
+      return true;
+    }
     ISD::CondCode CC1 = ISD::SETCC_INVALID, CC2 = ISD::SETCC_INVALID;
-    ISD::CondCode InvCC = ISD::SETCC_INVALID;
     unsigned Opc = 0;
     switch (CCCode) {
     default: llvm_unreachable("Don't know how to expand this condition!");
-    case ISD::SETO:
+    case ISD::SETO:
         assert(TLI.getCondCodeAction(ISD::SETOEQ, OpVT)
             == TargetLowering::Legal
             && "If SETO is expanded, SETOEQ must be legal!");
         CC1 = ISD::SETOEQ; CC2 = ISD::SETOEQ; Opc = ISD::AND; break;
-    case ISD::SETUO:
+    case ISD::SETUO:
         assert(TLI.getCondCodeAction(ISD::SETUNE, OpVT)
             == TargetLowering::Legal
             && "If SETUO is expanded, SETUNE must be legal!");
@@ -1623,12 +1652,12 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
     case ISD::SETOGE:
     case ISD::SETOLT:
     case ISD::SETOLE:
-    case ISD::SETONE:
-    case ISD::SETUEQ:
-    case ISD::SETUNE:
-    case ISD::SETUGT:
-    case ISD::SETUGE:
-    case ISD::SETULT:
+    case ISD::SETONE:
+    case ISD::SETUEQ:
+    case ISD::SETUNE:
+    case ISD::SETUGT:
+    case ISD::SETUGE:
+    case ISD::SETULT:
     case ISD::SETULE:
         // If we are floating point, assign and break, otherwise fall through.
         if (!OpVT.isInteger()) {
@@ -1644,20 +1673,23 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
     case ISD::SETGT:
     case ISD::SETGE:
     case ISD::SETLT:
+      // We only support using the inverted operation, which is computed above
+      // and not a different manner of supporting expanding these cases.
+      llvm_unreachable("Don't know how to expand this condition!");
     case ISD::SETNE:
     case ISD::SETEQ:
-      InvCC = ISD::getSetCCSwappedOperands(CCCode);
-      if (TLI.getCondCodeAction(InvCC, OpVT) == TargetLowering::Expand) {
-        // We only support using the inverted operation and not a
-        // different manner of supporting expanding these cases.
-        llvm_unreachable("Don't know how to expand this condition!");
+      // Try inverting the result of the inverse condition.
+      InvCC = CCCode == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ;
+      if (TLI.isCondCodeLegal(InvCC, OpVT)) {
+        CC = DAG.getCondCode(InvCC);
+        NeedInvert = true;
+        return true;
       }
-      LHS = DAG.getSetCC(dl, VT, RHS, LHS, InvCC);
-      RHS = SDValue();
-      CC = SDValue();
-      return;
+      // If inverting the condition didn't work then we have no means to expand
+      // the condition.
+      llvm_unreachable("Don't know how to expand this condition!");
     }
-
+
     SDValue SetCC1, SetCC2;
     if (CCCode != ISD::SETO && CCCode != ISD::SETUO) {
       // If we aren't the ordered or unorder operation,
@@ -1672,9 +1704,10 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
     LHS = DAG.getNode(Opc, dl, VT, SetCC1, SetCC2);
     RHS = SDValue();
     CC = SDValue();
-    break;
+    return true;
   }
   }
+  return false;
 }

 /// EmitStackConvert - Emit a store/load combination to the stack. This stores
@@ -1684,7 +1717,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT,
 SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
                                                EVT SlotVT,
                                                EVT DestVT,
-                                               DebugLoc dl) {
+                                               SDLoc dl) {
   // Create the stack frame object.
   unsigned SrcAlign =
     TLI.getDataLayout()->getPrefTypeAlignment(SrcOp.getValueType().
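To make the new LegalizeSetCCCondCode contract concrete: an illegal condition code is rescued by swapping the operands, by inverting the condition (with NeedInvert telling the caller to wrap the result in a NOT), or by expanding into two legal SETCCs joined with AND/OR. The SETO case from the table above has a particularly simple scalar reading:

#include <cmath>

// Scalar reading of the SETO expansion above: "ordered" holds iff neither
// operand is a NaN, i.e. two legal SETOEQ self-comparisons ANDed together.
bool SetOSketch(double A, double B) {
  return (A == A) && (B == B);  // SETOEQ(A,A) AND SETOEQ(B,B)
}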
@@ -1725,7 +1758,7 @@ SDValue SelectionDAGLegalize::EmitStackConvert(SDValue SrcOp,
 }

 SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
   // Create a vector sized/aligned stack slot, store the value to element #0,
   // then load the whole vector back out.
   SDValue StackPtr = DAG.CreateStackTemporary(Node->getValueType(0));
@@ -1749,7 +1782,7 @@ SDValue SelectionDAGLegalize::ExpandSCALAR_TO_VECTOR(SDNode *Node) {
 SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
   unsigned NumElems = Node->getNumOperands();
   SDValue Value1, Value2;
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
   EVT VT = Node->getValueType(0);
   EVT OpVT = Node->getOperand(0).getValueType();
   EVT EltVT = VT.getVectorElementType();
@@ -1881,7 +1914,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
   CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
                        0, TLI.getLibcallCallingConv(LC), isTailCall,
                        /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
-                       Callee, Args, DAG, Node->getDebugLoc());
+                       Callee, Args, DAG, SDLoc(Node));
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);

@@ -1896,7 +1929,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
 /// and returning a result of type RetVT.
 SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, EVT RetVT,
                                             const SDValue *Ops, unsigned NumOps,
-                                            bool isSigned, DebugLoc dl) {
+                                            bool isSigned, SDLoc dl) {
   TargetLowering::ArgListTy Args;
   Args.reserve(NumOps);

@@ -1950,7 +1983,7 @@ SelectionDAGLegalize::ExpandChainLibCall(RTLIB::Libcall LC,
   CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
                        0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
                        /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
-                       Callee, Args, DAG, Node->getDebugLoc());
+                       Callee, Args, DAG, SDLoc(Node));
   std::pair<SDValue, SDValue> CallInfo = TLI.LowerCallTo(CLI);

   return CallInfo;
@@ -1963,7 +1996,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
                                               RTLIB::Libcall Call_F128,
                                               RTLIB::Libcall Call_PPCF128) {
   RTLIB::Libcall LC;
-  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  switch (Node->getSimpleValueType(0).SimpleTy) {
   default: llvm_unreachable("Unexpected request for libcall!");
   case MVT::f32: LC = Call_F32; break;
   case MVT::f64: LC = Call_F64; break;
@@ -1981,7 +2014,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
                                                RTLIB::Libcall Call_I64,
                                                RTLIB::Libcall Call_I128) {
   RTLIB::Libcall LC;
-  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  switch (Node->getSimpleValueType(0).SimpleTy) {
   default: llvm_unreachable("Unexpected request for libcall!");
   case MVT::i8:   LC = Call_I8; break;
   case MVT::i16:  LC = Call_I16; break;
@@ -1996,7 +2029,7 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
 static bool isDivRemLibcallAvailable(SDNode *Node, bool isSigned,
                                      const TargetLowering &TLI) {
   RTLIB::Libcall LC;
-  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  switch (Node->getSimpleValueType(0).SimpleTy) {
   default: llvm_unreachable("Unexpected request for libcall!");
   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
@@ -2043,7 +2076,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
   bool isSigned = Opcode == ISD::SDIVREM;

   RTLIB::Libcall LC;
-  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  switch (Node->getSimpleValueType(0).SimpleTy) {
   default: llvm_unreachable("Unexpected request for libcall!");
   case MVT::i8:   LC= isSigned ? RTLIB::SDIVREM_I8  : RTLIB::UDIVREM_I8;  break;
   case MVT::i16:  LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break;
@@ -2082,7 +2115,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
   SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                          TLI.getPointerTy());

-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
   TargetLowering::
   CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false,
                        0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false,
@@ -2100,7 +2133,7 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node,
 /// isSinCosLibcallAvailable - Return true if sincos libcall is available.
 static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) {
   RTLIB::Libcall LC;
-  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  switch (Node->getSimpleValueType(0).SimpleTy) {
   default: llvm_unreachable("Unexpected request for libcall!");
   case MVT::f32:     LC = RTLIB::SINCOS_F32; break;
   case MVT::f64:     LC = RTLIB::SINCOS_F64; break;
@@ -2130,7 +2163,7 @@ static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI,
 static bool useSinCos(SDNode *Node) {
   unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN
     ? ISD::FCOS : ISD::FSIN;
-
+
   SDValue Op0 = Node->getOperand(0);
   for (SDNode::use_iterator UI = Op0.getNode()->use_begin(),
        UE = Op0.getNode()->use_end(); UI != UE; ++UI) {
@@ -2150,7 +2183,7 @@ void SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
                                                SmallVectorImpl<SDValue> &Results) {
   RTLIB::Libcall LC;
-  switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
+  switch (Node->getSimpleValueType(0).SimpleTy) {
   default: llvm_unreachable("Unexpected request for libcall!");
   case MVT::f32:     LC = RTLIB::SINCOS_F32; break;
   case MVT::f64:     LC = RTLIB::SINCOS_F64; break;
@@ -2158,25 +2191,25 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
   case MVT::f128:    LC = RTLIB::SINCOS_F128; break;
   case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break;
   }
-
+
   // The input chain to this libcall is the entry node of the function.
   // Legalizing the call will automatically add the previous call to the
   // dependence.
   SDValue InChain = DAG.getEntryNode();
-
+
   EVT RetVT = Node->getValueType(0);
   Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext());
-
+
   TargetLowering::ArgListTy Args;
   TargetLowering::ArgListEntry Entry;
-
+
   // Pass the argument.
   Entry.Node = Node->getOperand(0);
   Entry.Ty = RetTy;
   Entry.isSExt = false;
   Entry.isZExt = false;
   Args.push_back(Entry);
-
+
   // Pass the return address of sin.
   SDValue SinPtr = DAG.CreateStackTemporary(RetVT);
   Entry.Node = SinPtr;
@@ -2184,7 +2217,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
   Entry.isSExt = false;
   Entry.isZExt = false;
   Args.push_back(Entry);
-
+
   // Also pass the return address of the cos.
   SDValue CosPtr = DAG.CreateStackTemporary(RetVT);
   Entry.Node = CosPtr;
@@ -2192,11 +2225,11 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
   Entry.isSExt = false;
   Entry.isZExt = false;
   Args.push_back(Entry);
-
+
   SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC),
                                          TLI.getPointerTy());
-
-  DebugLoc dl = Node->getDebugLoc();
+
+  SDLoc dl(Node);
   TargetLowering::
   CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()),
                        false, false, false, false,
@@ -2218,7 +2251,7 @@ SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node,
 SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
                                                    SDValue Op0,
                                                    EVT DestVT,
-                                                   DebugLoc dl) {
+                                                   SDLoc dl) {
   if (Op0.getValueType() == MVT::i32 && TLI.isTypeLegal(MVT::f64)) {
     // simple 32-bit [signed|unsigned] integer to float/double expansion

@@ -2226,11 +2259,11 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     SDValue StackSlot = DAG.CreateStackTemporary(MVT::f64);

     // word offset constant for Hi/Lo address computation
-    SDValue WordOff = DAG.getConstant(sizeof(int), TLI.getPointerTy());
+    SDValue WordOff = DAG.getConstant(sizeof(int), StackSlot.getValueType());

     // set up Hi and Lo (into buffer) address based on endian
     SDValue Hi = StackSlot;
-    SDValue Lo = DAG.getNode(ISD::ADD, dl,
-                             TLI.getPointerTy(), StackSlot, WordOff);
+    SDValue Lo = DAG.getNode(ISD::ADD, dl, StackSlot.getValueType(),
+                             StackSlot, WordOff);
     if (TLI.isLittleEndian())
       std::swap(Hi, Lo);

@@ -2327,9 +2360,9 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
     // select. We happen to get lucky and machinesink does the right
     // thing most of the time. This would be a good candidate for a
     //pseudo-op, or, even better, for whole-function isel.
-    SDValue SignBitTest = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+    SDValue SignBitTest = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
                                        Op0, DAG.getConstant(0, MVT::i64),
                                        ISD::SETLT);
-    return DAG.getNode(ISD::SELECT, dl, MVT::f32, SignBitTest, Slow, Fast);
+    return DAG.getSelect(dl, MVT::f32, SignBitTest, Slow, Fast);
   }

   // Otherwise, implement the fully general conversion.
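ExpandSinCosLibCall builds a void call taking the argument plus two out-pointers backed by stack temporaries (SinPtr and CosPtr above), then loads both results. At the C level this is the shape of the GNU sincos extension; a usage sketch, assuming a libm that provides it:

// The C-level shape of the call emitted above: one input and two
// out-pointers, so a single libcall replaces separate sin(X) and cos(X).
extern "C" void sincos(double X, double *Sin, double *Cos);

void SinCosDemo(double X, double *S, double *C) {
  sincos(X, S, C);
}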
@@ -2340,13 +2373,13 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
                                    DAG.getConstant(UINT64_C(0x800), MVT::i64));
   SDValue And2 = DAG.getNode(ISD::AND, dl, MVT::i64, Op0,
                                    DAG.getConstant(UINT64_C(0x7ff), MVT::i64));
-  SDValue Ne = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+  SDValue Ne = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
                             And2, DAG.getConstant(UINT64_C(0), MVT::i64),
                             ISD::SETNE);
-  SDValue Sel = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ne, Or, Op0);
-  SDValue Ge = DAG.getSetCC(dl, TLI.getSetCCResultType(MVT::i64),
+  SDValue Sel = DAG.getSelect(dl, MVT::i64, Ne, Or, Op0);
+  SDValue Ge = DAG.getSetCC(dl, getSetCCResultType(MVT::i64),
                             Op0, DAG.getConstant(UINT64_C(0x0020000000000000),
                                                  MVT::i64),
                             ISD::SETUGE);
-  SDValue Sel2 = DAG.getNode(ISD::SELECT, dl, MVT::i64, Ge, Sel, Op0);
+  SDValue Sel2 = DAG.getSelect(dl, MVT::i64, Ge, Sel, Op0);
   EVT SHVT = TLI.getShiftAmountTy(Sel2.getValueType());

   SDValue Sh = DAG.getNode(ISD::SRL, dl, MVT::i64, Sel2,
@@ -2365,18 +2398,18 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,

   SDValue Tmp1 = DAG.getNode(ISD::SINT_TO_FP, dl, DestVT, Op0);

-  SDValue SignSet = DAG.getSetCC(dl, TLI.getSetCCResultType(Op0.getValueType()),
+  SDValue SignSet = DAG.getSetCC(dl, getSetCCResultType(Op0.getValueType()),
                                  Op0, DAG.getConstant(0, Op0.getValueType()),
                                  ISD::SETLT);
   SDValue Zero = DAG.getIntPtrConstant(0),
           Four = DAG.getIntPtrConstant(4);
-  SDValue CstOffset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(),
-                                  SignSet, Four, Zero);
+  SDValue CstOffset = DAG.getSelect(dl, Zero.getValueType(),
+                                    SignSet, Four, Zero);

   // If the sign bit of the integer is set, the large number will be treated
   // as a negative number. To counteract this, the dynamic code adds an
   // offset depending on the data type.
   uint64_t FF;
-  switch (Op0.getValueType().getSimpleVT().SimpleTy) {
+  switch (Op0.getSimpleValueType().SimpleTy) {
   default: llvm_unreachable("Unsupported integer type!");
   case MVT::i8 : FF = 0x43800000ULL; break;  // 2^8  (as a float)
   case MVT::i16: FF = 0x47800000ULL; break;  // 2^16 (as a float)
@@ -2389,7 +2422,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
   SDValue CPIdx = DAG.getConstantPool(FudgeFactor, TLI.getPointerTy());
   unsigned Alignment = cast<ConstantPoolSDNode>(CPIdx)->getAlignment();
-  CPIdx = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), CPIdx, CstOffset);
+  CPIdx = DAG.getNode(ISD::ADD, dl, CPIdx.getValueType(), CPIdx, CstOffset);
   Alignment = std::min(Alignment, 4u);
   SDValue FudgeInReg;
   if (DestVT == MVT::f32)
@@ -2417,7 +2450,7 @@ SDValue SelectionDAGLegalize::ExpandLegalINT_TO_FP(bool isSigned,
 SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
                                                     EVT DestVT,
                                                     bool isSigned,
-                                                    DebugLoc dl) {
+                                                    SDLoc dl) {
   // First step, figure out the appropriate *INT_TO_FP operation to use.
   EVT NewInTy = LegalOp.getValueType();

@@ -2459,7 +2492,7 @@ SDValue SelectionDAGLegalize::PromoteLegalINT_TO_FP(SDValue LegalOp,
 SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,
                                                     EVT DestVT,
                                                     bool isSigned,
-                                                    DebugLoc dl) {
+                                                    SDLoc dl) {
   // First step, figure out the appropriate FP_TO*INT operation to use.
   EVT NewOutTy = DestVT;
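The FF constants above merit a gloss: converting an unsigned integer through a signed INT_TO_FP leaves a sign-set input exactly 2^N too small, so the expansion conditionally adds back 2^N, fetched from the constant pool as a float. The encodings from the table can be checked directly; a sketch assuming IEEE-754 single precision:

#include <cassert>
#include <cstdint>
#include <cstring>

// Verify the fudge-factor encodings listed above: 0x43800000 is 2^8 and
// 0x47800000 is 2^16 when reinterpreted as IEEE-754 single precision.
void CheckFudgeFactors() {
  const uint32_t Bits8 = 0x43800000u, Bits16 = 0x47800000u;
  float F8, F16;
  std::memcpy(&F8, &Bits8, sizeof(F8));
  std::memcpy(&F16, &Bits16, sizeof(F16));
  assert(F8 == 256.0f);     // 2^8, added back for i8 inputs
  assert(F16 == 65536.0f);  // 2^16, added back for i16 inputs
}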
@@ -2494,7 +2527,7 @@ SDValue SelectionDAGLegalize::PromoteLegalFP_TO_INT(SDValue LegalOp,

 /// ExpandBSWAP - Open code the operations for BSWAP of the specified operation.
 ///
-SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
+SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, SDLoc dl) {
   EVT VT = Op.getValueType();
   EVT SHVT = TLI.getShiftAmountTy(VT);
   SDValue Tmp1, Tmp2, Tmp3, Tmp4, Tmp5, Tmp6, Tmp7, Tmp8;
@@ -2542,7 +2575,7 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) {
 /// ExpandBitCount - Expand the specified bitcount instruction into operations.
 ///
 SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op,
-                                             DebugLoc dl) {
+                                             SDLoc dl) {
   switch (Opc) {
   default: llvm_unreachable("Cannot expand this yet!");
   case ISD::CTPOP: {
@@ -2650,6 +2683,7 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
     case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break;
     case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break;
     case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break;
     }
     break;
   case ISD::ATOMIC_CMP_SWAP:
@@ -2659,6 +2693,7 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
     case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break;
     case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break;
    case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break;
     }
     break;
   case ISD::ATOMIC_LOAD_ADD:
@@ -2668,6 +2703,7 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
     case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break;
     case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break;
     case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break;
     }
     break;
   case ISD::ATOMIC_LOAD_SUB:
@@ -2677,6 +2713,7 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
     case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break;
     case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break;
     case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break;
     }
     break;
   case ISD::ATOMIC_LOAD_AND:
@@ -2686,6 +2723,7 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
     case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break;
     case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break;
     case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break;
     }
     break;
   case ISD::ATOMIC_LOAD_OR:
@@ -2695,6 +2733,7 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
     case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break;
     case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break;
     case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break;
     }
     break;
   case ISD::ATOMIC_LOAD_XOR:
@@ -2704,6 +2743,7 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
     case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break;
     case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break;
     case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break;
     }
     break;
   case ISD::ATOMIC_LOAD_NAND:
@@ -2713,6 +2753,47 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {
     case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break;
     case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break;
     case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break;
+    }
+    break;
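ExpandBSWAP, whose signature changes above, open-codes byte swaps from shifts, masks and ORs when the target lacks a native instruction. For the i32 case the emitted DAG computes the classic four-term form; a scalar sketch:

#include <cstdint>

// Scalar equivalent of the ISD::BSWAP expansion for 32 bits: four shifted
// copies, masked so every byte lands in its mirrored position.
uint32_t BSwap32Sketch(uint32_t Op) {
  return (Op << 24)
       | ((Op << 8)  & 0x00FF0000u)
       | ((Op >> 8)  & 0x0000FF00u)
       |  (Op >> 24);
}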
+  case ISD::ATOMIC_LOAD_MAX:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_MAX_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MAX_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MAX_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MAX_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MAX_16;break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_UMAX:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_UMAX_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMAX_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMAX_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMAX_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMAX_16;break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_MIN:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_MIN_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_MIN_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_MIN_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_MIN_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_MIN_16;break;
+    }
+    break;
+  case ISD::ATOMIC_LOAD_UMIN:
+    switch (VT.SimpleTy) {
+    default: llvm_unreachable("Unexpected value type for atomic!");
+    case MVT::i8:  LC = RTLIB::SYNC_FETCH_AND_UMIN_1; break;
+    case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_UMIN_2; break;
+    case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_UMIN_4; break;
+    case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_UMIN_8; break;
+    case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_UMIN_16;break;
     }
     break;
   }
@@ -2722,8 +2803,9 @@ std::pair<SDValue, SDValue> SelectionDAGLegalize::ExpandAtomic(SDNode *Node) {

 void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
   SmallVector<SDValue, 8> Results;
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
   SDValue Tmp1, Tmp2, Tmp3, Tmp4;
+  bool NeedInvert;
   switch (Node->getOpcode()) {
   case ISD::CTPOP:
   case ISD::CTLZ:
@@ -2913,7 +2995,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       APInt x = APInt::getSignBit(NVT.getSizeInBits());
       (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven);
       Tmp1 = DAG.getConstantFP(apf, VT);
-      Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(VT),
+      Tmp2 = DAG.getSetCC(dl, getSetCCResultType(VT),
                           Node->getOperand(0),
                           Tmp1, ISD::SETLT);
       True = DAG.getNode(ISD::FP_TO_SINT, dl, NVT, Node->getOperand(0));
@@ -2922,7 +3004,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                                     Node->getOperand(0), Tmp1));
       False = DAG.getNode(ISD::XOR, dl, NVT, False,
                           DAG.getConstant(x, NVT));
-      Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, True, False);
+      Tmp1 = DAG.getSelect(dl, NVT, Tmp2, True, False);
       Results.push_back(Tmp1);
       break;
     }
@@ -2934,27 +3016,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     unsigned Align = Node->getConstantOperandVal(3);

     SDValue VAListLoad = DAG.getLoad(TLI.getPointerTy(), dl, Tmp1, Tmp2,
-                                     MachinePointerInfo(V),
+                                     MachinePointerInfo(V),
                                      false, false, false, 0);
     SDValue VAList = VAListLoad;

     if (Align > TLI.getMinStackArgumentAlignment()) {
       assert(((Align & (Align-1)) == 0) &&
              "Expected Align to be a power of 2");

-      VAList = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+      VAList = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
                            DAG.getConstant(Align - 1,
-                                           TLI.getPointerTy()));
+                                           VAList.getValueType()));

-      VAList = DAG.getNode(ISD::AND, dl, TLI.getPointerTy(), VAList,
+      VAList = DAG.getNode(ISD::AND, dl, VAList.getValueType(), VAList,
                            DAG.getConstant(-(int64_t)Align,
-                                           TLI.getPointerTy()));
+                                           VAList.getValueType()));
     }

     // Increment the pointer, VAList, to the next vaarg
-    Tmp3 = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), VAList,
+    Tmp3 = DAG.getNode(ISD::ADD, dl, VAList.getValueType(), VAList,
                        DAG.getConstant(TLI.getDataLayout()->
                  getTypeAllocSize(VT.getTypeForEVT(*DAG.getContext())),
-                                       TLI.getPointerTy()));
+                                       VAList.getValueType()));
     // Store the incremented VAList to the legalized pointer
     Tmp3 = DAG.getStore(VAListLoad.getValue(1), dl, Tmp3, Tmp2,
                         MachinePointerInfo(V), false, false, 0);
@@ -3025,7 +3107,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     // cast operands to v8i32 and re-build the mask.

     // Calculate new VT, the size of the new VT should be equal to original.
-    EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
+    EVT NewVT = EVT::getVectorVT(*DAG.getContext(), NewEltVT,
                                  VT.getSizeInBits()/NewEltVT.getSizeInBits());
     assert(NewVT.bitsEq(VT));
@@ -3065,11 +3147,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       if (Idx < NumElems)
         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op0,
-                                  DAG.getIntPtrConstant(Idx)));
+                                  DAG.getConstant(Idx, TLI.getVectorIdxTy())));
       else
         Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, Op1,
-                                  DAG.getIntPtrConstant(Idx - NumElems)));
+                                  DAG.getConstant(Idx - NumElems,
+                                                  TLI.getVectorIdxTy())));
     }

     Tmp1 = DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], Ops.size());
@@ -3131,10 +3214,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     EVT VT = Node->getValueType(0);
     Tmp1 = Node->getOperand(0);
     Tmp2 = DAG.getConstantFP(0.0, VT);
-    Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(Tmp1.getValueType()),
+    Tmp2 = DAG.getSetCC(dl, getSetCCResultType(Tmp1.getValueType()),
                         Tmp1, Tmp2, ISD::SETUGT);
     Tmp3 = DAG.getNode(ISD::FNEG, dl, VT, Tmp1);
-    Tmp1 = DAG.getNode(ISD::SELECT, dl, VT, Tmp2, Tmp1, Tmp3);
+    Tmp1 = DAG.getSelect(dl, VT, Tmp2, Tmp1, Tmp3);
     Results.push_back(Tmp1);
     break;
   }
@@ -3224,6 +3307,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
                                       RTLIB::NEARBYINT_F128,
                                       RTLIB::NEARBYINT_PPCF128));
     break;
+  case ISD::FROUND:
+    Results.push_back(ExpandFPLibCall(Node, RTLIB::ROUND_F32,
+                                      RTLIB::ROUND_F64,
+                                      RTLIB::ROUND_F80,
+                                      RTLIB::ROUND_F128,
+                                      RTLIB::ROUND_PPCF128));
+    break;
   case ISD::FPOWI:
     Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64,
                                       RTLIB::POWI_F80, RTLIB::POWI_F128,
@@ -3263,22 +3353,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Results.push_back(ExpandConstantFP(CFP, true));
     break;
   }
-  case ISD::EHSELECTION: {
-    unsigned Reg = TLI.getExceptionSelectorRegister();
-    assert(Reg && "Can't expand to unknown register!");
-    Results.push_back(DAG.getCopyFromReg(Node->getOperand(1), dl, Reg,
-                                         Node->getValueType(0)));
-    Results.push_back(Results[0].getValue(1));
-    break;
-  }
-  case ISD::EXCEPTIONADDR: {
-    unsigned Reg = TLI.getExceptionPointerRegister();
-    assert(Reg && "Can't expand to unknown register!");
-    Results.push_back(DAG.getCopyFromReg(Node->getOperand(0), dl, Reg,
-                                         Node->getValueType(0)));
-    Results.push_back(Results[0].getValue(1));
-    break;
-  }
   case ISD::FSUB: {
     EVT VT = Node->getValueType(0);
     assert(TLI.isOperationLegalOrCustom(ISD::FADD, VT) &&
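The VAARG hunk above aligns the va_list pointer with an add of Align-1 followed by an AND with -Align before bumping it past the argument. In plain pointer-width arithmetic the same computation reads:

#include <cstdint>

// Sketch of the VAARG alignment arithmetic above: round VAList up to the
// next multiple of Align (a power of two), matching the ADD/AND pair.
// Note ~(Align - 1) is the unsigned picture of -(int64_t)Align.
uintptr_t AlignVaList(uintptr_t VAList, uintptr_t Align) {
  return (VAList + Align - 1) & ~(Align - 1);
}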
@@ -3528,10 +3602,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
       Tmp1 = DAG.getConstant(VT.getSizeInBits() - 1,
                              TLI.getShiftAmountTy(BottomHalf.getValueType()));
       Tmp1 = DAG.getNode(ISD::SRA, dl, VT, BottomHalf, Tmp1);
-      TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf, Tmp1,
+      TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf, Tmp1,
                              ISD::SETNE);
     } else {
-      TopHalf = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), TopHalf,
+      TopHalf = DAG.getSetCC(dl, getSetCCResultType(VT), TopHalf,
                              DAG.getConstant(0, VT), ISD::SETNE);
     }
     Results.push_back(BottomHalf);
@@ -3574,9 +3648,10 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     unsigned EntrySize =
       DAG.getMachineFunction().getJumpTableInfo()->getEntrySize(TD);

-    Index = DAG.getNode(ISD::MUL, dl, PTy,
-                        Index, DAG.getConstant(EntrySize, PTy));
-    SDValue Addr = DAG.getNode(ISD::ADD, dl, PTy, Index, Table);
+    Index = DAG.getNode(ISD::MUL, dl, Index.getValueType(),
+                        Index, DAG.getConstant(EntrySize, Index.getValueType()));
+    SDValue Addr = DAG.getNode(ISD::ADD, dl, Index.getValueType(),
+                               Index, Table);

     EVT MemVT = EVT::getIntegerVT(*DAG.getContext(), EntrySize * 8);
     SDValue LD = DAG.getExtLoad(ISD::SEXTLOAD, dl, PTy, Chain, Addr,
@@ -3620,10 +3695,21 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp1 = Node->getOperand(0);
     Tmp2 = Node->getOperand(1);
     Tmp3 = Node->getOperand(2);
-    LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2, Tmp3, dl);
+    bool Legalized = LegalizeSetCCCondCode(Node->getValueType(0), Tmp1, Tmp2,
+                                           Tmp3, NeedInvert, dl);
+
+    if (Legalized) {
+      // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+      // condition code, create a new SETCC node.
+      if (Tmp3.getNode())
+        Tmp1 = DAG.getNode(ISD::SETCC, dl, Node->getValueType(0),
+                           Tmp1, Tmp2, Tmp3);
+
+      // If we expanded the SETCC by inverting the condition code, then wrap
+      // the existing SETCC in a NOT to restore the intended condition.
+      if (NeedInvert)
+        Tmp1 = DAG.getNOT(dl, Tmp1, Tmp1->getValueType(0));

-    // If we expanded the SETCC into an AND/OR, return the new node
-    if (Tmp2.getNode() == 0) {
       Results.push_back(Tmp1);
       break;
     }
@@ -3654,14 +3740,52 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp4 = Node->getOperand(3);   // False
     SDValue CC = Node->getOperand(4);

-    LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp1.getValueType()),
-                          Tmp1, Tmp2, CC, dl);
+    bool Legalized = false;
+    // Try to legalize by inverting the condition. This is for targets that
+    // might support an ordered version of a condition, but not the unordered
+    // version (or vice versa).
+    ISD::CondCode InvCC = ISD::getSetCCInverse(cast<CondCodeSDNode>(CC)->get(),
+                                               Tmp1.getValueType().isInteger());
+    if (TLI.isCondCodeLegal(InvCC, Tmp1.getSimpleValueType())) {
+      // Use the new condition code and swap true and false
+      Legalized = true;
+      Tmp1 = DAG.getSelectCC(dl, Tmp1, Tmp2, Tmp4, Tmp3, InvCC);
+    } else {
+      // If The inverse is not legal, then try to swap the arguments using
+      // the inverse condition code.
+      ISD::CondCode SwapInvCC = ISD::getSetCCSwappedOperands(InvCC);
+      if (TLI.isCondCodeLegal(SwapInvCC, Tmp1.getSimpleValueType())) {
+        // The swapped inverse condition is legal, so swap true and false,
+        // lhs and rhs.
+        Legalized = true;
+        Tmp1 = DAG.getSelectCC(dl, Tmp2, Tmp1, Tmp4, Tmp3, SwapInvCC);
+      }
+    }
+
+    if (!Legalized) {
+      Legalized = LegalizeSetCCCondCode(
+          getSetCCResultType(Tmp1.getValueType()), Tmp1, Tmp2, CC, NeedInvert,
+          dl);

-    assert(!Tmp2.getNode() && "Can't legalize SELECT_CC with legal condition!");
-    Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
-    CC = DAG.getCondCode(ISD::SETNE);
-    Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
-                       Tmp3, Tmp4, CC);
+      assert(Legalized && "Can't legalize SELECT_CC with legal condition!");
+
+      // If we expanded the SETCC by inverting the condition code, then swap
+      // the True/False operands to match.
+      if (NeedInvert)
+        std::swap(Tmp3, Tmp4);
+
+      // If we expanded the SETCC by swapping LHS and RHS, or by inverting the
+      // condition code, create a new SELECT_CC node.
+      if (CC.getNode()) {
+        Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0),
+                           Tmp1, Tmp2, Tmp3, Tmp4, CC);
+      } else {
+        Tmp2 = DAG.getConstant(0, Tmp1.getValueType());
+        CC = DAG.getCondCode(ISD::SETNE);
+        Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, Node->getValueType(0), Tmp1, Tmp2,
+                           Tmp3, Tmp4, CC);
+      }
+    }
     Results.push_back(Tmp1);
     break;
   }
@@ -3671,14 +3795,27 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     Tmp2 = Node->getOperand(2);   // LHS
     Tmp3 = Node->getOperand(3);   // RHS
     Tmp4 = Node->getOperand(1);   // CC

-    LegalizeSetCCCondCode(TLI.getSetCCResultType(Tmp2.getValueType()),
-                          Tmp2, Tmp3, Tmp4, dl);
-
-    assert(!Tmp3.getNode() && "Can't legalize BR_CC with legal condition!");
-    Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
-    Tmp4 = DAG.getCondCode(ISD::SETNE);
-    Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
-                       Tmp3, Node->getOperand(4));
+    bool Legalized = LegalizeSetCCCondCode(getSetCCResultType(
+        Tmp2.getValueType()), Tmp2, Tmp3, Tmp4, NeedInvert, dl);
+    (void)Legalized;
+    assert(Legalized && "Can't legalize BR_CC with legal condition!");
+
+    // If we expanded the SETCC by inverting the condition code, then wrap
+    // the existing SETCC in a NOT to restore the intended condition.
+    if (NeedInvert)
+      Tmp4 = DAG.getNOT(dl, Tmp4, Tmp4->getValueType(0));
+
+    // If we expanded the SETCC by swapping LHS and RHS, create a new BR_CC
+    // node.
+    if (Tmp4.getNode()) {
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1,
+                         Tmp4, Tmp2, Tmp3, Node->getOperand(4));
+    } else {
+      Tmp3 = DAG.getConstant(0, Tmp2.getValueType());
+      Tmp4 = DAG.getCondCode(ISD::SETNE);
+      Tmp1 = DAG.getNode(ISD::BR_CC, dl, Node->getValueType(0), Tmp1, Tmp4, Tmp2,
+                         Tmp3, Node->getOperand(4));
+    }
     Results.push_back(Tmp1);
     break;
   }
@@ -3698,10 +3835,12 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) {
     for (unsigned Idx = 0; Idx < NumElem; Idx++) {
       SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                                VT.getScalarType(),
-                               Node->getOperand(0), DAG.getIntPtrConstant(Idx));
+                               Node->getOperand(0), DAG.getConstant(Idx,
+                                                    TLI.getVectorIdxTy()));
       SDValue Sh = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl,
                                VT.getScalarType(),
-                               Node->getOperand(1), DAG.getIntPtrConstant(Idx));
+                               Node->getOperand(1), DAG.getConstant(Idx,
+                                                    TLI.getVectorIdxTy()));
       Scalars.push_back(DAG.getNode(Node->getOpcode(), dl,
                                     VT.getScalarType(), Ex, Sh));
     }
@@ -3738,7 +3877,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     OVT = Node->getOperand(0).getSimpleValueType();
   }
   MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT);
-  DebugLoc dl = Node->getDebugLoc();
+  SDLoc dl(Node);
   SDValue Tmp1, Tmp2, Tmp3;
   switch (Node->getOpcode()) {
   case ISD::CTTZ:
@@ -3753,11 +3892,11 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) {
     Tmp1 = DAG.getNode(Node->getOpcode(), dl, NVT, Tmp1);
     if (Node->getOpcode() == ISD::CTTZ) {
       // FIXME: This should set a bit in the zero extended value instead.
- Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Tmp1, DAG.getConstant(NVT.getSizeInBits(), NVT), ISD::SETEQ); - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp2, - DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); + Tmp1 = DAG.getSelect(dl, NVT, Tmp2, + DAG.getConstant(OVT.getSizeInBits(), NVT), Tmp1); } else if (Node->getOpcode() == ISD::CTLZ || Node->getOpcode() == ISD::CTLZ_ZERO_UNDEF) { // Tmp1 = Tmp1 - (sizeinbits(NVT) - sizeinbits(Old VT)) @@ -3852,7 +3991,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node) { Tmp2 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(1)); Tmp3 = DAG.getNode(ExtOp, dl, NVT, Node->getOperand(2)); // Perform the larger operation, then round down. - Tmp1 = DAG.getNode(ISD::SELECT, dl, NVT, Tmp1, Tmp2, Tmp3); + Tmp1 = DAG.getSelect(dl, NVT, Tmp1, Tmp2, Tmp3); if (TruncOp != ISD::FP_ROUND) Tmp1 = DAG.getNode(TruncOp, dl, Node->getValueType(0), Tmp1); else diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index de217d8..ecf4c5d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -88,6 +88,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOWI: R = SoftenFloatRes_FPOWI(N); break; case ISD::FREM: R = SoftenFloatRes_FREM(N); break; case ISD::FRINT: R = SoftenFloatRes_FRINT(N); break; + case ISD::FROUND: R = SoftenFloatRes_FROUND(N); break; case ISD::FSIN: R = SoftenFloatRes_FSIN(N); break; case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; @@ -118,7 +119,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_MERGE_VALUES(SDNode *N, SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { // Convert the inputs to integers, and build a new pair out of them. 
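One pattern explains the long run of mechanical hunks that follows in LegalizeFloatTypes.cpp: TLI.makeLibCall now returns a (result, chain) pair instead of a bare SDValue, which is why every converted call site gains a trailing .first. A minimal analogue of the signature change, with illustrative stand-in types:

    #include <cstdio>
    #include <utility>
    struct Value { int v; };                    // stand-in for llvm::SDValue
    static std::pair<Value, Value> makeLibCall(int x) {
      return { Value{x * 2}, Value{0} };        // {result, output chain}
    }
    int main() {
      Value R = makeLibCall(21).first;          // the ".first" added throughout
      printf("%d\n", R.v);                      // 42
      return 0;
    }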
- return DAG.getNode(ISD::BUILD_PAIR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_PAIR, SDLoc(N), TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), BitConvertToInteger(N->getOperand(0)), @@ -133,7 +134,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue NewOp = BitConvertVectorToIntegerVector(N->getOperand(0)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), NewOp.getValueType().getVectorElementType(), NewOp, N->getOperand(1)); } @@ -147,7 +148,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { API.clearBit(Size-1); SDValue Mask = DAG.getConstant(API, NVT); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return DAG.getNode(ISD::AND, N->getDebugLoc(), NVT, Op, Mask); + return DAG.getNode(ISD::AND, SDLoc(N), NVT, Op, Mask); } SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { @@ -160,7 +161,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { RTLIB::ADD_F80, RTLIB::ADD_F128, RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { @@ -172,13 +173,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { RTLIB::CEIL_F80, RTLIB::CEIL_F128, RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT LVT = LHS.getValueType(); EVT RVT = RHS.getValueType(); @@ -226,7 +227,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { RTLIB::COS_F80, RTLIB::COS_F128, RTLIB::COS_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { @@ -239,7 +240,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { RTLIB::DIV_F80, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { @@ -251,7 +252,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { RTLIB::EXP_F80, RTLIB::EXP_F128, RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { @@ -263,7 +264,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { RTLIB::EXP2_F80, RTLIB::EXP2_F128, RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { @@ -275,7 +276,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { @@ -287,7 +288,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { RTLIB::LOG_F80, RTLIB::LOG_F128, RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { @@ -299,7 +300,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { RTLIB::LOG2_F80, 
RTLIB::LOG2_F128, RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { @@ -311,7 +312,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { RTLIB::LOG10_F80, RTLIB::LOG10_F128, RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -325,7 +326,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { RTLIB::FMA_F80, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, N->getDebugLoc()); + NVT, Ops, 3, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { @@ -338,7 +339,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { RTLIB::MUL_F80, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { @@ -350,7 +351,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { RTLIB::NEARBYINT_F80, RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { @@ -364,7 +365,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -372,7 +373,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -381,7 +382,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - N->getDebugLoc()); + SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -389,7 +390,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { @@ -402,7 +403,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { RTLIB::POW_F80, RTLIB::POW_F128, RTLIB::POW_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -416,7 +417,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { RTLIB::POWI_F80, RTLIB::POWI_F128, RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { @@ -429,7 +430,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { RTLIB::REM_F80, 
RTLIB::REM_F128, RTLIB::REM_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { @@ -441,7 +442,19 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { RTLIB::RINT_F80, RTLIB::RINT_F128, RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; +} + +SDValue DAGTypeLegalizer::SoftenFloatRes_FROUND(SDNode *N) { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue Op = GetSoftenedFloat(N->getOperand(0)); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { @@ -453,7 +466,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { RTLIB::SIN_F80, RTLIB::SIN_F128, RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { @@ -465,7 +478,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { RTLIB::SQRT_F80, RTLIB::SQRT_F128, RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { @@ -478,7 +491,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { RTLIB::SUB_F80, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + NVT, Ops, 2, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { @@ -490,21 +503,22 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + NVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { LoadSDNode *L = cast(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue NewL; if (L->getExtensionType() == ISD::NON_EXTLOAD) { NewL = DAG.getLoad(L->getAddressingMode(), L->getExtensionType(), NVT, dl, L->getChain(), L->getBasePtr(), L->getOffset(), - L->getPointerInfo(), NVT, L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment()); + L->getPointerInfo(), NVT, L->isVolatile(), + L->isNonTemporal(), false, L->getAlignment(), + L->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -516,7 +530,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getMemoryVT(), dl, L->getChain(), L->getBasePtr(), L->getOffset(), L->getPointerInfo(), L->getMemoryVT(), L->isVolatile(), - L->isNonTemporal(), false, L->getAlignment()); + L->isNonTemporal(), false, L->getAlignment(), + L->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. 
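The new SoftenFloatRes_FROUND above maps ISD::FROUND to the RTLIB::ROUND_* entries, which correspond to the C99 round() family: halfway cases go away from zero, unlike FRINT, which follows the current rounding mode. A runnable reminder of the difference, assuming the default round-to-nearest-even mode:

    #include <cmath>
    #include <cstdio>
    int main() {
      // round(): halfway cases away from zero, independent of FP mode.
      printf("%.1f %.1f\n", std::round(2.5), std::round(-2.5));  // 3.0 -3.0
      // rint(): honours the rounding mode; 2.5 -> 2.0 under nearest-even.
      printf("%.1f\n", std::rint(2.5));
      return 0;
    }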
ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); @@ -526,14 +541,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), N->getOperand(0),LHS,RHS); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); } SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), N->getOperand(0), N->getOperand(1), LHS, RHS, N->getOperand(4)); } @@ -548,7 +563,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { SDValue Ptr = N->getOperand(1); // Get the pointer. EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue NewVAARG; NewVAARG = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), @@ -565,7 +580,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { EVT SVT = N->getOperand(0).getValueType(); EVT RVT = N->getValueType(0); EVT NVT = EVT(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If the input is not legal, eg: i1 -> fp, then it needs to be promoted to // a larger type, eg: i8 -> fp. Even if it is legal, no libcall may exactly @@ -585,7 +600,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { NVT, N->getOperand(0)); return TLI.makeLibCall(DAG, LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, false, dl); + &Op, 1, false, dl).first; } @@ -633,7 +648,7 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { } SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); } @@ -645,7 +660,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { @@ -655,7 +670,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
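The comment above describes the common soft-float pattern: when softenSetCCOperands collapses the compare into a single scalar libcall result, the caller rebuilds the condition by testing that scalar against zero. A sketch under the usual libgcc-style convention that the result is negative iff a < b; my_ltdf2 is a hypothetical stand-in, not the real symbol:

    #include <cstdio>
    extern "C" int my_ltdf2(double a, double b) { return a < b ? -1 : 0; }
    int main() {
      int Cmp = my_ltdf2(1.0, 2.0);   // NewLHS: the scalar libcall result
      bool Take = (Cmp < 0);          // NewRHS = 0, CCCode = SETLT
      printf("%d\n", Take);           // 1
      return 0;
    }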
@@ -676,7 +691,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { @@ -684,14 +699,14 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, SDLoc(N)).first; } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { @@ -701,7 +716,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -724,7 +739,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { EVT VT = NewLHS.getValueType(); NewLHS = GetSoftenedFloat(NewLHS); NewRHS = GetSoftenedFloat(NewRHS); - TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, SDLoc(N)); // If softenSetCCOperands returned a scalar, use it. if (NewRHS.getNode() == 0) { @@ -744,7 +759,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "Can only soften the stored value!"); StoreSDNode *ST = cast(N); SDValue Val = ST->getValue(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (ST->isTruncatingStore()) // Do an FP_ROUND followed by a non-truncating store. 
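Several hunks in this file, including the store hunk just below, replace the exploded (PointerInfo, isVolatile, isNonTemporal, Alignment) argument lists with a single memory-operand descriptor via getMemOperand(), which is also how the TBAA tag stops being silently dropped. The shape of that cleanup, with illustrative stand-in types only:

    #include <cstdio>
    struct MemOperandInfo {          // stand-in, not the LLVM type
      bool Volatile, NonTemporal;
      unsigned Align;
      const char *TBAATag;           // the attribute the exploded form dropped
    };
    static void emitStore(const MemOperandInfo &MMO) {
      printf("store align=%u volatile=%d tbaa=%s\n",
             MMO.Align, MMO.Volatile, MMO.TBAATag);
    }
    int main() {
      MemOperandInfo MMO{false, false, 8, "double"};
      emitStore(MMO);                // one descriptor, nothing re-listed
      return 0;
    }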
@@ -754,9 +769,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_STORE(SDNode *N, unsigned OpNo) { Val = GetSoftenedFloat(Val); return DAG.getStore(ST->getChain(), dl, Val, ST->getBasePtr(), - ST->getPointerInfo(), - ST->isVolatile(), ST->isNonTemporal(), - ST->getAlignment()); + ST->getMemOperand()); } @@ -817,6 +830,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::FPOW: ExpandFloatRes_FPOW(N, Lo, Hi); break; case ISD::FPOWI: ExpandFloatRes_FPOWI(N, Lo, Hi); break; case ISD::FRINT: ExpandFloatRes_FRINT(N, Lo, Hi); break; + case ISD::FROUND: ExpandFloatRes_FROUND(N, Lo, Hi); break; case ISD::FSIN: ExpandFloatRes_FSIN(N, Lo, Hi); break; case ISD::FSQRT: ExpandFloatRes_FSQRT(N, Lo, Hi); break; case ISD::FSUB: ExpandFloatRes_FSUB(N, Lo, Hi); break; @@ -850,14 +864,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FABS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(N->getValueType(0) == MVT::ppcf128 && "Logic only correct for ppcf128!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Tmp; GetExpandedFloat(N->getOperand(0), Lo, Tmp); Hi = DAG.getNode(ISD::FABS, dl, Tmp.getValueType(), Tmp); // Lo = Hi==fabs(Hi) ? Lo : -Lo; - Lo = DAG.getNode(ISD::SELECT_CC, dl, Lo.getValueType(), Tmp, Hi, Lo, + Lo = DAG.getSelectCC(dl, Tmp, Hi, Lo, DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo), - DAG.getCondCode(ISD::SETEQ)); + ISD::SETEQ); } void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, @@ -912,7 +926,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, RTLIB::DIV_F128, RTLIB::DIV_PPCF128), N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -986,7 +1000,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, RTLIB::FMA_F128, RTLIB::FMA_PPCF128), N->getValueType(0), Ops, 3, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1000,7 +1014,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, RTLIB::MUL_F128, RTLIB::MUL_PPCF128), N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1018,7 +1032,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedFloat(N->getOperand(0), Lo, Hi); Lo = DAG.getNode(ISD::FNEG, dl, Lo.getValueType(), Lo); Hi = DAG.getNode(ISD::FNEG, dl, Hi.getValueType(), Hi); @@ -1027,7 +1041,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEG(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); + Hi = DAG.getNode(ISD::FP_EXTEND, SDLoc(N), NVT, N->getOperand(0)); Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), APInt(NVT.getSizeInBits(), 0)), NVT); } @@ -1072,6 +1086,18 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, GetPairElements(Call, Lo, Hi); } +void DAGTypeLegalizer::ExpandFloatRes_FROUND(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::ROUND_F32, + RTLIB::ROUND_F64, + RTLIB::ROUND_F80, + RTLIB::ROUND_F128, + RTLIB::ROUND_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = 
LibCallify(GetFPLibCall(N->getValueType(0), @@ -1102,7 +1128,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, RTLIB::SUB_F128, RTLIB::SUB_PPCF128), N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDLoc(N)).first; GetPairElements(Call, Lo, Hi); } @@ -1127,15 +1153,14 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, LoadSDNode *LD = cast(N); SDValue Chain = LD->getChain(); SDValue Ptr = LD->getBasePtr(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); assert(NVT.isByteSized() && "Expanded type not byte sized!"); assert(LD->getMemoryVT().bitsLE(NVT) && "Float type not round?"); Hi = DAG.getExtLoad(LD->getExtensionType(), dl, NVT, Chain, Ptr, - LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->getMemoryVT(), LD->getMemOperand()); // Remember the chain. Chain = Hi.getValue(1); @@ -1157,7 +1182,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, SDValue Src = N->getOperand(0); EVT SrcVT = Src.getValueType(); bool isSigned = N->getOpcode() == ISD::SINT_TO_FP; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // First do an SINT_TO_FP, whether the original was signed or unsigned. // When promoting partial word types to i32 we must honor the signedness, @@ -1181,7 +1206,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl); + Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl).first; GetPairElements(Hi, Lo, Hi); } @@ -1216,8 +1241,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, APInt(128, Parts)), MVT::ppcf128)); - Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), - Lo, Hi, DAG.getCondCode(ISD::SETLT)); + Lo = DAG.getSelectCC(dl, Src, DAG.getConstant(0, SrcVT), + Lo, Hi, ISD::SETLT); GetPairElements(Lo, Lo, Hi); } @@ -1251,6 +1276,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { case ISD::EXTRACT_ELEMENT: Res = ExpandOp_EXTRACT_ELEMENT(N); break; case ISD::BR_CC: Res = ExpandFloatOp_BR_CC(N); break; + case ISD::FCOPYSIGN: Res = ExpandFloatOp_FCOPYSIGN(N); break; case ISD::FP_ROUND: Res = ExpandFloatOp_FP_ROUND(N); break; case ISD::FP_TO_SINT: Res = ExpandFloatOp_FP_TO_SINT(N); break; case ISD::FP_TO_UINT: Res = ExpandFloatOp_FP_TO_UINT(N); break; @@ -1280,7 +1306,7 @@ bool DAGTypeLegalizer::ExpandFloatOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - DebugLoc dl) { + SDLoc dl) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedFloat(NewLHS, LHSLo, LHSHi); GetExpandedFloat(NewRHS, RHSLo, RHSHi); @@ -1293,14 +1319,14 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, // FCMPU crN, lo1, lo2 // The following can be improved, but not that much. 
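The "FCMPU crN, lo1, lo2" comment above introduces the decomposition spelled out in the next hunk: a compare of two expanded (hi, lo) float pairs becomes "high halves equal ? low-half compare : high-half compare". Modelled on plain doubles, with SETOEQ and SETUNE rendered as == and !(==); a sketch, not the DAG code:

    #include <cstdio>
    static bool lessPair(double Hi1, double Lo1, double Hi2, double Lo2) {
      bool HiEq = (Hi1 == Hi2);      // SETOEQ on the high parts
      bool LoCC = (Lo1 < Lo2);       // original CC on the low parts
      bool HiNe = !(Hi1 == Hi2);     // SETUNE (true for NaN too)
      bool HiCC = (Hi1 < Hi2);       // original CC on the high parts
      return (HiEq && LoCC) || (HiNe && HiCC);
    }
    int main() {
      printf("%d %d\n", lessPair(1, 2, 1, 3), lessPair(2, 9, 3, 0));  // 1 1
      return 0;
    }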
SDValue Tmp1, Tmp2, Tmp3; - Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETOEQ); - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, CCCode); Tmp3 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); - Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETUNE); - Tmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp2 = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, CCCode); Tmp1 = DAG.getNode(ISD::AND, dl, Tmp1.getValueType(), Tmp1, Tmp2); NewLHS = DAG.getNode(ISD::OR, dl, Tmp1.getValueType(), Tmp1, Tmp3); @@ -1310,7 +1336,7 @@ void DAGTypeLegalizer::FloatExpandSetCCOperands(SDValue &NewLHS, SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); ISD::CondCode CCCode = cast(N->getOperand(1))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1325,19 +1351,30 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_BR_CC(SDNode *N) { N->getOperand(4)), 0); } +SDValue DAGTypeLegalizer::ExpandFloatOp_FCOPYSIGN(SDNode *N) { + assert(N->getOperand(1).getValueType() == MVT::ppcf128 && + "Logic only correct for ppcf128!"); + SDValue Lo, Hi; + GetExpandedFloat(N->getOperand(1), Lo, Hi); + // The ppcf128 value is providing only the sign; take it from the + // higher-order double (which must have the larger magnitude). + return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), + N->getValueType(0), N->getOperand(0), Hi); +} + SDValue DAGTypeLegalizer::ExpandFloatOp_FP_ROUND(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); SDValue Lo, Hi; GetExpandedFloat(N->getOperand(0), Lo, Hi); // Round it the rest of the way (e.g. to f32) if needed. - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), N->getValueType(0), Hi, N->getOperand(1)); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { EVT RVT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). FIXME: Do this in a less hacky way. @@ -1353,12 +1390,12 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { EVT RVT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand ppcf128 to i32 by hand for the benefit of llvm-gcc bootstrap on // PPC (the libcall is not available). FIXME: Do this in a less hacky way. @@ -1370,29 +1407,29 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); // X>=2^31 ? 
(int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. - return DAG.getNode(ISD::SELECT_CC, dl, MVT::i32, N->getOperand(0), Tmp, - DAG.getNode(ISD::ADD, dl, MVT::i32, - DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, - DAG.getNode(ISD::FSUB, dl, - MVT::ppcf128, - N->getOperand(0), - Tmp)), - DAG.getConstant(0x80000000, MVT::i32)), - DAG.getNode(ISD::FP_TO_SINT, dl, - MVT::i32, N->getOperand(0)), - DAG.getCondCode(ISD::SETGE)); + return DAG.getSelectCC(dl, N->getOperand(0), Tmp, + DAG.getNode(ISD::ADD, dl, MVT::i32, + DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, + DAG.getNode(ISD::FSUB, dl, + MVT::ppcf128, + N->getOperand(0), + Tmp)), + DAG.getConstant(0x80000000, MVT::i32)), + DAG.getNode(ISD::FP_TO_SINT, dl, + MVT::i32, N->getOperand(0)), + ISD::SETGE); } RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, - false, dl); + false, dl).first; } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(4))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -1410,7 +1447,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { SDValue DAGTypeLegalizer::ExpandFloatOp_SETCC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(2))->get(); - FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + FloatExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. if (NewRHS.getNode() == 0) { @@ -1444,8 +1481,6 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_STORE(SDNode *N, unsigned OpNo) { SDValue Lo, Hi; GetExpandedOp(ST->getValue(), Lo, Hi); - return DAG.getTruncStore(Chain, N->getDebugLoc(), Hi, Ptr, - ST->getPointerInfo(), - ST->getMemoryVT(), ST->isVolatile(), - ST->isNonTemporal(), ST->getAlignment()); + return DAG.getTruncStore(Chain, SDLoc(N), Hi, Ptr, + ST->getMemoryVT(), ST->getMemOperand()); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index cd2f060..4255948 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -153,20 +153,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_MERGE_VALUES(SDNode *N, SDValue DAGTypeLegalizer::PromoteIntRes_AssertSext(SDNode *N) { // Sign-extend the new bits, and continue the assertion. SDValue Op = SExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::AssertSext, N->getDebugLoc(), + return DAG.getNode(ISD::AssertSext, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_AssertZext(SDNode *N) { // Zero the new bits, and continue the assertion. 
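The FP_TO_UINT hunk above implements the classic trick for targets with only a signed conversion: values at or above 2^31 are shifted into signed range, converted, and fixed up with 0x80000000. The same computation on a plain double, a sketch rather than the ppcf128 path itself:

    #include <cstdint>
    #include <cstdio>
    static uint32_t fptoui32(double X) {
      const double Two31 = 2147483648.0;                     // 2^31
      if (X >= Two31)                                        // the SETGE select
        return (uint32_t)(int32_t)(X - Two31) + 0x80000000u;
      return (uint32_t)(int32_t)X;                           // plain FP_TO_SINT
    }
    int main() {
      printf("%u %u\n", fptoui32(7.0), fptoui32(3000000000.0));
      // 7 3000000000
      return 0;
    }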
SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::AssertZext, N->getDebugLoc(), + return DAG.getNode(ISD::AssertZext, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { EVT ResVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), ResVT, N->getChain(), N->getBasePtr(), N->getMemOperand(), N->getOrdering(), @@ -179,7 +179,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic0(AtomicSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); - SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), N->getOrdering(), @@ -193,7 +193,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Atomic1(AtomicSDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_Atomic2(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); SDValue Op3 = GetPromotedInteger(N->getOperand(3)); - SDValue Res = DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), Op2, Op3, N->getMemOperand(), N->getOrdering(), N->getSynchScope()); @@ -209,7 +209,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BITCAST(SDNode *N) { EVT NInVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: @@ -264,7 +264,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned DiffBits = NVT.getSizeInBits() - OVT.getSizeInBits(); return DAG.getNode(ISD::SRL, dl, NVT, DAG.getNode(ISD::BSWAP, dl, NVT, Op), @@ -274,7 +274,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BSWAP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { // The pair element type may be legal, or may not promote to the same type as // the result, for example i14 = BUILD_PAIR (i7, i7). Handle all cases. - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), JoinIntegers(N->getOperand(0), N->getOperand(1))); @@ -283,7 +283,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_PAIR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_Constant(SDNode *N) { EVT VT = N->getValueType(0); // FIXME there is no actual debug info here - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Zero extend things like i1, sign extend everything else. It shouldn't // matter in theory which one we pick, but this tends to give better code? unsigned Opc = VT.isByteSized() ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND; @@ -301,7 +301,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { CvtCode == ISD::CVT_SF || CvtCode == ISD::CVT_UF) && "can only promote integers"); EVT OutVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getConvertRndSat(OutVT, N->getDebugLoc(), N->getOperand(0), + return DAG.getConvertRndSat(OutVT, SDLoc(N), N->getOperand(0), N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(4), CvtCode); } @@ -309,7 +309,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONVERT_RNDSAT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); Op = DAG.getNode(N->getOpcode(), dl, NVT, Op); @@ -322,14 +322,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTLZ(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_CTPOP(SDNode *N) { // Zero extend to the promoted type and do the count there. SDValue Op = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::CTPOP, N->getDebugLoc(), Op.getValueType(), Op); + return DAG.getNode(ISD::CTPOP, SDLoc(N), Op.getValueType(), Op); } SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); EVT OVT = N->getValueType(0); EVT NVT = Op.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->getOpcode() == ISD::CTTZ) { // The count is the same in the promoted type except if the original // value was zero. This can be handled by setting the bit just off @@ -342,7 +342,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CTTZ(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NVT, N->getOperand(0), N->getOperand(1)); @@ -351,7 +351,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned NewOpc = N->getOpcode(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If we're promoting a UINT to a larger size and the larger FP_TO_UINT is // not Legal, check to see if we can use FP_TO_SINT instead. (If both UINT @@ -374,7 +374,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP_TO_XINT(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Res = DAG.getNode(N->getOpcode(), dl, NVT, N->getOperand(0)); @@ -384,7 +384,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_FP32_TO_FP16(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_INT_EXTEND(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (getTypeAction(N->getOperand(0).getValueType()) == TargetLowering::TypePromoteInteger) { @@ -415,11 +415,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_LOAD(LoadSDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); ISD::LoadExtType ExtType = ISD::isNON_EXTLoad(N) ? 
ISD::EXTLOAD : N->getExtensionType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Res = DAG.getExtLoad(ExtType, dl, NVT, N->getChain(), N->getBasePtr(), - N->getPointerInfo(), - N->getMemoryVT(), N->isVolatile(), - N->isNonTemporal(), N->getAlignment()); + N->getMemoryVT(), N->getMemOperand()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -433,7 +431,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_Overflow(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(1)); EVT ValueVTs[] = { N->getValueType(0), NVT }; SDValue Ops[] = { N->getOperand(0), N->getOperand(1) }; - SDValue Res = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + SDValue Res = DAG.getNode(N->getOpcode(), SDLoc(N), DAG.getVTList(ValueVTs, 2), Ops, 2); // Modified the sum result - switch anything that used the old sum to use @@ -453,7 +451,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SADDSUBO(SDNode *N, unsigned ResNo) { SDValue RHS = SExtPromotedInteger(N->getOperand(1)); EVT OVT = N->getOperand(0).getValueType(); EVT NVT = LHS.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Do the arithmetic in the larger type. unsigned Opcode = N->getOpcode() == ISD::SADDO ? ISD::ADD : ISD::SUB; @@ -476,15 +474,15 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SDIV(SDNode *N) { // Sign extend the input. SDValue LHS = SExtPromotedInteger(N->getOperand(0)); SDValue RHS = SExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SELECT(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), N->getOperand(0),LHS,RHS); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { @@ -492,23 +490,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VSELECT(SDNode *N) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. 
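PromoteIntRes_SADDSUBO above follows the usual promotion recipe: sign-extend the operands, do the add or sub in the wider type, and derive the overflow bit from whether the wide result survives a round trip through the narrow type. On concrete i8/i32 types, as an illustration of the idea rather than the exact DAG nodes:

    #include <cstdint>
    #include <cstdio>
    int main() {
      int8_t A = 100, B = 100;
      int32_t Wide = (int32_t)A + (int32_t)B;       // add in the larger type
      bool Ovf = (int32_t)(int8_t)Wide != Wide;     // narrow round-trip check
      printf("sum=%d overflow=%d\n", (int8_t)Wide, Ovf);  // sum=-56 overflow=1
      return 0;
    }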
- Mask = PromoteTargetBoolean(Mask, TLI.getSetCCResultType(OpTy)); + Mask = PromoteTargetBoolean(Mask, getSetCCResultType(OpTy)); SDValue LHS = GetPromotedInteger(N->getOperand(1)); SDValue RHS = GetPromotedInteger(N->getOperand(2)); - return DAG.getNode(ISD::VSELECT, N->getDebugLoc(), + return DAG.getNode(ISD::VSELECT, SDLoc(N), LHS.getValueType(), Mask, LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SELECT_CC(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(2)); SDValue RHS = GetPromotedInteger(N->getOperand(3)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), N->getOperand(0), N->getOperand(1), LHS, RHS, N->getOperand(4)); } SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { - EVT SVT = TLI.getSetCCResultType(N->getOperand(0).getValueType()); + EVT SVT = getSetCCResultType(N->getOperand(0).getValueType()); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); @@ -517,13 +515,24 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { if (!TLI.isTypeLegal(SVT)) SVT = NVT; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && "Vector compare must return a vector result!"); + SDValue LHS = N->getOperand(0); + SDValue RHS = N->getOperand(1); + if (LHS.getValueType() != RHS.getValueType()) { + if (getTypeAction(LHS.getValueType()) == TargetLowering::TypePromoteInteger && + !LHS.getValueType().isVector()) + LHS = GetPromotedInteger(LHS); + if (getTypeAction(RHS.getValueType()) == TargetLowering::TypePromoteInteger && + !RHS.getValueType().isVector()) + RHS = GetPromotedInteger(RHS); + } + // Get the SETCC result using the canonical SETCC type. - SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, N->getOperand(0), - N->getOperand(1), N->getOperand(2)); + SDValue SetCC = DAG.getNode(N->getOpcode(), dl, SVT, LHS, RHS, + N->getOperand(2)); assert(NVT.bitsLE(SVT) && "Integer type overpromoted?"); // Convert to the expected type. @@ -534,12 +543,12 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { SDValue Res = GetPromotedInteger(N->getOperand(0)); SDValue Amt = N->getOperand(1); Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt); + return DAG.getNode(ISD::SHL, SDLoc(N), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), + return DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } @@ -549,7 +558,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { // that too is okay if they are integer operations. SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = GetPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } @@ -558,7 +567,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { SDValue Res = SExtPromotedInteger(N->getOperand(0)); SDValue Amt = N->getOperand(1); Amt = Amt.getValueType().isVector() ? 
ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt); + return DAG.getNode(ISD::SRA, SDLoc(N), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { @@ -566,14 +575,14 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { SDValue Res = ZExtPromotedInteger(N->getOperand(0)); SDValue Amt = N->getOperand(1); Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; - return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt); + return DAG.getNode(ISD::SRL, SDLoc(N), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Res; SDValue InOp = N->getOperand(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (getTypeAction(InOp.getValueType())) { default: llvm_unreachable("Unknown type action!"); @@ -618,7 +627,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UADDSUBO(SDNode *N, unsigned ResNo) { SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); EVT OVT = N->getOperand(0).getValueType(); EVT NVT = LHS.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Do the arithmetic in the larger type. unsigned Opcode = N->getOpcode() == ISD::UADDO ? ISD::ADD : ISD::SUB; @@ -642,7 +651,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_XMULO(SDNode *N, unsigned ResNo) { return PromoteIntRes_Overflow(N); SDValue LHS = N->getOperand(0), RHS = N->getOperand(1); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT SmallVT = LHS.getValueType(); // To determine if the result overflowed in a larger type, we extend the @@ -690,7 +699,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_UDIV(SDNode *N) { // Zero extend the input. SDValue LHS = ZExtPromotedInteger(N->getOperand(0)); SDValue RHS = ZExtPromotedInteger(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } @@ -703,7 +712,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { SDValue Chain = N->getOperand(0); // Get the chain. SDValue Ptr = N->getOperand(1); // Get the pointer. EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); @@ -847,12 +856,12 @@ void DAGTypeLegalizer::PromoteSetCCOperands(SDValue &NewLHS,SDValue &NewRHS, SDValue DAGTypeLegalizer::PromoteIntOp_ANY_EXTEND(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), Op); + return DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Op); } SDValue DAGTypeLegalizer::PromoteIntOp_ATOMIC_STORE(AtomicSDNode *N) { SDValue Op2 = GetPromotedInteger(N->getOperand(2)); - return DAG.getAtomic(N->getOpcode(), N->getDebugLoc(), N->getMemoryVT(), + return DAG.getAtomic(N->getOpcode(), SDLoc(N), N->getMemoryVT(), N->getChain(), N->getBasePtr(), Op2, N->getMemOperand(), N->getOrdering(), N->getSynchScope()); } @@ -881,7 +890,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BRCOND(SDNode *N, unsigned OpNo) { assert(OpNo == 1 && "only know how to promote condition"); // Promote all the way up to the canonical SetCC type. 
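The XMULO comment above ("To determine if the result overflowed in a larger type, we extend the input...") reduces, on concrete types, to multiplying in the promoted width and testing the bits beyond the original width. A minimal unsigned sketch:

    #include <cstdint>
    #include <cstdio>
    int main() {
      uint16_t A = 300, B = 300;
      uint32_t Wide = (uint32_t)A * (uint32_t)B;    // multiply once, widened
      bool Ovf = (Wide >> 16) != 0;                 // any bits beyond i16?
      printf("lo=%u overflow=%d\n", (unsigned)(uint16_t)Wide, Ovf);
      // lo=24464 overflow=1  (300*300 = 90000 = 0x15F90)
      return 0;
    }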
- EVT SVT = TLI.getSetCCResultType(MVT::Other); + EVT SVT = getSetCCResultType(MVT::Other); SDValue Cond = PromoteTargetBoolean(N->getOperand(1), SVT); // The chain (Op#0) and basic block destination (Op#2) are always legal types. @@ -895,7 +904,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_PAIR(SDNode *N) { SDValue Lo = ZExtPromotedInteger(N->getOperand(0)); SDValue Hi = GetPromotedInteger(N->getOperand(1)); assert(Lo.getValueType() == N->getValueType(0) && "Operand over promoted?"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Hi = DAG.getNode(ISD::SHL, dl, N->getValueType(0), Hi, DAG.getConstant(OVT.getSizeInBits(), TLI.getPointerTy())); @@ -908,7 +917,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_BUILD_VECTOR(SDNode *N) { // type does not have a strange size (eg: it is not i1). EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - assert(!(NumElts & 1) && "Legal vector of one illegal element?"); + assert(!((NumElts & 1) && (!TLI.isTypeLegal(VecVT))) && + "Legal vector of one illegal element?"); // Promote the inserted value. The type does not need to match the // vector element type. Check that any extra bits introduced will be @@ -931,7 +941,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_CONVERT_RNDSAT(SDNode *N) { CvtCode == ISD::CVT_FS || CvtCode == ISD::CVT_FU) && "can only promote integer arguments"); SDValue InOp = GetPromotedInteger(N->getOperand(0)); - return DAG.getConvertRndSat(N->getValueType(0), N->getDebugLoc(), InOp, + return DAG.getConvertRndSat(N->getValueType(0), SDLoc(N), InOp, N->getOperand(1), N->getOperand(2), N->getOperand(3), N->getOperand(4), CvtCode); } @@ -955,7 +965,8 @@ SDValue DAGTypeLegalizer::PromoteIntOp_INSERT_VECTOR_ELT(SDNode *N, assert(OpNo == 2 && "Different operand and result vector types?"); // Promote the index. - SDValue Idx = ZExtPromotedInteger(N->getOperand(2)); + SDValue Idx = DAG.getZExtOrTrunc(N->getOperand(2), SDLoc(N), + TLI.getVectorIdxTy()); return SDValue(DAG.UpdateNodeOperands(N, N->getOperand(0), N->getOperand(1), Idx), 0); } @@ -973,7 +984,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SELECT(SDNode *N, unsigned OpNo) { EVT OpTy = N->getOperand(1).getValueType(); // Promote all the way up to the canonical SetCC type. - EVT SVT = TLI.getSetCCResultType(N->getOpcode() == ISD::SELECT ? + EVT SVT = getSetCCResultType(N->getOpcode() == ISD::SELECT ? OpTy.getScalarType() : OpTy); Cond = PromoteTargetBoolean(Cond, SVT); @@ -1011,7 +1022,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_Shift(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_SIGN_EXTEND(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, DAG.getValueType(N->getOperand(0).getValueType())); @@ -1025,22 +1036,18 @@ SDValue DAGTypeLegalizer::PromoteIntOp_SINT_TO_FP(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(ISD::isUNINDEXEDStore(N) && "Indexed store during type legalization!"); SDValue Ch = N->getChain(), Ptr = N->getBasePtr(); - unsigned Alignment = N->getAlignment(); - bool isVolatile = N->isVolatile(); - bool isNonTemporal = N->isNonTemporal(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Val = GetPromotedInteger(N->getValue()); // Get promoted value. // Truncate the value and store the result. 
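PromoteIntOp_SIGN_EXTEND above uses the ANY_EXTEND + SIGN_EXTEND_INREG pairing: widen by whatever is cheapest, leaving the high bits unspecified, then re-establish the sign from the original width. On scalars, with deliberately poisoned high bits standing in for the ANY_EXTEND garbage:

    #include <cstdint>
    #include <cstdio>
    int main() {
      uint8_t Promoted = 0xF0;                         // original i8 payload
      uint32_t Any = (uint32_t)Promoted | 0xABCD00u;   // ANY_EXTEND: junk above
      int32_t InReg = (int32_t)(int8_t)(Any & 0xFF);   // SIGN_EXTEND_INREG, i8
      printf("%d\n", InReg);                           // -16, junk discarded
      return 0;
    }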
- return DAG.getTruncStore(Ch, dl, Val, Ptr, N->getPointerInfo(), - N->getMemoryVT(), - isVolatile, isNonTemporal, Alignment); + return DAG.getTruncStore(Ch, dl, Val, Ptr, + N->getMemoryVT(), N->getMemOperand()); } SDValue DAGTypeLegalizer::PromoteIntOp_TRUNCATE(SDNode *N) { SDValue Op = GetPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), Op); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), Op); } SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { @@ -1049,7 +1056,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_UINT_TO_FP(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_ZERO_EXTEND(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = GetPromotedInteger(N->getOperand(0)); Op = DAG.getNode(ISD::ANY_EXTEND, dl, N->getValueType(0), Op); return DAG.getZeroExtendInReg(Op, dl, @@ -1127,7 +1134,8 @@ void DAGTypeLegalizer::ExpandIntegerResult(SDNode *N, unsigned ResNo) { case ISD::ATOMIC_LOAD_MAX: case ISD::ATOMIC_LOAD_UMIN: case ISD::ATOMIC_LOAD_UMAX: - case ISD::ATOMIC_SWAP: { + case ISD::ATOMIC_SWAP: + case ISD::ATOMIC_CMP_SWAP: { std::pair Tmp = ExpandAtomic(N); SplitInteger(Tmp.first, Lo, Hi); ReplaceValueWith(SDValue(N, 1), Tmp.second); @@ -1180,6 +1188,7 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_2; break; case MVT::i32: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_4; break; case MVT::i64: LC = RTLIB::SYNC_LOCK_TEST_AND_SET_8; break; + case MVT::i128:LC = RTLIB::SYNC_LOCK_TEST_AND_SET_16;break; } break; case ISD::ATOMIC_CMP_SWAP: @@ -1189,6 +1198,7 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2; break; case MVT::i32: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4; break; case MVT::i64: LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8; break; + case MVT::i128:LC = RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16;break; } break; case ISD::ATOMIC_LOAD_ADD: @@ -1198,6 +1208,7 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_ADD_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_ADD_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_ADD_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_ADD_16;break; } break; case ISD::ATOMIC_LOAD_SUB: @@ -1207,6 +1218,7 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_SUB_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_SUB_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_SUB_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_SUB_16;break; } break; case ISD::ATOMIC_LOAD_AND: @@ -1216,6 +1228,7 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_AND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_AND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_AND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_AND_16;break; } break; case ISD::ATOMIC_LOAD_OR: @@ -1225,6 +1238,7 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_OR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_OR_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_OR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_OR_16;break; } break; case ISD::ATOMIC_LOAD_XOR: @@ -1234,6 +1248,7 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_XOR_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_XOR_4; break; case 
MVT::i64: LC = RTLIB::SYNC_FETCH_AND_XOR_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_XOR_16;break; } break; case ISD::ATOMIC_LOAD_NAND: @@ -1243,6 +1258,7 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { case MVT::i16: LC = RTLIB::SYNC_FETCH_AND_NAND_2; break; case MVT::i32: LC = RTLIB::SYNC_FETCH_AND_NAND_4; break; case MVT::i64: LC = RTLIB::SYNC_FETCH_AND_NAND_8; break; + case MVT::i128:LC = RTLIB::SYNC_FETCH_AND_NAND_16;break; } break; } @@ -1254,7 +1270,7 @@ std::pair DAGTypeLegalizer::ExpandAtomic(SDNode *Node) { /// and the shift amount is a constant 'Amt'. Expand the operation. void DAGTypeLegalizer::ExpandShiftByConstant(SDNode *N, unsigned Amt, SDValue &Lo, SDValue &Hi) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Expand the incoming operand to be shifted, so that we have its parts SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); @@ -1352,7 +1368,7 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned NVTBits = NVT.getScalarType().getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); APInt HighBitMask = APInt::getHighBitsSet(ShBits, ShBits - Log2_32(NVTBits)); APInt KnownZero, KnownOne; @@ -1439,7 +1455,7 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { unsigned NVTBits = NVT.getSizeInBits(); assert(isPowerOf2_32(NVTBits) && "Expanded integer type size not a power of two!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Get the incoming operand to be shifted. SDValue InL, InH; @@ -1448,7 +1464,7 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy); SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode); SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt); - SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy), + SDValue isShort = DAG.getSetCC(dl, getSetCCResultType(ShTy), Amt, NVBitsNode, ISD::SETULT); SDValue LoS, HiS, LoL, HiL; @@ -1467,8 +1483,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { LoL = DAG.getConstant(0, NVT); // Lo part is zero. HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRL: // Short: ShAmt < NVTBits @@ -1483,8 +1499,8 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { HiL = DAG.getConstant(0, NVT); // Hi part is zero. LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part. - Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; case ISD::SRA: // Short: ShAmt < NVTBits @@ -1500,15 +1516,15 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) { DAG.getConstant(NVTBits-1, ShTy)); LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part. 
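ExpandShiftWithUnknownAmountBit, whose SELECT nodes are being converted to DAG.getSelect above, splits a double-word shift into a "short" arm (amount below the half width) and a "long" arm, then selects between them. The DAG computes both arms branchlessly, but the logic matches this branchy sketch for SHL on a 64-bit value held as two 32-bit halves:

    #include <cstdint>
    #include <cstdio>
    static void shl64(uint32_t Lo, uint32_t Hi, unsigned Amt,
                      uint32_t &OutLo, uint32_t &OutHi) {
      if (Amt < 32) {                      // "isShort": SETULT(Amt, NVTBits)
        OutLo = Lo << Amt;
        OutHi = (Hi << Amt) | (Amt ? Lo >> (32 - Amt) : 0);
      } else {                             // long: Lo part is zero, Hi from Lo
        OutLo = 0;
        OutHi = Lo << (Amt - 32);
      }
    }
    int main() {
      uint32_t Lo, Hi;
      shl64(0x80000001u, 0, 33, Lo, Hi);
      printf("%#x:%#x\n", Hi, Lo);         // 0x2:0
      return 0;
    }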
- Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL); - Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL); + Lo = DAG.getSelect(dl, NVT, isShort, LoS, LoL); + Hi = DAG.getSelect(dl, NVT, isShort, HiS, HiL); return true; } } void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); @@ -1545,25 +1561,25 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUB(SDNode *N, if (N->getOpcode() == ISD::ADD) { Lo = DAG.getNode(ISD::ADD, dl, NVT, LoOps, 2); Hi = DAG.getNode(ISD::ADD, dl, NVT, HiOps, 2); - SDValue Cmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[0], + SDValue Cmp1 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[0], ISD::SETULT); - SDValue Carry1 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp1, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); - SDValue Cmp2 = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, LoOps[1], + SDValue Carry1 = DAG.getSelect(dl, NVT, Cmp1, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); + SDValue Cmp2 = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, LoOps[1], ISD::SETULT); - SDValue Carry2 = DAG.getNode(ISD::SELECT, dl, NVT, Cmp2, - DAG.getConstant(1, NVT), Carry1); + SDValue Carry2 = DAG.getSelect(dl, NVT, Cmp2, + DAG.getConstant(1, NVT), Carry1); Hi = DAG.getNode(ISD::ADD, dl, NVT, Hi, Carry2); } else { Lo = DAG.getNode(ISD::SUB, dl, NVT, LoOps, 2); Hi = DAG.getNode(ISD::SUB, dl, NVT, HiOps, 2); SDValue Cmp = - DAG.getSetCC(dl, TLI.getSetCCResultType(LoOps[0].getValueType()), + DAG.getSetCC(dl, getSetCCResultType(LoOps[0].getValueType()), LoOps[0], LoOps[1], ISD::SETULT); - SDValue Borrow = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, - DAG.getConstant(1, NVT), - DAG.getConstant(0, NVT)); + SDValue Borrow = DAG.getSelect(dl, NVT, Cmp, + DAG.getConstant(1, NVT), + DAG.getConstant(0, NVT)); Hi = DAG.getNode(ISD::SUB, dl, NVT, Hi, Borrow); } } @@ -1572,7 +1588,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBC(SDNode *N, SDValue &Lo, SDValue &Hi) { // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); @@ -1598,7 +1614,7 @@ void DAGTypeLegalizer::ExpandIntRes_ADDSUBE(SDNode *N, SDValue &Lo, SDValue &Hi) { // Expand the subcomponents. SDValue LHSL, LHSH, RHSL, RHSH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), LHSL, LHSH); GetExpandedInteger(N->getOperand(1), RHSL, RHSH); SDVTList VTList = DAG.getVTList(LHSL.getValueType(), MVT::Glue); @@ -1623,7 +1639,7 @@ void DAGTypeLegalizer::ExpandIntRes_MERGE_VALUES(SDNode *N, unsigned ResNo, void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { // The low part is any extension of the input (which degenerates to a copy). 
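ExpandIntRes_ADDSUB above recovers the low-half carry with unsigned compares when the target lacks ADDC/ADDE glue: the wrapped sum is smaller than an addend exactly when a carry occurred, which is what the Cmp1/Carry1/Cmp2/Carry2 select chain encodes. The same computation on two 32-bit halves:

    #include <cstdint>
    #include <cstdio>
    int main() {
      uint32_t ALo = 0xFFFFFFFFu, AHi = 0, BLo = 1, BHi = 0;
      uint32_t Lo = ALo + BLo;                                  // wraps to 0
      uint32_t Carry = (Lo < ALo) ? 1u : (Lo < BLo ? 1u : 0u);  // Cmp1/Cmp2
      uint32_t Hi = AHi + BHi + Carry;
      printf("%u:%u\n", Hi, Lo);                                // 1:0
      return 0;
    }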
@@ -1645,7 +1661,7 @@ void DAGTypeLegalizer::ExpandIntRes_ANY_EXTEND(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); EVT EVT = cast(N->getOperand(1))->getVT(); @@ -1666,7 +1682,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertSext(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); EVT EVT = cast(N->getOperand(1))->getVT(); @@ -1686,7 +1702,7 @@ void DAGTypeLegalizer::ExpandIntRes_AssertZext(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_BSWAP(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Hi, Lo); // Note swapped operands. Lo = DAG.getNode(ISD::BSWAP, dl, Lo.getValueType(), Lo); Hi = DAG.getNode(ISD::BSWAP, dl, Hi.getValueType(), Hi); @@ -1703,26 +1719,26 @@ void DAGTypeLegalizer::ExpandIntRes_Constant(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_CTLZ(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // ctlz (HiLo) -> Hi != 0 ? ctlz(Hi) : (ctlz(Lo)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); - SDValue HiNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Hi, + SDValue HiNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Hi, DAG.getConstant(0, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(N->getOpcode(), dl, NVT, Lo); SDValue HiLZ = DAG.getNode(ISD::CTLZ_ZERO_UNDEF, dl, NVT, Hi); - Lo = DAG.getNode(ISD::SELECT, dl, NVT, HiNotZero, HiLZ, - DAG.getNode(ISD::ADD, dl, NVT, LoLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); + Lo = DAG.getSelect(dl, NVT, HiNotZero, HiLZ, + DAG.getNode(ISD::ADD, dl, NVT, LoLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); Hi = DAG.getConstant(0, NVT); } void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // ctpop(HiLo) -> ctpop(Hi)+ctpop(Lo) GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); @@ -1733,42 +1749,44 @@ void DAGTypeLegalizer::ExpandIntRes_CTPOP(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_CTTZ(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // cttz (HiLo) -> Lo != 0 ? 
cttz(Lo) : (cttz(Hi)+32) GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT NVT = Lo.getValueType(); - SDValue LoNotZero = DAG.getSetCC(dl, TLI.getSetCCResultType(NVT), Lo, + SDValue LoNotZero = DAG.getSetCC(dl, getSetCCResultType(NVT), Lo, DAG.getConstant(0, NVT), ISD::SETNE); SDValue LoLZ = DAG.getNode(ISD::CTTZ_ZERO_UNDEF, dl, NVT, Lo); SDValue HiLZ = DAG.getNode(N->getOpcode(), dl, NVT, Hi); - Lo = DAG.getNode(ISD::SELECT, dl, NVT, LoNotZero, LoLZ, - DAG.getNode(ISD::ADD, dl, NVT, HiLZ, - DAG.getConstant(NVT.getSizeInBits(), NVT))); + Lo = DAG.getSelect(dl, NVT, LoNotZero, LoLZ, + DAG.getNode(ISD::ADD, dl, NVT, HiLZ, + DAG.getConstant(NVT.getSizeInBits(), NVT))); Hi = DAG.getConstant(0, NVT); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, + dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, + dl).first, Lo, Hi); } @@ -1790,7 +1808,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); bool isInvariant = N->isInvariant(); - DebugLoc dl = N->getDebugLoc(); + const MDNode *TBAAInfo = N->getTBAAInfo(); + SDLoc dl(N); assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -1798,7 +1817,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, EVT MemVT = N->getMemoryVT(); Lo = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), - MemVT, isVolatile, isNonTemporal, Alignment); + MemVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); // Remember the chain. Ch = Lo.getValue(1); @@ -1820,7 +1839,8 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, } else if (TLI.isLittleEndian()) { // Little-endian - low bits are at low addresses. Lo = DAG.getLoad(NVT, dl, Ch, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -1829,11 +1849,11 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. 
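// A sketch of the two count expansions above, using i64 parts instead of the
// 32-bit parts in the comments: ctlz(HiLo) is ctlz(Hi) unless Hi is zero, in
// which case it is ctlz(Lo) + 64; cttz mirrors this from the low end. The
// *_ZERO_UNDEF node is used on the half the select has already proven
// nonzero. Hypothetical helpers.
#include <cstdint>
unsigned clz128(uint64_t Lo, uint64_t Hi) {
  if (Hi != 0) return __builtin_clzll(Hi);          // CTLZ_ZERO_UNDEF(Hi)
  return 64 + (Lo != 0 ? __builtin_clzll(Lo) : 64); // plain CTLZ(Lo) + 64
}
unsigned ctz128(uint64_t Lo, uint64_t Hi) {
  if (Lo != 0) return __builtin_ctzll(Lo);          // CTTZ_ZERO_UNDEF(Lo)
  return 64 + (Hi != 0 ? __builtin_ctzll(Hi) : 64); // plain CTTZ(Hi) + 64
}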
@@ -1851,17 +1871,17 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, Hi = DAG.getExtLoad(ExtType, dl, NVT, Ch, Ptr, N->getPointerInfo(), EVT::getIntegerVT(*DAG.getContext(), MemVT.getSizeInBits() - ExcessBits), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); // Load the rest of the low bits. Lo = DAG.getExtLoad(ISD::ZEXTLOAD, dl, NVT, Ch, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -1889,7 +1909,7 @@ void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, void DAGTypeLegalizer::ExpandIntRes_Logical(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue LL, LH, RL, RH; GetExpandedInteger(N->getOperand(0), LL, LH); GetExpandedInteger(N->getOperand(1), RL, RH); @@ -1901,7 +1921,7 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); bool HasMULHS = TLI.isOperationLegalOrCustom(ISD::MULHS, NVT); bool HasMULHU = TLI.isOperationLegalOrCustom(ISD::MULHU, NVT); @@ -1984,7 +2004,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl), + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, + dl).first, Lo, Hi); } @@ -1992,7 +2013,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, SDValue &Lo, SDValue &Hi) { SDValue LHS = Node->getOperand(0); SDValue RHS = Node->getOperand(1); - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); // Expand the result by simply replacing it with the equivalent // non-overflow-checking operation. @@ -2033,7 +2054,7 @@ void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2047,13 +2068,13 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // If we can emit an efficient shift operation, do so now. Check to see if // the RHS is a constant. 
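// A sketch of the arithmetic behind the MULHU-based expansion in
// ExpandIntRes_MUL above: the low product is LL*RL, and the high part is
// mulhu(LL,RL) + LL*RH + LH*RL (the LH*RH term only affects bits above the
// result width). Shown for a 64x64 -> 64 multiply built from 32-bit halves;
// hypothetical helper.
#include <cstdint>
uint64_t mul64_from_halves(uint64_t LHS, uint64_t RHS) {
  uint32_t LL = (uint32_t)LHS, LH = (uint32_t)(LHS >> 32);
  uint32_t RL = (uint32_t)RHS, RH = (uint32_t)(RHS >> 32);
  uint64_t LoProd = (uint64_t)LL * RL;
  uint32_t Lo = (uint32_t)LoProd;
  uint32_t MulHU = (uint32_t)(LoProd >> 32); // ISD::MULHU
  uint32_t Hi = MulHU + LL * RH + LH * RL;   // wraps mod 2^32, as intended
  return (uint64_t)Lo | ((uint64_t)Hi << 32);
}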
@@ -2142,7 +2163,8 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl).first, Lo, + Hi); return; } @@ -2153,7 +2175,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { // The low part is sign extension of the input (degenerates to a copy). @@ -2183,7 +2205,7 @@ void DAGTypeLegalizer::ExpandIntRes_SIGN_EXTEND(SDNode *N, void DAGTypeLegalizer:: ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetExpandedInteger(N->getOperand(0), Lo, Hi); EVT EVT = cast(N->getOperand(1))->getVT(); @@ -2211,7 +2233,7 @@ ExpandIntRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi) { void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2225,13 +2247,13 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Lo = DAG.getNode(ISD::TRUNCATE, dl, NVT, N->getOperand(0)); Hi = DAG.getNode(ISD::SRL, dl, N->getOperand(0).getValueType(), N->getOperand(0), @@ -2243,7 +2265,7 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHS = N->getOperand(0); SDValue RHS = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Expand the result by simply replacing it with the equivalent // non-overflow-checking operation. @@ -2265,7 +2287,7 @@ void DAGTypeLegalizer::ExpandIntRes_UADDSUBO(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // A divide for UMULO should be faster than a function call. if (N->getOpcode() == ISD::UMULO) { @@ -2276,16 +2298,16 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, // A divide for UMULO will be faster than a function call. Select to // make sure we aren't using 0. 
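// A sketch of the divide-based UMULO check that the selects just below
// implement: the select that swaps a zero RHS for 1 exists only to keep the
// UDIV well defined, and the overflow flag is then forced to 0 for a zero
// RHS. Hypothetical helper for the i64 case.
#include <cstdint>
bool umulo64(uint64_t LHS, uint64_t RHS, uint64_t &Product) {
  Product = LHS * RHS;                     // wraps on overflow
  uint64_t NotZero = (RHS == 0) ? 1 : RHS; // select(isZero, 1, RHS)
  bool Overflow = (Product / NotZero) != LHS;
  return (RHS == 0) ? false : Overflow;    // select(isZero, 0, Overflow)
}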
- SDValue isZero = DAG.getSetCC(dl, TLI.getSetCCResultType(VT), + SDValue isZero = DAG.getSetCC(dl, getSetCCResultType(VT), RHS, DAG.getConstant(0, VT), ISD::SETEQ); - SDValue NotZero = DAG.getNode(ISD::SELECT, dl, VT, isZero, - DAG.getConstant(1, VT), RHS); + SDValue NotZero = DAG.getSelect(dl, VT, isZero, + DAG.getConstant(1, VT), RHS); SDValue DIV = DAG.getNode(ISD::UDIV, dl, VT, MUL, NotZero); SDValue Overflow = DAG.getSetCC(dl, N->getValueType(1), DIV, LHS, ISD::SETNE); - Overflow = DAG.getNode(ISD::SELECT, dl, N->getValueType(1), isZero, - DAG.getConstant(0, N->getValueType(1)), - Overflow); + Overflow = DAG.getSelect(dl, N->getValueType(1), isZero, + DAG.getConstant(0, N->getValueType(1)), + Overflow); ReplaceValueWith(SDValue(N, 1), Overflow); return; } @@ -2293,7 +2315,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, Type *RetTy = VT.getTypeForEVT(*DAG.getContext()); EVT PtrVT = TLI.getPointerTy(); Type *PtrTy = PtrVT.getTypeForEVT(*DAG.getContext()); - + // Replace this with a libcall that will check overflow. RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i32) @@ -2351,7 +2373,7 @@ void DAGTypeLegalizer::ExpandIntRes_XMULO(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2365,13 +2387,13 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; if (VT == MVT::i16) @@ -2385,13 +2407,13 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl).first, Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); if (Op.getValueType().bitsLE(NVT)) { // The low part is zero extension of the input (degenerates to a copy). @@ -2418,7 +2440,7 @@ void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, void DAGTypeLegalizer::ExpandIntRes_ATOMIC_LOAD(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = cast(N)->getMemoryVT(); SDValue Zero = DAG.getConstant(0, VT); SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, VT, @@ -2498,7 +2520,7 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) { void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, ISD::CondCode &CCCode, - DebugLoc dl) { + SDLoc dl) { SDValue LHSLo, LHSHi, RHSLo, RHSHi; GetExpandedInteger(NewLHS, LHSLo, LHSHi); GetExpandedInteger(NewRHS, RHSLo, RHSHi); @@ -2555,16 +2577,16 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // this identity: (B1 ? 
B2 : B3) --> (B1 & B2)|(!B1&B3) TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL); SDValue Tmp1, Tmp2; - Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()), + Tmp1 = TLI.SimplifySetCC(getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); if (!Tmp1.getNode()) - Tmp1 = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSLo.getValueType()), + Tmp1 = DAG.getSetCC(dl, getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC); - Tmp2 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()), + Tmp2 = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, CCCode, false, DagCombineInfo, dl); if (!Tmp2.getNode()) Tmp2 = DAG.getNode(ISD::SETCC, dl, - TLI.getSetCCResultType(LHSHi.getValueType()), + getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, DAG.getCondCode(CCCode)); ConstantSDNode *Tmp1C = dyn_cast(Tmp1.getNode()); @@ -2584,21 +2606,21 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, return; } - NewLHS = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSHi.getValueType()), + NewLHS = TLI.SimplifySetCC(getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ, false, DagCombineInfo, dl); if (!NewLHS.getNode()) - NewLHS = DAG.getSetCC(dl, TLI.getSetCCResultType(LHSHi.getValueType()), + NewLHS = DAG.getSetCC(dl, getSetCCResultType(LHSHi.getValueType()), LHSHi, RHSHi, ISD::SETEQ); - NewLHS = DAG.getNode(ISD::SELECT, dl, Tmp1.getValueType(), - NewLHS, Tmp1, Tmp2); + NewLHS = DAG.getSelect(dl, Tmp1.getValueType(), + NewLHS, Tmp1, Tmp2); NewRHS = SDValue(); } SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); ISD::CondCode CCCode = cast(N->getOperand(1))->get(); - IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -2616,7 +2638,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_BR_CC(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(4))->get(); - IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. @@ -2634,7 +2656,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SELECT_CC(SDNode *N) { SDValue DAGTypeLegalizer::ExpandIntOp_SETCC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(2))->get(); - IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); + IntegerExpandSetCCOperands(NewLHS, NewRHS, CCCode, SDLoc(N)); // If ExpandSetCCOperands returned a scalar, use it. 
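// A sketch of the general case in IntegerExpandSetCCOperands above: the low
// halves decide only when the high halves are equal, and the low-half compare
// always uses the unsigned form of the predicate (LowCC), whatever the
// signedness of the original. Shown for signed SETLT on a 128-bit value split
// into i64 parts; hypothetical helper.
#include <cstdint>
bool setlt128(uint64_t LHSLo, int64_t LHSHi, uint64_t RHSLo, int64_t RHSHi) {
  bool Tmp1 = LHSLo < RHSLo;             // LowCC: unsigned SETULT
  bool Tmp2 = LHSHi < RHSHi;             // CCCode: signed SETLT on the Hi parts
  return (LHSHi == RHSHi) ? Tmp1 : Tmp2; // select on SETEQ of the Hi parts
}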
if (NewRHS.getNode() == 0) { @@ -2672,7 +2694,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, SDLoc(N)).first; } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2689,7 +2711,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getAlignment(); bool isVolatile = N->isVolatile(); bool isNonTemporal = N->isNonTemporal(); - DebugLoc dl = N->getDebugLoc(); + const MDNode *TBAAInfo = N->getTBAAInfo(); + SDLoc dl(N); SDValue Lo, Hi; assert(NVT.isByteSized() && "Expanded type not byte sized!"); @@ -2698,7 +2721,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); return DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), N->getMemoryVT(), isVolatile, isNonTemporal, - Alignment); + Alignment, TBAAInfo); } if (TLI.isLittleEndian()) { @@ -2706,7 +2729,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { GetExpandedInteger(N->getValue(), Lo, Hi); Lo = DAG.getStore(Ch, dl, Lo, Ptr, N->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); unsigned ExcessBits = N->getMemoryVT().getSizeInBits() - NVT.getSizeInBits(); @@ -2715,11 +2738,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), NEVT, isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2747,17 +2770,17 @@ SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { // Store both the high bits and maybe some of the low bits. Hi = DAG.getTruncStore(Ch, dl, Hi, Ptr, N->getPointerInfo(), - HiVT, isVolatile, isNonTemporal, Alignment); + HiVT, isVolatile, isNonTemporal, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); // Store the lowest ExcessBits bits in the second half. Lo = DAG.getTruncStore(Ch, dl, Lo, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), EVT::getIntegerVT(*DAG.getContext(), ExcessBits), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -2765,14 +2788,14 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { SDValue InL, InH; GetExpandedInteger(N->getOperand(0), InL, InH); // Just truncate the low part of the source. 
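// A sketch of the little-endian path of ExpandIntOp_STORE above: the low part
// goes to the base address and the high part to base plus
// IncrementSize = NVTBits/8, the same pointer bump used for the expanded
// load. Hypothetical helper, assuming i64 parts.
#include <cstdint>
#include <cstring>
void store128_le(unsigned char *Ptr, uint64_t Lo, uint64_t Hi) {
  std::memcpy(Ptr, &Lo, sizeof Lo);     // low bits at the low address
  std::memcpy(Ptr + 8, &Hi, sizeof Hi); // IncrementSize = 64 / 8
}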
- return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), InL); } SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); EVT SrcVT = Op.getValueType(); EVT DstVT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // The following optimization is valid only if every value in SrcVT (when // treated as signed) is representable in DstVT. Check that the mantissa @@ -2806,7 +2829,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Lo, Hi; GetExpandedInteger(Op, Lo, Hi); SDValue SignSet = DAG.getSetCC(dl, - TLI.getSetCCResultType(Hi.getValueType()), + getSetCCResultType(Hi.getValueType()), Hi, DAG.getConstant(0, Hi.getValueType()), ISD::SETLT); @@ -2819,10 +2842,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Zero = DAG.getIntPtrConstant(0); SDValue Four = DAG.getIntPtrConstant(4); if (TLI.isBigEndian()) std::swap(Zero, Four); - SDValue Offset = DAG.getNode(ISD::SELECT, dl, Zero.getValueType(), SignSet, - Zero, Four); + SDValue Offset = DAG.getSelect(dl, Zero.getValueType(), SignSet, + Zero, Four); unsigned Alignment = cast(FudgePtr)->getAlignment(); - FudgePtr = DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), FudgePtr, Offset); + FudgePtr = DAG.getNode(ISD::ADD, dl, FudgePtr.getValueType(), + FudgePtr, Offset); Alignment = std::min(Alignment, 4u); // Load the value out, extending it from f32 to the destination float type. @@ -2839,11 +2863,11 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl).first; } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Swap = DAG.getAtomic(ISD::ATOMIC_SWAP, dl, cast(N)->getMemoryVT(), N->getOperand(0), @@ -2865,7 +2889,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned OutNumElems = OutVT.getVectorNumElements(); EVT NOutVTElem = NOutVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue BaseIdx = N->getOperand(1); SmallVector Ops; @@ -2874,7 +2898,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { // Extract the element from the original vector. 
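// A sketch of the fudge-factor path in ExpandIntOp_UINT_TO_FP above: convert
// as if signed, then add back exactly 2^64 when the sign bit was set; the DAG
// fetches that constant as an f32 from the constant pool at offset 0 or 4,
// chosen by the SignSet select and endianness. Hypothetical helper; the exact
// rounding of the final sum is glossed over here.
#include <cstdint>
double uint64_to_double(uint64_t X) {
  double D = (double)(int64_t)X; // signed conversion of the expanded value
  if ((int64_t)X < 0)            // SignSet: Hi < 0
    D += 18446744073709551616.0; // fudge factor: 2^64, exactly representable
  return D;
}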
SDValue Index = DAG.getNode(ISD::ADD, dl, BaseIdx.getValueType(), - BaseIdx, DAG.getIntPtrConstant(i)); + BaseIdx, DAG.getConstant(i, BaseIdx.getValueType())); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InVT.getVectorElementType(), N->getOperand(0), Index); @@ -2890,7 +2914,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_VECTOR_SHUFFLE(SDNode *N) { ShuffleVectorSDNode *SV = cast<ShuffleVectorSDNode>(N); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SmallVector NewMask; @@ -2913,12 +2937,20 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { unsigned NumElems = N->getNumOperands(); EVT NOutVTElem = NOutVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SmallVector Ops; Ops.reserve(NumElems); for (unsigned i = 0; i != NumElems; ++i) { - SDValue Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + SDValue Op; + // BUILD_VECTOR integer operand types are allowed to be larger than the + // result's element type. This may still be true after the promotion. For + // example, we might be promoting (v?i8 = BV <i32>, <i32>, ...) to + // (v?i16 = BV <i32>, <i32>, ...), and we can't any_extend <i32> to <i16>. + if (N->getOperand(i).getValueType().bitsLT(NOutVTElem)) + Op = DAG.getNode(ISD::ANY_EXTEND, dl, NOutVTElem, N->getOperand(i)); + else + Op = N->getOperand(i); Ops.push_back(Op); } @@ -2927,7 +2959,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_BUILD_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); assert(!N->getOperand(0).getValueType().isVector() && "Input must be a scalar"); @@ -2943,7 +2975,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SCALAR_TO_VECTOR(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT OutVT = N->getValueType(0); EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); @@ -2964,7 +2996,8 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDValue Op = N->getOperand(i); for (unsigned j = 0; j < NumElem; ++j) { SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - InElemTy, Op, DAG.getIntPtrConstant(j)); + InElemTy, Op, DAG.getConstant(j, + TLI.getVectorIdxTy())); Ops[i * NumElem + j] = DAG.getNode(ISD::ANY_EXTEND, dl, OutElemTy, Ext); } } @@ -2979,7 +3012,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_CONCAT_VECTORS(SDNode *N) { SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { EVT NOutVTElem = NOutVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); SDValue ConvElem = DAG.getNode(ISD::ANY_EXTEND, dl, @@ -2989,9 +3022,9 @@ SDValue DAGTypeLegalizer::PromoteIntRes_INSERT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue V0 = GetPromotedInteger(N->getOperand(0)); - SDValue V1 = N->getOperand(1); + SDValue V1 = DAG.getZExtOrTrunc(N->getOperand(1), dl, TLI.getVectorIdxTy()); SDValue Ext = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, V0->getValueType(0).getScalarType(), V0, V1); @@ -3002,7 +3035,7 @@ SDValue DAGTypeLegalizer::PromoteIntOp_EXTRACT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumElems = N->getNumOperands(); EVT RetSclrTy = N->getValueType(0).getVectorElementType(); @@ -3019,7 +3052,7
@@ SDValue DAGTypeLegalizer::PromoteIntOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i=0; igetOperand(ResNo)); } -/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type -/// which is split into two not necessarily identical pieces. -void DAGTypeLegalizer::GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT) { - // Currently all types are split in half. - if (!InVT.isVector()) { - LoVT = HiVT = TLI.getTypeToTransformTo(*DAG.getContext(), InVT); - } else { - unsigned NumElements = InVT.getVectorNumElements(); - assert(!(NumElements & 1) && "Splitting vector, but not in half!"); - LoVT = HiVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElements/2); - } -} - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = Pair.getDebugLoc(); + SDLoc dl(Pair); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), Pair.getValueType()); Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, NVT, Pair, DAG.getIntPtrConstant(0)); @@ -1012,12 +972,9 @@ void DAGTypeLegalizer::GetPairElements(SDValue Pair, SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index) { - DebugLoc dl = Index.getDebugLoc(); + SDLoc dl(Index); // Make sure the index type is big enough to compute in. - if (Index.getValueType().bitsGT(TLI.getPointerTy())) - Index = DAG.getNode(ISD::TRUNCATE, dl, TLI.getPointerTy(), Index); - else - Index = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Index); + Index = DAG.getZExtOrTrunc(Index, dl, TLI.getPointerTy()); // Calculate the element offset and add it to the pointer. unsigned EltSize = EltVT.getSizeInBits() / 8; // FIXME: should be ABI size. @@ -1029,9 +986,9 @@ SDValue DAGTypeLegalizer::GetVectorElementPointer(SDValue VecPtr, EVT EltVT, /// JoinIntegers - Build an integer with low bits Lo and high bits Hi. SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { - // Arbitrarily use dlHi for result DebugLoc - DebugLoc dlHi = Hi.getDebugLoc(); - DebugLoc dlLo = Lo.getDebugLoc(); + // Arbitrarily use dlHi for result SDLoc + SDLoc dlHi(Hi); + SDLoc dlLo(Lo); EVT LVT = Lo.getValueType(); EVT HVT = Hi.getValueType(); EVT NVT = EVT::getIntegerVT(*DAG.getContext(), @@ -1048,22 +1005,25 @@ SDValue DAGTypeLegalizer::JoinIntegers(SDValue Lo, SDValue Hi) { SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned) { unsigned NumOps = N->getNumOperands(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (NumOps == 0) { - return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, + dl).first; } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, + dl).first; } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, + dl).first; } SmallVector Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); return TLI.makeLibCall(DAG, LC, N->getValueType(0), - &Ops[0], NumOps, isSigned, dl); + &Ops[0], NumOps, isSigned, dl).first; } // ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to @@ -1093,7 +1053,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, false, 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Node->getDebugLoc()); + Callee, Args, DAG, SDLoc(Node)); std::pair CallInfo = TLI.LowerCallTo(CLI); return CallInfo; @@ -1103,7 +1063,7 @@ DAGTypeLegalizer::ExpandChainLibCall(RTLIB::Libcall LC, /// of the given type. A target boolean is an integer value, not necessarily of /// type i1, the bits of which conform to getBooleanContents. SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { - DebugLoc dl = Bool.getDebugLoc(); + SDLoc dl(Bool); ISD::NodeType ExtendCode = TargetLowering::getExtendForContent(TLI.getBooleanContents(VT.isVector())); return DAG.getNode(ExtendCode, dl, VT, Bool); @@ -1114,7 +1074,7 @@ SDValue DAGTypeLegalizer::PromoteTargetBoolean(SDValue Bool, EVT VT) { void DAGTypeLegalizer::SplitInteger(SDValue Op, EVT LoVT, EVT HiVT, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); assert(LoVT.getSizeInBits() + HiVT.getSizeInBits() == Op.getValueType().getSizeInBits() && "Invalid integer splitting!"); Lo = DAG.getNode(ISD::TRUNCATE, dl, LoVT, Op); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 1c4274a..13bb08f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -1,4 +1,4 @@ -//===-- LegalizeTypes.h - Definition of the DAG Type Legalizer class ------===// +//===-- LegalizeTypes.h - DAG Type Legalizer class definition ---*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -73,6 +73,10 @@ private: return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } + EVT getSetCCResultType(EVT VT) const { + return TLI.getSetCCResultType(*DAG.getContext(), VT); + } + /// IgnoreNodeResults - Pretend all of this node's results are legal. bool IgnoreNodeResults(SDNode *N) const { return N->getOpcode() == ISD::TargetConstant; @@ -195,7 +199,7 @@ private: /// final size. SDValue SExtPromotedInteger(SDValue Op) { EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); Op = GetPromotedInteger(Op); return DAG.getNode(ISD::SIGN_EXTEND_INREG, dl, Op.getValueType(), Op, DAG.getValueType(OldVT)); @@ -205,7 +209,7 @@ private: /// final size. 
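// A plain-integer sketch of SExtPromotedInteger above and ZExtPromotedInteger
// just below, for a value of OldBits held in an i32: SIGN_EXTEND_INREG
// expands to a shift-up/arithmetic-shift-down pair, and the in-register zero
// extend is a mask of the low OldBits. Hypothetical helpers; assumes
// 1 <= OldBits <= 32 and an arithmetic right shift on signed values.
#include <cstdint>
int32_t sextInReg(int32_t Op, unsigned OldBits) {
  unsigned Shift = 32 - OldBits;
  return (int32_t)((uint32_t)Op << Shift) >> Shift;
}
uint32_t zextInReg(uint32_t Op, unsigned OldBits) {
  return (OldBits < 32) ? (Op & ((1u << OldBits) - 1)) : Op;
}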
SDValue ZExtPromotedInteger(SDValue Op) { EVT OldVT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); Op = GetPromotedInteger(Op); return DAG.getZeroExtendInReg(Op, dl, OldVT.getScalarType()); } @@ -357,7 +361,7 @@ private: SDValue ExpandIntOp_ATOMIC_STORE(SDNode *N); void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl); + ISD::CondCode &CCCode, SDLoc dl); //===--------------------------------------------------------------------===// // Float to Integer Conversion Support: LegalizeFloatTypes.cpp @@ -406,6 +410,7 @@ private: SDValue SoftenFloatRes_FPOWI(SDNode *N); SDValue SoftenFloatRes_FREM(SDNode *N); SDValue SoftenFloatRes_FRINT(SDNode *N); + SDValue SoftenFloatRes_FROUND(SDNode *N); SDValue SoftenFloatRes_FSIN(SDNode *N); SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); @@ -466,6 +471,7 @@ private: void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FROUND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSUB (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -476,6 +482,7 @@ private: // Float Operand Expansion. bool ExpandFloatOperand(SDNode *N, unsigned OperandNo); SDValue ExpandFloatOp_BR_CC(SDNode *N); + SDValue ExpandFloatOp_FCOPYSIGN(SDNode *N); SDValue ExpandFloatOp_FP_ROUND(SDNode *N); SDValue ExpandFloatOp_FP_TO_SINT(SDNode *N); SDValue ExpandFloatOp_FP_TO_UINT(SDNode *N); @@ -484,7 +491,7 @@ private: SDValue ExpandFloatOp_STORE(SDNode *N, unsigned OpNo); void FloatExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl); + ISD::CondCode &CCCode, SDLoc dl); //===--------------------------------------------------------------------===// // Scalarization Support: LegalizeVectorTypes.cpp @@ -530,7 +537,7 @@ private: // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); - SDValue ScalarizeVecOp_EXTEND(SDNode *N); + SDValue ScalarizeVecOp_UnaryOp(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -554,6 +561,7 @@ private: void SplitVecRes_BinOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi); + void SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi); void SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi); @@ -624,6 +632,7 @@ private: SDValue WidenVecRes_Ternary(SDNode *N); SDValue WidenVecRes_Binary(SDNode *N); + SDValue WidenVecRes_BinaryCanTrap(SDNode *N); SDValue WidenVecRes_Convert(SDNode *N); SDValue WidenVecRes_POWI(SDNode *N); SDValue WidenVecRes_Shift(SDNode *N); @@ -649,7 +658,7 @@ private: /// loads to load a vector with a resulting wider type. It takes /// LdChain: list of chains for the load to be generated. 
/// Ld: load to widen - SDValue GenWidenVectorLoads(SmallVector& LdChain, + SDValue GenWidenVectorLoads(SmallVectorImpl &LdChain, LoadSDNode *LD); /// GenWidenVectorExtLoads - Helper function to generate a set of extension @@ -657,20 +666,20 @@ private: /// LdChain: list of chains for the load to be generated. /// Ld: load to widen /// ExtType: extension element type - SDValue GenWidenVectorExtLoads(SmallVector& LdChain, + SDValue GenWidenVectorExtLoads(SmallVectorImpl &LdChain, LoadSDNode *LD, ISD::LoadExtType ExtType); /// Helper genWidenVectorStores - Helper function to generate a set of /// stores to store a widen vector into non widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorStores(SmallVector& StChain, StoreSDNode *ST); + void GenWidenVectorStores(SmallVectorImpl &StChain, StoreSDNode *ST); /// Helper genWidenVectorTruncStores - Helper function to generate a set of /// stores to store a truncate widen vector into non widen memory /// StChain: list of chains for the stores we have generated /// ST: store of a widen value - void GenWidenVectorTruncStores(SmallVector& StChain, + void GenWidenVectorTruncStores(SmallVectorImpl &StChain, StoreSDNode *ST); /// Modifies a vector input (widen or narrows) to a vector of NVT. The @@ -695,10 +704,6 @@ private: GetExpandedFloat(Op, Lo, Hi); } - /// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type - /// which is split (or expanded) into two not necessarily identical pieces. - void GetSplitDestVTs(EVT InVT, EVT &LoVT, EVT &HiVT); - /// GetPairElements - Use ISD::EXTRACT_ELEMENT nodes to extract the low and /// high parts of the given value. void GetPairElements(SDValue Pair, SDValue &Lo, SDValue &Hi); @@ -726,6 +731,12 @@ private: GetExpandedFloat(Op, Lo, Hi); } + + /// This function will split the integer \p Op into \p NumElements + /// operations of type \p EltVT and store them in \p Ops. + void IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl &Ops, EVT EltVT); + // Generic Result Expansion. void ExpandRes_MERGE_VALUES (SDNode *N, unsigned ResNo, SDValue &Lo, SDValue &Hi); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 222d1c0..c749fde 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -41,7 +41,7 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NOutVT = TLI.getTypeToTransformTo(*DAG.getContext(), OutVT); SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Handle some special cases efficiently. 
switch (getTypeAction(InVT)) { @@ -77,12 +77,9 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypeWidenVector: { assert(!(InVT.getVectorNumElements() & 1) && "Unsupported BITCAST"); InOp = GetWidenedVector(InOp); - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - InVT.getVectorNumElements()/2); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, InOp, - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(InVT); + llvm::tie(Lo, Hi) = DAG.SplitVector(InOp, dl, LoVT, HiVT); if (TLI.isBigEndian()) std::swap(Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); @@ -115,7 +112,8 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { SmallVector Vals; for (unsigned i = 0; i < NumElems; ++i) Vals.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ElemVT, - CastInOp, DAG.getIntPtrConstant(i))); + CastInOp, DAG.getConstant(i, + TLI.getVectorIdxTy()))); // Build Lo, Hi pair by pairing extracted elements if needed. unsigned Slot = 0; @@ -161,13 +159,14 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { false, false, 0); // Load the first half from the stack slot. - Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, + Lo = DAG.getLoad(NOutVT, dl, Store, StackPtr, PtrInfo, false, false, false, 0); // Increment the pointer to the other half. unsigned IncrementSize = NOutVT.getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, + StackPtr.getValueType())); // Load the second half from the stack slot. Hi = DAG.getLoad(NOutVT, dl, Store, StackPtr, @@ -203,7 +202,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue OldVec = N->getOperand(0); unsigned OldElts = OldVec.getValueType().getVectorNumElements(); EVT OldEltVT = OldVec.getValueType().getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Convert to a vector of the expanded element type, for example // <3 x i64> -> <6 x i32>. @@ -227,10 +226,6 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Extract the elements at 2 * Idx and 2 * Idx + 1 from the new vector. SDValue Idx = N->getOperand(1); - // Make sure the type of Idx is big enough to hold the new values. 
- if (Idx.getValueType().bitsLT(TLI.getPointerTy())) - Idx = DAG.getNode(ISD::ZERO_EXTEND, dl, TLI.getPointerTy(), Idx); - Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); Lo = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, NewVec, Idx); @@ -245,7 +240,7 @@ void DAGTypeLegalizer::ExpandRes_EXTRACT_VECTOR_ELT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(ISD::isNormalLoad(N) && "This routine only for normal loads!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); LoadSDNode *LD = cast(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), LD->getValueType(0)); @@ -255,20 +250,22 @@ void DAGTypeLegalizer::ExpandRes_NormalLoad(SDNode *N, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); Lo = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Alignment); + isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); // Increment the pointer to the other half. unsigned IncrementSize = NVT.getSizeInBits() / 8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, isInvariant, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -289,7 +286,7 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), OVT); SDValue Chain = N->getOperand(0); SDValue Ptr = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); const unsigned Align = N->getConstantOperandVal(3); Lo = DAG.getVAArg(NVT, dl, Chain, Ptr, N->getOperand(2), Align); @@ -309,29 +306,54 @@ void DAGTypeLegalizer::ExpandRes_VAARG(SDNode *N, SDValue &Lo, SDValue &Hi) { // Generic Operand Expansion. //===--------------------------------------------------------------------===// +void DAGTypeLegalizer::IntegerToVector(SDValue Op, unsigned NumElements, + SmallVectorImpl &Ops, + EVT EltVT) { + assert(Op.getValueType().isInteger()); + SDLoc DL(Op); + SDValue Parts[2]; + + if (NumElements > 1) { + NumElements >>= 1; + SplitInteger(Op, Parts[0], Parts[1]); + if (TLI.isBigEndian()) + std::swap(Parts[0], Parts[1]); + IntegerToVector(Parts[0], NumElements, Ops, EltVT); + IntegerToVector(Parts[1], NumElements, Ops, EltVT); + } else { + Ops.push_back(DAG.getNode(ISD::BITCAST, DL, EltVT, Op)); + } +} + SDValue DAGTypeLegalizer::ExpandOp_BITCAST(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->getValueType(0).isVector()) { // An illegal expanding type is being converted to a legal vector type. // Make a two element vector out of the expanded parts and convert that // instead, but only if the new vector type is legal (otherwise there // is no point, and it might create expansion loops). For example, on // x86 this turns v1i64 = BITCAST i64 into v1i64 = BITCAST v2i32. + // + // FIXME: I'm not sure why we are first trying to split the input into + // a 2 element vector, so I'm leaving it here to maintain the current + // behavior. 
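// A plain-integer sketch of the recursive IntegerToVector above: halve the
// value until the element width is reached, swapping the halves first on
// big-endian targets so element 0 ends up with the right bits. Hypothetical
// helper; assumes NumElements is a power of two and the total width
// NumElements * EltBits is at most 64.
#include <cstdint>
#include <utility>
#include <vector>
void integerToVector(uint64_t Op, unsigned NumElements, unsigned EltBits,
                     std::vector<uint64_t> &Ops, bool BigEndian) {
  if (NumElements > 1) {
    NumElements >>= 1;
    unsigned HalfBits = NumElements * EltBits;
    uint64_t Parts[2] = { Op & ((1ULL << HalfBits) - 1), // SplitInteger: Lo
                          Op >> HalfBits };              // SplitInteger: Hi
    if (BigEndian)
      std::swap(Parts[0], Parts[1]);
    integerToVector(Parts[0], NumElements, EltBits, Ops, BigEndian);
    integerToVector(Parts[1], NumElements, EltBits, Ops, BigEndian);
  } else {
    Ops.push_back(Op); // the real code BITCASTs to the element type here
  }
}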
+ unsigned NumElts = 2; EVT OVT = N->getOperand(0).getValueType(); EVT NVT = EVT::getVectorVT(*DAG.getContext(), TLI.getTypeToTransformTo(*DAG.getContext(), OVT), - 2); + NumElts); + if (!isTypeLegal(NVT)) { + // If we can't find a legal type by splitting the integer in half, + // then we can use the node's value type. + NumElts = N->getValueType(0).getVectorNumElements(); + NVT = N->getValueType(0); + } - if (isTypeLegal(NVT)) { - SDValue Parts[2]; - GetExpandedOp(N->getOperand(0), Parts[0], Parts[1]); + SmallVector Ops; + IntegerToVector(N->getOperand(0), NumElts, Ops, NVT.getVectorElementType()); - if (TLI.isBigEndian()) - std::swap(Parts[0], Parts[1]); - - SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Parts, 2); - return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); - } + SDValue Vec = DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, &Ops[0], NumElts); + return DAG.getNode(ISD::BITCAST, dl, N->getValueType(0), Vec); } // Otherwise, store to a temporary and load out again as the new type. @@ -344,7 +366,7 @@ SDValue DAGTypeLegalizer::ExpandOp_BUILD_VECTOR(SDNode *N) { unsigned NumElts = VecVT.getVectorNumElements(); EVT OldVT = N->getOperand(0).getValueType(); EVT NewVT = TLI.getTypeToTransformTo(*DAG.getContext(), OldVT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); assert(OldVT == VecVT.getVectorElementType() && "BUILD_VECTOR operand type doesn't match vector element type!"); @@ -382,7 +404,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { // The vector type is legal but the element type needs expansion. EVT VecVT = N->getValueType(0); unsigned NumElts = VecVT.getVectorNumElements(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Val = N->getOperand(1); EVT OldEVT = Val.getValueType(); @@ -406,7 +428,8 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { Idx = DAG.getNode(ISD::ADD, dl, Idx.getValueType(), Idx, Idx); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Lo, Idx); Idx = DAG.getNode(ISD::ADD, dl, - Idx.getValueType(), Idx, DAG.getIntPtrConstant(1)); + Idx.getValueType(), Idx, + DAG.getConstant(1, Idx.getValueType())); NewVec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, NewVec, Hi, Idx); // Convert the new vector to the old vector type. 
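// A sketch of ExpandOp_INSERT_VECTOR_ELT above: with the element type
// expanded in half, the index is doubled (Idx + Idx) and the Lo and Hi parts
// land in adjacent slots of the twice-as-long vector. Shown for a <2 x i64>
// viewed as <4 x i32> on a little-endian target; hypothetical helper with
// Idx < 2.
#include <cstdint>
void insertElt128(uint32_t NewVec[4], unsigned Idx, uint64_t Val) {
  unsigned NewIdx = Idx + Idx;                // Idx = ADD(Idx, Idx)
  NewVec[NewIdx] = (uint32_t)Val;             // insert Lo at NewIdx
  NewVec[NewIdx + 1] = (uint32_t)(Val >> 32); // insert Hi at NewIdx + 1
}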
@@ -414,7 +437,7 @@ SDValue DAGTypeLegalizer::ExpandOp_INSERT_VECTOR_ELT(SDNode *N) { } SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); assert(VT.getVectorElementType() == N->getOperand(0).getValueType() && "SCALAR_TO_VECTOR operand type doesn't match vector element type!"); @@ -430,7 +453,7 @@ SDValue DAGTypeLegalizer::ExpandOp_SCALAR_TO_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { assert(ISD::isNormalStore(N) && "This routine only for normal stores!"); assert(OpNo == 1 && "Can only expand the stored value so far"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); StoreSDNode *St = cast(N); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), @@ -440,6 +463,7 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { unsigned Alignment = St->getAlignment(); bool isVolatile = St->isVolatile(); bool isNonTemporal = St->isNonTemporal(); + const MDNode *TBAAInfo = St->getTBAAInfo(); assert(NVT.isByteSized() && "Expanded type not byte sized!"); unsigned IncrementSize = NVT.getSizeInBits() / 8; @@ -451,15 +475,14 @@ SDValue DAGTypeLegalizer::ExpandOp_NormalStore(SDNode *N, unsigned OpNo) { std::swap(Lo, Hi); Lo = DAG.getStore(Chain, dl, Lo, Ptr, St->getPointerInfo(), - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); - assert(isTypeLegal(Ptr.getValueType()) && "Pointers must be legal!"); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getStore(Chain, dl, Hi, Ptr, St->getPointerInfo().getWithOffset(IncrementSize), isVolatile, isNonTemporal, - MinAlign(Alignment, IncrementSize)); + MinAlign(Alignment, IncrementSize), TBAAInfo); return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); } @@ -483,21 +506,19 @@ void DAGTypeLegalizer::SplitRes_MERGE_VALUES(SDNode *N, unsigned ResNo, void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH, CL, CH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitOp(N->getOperand(1), LL, LH); GetSplitOp(N->getOperand(2), RL, RH); SDValue Cond = N->getOperand(0); CL = CH = Cond; if (Cond.getValueType().isVector()) { - assert(Cond.getValueType().getVectorElementType() == MVT::i1 && - "Condition legalized before result?"); - unsigned NumElements = Cond.getValueType().getVectorNumElements(); - EVT VCondTy = EVT::getVectorVT(*DAG.getContext(), MVT::i1, NumElements / 2); - CL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getIntPtrConstant(0)); - CH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VCondTy, Cond, - DAG.getIntPtrConstant(NumElements / 2)); + // Check if there are already splitted versions of the vector available and + // use those instead of splitting the mask operand again. 
+ if (getTypeAction(Cond.getValueType()) == TargetLowering::TypeSplitVector) + GetSplitVector(Cond, CL, CH); + else + llvm::tie(CL, CH) = DAG.SplitVector(Cond, dl); } Lo = DAG.getNode(N->getOpcode(), dl, LL.getValueType(), CL, LL, RL); @@ -507,7 +528,7 @@ void DAGTypeLegalizer::SplitRes_SELECT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LL, LH, RL, RH; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitOp(N->getOperand(2), LL, LH); GetSplitOp(N->getOperand(3), RL, RH); @@ -519,7 +540,7 @@ void DAGTypeLegalizer::SplitRes_SELECT_CC(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getUNDEF(LoVT); Hi = DAG.getUNDEF(HiVT); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index c6e066e..2c3cdcc 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -171,7 +171,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { return TranslateLegalizeResults(Op, Result); case TargetLowering::Custom: Changed = true; - return LegalizeOp(TLI.LowerOperation(Result, DAG)); + return TranslateLegalizeResults(Op, TLI.LowerOperation(Result, DAG)); case TargetLowering::Expand: Changed = true; return LegalizeOp(ExpandStore(Op)); @@ -227,6 +227,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FP_TO_UINT: case ISD::FNEG: case ISD::FABS: + case ISD::FCOPYSIGN: case ISD::FSQRT: case ISD::FSIN: case ISD::FCOS: @@ -241,6 +242,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FTRUNC: case ISD::FRINT: case ISD::FNEARBYINT: + case ISD::FROUND: case ISD::FFLOOR: case ISD::FP_ROUND: case ISD::FP_EXTEND: @@ -320,7 +322,7 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SmallVector Operands(Op.getNumOperands()); for (unsigned j = 0; j != Op.getNumOperands(); ++j) { @@ -357,7 +359,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { // Build a new vector type and check if it is legal. MVT NVT = MVT::getVectorVT(EltVT.getSimpleVT(), NumElts); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SmallVector Operands(Op.getNumOperands()); unsigned Opc = Op.getOpcode() == ISD::UINT_TO_FP ? 
ISD::ZERO_EXTEND : @@ -375,7 +377,7 @@ SDValue VectorLegalizer::PromoteVectorOpINT_TO_FP(SDValue Op) { SDValue VectorLegalizer::ExpandLoad(SDValue Op) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); LoadSDNode *LD = cast(Op.getNode()); SDValue Chain = LD->getChain(); SDValue BasePTR = LD->getBasePtr(); @@ -416,7 +418,8 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LD->isVolatile(), LD->isNonTemporal(), - LD->isInvariant(), LD->getAlignment()); + LD->isInvariant(), LD->getAlignment(), + LD->getTBAAInfo()); } else { EVT LoadVT = WideVT; while (RemainingBytes < LoadBytes) { @@ -426,13 +429,14 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, LD->getPointerInfo().getWithOffset(Offset), LoadVT, LD->isVolatile(), - LD->isNonTemporal(), LD->getAlignment()); + LD->isNonTemporal(), LD->getAlignment(), + LD->getTBAAInfo()); } RemainingBytes -= LoadBytes; Offset += LoadBytes; BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(LoadBytes)); + DAG.getConstant(LoadBytes, BasePTR.getValueType())); LoadVals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -497,10 +501,10 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), SrcVT.getScalarType(), LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + LD->getAlignment(), LD->getTBAAInfo()); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + DAG.getConstant(Stride, BasePTR.getValueType())); Vals.push_back(ScalarLoad.getValue(0)); LoadChains.push_back(ScalarLoad.getValue(1)); @@ -519,7 +523,7 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { } SDValue VectorLegalizer::ExpandStore(SDValue Op) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); StoreSDNode *ST = cast(Op.getNode()); SDValue Chain = ST->getChain(); SDValue BasePTR = ST->getBasePtr(); @@ -529,6 +533,7 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { unsigned Alignment = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); unsigned NumElem = StVT.getVectorNumElements(); // The type of the data we want to save @@ -551,15 +556,15 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SmallVector Stores; for (unsigned Idx = 0; Idx < NumElem; Idx++) { SDValue Ex = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, - RegSclVT, Value, DAG.getIntPtrConstant(Idx)); + RegSclVT, Value, DAG.getConstant(Idx, TLI.getVectorIdxTy())); // This scalar TruncStore may be illegal, but we legalize it later. SDValue Store = DAG.getTruncStore(Chain, dl, Ex, BasePTR, ST->getPointerInfo().getWithOffset(Idx*Stride), MemSclVT, - isVolatile, isNonTemporal, Alignment); + isVolatile, isNonTemporal, Alignment, TBAAInfo); BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + DAG.getConstant(Stride, BasePTR.getValueType())); Stores.push_back(Store); } @@ -572,9 +577,9 @@ SDValue VectorLegalizer::ExpandStore(SDValue Op) { SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { // Lower a select instruction where the condition is a scalar and the // operands are vectors. Lower this select to VSELECT and implement it - // using XOR AND OR. The selector bit is broadcasted. + // using XOR AND OR. 
The selector bit is broadcasted. EVT VT = Op.getValueType(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Mask = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -597,15 +602,12 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { return DAG.UnrollVectorOp(Op.getNode()); // Generate a mask operand. - EVT MaskTy = TLI.getSetCCResultType(VT); - assert(MaskTy.isVector() && "Invalid CC type"); - assert(MaskTy.getSizeInBits() == Op1.getValueType().getSizeInBits() - && "Invalid mask size"); + EVT MaskTy = VT.changeVectorElementTypeToInteger(); // What is the size of each element in the vector mask. EVT BitTy = MaskTy.getScalarType(); - Mask = DAG.getNode(ISD::SELECT, DL, BitTy, Mask, + Mask = DAG.getSelect(DL, BitTy, Mask, DAG.getConstant(APInt::getAllOnesValue(BitTy.getSizeInBits()), BitTy), DAG.getConstant(0, BitTy)); @@ -637,7 +639,7 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) return DAG.UnrollVectorOp(Op.getNode()); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); EVT OrigTy = cast(Op->getOperand(1))->getVT(); unsigned BW = VT.getScalarType().getSizeInBits(); @@ -652,13 +654,14 @@ SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. - EVT VT = Op.getOperand(0).getValueType(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Mask = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); SDValue Op2 = Op.getOperand(2); + EVT VT = Mask.getValueType(); + // If we can't even use the basic vector operations of // AND,OR,XOR, we will have to scalarize the op. // Notice that the operation may be 'promoted' which means that it is @@ -673,8 +676,12 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { TargetLowering::ZeroOrNegativeOneBooleanContent) return DAG.UnrollVectorOp(Op.getNode()); - assert(VT.getSizeInBits() == Op1.getValueType().getSizeInBits() - && "Invalid mask size"); + // If the mask and the type are different sizes, unroll the vector op. This + // can occur when getSetCCResultType returns something that is different in + // size from the operand types. For example, v4i8 = select v4i32, v4i8, v4i8. + if (VT.getSizeInBits() != Op1.getValueType().getSizeInBits()) + return DAG.UnrollVectorOp(Op.getNode()); + // Bitcast the operands to be the same type as the mask. // This is needed when we select between FP types because // the mask is a vector of integers. @@ -693,7 +700,7 @@ SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { EVT VT = Op.getOperand(0).getValueType(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); // Make sure that the SINT_TO_FP and SRL instructions are available. 
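The ExpandSELECT hunk above lowers a scalar-condition select over vectors to bitwise ops: the selector bit is broadcast to an all-ones or all-zeros lane mask and the result is formed as (Op1 & Mask) | (Op2 & ~Mask). A minimal standalone sketch of that semantics in plain C++ (this models the idea only; selectByMask and all names here are illustrative, not the SelectionDAG API):

#include <array>
#include <cstdint>
#include <cstdio>

// Model of the XOR/AND/OR select lowering: broadcast the scalar condition
// to an all-ones/all-zeros mask, then blend the two operands bitwise.
std::array<uint32_t, 4> selectByMask(bool cond,
                                     const std::array<uint32_t, 4> &a,
                                     const std::array<uint32_t, 4> &b) {
  uint32_t mask = cond ? 0xFFFFFFFFu : 0u; // broadcast the selector bit
  std::array<uint32_t, 4> out;
  for (int i = 0; i < 4; ++i)
    out[i] = (a[i] & mask) | (b[i] & ~mask); // (Op1 & Mask) | (Op2 & ~Mask)
  return out;
}

int main() {
  std::array<uint32_t, 4> a{1, 2, 3, 4}, b{5, 6, 7, 8};
  auto r = selectByMask(true, a, b);
  std::printf("%u %u %u %u\n", r[0], r[1], r[2], r[3]); // prints 1 2 3 4
}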
if (TLI.getOperationAction(ISD::SINT_TO_FP, VT) == TargetLowering::Expand || @@ -734,7 +741,7 @@ SDValue VectorLegalizer::ExpandUINT_TO_FLOAT(SDValue Op) { SDValue VectorLegalizer::ExpandFNEG(SDValue Op) { if (TLI.isOperationLegalOrCustom(ISD::FSUB, Op.getValueType())) { SDValue Zero = DAG.getConstantFP(-0.0, Op.getValueType()); - return DAG.getNode(ISD::FSUB, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(ISD::FSUB, SDLoc(Op), Op.getValueType(), Zero, Op.getOperand(0)); } return DAG.UnrollVectorOp(Op.getNode()); @@ -746,19 +753,20 @@ SDValue VectorLegalizer::UnrollVSETCC(SDValue Op) { EVT EltVT = VT.getVectorElementType(); SDValue LHS = Op.getOperand(0), RHS = Op.getOperand(1), CC = Op.getOperand(2); EVT TmpEltVT = LHS.getValueType().getVectorElementType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SmallVector Ops(NumElems); for (unsigned i = 0; i < NumElems; ++i) { SDValue LHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, LHS, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); SDValue RHSElem = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, TmpEltVT, RHS, - DAG.getIntPtrConstant(i)); - Ops[i] = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(TmpEltVT), + DAG.getConstant(i, TLI.getVectorIdxTy())); + Ops[i] = DAG.getNode(ISD::SETCC, dl, + TLI.getSetCCResultType(*DAG.getContext(), TmpEltVT), LHSElem, RHSElem, CC); - Ops[i] = DAG.getNode(ISD::SELECT, dl, EltVT, Ops[i], - DAG.getConstant(APInt::getAllOnesValue - (EltVT.getSizeInBits()), EltVT), - DAG.getConstant(0, EltVT)); + Ops[i] = DAG.getSelect(dl, EltVT, Ops[i], + DAG.getConstant(APInt::getAllOnesValue + (EltVT.getSizeInBits()), EltVT), + DAG.getConstant(0, EltVT)); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElems); } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index 04c6bfd..f7a3e3d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -83,6 +83,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -97,6 +98,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { case ISD::ADD: case ISD::AND: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: @@ -128,7 +130,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::ScalarizeVecRes_BinOp(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), LHS.getValueType(), LHS, RHS); } @@ -136,7 +138,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_TernaryOp(SDNode *N) { SDValue Op0 = GetScalarizedVector(N->getOperand(0)); SDValue Op1 = GetScalarizedVector(N->getOperand(1)); SDValue Op2 = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), Op0.getValueType(), Op0, Op1, Op2); } @@ -148,7 +150,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_MERGE_VALUES(SDNode *N, SDValue DAGTypeLegalizer::ScalarizeVecRes_BITCAST(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return 
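UnrollVSETCC, updated above to use DAG.getSelect and the two-argument getSetCCResultType, scalarizes a vector compare: each lane is compared on its own and the boolean is widened to the all-ones/all-zeros convention before the lanes are rebuilt into a vector. A plain-C++ model of that loop (unrolledSetLT is a made-up name; a sketch, not the DAG code):

#include <array>
#include <cstdint>

// Model of UnrollVSETCC: compare lane by lane, then widen each boolean to
// the zero-or-negative-one convention (all ones for true, zero for false).
std::array<int32_t, 4> unrolledSetLT(const std::array<int32_t, 4> &lhs,
                                     const std::array<int32_t, 4> &rhs) {
  std::array<int32_t, 4> out;
  for (int i = 0; i < 4; ++i) {
    bool cc = lhs[i] < rhs[i]; // the scalar SETCC on one element
    out[i] = cc ? -1 : 0;      // the getSelect(all-ones, 0) in the hunk
  }
  return out;                  // conceptually the final BUILD_VECTOR
}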
DAG.getNode(ISD::BITCAST, SDLoc(N), NewVT, N->getOperand(0)); } @@ -158,14 +160,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_BUILD_VECTOR(SDNode *N) { // The BUILD_VECTOR operands may be of wider element types and // we may need to truncate them back to the requested return type. if (EltVT.isInteger()) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); return InOp; } SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op0 = GetScalarizedVector(N->getOperand(0)); - return DAG.getConvertRndSat(NewVT, N->getDebugLoc(), + return DAG.getConvertRndSat(NewVT, SDLoc(N), Op0, DAG.getValueType(NewVT), DAG.getValueType(Op0.getValueType()), N->getOperand(3), @@ -174,7 +176,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_CONVERT_RNDSAT(SDNode *N) { } SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0).getVectorElementType(), N->getOperand(0), N->getOperand(1)); } @@ -182,13 +184,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecRes_FP_ROUND(SDNode *N) { EVT NewVT = N->getValueType(0).getVectorElementType(); SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FP_ROUND, N->getDebugLoc(), + return DAG.getNode(ISD::FP_ROUND, SDLoc(N), NewVT, Op, N->getOperand(1)); } SDValue DAGTypeLegalizer::ScalarizeVecRes_FPOWI(SDNode *N) { SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::FPOWI, N->getDebugLoc(), + return DAG.getNode(ISD::FPOWI, SDLoc(N), Op.getValueType(), Op, N->getOperand(1)); } @@ -199,7 +201,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); if (Op.getValueType() != EltVT) // FIXME: Can this happen for floating point types? - Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, Op); + Op = DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, Op); return Op; } @@ -209,13 +211,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_LOAD(LoadSDNode *N) { SDValue Result = DAG.getLoad(ISD::UNINDEXED, N->getExtensionType(), N->getValueType(0).getVectorElementType(), - N->getDebugLoc(), + SDLoc(N), N->getChain(), N->getBasePtr(), DAG.getUNDEF(N->getBasePtr().getValueType()), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->isInvariant(), N->getOriginalAlignment()); + N->isInvariant(), N->getOriginalAlignment(), + N->getTBAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. @@ -227,14 +230,14 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_UnaryOp(SDNode *N) { // Get the dest type - it doesn't always match the input type, e.g. int_to_fp. 
EVT DestVT = N->getValueType(0).getVectorElementType(); SDValue Op = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), DestVT, Op); + return DAG.getNode(N->getOpcode(), SDLoc(N), DestVT, Op); } SDValue DAGTypeLegalizer::ScalarizeVecRes_InregOp(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); EVT ExtVT = cast(N->getOperand(1))->getVT().getVectorElementType(); SDValue LHS = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), EltVT, + return DAG.getNode(N->getOpcode(), SDLoc(N), EltVT, LHS, DAG.getValueType(ExtVT)); } @@ -244,7 +247,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) { EVT EltVT = N->getValueType(0).getVectorElementType(); SDValue InOp = N->getOperand(0); if (InOp.getValueType() != EltVT) - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), EltVT, InOp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), EltVT, InOp); return InOp; } @@ -262,33 +265,34 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSELECT(SDNode *N) { assert(VecBool == TargetLowering::UndefinedBooleanContent || VecBool == TargetLowering::ZeroOrNegativeOneBooleanContent); // Vector read from all ones, scalar expects a single 1 so mask. - Cond = DAG.getNode(ISD::AND, N->getDebugLoc(), CondVT, + Cond = DAG.getNode(ISD::AND, SDLoc(N), CondVT, Cond, DAG.getConstant(1, CondVT)); break; case TargetLowering::ZeroOrNegativeOneBooleanContent: assert(VecBool == TargetLowering::UndefinedBooleanContent || VecBool == TargetLowering::ZeroOrOneBooleanContent); // Vector reads from a one, scalar from all ones so sign extend. - Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), CondVT, + Cond = DAG.getNode(ISD::SIGN_EXTEND_INREG, SDLoc(N), CondVT, Cond, DAG.getValueType(MVT::i1)); break; } } - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), Cond, LHS, - GetScalarizedVector(N->getOperand(2))); + + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), Cond, LHS, + GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(1)); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), - LHS.getValueType(), N->getOperand(0), LHS, - GetScalarizedVector(N->getOperand(2))); + return DAG.getSelect(SDLoc(N), + LHS.getValueType(), N->getOperand(0), LHS, + GetScalarizedVector(N->getOperand(2))); } SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT_CC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(2)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), LHS.getValueType(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), LHS.getValueType(), N->getOperand(0), N->getOperand(1), LHS, GetScalarizedVector(N->getOperand(3)), N->getOperand(4)); @@ -303,7 +307,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SETCC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Turn it into a scalar SETCC. return DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, N->getOperand(2)); @@ -330,7 +334,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_VSETCC(SDNode *N) { SDValue LHS = GetScalarizedVector(N->getOperand(0)); SDValue RHS = GetScalarizedVector(N->getOperand(1)); EVT NVT = N->getValueType(0).getVectorElementType(); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Turn it into a scalar SETCC. 
SDValue Res = DAG.getNode(ISD::SETCC, DL, MVT::i1, LHS, RHS, @@ -368,7 +372,8 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::ANY_EXTEND: case ISD::ZERO_EXTEND: case ISD::SIGN_EXTEND: - Res = ScalarizeVecOp_EXTEND(N); + case ISD::TRUNCATE: + Res = ScalarizeVecOp_UnaryOp(N); break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); @@ -401,22 +406,22 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { /// to be scalarized, it must be <1 x ty>. Convert the element instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { SDValue Elt = GetScalarizedVector(N->getOperand(0)); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Elt); } /// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs /// to be scalarized, it must be <1 x ty>. Extend the element instead. -SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) { +SDValue DAGTypeLegalizer::ScalarizeVecOp_UnaryOp(SDNode *N) { assert(N->getValueType(0).getVectorNumElements() == 1 && "Unexected vector type!"); SDValue Elt = GetScalarizedVector(N->getOperand(0)); SmallVector Ops(1); - Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + Ops[0] = DAG.getNode(N->getOpcode(), SDLoc(N), N->getValueType(0).getScalarType(), Elt); // Revectorize the result so the types line up with what the uses of this // expression expect. - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), &Ops[0], 1); } @@ -426,7 +431,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SmallVector Ops(N->getNumOperands()); for (unsigned i = 0, e = N->getNumOperands(); i < e; ++i) Ops[i] = GetScalarizedVector(N->getOperand(i)); - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), N->getValueType(0), &Ops[0], Ops.size()); } @@ -436,7 +441,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue Res = GetScalarizedVector(N->getOperand(0)); if (Res.getValueType() != N->getValueType(0)) - Res = DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), N->getValueType(0), + Res = DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), N->getValueType(0), Res); return Res; } @@ -446,7 +451,7 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ assert(N->isUnindexed() && "Indexed store of one-element vector?"); assert(OpNo == 1 && "Do not know how to scalarize this operand!"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->isTruncatingStore()) return DAG.getTruncStore(N->getChain(), dl, @@ -454,12 +459,12 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo){ N->getBasePtr(), N->getPointerInfo(), N->getMemoryVT().getVectorElementType(), N->isVolatile(), N->isNonTemporal(), - N->getAlignment()); + N->getAlignment(), N->getTBAAInfo()); return DAG.getStore(N->getChain(), dl, GetScalarizedVector(N->getOperand(1)), N->getBasePtr(), N->getPointerInfo(), N->isVolatile(), N->isNonTemporal(), - N->getOriginalAlignment()); + N->getOriginalAlignment(), N->getTBAAInfo()); } @@ -516,7 +521,6 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { SplitVecRes_VECTOR_SHUFFLE(cast(N), Lo, Hi); break; - case 
ISD::ANY_EXTEND: case ISD::CONVERT_RNDSAT: case ISD::CTLZ: case ISD::CTTZ: @@ -539,21 +543,27 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) { case ISD::FP_TO_SINT: case ISD::FP_TO_UINT: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: - case ISD::SIGN_EXTEND: case ISD::SINT_TO_FP: case ISD::TRUNCATE: case ISD::UINT_TO_FP: - case ISD::ZERO_EXTEND: SplitVecRes_UnaryOp(N, Lo, Hi); break; + case ISD::ANY_EXTEND: + case ISD::SIGN_EXTEND: + case ISD::ZERO_EXTEND: + SplitVecRes_ExtendOp(N, Lo, Hi); + break; + case ISD::ADD: case ISD::SUB: case ISD::MUL: case ISD::FADD: + case ISD::FCOPYSIGN: case ISD::FSUB: case ISD::FMUL: case ISD::SDIV: @@ -587,7 +597,7 @@ void DAGTypeLegalizer::SplitVecRes_BinOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(0), LHSLo, LHSHi); SDValue RHSLo, RHSHi; GetSplitVector(N->getOperand(1), RHSLo, RHSHi); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, RHSLo); Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi, RHSHi); @@ -601,7 +611,7 @@ void DAGTypeLegalizer::SplitVecRes_TernaryOp(SDNode *N, SDValue &Lo, GetSplitVector(N->getOperand(1), Op1Lo, Op1Hi); SDValue Op2Lo, Op2Hi; GetSplitVector(N->getOperand(2), Op2Lo, Op2Hi); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); Lo = DAG.getNode(N->getOpcode(), dl, Op0Lo.getValueType(), Op0Lo, Op1Lo, Op2Lo); @@ -614,8 +624,8 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, // We know the result is a vector. The input may be either a vector or a // scalar value. EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); - DebugLoc dl = N->getDebugLoc(); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); + SDLoc dl(N); SDValue InOp = N->getOperand(0); EVT InVT = InOp.getValueType(); @@ -668,8 +678,8 @@ void DAGTypeLegalizer::SplitVecRes_BITCAST(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc dl(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); unsigned LoNumElts = LoVT.getVectorNumElements(); SmallVector LoOps(N->op_begin(), N->op_begin()+LoNumElts); Lo = DAG.getNode(ISD::BUILD_VECTOR, dl, LoVT, &LoOps[0], LoOps.size()); @@ -681,7 +691,7 @@ void DAGTypeLegalizer::SplitVecRes_BUILD_VECTOR(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, SDValue &Hi) { assert(!(N->getNumOperands() & 1) && "Unsupported CONCAT_VECTORS"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumSubvectors = N->getNumOperands() / 2; if (NumSubvectors == 1) { Lo = N->getOperand(0); @@ -690,7 +700,7 @@ void DAGTypeLegalizer::SplitVecRes_CONCAT_VECTORS(SDNode *N, SDValue &Lo, } EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); SmallVector LoOps(N->op_begin(), N->op_begin()+NumSubvectors); Lo = DAG.getNode(ISD::CONCAT_VECTORS, dl, LoVT, &LoOps[0], LoOps.size()); @@ -703,20 +713,21 @@ void DAGTypeLegalizer::SplitVecRes_EXTRACT_SUBVECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Vec = N->getOperand(0); SDValue Idx = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT LoVT, HiVT; - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = 
DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, LoVT, Vec, Idx); uint64_t IdxVal = cast(Idx)->getZExtValue(); Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, HiVT, Vec, - DAG.getIntPtrConstant(IdxVal + LoVT.getVectorNumElements())); + DAG.getConstant(IdxVal + LoVT.getVectorNumElements(), + TLI.getVectorIdxTy())); } void DAGTypeLegalizer::SplitVecRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitVector(N->getOperand(0), Lo, Hi); Lo = DAG.getNode(ISD::FPOWI, dl, Lo.getValueType(), Lo, N->getOperand(1)); Hi = DAG.getNode(ISD::FPOWI, dl, Hi.getValueType(), Hi, N->getOperand(1)); @@ -726,10 +737,11 @@ void DAGTypeLegalizer::SplitVecRes_InregOp(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue LHSLo, LHSHi; GetSplitVector(N->getOperand(0), LHSLo, LHSHi); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT LoVT, HiVT; - GetSplitDestVTs(cast(N->getOperand(1))->getVT(), LoVT, HiVT); + llvm::tie(LoVT, HiVT) = + DAG.GetSplitDestVTs(cast(N->getOperand(1))->getVT()); Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo, DAG.getValueType(LoVT)); @@ -742,7 +754,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue Vec = N->getOperand(0); SDValue Elt = N->getOperand(1); SDValue Idx = N->getOperand(2); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitVector(Vec, Lo, Hi); if (ConstantSDNode *CIdx = dyn_cast(Idx)) { @@ -753,7 +765,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, Lo.getValueType(), Lo, Elt, Idx); else Hi = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, Hi.getValueType(), Hi, Elt, - DAG.getIntPtrConstant(IdxVal - LoNumElts)); + DAG.getConstant(IdxVal - LoNumElts, + TLI.getVectorIdxTy())); return; } @@ -780,7 +793,7 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, // Increment the pointer to the other part. unsigned IncrementSize = Lo.getValueType().getSizeInBits() / 8; StackPtr = DAG.getNode(ISD::ADD, dl, StackPtr.getValueType(), StackPtr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, StackPtr.getValueType())); // Load the Hi part from the stack slot. 
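For a non-constant insertion index, SplitVecRes_INSERT_VECTOR_ELT falls back to a stack temporary: spill the vector, write the element through the pointer, then reload Lo and Hi, bumping the pointer by Lo's size in bytes, which is exactly the increment rewritten above in terms of getConstant. A standalone model with a local buffer standing in for the stack slot (insertAndSplit is hypothetical):

#include <array>
#include <cstdint>
#include <cstring>

// Model of the variable-index INSERT_VECTOR_ELT split: spill the vector,
// poke one element, then reload Lo and Hi at byte offsets 0 and sizeof(lo).
void insertAndSplit(const std::array<int32_t, 8> &vec, unsigned idx,
                    int32_t elt, std::array<int32_t, 4> &lo,
                    std::array<int32_t, 4> &hi) {
  int32_t slot[8];                                   // the "stack temporary"
  std::memcpy(slot, vec.data(), sizeof(slot));       // store the whole vector
  slot[idx % 8] = elt;                               // store the new element
  std::memcpy(lo.data(), slot, sizeof(lo));          // load Lo from offset 0
  // Increment the pointer by Lo's size in bytes, as in the hunk above.
  std::memcpy(hi.data(), reinterpret_cast<const char *>(slot) + sizeof(lo),
              sizeof(hi));                           // load Hi
}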
Hi = DAG.getLoad(Hi.getValueType(), dl, Store, StackPtr, MachinePointerInfo(), @@ -790,8 +803,8 @@ void DAGTypeLegalizer::SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc dl(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); Lo = DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, LoVT, N->getOperand(0)); Hi = DAG.getUNDEF(HiVT); } @@ -800,8 +813,8 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, SDValue &Hi) { assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!"); EVT LoVT, HiVT; - DebugLoc dl = LD->getDebugLoc(); - GetSplitDestVTs(LD->getValueType(0), LoVT, HiVT); + SDLoc dl(LD); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(LD->getValueType(0)); ISD::LoadExtType ExtType = LD->getExtensionType(); SDValue Ch = LD->getChain(); @@ -812,20 +825,22 @@ void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT LoMemVT, HiMemVT; - GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); Lo = DAG.getLoad(ISD::UNINDEXED, ExtType, LoVT, dl, Ch, Ptr, Offset, LD->getPointerInfo(), LoMemVT, isVolatile, isNonTemporal, - isInvariant, Alignment); + isInvariant, Alignment, TBAAInfo); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; Ptr = DAG.getNode(ISD::ADD, dl, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); Hi = DAG.getLoad(ISD::UNINDEXED, ExtType, HiVT, dl, Ch, Ptr, Offset, LD->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment); + HiMemVT, isVolatile, isNonTemporal, isInvariant, Alignment, + TBAAInfo); // Build a factor node to remember that this load is independent of the // other one. @@ -843,23 +858,13 @@ void DAGTypeLegalizer::SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi) { "Operand types must be vectors"); EVT LoVT, HiVT; - DebugLoc DL = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc DL(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // Split the input. - EVT InVT = N->getOperand(0).getValueType(); SDValue LL, LH, RL, RH; - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - LL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); - LH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - - RL = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getIntPtrConstant(0)); - RH = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InNVT, N->getOperand(1), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); + llvm::tie(LL, LH) = DAG.SplitVectorOperand(N, 0); + llvm::tie(RL, RH) = DAG.SplitVectorOperand(N, 1); Lo = DAG.getNode(N->getOpcode(), DL, LoVT, LL, RL, N->getOperand(2)); Hi = DAG.getNode(N->getOpcode(), DL, HiVT, LH, RH, N->getOperand(2)); @@ -869,22 +874,16 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, SDValue &Hi) { // Get the dest types - they may not match the input types, e.g. int_to_fp. 
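SplitVecRes_LOAD above becomes two narrower loads, the second at BasePtr plus LoMemVT's size in bytes, with the TBAA info now threaded through both halves. A minimal model of the address math (splitLoad is illustrative, assuming a v8i32 load split into two v4i32 halves):

#include <array>
#include <cstdint>
#include <cstring>

// Model of splitting one v8i32 load into two v4i32 loads: the Hi load
// reads at BasePtr + (size of LoMemVT in bits) / 8.
void splitLoad(const int32_t *basePtr, std::array<int32_t, 4> &lo,
               std::array<int32_t, 4> &hi) {
  constexpr unsigned LoBits = 4 * 32;
  constexpr unsigned IncrementSize = LoBits / 8; // bytes, as in the hunk
  std::memcpy(lo.data(), basePtr, IncrementSize);
  std::memcpy(hi.data(),
              reinterpret_cast<const char *>(basePtr) + IncrementSize,
              IncrementSize);
}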
EVT LoVT, HiVT; - DebugLoc dl = N->getDebugLoc(); - GetSplitDestVTs(N->getValueType(0), LoVT, HiVT); + SDLoc dl(N); + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(N->getValueType(0)); // If the input also splits, handle it directly for a compile time speedup. // Otherwise split it by hand. EVT InVT = N->getOperand(0).getValueType(); - if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) { + if (getTypeAction(InVT) == TargetLowering::TypeSplitVector) GetSplitVector(N->getOperand(0), Lo, Hi); - } else { - EVT InNVT = EVT::getVectorVT(*DAG.getContext(), InVT.getVectorElementType(), - LoVT.getVectorNumElements()); - Lo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(0)); - Hi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InNVT, N->getOperand(0), - DAG.getIntPtrConstant(InNVT.getVectorNumElements())); - } + else + llvm::tie(Lo, Hi) = DAG.SplitVectorOperand(N, 0); if (N->getOpcode() == ISD::FP_ROUND) { Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo, N->getOperand(1)); @@ -907,11 +906,63 @@ void DAGTypeLegalizer::SplitVecRes_UnaryOp(SDNode *N, SDValue &Lo, } } +void DAGTypeLegalizer::SplitVecRes_ExtendOp(SDNode *N, SDValue &Lo, + SDValue &Hi) { + SDLoc dl(N); + EVT SrcVT = N->getOperand(0).getValueType(); + EVT DestVT = N->getValueType(0); + EVT LoVT, HiVT; + llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(DestVT); + + // We can do better than a generic split operation if the extend is doing + // more than just doubling the width of the elements and the following are + // true: + // - The number of vector elements is even, + // - the source type is legal, + // - the type of a split source is illegal, + // - the type of an extended (by doubling element size) source is legal, and + // - the type of that extended source when split is legal. + // + // This won't necessarily completely legalize the operation, but it will + // more effectively move in the right direction and prevent falling down + // to scalarization in many cases due to the input vector being split too + // far. + unsigned NumElements = SrcVT.getVectorNumElements(); + if ((NumElements & 1) == 0 && + SrcVT.getSizeInBits() * 2 < DestVT.getSizeInBits()) { + LLVMContext &Ctx = *DAG.getContext(); + EVT NewSrcVT = EVT::getVectorVT( + Ctx, EVT::getIntegerVT( + Ctx, SrcVT.getVectorElementType().getSizeInBits() * 2), + NumElements); + EVT SplitSrcVT = + EVT::getVectorVT(Ctx, SrcVT.getVectorElementType(), NumElements / 2); + EVT SplitLoVT, SplitHiVT; + llvm::tie(SplitLoVT, SplitHiVT) = DAG.GetSplitDestVTs(NewSrcVT); + if (TLI.isTypeLegal(SrcVT) && !TLI.isTypeLegal(SplitSrcVT) && + TLI.isTypeLegal(NewSrcVT) && TLI.isTypeLegal(SplitLoVT)) { + DEBUG(dbgs() << "Split vector extend via incremental extend:"; + N->dump(&DAG); dbgs() << "\n"); + // Extend the source vector by one step. + SDValue NewSrc = + DAG.getNode(N->getOpcode(), dl, NewSrcVT, N->getOperand(0)); + // Get the low and high halves of the new, extended one step, vector. + llvm::tie(Lo, Hi) = DAG.SplitVector(NewSrc, dl); + // Extend those vector halves the rest of the way. + Lo = DAG.getNode(N->getOpcode(), dl, LoVT, Lo); + Hi = DAG.getNode(N->getOpcode(), dl, HiVT, Hi); + return; + } + } + // Fall back to the generic unary operator splitting otherwise. + SplitVecRes_UnaryOp(N, Lo, Hi); +} + void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo, SDValue &Hi) { // The low and high parts of the original input give four input vectors. 
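The new SplitVecRes_ExtendOp added above handles extends that more than double the element width, for example v8i8 to v8i32, by extending one step first and only then splitting, so the narrow source is not split all the way down to scalars. A scalar-array model of the two-hop route (extendInSteps is a made-up name for illustration):

#include <array>
#include <cstdint>

// Model of the incremental extend: v8i8 -> v8i16 (one step, NewSrcVT with
// the element size doubled), split into two v4i16 halves, then finish
// each half as v4i16 -> v4i32.
void extendInSteps(const std::array<int8_t, 8> &src,
                   std::array<int32_t, 4> &lo, std::array<int32_t, 4> &hi) {
  std::array<int16_t, 8> step;                // the one-step extended vector
  for (int i = 0; i < 8; ++i)
    step[i] = static_cast<int16_t>(src[i]);   // sign extend one step
  for (int i = 0; i < 4; ++i) {
    lo[i] = step[i];                          // extend Lo the rest of the way
    hi[i] = step[i + 4];                      // extend Hi the rest of the way
  }
}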
SDValue Inputs[4]; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitVector(N->getOperand(0), Inputs[0], Inputs[1]); GetSplitVector(N->getOperand(1), Inputs[2], Inputs[3]); EVT NewVT = Inputs[0].getValueType(); @@ -994,7 +1045,8 @@ void DAGTypeLegalizer::SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, // Extract the vector element by hand. SVOps.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, - Inputs[Input], DAG.getIntPtrConstant(Idx))); + Inputs[Input], DAG.getConstant(Idx, + TLI.getVectorIdxTy()))); } // Construct the Lo/Hi output using a BUILD_VECTOR. @@ -1030,6 +1082,10 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { dbgs() << "\n"); SDValue Res = SDValue(); + // See if the target wants to custom split this node. + if (CustomLowerNode(N, N->getOperand(OpNo).getValueType(), false)) + return false; + if (Res.getNode() == 0) { switch (N->getOpcode()) { default: @@ -1094,41 +1150,23 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { SDValue Mask = N->getOperand(0); SDValue Src0 = N->getOperand(1); SDValue Src1 = N->getOperand(2); - DebugLoc DL = N->getDebugLoc(); - EVT MaskVT = Mask.getValueType(); - assert(MaskVT.isVector() && "VSELECT without a vector mask?"); + EVT Src0VT = Src0.getValueType(); + SDLoc DL(N); + assert(Mask.getValueType().isVector() && "VSELECT without a vector mask?"); SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); assert(Lo.getValueType() == Hi.getValueType() && - "Lo and Hi have differing types");; - - unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); - unsigned HiNumElts = Hi.getValueType().getVectorNumElements(); - assert(LoNumElts == HiNumElts && "Asymmetric vector split?"); - - LLVMContext &Ctx = *DAG.getContext(); - SDValue Zero = DAG.getIntPtrConstant(0); - SDValue LoElts = DAG.getIntPtrConstant(LoNumElts); - EVT Src0VT = Src0.getValueType(); - EVT Src0EltTy = Src0VT.getVectorElementType(); - EVT MaskEltTy = MaskVT.getVectorElementType(); - - EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts); - EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts); - EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts); - EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts); - - SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero); - SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero); + "Lo and Hi have differing types"); - SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts); - SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts); + EVT LoOpVT, HiOpVT; + llvm::tie(LoOpVT, HiOpVT) = DAG.GetSplitDestVTs(Src0VT); + assert(LoOpVT == HiOpVT && "Asymmetric vector split?"); - SDValue LoMask = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero); - SDValue HiMask = - DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts); + SDValue LoOp0, HiOp0, LoOp1, HiOp1, LoMask, HiMask; + llvm::tie(LoOp0, HiOp0) = DAG.SplitVector(Src0, DL); + llvm::tie(LoOp1, HiOp1) = DAG.SplitVector(Src1, DL); + llvm::tie(LoMask, HiMask) = DAG.SplitVector(Mask, DL); SDValue LoSelect = DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); @@ -1142,7 +1180,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. 
EVT ResVT = N->getValueType(0); SDValue Lo, Hi; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); GetSplitVector(N->getOperand(0), Lo, Hi); EVT InVT = Lo.getValueType(); @@ -1167,7 +1205,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_BITCAST(SDNode *N) { if (TLI.isBigEndian()) std::swap(Lo, Hi); - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), JoinIntegers(Lo, Hi)); } @@ -1175,7 +1213,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_SUBVECTOR(SDNode *N) { // We know that the extracted result type is legal. EVT SubVT = N->getValueType(0); SDValue Idx = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Lo, Hi; GetSplitVector(N->getOperand(0), Lo, Hi); @@ -1215,7 +1253,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { // Store the vector to the stack. EVT EltVT = VecVT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue StackPtr = DAG.CreateStackTemporary(VecVT); SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Vec, StackPtr, MachinePointerInfo(), false, false, 0); @@ -1229,7 +1267,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { assert(N->isUnindexed() && "Indexed store of vector?"); assert(OpNo == 1 && "Can only split the stored value"); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); bool isTruncating = N->isTruncatingStore(); SDValue Ch = N->getChain(); @@ -1238,39 +1276,40 @@ SDValue DAGTypeLegalizer::SplitVecOp_STORE(StoreSDNode *N, unsigned OpNo) { unsigned Alignment = N->getOriginalAlignment(); bool isVol = N->isVolatile(); bool isNT = N->isNonTemporal(); + const MDNode *TBAAInfo = N->getTBAAInfo(); SDValue Lo, Hi; GetSplitVector(N->getOperand(1), Lo, Hi); EVT LoMemVT, HiMemVT; - GetSplitDestVTs(MemoryVT, LoMemVT, HiMemVT); + llvm::tie(LoMemVT, HiMemVT) = DAG.GetSplitDestVTs(MemoryVT); unsigned IncrementSize = LoMemVT.getSizeInBits()/8; if (isTruncating) Lo = DAG.getTruncStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - LoMemVT, isVol, isNT, Alignment); + LoMemVT, isVol, isNT, Alignment, TBAAInfo); else Lo = DAG.getStore(Ch, DL, Lo, Ptr, N->getPointerInfo(), - isVol, isNT, Alignment); + isVol, isNT, Alignment, TBAAInfo); // Increment the pointer to the other half. Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr, - DAG.getIntPtrConstant(IncrementSize)); + DAG.getConstant(IncrementSize, Ptr.getValueType())); if (isTruncating) Hi = DAG.getTruncStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - HiMemVT, isVol, isNT, Alignment); + HiMemVT, isVol, isNT, Alignment, TBAAInfo); else Hi = DAG.getStore(Ch, DL, Hi, Ptr, N->getPointerInfo().getWithOffset(IncrementSize), - isVol, isNT, Alignment); + isVol, isNT, Alignment, TBAAInfo); return DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Lo, Hi); } SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // The input operands all must have the same type, and we know the result // type is valid. 
Convert this to a buildvector which extracts all the @@ -1284,7 +1323,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_CONCAT_VECTORS(SDNode *N) { for (unsigned i = 0, e = Op.getValueType().getVectorNumElements(); i != e; ++i) { Elts.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, - Op, DAG.getIntPtrConstant(i))); + Op, DAG.getConstant(i, TLI.getVectorIdxTy()))); } } @@ -1327,15 +1366,11 @@ SDValue DAGTypeLegalizer::SplitVecOp_TRUNCATE(SDNode *N) { // to split more than once. if (InElementSize <= OutElementSize * 2) return SplitVecOp_UnaryOp(N); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // Extract the halves of the input via extract_subvector. - EVT SplitVT = EVT::getVectorVT(*DAG.getContext(), - InVT.getVectorElementType(), NumElements/2); - SDValue InLoVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getIntPtrConstant(0)); - SDValue InHiVec = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, SplitVT, InVec, - DAG.getIntPtrConstant(NumElements/2)); + SDValue InLoVec, InHiVec; + llvm::tie(InLoVec, InHiVec) = DAG.SplitVector(InVec, DL); // Truncate them to 1/2 the element size. EVT HalfElementVT = EVT::getIntegerVT(*DAG.getContext(), InElementSize/2); EVT HalfVT = EVT::getVectorVT(*DAG.getContext(), HalfElementVT, @@ -1359,7 +1394,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_VSETCC(SDNode *N) { "Operand types must be vectors"); // The result has a legal vector type, but the input needs splitting. SDValue Lo0, Hi0, Lo1, Hi1, LoRes, HiRes; - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); GetSplitVector(N->getOperand(0), Lo0, Hi0); GetSplitVector(N->getOperand(1), Lo1, Hi1); unsigned PartElements = Lo0.getValueType().getVectorNumElements(); @@ -1377,7 +1412,7 @@ SDValue DAGTypeLegalizer::SplitVecOp_FP_ROUND(SDNode *N) { // The result has a legal vector type, but the input needs splitting. EVT ResVT = N->getValueType(0); SDValue Lo, Hi; - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); GetSplitVector(N->getOperand(0), Lo, Hi); EVT InVT = Lo.getValueType(); @@ -1434,27 +1469,31 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::VECTOR_SHUFFLE: Res = WidenVecRes_VECTOR_SHUFFLE(cast(N)); break; + case ISD::ADD: case ISD::AND: case ISD::BSWAP: + case ISD::MUL: + case ISD::MULHS: + case ISD::MULHU: + case ISD::OR: + case ISD::SUB: + case ISD::XOR: + Res = WidenVecRes_Binary(N); + break; + case ISD::FADD: case ISD::FCOPYSIGN: - case ISD::FDIV: case ISD::FMUL: case ISD::FPOW: - case ISD::FREM: case ISD::FSUB: - case ISD::MUL: - case ISD::MULHS: - case ISD::MULHU: - case ISD::OR: + case ISD::FDIV: + case ISD::FREM: case ISD::SDIV: - case ISD::SREM: case ISD::UDIV: + case ISD::SREM: case ISD::UREM: - case ISD::SUB: - case ISD::XOR: - Res = WidenVecRes_Binary(N); + Res = WidenVecRes_BinaryCanTrap(N); break; case ISD::FPOWI: @@ -1495,6 +1534,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { case ISD::FNEARBYINT: case ISD::FNEG: case ISD::FRINT: + case ISD::FROUND: case ISD::FSIN: case ISD::FSQRT: case ISD::FTRUNC: @@ -1512,7 +1552,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) { SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { // Ternary op widening. 
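SplitVecOp_TRUNCATE, simplified above with DAG.SplitVector, narrows in stages when the element shrinks by more than half: split the input, truncate each half to half the element width, concatenate, and let the next round of legalization finish the job. A model of one stage for v4i64 to v4i16 (truncateOneStage is illustrative; the remaining i32 to i16 round repeats the same pattern):

#include <array>
#include <cstdint>

// Model of the staged truncate: one round halves the element size
// (i64 -> i32) on each split half, then concatenates the halves.
std::array<int32_t, 4> truncateOneStage(const std::array<int64_t, 4> &in) {
  std::array<int64_t, 2> lo{in[0], in[1]}, hi{in[2], in[3]}; // split halves
  std::array<int32_t, 4> out;
  for (int i = 0; i < 2; ++i) {
    out[i] = static_cast<int32_t>(lo[i]);      // truncate the Lo half
    out[i + 2] = static_cast<int32_t>(hi[i]);  // truncate the Hi half
  }
  return out;                                  // CONCAT_VECTORS of the halves
}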
- DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); @@ -1522,8 +1562,17 @@ SDValue DAGTypeLegalizer::WidenVecRes_Ternary(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { // Binary op widening. + SDLoc dl(N); + EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); + SDValue InOp1 = GetWidenedVector(N->getOperand(0)); + SDValue InOp2 = GetWidenedVector(N->getOperand(1)); + return DAG.getNode(N->getOpcode(), dl, WidenVT, InOp1, InOp2); +} + +SDValue DAGTypeLegalizer::WidenVecRes_BinaryCanTrap(SDNode *N) { + // Binary op widening for operations that can trap. unsigned Opcode = N->getOpcode(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); EVT WidenEltVT = WidenVT.getVectorElementType(); EVT VT = WidenVT; @@ -1562,9 +1611,9 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { while (CurNumElts != 0) { while (CurNumElts >= NumElts) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp1, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, InOp2, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, VT, EOp1, EOp2); Idx += NumElts; CurNumElts -= NumElts; @@ -1577,9 +1626,11 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { if (NumElts == 1) { for (unsigned i = 0; i != CurNumElts; ++i, ++Idx) { SDValue EOp1 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp1, DAG.getIntPtrConstant(Idx)); + InOp1, DAG.getConstant(Idx, + TLI.getVectorIdxTy())); SDValue EOp2 = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, WidenEltVT, - InOp2, DAG.getIntPtrConstant(Idx)); + InOp2, DAG.getConstant(Idx, + TLI.getVectorIdxTy())); ConcatOps[ConcatEnd++] = DAG.getNode(Opcode, dl, WidenEltVT, EOp1, EOp2); } @@ -1617,7 +1668,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { unsigned NumToInsert = ConcatEnd - Idx - 1; for (unsigned i = 0, OpIdx = Idx+1; i < NumToInsert; i++, OpIdx++) { VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NextVT, VecOp, - ConcatOps[OpIdx], DAG.getIntPtrConstant(i)); + ConcatOps[OpIdx], DAG.getConstant(i, + TLI.getVectorIdxTy())); } ConcatOps[Idx+1] = VecOp; ConcatEnd = Idx + 2; @@ -1659,7 +1711,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Binary(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { SDValue InOp = N->getOperand(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned WidenNumElts = WidenVT.getVectorNumElements(); @@ -1705,7 +1757,8 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { SDValue InVal = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, InWidenVT, - InOp, DAG.getIntPtrConstant(0)); + InOp, DAG.getConstant(0, + TLI.getVectorIdxTy())); // Extract the input and convert the shorten input vector. 
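The split of WidenVecRes_Binary into a separate WidenVecRes_BinaryCanTrap path, visible above, reflects that ops such as integer division cannot safely run on the garbage lanes a widened vector carries; the trapping ops are instead applied chunk-by-chunk to defined lanes only. A scalar model of why the distinction matters (widenedDiv and the lane count are assumptions for illustration):

#include <array>
#include <cstdint>

// Model of BinaryCanTrap widening: apply the divide only to the defined
// lanes of a v3i32 widened to v4i32; the padding lane may hold garbage
// (possibly zero) and must never reach the divider.
std::array<int32_t, 4> widenedDiv(const std::array<int32_t, 4> &a,
                                  const std::array<int32_t, 4> &b,
                                  unsigned definedLanes) {
  std::array<int32_t, 4> out{};
  for (unsigned i = 0; i < definedLanes; ++i)
    out[i] = a[i] / b[i]; // safe: only defined lanes are divided
  return out;             // the padding lane is left as filler here
}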
if (N->getNumOperands() == 1) return DAG.getNode(Opcode, DL, WidenVT, InVal); @@ -1720,7 +1773,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_Convert(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, InEltVT, InOp, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); if (N->getNumOperands() == 1) Ops[i] = DAG.getNode(Opcode, DL, EltVT, Val); else @@ -1738,7 +1791,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_POWI(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); SDValue ShOp = N->getOperand(1); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); } SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { @@ -1757,14 +1810,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_Shift(SDNode *N) { if (ShVT != ShWidenVT) ShOp = ModifyToType(ShOp, ShWidenVT); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp, ShOp); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp, ShOp); } SDValue DAGTypeLegalizer::WidenVecRes_Unary(SDNode *N) { // Unary op widening. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), WidenVT, InOp); + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, InOp); } SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { @@ -1774,7 +1827,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_InregOp(SDNode *N) { .getVectorElementType(), WidenVT.getVectorNumElements()); SDValue WidenLHS = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, WidenLHS, DAG.getValueType(ExtVT)); } @@ -1788,7 +1841,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { EVT InVT = InOp.getValueType(); EVT VT = N->getValueType(0); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); switch (getTypeAction(InVT)) { case TargetLowering::TypeLegal: @@ -1868,19 +1921,21 @@ SDValue DAGTypeLegalizer::WidenVecRes_BITCAST(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Build a vector with undefined for the new nodes. EVT VT = N->getValueType(0); - EVT EltVT = VT.getVectorElementType(); + + // Integer BUILD_VECTOR operands may be larger than the node's vector element + // type. The UNDEFs need to have the same type as the existing operands. 
+ EVT EltVT = N->getOperand(0).getValueType(); unsigned NumElts = VT.getVectorNumElements(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned WidenNumElts = WidenVT.getVectorNumElements(); SmallVector NewOps(N->op_begin(), N->op_end()); - NewOps.reserve(WidenNumElts); - for (unsigned i = NumElts; i < WidenNumElts; ++i) - NewOps.push_back(DAG.getUNDEF(EltVT)); + assert(WidenNumElts >= NumElts && "Shrinking vector instead of widening!"); + NewOps.append(WidenNumElts - NumElts, DAG.getUNDEF(EltVT)); return DAG.getNode(ISD::BUILD_VECTOR, dl, WidenVT, &NewOps[0], NewOps.size()); } @@ -1888,7 +1943,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_BUILD_VECTOR(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { EVT InVT = N->getOperand(0).getValueType(); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned WidenNumElts = WidenVT.getVectorNumElements(); unsigned NumInElts = InVT.getVectorNumElements(); unsigned NumOperands = N->getNumOperands(); @@ -1946,7 +2001,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + DAG.getConstant(j, TLI.getVectorIdxTy())); } SDValue UndefVal = DAG.getUNDEF(EltVT); for (; Idx < WidenNumElts; ++Idx) @@ -1955,7 +2010,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONCAT_VECTORS(SDNode *N) { } SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue InOp = N->getOperand(0); SDValue RndOp = N->getOperand(3); SDValue SatOp = N->getOperand(4); @@ -2004,7 +2059,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { if (InVTNumElts % WidenNumElts == 0) { // Extract the input and convert the shorten input vector. 
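WidenVecRes_BUILD_VECTOR now pads with UNDEFs of the operands' own type, which for integer BUILD_VECTORs may be wider than the vector element type, and appends them all at once instead of one push_back per lane. A model of the padding step (widenBuildVector is illustrative; zero stands in for UNDEF):

#include <cstdint>
#include <vector>

// Model of widening a BUILD_VECTOR from NumElts to WidenNumElts lanes:
// keep the existing operands and append (WidenNumElts - NumElts) pads.
std::vector<int32_t> widenBuildVector(std::vector<int32_t> ops,
                                      size_t widenNumElts) {
  // Assert-equivalent: widening must never shrink the vector.
  if (ops.size() > widenNumElts)
    return ops;
  ops.insert(ops.end(), widenNumElts - ops.size(), 0); // 0 stands in for UNDEF
  return ops;
}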
InOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, InWidenVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); return DAG.getConvertRndSat(WidenVT, dl, InOp, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2020,7 +2075,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_CONVERT_RNDSAT(SDNode *N) { unsigned i; for (i=0; i < MinElts; ++i) { SDValue ExtVal = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i)); + DAG.getConstant(i, TLI.getVectorIdxTy())); Ops[i] = DAG.getConvertRndSat(WidenVT, dl, ExtVal, DTyOp, STyOp, RndOp, SatOp, CvtCode); } @@ -2038,7 +2093,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned WidenNumElts = WidenVT.getVectorNumElements(); SDValue InOp = N->getOperand(0); SDValue Idx = N->getOperand(1); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) InOp = GetWidenedVector(InOp); @@ -2063,7 +2118,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { unsigned i; for (i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(IdxVal+i)); + DAG.getConstant(IdxVal+i, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for (; i < WidenNumElts; ++i) @@ -2073,7 +2128,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_EXTRACT_SUBVECTOR(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_INSERT_VECTOR_ELT(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(ISD::INSERT_VECTOR_ELT, N->getDebugLoc(), + return DAG.getNode(ISD::INSERT_VECTOR_ELT, SDLoc(N), InOp.getValueType(), InOp, N->getOperand(1), N->getOperand(2)); } @@ -2096,7 +2151,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { if (LdChain.size() == 1) NewChain = LdChain[0]; else - NewChain = DAG.getNode(ISD::TokenFactor, LD->getDebugLoc(), MVT::Other, + NewChain = DAG.getNode(ISD::TokenFactor, SDLoc(LD), MVT::Other, &LdChain[0], LdChain.size()); // Modified the chain - switch anything that used the old chain to use @@ -2108,7 +2163,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_LOAD(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); - return DAG.getNode(ISD::SCALAR_TO_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::SCALAR_TO_VECTOR, SDLoc(N), WidenVT, N->getOperand(0)); } @@ -2132,14 +2187,14 @@ SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(1)); SDValue InOp2 = GetWidenedVector(N->getOperand(2)); assert(InOp1.getValueType() == WidenVT && InOp2.getValueType() == WidenVT); - return DAG.getNode(N->getOpcode(), N->getDebugLoc(), + return DAG.getNode(N->getOpcode(), SDLoc(N), WidenVT, Cond1, InOp1, InOp2); } SDValue DAGTypeLegalizer::WidenVecRes_SELECT_CC(SDNode *N) { SDValue InOp1 = GetWidenedVector(N->getOperand(2)); SDValue InOp2 = GetWidenedVector(N->getOperand(3)); - return DAG.getNode(ISD::SELECT_CC, N->getDebugLoc(), + return DAG.getNode(ISD::SELECT_CC, SDLoc(N), InOp1.getValueType(), N->getOperand(0), N->getOperand(1), InOp1, InOp2, N->getOperand(4)); } @@ -2153,7 +2208,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_SETCC(SDNode *N) { EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(0)); SDValue InOp2 = GetWidenedVector(N->getOperand(1)); - return DAG.getNode(ISD::SETCC, N->getDebugLoc(), WidenVT, + return 
DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2, N->getOperand(2)); } @@ -2164,7 +2219,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_UNDEF(SDNode *N) { SDValue DAGTypeLegalizer::WidenVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); unsigned NumElts = VT.getVectorNumElements(); @@ -2208,7 +2263,7 @@ SDValue DAGTypeLegalizer::WidenVecRes_VSETCC(SDNode *N) { InOp2.getValueType() == WidenInVT && "Input not widened to expected type!"); (void)WidenInVT; - return DAG.getNode(ISD::SETCC, N->getDebugLoc(), + return DAG.getNode(ISD::SETCC, SDLoc(N), WidenVT, InOp1, InOp2, N->getOperand(2)); } @@ -2277,7 +2332,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { // into some scalar code and create a nasty build vector. EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SDValue InOp = N->getOperand(0); if (getTypeAction(InOp.getValueType()) == TargetLowering::TypeWidenVector) @@ -2290,7 +2345,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_Convert(SDNode *N) { for (unsigned i=0; i < NumElts; ++i) Ops[i] = DAG.getNode(Opcode, dl, EltVT, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, InEltVT, InOp, - DAG.getIntPtrConstant(i))); + DAG.getConstant(i, TLI.getVectorIdxTy()))); return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } @@ -2299,7 +2354,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { EVT VT = N->getValueType(0); SDValue InOp = GetWidenedVector(N->getOperand(0)); EVT InWidenVT = InOp.getValueType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // Check if we can convert between two legal vector types and extract. unsigned InWidenSize = InWidenVT.getSizeInBits(); @@ -2311,7 +2366,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_BITCAST(SDNode *N) { if (TLI.isTypeLegal(NewVT)) { SDValue BitOp = DAG.getNode(ISD::BITCAST, dl, NewVT, InOp); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, VT, BitOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); } } @@ -2324,7 +2379,7 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { // nasty build vector. 
EVT VT = N->getValueType(0); EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); unsigned NumElts = VT.getVectorNumElements(); SmallVector Ops(NumElts); @@ -2339,20 +2394,20 @@ SDValue DAGTypeLegalizer::WidenVecOp_CONCAT_VECTORS(SDNode *N) { InOp = GetWidenedVector(InOp); for (unsigned j=0; j < NumInElts; ++j) Ops[Idx++] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(j)); + DAG.getConstant(j, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BUILD_VECTOR, dl, VT, &Ops[0], NumElts); } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_SUBVECTOR(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, SDLoc(N), N->getValueType(0), InOp, N->getOperand(1)); } SDValue DAGTypeLegalizer::WidenVecOp_EXTRACT_VECTOR_ELT(SDNode *N) { SDValue InOp = GetWidenedVector(N->getOperand(0)); - return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getDebugLoc(), + return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, SDLoc(N), N->getValueType(0), InOp, N->getOperand(1)); } @@ -2370,14 +2425,14 @@ SDValue DAGTypeLegalizer::WidenVecOp_STORE(SDNode *N) { if (StChain.size() == 1) return StChain[0]; else - return DAG.getNode(ISD::TokenFactor, ST->getDebugLoc(), + return DAG.getNode(ISD::TokenFactor, SDLoc(ST), MVT::Other,&StChain[0],StChain.size()); } SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SDValue InOp0 = GetWidenedVector(N->getOperand(0)); SDValue InOp1 = GetWidenedVector(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // WARNING: In this code we widen the compare instruction with garbage. // This garbage may contain denormal floats which may be slow. Is this a real @@ -2385,8 +2440,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { // Get a new SETCC node to compare the newly widened operands. // Only some of the compared elements are legal. - EVT SVT = TLI.getSetCCResultType(InOp0.getValueType()); - SDValue WideSETCC = DAG.getNode(ISD::SETCC, N->getDebugLoc(), + EVT SVT = TLI.getSetCCResultType(*DAG.getContext(), InOp0.getValueType()); + SDValue WideSETCC = DAG.getNode(ISD::SETCC, SDLoc(N), SVT, InOp0, InOp1, N->getOperand(2)); // Extract the needed results from the result vector. @@ -2394,7 +2449,8 @@ SDValue DAGTypeLegalizer::WidenVecOp_SETCC(SDNode *N) { SVT.getVectorElementType(), N->getValueType(0).getVectorNumElements()); SDValue CC = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, - ResVT, WideSETCC, DAG.getIntPtrConstant(0)); + ResVT, WideSETCC, DAG.getConstant(0, + TLI.getVectorIdxTy())); return PromoteTargetBoolean(CC, N->getValueType(0)); } @@ -2465,9 +2521,10 @@ static EVT FindMemType(SelectionDAG& DAG, const TargetLowering &TLI, // LDOps: Load operators to build a vector type // [Start,End) the list of loads to use. 
static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, - SmallVector& LdOps, + SmallVectorImpl &LdOps, unsigned Start, unsigned End) { - DebugLoc dl = LdOps[Start].getDebugLoc(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SDLoc dl(LdOps[Start]); EVT LdTy = LdOps[Start].getValueType(); unsigned Width = VecTy.getSizeInBits(); unsigned NumElts = Width / LdTy.getSizeInBits(); @@ -2487,12 +2544,12 @@ static SDValue BuildVectorFromScalar(SelectionDAG& DAG, EVT VecTy, LdTy = NewLdTy; } VecOp = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, NewVecVT, VecOp, LdOps[i], - DAG.getIntPtrConstant(Idx++)); + DAG.getConstant(Idx++, TLI.getVectorIdxTy())); } return DAG.getNode(ISD::BITCAST, dl, VecTy, VecOp); } -SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, +SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVectorImpl &LdChain, LoadSDNode *LD) { // The strategy assumes that we can efficiently load powers of two widths. // The routines chops the vector into the largest vector loads with the same @@ -2501,7 +2558,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); unsigned WidenWidth = WidenVT.getSizeInBits(); EVT LdVT = LD->getMemoryVT(); - DebugLoc dl = LD->getDebugLoc(); + SDLoc dl(LD); assert(LdVT.isVector() && WidenVT.isVector()); assert(LdVT.getVectorElementType() == WidenVT.getVectorElementType()); @@ -2512,6 +2569,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); bool isInvariant = LD->isInvariant(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); int LdWidth = LdVT.getSizeInBits(); int WidthDiff = WidenWidth - LdWidth; // Difference @@ -2521,7 +2579,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, EVT NewVT = FindMemType(DAG, TLI, LdWidth, WidenVT, LdAlign, WidthDiff); int NewVTWidth = NewVT.getSizeInBits(); SDValue LdOp = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(), - isVolatile, isNonTemporal, isInvariant, Align); + isVolatile, isNonTemporal, isInvariant, Align, + TBAAInfo); LdChain.push_back(LdOp.getValue(1)); // Check if we can load the element with one instruction @@ -2557,7 +2616,7 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, unsigned Increment = NewVTWidth / 8; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); SDValue L; if (LdWidth < NewVTWidth) { @@ -2566,7 +2625,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, NewVTWidth = NewVT.getSizeInBits(); L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment)); + isNonTemporal, isInvariant, MinAlign(Align, Increment), + TBAAInfo); LdChain.push_back(L.getValue(1)); if (L->getValueType(0).isVector()) { SmallVector Loads; @@ -2582,7 +2642,8 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, } else { L = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo().getWithOffset(Offset), isVolatile, - isNonTemporal, isInvariant, MinAlign(Align, Increment)); + isNonTemporal, isInvariant, MinAlign(Align, Increment), + TBAAInfo); LdChain.push_back(L.getValue(1)); } @@ -2646,14 +2707,14 @@ SDValue DAGTypeLegalizer::GenWidenVectorLoads(SmallVector &LdChain, } SDValue 
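GenWidenVectorLoads chops an odd-sized vector load into the largest power-of-two pieces, advancing the base pointer by each piece's byte width; the hunks above additionally thread TBAA info through every partial load. A model of the chopping loop for a 12-byte load (choppedLoad is a made-up name and the widths are illustrative):

#include <cstdint>
#include <cstring>

// Model of the power-of-two load chopping: read 12 bytes as an 8-byte
// piece followed by a 4-byte piece, bumping the pointer in between.
void choppedLoad(const uint8_t *basePtr, uint8_t *dst) {
  unsigned remaining = 12;
  unsigned offset = 0;
  while (remaining) {
    unsigned piece = remaining >= 8 ? 8 : remaining; // largest fitting width
    std::memcpy(dst + offset, basePtr + offset, piece);
    offset += piece;                                 // BasePtr += Increment
    remaining -= piece;
  }
}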
-DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, - LoadSDNode * LD, +DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVectorImpl<SDValue> &LdChain, + LoadSDNode *LD, ISD::LoadExtType ExtType) { // For extension loads, it may not be more efficient to chop up the vector // and then extend it. Instead, we unroll the load and build a new vector. EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(),LD->getValueType(0)); EVT LdVT = LD->getMemoryVT(); - DebugLoc dl = LD->getDebugLoc(); + SDLoc dl(LD); assert(LdVT.isVector() && WidenVT.isVector()); // Load information @@ -2662,6 +2723,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, unsigned Align = LD->getAlignment(); bool isVolatile = LD->isVolatile(); bool isNonTemporal = LD->isNonTemporal(); + const MDNode *TBAAInfo = LD->getTBAAInfo(); EVT EltVT = WidenVT.getVectorElementType(); EVT LdEltVT = LdVT.getVectorElementType(); @@ -2673,15 +2735,17 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, unsigned Increment = LdEltVT.getSizeInBits() / 8; Ops[0] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, BasePtr, LD->getPointerInfo(), - LdEltVT, isVolatile, isNonTemporal, Align); + LdEltVT, isVolatile, isNonTemporal, Align, TBAAInfo); LdChain.push_back(Ops[0].getValue(1)); unsigned i = 0, Offset = Increment; for (i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); + BasePtr, + DAG.getConstant(Offset, + BasePtr.getValueType())); Ops[i] = DAG.getExtLoad(ExtType, dl, EltVT, Chain, NewBasePtr, LD->getPointerInfo().getWithOffset(Offset), LdEltVT, - isVolatile, isNonTemporal, Align); + isVolatile, isNonTemporal, Align, TBAAInfo); LdChain.push_back(Ops[i].getValue(1)); } @@ -2694,7 +2758,7 @@ DAGTypeLegalizer::GenWidenVectorExtLoads(SmallVector<SDValue, 16>& LdChain, } -void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, +void DAGTypeLegalizer::GenWidenVectorStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // The strategy assumes that we can efficiently store powers of two widths.
// The routine chops the vector into the largest vector stores with the same @@ -2704,8 +2768,9 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); - DebugLoc dl = ST->getDebugLoc(); + SDLoc dl(ST); EVT StVT = ST->getMemoryVT(); unsigned StWidth = StVT.getSizeInBits(); @@ -2726,16 +2791,16 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, unsigned NumVTElts = NewVT.getVectorNumElements(); do { SDValue EOp = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NewVT, ValOp, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); StWidth -= NewVTWidth; Offset += Increment; Idx += NumVTElts; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); } else { // Cast the vector to the scalar type we can store @@ -2746,15 +2811,15 @@ void DAGTypeLegalizer::GenWidenVectorStores(SmallVector<SDValue, 16>& StChain, Idx = Idx * ValEltWidth / NewVTWidth; do { SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, NewVT, VecOp, - DAG.getIntPtrConstant(Idx++)); + DAG.getConstant(Idx++, TLI.getVectorIdxTy())); StChain.push_back(DAG.getStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo().getWithOffset(Offset), isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); StWidth -= NewVTWidth; Offset += Increment; BasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), BasePtr, - DAG.getIntPtrConstant(Increment)); + DAG.getConstant(Increment, BasePtr.getValueType())); } while (StWidth != 0 && StWidth >= NewVTWidth); // Restore index back to be relative to the original widen element type Idx = Idx * NewVTWidth / ValEltWidth; @@ -2763,7 +2828,7 @@ } void -DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector<SDValue, 16>& StChain, +DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVectorImpl<SDValue> &StChain, StoreSDNode *ST) { // For truncating stores, it may not be more efficient to truncate the vector // and then store it. Instead, we extract each element and then store it.
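One semantic improvement rides along with the mechanical changes here: the original operation's TBAA tag is now threaded through every partial load and store the widening helpers emit, so alias information survives when one memory operation becomes several. The pattern, using the names from GenWidenVectorLoads above (a sketch, not part of the patch):

    const MDNode *TBAAInfo = LD->getTBAAInfo(); // may be null; that is fine
    SDValue Part = DAG.getLoad(NewVT, dl, Chain, BasePtr, LD->getPointerInfo(),
                               isVolatile, isNonTemporal, isInvariant, Align,
                               TBAAInfo);       // new trailing parameter
    // getExtLoad, getStore and getTruncStore grew the same parameter.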
@@ -2772,8 +2837,9 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector& StChain, unsigned Align = ST->getAlignment(); bool isVolatile = ST->isVolatile(); bool isNonTemporal = ST->isNonTemporal(); + const MDNode *TBAAInfo = ST->getTBAAInfo(); SDValue ValOp = GetWidenedVector(ST->getValue()); - DebugLoc dl = ST->getDebugLoc(); + SDLoc dl(ST); EVT StVT = ST->getMemoryVT(); EVT ValVT = ValOp.getValueType(); @@ -2791,20 +2857,22 @@ DAGTypeLegalizer::GenWidenVectorTruncStores(SmallVector& StChain, unsigned Increment = ValEltVT.getSizeInBits() / 8; unsigned NumElts = StVT.getVectorNumElements(); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, BasePtr, ST->getPointerInfo(), StEltVT, - isVolatile, isNonTemporal, Align)); + isVolatile, isNonTemporal, Align, + TBAAInfo)); unsigned Offset = Increment; for (unsigned i=1; i < NumElts; ++i, Offset += Increment) { SDValue NewBasePtr = DAG.getNode(ISD::ADD, dl, BasePtr.getValueType(), - BasePtr, DAG.getIntPtrConstant(Offset)); + BasePtr, DAG.getConstant(Offset, + BasePtr.getValueType())); SDValue EOp = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, ValEltVT, ValOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); StChain.push_back(DAG.getTruncStore(Chain, dl, EOp, NewBasePtr, ST->getPointerInfo().getWithOffset(Offset), StEltVT, isVolatile, isNonTemporal, - MinAlign(Align, Offset))); + MinAlign(Align, Offset), TBAAInfo)); } } @@ -2816,7 +2884,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { EVT InVT = InOp.getValueType(); assert(InVT.getVectorElementType() == NVT.getVectorElementType() && "input and widen element type must match"); - DebugLoc dl = InOp.getDebugLoc(); + SDLoc dl(InOp); // Check if InOp already has the right width. if (InVT == NVT) @@ -2837,7 +2905,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { if (WidenNumElts < InNumElts && InNumElts % WidenNumElts) return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, InOp, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); // Fall back to extract and build. 
SmallVector Ops(WidenNumElts); @@ -2846,7 +2914,7 @@ SDValue DAGTypeLegalizer::ModifyToType(SDValue InOp, EVT NVT) { unsigned Idx; for (Idx = 0; Idx < MinNumElts; ++Idx) Ops[Idx] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, InOp, - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI.getVectorIdxTy())); SDValue UndefVal = DAG.getUNDEF(EltVT); for ( ; Idx < WidenNumElts; ++Idx) diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index 473e138..1dd2128 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -42,11 +42,11 @@ static cl::opt RegPressureThreshold( ResourcePriorityQueue::ResourcePriorityQueue(SelectionDAGISel *IS) : Picker(this), - InstrItins(IS->getTargetLowering().getTargetMachine().getInstrItineraryData()) + InstrItins(IS->getTargetLowering()->getTargetMachine().getInstrItineraryData()) { - TII = IS->getTargetLowering().getTargetMachine().getInstrInfo(); - TRI = IS->getTargetLowering().getTargetMachine().getRegisterInfo(); - TLI = &IS->getTargetLowering(); + TII = IS->getTargetLowering()->getTargetMachine().getInstrInfo(); + TRI = IS->getTargetLowering()->getTargetMachine().getRegisterInfo(); + TLI = IS->getTargetLowering(); const TargetMachine &tm = (*IS->MF).getTarget(); ResourcesModel = tm.getInstrInfo()->CreateTargetScheduleState(&tm,NULL); @@ -389,10 +389,9 @@ signed ResourcePriorityQueue::regPressureDelta(SUnit *SU, bool RawPressure) { // Constants used to denote relative importance of // heuristic components for cost computation. static const unsigned PriorityOne = 200; -static const unsigned PriorityTwo = 100; -static const unsigned PriorityThree = 50; -static const unsigned PriorityFour = 15; -static const unsigned PriorityFive = 5; +static const unsigned PriorityTwo = 50; +static const unsigned PriorityThree = 15; +static const unsigned PriorityFour = 5; static const unsigned ScaleOne = 20; static const unsigned ScaleTwo = 10; static const unsigned ScaleThree = 5; @@ -449,7 +448,7 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { if (N->isMachineOpcode()) { const MCInstrDesc &TID = TII->get(N->getMachineOpcode()); if (TID.isCall()) - ResCount += (PriorityThree + (ScaleThree*N->getNumValues())); + ResCount += (PriorityTwo + (ScaleThree*N->getNumValues())); } else switch (N->getOpcode()) { @@ -457,11 +456,11 @@ signed ResourcePriorityQueue::SUSchedulingCost(SUnit *SU) { case ISD::TokenFactor: case ISD::CopyFromReg: case ISD::CopyToReg: - ResCount += PriorityFive; + ResCount += PriorityFour; break; case ISD::INLINEASM: - ResCount += PriorityFour; + ResCount += PriorityThree; break; } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h deleted file mode 100644 index 7e7b897..0000000 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SDNodeOrdering.h +++ /dev/null @@ -1,56 +0,0 @@ -//===-- llvm/CodeGen/SDNodeOrdering.h - SDNode Ordering ---------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares the SDNodeOrdering class. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_SDNODEORDERING_H -#define LLVM_CODEGEN_SDNODEORDERING_H - -#include "llvm/ADT/DenseMap.h" - -namespace llvm { - -class SDNode; - -/// SDNodeOrdering - Maps a unique (monotonically increasing) value to each -/// SDNode that roughly corresponds to the ordering of the original LLVM -/// instruction. This is used for turning off scheduling, because we'll forgo -/// the normal scheduling algorithms and output the instructions according to -/// this ordering. -class SDNodeOrdering { - DenseMap OrderMap; - - void operator=(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; - SDNodeOrdering(const SDNodeOrdering&) LLVM_DELETED_FUNCTION; -public: - SDNodeOrdering() {} - - void add(const SDNode *Node, unsigned NewOrder) { - unsigned &OldOrder = OrderMap[Node]; - if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder)) - OldOrder = NewOrder; - } - void remove(const SDNode *Node) { - DenseMap::iterator Itr = OrderMap.find(Node); - if (Itr != OrderMap.end()) - OrderMap.erase(Itr); - } - void clear() { - OrderMap.clear(); - } - unsigned getOrder(const SDNode *Node) { - return OrderMap[Node]; - } -}; - -} // end llvm namespace - -#endif diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index d1f36cb..6c5e0ab 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -102,8 +102,8 @@ private: void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, - SmallVector&); - bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); + SmallVectorImpl&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl&); void ListScheduleBottomUp(); /// forceUnitLatencies - The fast scheduler doesn't care about real latencies. @@ -387,7 +387,7 @@ SUnit *ScheduleDAGFast::CopyAndMoveSuccessors(SUnit *SU) { void ScheduleDAGFast::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, const TargetRegisterClass *DestRC, const TargetRegisterClass *SrcRC, - SmallVector &Copies) { + SmallVectorImpl &Copies) { SUnit *CopyFromSU = newSUnit(static_cast(NULL)); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; @@ -448,7 +448,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, std::vector &LiveRegDefs, SmallSet &RegAdded, - SmallVector &LRegs, + SmallVectorImpl &LRegs, const TargetRegisterInfo *TRI) { bool Added = false; for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { @@ -467,7 +467,7 @@ static bool CheckForLiveRegDef(SUnit *SU, unsigned Reg, /// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, - SmallVector &LRegs){ + SmallVectorImpl &LRegs){ if (NumLiveRegs == 0) return false; @@ -567,7 +567,7 @@ void ScheduleDAGFast::ListScheduleBottomUp() { // "expensive to copy" values to break the dependency. In case even // that doesn't work, insert cross class copies. 
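The SmallVector-to-SmallVectorImpl conversions in these scheduler interfaces all apply one idiom: a function that only reads or appends should take SmallVectorImpl<T>&, so callers can pick any inline capacity without changing the callee. A self-contained sketch (the names here are illustrative, not from the patch):

    #include "llvm/ADT/SmallVector.h"
    using namespace llvm;

    // Taking SmallVectorImpl<unsigned>& decouples the callee from the
    // caller's choice of inline capacity.
    static void collectRegs(SmallVectorImpl<unsigned> &Out) {
      Out.push_back(42);
    }

    void demo() {
      SmallVector<unsigned, 4> Regs; // any inline size binds to the same API
      collectRegs(Regs);
    }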
SUnit *TrySU = NotReady[0]; - SmallVector &LRegs = LRegsMap[TrySU]; + SmallVectorImpl &LRegs = LRegsMap[TrySU]; assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index c009cfc..1a562d7 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -229,8 +229,8 @@ private: void InsertCopiesAndMoveSuccs(SUnit*, unsigned, const TargetRegisterClass*, const TargetRegisterClass*, - SmallVector&); - bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); + SmallVectorImpl&); + bool DelayForLiveRegsBottomUp(SUnit*, SmallVectorImpl&); void releaseInterferences(unsigned Reg = 0); @@ -718,7 +718,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { // indicate the scheduled cycle. SU->setHeightToAtLeast(CurCycle); - // Reserve resources for the scheduled intruction. + // Reserve resources for the scheduled instruction. EmitNode(SU); Sequence.push_back(SU); @@ -1133,9 +1133,9 @@ SUnit *ScheduleDAGRRList::CopyAndMoveSuccessors(SUnit *SU) { /// InsertCopiesAndMoveSuccs - Insert register copies and move all /// scheduled successors of the given SUnit to the last copy. void ScheduleDAGRRList::InsertCopiesAndMoveSuccs(SUnit *SU, unsigned Reg, - const TargetRegisterClass *DestRC, - const TargetRegisterClass *SrcRC, - SmallVector &Copies) { + const TargetRegisterClass *DestRC, + const TargetRegisterClass *SrcRC, + SmallVectorImpl &Copies) { SUnit *CopyFromSU = CreateNewSUnit(NULL); CopyFromSU->CopySrcRC = SrcRC; CopyFromSU->CopyDstRC = DestRC; @@ -1205,7 +1205,7 @@ static EVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, std::vector &LiveRegDefs, SmallSet &RegAdded, - SmallVector &LRegs, + SmallVectorImpl &LRegs, const TargetRegisterInfo *TRI) { for (MCRegAliasIterator AliasI(Reg, TRI, true); AliasI.isValid(); ++AliasI) { @@ -1227,7 +1227,7 @@ static void CheckForLiveRegDef(SUnit *SU, unsigned Reg, static void CheckForLiveRegDefMasked(SUnit *SU, const uint32_t *RegMask, std::vector &LiveRegDefs, SmallSet &RegAdded, - SmallVector &LRegs) { + SmallVectorImpl &LRegs) { // Look at all live registers. Skip Reg0 and the special CallResource. for (unsigned i = 1, e = LiveRegDefs.size()-1; i != e; ++i) { if (!LiveRegDefs[i]) continue; @@ -1252,7 +1252,7 @@ static const uint32_t *getNodeRegMask(const SDNode *N) { /// If the specific node is the last one that's available to schedule, do /// whatever is necessary (i.e. backtracking or cloning) to make it possible. bool ScheduleDAGRRList:: -DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { +DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl &LRegs) { if (NumLiveRegs == 0) return false; @@ -1331,7 +1331,7 @@ void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { SUnit *SU = Interferences[i-1]; LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); if (Reg) { - SmallVector &LRegs = LRegsPos->second; + SmallVectorImpl &LRegs = LRegsPos->second; if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) continue; } @@ -1385,7 +1385,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // to resolve it. 
for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { SUnit *TrySU = Interferences[i]; - SmallVector &LRegs = LRegsMap[TrySU]; + SmallVectorImpl &LRegs = LRegsMap[TrySU]; // Try unscheduling up to the point where it's safe to schedule // this node. @@ -1433,7 +1433,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { // insert cross class copies. // If it's not too expensive, i.e. cost != -1, issue copies. SUnit *TrySU = Interferences[0]; - SmallVector &LRegs = LRegsMap[TrySU]; + SmallVectorImpl &LRegs = LRegsMap[TrySU]; assert(LRegs.size() == 1 && "Can't handle this yet!"); unsigned Reg = LRegs[0]; SUnit *LRDef = LiveRegDefs[Reg]; @@ -1692,7 +1692,7 @@ public: unsigned getNodeOrdering(const SUnit *SU) const { if (!SU->getNode()) return 0; - return scheduleDAG->DAG->GetOrdering(SU->getNode()); + return SU->getNode()->getIROrder(); } bool empty() const { return Queue.empty(); } @@ -2401,7 +2401,8 @@ static bool BURRSort(SUnit *left, SUnit *right, RegReductionPQBase *SPQ) { bool RHasPhysReg = right->hasPhysRegDefs; if (LHasPhysReg != RHasPhysReg) { #ifndef NDEBUG - const char *const PhysRegMsg[] = {" has no physreg"," defines a physreg"}; + static const char *const PhysRegMsg[] = { " has no physreg", + " defines a physreg" }; #endif DEBUG(dbgs() << " SU (" << left->NodeNum << ") " << PhysRegMsg[LHasPhysReg] << " SU(" << right->NodeNum << ") " @@ -3013,7 +3014,7 @@ llvm::createHybridListDAGScheduler(SelectionDAGISel *IS, const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const TargetLowering *TLI = &IS->getTargetLowering(); + const TargetLowering *TLI = IS->getTargetLowering(); HybridBURRPriorityQueue *PQ = new HybridBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); @@ -3029,7 +3030,7 @@ llvm::createILPListDAGScheduler(SelectionDAGISel *IS, const TargetMachine &TM = IS->TM; const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - const TargetLowering *TLI = &IS->getTargetLowering(); + const TargetLowering *TLI = IS->getTargetLowering(); ILPBURRPriorityQueue *PQ = new ILPBURRPriorityQueue(*IS->MF, true, false, TII, TRI, TLI); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index b22440d..054e3dd 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -690,21 +690,11 @@ void ScheduleDAGSDNodes::VerifyScheduledSequence(bool isBottomUp) { } #endif // NDEBUG -namespace { - struct OrderSorter { - bool operator()(const std::pair &A, - const std::pair &B) { - return A.first < B.first; - } - }; -} - /// ProcessSDDbgValues - Process SDDbgValues associated with this node. -static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, - InstrEmitter &Emitter, - SmallVector, 32> &Orders, - DenseMap &VRBaseMap, - unsigned Order) { +static void +ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + SmallVectorImpl > &Orders, + DenseMap &VRBaseMap, unsigned Order) { if (!N->getHasDebugValue()) return; @@ -731,12 +721,12 @@ static void ProcessSDDbgValues(SDNode *N, SelectionDAG *DAG, // ProcessSourceNode - Process nodes with source order numbers. These are added // to a vector which EmitSchedule uses to determine how to insert dbg_value // instructions in the right order. 
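The getNodeOrdering() hunk above shows the other half of the SDNodeOrdering removal: the ordering is no longer a DenseMap side table owned by the DAG but a field of each node, queried through SDNode::getIROrder(). Roughly (a sketch, assuming an SUnit *SU as in the scheduler code):

    // Old (3.3): consult a side table via the DAG.
    //   unsigned Order = scheduleDAG->DAG->GetOrdering(SU->getNode());
    // New (3.4): the node carries its IR order directly.
    unsigned Order = SU->getNode() ? SU->getNode()->getIROrder() : 0;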
-static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, - InstrEmitter &Emitter, - DenseMap &VRBaseMap, - SmallVector, 32> &Orders, - SmallSet &Seen) { - unsigned Order = DAG->GetOrdering(N); +static void +ProcessSourceNode(SDNode *N, SelectionDAG *DAG, InstrEmitter &Emitter, + DenseMap &VRBaseMap, + SmallVectorImpl > &Orders, + SmallSet &Seen) { + unsigned Order = N->getIROrder(); if (!Order || !Seen.insert(Order)) { // Process any valid SDDbgValues even if node does not have any order // assigned. @@ -745,7 +735,10 @@ static void ProcessSourceNode(SDNode *N, SelectionDAG *DAG, } MachineBasicBlock *BB = Emitter.getBlock(); - if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI()) { + if (Emitter.getInsertPos() == BB->begin() || BB->back().isPHI() || + // Fast-isel may have inserted some instructions, in which case the + // BB->back().isPHI() test will not fire when we want it to. + prior(Emitter.getInsertPos())->isPHI()) { // Did not insert any instruction. Orders.push_back(std::make_pair(Order, (MachineInstr*)0)); return; @@ -858,7 +851,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Sort the source order instructions and use the order to insert debug // values. - std::sort(Orders.begin(), Orders.end(), OrderSorter()); + std::sort(Orders.begin(), Orders.end(), less_first()); SDDbgInfo::DbgIterator DI = DAG->DbgBegin(); SDDbgInfo::DbgIterator DE = DAG->DbgEnd(); @@ -883,7 +876,7 @@ EmitSchedule(MachineBasicBlock::iterator &InsertPos) { // Insert at the instruction, which may be in a different // block, if the block was split by a custom inserter. MachineBasicBlock::iterator Pos = MI; - MI->getParent()->insert(llvm::next(Pos), DbgMI); + MI->getParent()->insert(Pos, DbgMI); } } } diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 15235c8..45d5a4f 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -13,7 +13,6 @@ #include "llvm/CodeGen/SelectionDAG.h" #include "SDNodeDbgValue.h" -#include "SDNodeOrdering.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" @@ -636,9 +635,6 @@ void SelectionDAG::DeallocateNode(SDNode *N) { NodeAllocator.Deallocate(AllNodes.remove(N)); - // Remove the ordering of this node. - Ordering->remove(N); - // If any of the SDDbgValue nodes refer to this SDNode, invalidate them. ArrayRef DbgVals = DbgInfo->getSDDbgValues(N); for (unsigned i = 0, e = DbgVals.size(); i != e; ++i) @@ -868,30 +864,30 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { PointerType::get(Type::getInt8Ty(*getContext()), 0) : VT.getTypeForEVT(*getContext()); - return TLI.getDataLayout()->getABITypeAlignment(Ty); + return TM.getTargetLowering()->getDataLayout()->getABITypeAlignment(Ty); } // EntryNode could meaningfully have debug info if we can find it... 
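Dropping the local OrderSorter struct in favor of less_first() above relies on the generic comparator from llvm/ADT/STLExtras.h, which compares std::pairs by their first member only. The same sort in isolation (a sketch under that assumption; the element type is illustrative):

    #include "llvm/ADT/STLExtras.h"
    #include <algorithm>
    #include <utility>
    #include <vector>

    void sortByOrder(std::vector<std::pair<unsigned, void *> > &Orders) {
      // Equivalent to the deleted OrderSorter: order by pair.first.
      std::sort(Orders.begin(), Orders.end(), llvm::less_first());
    }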
SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) - : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), - getVTList(MVT::Other)), - Root(getEntryNode()), Ordering(0), UpdateListeners(0) { + : TM(tm), TSI(*tm.getSelectionDAGInfo()), TTI(0), TLI(0), OptLevel(OL), + EntryNode(ISD::EntryToken, 0, DebugLoc(), getVTList(MVT::Other)), + Root(getEntryNode()), NewNodesMustHaveLegalTypes(false), + UpdateListeners(0) { AllNodes.push_back(&EntryNode); - Ordering = new SDNodeOrdering(); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) { +void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti, + const TargetLowering *tli) { MF = &mf; TTI = tti; + TLI = tli; Context = &mf.getFunction()->getContext(); } SelectionDAG::~SelectionDAG() { assert(!UpdateListeners && "Dangling registered DAGUpdateListeners"); allnodes_clear(); - delete Ordering; delete DbgInfo; } @@ -918,29 +914,28 @@ void SelectionDAG::clear() { EntryNode.UseList = 0; AllNodes.push_back(&EntryNode); Root = getEntryNode(); - Ordering->clear(); DbgInfo->clear(); } -SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getAnyExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ANY_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::SIGN_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, SDLoc DL, EVT VT) { return VT.bitsGT(Op.getValueType()) ? getNode(ISD::ZERO_EXTEND, DL, VT, Op) : getNode(ISD::TRUNCATE, DL, VT, Op); } -SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, SDLoc DL, EVT VT) { assert(!VT.isVector() && "getZeroExtendInReg should use the vector element type instead of " "the vector type!"); @@ -954,7 +949,7 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) { /// getNOT - Create a bitwise NOT operation as (XOR Val, -1). /// -SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) { +SDValue SelectionDAG::getNOT(SDLoc DL, SDValue Val, EVT VT) { EVT EltVT = VT.getScalarType(); SDValue NegOne = getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT); @@ -979,16 +974,66 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { EVT EltVT = VT.getScalarType(); const ConstantInt *Elt = &Val; + const TargetLowering *TLI = TM.getTargetLowering(); + // In some cases the vector type is legal but the element type is illegal and // needs to be promoted, for example v8i8 on ARM. In this case, promote the // inserted value (the type does not need to match the vector element type). // Any extra bits introduced will be truncated away. 
- if (VT.isVector() && TLI.getTypeAction(*getContext(), EltVT) == + if (VT.isVector() && TLI->getTypeAction(*getContext(), EltVT) == TargetLowering::TypePromoteInteger) { - EltVT = TLI.getTypeToTransformTo(*getContext(), EltVT); + EltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); APInt NewVal = Elt->getValue().zext(EltVT.getSizeInBits()); Elt = ConstantInt::get(*getContext(), NewVal); } + // In other cases the element type is illegal and needs to be expanded, for + // example v2i64 on MIPS32. In this case, find the nearest legal type, split + // the value into n parts and use a vector type with n-times the elements. + // Then bitcast to the type requested. + // Legalizing constants too early makes the DAGCombiner's job harder so we + // only legalize if the DAG tells us we must produce legal types. + else if (NewNodesMustHaveLegalTypes && VT.isVector() && + TLI->getTypeAction(*getContext(), EltVT) == + TargetLowering::TypeExpandInteger) { + APInt NewVal = Elt->getValue(); + EVT ViaEltVT = TLI->getTypeToTransformTo(*getContext(), EltVT); + unsigned ViaEltSizeInBits = ViaEltVT.getSizeInBits(); + unsigned ViaVecNumElts = VT.getSizeInBits() / ViaEltSizeInBits; + EVT ViaVecVT = EVT::getVectorVT(*getContext(), ViaEltVT, ViaVecNumElts); + + // Check the temporary vector is the correct size. If this fails then + // getTypeToTransformTo() probably returned a type whose size (in bits) + // isn't a power-of-2 factor of the requested type size. + assert(ViaVecVT.getSizeInBits() == VT.getSizeInBits()); + + SmallVector EltParts; + for (unsigned i = 0; i < ViaVecNumElts / VT.getVectorNumElements(); ++i) { + EltParts.push_back(getConstant(NewVal.lshr(i * ViaEltSizeInBits) + .trunc(ViaEltSizeInBits), + ViaEltVT, isT)); + } + + // EltParts is currently in little endian order. If we actually want + // big-endian order then reverse it now. + if (TLI->isBigEndian()) + std::reverse(EltParts.begin(), EltParts.end()); + + // The elements must be reversed when the element order is different + // to the endianness of the elements (because the BITCAST is itself a + // vector shuffle in this situation). However, we do not need any code to + // perform this reversal because getConstant() is producing a vector + // splat. + // This situation occurs in MIPS MSA. 
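The new expand-and-bitcast path is easiest to follow on a concrete splat. Assuming a little-endian target where i64 is expanded to i32, as in the MIPS32 case the comment mentions, a hypothetical request would proceed roughly like this (a worked example, not code from the patch):

    // getConstant(0x0000000100000002, v2i64) with NewNodesMustHaveLegalTypes:
    //   EltVT = i64 is TypeExpandInteger; ViaEltVT = i32, ViaVecNumElts = 4
    //   EltParts = { 0x00000002, 0x00000001 }  // NewVal.lshr(0|32).trunc(32)
    //   Ops      = { 2, 1, 2, 1 }              // EltParts once per element
    //   BUILD_VECTOR v4i32 <2, 1, 2, 1>, then BITCAST to the requested v2i64
    // On a big-endian target EltParts would be reversed before the splat.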
+ + SmallVector Ops; + for (unsigned i = 0; i < VT.getVectorNumElements(); ++i) + Ops.insert(Ops.end(), EltParts.begin(), EltParts.end()); + + SDValue Result = getNode(ISD::BITCAST, SDLoc(), VT, + getNode(ISD::BUILD_VECTOR, SDLoc(), ViaVecVT, + &Ops[0], Ops.size())); + return Result; + } assert(Elt->getBitWidth() == EltVT.getSizeInBits() && "APInt size does not match type size!"); @@ -1012,13 +1057,13 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) { if (VT.isVector()) { SmallVector Ops; Ops.assign(VT.getVectorNumElements(), Result); - Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size()); } return Result; } SDValue SelectionDAG::getIntPtrConstant(uint64_t Val, bool isTarget) { - return getConstant(Val, TLI.getPointerTy(), isTarget); + return getConstant(Val, TM.getTargetLowering()->getPointerTy(), isTarget); } @@ -1054,8 +1099,8 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){ if (VT.isVector()) { SmallVector Ops; Ops.assign(VT.getVectorNumElements(), Result); - // FIXME DebugLoc info might be appropriate here - Result = getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, &Ops[0], Ops.size()); + // FIXME SDLoc info might be appropriate here + Result = getNode(ISD::BUILD_VECTOR, SDLoc(), VT, &Ops[0], Ops.size()); } return Result; } @@ -1077,15 +1122,16 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { llvm_unreachable("Unsupported type in getConstantFP"); } -SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, +SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, SDLoc DL, EVT VT, int64_t Offset, bool isTargetGA, unsigned char TargetFlags) { assert((TargetFlags == 0 || isTargetGA) && "Cannot set target flags on target-independent globals"); + const TargetLowering *TLI = TM.getTargetLowering(); // Truncate (with sign-extension) the offset value to the pointer size. - unsigned BitWidth = TLI.getPointerTy().getSizeInBits(); + unsigned BitWidth = TLI->getPointerTypeSizeInBits(GV->getType()); if (BitWidth < 64) Offset = SignExtend64(Offset, BitWidth); @@ -1112,7 +1158,8 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV, DebugLoc DL, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL, GV, VT, + SDNode *N = new (NodeAllocator) GlobalAddressSDNode(Opc, DL.getIROrder(), + DL.getDebugLoc(), GV, VT, Offset, TargetFlags); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -1161,7 +1208,8 @@ SDValue SelectionDAG::getConstantPool(const Constant *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = + TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1188,7 +1236,8 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT, assert((TargetFlags == 0 || isTarget) && "Cannot set target flags on target-independent globals"); if (Alignment == 0) - Alignment = TLI.getDataLayout()->getPrefTypeAlignment(C->getType()); + Alignment = + TM.getTargetLowering()->getDataLayout()->getPrefTypeAlignment(C->getType()); unsigned Opc = isTarget ? 
ISD::TargetConstantPool : ISD::ConstantPool; FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0); @@ -1299,13 +1348,10 @@ static void commuteShuffle(SDValue &N1, SDValue &N2, SmallVectorImpl &M) { } } -SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, +SDValue SelectionDAG::getVectorShuffle(EVT VT, SDLoc dl, SDValue N1, SDValue N2, const int *Mask) { - assert(N1.getValueType() == N2.getValueType() && "Invalid VECTOR_SHUFFLE"); - assert(VT.isVector() && N1.getValueType().isVector() && - "Vector Shuffle VTs must be a vectors"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() - && "Vector Shuffle VTs must have same element type"); + assert(VT == N1.getValueType() && VT == N2.getValueType() && + "Invalid VECTOR_SHUFFLE"); // Canonicalize shuffle undef, undef -> undef if (N1.getOpcode() == ISD::UNDEF && N2.getOpcode() == ISD::UNDEF) @@ -1354,17 +1400,13 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, commuteShuffle(N1, N2, MaskVec); } - // If Identity shuffle, or all shuffle in to undef, return that node. - bool AllUndef = true; + // If Identity shuffle return that node. bool Identity = true; for (unsigned i = 0; i != NElts; ++i) { if (MaskVec[i] >= 0 && MaskVec[i] != (int)i) Identity = false; - if (MaskVec[i] >= 0) AllUndef = false; } - if (Identity && NElts == N1.getValueType().getVectorNumElements()) + if (Identity && NElts) return N1; - if (AllUndef) - return getUNDEF(VT); FoldingSetNodeID ID; SDValue Ops[2] = { N1, N2 }; @@ -1383,13 +1425,15 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1, memcpy(MaskAlloc, &MaskVec[0], NElts * sizeof(int)); ShuffleVectorSDNode *N = - new (NodeAllocator) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc); + new (NodeAllocator) ShuffleVectorSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), N1, N2, + MaskAlloc); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, +SDValue SelectionDAG::getConvertRndSat(EVT VT, SDLoc dl, SDValue Val, SDValue DTy, SDValue STy, SDValue Rnd, SDValue Sat, ISD::CvtCode Code) { @@ -1406,8 +1450,9 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl, Ops, 5, - Code); + CvtRndSatSDNode *N = new (NodeAllocator) CvtRndSatSDNode(VT, dl.getIROrder(), + dl.getDebugLoc(), + Ops, 5, Code); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1441,7 +1486,7 @@ SDValue SelectionDAG::getRegisterMask(const uint32_t *RegMask) { return SDValue(N, 0); } -SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { +SDValue SelectionDAG::getEHLabel(SDLoc dl, SDValue Root, MCSymbol *Label) { FoldingSetNodeID ID; SDValue Ops[] = { Root }; AddNodeIDNode(ID, ISD::EH_LABEL, getVTList(MVT::Other), &Ops[0], 1); @@ -1450,7 +1495,8 @@ SDValue SelectionDAG::getEHLabel(DebugLoc dl, SDValue Root, MCSymbol *Label) { if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) EHLabelSDNode(dl, Root, Label); + SDNode *N = new (NodeAllocator) EHLabelSDNode(dl.getIROrder(), + dl.getDebugLoc(), Root, Label); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); @@ -1513,16 +1559,36 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { return SDValue(N, 0); } +/// getAddrSpaceCast - Return an AddrSpaceCastSDNode. 
+SDValue SelectionDAG::getAddrSpaceCast(SDLoc dl, EVT VT, SDValue Ptr, + unsigned SrcAS, unsigned DestAS) { + SDValue Ops[] = {Ptr}; + FoldingSetNodeID ID; + AddNodeIDNode(ID, ISD::ADDRSPACECAST, getVTList(VT), &Ops[0], 1); + ID.AddInteger(SrcAS); + ID.AddInteger(DestAS); + + void *IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) + return SDValue(E, 0); + + SDNode *N = new (NodeAllocator) AddrSpaceCastSDNode(dl.getIROrder(), + dl.getDebugLoc(), + VT, Ptr, SrcAS, DestAS); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} /// getShiftAmountOperand - Return the specified value casted to /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - EVT ShTy = TLI.getShiftAmountTy(LHSTy); + EVT ShTy = TM.getTargetLowering()->getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; - return getNode(Opcode, Op.getDebugLoc(), ShTy, Op); + return getNode(Opcode, SDLoc(Op), ShTy, Op); } /// CreateStackTemporary - Create a stack temporary, suitable for holding the @@ -1531,11 +1597,12 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) { MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); unsigned ByteSize = VT.getStoreSize(); Type *Ty = VT.getTypeForEVT(*getContext()); + const TargetLowering *TLI = TM.getTargetLowering(); unsigned StackAlign = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), minAlign); + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), minAlign); int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); - return getFrameIndex(FrameIdx, TLI.getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy()); } /// CreateStackTemporary - Create a stack temporary suitable for holding @@ -1545,24 +1612,30 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) { VT2.getStoreSizeInBits())/8; Type *Ty1 = VT1.getTypeForEVT(*getContext()); Type *Ty2 = VT2.getTypeForEVT(*getContext()); - const DataLayout *TD = TLI.getDataLayout(); + const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout *TD = TLI->getDataLayout(); unsigned Align = std::max(TD->getPrefTypeAlignment(Ty1), TD->getPrefTypeAlignment(Ty2)); MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo(); int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false); - return getFrameIndex(FrameIdx, TLI.getPointerTy()); + return getFrameIndex(FrameIdx, TLI->getPointerTy()); } SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, - SDValue N2, ISD::CondCode Cond, DebugLoc dl) { + SDValue N2, ISD::CondCode Cond, SDLoc dl) { // These setcc operations always fold. switch (Cond) { default: break; case ISD::SETFALSE: case ISD::SETFALSE2: return getConstant(0, VT); case ISD::SETTRUE: - case ISD::SETTRUE2: return getConstant(1, VT); + case ISD::SETTRUE2: { + const TargetLowering *TLI = TM.getTargetLowering(); + TargetLowering::BooleanContent Cnt = TLI->getBooleanContents(VT.isVector()); + return getConstant( + Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT); + } case ISD::SETOEQ: case ISD::SETOGT: @@ -1644,7 +1717,12 @@ SDValue SelectionDAG::FoldSetCC(EVT VT, SDValue N1, } } else { // Ensure that the constant occurs on the RHS. 
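getAddrSpaceCast() is new in this import and wraps creation and CSE of an ISD::ADDRSPACECAST node. A hypothetical use, assuming Ptr is a pointer-typed SDValue; the result type and address-space numbers below are made up for illustration:

    // Cast Ptr from address space 0 to address space 1; VT is the pointer
    // type of the destination space (illustrative: a 32-bit pointer).
    SDValue Cast = DAG.getAddrSpaceCast(SDLoc(Ptr), /*VT=*/MVT::i32, Ptr,
                                        /*SrcAS=*/0, /*DestAS=*/1);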
- return getSetCC(dl, VT, N2, N1, ISD::getSetCCSwappedOperands(Cond)); + ISD::CondCode SwappedCond = ISD::getSetCCSwappedOperands(Cond); + MVT CompVT = N1.getValueType().getSimpleVT(); + if (!TM.getTargetLowering()->isCondCodeLegal(SwappedCond, CompVT)) + return SDValue(); + + return getSetCC(dl, VT, N2, N1, SwappedCond); } } @@ -1680,6 +1758,7 @@ bool SelectionDAG::MaskedValueIsZero(SDValue Op, const APInt &Mask, /// processing. void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, APInt &KnownOne, unsigned Depth) const { + const TargetLowering *TLI = TM.getTargetLowering(); unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits(); KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything. @@ -1802,7 +1881,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If we know the result of a setcc has the top bits zero, use this info. - if (TLI.getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrOneBooleanContent && BitWidth > 1) KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); return; @@ -1942,7 +2021,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::SIGN_EXTEND: { EVT InVT = Op.getOperand(0).getValueType(); unsigned InBits = InVT.getScalarType().getSizeInBits(); - APInt InSignBit = APInt::getSignBit(InBits); APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits); KnownZero = KnownZero.trunc(InBits); @@ -2054,7 +2132,6 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, const APInt &RA = Rem->getAPIntValue().abs(); if (RA.isPowerOf2()) { APInt LowBits = RA - 1; - APInt Mask2 = LowBits | APInt::getSignBit(BitWidth); ComputeMaskedBits(Op.getOperand(0), KnownZero2,KnownOne2,Depth+1); // The low bits of the first operand are unchanged by the srem. @@ -2114,7 +2191,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, case ISD::INTRINSIC_W_CHAIN: case ISD::INTRINSIC_VOID: // Allow the target to implement this method for its nodes. - TLI.computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); + TLI->computeMaskedBitsForTargetNode(Op, KnownZero, KnownOne, *this, Depth); return; } } @@ -2125,6 +2202,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, /// information. For example, immediately after an "SRA X, 2", we know that /// the top 3 bits are all equal to each other, so we return 3. unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ + const TargetLowering *TLI = TM.getTargetLowering(); EVT VT = Op.getValueType(); assert(VT.isInteger() && "Invalid VT!"); unsigned VTBits = VT.getScalarType().getSizeInBits(); @@ -2149,7 +2227,8 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ } case ISD::SIGN_EXTEND: - Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); + Tmp = + VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits(); return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp; case ISD::SIGN_EXTEND_INREG: @@ -2209,7 +2288,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ // The boolean result conforms to getBooleanContents. Fall through. case ISD::SETCC: // If setcc returns 0/-1, all bits are sign bits. 
- if (TLI.getBooleanContents(Op.getValueType().isVector()) == + if (TLI->getBooleanContents(Op.getValueType().isVector()) == TargetLowering::ZeroOrNegativeOneBooleanContent) return VTBits; break; @@ -2310,7 +2389,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ Op.getOpcode() == ISD::INTRINSIC_WO_CHAIN || Op.getOpcode() == ISD::INTRINSIC_W_CHAIN || Op.getOpcode() == ISD::INTRINSIC_VOID) { - unsigned NumBits = TLI.ComputeNumSignBitsForTargetNode(Op, Depth); + unsigned NumBits = TLI->ComputeNumSignBitsForTargetNode(Op, Depth); if (NumBits > 1) FirstAnswer = std::max(FirstAnswer, NumBits); } @@ -2403,14 +2482,15 @@ bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const { /// getNode - Gets or creates the specified node. /// -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT) { FoldingSetNodeID ID; AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0); void *IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) SDNode(Opcode, DL, getVTList(VT)); + SDNode *N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), getVTList(VT)); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); @@ -2420,7 +2500,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) { return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue Operand) { // Constant fold unary operations with an integer constant operand. if (ConstantSDNode *C = dyn_cast(Operand.getNode())) { @@ -2671,10 +2751,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTs, Operand); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, Operand); } AllNodes.push_back(N); @@ -2789,11 +2871,11 @@ SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, return Outputs.back(); // Otherwise build a big vector out of the scalar elements we generated. 
- return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(), + return getNode(ISD::BUILD_VECTOR, SDLoc(), VT, Outputs.data(), Outputs.size()); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2) { ConstantSDNode *N1C = dyn_cast(N1.getNode()); ConstantSDNode *N2C = dyn_cast(N2.getNode()); @@ -3072,9 +3154,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, if (VT.isSimple() && N1.getValueType().isSimple()) { assert(VT.isVector() && N1.getValueType().isVector() && "Extract subvector VTs must be a vectors!"); - assert(VT.getVectorElementType() == N1.getValueType().getVectorElementType() && + assert(VT.getVectorElementType() == + N1.getValueType().getVectorElementType() && "Extract subvector VTs must have the same element type!"); - assert(VT.getSimpleVT() <= N1.getValueType().getSimpleVT() && + assert(VT.getSimpleVT() <= N1.getSimpleValueType() && "Extract subvector must be from larger vector to smaller vector!"); if (isa(Index.getNode())) { @@ -3085,7 +3168,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, } // Trivial extraction. - if (VT.getSimpleVT() == N1.getValueType().getSimpleVT()) + if (VT.getSimpleVT() == N1.getSimpleValueType()) return N1; } break; @@ -3243,10 +3326,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTs, N1, N2); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2); } AllNodes.push_back(N); @@ -3256,11 +3341,26 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3) { // Perform various simplifications. ConstantSDNode *N1C = dyn_cast(N1.getNode()); switch (Opcode) { + case ISD::FMA: { + ConstantFPSDNode *N1CFP = dyn_cast(N1); + ConstantFPSDNode *N2CFP = dyn_cast(N2); + ConstantFPSDNode *N3CFP = dyn_cast(N3); + if (N1CFP && N2CFP && N3CFP) { + APFloat V1 = N1CFP->getValueAPF(); + const APFloat &V2 = N2CFP->getValueAPF(); + const APFloat &V3 = N3CFP->getValueAPF(); + APFloat::opStatus s = + V1.fusedMultiplyAdd(V2, V3, APFloat::rmNearestTiesToEven); + if (s != APFloat::opInvalidOp) + return getConstantFP(V1, VT); + } + break; + } case ISD::CONCAT_VECTORS: // A CONCAT_VECTOR with all operands BUILD_VECTOR can be simplified to // one big BUILD_VECTOR. @@ -3300,7 +3400,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, "Insert subvector VTs must be a vectors"); assert(VT == N1.getValueType() && "Dest and insert subvector source types must match!"); - assert(N2.getValueType().getSimpleVT() <= N1.getValueType().getSimpleVT() && + assert(N2.getSimpleValueType() <= N1.getSimpleValueType() && "Insert subvector must be from smaller vector to larger vector!"); if (isa(Index.getNode())) { assert((N2.getValueType().getVectorNumElements() + @@ -3310,7 +3410,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } // Trivial insertion. 
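The new ISD::FMA case above folds a fused multiply-add of three constant operands at node-creation time, rejecting only results that APFloat reports as invalid operations. The APFloat interface it relies on, shown in isolation (a sketch; the values are illustrative):

    #include "llvm/ADT/APFloat.h"
    using namespace llvm;

    bool foldsCleanly() {
      APFloat A(2.0), B(3.0), C(1.0);
      // A <- A*B + C with a single rounding, as in the getNode() hunk above.
      APFloat::opStatus S =
          A.fusedMultiplyAdd(B, C, APFloat::rmNearestTiesToEven);
      return S != APFloat::opInvalidOp; // A now holds 7.0
    }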
- if (VT.getSimpleVT() == N2.getValueType().getSimpleVT()) + if (VT.getSimpleVT() == N2.getSimpleValueType()) return N2; } break; @@ -3333,10 +3433,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTs, N1, N2, N3); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs, N1, N2, N3); } AllNodes.push_back(N); @@ -3346,14 +3448,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VT, Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; @@ -3379,14 +3481,14 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { ArgChains.push_back(SDValue(L, 1)); // Build a tokenfactor for all the chains. - return getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + return getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, &ArgChains[0], ArgChains.size()); } /// getMemsetValue - Vectorized representation of the memset value /// operand. static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, - DebugLoc dl) { + SDLoc dl) { assert(Value.getOpcode() != ISD::UNDEF); unsigned NumBits = VT.getScalarType().getSizeInBits(); @@ -3412,7 +3514,7 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, /// getMemsetStringVal - Similar to getMemsetValue. Except this is only /// used when a memcpy is turned into a memset when the source is a constant /// string ptr. -static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, +static SDValue getMemsetStringVal(EVT VT, SDLoc dl, SelectionDAG &DAG, const TargetLowering &TLI, StringRef Str) { // Handle vector with all elements zero. if (Str.empty()) { @@ -3454,10 +3556,10 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, /// getMemBasePlusOffset - Returns base and offset node for the /// -static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, +static SDValue getMemBasePlusOffset(SDValue Base, unsigned Offset, SDLoc dl, SelectionDAG &DAG) { EVT VT = Base.getValueType(); - return DAG.getNode(ISD::ADD, Base.getDebugLoc(), + return DAG.getNode(ISD::ADD, dl, VT, Base, DAG.getConstant(Offset, VT)); } @@ -3585,7 +3687,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, return true; } -static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, +static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, @@ -3630,7 +3732,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); // Don't promote to an alignment that would require dynamic stack - // realignment. + // realignment. 
const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); if (!TRI->needsStackRealignment(MF)) while (NewAlign > Align && @@ -3671,7 +3773,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); if (Value.getNode()) Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); } @@ -3685,11 +3787,11 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); assert(NVT.bitsGE(VT)); Value = DAG.getExtLoad(ISD::EXTLOAD, dl, NVT, Chain, - getMemBasePlusOffset(Src, SrcOff, DAG), + getMemBasePlusOffset(Src, SrcOff, dl, DAG), SrcPtrInfo.getWithOffset(SrcOff), VT, isVol, false, MinAlign(SrcAlign, SrcOff)); Store = DAG.getTruncStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), VT, isVol, false, Align); } @@ -3703,7 +3805,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, &OutChains[0], OutChains.size()); } -static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, +static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, @@ -3755,10 +3857,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value, Store; + SDValue Value; Value = DAG.getLoad(VT, dl, Chain, - getMemBasePlusOffset(Src, SrcOff, DAG), + getMemBasePlusOffset(Src, SrcOff, dl, DAG), SrcPtrInfo.getWithOffset(SrcOff), isVol, false, false, SrcAlign); LoadValues.push_back(Value); @@ -3771,10 +3873,10 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; unsigned VTSize = VT.getSizeInBits() / 8; - SDValue Value, Store; + SDValue Store; Store = DAG.getStore(Chain, dl, LoadValues[i], - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); OutChains.push_back(Store); DstOff += VTSize; @@ -3784,7 +3886,25 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, &OutChains[0], OutChains.size()); } -static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, +/// \brief Lower the call to 'memset' intrinsic function into a series of store +/// operations. +/// +/// \param DAG Selection DAG where lowered code is placed. +/// \param dl Link to corresponding IR location. +/// \param Chain Control flow dependency. +/// \param Dst Pointer to destination memory location. +/// \param Src Value of byte to write into the memory. +/// \param Size Number of bytes to write. +/// \param Align Alignment of the destination in bytes. +/// \param isVol True if destination is volatile. +/// \param DstPtrInfo IR information on the memory pointer. +/// \returns New head in the control flow, if lowering was successful, empty +/// SDValue otherwise. +/// +/// The function tries to replace 'llvm.memset' intrinsic with several store +/// operations and value calculation code. This is usually profitable for small +/// memory size. 
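The documented expansion is easiest to see on a small, hypothetical call. For memset(dst, 0xAB, 8) on a 32-bit target, the lowering would produce roughly:

    //   Value  = 0xABABABAB                  // getMemsetValue splats the byte
    //   store0 = store i32 0xABABABAB, dst+0
    //   store1 = store i32 0xABABABAB, dst+4
    //   chain  = TokenFactor(store0, store1) // the returned "new head"
    // When the expansion is not profitable, the caller falls back to a real
    // memset libcall, as getMemset() does further down.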
+static SDValue getMemsetStores(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, uint64_t Size, unsigned Align, bool isVol, @@ -3856,7 +3976,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, } assert(Value.getValueType() == VT && "Value with wrong type."); SDValue Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), + getMemBasePlusOffset(Dst, DstOff, dl, DAG), DstPtrInfo.getWithOffset(DstOff), isVol, false, Align); OutChains.push_back(Store); @@ -3868,7 +3988,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, &OutChains[0], OutChains.size()); } -SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, +SDValue SelectionDAG::getMemcpy(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, @@ -3914,29 +4034,31 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, // beyond the given memory regions. But fixing this isn't easy, and most // people don't care. + const TargetLowering *TLI = TM.getTargetLowering(); + // Emit a library call. TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); - // FIXME: pass in DebugLoc + // FIXME: pass in SDLoc TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMCPY), + TLI->getLibcallCallingConv(RTLIB::MEMCPY), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMCPY), + TLI->getPointerTy()), Args, *this, dl); - std::pair CallResult = TLI.LowerCallTo(CLI); + std::pair CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } -SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, +SDValue SelectionDAG::getMemmove(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo, @@ -3970,29 +4092,31 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, // FIXME: If the memmove is volatile, lowering it to plain libc memmove may // not be safe. See memcpy above for more details. + const TargetLowering *TLI = TM.getTargetLowering(); + // Emit a library call. 
TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; - Entry.Ty = TLI.getDataLayout()->getIntPtrType(*getContext()); + Entry.Ty = TLI->getDataLayout()->getIntPtrType(*getContext()); Entry.Node = Dst; Args.push_back(Entry); Entry.Node = Src; Args.push_back(Entry); Entry.Node = Size; Args.push_back(Entry); - // FIXME: pass in DebugLoc + // FIXME: pass in SDLoc TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMMOVE), + TLI->getLibcallCallingConv(RTLIB::MEMMOVE), /*isTailCall=*/false, /*doesNotReturn=*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMMOVE), + TLI->getPointerTy()), Args, *this, dl); - std::pair CallResult = TLI.LowerCallTo(CLI); + std::pair CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } -SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, +SDValue SelectionDAG::getMemset(SDValue Chain, SDLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { @@ -4023,7 +4147,8 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, return Result; // Emit a library call. - Type *IntPtrTy = TLI.getDataLayout()->getIntPtrType(*getContext()); + const TargetLowering *TLI = TM.getTargetLowering(); + Type *IntPtrTy = TLI->getDataLayout()->getIntPtrType(*getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; Entry.Node = Dst; Entry.Ty = IntPtrTy; @@ -4041,22 +4166,53 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, Entry.Ty = IntPtrTy; Entry.isSExt = false; Args.push_back(Entry); - // FIXME: pass in DebugLoc + // FIXME: pass in SDLoc TargetLowering:: CallLoweringInfo CLI(Chain, Type::getVoidTy(*getContext()), false, false, false, false, 0, - TLI.getLibcallCallingConv(RTLIB::MEMSET), + TLI->getLibcallCallingConv(RTLIB::MEMSET), /*isTailCall=*/false, /*doesNotReturn*/false, /*isReturnValueUsed=*/false, - getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET), - TLI.getPointerTy()), + getExternalSymbol(TLI->getLibcallName(RTLIB::MEMSET), + TLI->getPointerTy()), Args, *this, dl); - std::pair CallResult = TLI.LowerCallTo(CLI); + std::pair CallResult = TLI->LowerCallTo(CLI); return CallResult.second; } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, + SDVTList VTList, SDValue* Ops, unsigned NumOps, + MachineMemOperand *MMO, + AtomicOrdering Ordering, + SynchronizationScope SynchScope) { + FoldingSetNodeID ID; + ID.AddInteger(MemVT.getRawBits()); + AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps); + ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); + void* IP = 0; + if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { + cast(E)->refineAlignment(MMO); + return SDValue(E, 0); + } + + // Allocate the operands array for the node out of the BumpPtrAllocator, since + // SDNode doesn't have access to it. This memory will be "leaked" when + // the node is deallocated, but recovered when the allocator is released. + // If the number of operands is less than 5 we use AtomicSDNode's internal + // storage. + SDUse *DynOps = NumOps > 4 ? 
OperandAllocator.Allocate(NumOps) : 0; + + SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, MemVT, + Ops, DynOps, NumOps, MMO, + Ordering, SynchScope); + CSEMap.InsertNode(N, IP); + AllNodes.push_back(N); + return SDValue(N, 0); +} + +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachinePointerInfo PtrInfo, unsigned Alignment, @@ -4084,7 +4240,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Cmp, SDValue Swp, MachineMemOperand *MMO, @@ -4096,25 +4252,11 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, EVT VT = Cmp.getValueType(); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Cmp, Swp}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 4); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, Cmp, Swp, MMO, Ordering, - SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 4, MMO, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, const Value* PtrVal, @@ -4145,7 +4287,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, SDValue Chain, SDValue Ptr, SDValue Val, MachineMemOperand *MMO, @@ -4169,25 +4311,11 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, SDVTList VTs = Opcode == ISD::ATOMIC_STORE ? 
getVTList(MVT::Other) : getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr, Val}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 3); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, Val, MMO, - Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 3, MMO, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, const Value* PtrVal, @@ -4218,7 +4346,7 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, Ordering, SynchScope); } -SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, +SDValue SelectionDAG::getAtomic(unsigned Opcode, SDLoc dl, EVT MemVT, EVT VT, SDValue Chain, SDValue Ptr, MachineMemOperand *MMO, @@ -4227,26 +4355,13 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT, assert(Opcode == ISD::ATOMIC_LOAD && "Invalid Atomic Op"); SDVTList VTs = getVTList(VT, MVT::Other); - FoldingSetNodeID ID; - ID.AddInteger(MemVT.getRawBits()); SDValue Ops[] = {Chain, Ptr}; - AddNodeIDNode(ID, Opcode, VTs, Ops, 2); - ID.AddInteger(MMO->getPointerInfo().getAddrSpace()); - void* IP = 0; - if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - cast(E)->refineAlignment(MMO); - return SDValue(E, 0); - } - SDNode *N = new (NodeAllocator) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, - Ptr, MMO, Ordering, SynchScope); - CSEMap.InsertNode(N, IP); - AllNodes.push_back(N); - return SDValue(N, 0); + return getAtomic(Opcode, dl, MemVT, VTs, Ops, 2, MMO, Ordering, SynchScope); } /// getMergeValues - Create a MERGE_VALUES node from the given operands. 
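The three atomic builders above (compare-and-swap, read-modify-write, and atomic load) previously each repeated the FoldingSet lookup and node allocation; they now delegate to the single array-based overload introduced at the top of this hunk. A call-site sketch for a compare-and-swap, using only names and signatures visible in the diff:

    SDValue Ops[] = { Chain, Ptr, Cmp, Swp };
    SDVTList VTs = DAG.getVTList(Cmp.getValueType(), MVT::Other);
    SDValue Swap = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, MemVT, VTs,
                                 Ops, 4, MMO, Ordering, SynchScope);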
SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, - DebugLoc dl) { + SDLoc dl) { if (NumOps == 1) return Ops[0]; @@ -4259,7 +4374,7 @@ SDValue SelectionDAG::getMergeValues(const SDValue *Ops, unsigned NumOps, } SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps, EVT MemVT, MachinePointerInfo PtrInfo, @@ -4271,7 +4386,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, } SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, EVT MemVT, MachinePointerInfo PtrInfo, unsigned Align, bool Vol, @@ -4294,7 +4409,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, } SDValue -SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, +SelectionDAG::getMemIntrinsicNode(unsigned Opcode, SDLoc dl, SDVTList VTList, const SDValue *Ops, unsigned NumOps, EVT MemVT, MachineMemOperand *MMO) { assert((Opcode == ISD::INTRINSIC_VOID || @@ -4318,12 +4433,14 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList, return SDValue(E, 0); } - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, - MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + NumOps, MemVT, MMO); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, - MemVT, MMO); + N = new (NodeAllocator) MemIntrinsicSDNode(Opcode, dl.getIROrder(), + dl.getDebugLoc(), VTList, Ops, + NumOps, MemVT, MMO); } AllNodes.push_back(N); return SDValue(N, 0); @@ -4365,7 +4482,7 @@ static MachinePointerInfo InferPointerInfo(SDValue Ptr, SDValue OffsetOp) { SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, DebugLoc dl, SDValue Chain, + EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, bool isInvariant, @@ -4398,7 +4515,7 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, SDValue SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, - EVT VT, DebugLoc dl, SDValue Chain, + EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue Offset, EVT MemVT, MachineMemOperand *MMO) { if (VT == MemVT) { @@ -4437,14 +4554,15 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, ISD::LoadExtType ExtType, cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl, VTs, AM, ExtType, + SDNode *N = new (NodeAllocator) LoadSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ExtType, MemVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, +SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, @@ -4457,7 +4575,15 @@ SDValue SelectionDAG::getLoad(EVT VT, DebugLoc dl, TBAAInfo, Ranges); } -SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, +SDValue SelectionDAG::getLoad(EVT VT, SDLoc dl, + SDValue Chain, SDValue Ptr, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, 
ISD::NON_EXTLOAD, VT, dl, Chain, Ptr, Undef, + VT, MMO); +} + +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, SDValue Chain, SDValue Ptr, MachinePointerInfo PtrInfo, EVT MemVT, bool isVolatile, bool isNonTemporal, @@ -4469,8 +4595,16 @@ SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, DebugLoc dl, EVT VT, } +SDValue SelectionDAG::getExtLoad(ISD::LoadExtType ExtType, SDLoc dl, EVT VT, + SDValue Chain, SDValue Ptr, EVT MemVT, + MachineMemOperand *MMO) { + SDValue Undef = getUNDEF(Ptr.getValueType()); + return getLoad(ISD::UNINDEXED, ExtType, VT, dl, Chain, Ptr, Undef, + MemVT, MMO); +} + SDValue -SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, +SelectionDAG::getIndexedLoad(SDValue OrigLoad, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { LoadSDNode *LD = cast(OrigLoad); assert(LD->getOffset().getOpcode() == ISD::UNDEF && @@ -4481,7 +4615,7 @@ SelectionDAG::getIndexedLoad(SDValue OrigLoad, DebugLoc dl, SDValue Base, false, LD->getAlignment()); } -SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, bool isVolatile, bool isNonTemporal, unsigned Alignment, const MDNode *TBAAInfo) { @@ -4508,7 +4642,7 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, return getStore(Chain, dl, Val, Ptr, MMO); } -SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachineMemOperand *MMO) { assert(Chain.getValueType() == MVT::Other && "Invalid chain type"); @@ -4527,14 +4661,15 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val, cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, - false, VT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, false, VT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } -SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, MachinePointerInfo PtrInfo, EVT SVT,bool isVolatile, bool isNonTemporal, unsigned Alignment, @@ -4561,7 +4696,7 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, return getTruncStore(Chain, dl, Val, Ptr, SVT, MMO); } -SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, +SDValue SelectionDAG::getTruncStore(SDValue Chain, SDLoc dl, SDValue Val, SDValue Ptr, EVT SVT, MachineMemOperand *MMO) { EVT VT = Val.getValueType(); @@ -4595,15 +4730,16 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val, cast(E)->refineAlignment(MMO); return SDValue(E, 0); } - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, - true, SVT, MMO); + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, + ISD::UNINDEXED, true, SVT, MMO); CSEMap.InsertNode(N, IP); AllNodes.push_back(N); return SDValue(N, 0); } SDValue -SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, +SelectionDAG::getIndexedStore(SDValue OrigStore, SDLoc dl, SDValue Base, SDValue Offset, ISD::MemIndexedMode AM) { StoreSDNode *ST = cast(OrigStore); assert(ST->getOffset().getOpcode() == ISD::UNDEF && @@ -4619,7 +4755,8 @@ 
SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl, VTs, AM, + SDNode *N = new (NodeAllocator) StoreSDNode(Ops, dl.getIROrder(), + dl.getDebugLoc(), VTs, AM, ST->isTruncatingStore(), ST->getMemoryVT(), ST->getMemOperand()); @@ -4628,7 +4765,7 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base, return SDValue(N, 0); } -SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, +SDValue SelectionDAG::getVAArg(EVT VT, SDLoc dl, SDValue Chain, SDValue Ptr, SDValue SV, unsigned Align) { @@ -4636,7 +4773,7 @@ SDValue SelectionDAG::getVAArg(EVT VT, DebugLoc dl, return getNode(ISD::VAARG, dl, getVTList(VT, MVT::Other), Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, const SDUse *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -4652,7 +4789,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return getNode(Opcode, DL, VT, &NewOps[0], NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, EVT VT, const SDValue *Ops, unsigned NumOps) { switch (NumOps) { case 0: return getNode(Opcode, DL, VT); @@ -4694,10 +4831,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) return SDValue(E, 0); - N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops, NumOps); CSEMap.InsertNode(N, IP); } else { - N = new (NodeAllocator) SDNode(Opcode, DL, VTs, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTs, Ops, NumOps); } AllNodes.push_back(N); @@ -4707,14 +4846,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, ArrayRef ResultTys, const SDValue *Ops, unsigned NumOps) { return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), Ops, NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, const EVT *VTs, unsigned NumVTs, const SDValue *Ops, unsigned NumOps) { if (NumVTs == 1) @@ -4722,7 +4861,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getNode(Opcode, DL, makeVTList(VTs, NumVTs), Ops, NumOps); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, const SDValue *Ops, unsigned NumOps) { if (VTList.NumVTs == 1) return getNode(Opcode, DL, VTList.VTs[0], Ops, NumOps); @@ -4760,26 +4899,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return SDValue(E, 0); if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); } else if (NumOps == 3) { - N = new (NodeAllocator) 
TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], - Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { - N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops, NumOps); } CSEMap.InsertNode(N, IP); } else { if (NumOps == 1) { - N = new (NodeAllocator) UnarySDNode(Opcode, DL, VTList, Ops[0]); + N = new (NodeAllocator) UnarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0]); } else if (NumOps == 2) { - N = new (NodeAllocator) BinarySDNode(Opcode, DL, VTList, Ops[0], Ops[1]); + N = new (NodeAllocator) BinarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1]); } else if (NumOps == 3) { - N = new (NodeAllocator) TernarySDNode(Opcode, DL, VTList, Ops[0], Ops[1], - Ops[2]); + N = new (NodeAllocator) TernarySDNode(Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTList, Ops[0], + Ops[1], Ops[2]); } else { - N = new (NodeAllocator) SDNode(Opcode, DL, VTList, Ops, NumOps); + N = new (NodeAllocator) SDNode(Opcode, DL.getIROrder(), DL.getDebugLoc(), + VTList, Ops, NumOps); } } AllNodes.push_back(N); @@ -4789,36 +4938,36 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, return SDValue(N, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList) { +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList) { return getNode(Opcode, DL, VTList, 0, 0); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1) { SDValue Ops[] = { N1 }; return getNode(Opcode, DL, VTList, Ops, 1); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2) { SDValue Ops[] = { N1, N2 }; return getNode(Opcode, DL, VTList, Ops, 2); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3) { SDValue Ops[] = { N1, N2, N3 }; return getNode(Opcode, DL, VTList, Ops, 3); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4) { SDValue Ops[] = { N1, N2, N3, N4 }; return getNode(Opcode, DL, VTList, Ops, 4); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList, +SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, SDVTList VTList, SDValue N1, SDValue N2, SDValue N3, SDValue N4, SDValue N5) { SDValue Ops[] = { N1, N2, N3, N4, N5 }; @@ -4830,76 +4979,81 @@ SDVTList SelectionDAG::getVTList(EVT VT) { } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2) { - for (std::vector::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 2 && I->VTs[0] == VT1 && I->VTs[1] == VT2) - return *I; - - EVT *Array = Allocator.Allocate(2); - Array[0] = VT1; - Array[1] = VT2; - SDVTList Result = makeVTList(Array, 2); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(2U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = 
Allocator.Allocate(2); + Array[0] = VT1; + Array[1] = VT2; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 2); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3) { - for (std::vector::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 3 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && - I->VTs[2] == VT3) - return *I; - - EVT *Array = Allocator.Allocate(3); - Array[0] = VT1; - Array[1] = VT2; - Array[2] = VT3; - SDVTList Result = makeVTList(Array, 3); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(3U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate(3); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 3); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) { - for (std::vector::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) - if (I->NumVTs == 4 && I->VTs[0] == VT1 && I->VTs[1] == VT2 && - I->VTs[2] == VT3 && I->VTs[3] == VT4) - return *I; - - EVT *Array = Allocator.Allocate(4); - Array[0] = VT1; - Array[1] = VT2; - Array[2] = VT3; - Array[3] = VT4; - SDVTList Result = makeVTList(Array, 4); - VTList.push_back(Result); - return Result; + FoldingSetNodeID ID; + ID.AddInteger(4U); + ID.AddInteger(VT1.getRawBits()); + ID.AddInteger(VT2.getRawBits()); + ID.AddInteger(VT3.getRawBits()); + ID.AddInteger(VT4.getRawBits()); + + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate(4); + Array[0] = VT1; + Array[1] = VT2; + Array[2] = VT3; + Array[3] = VT4; + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, 4); + VTListMap.InsertNode(Result, IP); + } + return Result->getSDVTList(); } SDVTList SelectionDAG::getVTList(const EVT *VTs, unsigned NumVTs) { - switch (NumVTs) { - case 0: llvm_unreachable("Cannot have nodes without results!"); - case 1: return getVTList(VTs[0]); - case 2: return getVTList(VTs[0], VTs[1]); - case 3: return getVTList(VTs[0], VTs[1], VTs[2]); - case 4: return getVTList(VTs[0], VTs[1], VTs[2], VTs[3]); - default: break; + FoldingSetNodeID ID; + ID.AddInteger(NumVTs); + for (unsigned index = 0; index < NumVTs; index++) { + ID.AddInteger(VTs[index].getRawBits()); } - for (std::vector::reverse_iterator I = VTList.rbegin(), - E = VTList.rend(); I != E; ++I) { - if (I->NumVTs != NumVTs || VTs[0] != I->VTs[0] || VTs[1] != I->VTs[1]) - continue; - - if (std::equal(&VTs[2], &VTs[NumVTs], &I->VTs[2])) - return *I; + void *IP = 0; + SDVTListNode *Result = VTListMap.FindNodeOrInsertPos(ID, IP); + if (Result == NULL) { + EVT *Array = Allocator.Allocate(NumVTs); + std::copy(VTs, VTs + NumVTs, Array); + Result = new (Allocator) SDVTListNode(ID.Intern(Allocator), Array, NumVTs); + VTListMap.InsertNode(Result, IP); } - - EVT *Array = Allocator.Allocate(NumVTs); - std::copy(VTs, VTs+NumVTs, Array); - SDVTList Result = makeVTList(Array, NumVTs); - VTList.push_back(Result); - return Result; + return Result->getSDVTList(); } @@ -5138,17 +5292,21 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc, return N; } -/// 
UpdadeDebugLocOnMergedSDNode - If the opt level is -O0 then it throws away +/// UpdadeSDLocOnMergedSDNode - If the opt level is -O0 then it throws away /// the line number information on the merged node, since it is not possible to /// preserve the information that the operation is associated with multiple /// lines. This will make the debugger work better at -O0, where there is a /// higher probability of having other instructions associated with that line. /// -SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) { +/// For IROrder, we keep the smaller of the two values. +SDNode *SelectionDAG::UpdadeSDLocOnMergedSDNode(SDNode *N, SDLoc OLoc) { DebugLoc NLoc = N->getDebugLoc(); - if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && (OLoc != NLoc)) { + if (!(NLoc.isUnknown()) && (OptLevel == CodeGenOpt::None) && + (OLoc.getDebugLoc() != NLoc)) { N->setDebugLoc(DebugLoc()); } + unsigned Order = std::min(N->getIROrder(), OLoc.getIROrder()); + N->setIROrder(Order); return N; } @@ -5157,7 +5315,7 @@ SDNode *SelectionDAG::UpdadeDebugLocOnMergedSDNode(SDNode *N, DebugLoc OLoc) { /// /// Note that MorphNodeTo returns the resultant node. If there is already a /// node of the specified opcode and operands, it returns that node instead of /// the current one. Note that the SDLoc need not be the same. /// /// Using MorphNodeTo is faster than creating a new node and swapping it in /// with ReplaceAllUsesWith both because it often avoids allocating a new @@ -5173,7 +5331,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, FoldingSetNodeID ID; AddNodeIDNode(ID, Opc, VTs, Ops, NumOps); if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) - return UpdadeDebugLocOnMergedSDNode(ON, N->getDebugLoc()); + return UpdadeSDLocOnMergedSDNode(ON, SDLoc(N)); } if (!RemoveNodeFromCSEMaps(N)) @@ -5250,20 +5408,20 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc, /// node of the specified opcode and operands, it returns that node instead of /// the current one.
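Looking back at the getVTList hunks above: the old code did a reverse linear scan over a std::vector of previously created lists, which degrades as the cache grows; the new code interns each list in a FoldingSet keyed on the raw type bits. Callers are unchanged, e.g.:

    // Value-and-chain result list, served from the interned cache on repeat
    // calls instead of being found by a linear scan.
    SDVTList VTs = DAG.getVTList(MVT::i32, MVT::Other);
    assert(VTs.NumVTs == 2 && "expected the value type plus the chain type");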
MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT) { +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT) { SDVTList VTs = getVTList(VT); return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, SDValue Op1) { +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1 }; return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2 }; @@ -5271,7 +5429,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT); SDValue Ops[] = { Op1, Op2, Op3 }; @@ -5279,20 +5437,20 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT, ArrayRef Ops) { SDVTList VTs = getVTList(VT); return getMachineNode(Opcode, dl, VTs, Ops); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, EVT VT2) { +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2) { SDVTList VTs = getVTList(VT1, VT2); return getMachineNode(Opcode, dl, VTs, None); } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, SDValue Op1) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1 }; @@ -5300,7 +5458,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2); SDValue Ops[] = { Op1, Op2 }; @@ -5308,7 +5466,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2); @@ -5317,7 +5475,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, ArrayRef Ops) { SDVTList VTs = getVTList(VT1, VT2); @@ -5325,7 +5483,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, SDValue Op1, SDValue Op2) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -5334,7 +5492,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, SDValue Op1, SDValue Op2, SDValue Op3) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -5343,7 +5501,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc 
dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, ArrayRef Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3); @@ -5351,7 +5509,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, EVT VT1, EVT VT2, EVT VT3, EVT VT4, ArrayRef Ops) { SDVTList VTs = getVTList(VT1, VT2, VT3, VT4); @@ -5359,7 +5517,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc dl, ArrayRef ResultTys, ArrayRef Ops) { SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); @@ -5367,7 +5525,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, } MachineSDNode * -SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, +SelectionDAG::getMachineNode(unsigned Opcode, SDLoc DL, SDVTList VTs, ArrayRef OpsArray) { bool DoCSE = VTs.VTs[VTs.NumVTs-1] != MVT::Glue; MachineSDNode *N; @@ -5380,12 +5538,13 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps); IP = 0; if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) { - return cast(UpdadeDebugLocOnMergedSDNode(E, DL)); + return cast(UpdadeSDLocOnMergedSDNode(E, DL)); } } // Allocate a new MachineSDNode. - N = new (NodeAllocator) MachineSDNode(~Opcode, DL, VTs); + N = new (NodeAllocator) MachineSDNode(~Opcode, DL.getIROrder(), + DL.getDebugLoc(), VTs); // Initialize the operands list. if (NumOps > array_lengthof(N->LocalOperands)) @@ -5411,7 +5570,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs, /// getTargetExtractSubreg - A convenience function for creating /// TargetOpcode::EXTRACT_SUBREG nodes. SDValue -SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, +SelectionDAG::getTargetExtractSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand) { SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); SDNode *Subreg = getMachineNode(TargetOpcode::EXTRACT_SUBREG, DL, @@ -5422,7 +5581,7 @@ SelectionDAG::getTargetExtractSubreg(int SRIdx, DebugLoc DL, EVT VT, /// getTargetInsertSubreg - A convenience function for creating /// TargetOpcode::INSERT_SUBREG nodes. SDValue -SelectionDAG::getTargetInsertSubreg(int SRIdx, DebugLoc DL, EVT VT, +SelectionDAG::getTargetInsertSubreg(int SRIdx, SDLoc DL, EVT VT, SDValue Operand, SDValue Subreg) { SDValue SRIdxVal = getTargetConstant(SRIdx, MVT::i32); SDNode *Result = getMachineNode(TargetOpcode::INSERT_SUBREG, DL, @@ -5845,18 +6004,6 @@ unsigned SelectionDAG::AssignTopologicalOrder() { return DAGSize; } -/// AssignOrdering - Assign an order to the SDNode. -void SelectionDAG::AssignOrdering(const SDNode *SD, unsigned Order) { - assert(SD && "Trying to assign an order to a null node!"); - Ordering->add(SD, Order); -} - -/// GetOrdering - Get the order for the SDNode. -unsigned SelectionDAG::GetOrdering(const SDNode *SD) const { - assert(SD && "Trying to get the order of a null node!"); - return Ordering->getOrder(SD); -} - /// AddDbgValue - Add a dbg_value SDNode. If SD is non-null that means the /// value is produced by SD. 
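The two subregister conveniences above are thin wrappers over TargetOpcode::EXTRACT_SUBREG and INSERT_SUBREG machine nodes. A hypothetical x86-64 use, assuming an i64 SDValue Val and the X86::sub_32bit subregister index (illustrative only; not part of this patch):

    // Take the low 32 bits of a 64-bit value as a subregister extraction.
    SDValue Lo32 = DAG.getTargetExtractSubreg(X86::sub_32bit, dl, MVT::i32,
                                              Val);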
void SelectionDAG::AddDbgValue(SDDbgValue *DB, SDNode *SD, bool isParameter) { @@ -5883,7 +6030,7 @@ void SelectionDAG::TransferDbgValues(SDValue From, SDValue To) { ClonedDVs.push_back(Clone); } } - for (SmallVector::iterator I = ClonedDVs.begin(), + for (SmallVectorImpl::iterator I = ClonedDVs.begin(), E = ClonedDVs.end(); I != E; ++I) AddDbgValue(*I, ToNode, false); } @@ -5896,16 +6043,22 @@ HandleSDNode::~HandleSDNode() { DropOperands(); } -GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, DebugLoc DL, - const GlobalValue *GA, +GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, unsigned Order, + DebugLoc DL, const GlobalValue *GA, EVT VT, int64_t o, unsigned char TF) - : SDNode(Opc, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { + : SDNode(Opc, Order, DL, getSDVTList(VT)), Offset(o), TargetFlags(TF) { TheGlobal = GA; } -MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, - MachineMemOperand *mmo) - : SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) { +AddrSpaceCastSDNode::AddrSpaceCastSDNode(unsigned Order, DebugLoc dl, EVT VT, + SDValue X, unsigned SrcAS, + unsigned DestAS) + : UnarySDNode(ISD::ADDRSPACECAST, Order, dl, getSDVTList(VT), X), + SrcAddrSpace(SrcAS), DestAddrSpace(DestAS) {} + +MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, + EVT memvt, MachineMemOperand *mmo) + : SDNode(Opc, Order, dl, VTs), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!"); @@ -5914,10 +6067,10 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt, assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!"); } -MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, +MemSDNode::MemSDNode(unsigned Opc, unsigned Order, DebugLoc dl, SDVTList VTs, const SDValue *Ops, unsigned NumOps, EVT memvt, MachineMemOperand *mmo) - : SDNode(Opc, dl, VTs, Ops, NumOps), + : SDNode(Opc, Order, dl, VTs, Ops, NumOps), MemoryVT(memvt), MMO(mmo) { SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(), MMO->isNonTemporal(), MMO->isInvariant()); @@ -6064,9 +6217,10 @@ bool SDNode::hasPredecessor(const SDNode *N) const { return hasPredecessorHelper(N, Visited, Worklist); } -bool SDNode::hasPredecessorHelper(const SDNode *N, - SmallPtrSet &Visited, - SmallVector &Worklist) const { +bool +SDNode::hasPredecessorHelper(const SDNode *N, + SmallPtrSet &Visited, + SmallVectorImpl &Worklist) const { if (Visited.empty()) { Worklist.push_back(this); } else { @@ -6103,7 +6257,7 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT VT = N->getValueType(0); unsigned NE = VT.getVectorNumElements(); EVT EltVT = VT.getVectorElementType(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SmallVector Scalars; SmallVector Operands(N->getNumOperands()); @@ -6121,11 +6275,12 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { EVT OperandVT = Operand.getValueType(); if (OperandVT.isVector()) { // A vector operand; extract a single element. + const TargetLowering *TLI = TM.getTargetLowering(); EVT OperandEltVT = OperandVT.getVectorElementType(); Operands[j] = getNode(ISD::EXTRACT_VECTOR_ELT, dl, OperandEltVT, Operand, - getConstant(i, TLI.getPointerTy())); + getConstant(i, TLI->getVectorIdxTy())); } else { // A scalar operand; just use it as is. 
Operands[j] = Operand; @@ -6147,8 +6302,8 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) { case ISD::ROTL: case ISD::ROTR: Scalars.push_back(getNode(N->getOpcode(), dl, EltVT, Operands[0], - getShiftAmountOperand(Operands[0].getValueType(), - Operands[1]))); + getShiftAmountOperand(Operands[0].getValueType(), + Operands[1]))); break; case ISD::SIGN_EXTEND_INREG: case ISD::FP_ROUND_INREG: { @@ -6203,8 +6358,9 @@ bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base, const GlobalValue *GV2 = NULL; int64_t Offset1 = 0; int64_t Offset2 = 0; - bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1); - bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); + const TargetLowering *TLI = TM.getTargetLowering(); + bool isGA1 = TLI->isGAPlusOffset(Loc.getNode(), GV1, Offset1); + bool isGA2 = TLI->isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2); if (isGA1 && isGA2 && GV1 == GV2) return Offset1 == (Offset2 + Dist*Bytes); return false; @@ -6217,11 +6373,12 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { // If this is a GlobalAddress + cst, return the alignment. const GlobalValue *GV; int64_t GVOffset = 0; - if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { - unsigned PtrWidth = TLI.getPointerTy().getSizeInBits(); + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->isGAPlusOffset(Ptr.getNode(), GV, GVOffset)) { + unsigned PtrWidth = TLI->getPointerTypeSizeInBits(GV->getType()); APInt KnownZero(PtrWidth, 0), KnownOne(PtrWidth, 0); llvm::ComputeMaskedBits(const_cast(GV), KnownZero, KnownOne, - TLI.getDataLayout()); + TLI->getDataLayout()); unsigned AlignBits = KnownZero.countTrailingOnes(); unsigned Align = AlignBits ? 1 << std::min(31U, AlignBits) : 0; if (Align) @@ -6251,6 +6408,38 @@ unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const { return 0; } +/// GetSplitDestVTs - Compute the VTs needed for the low/hi parts of a type +/// which is split (or expanded) into two not necessarily identical pieces. +std::pair SelectionDAG::GetSplitDestVTs(const EVT &VT) const { + // Currently all types are split in half. + EVT LoVT, HiVT; + if (!VT.isVector()) { + LoVT = HiVT = TLI->getTypeToTransformTo(*getContext(), VT); + } else { + unsigned NumElements = VT.getVectorNumElements(); + assert(!(NumElements & 1) && "Splitting vector, but not in half!"); + LoVT = HiVT = EVT::getVectorVT(*getContext(), VT.getVectorElementType(), + NumElements/2); + } + return std::make_pair(LoVT, HiVT); +} + +/// SplitVector - Split the vector with EXTRACT_SUBVECTOR and return the +/// low/high part. +std::pair +SelectionDAG::SplitVector(const SDValue &N, const SDLoc &DL, const EVT &LoVT, + const EVT &HiVT) { + assert(LoVT.getVectorNumElements() + HiVT.getVectorNumElements() <= + N.getValueType().getVectorNumElements() && + "More vector elements requested than available!"); + SDValue Lo, Hi; + Lo = getNode(ISD::EXTRACT_SUBVECTOR, DL, LoVT, N, + getConstant(0, TLI->getVectorIdxTy())); + Hi = getNode(ISD::EXTRACT_SUBVECTOR, DL, HiVT, N, + getConstant(LoVT.getVectorNumElements(), TLI->getVectorIdxTy())); + return std::make_pair(Lo, Hi); +} + // getAddressSpace - Return the address space this GlobalAddress belongs to. 
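GetSplitDestVTs and SplitVector, added above, centralize the split-in-half pattern that type legalization clients otherwise open-code. A sketch for a v8i32 value, assuming an SDValue Vec and the llvm::tie helper from ADT/STLExtras.h (the pre-C++11 idiom of this codebase):

    EVT LoVT, HiVT;
    llvm::tie(LoVT, HiVT) = DAG.GetSplitDestVTs(Vec.getValueType());
    SDValue Lo, Hi;
    llvm::tie(Lo, Hi) = DAG.SplitVector(Vec, DL, LoVT, HiVT);
    // Lo and Hi are v4i32 EXTRACT_SUBVECTORs at element indices 0 and 4.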
unsigned GlobalAddressSDNode::getAddressSpace() const { return getGlobal()->getType()->getAddressSpace(); @@ -6372,7 +6561,7 @@ static void checkForCyclesHelper(const SDNode *N, void llvm::checkForCycles(const llvm::SDNode *N) { #ifdef XDEBUG - assert(N && "Checking nonexistant SDNode"); + assert(N && "Checking nonexistent SDNode"); SmallPtrSet visited; SmallPtrSet checked; checkForCyclesHelper(N, visited, checked); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 194aba8..2b2713d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -15,6 +15,7 @@ #include "SelectionDAGBuilder.h" #include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Optional.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" @@ -32,6 +33,7 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/CodeGen/StackMaps.h" #include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" @@ -48,7 +50,6 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/IntegersSubsetMapping.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" @@ -57,6 +58,7 @@ #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include using namespace llvm; @@ -87,7 +89,7 @@ LimitFPPrecision("limit-float-precision", // store [4096 x i8] %data, [4096 x i8]* %buffer static const unsigned MaxParallelChains = 64; -static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V); @@ -96,7 +98,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, /// larger then ValueVT then AssertOp can be used to specify whether the extra /// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT /// (ISD::AssertSext). -static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, +static SDValue getCopyFromParts(SelectionDAG &DAG, SDLoc DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, @@ -217,7 +219,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, /// type larger then ValueVT then AssertOp can be used to specify whether the /// extra bits are known to be zero (ISD::AssertZext) or sign extended from /// ValueVT (ISD::AssertSext). 
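getCopyFromParts, whose prototype changes just above, reassembles a value that calling-convention lowering scattered across several registers. A hypothetical call rebuilding an i64 from two i32 parts whose high bits are known zero; LoPart and HiPart are assumed values, not names from the patch:

    SDValue Parts[] = { LoPart, HiPart };  // hypothetical register copies
    SDValue Val = getCopyFromParts(DAG, DL, Parts, 2, MVT::i32, MVT::i64,
                                   /*V=*/0, ISD::AssertZext);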
-static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, +static SDValue getCopyFromPartsVector(SelectionDAG &DAG, SDLoc DL, const SDValue *Parts, unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); @@ -280,7 +282,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, - DAG.getIntPtrConstant(0)); + DAG.getConstant(0, TLI.getVectorIdxTy())); } // Vector/Vector bitcast. @@ -327,14 +329,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::BUILD_VECTOR, DL, ValueVT, Val); } -static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, +static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. -static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, +static void getCopyToParts(SelectionDAG &DAG, SDLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { @@ -466,7 +468,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, /// getCopyToPartsVector - Create a series of nodes that contain the specified /// value split into legal parts. -static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, +static void getCopyToPartsVector(SelectionDAG &DAG, SDLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, MVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); @@ -489,7 +491,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, SmallVector Ops; for (unsigned i = 0, e = ValueVT.getVectorNumElements(); i != e; ++i) Ops.push_back(DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - ElementVT, Val, DAG.getIntPtrConstant(i))); + ElementVT, Val, DAG.getConstant(i, + TLI.getVectorIdxTy()))); for (unsigned i = ValueVT.getVectorNumElements(), e = PartVT.getVectorNumElements(); i != e; ++i) @@ -515,7 +518,7 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(ValueVT.getVectorNumElements() == 1 && "Only trivial vector-to-scalar conversions should get here!"); Val = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - PartVT, Val, DAG.getIntPtrConstant(0)); + PartVT, Val, DAG.getConstant(0, TLI.getVectorIdxTy())); bool Smaller = ValueVT.bitsLE(PartVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), @@ -545,10 +548,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, if (IntermediateVT.isVector()) Ops[i] = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, IntermediateVT, Val, - DAG.getIntPtrConstant(i * (NumElements / NumIntermediates))); + DAG.getConstant(i * (NumElements / NumIntermediates), + TLI.getVectorIdxTy())); else Ops[i] = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, - IntermediateVT, Val, DAG.getIntPtrConstant(i)); + IntermediateVT, Val, + DAG.getConstant(i, TLI.getVectorIdxTy())); } // Split the intermediate operands into legal parts. @@ -644,7 +649,7 @@ namespace { /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. 
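A recurring mechanical change through the rest of this file: vector element and subvector indices used to be built with DAG.getIntPtrConstant(i), i.e. typed as the pointer type, and now use the target's declared vector-index type. The before/after shape, with Vec, EltVT, and i assumed:

    // Before: index constant typed as intptr.
    SDValue Old = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                              DAG.getIntPtrConstant(i));
    // After: index constant typed as the target's vector-index type.
    SDValue New = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, EltVT, Vec,
                              DAG.getConstant(i, TLI.getVectorIdxTy()));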
SDValue getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - DebugLoc dl, + SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V = 0) const; @@ -652,7 +657,7 @@ namespace { /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. - void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, + void getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V) const; /// AddInlineAsmOperands - Add this value to the specified inlineasm node @@ -671,7 +676,7 @@ namespace { /// If the Flag pointer is NULL, no flag is used. SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, FunctionLoweringInfo &FuncInfo, - DebugLoc dl, + SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V) const { // A Value with type {} or [0 x %t] needs no registers. @@ -717,6 +722,14 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, unsigned NumSignBits = LOI->NumSignBits; unsigned NumZeroBits = LOI->KnownZero.countLeadingOnes(); + if (NumZeroBits == RegSize) { + // The current value is a zero. + // Explicitly express that as it would be easier for + // optimizations to kick in. + Parts[i] = DAG.getConstant(0, RegisterVT); + continue; + } + // FIXME: We capture more information than the dag can represent. For // now, just use the tightest assertzext/assertsext possible. bool isSExt = true; @@ -761,7 +774,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, /// specified value into the registers specified by this object. This uses /// Chain/Flag as the input and updates them for the output Chain/Flag. /// If the Flag pointer is NULL, no flag is used. -void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, +void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, SDLoc dl, SDValue &Chain, SDValue *Flag, const Value *V) const { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -869,7 +882,7 @@ void SelectionDAGBuilder::clear() { UnusedArgNodeMap.clear(); PendingLoads.clear(); PendingExports.clear(); - CurDebugLoc = DebugLoc(); + CurInst = NULL; HasTailCall = false; } @@ -900,7 +913,7 @@ SDValue SelectionDAGBuilder::getRoot() { } // Otherwise, we have to make a token factor node. - SDValue Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + SDValue Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &PendingLoads[0], PendingLoads.size()); PendingLoads.clear(); DAG.setRoot(Root); @@ -930,7 +943,7 @@ SDValue SelectionDAGBuilder::getControlRoot() { PendingExports.push_back(Root); } - Root = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + Root = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &PendingExports[0], PendingExports.size()); PendingExports.clear(); @@ -938,27 +951,21 @@ SDValue SelectionDAGBuilder::getControlRoot() { return Root; } -void SelectionDAGBuilder::AssignOrderingToNode(const SDNode *Node) { - if (DAG.GetOrdering(Node) != 0) return; // Already has ordering. - DAG.AssignOrdering(Node, SDNodeOrder); - - for (unsigned I = 0, E = Node->getNumOperands(); I != E; ++I) - AssignOrderingToNode(Node->getOperand(I).getNode()); -} - void SelectionDAGBuilder::visit(const Instruction &I) { // Set up outgoing PHI node register values before emitting the terminator. 
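// Ordering note, inferred from this hunk and the AssignOrdering removal
// earlier in the patch: SDNodeOrder is now bumped once per visited IR
// instruction, and every node created while lowering that instruction picks
// the value up through the SDLoc it is built with, e.g. (sketch; the
// SDLoc(Instruction*, order) constructor is assumed, not shown here):
//   ++SDNodeOrder;                    // one tick per IR instruction
//   SDLoc Loc(CurInst, SDNodeOrder);  // all nodes for CurInst share this
//   DAG.getNode(Opcode, Loc, VT, Op); // node stamped with that IROrder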
if (isa(&I)) HandlePHINodesInSuccessorBlocks(I.getParent()); - CurDebugLoc = I.getDebugLoc(); + ++SDNodeOrder; + + CurInst = &I; visit(I.getOpcode(), I); if (!isa(&I) && !HasTailCall) CopyToExportRegsIfNeeded(&I); - CurDebugLoc = DebugLoc(); + CurInst = NULL; } void SelectionDAGBuilder::visitPHI(const PHINode &) { @@ -975,12 +982,6 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; #include "llvm/IR/Instruction.def" } - - // Assign the ordering to the freshly created DAG nodes. - if (NodeMap.count(&I)) { - ++SDNodeOrder; - AssignOrderingToNode(getValue(&I).getNode()); - } } // resolveDanglingDebugInfo - if we saw an earlier dbg_value referring to V, @@ -1002,7 +1003,7 @@ void SelectionDAGBuilder::resolveDanglingDebugInfo(const Value *V, DAG.AddDbgValue(SDV, Val.getNode(), false); } } else - DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); + DEBUG(dbgs() << "Dropping debug info for " << *DI << "\n"); DanglingDebugInfoMap[V] = DanglingDebugInfo(); } } @@ -1020,9 +1021,10 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) { DenseMap::iterator It = FuncInfo.ValueMap.find(V); if (It != FuncInfo.ValueMap.end()) { unsigned InReg = It->second; - RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType()); + RegsForValue RFV(*DAG.getContext(), *TM.getTargetLowering(), + InReg, V->getType()); SDValue Chain = DAG.getEntryNode(); - N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); + N = RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); resolveDanglingDebugInfo(V, N); return N; } @@ -1051,17 +1053,21 @@ SDValue SelectionDAGBuilder::getNonRegisterValue(const Value *V) { /// getValueImpl - Helper function for getValue and getNonRegisterValue. /// Create an SDValue for the given value. 
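The getCurSDLoc() calls that replace getCurDebugLoc() throughout this file pair the instruction being visited with the per-instruction SDNodeOrder counter. A sketch of the accessor this migration implies; its definition lives in the builder's header and is assumed here, not shown in the diff:

    SDLoc SelectionDAGBuilder::getCurSDLoc() const {
      return SDLoc(CurInst, SDNodeOrder);
    }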
SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { + const TargetLowering *TLI = TM.getTargetLowering(); + if (const Constant *C = dyn_cast(V)) { - EVT VT = TLI.getValueType(V->getType(), true); + EVT VT = TLI->getValueType(V->getType(), true); if (const ConstantInt *CI = dyn_cast(C)) return DAG.getConstant(*CI, VT); if (const GlobalValue *GV = dyn_cast(C)) - return DAG.getGlobalAddress(GV, getCurDebugLoc(), VT); + return DAG.getGlobalAddress(GV, getCurSDLoc(), VT); - if (isa(C)) - return DAG.getConstant(0, TLI.getPointerTy()); + if (isa(C)) { + unsigned AS = V->getType()->getPointerAddressSpace(); + return DAG.getConstant(0, TLI->getPointerTy(AS)); + } if (const ConstantFP *CFP = dyn_cast(C)) return DAG.getConstantFP(*CFP, VT); @@ -1090,9 +1096,9 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } return DAG.getMergeValues(&Constants[0], Constants.size(), - getCurDebugLoc()); + getCurSDLoc()); } - + if (const ConstantDataSequential *CDS = dyn_cast(C)) { SmallVector Ops; @@ -1105,8 +1111,8 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } if (isa(CDS->getType())) - return DAG.getMergeValues(&Ops[0], Ops.size(), getCurDebugLoc()); - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + return DAG.getMergeValues(&Ops[0], Ops.size(), getCurSDLoc()); + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, &Ops[0], Ops.size()); } @@ -1115,7 +1121,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { "Unknown struct or array constant!"); SmallVector ValueVTs; - ComputeValueVTs(TLI, C->getType(), ValueVTs); + ComputeValueVTs(*TLI, C->getType(), ValueVTs); unsigned NumElts = ValueVTs.size(); if (NumElts == 0) return SDValue(); // empty struct @@ -1131,7 +1137,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } return DAG.getMergeValues(&Constants[0], NumElts, - getCurDebugLoc()); + getCurSDLoc()); } if (const BlockAddress *BA = dyn_cast(C)) @@ -1148,7 +1154,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { Ops.push_back(getValue(CV->getOperand(i))); } else { assert(isa(C) && "Unknown vector constant!"); - EVT EltVT = TLI.getValueType(VecTy->getElementType()); + EVT EltVT = TLI->getValueType(VecTy->getElementType()); SDValue Op; if (EltVT.isFloatingPoint()) @@ -1159,7 +1165,7 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { } // Create a BUILD_VECTOR node. - return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + return NodeMap[V] = DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, &Ops[0], Ops.size()); } @@ -1169,21 +1175,22 @@ SDValue SelectionDAGBuilder::getValueImpl(const Value *V) { DenseMap::iterator SI = FuncInfo.StaticAllocaMap.find(AI); if (SI != FuncInfo.StaticAllocaMap.end()) - return DAG.getFrameIndex(SI->second, TLI.getPointerTy()); + return DAG.getFrameIndex(SI->second, TLI->getPointerTy()); } // If this is an instruction which fast-isel has deferred, select it now. 
if (const Instruction *Inst = dyn_cast(V)) { unsigned InReg = FuncInfo.InitializeRegForValue(Inst); - RegsForValue RFV(*DAG.getContext(), TLI, InReg, Inst->getType()); + RegsForValue RFV(*DAG.getContext(), *TLI, InReg, Inst->getType()); SDValue Chain = DAG.getEntryNode(); - return RFV.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), Chain, NULL, V); + return RFV.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, NULL, V); } llvm_unreachable("Can't get register for value!"); } void SelectionDAGBuilder::visitRet(const ReturnInst &I) { + const TargetLowering *TLI = TM.getTargetLowering(); SDValue Chain = getControlRoot(); SmallVector Outs; SmallVector OutVals; @@ -1196,7 +1203,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // Leave Outs empty so that LowerReturn won't try to load return // registers the usual way. SmallVector PtrValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()), + ComputeValueVTs(*TLI, PointerType::getUnqual(F->getReturnType()), PtrValueVTs); SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]); @@ -1204,26 +1211,26 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { SmallVector ValueVTs; SmallVector Offsets; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); + ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); SmallVector Chains(NumValues); for (unsigned i = 0; i != NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), + SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), RetPtr.getValueType(), RetPtr, DAG.getIntPtrConstant(Offsets[i])); Chains[i] = - DAG.getStore(Chain, getCurDebugLoc(), + DAG.getStore(Chain, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + i), // FIXME: better loc info would be nice. 
Add, MachinePointerInfo(), false, false, 0); } - Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], NumValues); } else if (I.getNumOperands() != 0) { SmallVector ValueVTs; - ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs); + ComputeValueVTs(*TLI, I.getOperand(0)->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues) { SDValue RetOp = getValue(I.getOperand(0)); @@ -1241,12 +1248,12 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ExtendKind = ISD::ZERO_EXTEND; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); + VT = TLI->getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); - unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), VT); + MVT PartVT = TLI->getRegisterType(*DAG.getContext(), VT); SmallVector Parts(NumParts); - getCopyToParts(DAG, getCurDebugLoc(), + getCopyToParts(DAG, getCurSDLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), &Parts[0], NumParts, PartVT, &I, ExtendKind); @@ -1264,7 +1271,7 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { for (unsigned i = 0; i < NumParts; ++i) { Outs.push_back(ISD::OutputArg(Flags, Parts[i].getValueType(), - /*isfixed=*/true, 0, 0)); + VT, /*isfixed=*/true, 0, 0)); OutVals.push_back(Parts[i]); } } @@ -1274,8 +1281,9 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { bool isVarArg = DAG.getMachineFunction().getFunction()->isVarArg(); CallingConv::ID CallConv = DAG.getMachineFunction().getFunction()->getCallingConv(); - Chain = TLI.LowerReturn(Chain, CallConv, isVarArg, - Outs, OutVals, getCurDebugLoc(), DAG); + Chain = TM.getTargetLowering()->LowerReturn(Chain, CallConv, isVarArg, + Outs, OutVals, getCurSDLoc(), + DAG); // Verify that the target's LowerReturn behaved as expected. assert(Chain.getNode() && Chain.getValueType() == MVT::Other && @@ -1474,7 +1482,7 @@ void SelectionDAGBuilder::FindMergedConditions(const Value *Cond, /// If we should emit this as a bunch of and/or'd together conditions, return /// false. bool -SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases){ +SelectionDAGBuilder::ShouldEmitAsBranches(const std::vector &Cases) { if (Cases.size() != 2) return true; // If this is two comparisons of the same values or'd or and'd together, they @@ -1519,7 +1527,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // If this is not a fall-through branch, emit the branch. if (Succ0MBB != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Succ0MBB))); @@ -1548,7 +1556,7 @@ void SelectionDAGBuilder::visitBr(const BranchInst &I) { // jle foo // if (const BinaryOperator *BOp = dyn_cast(CondVal)) { - if (!TLI.isJumpExpensive() && + if (!TM.getTargetLowering()->isJumpExpensive() && BOp->hasOneUse() && (BOp->getOpcode() == Instruction::And || BOp->getOpcode() == Instruction::Or)) { @@ -1596,7 +1604,7 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB) { SDValue Cond; SDValue CondLHS = getValue(CB.CmpLHS); - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); // Build the setcc now. 
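In the non-demoted path above, getCopyToParts breaks each return value into NumParts registers of PartVT, for example an i64 returned in two i32 parts on a 32-bit target, after applying the sign/zero extension the return attributes ask for. A rough model of just the splitting step (copyToParts below is a simplified stand-in, not the LLVM helper):

#include <cstdint>
#include <cstdio>
#include <vector>

// Split a 64-bit value into 32-bit "register parts", lowest part first,
// the way an i64 return is decomposed when only i32 is legal.
static std::vector<uint32_t> copyToParts(uint64_t Val, unsigned NumParts) {
  std::vector<uint32_t> Parts(NumParts);
  for (unsigned i = 0; i != NumParts; ++i)
    Parts[i] = static_cast<uint32_t>(Val >> (32 * i));
  return Parts;
}

int main() {
  for (uint32_t P : copyToParts(0x0123456789abcdefULL, 2))
    std::printf("part: 0x%08x\n", P);       // 0x89abcdef, then 0x01234567
}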
   if (CB.CmpMHS == NULL) {
@@ -1612,18 +1620,17 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
     } else
       Cond = DAG.getSetCC(dl, MVT::i1, CondLHS, getValue(CB.CmpRHS), CB.CC);
   } else {
-    assert(CB.CC == ISD::SETCC_INVALID &&
-           "Condition is undefined for to-the-range belonging check.");
+    assert(CB.CC == ISD::SETLE && "Can handle only LE ranges now");

     const APInt& Low = cast<ConstantInt>(CB.CmpLHS)->getValue();
     const APInt& High = cast<ConstantInt>(CB.CmpRHS)->getValue();

     SDValue CmpOp = getValue(CB.CmpMHS);
     EVT VT = CmpOp.getValueType();
-
-    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(false)) {
+
+    if (cast<ConstantInt>(CB.CmpLHS)->isMinValue(true)) {
       Cond = DAG.getSetCC(dl, MVT::i1, CmpOp, DAG.getConstant(High, VT),
-                          ISD::SETULE);
+                          ISD::SETLE);
     } else {
       SDValue SUB = DAG.getNode(ISD::SUB, dl, VT,
                                 CmpOp, DAG.getConstant(Low, VT));
@@ -1671,11 +1678,11 @@ void SelectionDAGBuilder::visitSwitchCase(CaseBlock &CB,
 void SelectionDAGBuilder::visitJumpTable(JumpTable &JT) {
   // Emit the code for the jump table
   assert(JT.Reg != -1U && "Should lower JT Header first!");
-  EVT PTy = TLI.getPointerTy();
-  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+  EVT PTy = TM.getTargetLowering()->getPointerTy();
+  SDValue Index = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
                                      JT.Reg, PTy);
   SDValue Table = DAG.getJumpTable(JT.JTI, PTy);
-  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurDebugLoc(),
+  SDValue BrJumpTable = DAG.getNode(ISD::BR_JT, getCurSDLoc(),
                                     MVT::Other, Index.getValue(1),
                                     Table, Index);
   DAG.setRoot(BrJumpTable);
@@ -1691,7 +1698,7 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
   // difference between smallest and largest cases.
   SDValue SwitchOp = getValue(JTH.SValue);
   EVT VT = SwitchOp.getValueType();
-  SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+  SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp,
                             DAG.getConstant(JTH.First, VT));

   // The SDNode we just created, which holds the value being switched on minus
@@ -1699,19 +1706,22 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT,
   // can be used as an index into the jump table in a subsequent basic block.
   // This value may be smaller or larger than the target's pointer type, and
   // therefore require extension or truncation.
-  SwitchOp = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), TLI.getPointerTy());
+  const TargetLowering *TLI = TM.getTargetLowering();
+  SwitchOp = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), TLI->getPointerTy());

-  unsigned JumpTableReg = FuncInfo.CreateReg(TLI.getPointerTy());
-  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+  unsigned JumpTableReg = FuncInfo.CreateReg(TLI->getPointerTy());
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(),
                                     JumpTableReg, SwitchOp);
   JT.Reg = JumpTableReg;

   // Emit the range check for the jump table, and branch to the default block
   // for the switch statement if the value being switched on exceeds the largest
   // case in the switch.
-  SDValue CMP = DAG.getSetCC(getCurDebugLoc(),
-                             TLI.getSetCCResultType(Sub.getValueType()), Sub,
-                             DAG.getConstant(JTH.Last-JTH.First,VT),
+  SDValue CMP = DAG.getSetCC(getCurSDLoc(),
+                             TLI->getSetCCResultType(*DAG.getContext(),
+                                                     Sub.getValueType()),
+                             Sub,
+                             DAG.getConstant(JTH.Last - JTH.First,VT),
                              ISD::SETUGT);

   // Set NextBlock to be the MBB immediately after the current one, if any.
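Both range checks above, the SETULE test in visitSwitchCase and the SETUGT guard in visitJumpTableHeader, rely on the same folklore trick: after subtracting the low bound, a single unsigned comparison decides membership in [Low, High], because any value below Low wraps around to a huge unsigned number. A self-contained demonstration:

#include <cstdint>
#include <cstdio>

// One unsigned compare implements Low <= X <= High once Low is
// subtracted; X < Low wraps to a value larger than High - Low.
static bool inRange(uint32_t X, uint32_t Low, uint32_t High) {
  return X - Low <= High - Low;
}

int main() {
  std::printf("%d %d %d\n", inRange(5, 3, 9), inRange(2, 3, 9),
              inRange(10, 3, 9));           // prints: 1 0 0
}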
@@ -1722,17 +1732,88 @@ void SelectionDAGBuilder::visitJumpTableHeader(JumpTable &JT, if (++BBI != FuncInfo.MF->end()) NextBlock = BBI; - SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), MVT::Other, CopyTo, CMP, DAG.getBasicBlock(JT.Default)); if (JT.MBB != NextBlock) - BrCond = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrCond, + BrCond = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrCond, DAG.getBasicBlock(JT.MBB)); DAG.setRoot(BrCond); } +/// Codegen a new tail for a stack protector check ParentMBB which has had its +/// tail spliced into a stack protector check success bb. +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. +void SelectionDAGBuilder::visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB) { + + // First create the loads to the guard/stack slot for the comparison. + const TargetLowering *TLI = TM.getTargetLowering(); + EVT PtrTy = TLI->getPointerTy(); + + MachineFrameInfo *MFI = ParentBB->getParent()->getFrameInfo(); + int FI = MFI->getStackProtectorIndex(); + + const Value *IRGuard = SPD.getGuard(); + SDValue GuardPtr = getValue(IRGuard); + SDValue StackSlotPtr = DAG.getFrameIndex(FI, PtrTy); + + unsigned Align = + TLI->getDataLayout()->getPrefTypeAlignment(IRGuard->getType()); + SDValue Guard = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + GuardPtr, MachinePointerInfo(IRGuard, 0), + true, false, false, Align); + + SDValue StackSlot = DAG.getLoad(PtrTy, getCurSDLoc(), DAG.getEntryNode(), + StackSlotPtr, + MachinePointerInfo::getFixedStack(FI), + true, false, false, Align); + + // Perform the comparison via a subtract/getsetcc. + EVT VT = Guard.getValueType(); + SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, Guard, StackSlot); + + SDValue Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), + Sub.getValueType()), + Sub, DAG.getConstant(0, VT), + ISD::SETNE); + + // If the sub is not 0, then we know the guard/stackslot do not equal, so + // branch to failure MBB. + SDValue BrCond = DAG.getNode(ISD::BRCOND, getCurSDLoc(), + MVT::Other, StackSlot.getOperand(0), + Cmp, DAG.getBasicBlock(SPD.getFailureMBB())); + // Otherwise branch to success MBB. + SDValue Br = DAG.getNode(ISD::BR, getCurSDLoc(), + MVT::Other, BrCond, + DAG.getBasicBlock(SPD.getSuccessMBB())); + + DAG.setRoot(Br); +} + +/// Codegen the failure basic block for a stack protector check. +/// +/// A failure stack protector machine basic block consists simply of a call to +/// __stack_chk_fail(). +/// +/// For a high level explanation of how this fits into the stack protector +/// generation see the comment on the declaration of class +/// StackProtectorDescriptor. 
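visitSPDescriptorParent above is easier to read with the runtime picture in mind: the prologue copied the process-wide guard into a frame slot, and the check reloads both values, subtracts them, and branches to the failure block on a nonzero difference. A standalone C++ analogue (StackGuard, its value and checkedFunction are invented; real guards come from the target's stack-protector machinery); the failure path that the comment above documents follows next:

#include <cstdint>
#include <cstdio>
#include <cstdlib>

static uint64_t StackGuard = 0x595e9fbd94fda766ULL; // assumed demo value

static void checkedFunction() {
  uint64_t Slot = StackGuard;       // prologue: copy guard into the frame
  // ... function body; an overflow here could clobber Slot ...
  if (Slot - StackGuard != 0) {     // the subtract/setcc comparison
    std::fputs("stack smashing detected\n", stderr);
    std::abort();                   // failure block: __stack_chk_fail()
  }
}

int main() { checkedFunction(); }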
+void
+SelectionDAGBuilder::visitSPDescriptorFailure(StackProtectorDescriptor &SPD) {
+  const TargetLowering *TLI = TM.getTargetLowering();
+  SDValue Chain = TLI->makeLibCall(DAG, RTLIB::STACKPROTECTOR_CHECK_FAIL,
+                                   MVT::isVoid, 0, 0, false, getCurSDLoc(),
+                                   false, false).second;
+  DAG.setRoot(Chain);
+}
+
 /// visitBitTestHeader - This function emits necessary code to produce value
 /// suitable for "bit tests"
 void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
@@ -1740,18 +1821,20 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
   // Subtract the minimum value
   SDValue SwitchOp = getValue(B.SValue);
   EVT VT = SwitchOp.getValueType();
-  SDValue Sub = DAG.getNode(ISD::SUB, getCurDebugLoc(), VT, SwitchOp,
+  SDValue Sub = DAG.getNode(ISD::SUB, getCurSDLoc(), VT, SwitchOp,
                             DAG.getConstant(B.First, VT));

   // Check range
-  SDValue RangeCmp = DAG.getSetCC(getCurDebugLoc(),
-                                  TLI.getSetCCResultType(Sub.getValueType()),
+  const TargetLowering *TLI = TM.getTargetLowering();
+  SDValue RangeCmp = DAG.getSetCC(getCurSDLoc(),
+                                  TLI->getSetCCResultType(*DAG.getContext(),
+                                                          Sub.getValueType()),
                                   Sub, DAG.getConstant(B.Range, VT),
                                   ISD::SETUGT);

   // Determine the type of the test operands.
   bool UsePtrType = false;
-  if (!TLI.isTypeLegal(VT))
+  if (!TLI->isTypeLegal(VT))
     UsePtrType = true;
   else {
     for (unsigned i = 0, e = B.Cases.size(); i != e; ++i)
@@ -1763,13 +1846,13 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
       }
   }
   if (UsePtrType) {
-    VT = TLI.getPointerTy();
-    Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT);
+    VT = TLI->getPointerTy();
+    Sub = DAG.getZExtOrTrunc(Sub, getCurSDLoc(), VT);
   }

   B.RegVT = VT.getSimpleVT();
   B.Reg = FuncInfo.CreateReg(B.RegVT);
-  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(),
+  SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurSDLoc(),
                                     B.Reg, Sub);

   // Set NextBlock to be the MBB immediately after the current one, if any.
@@ -1784,12 +1867,12 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B,
   addSuccessorWithWeight(SwitchBB, B.Default);
   addSuccessorWithWeight(SwitchBB, MBB);

-  SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurDebugLoc(),
+  SDValue BrRange = DAG.getNode(ISD::BRCOND, getCurSDLoc(),
                                 MVT::Other, CopyTo, RangeCmp,
                                 DAG.getBasicBlock(B.Default));

   if (MBB != NextBlock)
-    BrRange = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, CopyTo,
+    BrRange = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, CopyTo,
                           DAG.getBasicBlock(MBB));

   DAG.setRoot(BrRange);
@@ -1803,35 +1886,36 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB,
                                            BitTestCase &B,
                                            MachineBasicBlock *SwitchBB) {
   MVT VT = BB.RegVT;
-  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(),
+  SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurSDLoc(),
                                        Reg, VT);
   SDValue Cmp;
   unsigned PopCount = CountPopulation_64(B.Mask);
+  const TargetLowering *TLI = TM.getTargetLowering();
   if (PopCount == 1) {
     // Testing for a single bit; just compare the shift count with what it
     // would need to be to shift a 1 bit in that position.
-    Cmp = DAG.getSetCC(getCurDebugLoc(),
-                       TLI.getSetCCResultType(VT),
+    Cmp = DAG.getSetCC(getCurSDLoc(),
+                       TLI->getSetCCResultType(*DAG.getContext(), VT),
                        ShiftOp,
-                       DAG.getConstant(CountTrailingZeros_64(B.Mask), VT),
+                       DAG.getConstant(countTrailingZeros(B.Mask), VT),
                        ISD::SETEQ);
   } else if (PopCount == BB.Range) {
     // There is only one zero bit in the range, test for it directly.
- Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(VT), + Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), VT), ShiftOp, DAG.getConstant(CountTrailingOnes_64(B.Mask), VT), ISD::SETNE); } else { // Make desired shift - SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurDebugLoc(), VT, + SDValue SwitchVal = DAG.getNode(ISD::SHL, getCurSDLoc(), VT, DAG.getConstant(1, VT), ShiftOp); // Emit bit tests and jumps - SDValue AndOp = DAG.getNode(ISD::AND, getCurDebugLoc(), + SDValue AndOp = DAG.getNode(ISD::AND, getCurSDLoc(), VT, SwitchVal, DAG.getConstant(B.Mask, VT)); - Cmp = DAG.getSetCC(getCurDebugLoc(), - TLI.getSetCCResultType(VT), + Cmp = DAG.getSetCC(getCurSDLoc(), + TLI->getSetCCResultType(*DAG.getContext(), VT), AndOp, DAG.getConstant(0, VT), ISD::SETNE); } @@ -1841,7 +1925,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, // The branch weight from SwitchBB to NextMBB is BranchWeightToNext. addSuccessorWithWeight(SwitchBB, NextMBB, BranchWeightToNext); - SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurDebugLoc(), + SDValue BrAnd = DAG.getNode(ISD::BRCOND, getCurSDLoc(), MVT::Other, getControlRoot(), Cmp, DAG.getBasicBlock(B.TargetBB)); @@ -1853,7 +1937,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, NextBlock = BBI; if (NextMBB != NextBlock) - BrAnd = DAG.getNode(ISD::BR, getCurDebugLoc(), MVT::Other, BrAnd, + BrAnd = DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, BrAnd, DAG.getBasicBlock(NextMBB)); DAG.setRoot(BrAnd); @@ -1885,7 +1969,7 @@ void SelectionDAGBuilder::visitInvoke(const InvokeInst &I) { addSuccessorWithWeight(InvokeMBB, LandingPad); // Drop into normal successor. - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Return))); } @@ -1904,28 +1988,29 @@ void SelectionDAGBuilder::visitLandingPad(const LandingPadInst &LP) { // If there aren't registers to copy the values into (e.g., during SjLj // exceptions), then don't bother to create these DAG nodes. - if (TLI.getExceptionPointerRegister() == 0 && - TLI.getExceptionSelectorRegister() == 0) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getExceptionPointerRegister() == 0 && + TLI->getExceptionSelectorRegister() == 0) return; SmallVector ValueVTs; - ComputeValueVTs(TLI, LP.getType(), ValueVTs); + ComputeValueVTs(*TLI, LP.getType(), ValueVTs); assert(ValueVTs.size() == 2 && "Only two-valued landingpads are supported"); // Get the two live-in registers as SDValues. The physregs have already been // copied into virtual registers. SDValue Ops[2]; Ops[0] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurDebugLoc(), - FuncInfo.ExceptionPointerVirtReg, TLI.getPointerTy()), - getCurDebugLoc(), ValueVTs[0]); + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionPointerVirtReg, TLI->getPointerTy()), + getCurSDLoc(), ValueVTs[0]); Ops[1] = DAG.getZExtOrTrunc( - DAG.getCopyFromReg(DAG.getEntryNode(), getCurDebugLoc(), - FuncInfo.ExceptionSelectorVirtReg, TLI.getPointerTy()), - getCurDebugLoc(), ValueVTs[1]); + DAG.getCopyFromReg(DAG.getEntryNode(), getCurSDLoc(), + FuncInfo.ExceptionSelectorVirtReg, TLI->getPointerTy()), + getCurSDLoc(), ValueVTs[1]); // Merge into one. 
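The two bit-test functions above lower a sparse switch cluster as: subtract the smallest case value, range-check, then test one shifted bit against a mask of all cases sharing a destination, with a shortcut that compares the shift amount directly when the mask has a single set bit. A self-contained sketch using invented case values 3, 5 and 11:

#include <cstdint>
#include <cstdio>

static bool hitsBitTestCluster(uint64_t X) {
  const uint64_t First = 3;                  // smallest case value
  const uint64_t Range = 11 - 3;             // largest minus smallest
  const uint64_t Mask =                      // one bit per case, biased by First
      (1ULL << (3 - 3)) | (1ULL << (5 - 3)) | (1ULL << (11 - 3));
  uint64_t Sub = X - First;                  // the ISD::SUB above
  if (Sub > Range)                           // BRCOND to the default block
    return false;
  return ((1ULL << Sub) & Mask) != 0;        // SHL + AND + SETNE
}

int main() {
  const uint64_t Xs[] = {3, 4, 5, 11, 12};
  for (uint64_t X : Xs)
    std::printf("%llu -> %d\n", (unsigned long long)X, hitsBitTestCluster(X));
  // prints: 3 -> 1, 4 -> 0, 5 -> 1, 11 -> 1, 12 -> 0
}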
- SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + SDValue Res = DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValueVTs[0], ValueVTs.size()), &Ops[0], 2); setValue(&LP, Res); @@ -1979,7 +2064,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, SDValue CondLHS = getValue(SV); EVT VT = CondLHS.getValueType(); - DebugLoc DL = getCurDebugLoc(); + SDLoc DL = getCurSDLoc(); SDValue Or = DAG.getNode(ISD::OR, DL, VT, CondLHS, DAG.getConstant(CommonBit, VT)); @@ -2030,12 +2115,11 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, // The last case block won't fall through into 'NextBlock' if we emit the // branches in this order. See if rearranging a case value would help. // We start at the bottom as it's the case with the least weight. - for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I){ + for (Case *I = &*(CR.Range.second-2), *E = &*CR.Range.first-1; I != E; --I) if (I->BB == NextBlock) { std::swap(*I, BackCase); break; } - } } // Create a CaseBlock record representing a conditional branch to @@ -2062,7 +2146,7 @@ bool SelectionDAGBuilder::handleSmallSwitchRange(CaseRec& CR, CC = ISD::SETEQ; LHS = SV; RHS = I->High; MHS = NULL; } else { - CC = ISD::SETCC_INVALID; + CC = ISD::SETLE; LHS = I->Low; MHS = SV; RHS = I->High; } @@ -2096,7 +2180,7 @@ static inline bool areJTsAllowed(const TargetLowering &TLI) { static APInt ComputeRange(const APInt &First, const APInt &Last) { uint32_t BitWidth = std::max(Last.getBitWidth(), First.getBitWidth()) + 1; - APInt LastExt = Last.zext(BitWidth), FirstExt = First.zext(BitWidth); + APInt LastExt = Last.sext(BitWidth), FirstExt = First.sext(BitWidth); return (LastExt - FirstExt + 1ULL); } @@ -2116,7 +2200,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) TSize += I->size(); - if (!areJTsAllowed(TLI) || TSize.ult(TLI.getMinimumJumpTableEntries())) + const TargetLowering *TLI = TM.getTargetLowering(); + if (!areJTsAllowed(*TLI) || TSize.ult(TLI->getMinimumJumpTableEntries())) return false; APInt Range = ComputeRange(First, Last); @@ -2162,7 +2247,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, const APInt &Low = cast(I->Low)->getValue(); const APInt &High = cast(I->High)->getValue(); - if (Low.ule(TEI) && TEI.ule(High)) { + if (Low.sle(TEI) && TEI.sle(High)) { DestBBs.push_back(I->BB); if (TEI==High) ++I; @@ -2177,7 +2262,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, for (CaseItr I = CR.Range.first, E = CR.Range.second; I != E; ++I) { DenseMap::iterator Itr = DestWeights.find(I->BB); - if (Itr != DestWeights.end()) + if (Itr != DestWeights.end()) Itr->second += I->ExtraWeight; else DestWeights[I->BB] = I->ExtraWeight; @@ -2197,7 +2282,7 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, } // Create a jump table index for this jump table. - unsigned JTEncoding = TLI.getJumpTableEncoding(); + unsigned JTEncoding = TLI->getJumpTableEncoding(); unsigned JTI = CurMF->getOrCreateJumpTableInfo(JTEncoding) ->createJumpTableIndex(DestBBs); @@ -2217,8 +2302,8 @@ bool SelectionDAGBuilder::handleJTSwitchCase(CaseRec &CR, bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, - MachineBasicBlock *Default, - MachineBasicBlock *SwitchBB) { + MachineBasicBlock* Default, + MachineBasicBlock* SwitchBB) { // Get the MachineFunction which holds the current MBB. 
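The zext-to-sext change in ComputeRange above goes together with the ule-to-sle changes nearby: case values are now ordered as signed integers, and widening both endpoints by one bit before subtracting keeps the difference from overflowing even for INT_MIN..INT_MAX. A small model of the computation:

#include <cstdint>
#include <cstdio>

// Widen by one bit (sign-extending) before subtracting so the range of a
// fully-covered i32 switch, 2^32 values, is still representable.
static uint64_t computeRange(int32_t First, int32_t Last) {
  return static_cast<uint64_t>(static_cast<int64_t>(Last) -
                               static_cast<int64_t>(First)) + 1;
}

int main() {
  std::printf("%llu\n",
              (unsigned long long)computeRange(INT32_MIN, INT32_MAX));
  // prints: 4294967296
}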
This is used when // inserting any additional MBBs necessary to represent the switch. MachineFunction *CurMF = FuncInfo.MF; @@ -2282,7 +2367,9 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, LSize += J->size(); RSize -= J->size(); } - if (areJTsAllowed(TLI)) { + + const TargetLowering *TLI = TM.getTargetLowering(); + if (areJTsAllowed(*TLI)) { // If our case is dense we *really* should handle it earlier! assert((FMetric > 0) && "Should handle dense range earlier!"); } else { @@ -2334,7 +2421,7 @@ bool SelectionDAGBuilder::handleBTSplitSwitchCase(CaseRec& CR, // Create a CaseBlock record representing a conditional branch to // the LHS node if the value being switched on SV is less than C. // Otherwise, branch to LHS. - CaseBlock CB(ISD::SETULT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); + CaseBlock CB(ISD::SETLT, SV, C, NULL, TrueBB, FalseBB, CR.CaseBB); if (CR.CaseBB == SwitchBB) visitSwitchCase(CB, SwitchBB); @@ -2351,8 +2438,9 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, CaseRecVector& WorkList, const Value* SV, MachineBasicBlock* Default, - MachineBasicBlock *SwitchBB){ - EVT PTy = TLI.getPointerTy(); + MachineBasicBlock* SwitchBB) { + const TargetLowering *TLI = TM.getTargetLowering(); + EVT PTy = TLI->getPointerTy(); unsigned IntPtrBits = PTy.getSizeInBits(); Case& FrontCase = *CR.Range.first; @@ -2363,7 +2451,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, MachineFunction *CurMF = FuncInfo.MF; // If target does not have legal shift left, do not emit bit tests at all. - if (!TLI.isOperationLegal(ISD::SHL, TLI.getPointerTy())) + if (!TLI->isOperationLegal(ISD::SHL, PTy)) return false; size_t numCmps = 0; @@ -2406,7 +2494,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, // Optimize the case where all the case values fit in a // word without having to subtract minValue. In this case, // we can optimize away the subtraction. - if (maxValue.ult(IntPtrBits)) { + if (minValue.isNonNegative() && maxValue.slt(IntPtrBits)) { cmpRange = maxValue; } else { lowBound = minValue; @@ -2481,12 +2569,7 @@ bool SelectionDAGBuilder::handleBitTestsSwitchCase(CaseRec& CR, /// Clusterify - Transform simple list of Cases into list of CaseRange's size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const SwitchInst& SI) { - - /// Use a shorter form of declaration, and also - /// show the we want to use CRSBuilder as Clusterifier. - typedef IntegersSubsetMapping Clusterifier; - - Clusterifier TheClusterifier; + size_t numCmps = 0; BranchProbabilityInfo *BPI = FuncInfo.BPI; // Start with "simple" cases @@ -2495,27 +2578,40 @@ size_t SelectionDAGBuilder::Clusterify(CaseVector& Cases, const BasicBlock *SuccBB = i.getCaseSuccessor(); MachineBasicBlock *SMBB = FuncInfo.MBBMap[SuccBB]; - TheClusterifier.add(i.getCaseValueEx(), SMBB, - BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0); - } - - TheClusterifier.optimize(); - - size_t numCmps = 0; - for (Clusterifier::RangeIterator i = TheClusterifier.begin(), - e = TheClusterifier.end(); i != e; ++i, ++numCmps) { - Clusterifier::Cluster &C = *i; - // Update edge weight for the cluster. - unsigned W = C.first.Weight; - - // FIXME: Currently work with ConstantInt based numbers. - // Changing it to APInt based is a pretty heavy for this commit. - Cases.push_back(Case(C.first.getLow().toConstantInt(), - C.first.getHigh().toConstantInt(), C.second, W)); - - if (C.first.getLow() != C.first.getHigh()) - // A range counts double, since it requires two compares. 
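handleBTSplitSwitchCase, continued above, is the fallback when neither a jump table nor bit tests pay off: pick a pivot case value C near the weight midpoint, emit a single SETLT CaseBlock, and push both halves back on the worklist. The emitted control flow is a balanced comparison tree, roughly like this hand-written equivalent (case values invented; the weight-based pivot choice is not modeled):

#include <cstdio>

static const char *lookup(int X) {
  if (X < 40) {                 // the CaseBlock(ISD::SETLT, SV, C, ...) split
    if (X == 10) return "ten";
    if (X == 20) return "twenty";
  } else {
    if (X == 40) return "forty";
    if (X == 80) return "eighty";
  }
  return "default";
}

int main() {
  std::printf("%s %s\n", lookup(20), lookup(80)); // prints: twenty eighty
}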
- ++numCmps; + uint32_t ExtraWeight = + BPI ? BPI->getEdgeWeight(SI.getParent(), i.getSuccessorIndex()) : 0; + + Cases.push_back(Case(i.getCaseValue(), i.getCaseValue(), + SMBB, ExtraWeight)); + } + std::sort(Cases.begin(), Cases.end(), CaseCmp()); + + // Merge case into clusters + if (Cases.size() >= 2) + // Must recompute end() each iteration because it may be + // invalidated by erase if we hold on to it + for (CaseItr I = Cases.begin(), J = llvm::next(Cases.begin()); + J != Cases.end(); ) { + const APInt& nextValue = cast(J->Low)->getValue(); + const APInt& currentValue = cast(I->High)->getValue(); + MachineBasicBlock* nextBB = J->BB; + MachineBasicBlock* currentBB = I->BB; + + // If the two neighboring cases go to the same destination, merge them + // into a single case. + if ((nextValue - currentValue == 1) && (currentBB == nextBB)) { + I->High = J->High; + I->ExtraWeight += J->ExtraWeight; + J = Cases.erase(J); + } else { + I = J++; + } + } + + for (CaseItr I=Cases.begin(), E=Cases.end(); I!=E; ++I, ++numCmps) { + if (I->Low != I->High) + // A range counts double, since it requires two compares. + ++numCmps; } return numCmps; @@ -2549,7 +2645,7 @@ void SelectionDAGBuilder::visitSwitch(const SwitchInst &SI) { // If this is not a fall-through branch, emit the branch. SwitchMBB->addSuccessor(Default); if (Default != NextBlock) - DAG.setRoot(DAG.getNode(ISD::BR, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::BR, getCurSDLoc(), MVT::Other, getControlRoot(), DAG.getBasicBlock(Default))); @@ -2616,7 +2712,7 @@ void SelectionDAGBuilder::visitIndirectBr(const IndirectBrInst &I) { addSuccessorWithWeight(IndirectBrMBB, Succ); } - DAG.setRoot(DAG.getNode(ISD::BRIND, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::BRIND, getCurSDLoc(), MVT::Other, getControlRoot(), getValue(I.getAddress()))); } @@ -2627,7 +2723,7 @@ void SelectionDAGBuilder::visitFSub(const User &I) { if (isa(I.getOperand(0)) && I.getOperand(0) == ConstantFP::getZeroValueForNegation(Ty)) { SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(ISD::FNEG, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::FNEG, getCurSDLoc(), Op2.getValueType(), Op2)); return; } @@ -2638,7 +2734,7 @@ void SelectionDAGBuilder::visitFSub(const User &I) { void SelectionDAGBuilder::visitBinary(const User &I, unsigned OpCode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - setValue(&I, DAG.getNode(OpCode, getCurDebugLoc(), + setValue(&I, DAG.getNode(OpCode, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); } @@ -2646,13 +2742,13 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = TM.getTargetLowering()->getShiftAmountTy(Op2.getValueType()); // Coerce the shift amount to the right type if we can. if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { unsigned ShiftSize = ShiftTy.getSizeInBits(); unsigned Op2Size = Op2.getValueType().getSizeInBits(); - DebugLoc DL = getCurDebugLoc(); + SDLoc DL = getCurSDLoc(); // If the operand is smaller than the shift count type, promote it. 
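The rewritten Clusterify above drops the IntegersSubsetMapping machinery for a plain sort-and-merge: sort the cases by value, then fold neighbors that are contiguous and branch to the same block into one range, summing their weights; each surviving range later costs two compares. A compact sketch of that loop, including the detail the comment warns about, that erase() forces the end to be recomputed every iteration:

#include <algorithm>
#include <cstdio>
#include <vector>

struct Case { long Low, High; int BB; };     // BB stands in for the successor

static void clusterify(std::vector<Case> &Cases) {
  std::sort(Cases.begin(), Cases.end(),
            [](const Case &A, const Case &B) { return A.Low < B.Low; });
  for (size_t I = 0, J = 1; J < Cases.size();) {
    if (Cases[J].Low - Cases[I].High == 1 && Cases[I].BB == Cases[J].BB) {
      Cases[I].High = Cases[J].High;         // grow the current cluster
      Cases.erase(Cases.begin() + J);        // size() re-read next iteration
    } else {
      I = J++;
    }
  }
}

int main() {
  std::vector<Case> Cases = {{1, 1, 7}, {2, 2, 7}, {3, 3, 9}, {4, 4, 9}};
  clusterify(Cases);
  for (const Case &C : Cases)
    std::printf("[%ld, %ld] -> bb%d\n", C.Low, C.High, C.BB);
  // prints: [1, 2] -> bb7 and [3, 4] -> bb9
}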
if (ShiftSize > Op2Size) @@ -2670,7 +2766,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { Op2 = DAG.getZExtOrTrunc(Op2, DL, MVT::i32); } - setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); } @@ -2684,9 +2780,10 @@ void SelectionDAGBuilder::visitSDiv(const User &I) { if (isa(&I) && cast(&I)->isExact() && !isa(Op1) && isa(Op2) && !cast(Op2)->isNullValue()) - setValue(&I, TLI.BuildExactSDIV(Op1, Op2, getCurDebugLoc(), DAG)); + setValue(&I, TM.getTargetLowering()->BuildExactSDIV(Op1, Op2, + getCurSDLoc(), DAG)); else - setValue(&I, DAG.getNode(ISD::SDIV, getCurDebugLoc(), Op1.getValueType(), + setValue(&I, DAG.getNode(ISD::SDIV, getCurSDLoc(), Op1.getValueType(), Op1, Op2)); } @@ -2700,8 +2797,8 @@ void SelectionDAGBuilder::visitICmp(const User &I) { SDValue Op2 = getValue(I.getOperand(1)); ISD::CondCode Opcode = getICmpCondCode(predicate); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Opcode)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Opcode)); } void SelectionDAGBuilder::visitFCmp(const User &I) { @@ -2715,13 +2812,13 @@ void SelectionDAGBuilder::visitFCmp(const User &I) { ISD::CondCode Condition = getFCmpCondCode(predicate); if (TM.Options.NoNaNsFPMath) Condition = getFCmpCodeWithoutNaN(Condition); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getSetCC(getCurDebugLoc(), DestVT, Op1, Op2, Condition)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getSetCC(getCurSDLoc(), DestVT, Op1, Op2, Condition)); } void SelectionDAGBuilder::visitSelect(const User &I) { SmallVector ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(*TM.getTargetLowering(), I.getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -2733,7 +2830,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { ISD::VSELECT : ISD::SELECT; for (unsigned i = 0; i != NumValues; ++i) - Values[i] = DAG.getNode(OpCode, getCurDebugLoc(), + Values[i] = DAG.getNode(OpCode, getCurSDLoc(), TrueVal.getNode()->getValueType(TrueVal.getResNo()+i), Cond, SDValue(TrueVal.getNode(), @@ -2741,7 +2838,7 @@ void SelectionDAGBuilder::visitSelect(const User &I) { SDValue(FalseVal.getNode(), FalseVal.getResNo() + i)); - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValueVTs[0], NumValues), &Values[0], NumValues)); } @@ -2749,117 +2846,134 @@ void SelectionDAGBuilder::visitSelect(const User &I) { void SelectionDAGBuilder::visitTrunc(const User &I) { // TruncInst cannot be a no-op cast because sizeof(src) > sizeof(dest). SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitZExt(const User &I) { // ZExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // ZExt also can't be a cast to bool for same reason. 
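BuildExactSDIV, reached above through TM.getTargetLowering(), exploits the IR-level `exact` flag: an exact signed division by a constant needs no divide instruction, only an arithmetic shift for the power-of-two factor and a multiply by the odd factor's multiplicative inverse modulo 2^n. A sketch for dividing known multiples of 12 (the divisor 12 and the Newton-iteration inverse are illustrative, not lifted from the patch):

#include <cstdint>
#include <cstdio>

// Inverse of an odd A modulo 2^32; each Newton step doubles the number
// of correct low bits, and A itself is already correct to 3 bits.
static uint32_t inverseMod2_32(uint32_t A) {
  uint32_t X = A;
  for (int i = 0; i < 4; ++i)
    X *= 2 - A * X;
  return X;
}

static int32_t exactSDiv12(int32_t N) {      // N must be a multiple of 12
  int32_t Shifted = N >> 2;                  // divide out the 4 in 12 = 4 * 3
  return static_cast<int32_t>(static_cast<uint32_t>(Shifted) *
                              inverseMod2_32(3));
}

int main() {
  std::printf("%d %d\n", exactSDiv12(144), exactSDiv12(-600)); // 12 -50
}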
So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::ZERO_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitSExt(const User &I) { // SExt cannot be a no-op cast because sizeof(src) < sizeof(dest). // SExt also can't be a cast to bool for same reason. So, nothing much to do SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SIGN_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPTrunc(const User &I) { // FPTrunc is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurDebugLoc(), + const TargetLowering *TLI = TM.getTargetLowering(); + EVT DestVT = TLI->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_ROUND, getCurSDLoc(), DestVT, N, - DAG.getTargetConstant(0, TLI.getPointerTy()))); + DAG.getTargetConstant(0, TLI->getPointerTy()))); } -void SelectionDAGBuilder::visitFPExt(const User &I){ +void SelectionDAGBuilder::visitFPExt(const User &I) { // FPExt is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_EXTEND, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToUI(const User &I) { // FPToUI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_UINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitFPToSI(const User &I) { // FPToSI is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::FP_TO_SINT, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitUIToFP(const User &I) { // UIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::UINT_TO_FP, getCurSDLoc(), DestVT, N)); } -void SelectionDAGBuilder::visitSIToFP(const User &I){ +void SelectionDAGBuilder::visitSIToFP(const User &I) { // SIToFP is never a no-op cast, no need to check SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurDebugLoc(), DestVT, N)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getNode(ISD::SINT_TO_FP, getCurSDLoc(), DestVT, N)); } void SelectionDAGBuilder::visitPtrToInt(const User &I) { // What to do 
depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitIntToPtr(const User &I) { // What to do depends on the size of the integer and the size of the pointer. // We can either truncate, zero extend, or no-op, accordingly. SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); - setValue(&I, DAG.getZExtOrTrunc(N, getCurDebugLoc(), DestVT)); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + setValue(&I, DAG.getZExtOrTrunc(N, getCurSDLoc(), DestVT)); } void SelectionDAGBuilder::visitBitCast(const User &I) { SDValue N = getValue(I.getOperand(0)); - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); // BitCast assures us that source and destination are the same size so this is // either a BITCAST or a no-op. if (DestVT != N.getValueType()) - setValue(&I, DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::BITCAST, getCurSDLoc(), DestVT, N)); // convert types. else setValue(&I, N); // noop cast. } +void SelectionDAGBuilder::visitAddrSpaceCast(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + const Value *SV = I.getOperand(0); + SDValue N = getValue(SV); + EVT DestVT = TM.getTargetLowering()->getValueType(I.getType()); + + unsigned SrcAS = SV->getType()->getPointerAddressSpace(); + unsigned DestAS = I.getType()->getPointerAddressSpace(); + + if (!TLI.isNoopAddrSpaceCast(SrcAS, DestAS)) + N = DAG.getAddrSpaceCast(getCurSDLoc(), DestVT, N, SrcAS, DestAS); + + setValue(&I, N); +} + void SelectionDAGBuilder::visitInsertElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); SDValue InVal = getValue(I.getOperand(1)); - SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), - getValue(I.getOperand(2))); - setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurDebugLoc(), - TLI.getValueType(I.getType()), + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(2)), + getCurSDLoc(), TLI.getVectorIdxTy()); + setValue(&I, DAG.getNode(ISD::INSERT_VECTOR_ELT, getCurSDLoc(), + TM.getTargetLowering()->getValueType(I.getType()), InVec, InVal, InIdx)); } void SelectionDAGBuilder::visitExtractElement(const User &I) { + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); SDValue InVec = getValue(I.getOperand(0)); - SDValue InIdx = DAG.getNode(ISD::ZERO_EXTEND, getCurDebugLoc(), - TLI.getPointerTy(), - getValue(I.getOperand(1))); - setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - TLI.getValueType(I.getType()), InVec, InIdx)); + SDValue InIdx = DAG.getSExtOrTrunc(getValue(I.getOperand(1)), + getCurSDLoc(), TLI.getVectorIdxTy()); + setValue(&I, DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), + TM.getTargetLowering()->getValueType(I.getType()), + InVec, InIdx)); } // Utility for visitShuffleVector - Return true if every element in Mask, @@ -2880,13 +2994,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SmallVector Mask; ShuffleVectorInst::getShuffleMask(cast(I.getOperand(2)), Mask); unsigned MaskNumElts = Mask.size(); - - EVT VT = 
TLI.getValueType(I.getType()); + + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getType()); EVT SrcVT = Src1.getValueType(); unsigned SrcNumElts = SrcVT.getVectorNumElements(); if (SrcNumElts == MaskNumElts) { - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, &Mask[0])); return; } @@ -2901,7 +3016,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (isSequentialInRange(Mask, 0, SrcNumElts, 0) && isSequentialInRange(Mask, SrcNumElts, SrcNumElts, SrcNumElts)) { // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), VT, Src1, Src2)); return; } @@ -2909,7 +3024,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (isSequentialInRange(Mask, 0, SrcNumElts, SrcNumElts) && isSequentialInRange(Mask, SrcNumElts, SrcNumElts, 0)) { // The shuffle is concatenating two vectors together. - setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::CONCAT_VECTORS, getCurSDLoc(), VT, Src2, Src1)); return; } @@ -2927,10 +3042,10 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MOps2[0] = Src2; Src1 = Src1U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurDebugLoc(), VT, + getCurSDLoc(), VT, &MOps1[0], NumConcat); Src2 = Src2U ? DAG.getUNDEF(VT) : DAG.getNode(ISD::CONCAT_VECTORS, - getCurDebugLoc(), VT, + getCurSDLoc(), VT, &MOps2[0], NumConcat); // Readjust mask for new input vector length. @@ -2942,7 +3057,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MappedOps.push_back(Idx); } - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, &MappedOps[0])); return; } @@ -3002,8 +3117,9 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { if (RangeUse[Input] == 0) Src = DAG.getUNDEF(VT); else - Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurDebugLoc(), VT, - Src, DAG.getIntPtrConstant(StartIdx[Input])); + Src = DAG.getNode(ISD::EXTRACT_SUBVECTOR, getCurSDLoc(), VT, + Src, DAG.getConstant(StartIdx[Input], + TLI->getVectorIdxTy())); } // Calculate new mask. @@ -3019,7 +3135,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { MappedOps.push_back(Idx); } - setValue(&I, DAG.getVectorShuffle(VT, getCurDebugLoc(), Src1, Src2, + setValue(&I, DAG.getVectorShuffle(VT, getCurSDLoc(), Src1, Src2, &MappedOps[0])); return; } @@ -3029,7 +3145,7 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { // replacing the shuffle with extract and build vector. // to insert and build vector. EVT EltVT = VT.getVectorElementType(); - EVT PtrVT = TLI.getPointerTy(); + EVT IdxVT = TLI->getVectorIdxTy(); SmallVector Ops; for (unsigned i = 0; i != MaskNumElts; ++i) { int Idx = Mask[i]; @@ -3041,14 +3157,14 @@ void SelectionDAGBuilder::visitShuffleVector(const User &I) { SDValue &Src = Idx < (int)SrcNumElts ? 
Src1 : Src2; if (Idx >= (int)SrcNumElts) Idx -= SrcNumElts; - Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurDebugLoc(), - EltVT, Src, DAG.getConstant(Idx, PtrVT)); + Res = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, getCurSDLoc(), + EltVT, Src, DAG.getConstant(Idx, IdxVT)); } Ops.push_back(Res); } - setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::BUILD_VECTOR, getCurSDLoc(), VT, &Ops[0], Ops.size())); } @@ -3062,10 +3178,11 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + const TargetLowering *TLI = TM.getTargetLowering(); SmallVector AggValueVTs; - ComputeValueVTs(TLI, AggTy, AggValueVTs); + ComputeValueVTs(*TLI, AggTy, AggValueVTs); SmallVector ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(*TLI, ValTy, ValValueVTs); unsigned NumAggValues = AggValueVTs.size(); unsigned NumValValues = ValValueVTs.size(); @@ -3089,7 +3206,7 @@ void SelectionDAGBuilder::visitInsertValue(const InsertValueInst &I) { Values[i] = IntoUndef ? DAG.getUNDEF(AggValueVTs[i]) : SDValue(Agg.getNode(), Agg.getResNo() + i); - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&AggValueVTs[0], NumAggValues), &Values[0], NumAggValues)); } @@ -3102,8 +3219,9 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { unsigned LinearIndex = ComputeLinearIndex(AggTy, I.getIndices()); + const TargetLowering *TLI = TM.getTargetLowering(); SmallVector ValValueVTs; - ComputeValueVTs(TLI, ValTy, ValValueVTs); + ComputeValueVTs(*TLI, ValTy, ValValueVTs); unsigned NumValValues = ValValueVTs.size(); @@ -3123,16 +3241,18 @@ void SelectionDAGBuilder::visitExtractValue(const ExtractValueInst &I) { DAG.getUNDEF(Agg.getNode()->getValueType(Agg.getResNo() + i)) : SDValue(Agg.getNode(), Agg.getResNo() + i); - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValValueVTs[0], NumValValues), &Values[0], NumValValues)); } void SelectionDAGBuilder::visitGetElementPtr(const User &I) { - SDValue N = getValue(I.getOperand(0)); + Value *Op0 = I.getOperand(0); // Note that the pointer operand may be a vector of pointers. Take the scalar // element which holds a pointer. - Type *Ty = I.getOperand(0)->getType()->getScalarType(); + Type *Ty = Op0->getType()->getScalarType(); + unsigned AS = Ty->getPointerAddressSpace(); + SDValue N = getValue(Op0); for (GetElementPtrInst::const_op_iterator OI = I.op_begin()+1, E = I.op_end(); OI != E; ++OI) { @@ -3142,7 +3262,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { if (Field) { // N = N + Offset uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); - N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, DAG.getConstant(Offset, N.getValueType())); } @@ -3151,50 +3271,50 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { Ty = cast(Ty)->getElementType(); // If this is a constant subscript, handle it quickly. 
+ const TargetLowering *TLI = TM.getTargetLowering(); if (const ConstantInt *CI = dyn_cast(Idx)) { if (CI->isZero()) continue; uint64_t Offs = TD->getTypeAllocSize(Ty)*cast(CI)->getSExtValue(); SDValue OffsVal; - EVT PTy = TLI.getPointerTy(); + EVT PTy = TLI->getPointerTy(AS); unsigned PtrBits = PTy.getSizeInBits(); if (PtrBits < 64) - OffsVal = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), - TLI.getPointerTy(), + OffsVal = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), PTy, DAG.getConstant(Offs, MVT::i64)); else - OffsVal = DAG.getIntPtrConstant(Offs); + OffsVal = DAG.getConstant(Offs, PTy); - N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, + N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, OffsVal); continue; } // N = N + Idx * ElementSize; - APInt ElementSize = APInt(TLI.getPointerTy().getSizeInBits(), + APInt ElementSize = APInt(TLI->getPointerSizeInBits(AS), TD->getTypeAllocSize(Ty)); SDValue IdxN = getValue(Idx); // If the index is smaller or larger than intptr_t, truncate or extend // it. - IdxN = DAG.getSExtOrTrunc(IdxN, getCurDebugLoc(), N.getValueType()); + IdxN = DAG.getSExtOrTrunc(IdxN, getCurSDLoc(), N.getValueType()); // If this is a multiply by a power of two, turn it into a shl // immediately. This is a very common case. if (ElementSize != 1) { if (ElementSize.isPowerOf2()) { unsigned Amt = ElementSize.logBase2(); - IdxN = DAG.getNode(ISD::SHL, getCurDebugLoc(), + IdxN = DAG.getNode(ISD::SHL, getCurSDLoc(), N.getValueType(), IdxN, DAG.getConstant(Amt, IdxN.getValueType())); } else { SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); - IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), + IdxN = DAG.getNode(ISD::MUL, getCurSDLoc(), N.getValueType(), IdxN, Scale); } } - N = DAG.getNode(ISD::ADD, getCurDebugLoc(), + N = DAG.getNode(ISD::ADD, getCurSDLoc(), N.getValueType(), N, IdxN); } } @@ -3209,18 +3329,19 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { return; // getValue will auto-populate this. Type *Ty = I.getAllocatedType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); + const TargetLowering *TLI = TM.getTargetLowering(); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); unsigned Align = - std::max((unsigned)TLI.getDataLayout()->getPrefTypeAlignment(Ty), + std::max((unsigned)TLI->getDataLayout()->getPrefTypeAlignment(Ty), I.getAlignment()); SDValue AllocSize = getValue(I.getArraySize()); - EVT IntPtr = TLI.getPointerTy(); + EVT IntPtr = TLI->getPointerTy(); if (AllocSize.getValueType() != IntPtr) - AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurDebugLoc(), IntPtr); + AllocSize = DAG.getZExtOrTrunc(AllocSize, getCurSDLoc(), IntPtr); - AllocSize = DAG.getNode(ISD::MUL, getCurDebugLoc(), IntPtr, + AllocSize = DAG.getNode(ISD::MUL, getCurSDLoc(), IntPtr, AllocSize, DAG.getConstant(TySize, IntPtr)); @@ -3233,18 +3354,18 @@ void SelectionDAGBuilder::visitAlloca(const AllocaInst &I) { // Round the size of the allocation up to the stack alignment size // by add SA-1 to the size. - AllocSize = DAG.getNode(ISD::ADD, getCurDebugLoc(), + AllocSize = DAG.getNode(ISD::ADD, getCurSDLoc(), AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(StackAlign-1)); // Mask out the low bits for alignment purposes. 
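The GEP loop above accumulates N = N + Idx * ElementSize for every index, first sign-extending or truncating the index to the pointer width, and strength-reduces the multiply into ISD::SHL whenever the element size is a power of two. The same arithmetic in plain C++ (assuming a nonzero element size):

#include <cstdint>
#include <cstdio>

static uintptr_t gepOffset(uintptr_t Base, int64_t Idx, uint64_t EltSize) {
  uint64_t Scaled;
  if ((EltSize & (EltSize - 1)) == 0) {      // power of two: shift instead
    unsigned Amt = 0;                        // logBase2 of EltSize
    while ((1ULL << Amt) != EltSize) ++Amt;
    Scaled = static_cast<uint64_t>(Idx) << Amt;
  } else {
    Scaled = static_cast<uint64_t>(Idx) * EltSize;
  }
  return Base + static_cast<uintptr_t>(Scaled);
}

int main() {
  std::printf("%#llx\n", (unsigned long long)gepOffset(0x1000, 3, 16));
  std::printf("%#llx\n", (unsigned long long)gepOffset(0x1000, 2, 12));
  // prints: 0x1030 then 0x1018
}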
- AllocSize = DAG.getNode(ISD::AND, getCurDebugLoc(), + AllocSize = DAG.getNode(ISD::AND, getCurSDLoc(), AllocSize.getValueType(), AllocSize, DAG.getIntPtrConstant(~(uint64_t)(StackAlign-1))); SDValue Ops[] = { getRoot(), AllocSize, DAG.getIntPtrConstant(Align) }; SDVTList VTs = DAG.getVTList(AllocSize.getValueType(), MVT::Other); - SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurDebugLoc(), + SDValue DSA = DAG.getNode(ISD::DYNAMIC_STACKALLOC, getCurSDLoc(), VTs, Ops, 3); setValue(&I, DSA); DAG.setRoot(DSA.getValue(1)); @@ -3272,7 +3393,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { SmallVector ValueVTs; SmallVector Offsets; - ComputeValueVTs(TLI, Ty, ValueVTs, &Offsets); + ComputeValueVTs(*TM.getTargetLowering(), Ty, ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3306,15 +3427,15 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { // (MaxParallelChains should always remain as failsafe). if (ChainI == MaxParallelChains) { assert(PendingLoads.empty() && "PendingLoads must be serialized first"); - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], ChainI); Root = Chain; ChainI = 0; } - SDValue A = DAG.getNode(ISD::ADD, getCurDebugLoc(), + SDValue A = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(ValueVTs[i], getCurDebugLoc(), Root, + SDValue L = DAG.getLoad(ValueVTs[i], getCurSDLoc(), Root, A, MachinePointerInfo(SV, Offsets[i]), isVolatile, isNonTemporal, isInvariant, Alignment, TBAAInfo, Ranges); @@ -3324,7 +3445,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { } if (!ConstantMemory) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], ChainI); if (isVolatile) DAG.setRoot(Chain); @@ -3332,7 +3453,7 @@ void SelectionDAGBuilder::visitLoad(const LoadInst &I) { PendingLoads.push_back(Chain); } - setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&ValueVTs[0], NumValues), &Values[0], NumValues)); } @@ -3346,7 +3467,7 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { SmallVector ValueVTs; SmallVector Offsets; - ComputeValueVTs(TLI, SrcV->getType(), ValueVTs, &Offsets); + ComputeValueVTs(*TM.getTargetLowering(), SrcV->getType(), ValueVTs, &Offsets); unsigned NumValues = ValueVTs.size(); if (NumValues == 0) return; @@ -3370,30 +3491,28 @@ void SelectionDAGBuilder::visitStore(const StoreInst &I) { for (unsigned i = 0; i != NumValues; ++i, ++ChainI) { // See visitLoad comments. 
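The dynamic-alloca sizing above is the standard round-up idiom: adding StackAlign-1 and then masking the low bits with the AND node yields the next multiple of the stack alignment, which must be a power of two for the mask to work:

#include <cstdint>
#include <cstdio>

static uint64_t roundToStackAlign(uint64_t Size, uint64_t StackAlign) {
  return (Size + StackAlign - 1) & ~(StackAlign - 1); // ADD then AND
}

int main() {
  std::printf("%llu %llu\n",
              (unsigned long long)roundToStackAlign(37, 16),
              (unsigned long long)roundToStackAlign(64, 16)); // 48 64
}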
if (ChainI == MaxParallelChains) { - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], ChainI); Root = Chain; ChainI = 0; } - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, Ptr, + SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, Ptr, DAG.getConstant(Offsets[i], PtrVT)); - SDValue St = DAG.getStore(Root, getCurDebugLoc(), + SDValue St = DAG.getStore(Root, getCurSDLoc(), SDValue(Src.getNode(), Src.getResNo() + i), Add, MachinePointerInfo(PtrV, Offsets[i]), isVolatile, isNonTemporal, Alignment, TBAAInfo); Chains[ChainI] = St; } - SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue StoreNode = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], ChainI); - ++SDNodeOrder; - AssignOrderingToNode(StoreNode.getNode()); DAG.setRoot(StoreNode); } static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, SynchronizationScope Scope, - bool Before, DebugLoc dl, + bool Before, SDLoc dl, SelectionDAG &DAG, const TargetLowering &TLI) { // Fence, if necessary @@ -3416,39 +3535,40 @@ static SDValue InsertFenceForAtomic(SDValue Chain, AtomicOrdering Order, } void SelectionDAGBuilder::visitAtomicCmpXchg(const AtomicCmpXchgInst &I) { - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); - if (TLI.getInsertFencesForAtomic()) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue L = DAG.getAtomic(ISD::ATOMIC_CMP_SWAP, dl, - getValue(I.getCompareOperand()).getValueType().getSimpleVT(), + getValue(I.getCompareOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getCompareOperand()), getValue(I.getNewValOperand()), MachinePointerInfo(I.getPointerOperand()), 0 /* Alignment */, - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); ISD::NodeType NT; switch (I.getOperation()) { default: llvm_unreachable("Unknown atomicrmw operation"); @@ -3469,47 +3589,50 @@ void SelectionDAGBuilder::visitAtomicRMW(const AtomicRMWInst &I) { SDValue InChain = getRoot(); - if (TLI.getInsertFencesForAtomic()) + const TargetLowering *TLI = TM.getTargetLowering(); + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue L = DAG.getAtomic(NT, dl, - getValue(I.getValOperand()).getValueType().getSimpleVT(), + getValue(I.getValOperand()).getSimpleValueType(), InChain, getValue(I.getPointerOperand()), getValue(I.getValOperand()), I.getPointerOperand(), 0 /* Alignment */, - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? 
Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitFence(const FenceInst &I) { - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); + const TargetLowering *TLI = TM.getTargetLowering(); SDValue Ops[3]; Ops[0] = getRoot(); - Ops[1] = DAG.getConstant(I.getOrdering(), TLI.getPointerTy()); - Ops[2] = DAG.getConstant(I.getSynchScope(), TLI.getPointerTy()); + Ops[1] = DAG.getConstant(I.getOrdering(), TLI->getPointerTy()); + Ops[2] = DAG.getConstant(I.getSynchScope(), TLI->getPointerTy()); DAG.setRoot(DAG.getNode(ISD::ATOMIC_FENCE, dl, MVT::Other, Ops, 3)); } void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); - EVT VT = TLI.getValueType(I.getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); @@ -3518,35 +3641,36 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { DAG.getAtomic(ISD::ATOMIC_LOAD, dl, VT, VT, InChain, getValue(I.getPointerOperand()), I.getPointerOperand(), I.getAlignment(), - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? Monotonic : Order, Scope); SDValue OutChain = L.getValue(1); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); setValue(&I, L); DAG.setRoot(OutChain); } void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { - DebugLoc dl = getCurDebugLoc(); + SDLoc dl = getCurSDLoc(); AtomicOrdering Order = I.getOrdering(); SynchronizationScope Scope = I.getSynchScope(); SDValue InChain = getRoot(); - EVT VT = TLI.getValueType(I.getValueOperand()->getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + EVT VT = TLI->getValueType(I.getValueOperand()->getType()); if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) InChain = InsertFenceForAtomic(InChain, Order, Scope, true, dl, - DAG, TLI); + DAG, *TLI); SDValue OutChain = DAG.getAtomic(ISD::ATOMIC_STORE, dl, VT, @@ -3554,12 +3678,12 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { getValue(I.getPointerOperand()), getValue(I.getValueOperand()), I.getPointerOperand(), I.getAlignment(), - TLI.getInsertFencesForAtomic() ? Monotonic : Order, + TLI->getInsertFencesForAtomic() ? 
Monotonic : Order, Scope); - if (TLI.getInsertFencesForAtomic()) + if (TLI->getInsertFencesForAtomic()) OutChain = InsertFenceForAtomic(OutChain, Order, Scope, false, dl, - DAG, TLI); + DAG, *TLI); DAG.setRoot(OutChain); } @@ -3584,12 +3708,13 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, // Info is set by getTgtMemInstrinsic TargetLowering::IntrinsicInfo Info; - bool IsTgtIntrinsic = TLI.getTgtMemIntrinsic(Info, I, Intrinsic); + const TargetLowering *TLI = TM.getTargetLowering(); + bool IsTgtIntrinsic = TLI->getTgtMemIntrinsic(Info, I, Intrinsic); // Add the intrinsic ID as an integer operand if it's not a target intrinsic. if (!IsTgtIntrinsic || Info.opc == ISD::INTRINSIC_VOID || Info.opc == ISD::INTRINSIC_W_CHAIN) - Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI.getPointerTy())); + Ops.push_back(DAG.getTargetConstant(Intrinsic, TLI->getPointerTy())); // Add all operands of the call to the operand list. for (unsigned i = 0, e = I.getNumArgOperands(); i != e; ++i) { @@ -3598,7 +3723,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, } SmallVector ValueVTs; - ComputeValueVTs(TLI, I.getType(), ValueVTs); + ComputeValueVTs(*TLI, I.getType(), ValueVTs); if (HasChain) ValueVTs.push_back(MVT::Other); @@ -3609,20 +3734,20 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, SDValue Result; if (IsTgtIntrinsic) { // This is target intrinsic that touches memory - Result = DAG.getMemIntrinsicNode(Info.opc, getCurDebugLoc(), + Result = DAG.getMemIntrinsicNode(Info.opc, getCurSDLoc(), VTs, &Ops[0], Ops.size(), Info.memVT, MachinePointerInfo(Info.ptrVal, Info.offset), Info.align, Info.vol, Info.readMem, Info.writeMem); } else if (!HasChain) { - Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurDebugLoc(), + Result = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, getCurSDLoc(), VTs, &Ops[0], Ops.size()); } else if (!I.getType()->isVoidTy()) { - Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurDebugLoc(), + Result = DAG.getNode(ISD::INTRINSIC_W_CHAIN, getCurSDLoc(), VTs, &Ops[0], Ops.size()); } else { - Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurDebugLoc(), + Result = DAG.getNode(ISD::INTRINSIC_VOID, getCurSDLoc(), VTs, &Ops[0], Ops.size()); } @@ -3636,17 +3761,11 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, if (!I.getType()->isVoidTy()) { if (VectorType *PTy = dyn_cast(I.getType())) { - EVT VT = TLI.getValueType(PTy); - Result = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), VT, Result); + EVT VT = TLI->getValueType(PTy); + Result = DAG.getNode(ISD::BITCAST, getCurSDLoc(), VT, Result); } setValue(&I, Result); - } else { - // Assign order to result here. If the intrinsic does not produce a result, - // it won't be mapped to a SDNode and visit() will not assign it an order - // number. - ++SDNodeOrder; - AssignOrderingToNode(Result.getNode()); } } @@ -3657,7 +3776,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, /// /// where Op is the hexadecimal representation of floating point value. static SDValue -GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { +GetSignificand(SelectionDAG &DAG, SDValue Op, SDLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x007fffff, MVT::i32)); SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1, @@ -3672,7 +3791,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { /// where Op is the hexadecimal representation of floating point value. 
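All five atomic visitors above share one shape: when TLI->getInsertFencesForAtomic() is set, the memory access itself is emitted with Monotonic ordering and InsertFenceForAtomic adds explicit fences before and/or after it. The same idea expressed with std::atomic, where Monotonic corresponds to memory_order_relaxed (the GetExponent helper documented just above continues after this aside):

#include <atomic>
#include <cstdio>

// Acquire load as a relaxed (Monotonic) load plus a trailing fence.
static int loadAcquireViaFences(const std::atomic<int> &A) {
  int V = A.load(std::memory_order_relaxed);
  std::atomic_thread_fence(std::memory_order_acquire);
  return V;
}

// Release store as a leading fence plus a relaxed (Monotonic) store.
static void storeReleaseViaFences(std::atomic<int> &A, int V) {
  std::atomic_thread_fence(std::memory_order_release);
  A.store(V, std::memory_order_relaxed);
}

int main() {
  std::atomic<int> A{0};
  storeReleaseViaFences(A, 42);
  std::printf("%d\n", loadAcquireViaFences(A)); // prints: 42
}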
static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, - DebugLoc dl) { + SDLoc dl) { SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, DAG.getConstant(0x7f800000, MVT::i32)); SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0, @@ -3691,7 +3810,7 @@ getF32Constant(SelectionDAG &DAG, unsigned Flt) { /// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandExp(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -3794,7 +3913,7 @@ static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG, /// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -3890,7 +4009,7 @@ static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG, /// expandLog2 - Lower a log2 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -3985,7 +4104,7 @@ static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, /// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandLog10(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -4073,7 +4192,7 @@ static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG, /// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. -static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, +static SDValue expandExp2(SDLoc dl, SDValue Op, SelectionDAG &DAG, const TargetLowering &TLI) { if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { @@ -4168,10 +4287,10 @@ static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. -static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS, +static SDValue expandPow(SDLoc dl, SDValue LHS, SDValue RHS, SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; - if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 && + if (LHS.getValueType() == MVT::f32 && RHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { if (ConstantFPSDNode *LHSC = dyn_cast(LHS)) { APFloat Ten(10.0f); @@ -4276,7 +4395,7 @@ static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS, /// ExpandPowI - Expand a llvm.powi intrinsic. 
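// With a constant exponent, the multiplication tree ExpandPowI emits has the
// shape of classic exponentiation by squaring: O(log n) multiplies instead of
// a libcall to __powidf2. A standalone scalar sketch (hypothetical helper;
// the real lowering additionally respects optimize-for-size and the trivial
// exponent cases):

static double powiSketch(double Base, int Exp) {
  bool Negative = Exp < 0;
  unsigned E = Negative ? 0u - unsigned(Exp) : unsigned(Exp);
  double Result = 1.0;
  while (E) {
    if (E & 1)      // odd bit: fold the current square into the result
      Result *= Base;
    Base *= Base;   // square once per exponent bit
    E >>= 1;
  }
  return Negative ? 1.0 / Result : Result; // powi(x, -n) == 1 / powi(x, n)
}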
-static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, +static SDValue ExpandPowI(SDLoc DL, SDValue LHS, SDValue RHS, SelectionDAG &DAG) { // If RHS is a constant, we can expand this out to a multiplication tree, // otherwise we end up lowering to a call to __powidf2 (for example). When @@ -4335,7 +4454,8 @@ static unsigned getTruncatedArgReg(const SDValue &N) { return 0; const SDValue &Ext = N.getOperand(0); - if (Ext.getOpcode() == ISD::AssertZext || Ext.getOpcode() == ISD::AssertSext){ + if (Ext.getOpcode() == ISD::AssertZext || + Ext.getOpcode() == ISD::AssertSext) { const SDValue &CFR = Ext.getOperand(0); if (CFR.getOpcode() == ISD::CopyFromReg) return cast(CFR.getOperand(1))->getReg(); @@ -4358,20 +4478,19 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, MachineFunction &MF = DAG.getMachineFunction(); const TargetInstrInfo *TII = DAG.getTarget().getInstrInfo(); - const TargetRegisterInfo *TRI = DAG.getTarget().getRegisterInfo(); // Ignore inlined function arguments here. DIVariable DV(Variable); if (DV.isInlinedFnArgument(MF.getFunction())) return false; - unsigned Reg = 0; + Optional Op; // Some arguments' frame index is recorded during argument lowering. - Offset = FuncInfo.getArgumentFrameIndex(Arg); - if (Offset) - Reg = TRI->getFrameRegister(MF); + if (int FI = FuncInfo.getArgumentFrameIndex(Arg)) + Op = MachineOperand::CreateFI(FI); - if (!Reg && N.getNode()) { + if (!Op && N.getNode()) { + unsigned Reg; if (N.getOpcode() == ISD::CopyFromReg) Reg = cast(N.getOperand(1))->getReg(); else @@ -4382,32 +4501,39 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, if (PR) Reg = PR; } + if (Reg) + Op = MachineOperand::CreateReg(Reg, false); } - if (!Reg) { + if (!Op) { // Check if ValueMap has reg number. DenseMap::iterator VMI = FuncInfo.ValueMap.find(V); if (VMI != FuncInfo.ValueMap.end()) - Reg = VMI->second; + Op = MachineOperand::CreateReg(VMI->second, false); } - if (!Reg && N.getNode()) { + if (!Op && N.getNode()) // Check if frame index is available. if (LoadSDNode *LNode = dyn_cast(N.getNode())) if (FrameIndexSDNode *FINode = - dyn_cast(LNode->getBasePtr().getNode())) { - Reg = TRI->getFrameRegister(MF); - Offset = FINode->getIndex(); - } - } + dyn_cast(LNode->getBasePtr().getNode())) + Op = MachineOperand::CreateFI(FINode->getIndex()); - if (!Reg) + if (!Op) return false; - MachineInstrBuilder MIB = BuildMI(MF, getCurDebugLoc(), - TII->get(TargetOpcode::DBG_VALUE)) - .addReg(Reg, RegState::Debug).addImm(Offset).addMetadata(Variable); - FuncInfo.ArgDbgValues.push_back(&*MIB); + // FIXME: This does not handle register-indirect values at offset 0. + bool IsIndirect = Offset != 0; + if (Op->isReg()) + FuncInfo.ArgDbgValues.push_back(BuildMI(MF, getCurDebugLoc(), + TII->get(TargetOpcode::DBG_VALUE), + IsIndirect, + Op->getReg(), Offset, Variable)); + else + FuncInfo.ArgDbgValues.push_back( + BuildMI(MF, getCurDebugLoc(), TII->get(TargetOpcode::DBG_VALUE)) + .addOperand(*Op).addImm(Offset).addMetadata(Variable)); + return true; } @@ -4424,6 +4550,8 @@ SelectionDAGBuilder::EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, /// otherwise lower it and return null. 
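// Note on the sdl/dl split introduced below: SDLoc is the 3.4-era replacement
// for passing a bare DebugLoc when building nodes. It carries the DebugLoc
// plus the IR order of the originating instruction, so every
// DAG.getNode(..., sdl, ...) call tags the new node with its source order
// automatically. That is why the manual "++SDNodeOrder;
// AssignOrderingToNode(...)" bookkeeping is deleted throughout this patch;
// only the debug-value paths still need a raw DebugLoc.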
const char * SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { + const TargetLowering *TLI = TM.getTargetLowering(); + SDLoc sdl = getCurSDLoc(); DebugLoc dl = getCurDebugLoc(); SDValue Res; @@ -4436,17 +4564,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::vaend: visitVAEnd(I); return 0; case Intrinsic::vacopy: visitVACopy(I); return 0; case Intrinsic::returnaddress: - setValue(&I, DAG.getNode(ISD::RETURNADDR, dl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::RETURNADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; case Intrinsic::frameaddress: - setValue(&I, DAG.getNode(ISD::FRAMEADDR, dl, TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::FRAMEADDR, sdl, TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; case Intrinsic::setjmp: - return &"_setjmp"[!TLI.usesUnderscoreSetJmp()]; + return &"_setjmp"[!TLI->usesUnderscoreSetJmp()]; case Intrinsic::longjmp: - return &"_longjmp"[!TLI.usesUnderscoreLongJmp()]; + return &"_longjmp"[!TLI->usesUnderscoreLongJmp()]; case Intrinsic::memcpy: { // Assert for address < 256 since we support only user defined address // spaces. @@ -4462,7 +4590,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, + DAG.setRoot(DAG.getMemcpy(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, false, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)))); return 0; @@ -4480,7 +4608,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, + DAG.setRoot(DAG.getMemset(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)))); return 0; } @@ -4499,7 +4627,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!Align) Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); - DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, + DAG.setRoot(DAG.getMemmove(getRoot(), sdl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)), MachinePointerInfo(I.getArgOperand(1)))); return 0; @@ -4508,17 +4636,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { const DbgDeclareInst &DI = cast(I); MDNode *Variable = DI.getVariable(); const Value *Address = DI.getAddress(); - if (!Address || !DIVariable(Variable).Verify()) { + DIVariable DIVar(Variable); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgDeclareInst should be either null or a DIVariable."); + if (!Address || !DIVar) { DEBUG(dbgs() << "Dropping debug info for " << DI << "\n"); return 0; } - // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder - // but do not always have a corresponding SDNode built. The SDNodeOrder - // absolute, but not relative, values are different depending on whether - // debug info exists. - ++SDNodeOrder; - // Check if address has undef value. 
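// The check below drops the dbg.declare when Address is an undef value, or a
// non-argument value with no remaining uses, i.e. isa<UndefValue>(Address) ||
// (Address->use_empty() && !isa<Argument>(Address)): in either case there is
// no location left to describe, so emitting an SDDbgValue for it would only
// produce dangling debug info.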
if (isa(Address) || (Address->use_empty() && !isa(Address))) { @@ -4589,7 +4714,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } case Intrinsic::dbg_value: { const DbgValueInst &DI = cast(I); - if (!DIVariable(DI.getVariable()).Verify()) + DIVariable DIVar(DI.getVariable()); + assert((!DIVar || DIVar.isVariable()) && + "Variable in DbgValueInst should be either null or a DIVariable."); + if (!DIVar) return 0; MDNode *Variable = DI.getVariable(); @@ -4598,11 +4726,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { if (!V) return 0; - // Build an entry in DbgOrdering. Debug info input nodes get an SDNodeOrder - // but do not always have a corresponding SDNode built. The SDNodeOrder - // absolute, but not relative, values are different depending on whether - // debug info exists. - ++SDNodeOrder; SDDbgValue *SDV; if (isa(V) || isa(V) || isa(V)) { SDV = DAG.getDbgValue(Variable, V, Offset, dl, SDNodeOrder); @@ -4666,7 +4789,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::eh_return_i32: case Intrinsic::eh_return_i64: DAG.getMachineFunction().getMMI().setCallsEHReturn(true); - DAG.setRoot(DAG.getNode(ISD::EH_RETURN, dl, + DAG.setRoot(DAG.getNode(ISD::EH_RETURN, sdl, MVT::Other, getControlRoot(), getValue(I.getArgOperand(0)), @@ -4676,17 +4799,17 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { DAG.getMachineFunction().getMMI().setCallsUnwindInit(true); return 0; case Intrinsic::eh_dwarf_cfa: { - SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), dl, - TLI.getPointerTy()); - SDValue Offset = DAG.getNode(ISD::ADD, dl, - TLI.getPointerTy(), - DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, dl, - TLI.getPointerTy()), + SDValue CfaArg = DAG.getSExtOrTrunc(getValue(I.getArgOperand(0)), sdl, + TLI->getPointerTy()); + SDValue Offset = DAG.getNode(ISD::ADD, sdl, + CfaArg.getValueType(), + DAG.getNode(ISD::FRAME_TO_ARGS_OFFSET, sdl, + CfaArg.getValueType()), CfaArg); - SDValue FA = DAG.getNode(ISD::FRAMEADDR, dl, - TLI.getPointerTy(), - DAG.getConstant(0, TLI.getPointerTy())); - setValue(&I, DAG.getNode(ISD::ADD, dl, TLI.getPointerTy(), + SDValue FA = DAG.getNode(ISD::FRAMEADDR, sdl, + TLI->getPointerTy(), + DAG.getConstant(0, TLI->getPointerTy())); + setValue(&I, DAG.getNode(ISD::ADD, sdl, FA.getValueType(), FA, Offset)); return 0; } @@ -4712,7 +4835,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); Ops[1] = getValue(I.getArgOperand(0)); - SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, dl, + SDValue Op = DAG.getNode(ISD::EH_SJLJ_SETJMP, sdl, DAG.getVTList(MVT::i32, MVT::Other), Ops, 2); setValue(&I, Op.getValue(0)); @@ -4720,7 +4843,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::eh_sjlj_longjmp: { - DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, dl, MVT::Other, + DAG.setRoot(DAG.getNode(ISD::EH_SJLJ_LONGJMP, sdl, MVT::Other, getRoot(), getValue(I.getArgOperand(0)))); return 0; } @@ -4775,10 +4898,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); - ShAmt = DAG.getNode(ISD::BUILD_VECTOR, dl, ShAmtVT, &ShOps[0], 2); - EVT DestVT = TLI.getValueType(I.getType()); - ShAmt = DAG.getNode(ISD::BITCAST, dl, DestVT, ShAmt); - Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, DestVT, + ShAmt = 
DAG.getNode(ISD::BUILD_VECTOR, sdl, ShAmtVT, &ShOps[0], 2); + EVT DestVT = TLI->getValueType(I.getType()); + ShAmt = DAG.getNode(ISD::BITCAST, sdl, DestVT, ShAmt); + Res = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, sdl, DestVT, DAG.getConstant(NewIntrinsic, MVT::i32), getValue(I.getArgOperand(0)), ShAmt); setValue(&I, Res); @@ -4788,14 +4911,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vinsertf128_ps_256: case Intrinsic::x86_avx_vinsertf128_si_256: case Intrinsic::x86_avx2_vinserti128: { - EVT DestVT = TLI.getValueType(I.getType()); - EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); + EVT DestVT = TLI->getValueType(I.getType()); + EVT ElVT = TLI->getValueType(I.getArgOperand(1)->getType()); uint64_t Idx = (cast(I.getArgOperand(2))->getZExtValue() & 1) * ElVT.getVectorNumElements(); - Res = DAG.getNode(ISD::INSERT_SUBVECTOR, dl, DestVT, + Res = DAG.getNode(ISD::INSERT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); return 0; } @@ -4803,12 +4926,12 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vextractf128_ps_256: case Intrinsic::x86_avx_vextractf128_si_256: case Intrinsic::x86_avx2_vextracti128: { - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI->getValueType(I.getType()); uint64_t Idx = (cast(I.getArgOperand(1))->getZExtValue() & 1) * DestVT.getVectorNumElements(); - Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DestVT, + Res = DAG.getNode(ISD::EXTRACT_SUBVECTOR, sdl, DestVT, getValue(I.getArgOperand(0)), - DAG.getIntPtrConstant(Idx)); + DAG.getConstant(Idx, TLI->getVectorIdxTy())); setValue(&I, Res); return 0; } @@ -4834,9 +4957,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::convertus: Code = ISD::CVT_US; break; case Intrinsic::convertuu: Code = ISD::CVT_UU; break; } - EVT DestVT = TLI.getValueType(I.getType()); + EVT DestVT = TLI->getValueType(I.getType()); const Value *Op1 = I.getArgOperand(0); - Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1), + Res = DAG.getConvertRndSat(DestVT, sdl, getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), getValue(I.getArgOperand(1)), @@ -4846,27 +4969,27 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::powi: - setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), + setValue(&I, ExpandPowI(sdl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); return 0; case Intrinsic::log: - setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::log2: - setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog2(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::log10: - setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandLog10(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::exp: - setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp(sdl, getValue(I.getArgOperand(0)), DAG, *TLI)); return 0; case Intrinsic::exp2: - setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); + setValue(&I, expandExp2(sdl, getValue(I.getArgOperand(0)), 
DAG, *TLI)); return 0; case Intrinsic::pow: - setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1)), DAG, TLI)); + setValue(&I, expandPow(sdl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG, *TLI)); return 0; case Intrinsic::sqrt: case Intrinsic::fabs: @@ -4876,7 +4999,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::ceil: case Intrinsic::trunc: case Intrinsic::rint: - case Intrinsic::nearbyint: { + case Intrinsic::nearbyint: + case Intrinsic::round: { unsigned Opcode; switch (Intrinsic) { default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. @@ -4889,35 +5013,42 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; case Intrinsic::rint: Opcode = ISD::FRINT; break; case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + case Intrinsic::round: Opcode = ISD::FROUND; break; } - setValue(&I, DAG.getNode(Opcode, dl, + setValue(&I, DAG.getNode(Opcode, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; } + case Intrinsic::copysign: + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, sdl, + getValue(I.getArgOperand(0)).getValueType(), + getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)))); + return 0; case Intrinsic::fma: - setValue(&I, DAG.getNode(ISD::FMA, dl, + setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); return 0; case Intrinsic::fmuladd: { - EVT VT = TLI.getValueType(I.getType()); + EVT VT = TLI->getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isFMAFasterThanMulAndAdd(VT)){ - setValue(&I, DAG.getNode(ISD::FMA, dl, + TLI->isFMAFasterThanFMulAndFAdd(VT)) { + setValue(&I, DAG.getNode(ISD::FMA, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), getValue(I.getArgOperand(2)))); } else { - SDValue Mul = DAG.getNode(ISD::FMUL, dl, + SDValue Mul = DAG.getNode(ISD::FMUL, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1))); - SDValue Add = DAG.getNode(ISD::FADD, dl, + SDValue Add = DAG.getNode(ISD::FADD, sdl, getValue(I.getArgOperand(0)).getValueType(), Mul, getValue(I.getArgOperand(2))); @@ -4926,21 +5057,21 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::convert_to_fp16: - setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, dl, + setValue(&I, DAG.getNode(ISD::FP32_TO_FP16, sdl, MVT::i16, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::convert_from_fp16: - setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, dl, + setValue(&I, DAG.getNode(ISD::FP16_TO_FP32, sdl, MVT::f32, getValue(I.getArgOperand(0)))); return 0; case Intrinsic::pcmarker: { SDValue Tmp = getValue(I.getArgOperand(0)); - DAG.setRoot(DAG.getNode(ISD::PCMARKER, dl, MVT::Other, getRoot(), Tmp)); + DAG.setRoot(DAG.getNode(ISD::PCMARKER, sdl, MVT::Other, getRoot(), Tmp)); return 0; } case Intrinsic::readcyclecounter: { SDValue Op = getRoot(); - Res = DAG.getNode(ISD::READCYCLECOUNTER, dl, + Res = DAG.getNode(ISD::READCYCLECOUNTER, sdl, DAG.getVTList(MVT::i64, MVT::Other), &Op, 1); setValue(&I, Res); @@ -4948,7 +5079,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { return 0; } case Intrinsic::bswap: - setValue(&I, 
DAG.getNode(ISD::BSWAP, dl, + setValue(&I, DAG.getNode(ISD::BSWAP, sdl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; @@ -4957,7 +5088,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTTZ : ISD::CTTZ_ZERO_UNDEF, - dl, Ty, Arg)); + sdl, Ty, Arg)); return 0; } case Intrinsic::ctlz: { @@ -4965,33 +5096,33 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { ConstantInt *CI = cast(I.getArgOperand(1)); EVT Ty = Arg.getValueType(); setValue(&I, DAG.getNode(CI->isZero() ? ISD::CTLZ : ISD::CTLZ_ZERO_UNDEF, - dl, Ty, Arg)); + sdl, Ty, Arg)); return 0; } case Intrinsic::ctpop: { SDValue Arg = getValue(I.getArgOperand(0)); EVT Ty = Arg.getValueType(); - setValue(&I, DAG.getNode(ISD::CTPOP, dl, Ty, Arg)); + setValue(&I, DAG.getNode(ISD::CTPOP, sdl, Ty, Arg)); return 0; } case Intrinsic::stacksave: { SDValue Op = getRoot(); - Res = DAG.getNode(ISD::STACKSAVE, dl, - DAG.getVTList(TLI.getPointerTy(), MVT::Other), &Op, 1); + Res = DAG.getNode(ISD::STACKSAVE, sdl, + DAG.getVTList(TLI->getPointerTy(), MVT::Other), &Op, 1); setValue(&I, Res); DAG.setRoot(Res.getValue(1)); return 0; } case Intrinsic::stackrestore: { Res = getValue(I.getArgOperand(0)); - DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, dl, MVT::Other, getRoot(), Res)); + DAG.setRoot(DAG.getNode(ISD::STACKRESTORE, sdl, MVT::Other, getRoot(), Res)); return 0; } case Intrinsic::stackprotector: { // Emit code into the DAG to store the stack guard onto the stack. MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = TLI->getPointerTy(); SDValue Src = getValue(I.getArgOperand(0)); // The guard's value. AllocaInst *Slot = cast(I.getArgOperand(1)); @@ -5002,7 +5133,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(getRoot(), dl, Src, FIN, + Res = DAG.getStore(getRoot(), sdl, Src, FIN, MachinePointerInfo::getFixedStack(FI), true, false, 0); setValue(&I, Res); @@ -5046,14 +5177,14 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[4] = DAG.getSrcValue(I.getArgOperand(0)); Ops[5] = DAG.getSrcValue(F); - Res = DAG.getNode(ISD::INIT_TRAMPOLINE, dl, MVT::Other, Ops, 6); + Res = DAG.getNode(ISD::INIT_TRAMPOLINE, sdl, MVT::Other, Ops, 6); DAG.setRoot(Res); return 0; } case Intrinsic::adjust_trampoline: { - setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, dl, - TLI.getPointerTy(), + setValue(&I, DAG.getNode(ISD::ADJUST_TRAMPOLINE, sdl, + TLI->getPointerTy(), getValue(I.getArgOperand(0)))); return 0; } @@ -5070,7 +5201,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::gcwrite: llvm_unreachable("GC failed to lower gcread/gcwrite intrinsics!"); case Intrinsic::flt_rounds: - setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, dl, MVT::i32)); + setValue(&I, DAG.getNode(ISD::FLT_ROUNDS_, sdl, MVT::i32)); return 0; case Intrinsic::expect: { @@ -5083,9 +5214,9 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::trap: { StringRef TrapFuncName = TM.Options.getTrapFunctionName(); if (TrapFuncName.empty()) { - ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? 
+ ISD::NodeType Op = (Intrinsic == Intrinsic::trap) ? ISD::TRAP : ISD::DEBUGTRAP; - DAG.setRoot(DAG.getNode(Op, dl,MVT::Other, getRoot())); + DAG.setRoot(DAG.getNode(Op, sdl,MVT::Other, getRoot())); return 0; } TargetLowering::ArgListTy Args; @@ -5094,9 +5225,10 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { false, false, false, false, 0, CallingConv::C, /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, - DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), - Args, DAG, dl); - std::pair Result = TLI.LowerCallTo(CLI); + DAG.getExternalSymbol(TrapFuncName.data(), + TLI->getPointerTy()), + Args, DAG, sdl); + std::pair Result = TLI->LowerCallTo(CLI); DAG.setRoot(Result.second); return 0; } @@ -5121,7 +5253,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2)); + setValue(&I, DAG.getNode(Op, sdl, VTs, Op1, Op2)); return 0; } case Intrinsic::prefetch: { @@ -5132,7 +5264,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Ops[2] = getValue(I.getArgOperand(1)); Ops[3] = getValue(I.getArgOperand(2)); Ops[4] = getValue(I.getArgOperand(3)); - DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, dl, + DAG.setRoot(DAG.getMemIntrinsicNode(ISD::PREFETCH, sdl, DAG.getVTList(MVT::Other), &Ops[0], 5, EVT::getIntegerVT(*Context, 8), @@ -5153,8 +5285,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SmallVector Allocas; GetUnderlyingObjects(I.getArgOperand(1), Allocas, TD); - for (SmallVector::iterator Object = Allocas.begin(), - E = Allocas.end(); Object != E; ++Object) { + for (SmallVectorImpl::iterator Object = Allocas.begin(), + E = Allocas.end(); Object != E; ++Object) { AllocaInst *LifetimeObject = dyn_cast_or_null(*Object); // Could not find an Alloca. @@ -5165,24 +5297,45 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Ops[2]; Ops[0] = getRoot(); - Ops[1] = DAG.getFrameIndex(FI, TLI.getPointerTy(), true); + Ops[1] = DAG.getFrameIndex(FI, TLI->getPointerTy(), true); unsigned Opcode = (IsStart ? ISD::LIFETIME_START : ISD::LIFETIME_END); - Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); + Res = DAG.getNode(Opcode, sdl, MVT::Other, Ops, 2); DAG.setRoot(Res); } return 0; } case Intrinsic::invariant_start: // Discard region information. - setValue(&I, DAG.getUNDEF(TLI.getPointerTy())); + setValue(&I, DAG.getUNDEF(TLI->getPointerTy())); return 0; case Intrinsic::invariant_end: // Discard region information. return 0; + case Intrinsic::stackprotectorcheck: { + // Do not actually emit anything for this basic block. Instead we initialize + // the stack protector descriptor and export the guard variable so we can + // access it in FinishBasicBlock. + const BasicBlock *BB = I.getParent(); + SPDescriptor.initialize(BB, FuncInfo.MBBMap[BB], I); + ExportFromCurrentBlock(SPDescriptor.getGuard()); + + // Flush our exports since we are going to process a terminator. 
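+ // getControlRoot() folds everything in PendingExports into the DAG root,
+ // so copies of values that are live out of this block are emitted before
+ // the terminator the stack-protector check introduces; a plain getRoot()
+ // would only flush PendingLoads.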
+ (void)getControlRoot(); + return 0; + } case Intrinsic::donothing: // ignore return 0; + case Intrinsic::experimental_stackmap: { + visitStackmap(I); + return 0; + } + case Intrinsic::experimental_patchpoint_void: + case Intrinsic::experimental_patchpoint_i64: { + visitPatchpoint(I); + return 0; + } } } @@ -5201,26 +5354,27 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check whether the function can return without sret-demotion. SmallVector Outs; - GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI); + const TargetLowering *TLI = TM.getTargetLowering(); + GetReturnInfo(RetTy, CS.getAttributes(), Outs, *TLI); - bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), - DAG.getMachineFunction(), - FTy->isVarArg(), Outs, - FTy->getContext()); + bool CanLowerReturn = TLI->CanLowerReturn(CS.getCallingConv(), + DAG.getMachineFunction(), + FTy->isVarArg(), Outs, + FTy->getContext()); SDValue DemoteStackSlot; int DemoteStackIdx = -100; if (!CanLowerReturn) { - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize( + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize( FTy->getReturnType()); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment( + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment( FTy->getReturnType()); MachineFunction &MF = DAG.getMachineFunction(); DemoteStackIdx = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType()); - DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI.getPointerTy()); + DemoteStackSlot = DAG.getFrameIndex(DemoteStackIdx, TLI->getPointerTy()); Entry.Node = DemoteStackSlot; Entry.Ty = StackSlotPtrType; Entry.isSExt = false; @@ -5246,15 +5400,8 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SDValue ArgNode = getValue(V); Entry.Node = ArgNode; Entry.Ty = V->getType(); - unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); - Entry.isReturned = CS.paramHasAttr(attrInd, Attribute::Returned); - Entry.Alignment = CS.getParamAlignment(attrInd); + // Skip the first return-type Attribute to get to params. + Entry.setAttributes(&CS, i - CS.arg_begin() + 1); Args.push_back(Entry); } @@ -5277,18 +5424,18 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Both PendingLoads and PendingExports must be flushed here; // this call might not return. (void)getRoot(); - DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getControlRoot(), BeginLabel)); + DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getControlRoot(), BeginLabel)); } // Check if target-independent constraints permit a tail call here. - // Target-dependent constraints are checked within TLI.LowerCallTo. - if (isTailCall && !isInTailCallPosition(CS, TLI)) + // Target-dependent constraints are checked within TLI->LowerCallTo. 
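// Roughly, a call is in tail-call position when it is immediately followed by
// a ret that returns either void or exactly the call's own result, with
// nothing in between that could observe the caller's stack frame. E.g.:
//
//   define i32 @f(i32 %x) {
//     %r = tail call i32 @g(i32 %x)   ; ok: the next instruction is the ret
//     ret i32 %r
//   }
//
// (A hedged paraphrase of what isInTailCallPosition checks, not its exact
// rule set.)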
+ if (isTailCall && !isInTailCallPosition(CS, *TLI)) isTailCall = false; TargetLowering:: CallLoweringInfo CLI(getRoot(), RetTy, FTy, isTailCall, Callee, Args, DAG, - getCurDebugLoc(), CS); - std::pair Result = TLI.LowerCallTo(CLI); + getCurSDLoc(), CS); + std::pair Result = TLI->LowerCallTo(CLI); assert((isTailCall || Result.second.getNode()) && "Non-null chain expected with non-tail call!"); assert((Result.second.getNode() || !Result.first.getNode()) && @@ -5301,59 +5448,57 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, SmallVector PVTs; Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType()); - ComputeValueVTs(TLI, PtrRetTy, PVTs); + ComputeValueVTs(*TLI, PtrRetTy, PVTs); assert(PVTs.size() == 1 && "Pointers should fit in one register"); EVT PtrVT = PVTs[0]; SmallVector RetTys; SmallVector Offsets; RetTy = FTy->getReturnType(); - ComputeValueVTs(TLI, RetTy, RetTys, &Offsets); + ComputeValueVTs(*TLI, RetTy, RetTys, &Offsets); unsigned NumValues = RetTys.size(); SmallVector Values(NumValues); SmallVector Chains(NumValues); for (unsigned i = 0; i < NumValues; ++i) { - SDValue Add = DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, + SDValue Add = DAG.getNode(ISD::ADD, getCurSDLoc(), PtrVT, DemoteStackSlot, DAG.getConstant(Offsets[i], PtrVT)); - SDValue L = DAG.getLoad(RetTys[i], getCurDebugLoc(), Result.second, Add, + SDValue L = DAG.getLoad(RetTys[i], getCurSDLoc(), Result.second, Add, MachinePointerInfo::getFixedStack(DemoteStackIdx, Offsets[i]), false, false, false, 1); Values[i] = L; Chains[i] = L.getValue(1); } - SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), + SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &Chains[0], NumValues); PendingLoads.push_back(Chain); setValue(CS.getInstruction(), - DAG.getNode(ISD::MERGE_VALUES, getCurDebugLoc(), + DAG.getNode(ISD::MERGE_VALUES, getCurSDLoc(), DAG.getVTList(&RetTys[0], RetTys.size()), &Values[0], Values.size())); } - // Assign order to nodes here. If the call does not produce a result, it won't - // be mapped to a SDNode and visit() will not assign it an order number. if (!Result.second.getNode()) { - // As a special case, a null chain means that a tail call has been emitted and - // the DAG root is already updated. + // As a special case, a null chain means that a tail call has been emitted + // and the DAG root is already updated. HasTailCall = true; - ++SDNodeOrder; - AssignOrderingToNode(DAG.getRoot().getNode()); + + // Since there's no actual continuation from this block, nothing can be + // relying on us setting vregs for them. + PendingExports.clear(); } else { DAG.setRoot(Result.second); - ++SDNodeOrder; - AssignOrderingToNode(Result.second.getNode()); } if (LandingPad) { // Insert a label at the end of the invoke call to mark the try range. This // can be used to detect deletion of the invoke via the MachineModuleInfo. MCSymbol *EndLabel = MMI.getContext().CreateTempSymbol(); - DAG.setRoot(DAG.getEHLabel(getCurDebugLoc(), getRoot(), EndLabel)); + DAG.setRoot(DAG.getEHLabel(getCurSDLoc(), getRoot(), EndLabel)); // Inform MachineModuleInfo of range. 
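// The BeginLabel/EndLabel pair brackets the whole lowered call sequence; the
// exception machinery later turns each (BeginLabel, EndLabel, LandingPad)
// triple into a call-site entry of the EH table, so an exception unwinding
// through this range is routed to the invoke's landing pad.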
MMI.addInvoke(LandingPad, BeginLabel, EndLabel); @@ -5408,10 +5553,10 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, } SDValue Ptr = Builder.getValue(PtrVal); - SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurDebugLoc(), Root, + SDValue LoadVal = Builder.DAG.getLoad(LoadVT, Builder.getCurSDLoc(), Root, Ptr, MachinePointerInfo(PtrVal), false /*volatile*/, - false /*nontemporal*/, + false /*nontemporal*/, false /*isinvariant*/, 1 /* align=1 */); if (!ConstantMemory) @@ -5419,6 +5564,18 @@ static SDValue getMemCmpLoad(const Value *PtrVal, MVT LoadVT, return LoadVal; } +/// processIntegerCallValue - Record the value for an instruction that +/// produces an integer result, converting the type where necessary. +void SelectionDAGBuilder::processIntegerCallValue(const Instruction &I, + SDValue Value, + bool IsSigned) { + EVT VT = TM.getTargetLowering()->getValueType(I.getType(), true); + if (IsSigned) + Value = DAG.getSExtOrTrunc(Value, getCurSDLoc(), VT); + else + Value = DAG.getZExtOrTrunc(Value, getCurSDLoc(), VT); + setValue(&I, Value); +} /// visitMemCmpCall - See if we can lower a call to memcmp in an optimized form. /// If so, return true and lower it, otherwise return false and it will be @@ -5434,15 +5591,33 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { !I.getType()->isIntegerTy()) return false; - const ConstantInt *Size = dyn_cast(I.getArgOperand(2)); + const Value *Size = I.getArgOperand(2); + const ConstantInt *CSize = dyn_cast(Size); + if (CSize && CSize->getZExtValue() == 0) { + EVT CallVT = TM.getTargetLowering()->getValueType(I.getType(), true); + setValue(&I, DAG.getConstant(0, CallVT)); + return true; + } + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair Res = + TSI.EmitTargetCodeForMemcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(LHS), getValue(RHS), getValue(Size), + MachinePointerInfo(LHS), + MachinePointerInfo(RHS)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } // memcmp(S1,S2,2) != 0 -> (*(short*)LHS != *(short*)RHS) != 0 // memcmp(S1,S2,4) != 0 -> (*(int*)LHS != *(int*)RHS) != 0 - if (Size && IsOnlyUsedInZeroEqualityComparison(&I)) { + if (CSize && IsOnlyUsedInZeroEqualityComparison(&I)) { bool ActuallyDoIt = true; MVT LoadVT; Type *LoadTy; - switch (Size->getZExtValue()) { + switch (CSize->getZExtValue()) { default: LoadVT = MVT::Other; LoadTy = 0; @@ -5450,20 +5625,20 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { break; case 2: LoadVT = MVT::i16; - LoadTy = Type::getInt16Ty(Size->getContext()); + LoadTy = Type::getInt16Ty(CSize->getContext()); break; case 4: LoadVT = MVT::i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); break; case 8: LoadVT = MVT::i64; - LoadTy = Type::getInt64Ty(Size->getContext()); + LoadTy = Type::getInt64Ty(CSize->getContext()); break; /* case 16: LoadVT = MVT::v4i32; - LoadTy = Type::getInt32Ty(Size->getContext()); + LoadTy = Type::getInt32Ty(CSize->getContext()); LoadTy = VectorType::get(LoadTy, 4); break; */ @@ -5476,10 +5651,11 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { // Require that we can find a legal MVT, and only do this if the target // supports unaligned loads of that type. Expanding into byte loads would // bloat the code. 
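// This rewrite is only sound because IsOnlyUsedInZeroEqualityComparison
// guarantees the memcmp result is compared against zero: a wide integer
// compare preserves equality, but not the sign of the first differing byte
// that memcmp otherwise reports. A standalone sketch of the 4-byte case
// (hypothetical helper; memcpy keeps the possibly unaligned loads
// well-defined):

#include <cstdint>
#include <cstring>

static bool memcmp4IsEqual(const void *LHS, const void *RHS) {
  uint32_t A, B;
  std::memcpy(&A, LHS, sizeof(A));  // stands in for *(int *)LHS
  std::memcpy(&B, RHS, sizeof(B));  // stands in for *(int *)RHS
  return A == B;                    // valid only for memcmp(...) == 0 tests
}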
- if (ActuallyDoIt && Size->getZExtValue() > 4) { + const TargetLowering *TLI = TM.getTargetLowering(); + if (ActuallyDoIt && CSize->getZExtValue() > 4) { // TODO: Handle 5 byte compare as 4-byte + 1 byte. // TODO: Handle 8 byte compare on x86-32 as two 32-bit loads. - if (!TLI.isTypeLegal(LoadVT) ||!TLI.allowsUnalignedMemoryAccesses(LoadVT)) + if (!TLI->isTypeLegal(LoadVT) ||!TLI->allowsUnalignedMemoryAccesses(LoadVT)) ActuallyDoIt = false; } @@ -5487,10 +5663,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { SDValue LHSVal = getMemCmpLoad(LHS, LoadVT, LoadTy, *this); SDValue RHSVal = getMemCmpLoad(RHS, LoadVT, LoadTy, *this); - SDValue Res = DAG.getSetCC(getCurDebugLoc(), MVT::i1, LHSVal, RHSVal, + SDValue Res = DAG.getSetCC(getCurSDLoc(), MVT::i1, LHSVal, RHSVal, ISD::SETNE); - EVT CallVT = TLI.getValueType(I.getType(), true); - setValue(&I, DAG.getZExtOrTrunc(Res, getCurDebugLoc(), CallVT)); + processIntegerCallValue(I, Res, false); return true; } } @@ -5499,6 +5674,148 @@ bool SelectionDAGBuilder::visitMemCmpCall(const CallInst &I) { return false; } +/// visitMemChrCall -- See if we can lower a memchr call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitMemChrCall(const CallInst &I) { + // Verify that the prototype makes sense. void *memchr(void *, int, size_t) + if (I.getNumArgOperands() != 3) + return false; + + const Value *Src = I.getArgOperand(0); + const Value *Char = I.getArgOperand(1); + const Value *Length = I.getArgOperand(2); + if (!Src->getType()->isPointerTy() || + !Char->getType()->isIntegerTy() || + !Length->getType()->isIntegerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair Res = + TSI.EmitTargetCodeForMemchr(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Src), getValue(Char), getValue(Length), + MachinePointerInfo(Src)); + if (Res.first.getNode()) { + setValue(&I, Res.first); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrCpyCall -- See if we can lower a strcpy or stpcpy call into an +/// optimized form. If so, return true and lower it, otherwise return false +/// and it will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrCpyCall(const CallInst &I, bool isStpcpy) { + // Verify that the prototype makes sense. char *strcpy(char *, char *) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isPointerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair Res = + TSI.EmitTargetCodeForStrcpy(DAG, getCurSDLoc(), getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1), isStpcpy); + if (Res.first.getNode()) { + setValue(&I, Res.first); + DAG.setRoot(Res.second); + return true; + } + + return false; +} + +/// visitStrCmpCall - See if we can lower a call to strcmp in an optimized form. +/// If so, return true and lower it, otherwise return false and it will be +/// lowered like a normal call. +bool SelectionDAGBuilder::visitStrCmpCall(const CallInst &I) { + // Verify that the prototype makes sense. 
int strcmp(void*,void*) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isPointerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair Res = + TSI.EmitTargetCodeForStrcmp(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0), + MachinePointerInfo(Arg1)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, true); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrLenCall -- See if we can lower a strlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrLenCall(const CallInst &I) { + // Verify that the prototype makes sense. size_t strlen(char *) + if (I.getNumArgOperands() != 1) + return false; + + const Value *Arg0 = I.getArgOperand(0); + if (!Arg0->getType()->isPointerTy() || !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair Res = + TSI.EmitTargetCodeForStrlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + +/// visitStrNLenCall -- See if we can lower a strnlen call into an optimized +/// form. If so, return true and lower it, otherwise return false and it +/// will be lowered like a normal call. +bool SelectionDAGBuilder::visitStrNLenCall(const CallInst &I) { + // Verify that the prototype makes sense. size_t strnlen(char *, size_t) + if (I.getNumArgOperands() != 2) + return false; + + const Value *Arg0 = I.getArgOperand(0), *Arg1 = I.getArgOperand(1); + if (!Arg0->getType()->isPointerTy() || + !Arg1->getType()->isIntegerTy() || + !I.getType()->isIntegerTy()) + return false; + + const TargetSelectionDAGInfo &TSI = DAG.getSelectionDAGInfo(); + std::pair Res = + TSI.EmitTargetCodeForStrnlen(DAG, getCurSDLoc(), DAG.getRoot(), + getValue(Arg0), getValue(Arg1), + MachinePointerInfo(Arg0)); + if (Res.first.getNode()) { + processIntegerCallValue(I, Res.first, false); + PendingLoads.push_back(Res.second); + return true; + } + + return false; +} + /// visitUnaryFloatCall - If a call instruction is a unary floating-point /// operation (as expected), translate it to an SDNode with the specified opcode /// and return true. 
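// All five helpers added above (memchr, strcpy/stpcpy, strcmp, strlen,
// strnlen) follow the same three-step shape, sketched here with hypothetical
// names:
//
//   bool SelectionDAGBuilder::visitXxxCall(const CallInst &I) {
//     if (!prototypeLooksSane(I))                  // 1. check arg/result types
//       return false;
//     std::pair<SDValue, SDValue> Res =
//         TSI.EmitTargetCodeForXxx(DAG, getCurSDLoc(), ...); // 2. ask target
//     if (!Res.first.getNode())
//       return false;                // no special sequence: emit a normal call
//     processIntegerCallValue(I, Res.first, IsSigned);       // 3. record it,
//     PendingLoads.push_back(Res.second);    //    extending to the IR type
//     return true;
//   }
//
// Only strcmp and memcmp use the signed extension, since their results carry
// a sign; memchr and strcpy produce pointers and set the value directly.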
@@ -5512,7 +5829,7 @@ bool SelectionDAGBuilder::visitUnaryFloatCall(const CallInst &I, return false; SDValue Tmp = getValue(I.getArgOperand(0)); - setValue(&I, DAG.getNode(Opcode, getCurDebugLoc(), Tmp.getValueType(), Tmp)); + setValue(&I, DAG.getNode(Opcode, getCurSDLoc(), Tmp.getValueType(), Tmp)); return true; } @@ -5561,7 +5878,7 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { I.onlyReadsMemory()) { SDValue LHS = getValue(I.getArgOperand(0)); SDValue RHS = getValue(I.getArgOperand(1)); - setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurDebugLoc(), + setValue(&I, DAG.getNode(ISD::FCOPYSIGN, getCurSDLoc(), LHS.getValueType(), LHS, RHS)); return; } @@ -5587,6 +5904,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { case LibFunc::sqrt: case LibFunc::sqrtf: case LibFunc::sqrtl: + case LibFunc::sqrt_finite: + case LibFunc::sqrtf_finite: + case LibFunc::sqrtl_finite: if (visitUnaryFloatCall(I, ISD::FSQRT)) return; break; @@ -5614,6 +5934,12 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitUnaryFloatCall(I, ISD::FRINT)) return; break; + case LibFunc::round: + case LibFunc::roundf: + case LibFunc::roundl: + if (visitUnaryFloatCall(I, ISD::FROUND)) + return; + break; case LibFunc::trunc: case LibFunc::truncf: case LibFunc::truncl: @@ -5636,6 +5962,30 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (visitMemCmpCall(I)) return; break; + case LibFunc::memchr: + if (visitMemChrCall(I)) + return; + break; + case LibFunc::strcpy: + if (visitStrCpyCall(I, false)) + return; + break; + case LibFunc::stpcpy: + if (visitStrCpyCall(I, true)) + return; + break; + case LibFunc::strcmp: + if (visitStrCmpCall(I)) + return; + break; + case LibFunc::strlen: + if (visitStrLenCall(I)) + return; + break; + case LibFunc::strnlen: + if (visitStrNLenCall(I)) + return; + break; } } } @@ -5644,7 +5994,8 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) { if (!RenameFn) Callee = getValue(I.getCalledValue()); else - Callee = DAG.getExternalSymbol(RenameFn, TLI.getPointerTy()); + Callee = DAG.getExternalSymbol(RenameFn, + TM.getTargetLowering()->getPointerTy()); // Check if we can potentially perform a tail call. More detailed checking is // be done within LowerCallTo, after more information about the call is known. @@ -5733,7 +6084,7 @@ typedef SmallVector SDISelAsmOperandInfoVector; /// static void GetRegistersForValue(SelectionDAG &DAG, const TargetLowering &TLI, - DebugLoc DL, + SDLoc DL, SDISelAsmOperandInfo &OpInfo) { LLVMContext &Context = *DAG.getContext(); @@ -5839,8 +6190,9 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { /// ConstraintOperands - Information about all of the constraints. SDISelAsmOperandInfoVector ConstraintOperands; + const TargetLowering *TLI = TM.getTargetLowering(); TargetLowering::AsmOperandInfoVector - TargetConstraints = TLI.ParseConstraints(CS); + TargetConstraints = TLI->ParseConstraints(CS); bool hasMemory = false; @@ -5865,10 +6217,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. 
assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast(CS.getType())) { - OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); + OpVT = TLI->getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getSimpleValueType(CS.getType()); + OpVT = TLI->getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -5889,7 +6241,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD). + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), *TLI, TD). getSimpleVT(); } @@ -5901,7 +6253,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { else { for (unsigned j = 0, ee = OpInfo.Codes.size(); j != ee; ++j) { TargetLowering::ConstraintType - CType = TLI.getConstraintType(OpInfo.Codes[j]); + CType = TLI->getConstraintType(OpInfo.Codes[j]); if (CType == TargetLowering::C_Memory) { hasMemory = true; break; @@ -5933,11 +6285,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintVT != Input.ConstraintVT) { std::pair MatchRC = - TLI.getRegForInlineAsmConstraint(OpInfo.ConstraintCode, - OpInfo.ConstraintVT); + TLI->getRegForInlineAsmConstraint(OpInfo.ConstraintCode, + OpInfo.ConstraintVT); std::pair InputRC = - TLI.getRegForInlineAsmConstraint(Input.ConstraintCode, - Input.ConstraintVT); + TLI->getRegForInlineAsmConstraint(Input.ConstraintCode, + Input.ConstraintVT); if ((OpInfo.ConstraintVT.isInteger() != Input.ConstraintVT.isInteger()) || (MatchRC.second != InputRC.second)) { @@ -5950,7 +6302,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + TLI->ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); if (OpInfo.ConstraintType == TargetLowering::C_Memory && OpInfo.Type == InlineAsm::isClobber) @@ -5978,17 +6330,17 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (isa(OpVal) || isa(OpVal) || isa(OpVal) || isa(OpVal)) { OpInfo.CallOperand = DAG.getConstantPool(cast(OpVal), - TLI.getPointerTy()); + TLI->getPointerTy()); } else { // Otherwise, create a stack slot and emit a store to it before the // asm. Type *Ty = OpVal->getType(); - uint64_t TySize = TLI.getDataLayout()->getTypeAllocSize(Ty); - unsigned Align = TLI.getDataLayout()->getPrefTypeAlignment(Ty); + uint64_t TySize = TLI->getDataLayout()->getTypeAllocSize(Ty); + unsigned Align = TLI->getDataLayout()->getPrefTypeAlignment(Ty); MachineFunction &MF = DAG.getMachineFunction(); int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false); - SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy()); - Chain = DAG.getStore(Chain, getCurDebugLoc(), + SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI->getPointerTy()); + Chain = DAG.getStore(Chain, getCurSDLoc(), OpInfo.CallOperand, StackSlot, MachinePointerInfo::getFixedStack(SSFI), false, false, 0); @@ -6005,7 +6357,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this constraint is for a specific register, allocate it before // anything else. 
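// Register assignment is deliberately split into two passes: operands that
// name a specific physical register (C_Register) are pinned first, and only
// then do C_RegisterClass operands pick their registers, so a class-based
// operand cannot claim a register that a later fixed constraint demands.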
if (OpInfo.ConstraintType == TargetLowering::C_Register) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); + GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); } // Second pass - Loop over all of the operands, assigning virtual or physregs @@ -6016,7 +6368,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // C_Register operands have already been allocated, Other/Memory don't need // to be. if (OpInfo.ConstraintType == TargetLowering::C_RegisterClass) - GetRegistersForValue(DAG, TLI, getCurDebugLoc(), OpInfo); + GetRegistersForValue(DAG, *TLI, getCurSDLoc(), OpInfo); } // AsmNodeOperands - The operands for the ISD::INLINEASM node. @@ -6024,7 +6376,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands.push_back(SDValue()); // reserve space for input chain AsmNodeOperands.push_back( DAG.getTargetExternalSymbol(IA->getAsmString().c_str(), - TLI.getPointerTy())); + TLI->getPointerTy())); // If we have a !srcloc metadata node associated with it, we want to attach // this to the ultimately generated inline asm machineinstr. To do this, we @@ -6047,7 +6399,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { TargetLowering::AsmOperandInfo &OpInfo = TargetConstraints[i]; // Compute the constraint code and ConstraintType to use. - TLI.ComputeConstraintToUse(OpInfo, SDValue()); + TLI->ComputeConstraintToUse(OpInfo, SDValue()); // Ideally, we would only check against memory constraints. However, the // meaning of an other constraint can be target-specific and we can't easily @@ -6065,7 +6417,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } AsmNodeOperands.push_back(DAG.getTargetConstant(ExtraInfo, - TLI.getPointerTy())); + TLI->getPointerTy())); // Loop over all of the inputs, copying the operand values into the // appropriate registers and processing the output regs. @@ -6087,7 +6439,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know about this output. unsigned OpFlags = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlags, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(OpInfo.CallOperand); break; } @@ -6098,10 +6450,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // we can use. if (OpInfo.AssignedRegs.Regs.empty()) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), + Ctx.emitError(CS.getInstruction(), "couldn't allocate output register for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } // If this is an indirect operand, store through the pointer after the @@ -6118,13 +6470,11 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Add information to the INLINEASM node to know that this register is // set. - OpInfo.AssignedRegs.AddInlineAsmOperands(OpInfo.isEarlyClobber ? - InlineAsm::Kind_RegDefEarlyClobber : - InlineAsm::Kind_RegDef, - false, - 0, - DAG, - AsmNodeOperands); + OpInfo.AssignedRegs + .AddInlineAsmOperands(OpInfo.isEarlyClobber + ? 
InlineAsm::Kind_RegDefEarlyClobber + : InlineAsm::Kind_RegDef, + false, 0, DAG, AsmNodeOperands); break; } case InlineAsm::isInput: { @@ -6156,10 +6506,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.isIndirect) { // This happens on gcc/testsuite/gcc.dg/pr8788-1.c LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" - " don't know how to handle tied " - "indirect register inputs"); - report_fatal_error("Cannot handle indirect register inputs!"); + Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" + " don't know how to handle tied " + "indirect register inputs"); + return; } RegsForValue MatchedRegs; @@ -6169,18 +6519,18 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); i != e; ++i) { - if (const TargetRegisterClass *RC = TLI.getRegClassFor(RegVT)) + if (const TargetRegisterClass *RC = TLI->getRegClassFor(RegVT)) MatchedRegs.Regs.push_back(RegInfo.createVirtualRegister(RC)); else { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), "inline asm error: This value" + Ctx.emitError(CS.getInstruction(), + "inline asm error: This value" " type register class is not natively supported!"); - report_fatal_error("inline asm error: This value type register " - "class is not natively supported!"); + return; } } // Use the produced MatchedRegs object to - MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); MatchedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, true, OpInfo.getMatchedOperand(), @@ -6196,7 +6546,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpFlag = InlineAsm::getFlagWordForMatchingOp(OpFlag, OpInfo.getMatchedOperand()); AsmNodeOperands.push_back(DAG.getTargetConstant(OpFlag, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(AsmNodeOperands[CurOp+1]); break; } @@ -6208,34 +6558,34 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { if (OpInfo.ConstraintType == TargetLowering::C_Other) { std::vector Ops; - TLI.LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, - Ops, DAG); + TLI->LowerAsmOperandForConstraint(InOperandVal, OpInfo.ConstraintCode, + Ops, DAG); if (Ops.empty()) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), "invalid operand for inline asm constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } // Add information to the INLINEASM node to know about this input. unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Imm, Ops.size()); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.insert(AsmNodeOperands.end(), Ops.begin(), Ops.end()); break; } if (OpInfo.ConstraintType == TargetLowering::C_Memory) { assert(OpInfo.isIndirect && "Operand must be indirect to be a mem!"); - assert(InOperandVal.getValueType() == TLI.getPointerTy() && + assert(InOperandVal.getValueType() == TLI->getPointerTy() && "Memory operands expect pointer values"); // Add information to the INLINEASM node to know about this input. 
unsigned ResOpType = InlineAsm::getFlagWord(InlineAsm::Kind_Mem, 1); AsmNodeOperands.push_back(DAG.getTargetConstant(ResOpType, - TLI.getPointerTy())); + TLI->getPointerTy())); AsmNodeOperands.push_back(InOperandVal); break; } @@ -6249,20 +6599,21 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { LLVMContext &Ctx = *DAG.getContext(); Ctx.emitError(CS.getInstruction(), "Don't know how to handle indirect register inputs yet " - "for constraint '" + Twine(OpInfo.ConstraintCode) + "'"); - break; + "for constraint '" + + Twine(OpInfo.ConstraintCode) + "'"); + return; } // Copy the input into the appropriate registers. if (OpInfo.AssignedRegs.Regs.empty()) { LLVMContext &Ctx = *DAG.getContext(); - Ctx.emitError(CS.getInstruction(), + Ctx.emitError(CS.getInstruction(), "couldn't allocate input reg for constraint '" + - Twine(OpInfo.ConstraintCode) + "'"); - break; + Twine(OpInfo.ConstraintCode) + "'"); + return; } - OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(), + OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); OpInfo.AssignedRegs.AddInlineAsmOperands(InlineAsm::Kind_RegUse, false, 0, @@ -6285,7 +6636,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { AsmNodeOperands[InlineAsm::Op_InputChain] = Chain; if (Flag.getNode()) AsmNodeOperands.push_back(Flag); - Chain = DAG.getNode(ISD::INLINEASM, getCurDebugLoc(), + Chain = DAG.getNode(ISD::INLINEASM, getCurSDLoc(), DAG.getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], AsmNodeOperands.size()); Flag = Chain.getValue(1); @@ -6293,12 +6644,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If this asm returns a register value, copy the result from that register // and set it as the value of the call. if (!RetValRegs.Regs.empty()) { - SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), + SDValue Val = RetValRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, CS.getInstruction()); // FIXME: Why don't we do this for inline asms with MRVs? if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) { - EVT ResultType = TLI.getValueType(CS.getType()); + EVT ResultType = TLI->getValueType(CS.getType()); // If any of the results of the inline asm is a vector, it may have the // wrong width/num elts. This can happen for register classes that can @@ -6306,7 +6657,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // not have the same VT as was expected. Convert it to the right type // with bit_convert. if (ResultType != Val.getValueType() && Val.getValueType().isVector()) { - Val = DAG.getNode(ISD::BITCAST, getCurDebugLoc(), + Val = DAG.getNode(ISD::BITCAST, getCurSDLoc(), ResultType, Val); } else if (ResultType != Val.getValueType() && @@ -6314,7 +6665,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // If a result value was tied to an input value, the computed result may // have a wider width than the expected result. Extract the relevant // portion. 
- Val = DAG.getNode(ISD::TRUNCATE, getCurDebugLoc(), ResultType, Val); + Val = DAG.getNode(ISD::TRUNCATE, getCurSDLoc(), ResultType, Val); } assert(ResultType == Val.getValueType() && "Asm result value mismatch!"); @@ -6333,7 +6684,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { for (unsigned i = 0, e = IndirectStoresToEmit.size(); i != e; ++i) { RegsForValue &OutRegs = IndirectStoresToEmit[i].first; const Value *Ptr = IndirectStoresToEmit[i].second; - SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurDebugLoc(), + SDValue OutVal = OutRegs.getCopyFromRegs(DAG, FuncInfo, getCurSDLoc(), Chain, &Flag, IA); StoresToEmit.push_back(std::make_pair(OutVal, Ptr)); } @@ -6341,7 +6692,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Emit the non-flagged stores from the physregs. SmallVector OutChains; for (unsigned i = 0, e = StoresToEmit.size(); i != e; ++i) { - SDValue Val = DAG.getStore(Chain, getCurDebugLoc(), + SDValue Val = DAG.getStore(Chain, getCurSDLoc(), StoresToEmit[i].first, getValue(StoresToEmit[i].second), MachinePointerInfo(StoresToEmit[i].second), @@ -6350,22 +6701,23 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { } if (!OutChains.empty()) - Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(), MVT::Other, + Chain = DAG.getNode(ISD::TokenFactor, getCurSDLoc(), MVT::Other, &OutChains[0], OutChains.size()); DAG.setRoot(Chain); } void SelectionDAGBuilder::visitVAStart(const CallInst &I) { - DAG.setRoot(DAG.getNode(ISD::VASTART, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::VASTART, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { - const DataLayout &TD = *TLI.getDataLayout(); - SDValue V = DAG.getVAArg(TLI.getValueType(I.getType()), getCurDebugLoc(), + const TargetLowering *TLI = TM.getTargetLowering(); + const DataLayout &TD = *TLI->getDataLayout(); + SDValue V = DAG.getVAArg(TLI->getValueType(I.getType()), getCurSDLoc(), getRoot(), getValue(I.getOperand(0)), DAG.getSrcValue(I.getOperand(0)), TD.getABITypeAlignment(I.getType())); @@ -6374,14 +6726,14 @@ void SelectionDAGBuilder::visitVAArg(const VAArgInst &I) { } void SelectionDAGBuilder::visitVAEnd(const CallInst &I) { - DAG.setRoot(DAG.getNode(ISD::VAEND, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::VAEND, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), DAG.getSrcValue(I.getArgOperand(0)))); } void SelectionDAGBuilder::visitVACopy(const CallInst &I) { - DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurDebugLoc(), + DAG.setRoot(DAG.getNode(ISD::VACOPY, getCurSDLoc(), MVT::Other, getRoot(), getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), @@ -6389,6 +6741,248 @@ void SelectionDAGBuilder::visitVACopy(const CallInst &I) { DAG.getSrcValue(I.getArgOperand(1)))); } +/// \brief Lower an argument list according to the target calling convention. +/// +/// \return A tuple of +/// +/// This is a helper for lowering intrinsics that follow a target calling +/// convention or require stack pointer adjustment. Only a subset of the +/// intrinsic's operands need to participate in the calling convention. +std::pair +SelectionDAGBuilder::LowerCallOperands(const CallInst &CI, unsigned ArgIdx, + unsigned NumArgs, SDValue Callee, + bool useVoidTy) { + TargetLowering::ArgListTy Args; + Args.reserve(NumArgs); + + // Populate the argument list. + // Attributes for args start at offset 1, after the return attribute. 
+ ImmutableCallSite CS(&CI); + for (unsigned ArgI = ArgIdx, ArgE = ArgIdx + NumArgs, AttrI = ArgIdx + 1; + ArgI != ArgE; ++ArgI) { + const Value *V = CI.getOperand(ArgI); + + assert(!V->getType()->isEmptyTy() && "Empty type passed to intrinsic."); + + TargetLowering::ArgListEntry Entry; + Entry.Node = getValue(V); + Entry.Ty = V->getType(); + Entry.setAttributes(&CS, AttrI); + Args.push_back(Entry); + } + + Type *retTy = useVoidTy ? Type::getVoidTy(*DAG.getContext()) : CI.getType(); + TargetLowering::CallLoweringInfo CLI(getRoot(), retTy, /*retSExt*/ false, + /*retZExt*/ false, /*isVarArg*/ false, /*isInReg*/ false, NumArgs, + CI.getCallingConv(), /*isTailCall*/ false, /*doesNotReturn*/ false, + /*isReturnValueUsed*/ CI.use_empty(), Callee, Args, DAG, getCurSDLoc()); + + const TargetLowering *TLI = TM.getTargetLowering(); + return TLI->LowerCallTo(CLI); +} + +/// \brief Lower llvm.experimental.stackmap directly to its target opcode. +void SelectionDAGBuilder::visitStackmap(const CallInst &CI) { + // void @llvm.experimental.stackmap(i32 , i32 , + // [live variables...]) + + assert(CI.getType()->isVoidTy() && "Stackmap cannot return a value."); + + SDValue Callee = getValue(CI.getCalledValue()); + + // Lower into a call sequence with no args and no return value. + std::pair Result = LowerCallOperands(CI, 0, 0, Callee); + // Set the root to the target-lowered call chain. + SDValue Chain = Result.second; + DAG.setRoot(Chain); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + SDNode *CallEnd = Chain.getNode(); + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool hasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the stackmap intrinsic. + SmallVector Ops; + + // Add the and constants. + for (unsigned i = 0; i < 2; ++i) { + SDValue tmp = getValue(CI.getOperand(i)); + Ops.push_back(DAG.getTargetConstant( + cast(tmp)->getZExtValue(), MVT::i32)); + } + // Push live variables for the stack map. + for (unsigned i = 2, e = CI.getNumArgOperands(); i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). + if (hasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a STACKMAP node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::STACKMAP, getCurSDLoc(), + NodeTys, Ops); + + // StackMap generates no value, so nothing goes in the NodeMap. + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. + DAG.ReplaceAllUsesWith(Call, MN); + + DAG.DeleteNode(Call); +} + +/// \brief Lower llvm.experimental.patchpoint directly to its target opcode. 
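LowerCallOperands() and visitStackmap() above set up the pattern that visitPatchpoint(), defined next, reuses: lower only the operands that participate in the calling convention, then graft the chain and glue of the lowered call onto a target pseudo node. An illustrative call of the helper, with made-up operand indices and CI, Callee and DAG as in the surrounding code:

    // Lower two convention-participating arguments starting at operand 4 of
    // intrinsic call CI, yielding a <return value, token chain> pair. The
    // index values here are illustrative only.
    std::pair<SDValue, SDValue> Res =
        LowerCallOperands(CI, /*ArgIdx=*/4, /*NumArgs=*/2, Callee,
                          /*useVoidTy=*/false);
    SDValue RetVal = Res.first;  // lowered return value, if any
    DAG.setRoot(Res.second);     // the token chain closes the call sequence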
+void SelectionDAGBuilder::visitPatchpoint(const CallInst &CI) { + // void|i64 @llvm.experimental.patchpoint.void|i64(i32 , + // i32 , + // i8* , + // i32 , + // [Args...], + // [live variables...]) + + CallingConv::ID CC = CI.getCallingConv(); + bool isAnyRegCC = CC == CallingConv::AnyReg; + bool hasDef = !CI.getType()->isVoidTy(); + SDValue Callee = getValue(CI.getOperand(2)); // + + // Get the real number of arguments participating in the call + unsigned NumArgs = + cast(getValue(CI.getArgOperand(3)))->getZExtValue(); + + // Skip the four meta args: , , , + assert(CI.getNumArgOperands() >= NumArgs + 4 && + "Not enough arguments provided to the patchpoint intrinsic"); + + // For AnyRegCC the arguments are lowered later on manually. + unsigned NumCallArgs = isAnyRegCC ? 0 : NumArgs; + std::pair Result = + LowerCallOperands(CI, 4, NumCallArgs, Callee, isAnyRegCC); + + // Set the root to the target-lowered call chain. + SDValue Chain = Result.second; + DAG.setRoot(Chain); + + SDNode *CallEnd = Chain.getNode(); + if (hasDef && (CallEnd->getOpcode() == ISD::CopyFromReg)) + CallEnd = CallEnd->getOperand(0).getNode(); + + /// Get a call instruction from the call sequence chain. + /// Tail calls are not allowed. + assert(CallEnd->getOpcode() == ISD::CALLSEQ_END && + "Expected a callseq node."); + SDNode *Call = CallEnd->getOperand(0).getNode(); + bool hasGlue = Call->getGluedNode(); + + // Replace the target specific call node with the patchable intrinsic. + SmallVector Ops; + + // Add the and constants. + for (unsigned i = 0; i < 2; ++i) { + SDValue tmp = getValue(CI.getOperand(i)); + Ops.push_back(DAG.getTargetConstant( + cast(tmp)->getZExtValue(), MVT::i32)); + } + // Assume that the Callee is a constant address. + Ops.push_back( + DAG.getIntPtrConstant(cast(Callee)->getZExtValue(), + /*isTarget=*/true)); + + // Adjust to account for any arguments that have been passed on the + // stack instead. + // Call Node: Chain, Target, {Args}, RegMask, [Glue] + unsigned NumCallRegArgs = Call->getNumOperands() - (hasGlue ? 4 : 3); + NumCallRegArgs = isAnyRegCC ? NumArgs : NumCallRegArgs; + Ops.push_back(DAG.getTargetConstant(NumCallRegArgs, MVT::i32)); + + // Add the calling convention + Ops.push_back(DAG.getTargetConstant((unsigned)CC, MVT::i32)); + + // Add the arguments we omitted previously. The register allocator should + // place these in any free register. + if (isAnyRegCC) + for (unsigned i = 4, e = NumArgs + 4; i != e; ++i) + Ops.push_back(getValue(CI.getArgOperand(i))); + + // Push the arguments from the call instruction. + SDNode::op_iterator e = hasGlue ? Call->op_end()-2 : Call->op_end()-1; + for (SDNode::op_iterator i = Call->op_begin()+2; i != e; ++i) + Ops.push_back(*i); + + // Push live variables for the stack map. + for (unsigned i = NumArgs + 4, e = CI.getNumArgOperands(); i != e; ++i) { + SDValue OpVal = getValue(CI.getArgOperand(i)); + if (ConstantSDNode *C = dyn_cast(OpVal)) { + Ops.push_back( + DAG.getTargetConstant(StackMaps::ConstantOp, MVT::i64)); + Ops.push_back( + DAG.getTargetConstant(C->getSExtValue(), MVT::i64)); + } else + Ops.push_back(OpVal); + } + + // Push the register mask info. + if (hasGlue) + Ops.push_back(*(Call->op_end()-2)); + else + Ops.push_back(*(Call->op_end()-1)); + + // Push the chain (this is originally the first operand of the call, but + // becomes now the last or second to last operand). + Ops.push_back(*(Call->op_begin())); + + // Push the glue flag (last operand). 
+ if (hasGlue) + Ops.push_back(*(Call->op_end()-1)); + + SDVTList NodeTys; + if (isAnyRegCC && hasDef) { + // Create the return types based on the intrinsic definition + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + SmallVector ValueVTs; + ComputeValueVTs(TLI, CI.getType(), ValueVTs); + assert(ValueVTs.size() == 1 && "Expected only one return value type."); + + // There is always a chain and a glue type at the end + ValueVTs.push_back(MVT::Other); + ValueVTs.push_back(MVT::Glue); + NodeTys = DAG.getVTList(ValueVTs.data(), ValueVTs.size()); + } else + NodeTys = DAG.getVTList(MVT::Other, MVT::Glue); + + // Replace the target specific call node with a PATCHPOINT node. + MachineSDNode *MN = DAG.getMachineNode(TargetOpcode::PATCHPOINT, + getCurSDLoc(), NodeTys, Ops); + + // Update the NodeMap. + if (hasDef) { + if (isAnyRegCC) + setValue(&CI, SDValue(MN, 0)); + else + setValue(&CI, Result.first); + } + + // Fixup the consumers of the intrinsic. The chain and glue may be used in the + // call sequence. Furthermore the location of the chain and glue can change + // when the AnyReg calling convention is used and the intrinsic returns a + // value. + if (isAnyRegCC && hasDef) { + SDValue From[] = {SDValue(Call, 0), SDValue(Call, 1)}; + SDValue To[] = {SDValue(MN, 1), SDValue(MN, 2)}; + DAG.ReplaceAllUsesOfValuesWith(From, To, 2); + } else + DAG.ReplaceAllUsesWith(Call, MN); + DAG.DeleteNode(Call); +} + /// TargetLowering::LowerCallTo - This is the default LowerCallTo /// implementation, which just calls LowerCall. /// FIXME: When all targets are @@ -6406,6 +7000,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; MyFlags.VT = RegisterVT; + MyFlags.ArgVT = VT; MyFlags.Used = CLI.IsReturnValueUsed; if (CLI.RetSExt) MyFlags.Flags.setSExt(); @@ -6495,7 +7090,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { for (unsigned j = 0; j != NumParts; ++j) { // if it isn't first piece, alignment must be 1 - ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), + ISD::OutputArg MyFlags(Flags, Parts[j].getValueType(), VT, i < CLI.NumFixedArgs, i, j*Parts[j].getValueType().getStoreSize()); if (NumParts > 1 && j == 0) @@ -6588,9 +7183,10 @@ SelectionDAGBuilder::CopyValueToVirtualRegister(const Value *V, unsigned Reg) { "Copy from a reg to the same reg!"); assert(!TargetRegisterInfo::isPhysicalRegister(Reg) && "Is a physreg"); - RegsForValue RFV(V->getContext(), TLI, Reg, V->getType()); + const TargetLowering *TLI = TM.getTargetLowering(); + RegsForValue RFV(V->getContext(), *TLI, Reg, V->getType()); SDValue Chain = DAG.getEntryNode(); - RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0, V); + RFV.getCopyToRegs(Op, DAG, getCurSDLoc(), Chain, 0, V); PendingExports.push_back(Chain); } @@ -6617,21 +7213,23 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; - DebugLoc dl = SDB->getCurDebugLoc(); - const DataLayout *TD = TLI.getDataLayout(); + SDLoc dl = SDB->getCurSDLoc(); + const TargetLowering *TLI = getTargetLowering(); + const DataLayout *TD = TLI->getDataLayout(); SmallVector Ins; if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. 
SmallVector ValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*getTargetLowering(), + PointerType::getUnqual(F.getReturnType()), ValueVTs); // NOTE: Assuming that a pointer will never break down to more than one VT // or one register. ISD::ArgFlagsTy Flags; Flags.setSRet(); - MVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); - ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0); + MVT RegisterVT = TLI->getRegisterType(*DAG.getContext(), ValueVTs[0]); + ISD::InputArg RetArg(Flags, RegisterVT, ValueVTs[0], true, 0, 0); Ins.push_back(RetArg); } @@ -6640,8 +7238,9 @@ void SelectionDAGISel::LowerArguments(const Function &F) { for (Function::const_arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I, ++Idx) { SmallVector ValueVTs; - ComputeValueVTs(TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, I->getType(), ValueVTs); bool isArgValueUsed = !I->use_empty(); + unsigned PartBase = 0; for (unsigned Value = 0, NumValues = ValueVTs.size(); Value != NumValues; ++Value) { EVT VT = ValueVTs[Value]; @@ -6669,18 +7268,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { if (F.getParamAlignment(Idx)) FrameAlign = F.getParamAlignment(Idx); else - FrameAlign = TLI.getByValTypeAlignment(ElementTy); + FrameAlign = TLI->getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); } if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - MVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); - unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); + MVT RegisterVT = TLI->getRegisterType(*CurDAG->getContext(), VT); + unsigned NumRegs = TLI->getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { - ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed, - Idx-1, i*RegisterVT.getStoreSize()); + ISD::InputArg MyFlags(Flags, RegisterVT, VT, isArgValueUsed, + Idx-1, PartBase+i*RegisterVT.getStoreSize()); if (NumRegs > 1 && i == 0) MyFlags.Flags.setSplit(); // if it isn't first piece, alignment must be 1 @@ -6688,14 +7287,15 @@ void SelectionDAGISel::LowerArguments(const Function &F) { MyFlags.Flags.setOrigAlign(1); Ins.push_back(MyFlags); } + PartBase += VT.getStoreSize(); } } // Call the target to set up the argument values. SmallVector InVals; - SDValue NewRoot = TLI.LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), - F.isVarArg(), Ins, - dl, DAG, InVals); + SDValue NewRoot = TLI->LowerFormalArguments(DAG.getRoot(), F.getCallingConv(), + F.isVarArg(), Ins, + dl, DAG, InVals); // Verify that the target's LowerFormalArguments behaved as expected. assert(NewRoot.getNode() && NewRoot.getValueType() == MVT::Other && @@ -6721,18 +7321,18 @@ void SelectionDAGISel::LowerArguments(const Function &F) { // Create a virtual register for the sret pointer, and put in a copy // from the sret argument into it. 
SmallVector ValueVTs; - ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); + ComputeValueVTs(*TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); MVT VT = ValueVTs[0].getSimpleVT(); - MVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + MVT RegVT = TLI->getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, NULL, AssertOp); MachineFunction& MF = SDB->DAG.getMachineFunction(); MachineRegisterInfo& RegInfo = MF.getRegInfo(); - unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT)); + unsigned SRetReg = RegInfo.createVirtualRegister(TLI->getRegClassFor(RegVT)); FuncInfo->DemoteRegister = SRetReg; - NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurDebugLoc(), + NewRoot = SDB->DAG.getCopyToReg(NewRoot, SDB->getCurSDLoc(), SRetReg, ArgValue); DAG.setRoot(NewRoot); @@ -6745,18 +7345,24 @@ void SelectionDAGISel::LowerArguments(const Function &F) { ++I, ++Idx) { SmallVector ArgValues; SmallVector ValueVTs; - ComputeValueVTs(TLI, I->getType(), ValueVTs); + ComputeValueVTs(*TLI, I->getType(), ValueVTs); unsigned NumValues = ValueVTs.size(); // If this argument is unused then remember its value. It is used to generate // debugging information. - if (I->use_empty() && NumValues) + if (I->use_empty() && NumValues) { SDB->setUnusedArgValue(I, InVals[i]); + // Also remember any frame index for use in FastISel. + if (FrameIndexSDNode *FI = + dyn_cast(InVals[i].getNode())) + FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); + } + for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - MVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); - unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); + MVT PartVT = TLI->getRegisterType(*CurDAG->getContext(), VT); + unsigned NumParts = TLI->getNumRegisters(*CurDAG->getContext(), VT); if (!I->use_empty()) { ISD::NodeType AssertOp = ISD::DELETED_NODE; @@ -6783,11 +7389,11 @@ void SelectionDAGISel::LowerArguments(const Function &F) { FuncInfo->setArgumentFrameIndex(I, FI->getIndex()); SDValue Res = DAG.getMergeValues(&ArgValues[0], NumValues, - SDB->getCurDebugLoc()); + SDB->getCurSDLoc()); SDB->setValue(I, Res); if (!TM.Options.EnableFastISel && Res.getOpcode() == ISD::BUILD_PAIR) { - if (LoadSDNode *LNode = + if (LoadSDNode *LNode = dyn_cast(Res.getOperand(0).getNode())) if (FrameIndexSDNode *FI = dyn_cast(LNode->getBasePtr().getNode())) @@ -6885,15 +7491,36 @@ SelectionDAGBuilder::HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB) { // Remember that this register needs to added to the machine PHI node as // the input for this MBB. SmallVector ValueVTs; - ComputeValueVTs(TLI, PN->getType(), ValueVTs); + const TargetLowering *TLI = TM.getTargetLowering(); + ComputeValueVTs(*TLI, PN->getType(), ValueVTs); for (unsigned vti = 0, vte = ValueVTs.size(); vti != vte; ++vti) { EVT VT = ValueVTs[vti]; - unsigned NumRegisters = TLI.getNumRegisters(*DAG.getContext(), VT); + unsigned NumRegisters = TLI->getNumRegisters(*DAG.getContext(), VT); for (unsigned i = 0, e = NumRegisters; i != e; ++i) FuncInfo.PHINodesToUpdate.push_back(std::make_pair(MBBI++, Reg+i)); Reg += NumRegisters; } } } + ConstantsOut.clear(); } + +/// Add a successor MBB to ParentMBB< creating a new MachineBB for BB if SuccMBB +/// is 0. 
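The definition that follows backs StackProtectorDescriptor::initialize() in the header changes further below, which calls it in two distinct ways via the optional third parameter. A sketch of those two call sites, taken from the initialize() hunk:

    // Success block: SuccMBB is omitted (0), so a fresh block is created for
    // every parent block. Failure block: the previous value is passed back
    // in, so one failure path is shared by all checks in the function.
    SuccessMBB = AddSuccessorMBB(BB, MBB);
    FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB);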
+MachineBasicBlock * +SelectionDAGBuilder::StackProtectorDescriptor:: +AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + MachineBasicBlock *SuccMBB) { + // If SuccBB has not been created yet, create it. + if (!SuccMBB) { + MachineFunction *MF = ParentMBB->getParent(); + MachineFunction::iterator BBI = ParentMBB; + SuccMBB = MF->CreateMachineBasicBlock(BB); + MF->insert(++BBI, SuccMBB); + } + // Add it as a successor of ParentMBB. + ParentMBB->addSuccessor(SuccMBB); + return SuccMBB; +} diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9188945..835f643 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -1,4 +1,4 @@ -//===-- SelectionDAGBuilder.h - Selection-DAG building --------------------===// +//===-- SelectionDAGBuilder.h - Selection-DAG building --------*- C++ -*---===// // // The LLVM Compiler Infrastructure // @@ -26,6 +26,7 @@ namespace llvm { +class AddrSpaceCastInst; class AliasAnalysis; class AllocaInst; class BasicBlock; @@ -80,11 +81,11 @@ class ZExtInst; /// implementation that is parameterized by a TargetLowering object. /// class SelectionDAGBuilder { - /// CurDebugLoc - current file + line number. Changes as we build the DAG. - DebugLoc CurDebugLoc; + /// CurInst - The current instruction being visited + const Instruction *CurInst; DenseMap NodeMap; - + /// UnusedArgNodeMap - Maps argument value for unused arguments. This is used /// to preserve debug information for incoming arguments. DenseMap UnusedArgNodeMap; @@ -182,6 +183,17 @@ private: typedef std::vector CaseRecVector; + /// The comparison function for sorting the switch case values in the vector. + /// WARNING: Case ranges should be disjoint! + struct CaseCmp { + bool operator()(const Case &C1, const Case &C2) { + assert(isa(C1.Low) && isa(C2.High)); + const ConstantInt* CI1 = cast(C1.Low); + const ConstantInt* CI2 = cast(C2.High); + return CI1->getValue().slt(CI2->getValue()); + } + }; + struct CaseBitsCmp { bool operator()(const CaseBits &C1, const CaseBits &C2) { return C1.Bits > C2.Bits; @@ -224,7 +236,7 @@ private: struct JumpTable { JumpTable(unsigned R, unsigned J, MachineBasicBlock *M, MachineBasicBlock *D): Reg(R), JTI(J), MBB(M), Default(D) {} - + /// Reg - the virtual register containing the index of the jump table entry //. to jump to. unsigned Reg; @@ -278,12 +290,204 @@ private: BitTestInfo Cases; }; -public: - // TLI - This is information that describes the available target features we - // need for lowering. This indicates when operations are unavailable, - // implemented with a libcall, etc. + /// A class which encapsulates all of the information needed to generate a + /// stack protector check and signals to isel via its state being initialized + /// that a stack protector needs to be generated. + /// + /// *NOTE* The following is a high level documentation of SelectionDAG Stack + /// Protector Generation. The reason that it is placed here is for a lack of + /// other good places to stick it. + /// + /// High Level Overview of SelectionDAG Stack Protector Generation: + /// + /// Previously, generation of stack protectors was done exclusively in the + /// pre-SelectionDAG Codegen LLVM IR Pass "Stack Protector". This necessitated + /// splitting basic blocks at the IR level to create the success/failure basic + /// blocks in the tail of the basic block in question. 
As a result of this, + /// calls that would have qualified for the sibling call optimization were no + /// longer eligible for optimization since said calls were no longer right in + /// the "tail position" (i.e. the immediate predecessor of a ReturnInst + /// instruction). + /// + /// Then it was noticed that since the sibling call optimization causes the + /// callee to reuse the caller's stack, if we could delay the generation of + /// the stack protector check until later in CodeGen after the sibling call + /// decision was made, we get both the tail call optimization and the stack + /// protector check! + /// + /// A few goals in solving this problem were: + /// + /// 1. Preserve the architecture independence of stack protector generation. + /// + /// 2. Preserve the normal IR level stack protector check for platforms like + /// OpenBSD for which we support platform specific stack protector + /// generation. + /// + /// The main problem that guided the present solution is that one can not + /// solve this problem in an architecture independent manner at the IR level + /// only. This is because: + /// + /// 1. The decision on whether or not to perform a sibling call on certain + /// platforms (for instance i386) requires lower level information + /// related to available registers that can not be known at the IR level. + /// + /// 2. Even if the previous point were not true, the decision on whether to + /// perform a tail call is done in LowerCallTo in SelectionDAG which + /// occurs after the Stack Protector Pass. As a result, one would need to + /// put the relevant callinst into the stack protector check success + /// basic block (where the return inst is placed) and then move it back + /// later at SelectionDAG/MI time before the stack protector check if the + /// tail call optimization failed. The MI level option was nixed + /// immediately since it would require platform specific pattern + /// matching. The SelectionDAG level option was nixed because + /// SelectionDAG only processes one IR level basic block at a time + /// implying one could not create a DAG Combine to move the callinst. + /// + /// To get around this problem a few things were realized: + /// + /// 1. While one can not handle multiple IR level basic blocks at the + /// SelectionDAG Level, one can generate multiple machine basic blocks + /// for one IR level basic block. This is how we handle bit tests and + /// switches. + /// + /// 2. At the MI level, tail calls are represented via a special return + /// MIInst called "tcreturn". Thus if we know the basic block in which we + /// wish to insert the stack protector check, we get the correct behavior + /// by always inserting the stack protector check right before the return + /// statement. This is a "magical transformation" since no matter where + /// the stack protector check intrinsic is, we always insert the stack + /// protector check code at the end of the BB. + /// + /// Given the aforementioned constraints, the following solution was devised: + /// + /// 1. On platforms that do not support SelectionDAG stack protector check + /// generation, allow for the normal IR level stack protector check + /// generation to continue. + /// + /// 2. On platforms that do support SelectionDAG stack protector check + /// generation: + /// + /// a. Use the IR level stack protector pass to decide if a stack + /// protector is required/which BB we insert the stack protector check + /// in by reusing the logic already therein. 
If we wish to generate a + /// stack protector check in a basic block, we place a special IR + /// intrinsic called llvm.stackprotectorcheck right before the BB's + /// returninst or if there is a callinst that could potentially be + /// sibling call optimized, before the call inst. + /// + /// b. Then when a BB with said intrinsic is processed, we codegen the BB + /// normally via SelectBasicBlock. In said process, when we visit the + /// stack protector check, we do not actually emit anything into the + /// BB. Instead, we just initialize the stack protector descriptor + /// class (which involves stashing information/creating the success + /// mbbb and the failure mbb if we have not created one for this + /// function yet) and export the guard variable that we are going to + /// compare. + /// + /// c. After we finish selecting the basic block, in FinishBasicBlock if + /// the StackProtectorDescriptor attached to the SelectionDAGBuilder is + /// initialized, we first find a splice point in the parent basic block + /// before the terminator and then splice the terminator of said basic + /// block into the success basic block. Then we code-gen a new tail for + /// the parent basic block consisting of the two loads, the comparison, + /// and finally two branches to the success/failure basic blocks. We + /// conclude by code-gening the failure basic block if we have not + /// code-gened it already (all stack protector checks we generate in + /// the same function, use the same failure basic block). + class StackProtectorDescriptor { + public: + StackProtectorDescriptor() : ParentMBB(0), SuccessMBB(0), FailureMBB(0), + Guard(0) { } + ~StackProtectorDescriptor() { } + + /// Returns true if all fields of the stack protector descriptor are + /// initialized implying that we should/are ready to emit a stack protector. + bool shouldEmitStackProtector() const { + return ParentMBB && SuccessMBB && FailureMBB && Guard; + } + + /// Initialize the stack protector descriptor structure for a new basic + /// block. + void initialize(const BasicBlock *BB, + MachineBasicBlock *MBB, + const CallInst &StackProtCheckCall) { + // Make sure we are not initialized yet. + assert(!shouldEmitStackProtector() && "Stack Protector Descriptor is " + "already initialized!"); + ParentMBB = MBB; + SuccessMBB = AddSuccessorMBB(BB, MBB); + FailureMBB = AddSuccessorMBB(BB, MBB, FailureMBB); + if (!Guard) + Guard = StackProtCheckCall.getArgOperand(0); + } + + /// Reset state that changes when we handle different basic blocks. + /// + /// This currently includes: + /// + /// 1. The specific basic block we are generating a + /// stack protector for (ParentMBB). + /// + /// 2. The successor machine basic block that will contain the tail of + /// parent mbb after we create the stack protector check (SuccessMBB). This + /// BB is visited only on stack protector check success. + void resetPerBBState() { + ParentMBB = 0; + SuccessMBB = 0; + } + + /// Reset state that only changes when we switch functions. + /// + /// This currently includes: + /// + /// 1. FailureMBB since we reuse the failure code path for all stack + /// protector checks created in an individual function. + /// + /// 2.The guard variable since the guard variable we are checking against is + /// always the same. 
+ void resetPerFunctionState() { + FailureMBB = 0; + Guard = 0; + } + + MachineBasicBlock *getParentMBB() { return ParentMBB; } + MachineBasicBlock *getSuccessMBB() { return SuccessMBB; } + MachineBasicBlock *getFailureMBB() { return FailureMBB; } + const Value *getGuard() { return Guard; } + + private: + /// The basic block for which we are generating the stack protector. + /// + /// As a result of stack protector generation, we will splice the + /// terminators of this basic block into the successor mbb SuccessMBB and + /// replace it with a compare/branch to the successor mbbs + /// SuccessMBB/FailureMBB depending on whether or not the stack protector + /// was violated. + MachineBasicBlock *ParentMBB; + + /// A basic block visited on stack protector check success that contains the + /// terminators of ParentMBB. + MachineBasicBlock *SuccessMBB; + + /// This basic block visited on stack protector check failure that will + /// contain a call to __stack_chk_fail(). + MachineBasicBlock *FailureMBB; + + /// The guard variable which we will compare against the stored value in the + /// stack protector stack slot. + const Value *Guard; + + /// Add a successor machine basic block to ParentMBB. If the successor mbb + /// has not been created yet (i.e. if SuccMBB = 0), then the machine basic + /// block will be created. + MachineBasicBlock *AddSuccessorMBB(const BasicBlock *BB, + MachineBasicBlock *ParentMBB, + MachineBasicBlock *SuccMBB = 0); + }; + +private: const TargetMachine &TM; - const TargetLowering &TLI; +public: SelectionDAG &DAG; const DataLayout *TD; AliasAnalysis *AA; @@ -298,6 +502,9 @@ public: /// BitTestCases - Vector of BitTestBlock structures used to communicate /// SwitchInst code generation information. std::vector BitTestCases; + /// A StackProtectorDescriptor structure used to communicate stack protector + /// information in between SelectBasicBlock and FinishBasicBlock. + StackProtectorDescriptor SPDescriptor; // Emit PHI-node-operand constants only once even if used by multiple // PHI nodes. @@ -308,9 +515,9 @@ public: FunctionLoweringInfo &FuncInfo; /// OptLevel - What optimization level we're generating code for. - /// + /// CodeGenOpt::Level OptLevel; - + /// GFI - Garbage collection metadata for the function. GCFunctionInfo *GFI; @@ -327,7 +534,7 @@ public: SelectionDAGBuilder(SelectionDAG &dag, FunctionLoweringInfo &funcinfo, CodeGenOpt::Level ol) - : SDNodeOrder(0), TM(dag.getTarget()), TLI(dag.getTargetLoweringInfo()), + : CurInst(NULL), SDNodeOrder(0), TM(dag.getTarget()), DAG(dag), FuncInfo(funcinfo), OptLevel(ol), HasTailCall(false) { } @@ -364,17 +571,18 @@ public: /// SDValue getControlRoot(); - DebugLoc getCurDebugLoc() const { return CurDebugLoc; } + SDLoc getCurSDLoc() const { + return SDLoc(CurInst, SDNodeOrder); + } + + DebugLoc getCurDebugLoc() const { + return CurInst ? CurInst->getDebugLoc() : DebugLoc(); + } unsigned getSDNodeOrder() const { return SDNodeOrder; } void CopyValueToVirtualRegister(const Value *V, unsigned Reg); - /// AssignOrderingToNode - Assign an ordering to the node. The order is gotten - /// from how the code appeared in the source. The ordering is used by the - /// scheduler to effectively turn off scheduling. 
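As the accessor pair above shows, the builder no longer caches a DebugLoc; it remembers the instruction being visited and derives both the debug location and the IR order from it on demand. A minimal sketch of a node-creation site under the new scheme, where VT, LHS and RHS are placeholders:

    // SDLoc(CurInst, SDNodeOrder) carries the DebugLoc and the IR order in a
    // single object, which is why the explicit per-node ordering step removed
    // just below is no longer needed.
    SDLoc dl = getCurSDLoc();
    SDValue Sum = DAG.getNode(ISD::ADD, dl, VT, LHS, RHS);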
- void AssignOrderingToNode(const SDNode *Node); - void visit(const Instruction &I); void visit(unsigned Opcode, const User &I); @@ -391,7 +599,7 @@ public: assert(N.getNode() == 0 && "Already set a value for this node!"); N = NewN; } - + void setUnusedArgValue(const Value *V, SDValue NewN) { SDValue &N = UnusedArgNodeMap[V]; assert(N.getNode() == 0 && "Already set a value for this node!"); @@ -412,6 +620,12 @@ public: void LowerCallTo(ImmutableCallSite CS, SDValue Callee, bool IsTailCall, MachineBasicBlock *LandingPad = NULL); + std::pair LowerCallOperands(const CallInst &CI, + unsigned ArgIdx, + unsigned NumArgs, + SDValue Callee, + bool useVoidTy = false); + /// UpdateSplitBlock - When an MBB was split during scheduling, update the /// references that ned to refer to the last resulting block. void UpdateSplitBlock(MachineBasicBlock *First, MachineBasicBlock *Last); @@ -453,6 +667,9 @@ private: public: void visitSwitchCase(CaseBlock &CB, MachineBasicBlock *SwitchBB); + void visitSPDescriptorParent(StackProtectorDescriptor &SPD, + MachineBasicBlock *ParentBB); + void visitSPDescriptorFailure(StackProtectorDescriptor &SPD); void visitBitTestHeader(BitTestBlock &B, MachineBasicBlock *SwitchBB); void visitBitTestCase(BitTestBlock &BB, MachineBasicBlock* NextMBB, @@ -463,7 +680,7 @@ public: void visitJumpTable(JumpTable &JT); void visitJumpTableHeader(JumpTable &JT, JumpTableHeader &JTH, MachineBasicBlock *SwitchBB); - + private: // These all get lowered before this pass. void visitInvoke(const InvokeInst &I); @@ -504,6 +721,7 @@ private: void visitPtrToInt(const User &I); void visitIntToPtr(const User &I); void visitBitCast(const User &I); + void visitAddrSpaceCast(const User &I); void visitExtractElement(const User &I); void visitInsertElement(const User &I); @@ -525,6 +743,11 @@ private: void visitPHI(const PHINode &I); void visitCall(const CallInst &I); bool visitMemCmpCall(const CallInst &I); + bool visitMemChrCall(const CallInst &I); + bool visitStrCpyCall(const CallInst &I, bool isStpcpy); + bool visitStrCmpCall(const CallInst &I); + bool visitStrLenCall(const CallInst &I); + bool visitStrNLenCall(const CallInst &I); bool visitUnaryFloatCall(const CallInst &I, unsigned Opcode); void visitAtomicLoad(const LoadInst &I); void visitAtomicStore(const StoreInst &I); @@ -537,6 +760,8 @@ private: void visitVAArg(const VAArgInst &I); void visitVAEnd(const CallInst &I); void visitVACopy(const CallInst &I); + void visitStackmap(const CallInst &I); + void visitPatchpoint(const CallInst &I); void visitUserOp1(const Instruction &I) { llvm_unreachable("UserOp1 should not exist at instruction selection time!"); @@ -545,10 +770,13 @@ private: llvm_unreachable("UserOp2 should not exist at instruction selection time!"); } + void processIntegerCallValue(const Instruction &I, + SDValue Value, bool IsSigned); + void HandlePHINodesInSuccessorBlocks(const BasicBlock *LLVMBB); /// EmitFuncArgumentDbgValue - If V is an function argument then create - /// corresponding DBG_VALUE machine instruction for it now. At the end of + /// corresponding DBG_VALUE machine instruction for it now. At the end of /// instruction selection, they will be inserted to the entry BB. 
bool EmitFuncArgumentDbgValue(const Value *V, MDNode *Variable, int64_t Offset, const SDValue &N); diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 47b0391..c04a08d 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -92,9 +92,6 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::RETURNADDR: return "RETURNADDR"; case ISD::FRAMEADDR: return "FRAMEADDR"; case ISD::FRAME_TO_ARGS_OFFSET: return "FRAME_TO_ARGS_OFFSET"; - case ISD::EXCEPTIONADDR: return "EXCEPTIONADDR"; - case ISD::LSDAADDR: return "LSDAADDR"; - case ISD::EHSELECTION: return "EHSELECTION"; case ISD::EH_RETURN: return "EH_RETURN"; case ISD::EH_SJLJ_SETJMP: return "EH_SJLJ_SETJMP"; case ISD::EH_SJLJ_LONGJMP: return "EH_SJLJ_LONGJMP"; @@ -145,6 +142,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FCEIL: return "fceil"; case ISD::FRINT: return "frint"; case ISD::FNEARBYINT: return "fnearbyint"; + case ISD::FROUND: return "fround"; case ISD::FEXP: return "fexp"; case ISD::FEXP2: return "fexp2"; case ISD::FLOG: return "flog"; @@ -226,6 +224,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FP_TO_SINT: return "fp_to_sint"; case ISD::FP_TO_UINT: return "fp_to_uint"; case ISD::BITCAST: return "bitcast"; + case ISD::ADDRSPACECAST: return "addrspacecast"; case ISD::FP16_TO_FP32: return "fp16_to_fp32"; case ISD::FP32_TO_FP16: return "fp32_to_fp16"; @@ -487,10 +486,16 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { OS << " " << offset; if (unsigned int TF = BA->getTargetFlags()) OS << " [TF=" << TF << ']'; + } else if (const AddrSpaceCastSDNode *ASC = + dyn_cast(this)) { + OS << '[' + << ASC->getSrcAddressSpace() + << " -> " + << ASC->getDestAddressSpace() + << ']'; } - if (G) - if (unsigned Order = G->GetOrdering(this)) + if (unsigned Order = getIROrder()) OS << " [ORD=" << Order << ']'; if (getNodeId() != -1) @@ -501,8 +506,10 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const { DIScope Scope(dl.getScope(G->getMachineFunction().getFunction()->getContext())); OS << " dbg:"; + assert((!Scope || Scope.isScope()) && + "Scope of a DebugLoc should be null or a DIScope."); // Omit the directory, since it's usually long and uninteresting. - if (Scope.Verify()) + if (Scope) OS << Scope.getFilename(); else OS << ""; diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index 9935626..3a0cfa1 100644 --- a/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/contrib/llvm/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -19,6 +19,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -222,23 +223,61 @@ defaultListDAGScheduler("default", "Best scheduler for the target", namespace llvm { //===--------------------------------------------------------------------===// + /// \brief This class is used by SelectionDAGISel to temporarily override + /// the optimization level on a per-function basis. 
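OptLevelChanger, defined next, is an RAII guard: its constructor swaps in the new level (forcing fast-isel at -O0) and its destructor restores the saved settings. As the runOnMachineFunction() hunk later in this patch shows, it is instantiated roughly like this for functions carrying the optnone attribute:

    // Scope the override to a single machine function; the saved level and
    // fast-isel flag come back when OLC goes out of scope.
    CodeGenOpt::Level NewOptLevel = OptLevel;
    if (Fn.hasFnAttribute(Attribute::OptimizeNone))
      NewOptLevel = CodeGenOpt::None;        // optnone functions build at -O0
    OptLevelChanger OLC(*this, NewOptLevel); // reverted in ~OptLevelChanger()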
+ class OptLevelChanger { + SelectionDAGISel &IS; + CodeGenOpt::Level SavedOptLevel; + bool SavedFastISel; + + public: + OptLevelChanger(SelectionDAGISel &ISel, + CodeGenOpt::Level NewOptLevel) : IS(ISel) { + SavedOptLevel = IS.OptLevel; + if (NewOptLevel == SavedOptLevel) + return; + IS.OptLevel = NewOptLevel; + IS.TM.setOptLevel(NewOptLevel); + SavedFastISel = IS.TM.Options.EnableFastISel; + if (NewOptLevel == CodeGenOpt::None) + IS.TM.setFastISel(true); + DEBUG(dbgs() << "\nChanging optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << SavedOptLevel + << " ; After: -O" << NewOptLevel << "\n"); + } + + ~OptLevelChanger() { + if (IS.OptLevel == SavedOptLevel) + return; + DEBUG(dbgs() << "\nRestoring optimization level for Function " + << IS.MF->getFunction()->getName() << "\n"); + DEBUG(dbgs() << "\tBefore: -O" << IS.OptLevel + << " ; After: -O" << SavedOptLevel << "\n"); + IS.OptLevel = SavedOptLevel; + IS.TM.setOptLevel(SavedOptLevel); + IS.TM.setFastISel(SavedFastISel); + } + }; + + //===--------------------------------------------------------------------===// /// createDefaultScheduler - This creates an instruction scheduler appropriate /// for the target. ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { - const TargetLowering &TLI = IS->getTargetLowering(); + const TargetLowering *TLI = IS->getTargetLowering(); const TargetSubtargetInfo &ST = IS->TM.getSubtarget(); - if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() || - TLI.getSchedulingPreference() == Sched::Source) + if (OptLevel == CodeGenOpt::None || ST.useMachineScheduler() || + TLI->getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::RegPressure) + if (TLI->getSchedulingPreference() == Sched::RegPressure) return createBURRListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::Hybrid) + if (TLI->getSchedulingPreference() == Sched::Hybrid) return createHybridListDAGScheduler(IS, OptLevel); - if (TLI.getSchedulingPreference() == Sched::VLIW) + if (TLI->getSchedulingPreference() == Sched::VLIW) return createVLIWDAGScheduler(IS, OptLevel); - assert(TLI.getSchedulingPreference() == Sched::ILP && + assert(TLI->getSchedulingPreference() == Sched::ILP && "Unknown sched type!"); return createILPListDAGScheduler(IS, OptLevel); } @@ -275,10 +314,10 @@ void TargetLowering::AdjustInstrPostInstrSelection(MachineInstr *MI, // SelectionDAGISel code //===----------------------------------------------------------------------===// -SelectionDAGISel::SelectionDAGISel(const TargetMachine &tm, +SelectionDAGISel::SelectionDAGISel(TargetMachine &tm, CodeGenOpt::Level OL) : - MachineFunctionPass(ID), TM(tm), TLI(*tm.getTargetLowering()), - FuncInfo(new FunctionLoweringInfo(TLI)), + MachineFunctionPass(ID), TM(tm), + FuncInfo(new FunctionLoweringInfo(TM)), CurDAG(new SelectionDAG(tm, OL)), SDB(new SelectionDAGBuilder(*CurDAG, *FuncInfo, OL)), GFI(), @@ -355,6 +394,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { const Function &Fn = *mf.getFunction(); const TargetInstrInfo &TII = *TM.getInstrInfo(); const TargetRegisterInfo &TRI = *TM.getRegisterInfo(); + const TargetLowering *TLI = TM.getTargetLowering(); MF = &mf; RegInfo = &MF->getRegInfo(); @@ -368,11 +408,17 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { ST.resetSubtargetFeatures(MF); TM.resetTargetOptions(MF); + // Reset 
OptLevel to None for optnone functions. + CodeGenOpt::Level NewOptLevel = OptLevel; + if (Fn.hasFnAttribute(Attribute::OptimizeNone)) + NewOptLevel = CodeGenOpt::None; + OptLevelChanger OLC(*this, NewOptLevel); + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast(Fn), this); - CurDAG->init(*MF, TTI); + CurDAG->init(*MF, TTI, TLI); FuncInfo->set(Fn, *MF); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -401,29 +447,37 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Insert DBG_VALUE instructions for function arguments to the entry block. for (unsigned i = 0, e = FuncInfo->ArgDbgValues.size(); i != e; ++i) { MachineInstr *MI = FuncInfo->ArgDbgValues[e-i-1]; - unsigned Reg = MI->getOperand(0).getReg(); + bool hasFI = MI->getOperand(0).isFI(); + unsigned Reg = hasFI ? TRI.getFrameRegister(*MF) : MI->getOperand(0).getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) EntryMBB->insert(EntryMBB->begin(), MI); else { MachineInstr *Def = RegInfo->getVRegDef(Reg); - MachineBasicBlock::iterator InsertPos = Def; - // FIXME: VR def may not be in entry block. - Def->getParent()->insert(llvm::next(InsertPos), MI); + if (Def) { + MachineBasicBlock::iterator InsertPos = Def; + // FIXME: VR def may not be in entry block. + Def->getParent()->insert(llvm::next(InsertPos), MI); + } else + DEBUG(dbgs() << "Dropping debug info for dead vreg" + << TargetRegisterInfo::virtReg2Index(Reg) << "\n"); } // If Reg is live-in then update debug info to track its copy in a vreg. DenseMap::iterator LDI = LiveInMap.find(Reg); if (LDI != LiveInMap.end()) { + assert(!hasFI && "There's no handling of frame pointer updating here yet " + "- add if needed"); MachineInstr *Def = RegInfo->getVRegDef(LDI->second); MachineBasicBlock::iterator InsertPos = Def; const MDNode *Variable = MI->getOperand(MI->getNumOperands()-1).getMetadata(); - unsigned Offset = MI->getOperand(1).getImm(); + bool IsIndirect = MI->isIndirectDebugValue(); + unsigned Offset = IsIndirect ? MI->getOperand(1).getImm() : 0; // Def is never a terminator here, so it is ok to increment InsertPos. BuildMI(*EntryMBB, ++InsertPos, MI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(LDI->second, RegState::Debug) - .addImm(Offset).addMetadata(Variable); + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, + LDI->second, Offset, Variable); // If this vreg is directly copied into an exported register then // that COPY instructions also need DBG_VALUE, if it is the only @@ -442,9 +496,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (CopyUseMI) { MachineInstr *NewMI = BuildMI(*MF, CopyUseMI->getDebugLoc(), - TII.get(TargetOpcode::DBG_VALUE)) - .addReg(CopyUseMI->getOperand(0).getReg(), RegState::Debug) - .addImm(Offset).addMetadata(Variable); + TII.get(TargetOpcode::DBG_VALUE), + IsIndirect, + CopyUseMI->getOperand(0).getReg(), + Offset, Variable); MachineBasicBlock::iterator Pos = CopyUseMI; EntryMBB->insertAfter(Pos, NewMI); } @@ -491,6 +546,10 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { if (J == E) break; To = J->second; } + // Make sure the new register has a sufficiently constrained register class. + if (TargetRegisterInfo::isVirtualRegister(From) && + TargetRegisterInfo::isVirtualRegister(To)) + MRI.constrainRegClass(To, MRI.getRegClass(From)); // Replace it. 
MRI.replaceRegWith(From, To); } @@ -611,6 +670,8 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + CurDAG->NewNodesMustHaveLegalTypes = true; + if (Changed) { if (ViewDAGCombineLT) CurDAG->viewGraph("dag-combine-lt input for " + BlockName); @@ -624,6 +685,7 @@ void SelectionDAGISel::CodeGenAndEmitDAG() { DEBUG(dbgs() << "Optimized type-legalized selection DAG: BB#" << BlockNumber << " '" << BlockName << "'\n"; CurDAG->dump()); + } { @@ -790,9 +852,6 @@ void SelectionDAGISel::DoInstructionSelection() { continue; // Replace node. if (ResNode) { - // Propagate ordering - CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node)); - ReplaceUses(Node, ResNode); } @@ -827,12 +886,13 @@ void SelectionDAGISel::PrepareEHLandingPad() { .addSym(Label); // Mark exception register as live in. - const TargetRegisterClass *PtrRC = TLI.getRegClassFor(TLI.getPointerTy()); - if (unsigned Reg = TLI.getExceptionPointerRegister()) + const TargetLowering *TLI = getTargetLowering(); + const TargetRegisterClass *PtrRC = TLI->getRegClassFor(TLI->getPointerTy()); + if (unsigned Reg = TLI->getExceptionPointerRegister()) FuncInfo->ExceptionPointerVirtReg = MBB->addLiveIn(Reg, PtrRC); // Mark exception selector register as live in. - if (unsigned Reg = TLI.getExceptionSelectorRegister()) + if (unsigned Reg = TLI->getExceptionSelectorRegister()) FuncInfo->ExceptionSelectorVirtReg = MBB->addLiveIn(Reg, PtrRC); } @@ -932,7 +992,7 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Initialize the Fast-ISel state, if needed. FastISel *FastIS = 0; if (TM.Options.EnableFastISel) - FastIS = TLI.createFastISel(*FuncInfo, LibInfo); + FastIS = getTargetLowering()->createFastISel(*FuncInfo, LibInfo); // Iterate over all basic blocks in the function. ReversePostOrderTraversal RPOT(&Fn); @@ -1135,6 +1195,91 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { delete FastIS; SDB->clearDanglingDebugInfo(); + SDB->SPDescriptor.resetPerFunctionState(); +} + +/// Given that the input MI is before a partial terminator sequence TSeq, return +/// true if M + TSeq also a partial terminator sequence. +/// +/// A Terminator sequence is a sequence of MachineInstrs which at this point in +/// lowering copy vregs into physical registers, which are then passed into +/// terminator instructors so we can satisfy ABI constraints. A partial +/// terminator sequence is an improper subset of a terminator sequence (i.e. it +/// may be the whole terminator sequence). +static bool MIIsInTerminatorSequence(const MachineInstr *MI) { + // If we do not have a copy or an implicit def, we return true if and only if + // MI is a debug value. + if (!MI->isCopy() && !MI->isImplicitDef()) + // Sometimes DBG_VALUE MI sneak in between the copies from the vregs to the + // physical registers if there is debug info associated with the terminator + // of our mbb. We want to include said debug info in our terminator + // sequence, so we return true in that case. + return MI->isDebugValue(); + + // We have left the terminator sequence if we are not doing one of the + // following: + // + // 1. Copying a vreg into a physical register. + // 2. Copying a vreg into a vreg. + // 3. Defining a register via an implicit def. + + // OPI should always be a register definition... 
+ MachineInstr::const_mop_iterator OPI = MI->operands_begin(); + if (!OPI->isReg() || !OPI->isDef()) + return false; + + // Defining any register via an implicit def is always ok. + if (MI->isImplicitDef()) + return true; + + // Grab the copy source... + MachineInstr::const_mop_iterator OPI2 = OPI; + ++OPI2; + assert(OPI2 != MI->operands_end() + && "Should have a copy implying we should have 2 arguments."); + + // Make sure that the copy dest is not a vreg when the copy source is a + // physical register. + if (!OPI2->isReg() || + (!TargetRegisterInfo::isPhysicalRegister(OPI->getReg()) && + TargetRegisterInfo::isPhysicalRegister(OPI2->getReg()))) + return false; + + return true; +} + +/// Find the split point at which to splice the end of BB into its success stack +/// protector check machine basic block. +/// +/// On many platforms, due to ABI constraints, terminators, even before register +/// allocation, use physical registers. This creates an issue for us since +/// physical registers at this point can not travel across basic +/// blocks. Luckily, selectiondag always moves physical registers into vregs +/// when they enter functions and moves them through a sequence of copies back +/// into the physical registers right before the terminator creating a +/// ``Terminator Sequence''. This function is searching for the beginning of the +/// terminator sequence so that we can ensure that we splice off not just the +/// terminator, but additionally the copies that move the vregs into the +/// physical registers. +static MachineBasicBlock::iterator +FindSplitPointForStackProtector(MachineBasicBlock *BB, DebugLoc DL) { + MachineBasicBlock::iterator SplitPoint = BB->getFirstTerminator(); + // + if (SplitPoint == BB->begin()) + return SplitPoint; + + MachineBasicBlock::iterator Start = BB->begin(); + MachineBasicBlock::iterator Previous = SplitPoint; + --Previous; + + while (MIIsInTerminatorSequence(Previous)) { + SplitPoint = Previous; + if (Previous == Start) + break; + --Previous; + } + + return SplitPoint; } void @@ -1147,11 +1292,13 @@ SelectionDAGISel::FinishBasicBlock() { << FuncInfo->PHINodesToUpdate[i].first << ", " << FuncInfo->PHINodesToUpdate[i].second << ")\n"); + const bool MustUpdatePHINodes = SDB->SwitchCases.empty() && + SDB->JTCases.empty() && + SDB->BitTestCases.empty(); + // Next, now that we know what the last MBB the LLVM BB expanded is, update // PHI nodes in successors. - if (SDB->SwitchCases.empty() && - SDB->JTCases.empty() && - SDB->BitTestCases.empty()) { + if (MustUpdatePHINodes) { for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); assert(PHI->isPHI() && @@ -1160,9 +1307,54 @@ SelectionDAGISel::FinishBasicBlock() { continue; PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } - return; } + // Handle stack protector. + if (SDB->SPDescriptor.shouldEmitStackProtector()) { + MachineBasicBlock *ParentMBB = SDB->SPDescriptor.getParentMBB(); + MachineBasicBlock *SuccessMBB = SDB->SPDescriptor.getSuccessMBB(); + + // Find the split point to split the parent mbb. At the same time copy all + // physical registers used in the tail of parent mbb into virtual registers + // before the split point and back into physical registers after the split + // point. This prevents us needing to deal with Live-ins and many other + // register allocation issues caused by us splitting the parent mbb. The + // register allocator will clean up said virtual copies later on. 
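To make the ``terminator sequence'' described above concrete: a hypothetical machine-level tail of a parent block (register names illustrative) is a COPY of a vreg into the ABI return register, possibly an interleaved DBG_VALUE, then the return itself. FindSplitPointForStackProtector() walks backwards over exactly that run, so the split point lands before the first COPY and the splice in the hunk below moves the whole tail into the success block:

    // Mirrors the FinishBasicBlock hunk that follows: everything from the
    // split point onward (copies, debug values, terminator) migrates into
    // SuccessMBB, leaving ParentMBB open for the compare-and-branch tail.
    SuccessMBB->splice(SuccessMBB->end(), ParentMBB, SplitPoint,
                       ParentMBB->end());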
+ MachineBasicBlock::iterator SplitPoint = + FindSplitPointForStackProtector(ParentMBB, SDB->getCurDebugLoc()); + + // Splice the terminator of ParentMBB into SuccessMBB. + SuccessMBB->splice(SuccessMBB->end(), ParentMBB, + SplitPoint, + ParentMBB->end()); + + // Add compare/jump on neq/jump to the parent BB. + FuncInfo->MBB = ParentMBB; + FuncInfo->InsertPt = ParentMBB->end(); + SDB->visitSPDescriptorParent(SDB->SPDescriptor, ParentMBB); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + + // CodeGen Failure MBB if we have not codegened it yet. + MachineBasicBlock *FailureMBB = SDB->SPDescriptor.getFailureMBB(); + if (!FailureMBB->size()) { + FuncInfo->MBB = FailureMBB; + FuncInfo->InsertPt = FailureMBB->end(); + SDB->visitSPDescriptorFailure(SDB->SPDescriptor); + CurDAG->setRoot(SDB->getRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } + + // Clear the Per-BB State. + SDB->SPDescriptor.resetPerBBState(); + } + + // If we updated PHI Nodes, return early. + if (MustUpdatePHINodes) + return; + for (unsigned i = 0, e = SDB->BitTestCases.size(); i != e; ++i) { // Lower header first, if it wasn't already lowered if (!SDB->BitTestCases[i].Emitted) { @@ -1609,7 +1801,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { SelectInlineAsmMemoryOperands(Ops); EVT VTs[] = { MVT::Other, MVT::Glue }; - SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), VTs, &Ops[0], Ops.size()); New->setNodeId(-1); return New.getNode(); @@ -1881,10 +2073,9 @@ HandleMergeInputChains(SmallVectorImpl &ChainNodesMatched, } } - SDValue Res; if (InputChains.size() == 1) return InputChains[0]; - return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(), + return CurDAG->getNode(ISD::TokenFactor, SDLoc(ChainNodesMatched[0]), MVT::Other, &InputChains[0], InputChains.size()); } @@ -1957,6 +2148,18 @@ CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, return N == RecordedNodes[RecNo].first; } +/// CheckChildSame - Implements OP_CheckChildXSame. +LLVM_ATTRIBUTE_ALWAYS_INLINE static bool +CheckChildSame(const unsigned char *MatcherTable, unsigned &MatcherIndex, + SDValue N, + const SmallVectorImpl > &RecordedNodes, + unsigned ChildNo) { + if (ChildNo >= N.getNumOperands()) + return false; // Match fails if out of range child #. + return ::CheckSame(MatcherTable, MatcherIndex, N.getOperand(ChildNo), + RecordedNodes); +} + /// CheckPatternPredicate - Implements OP_CheckPatternPredicate. LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex, @@ -1981,24 +2184,23 @@ CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex, LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering &TLI) { + SDValue N, const TargetLowering *TLI) { MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (N.getValueType() == VT) return true; // Handle the case when VT is iPTR. - return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy(); + return VT == MVT::iPTR && N.getValueType() == TLI->getPointerTy(); } LLVM_ATTRIBUTE_ALWAYS_INLINE static bool CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex, - SDValue N, const TargetLowering &TLI, + SDValue N, const TargetLowering *TLI, unsigned ChildNo) { if (ChildNo >= N.getNumOperands()) return false; // Match fails if out of range child #. 
-
 LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
               SDValue N) {
@@ -2008,13 +2210,13 @@ CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
 LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
 CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
-               SDValue N, const TargetLowering &TLI) {
+               SDValue N, const TargetLowering *TLI) {
   MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
   if (cast<VTSDNode>(N)->getVT() == VT)
     return true;
   // Handle the case when VT is iPTR.
-  return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+  return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI->getPointerTy();
 }
 LLVM_ATTRIBUTE_ALWAYS_INLINE static bool
@@ -2072,6 +2274,13 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
   case SelectionDAGISel::OPC_CheckSame:
     Result = !::CheckSame(Table, Index, N, RecordedNodes);
     return Index;
+  case SelectionDAGISel::OPC_CheckChild0Same:
+  case SelectionDAGISel::OPC_CheckChild1Same:
+  case SelectionDAGISel::OPC_CheckChild2Same:
+  case SelectionDAGISel::OPC_CheckChild3Same:
+    Result = !::CheckChildSame(Table, Index, N, RecordedNodes,
+                        Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Same);
+    return Index;
   case SelectionDAGISel::OPC_CheckPatternPredicate:
     Result = !::CheckPatternPredicate(Table, Index, SDISel);
     return Index;
@@ -2082,7 +2291,7 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
     Result = !::CheckOpcode(Table, Index, N.getNode());
     return Index;
   case SelectionDAGISel::OPC_CheckType:
-    Result = !::CheckType(Table, Index, N, SDISel.TLI);
+    Result = !::CheckType(Table, Index, N, SDISel.getTargetLowering());
     return Index;
   case SelectionDAGISel::OPC_CheckChild0Type:
   case SelectionDAGISel::OPC_CheckChild1Type:
@@ -2092,14 +2301,14 @@ static unsigned IsPredicateKnownToFail(const unsigned char *Table,
   case SelectionDAGISel::OPC_CheckChild5Type:
   case SelectionDAGISel::OPC_CheckChild6Type:
   case SelectionDAGISel::OPC_CheckChild7Type:
-    Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+    Result = !::CheckChildType(Table, Index, N, SDISel.getTargetLowering(),
                         Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
     return Index;
   case SelectionDAGISel::OPC_CheckCondCode:
     Result = !::CheckCondCode(Table, Index, N);
     return Index;
   case SelectionDAGISel::OPC_CheckValueType:
-    Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
+    Result = !::CheckValueType(Table, Index, N, SDISel.getTargetLowering());
     return Index;
   case SelectionDAGISel::OPC_CheckInteger:
     Result = !::CheckInteger(Table, Index, N);
@@ -2369,6 +2578,14 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     case OPC_CheckSame:
       if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
       continue;
+
+    case OPC_CheckChild0Same: case OPC_CheckChild1Same:
+    case OPC_CheckChild2Same: case OPC_CheckChild3Same:
+      if (!::CheckChildSame(MatcherTable, MatcherIndex, N, RecordedNodes,
+                            Opcode-OPC_CheckChild0Same))
+        break;
+      continue;
+
     case OPC_CheckPatternPredicate:
       if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
       continue;
@@ -2392,7 +2609,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       continue;
     case OPC_CheckType:
-      if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
+      if (!::CheckType(MatcherTable, MatcherIndex, N, getTargetLowering()))
+        break;
       continue;
     case OPC_SwitchOpcode: {
@@
-2427,7 +2645,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, } case OPC_SwitchType: { - MVT CurNodeVT = N.getValueType().getSimpleVT(); + MVT CurNodeVT = N.getSimpleValueType(); unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart; unsigned CaseSize; while (1) { @@ -2439,7 +2657,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, MVT CaseVT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++]; if (CaseVT == MVT::iPTR) - CaseVT = TLI.getPointerTy(); + CaseVT = getTargetLowering()->getPointerTy(); // If the VT matches, then we will execute this case. if (CurNodeVT == CaseVT) @@ -2461,7 +2679,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_CheckChild2Type: case OPC_CheckChild3Type: case OPC_CheckChild4Type: case OPC_CheckChild5Type: case OPC_CheckChild6Type: case OPC_CheckChild7Type: - if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI, + if (!::CheckChildType(MatcherTable, MatcherIndex, N, getTargetLowering(), Opcode-OPC_CheckChild0Type)) break; continue; @@ -2469,7 +2687,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break; continue; case OPC_CheckValueType: - if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break; + if (!::CheckValueType(MatcherTable, MatcherIndex, N, getTargetLowering())) + break; continue; case OPC_CheckInteger: if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break; @@ -2538,7 +2757,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, case OPC_EmitConvertToTarget: { // Convert from IMM/FPIMM to target version. unsigned RecNo = MatcherTable[MatcherIndex++]; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget"); SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { @@ -2563,7 +2782,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Read all of the chained nodes. unsigned RecNo = Opcode == OPC_EmitMergeInputChains1_1; - assert(RecNo < RecordedNodes.size() && "Invalid CheckSame"); + assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains"); ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode()); // FIXME: What if other value results of the node have uses not matched @@ -2600,7 +2819,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, // Read all of the chained nodes. 
       for (unsigned i = 0; i != NumChains; ++i) {
         unsigned RecNo = MatcherTable[MatcherIndex++];
-        assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+        assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
         ChainNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
         // FIXME: What if other value results of the node have uses not matched
@@ -2627,13 +2846,13 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     case OPC_EmitCopyToReg: {
       unsigned RecNo = MatcherTable[MatcherIndex++];
-      assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
       unsigned DestPhysReg = MatcherTable[MatcherIndex++];
       if (InputChain.getNode() == 0)
         InputChain = CurDAG->getEntryNode();
-      InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
+      InputChain = CurDAG->getCopyToReg(InputChain, SDLoc(NodeToMatch),
                                         DestPhysReg, RecordedNodes[RecNo].first,
                                         InputGlue);
@@ -2644,7 +2863,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
     case OPC_EmitNodeXForm: {
       unsigned XFormNo = MatcherTable[MatcherIndex++];
       unsigned RecNo = MatcherTable[MatcherIndex++];
-      assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
       SDValue Res = RunSDNodeXForm(RecordedNodes[RecNo].first, XFormNo);
       RecordedNodes.push_back(std::pair<SDValue, SDNode*>(Res, (SDNode*) 0));
       continue;
@@ -2661,7 +2880,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       for (unsigned i = 0; i != NumVTs; ++i) {
         MVT::SimpleValueType VT =
           (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
-        if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
+        if (VT == MVT::iPTR) VT = getTargetLowering()->getPointerTy().SimpleTy;
         VTs.push_back(VT);
       }
@@ -2720,7 +2939,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
       if (Opcode != OPC_MorphNodeTo) {
         // If this is a normal EmitNode command, just create the new node and
         // add the results to the RecordedNodes list.
-        Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
+        Res = CurDAG->getMachineNode(TargetOpc, SDLoc(NodeToMatch),
                                      VTList, Ops);
         // Add all the non-glue/non-chain results to the RecordedNodes list.
@@ -2763,8 +2982,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         bool mayStore = MCID.mayStore();
         unsigned NumMemRefs = 0;
-        for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
-             MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+        for (SmallVectorImpl<MachineMemOperand*>::const_iterator I =
+               MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
           if ((*I)->isLoad()) {
             if (mayLoad)
               ++NumMemRefs;
@@ -2780,8 +2999,8 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
           MF->allocateMemRefsArray(NumMemRefs);
         MachineSDNode::mmo_iterator MemRefsPos = MemRefs;
-        for (SmallVector<MachineMemOperand*, 2>::const_iterator I =
-             MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
+        for (SmallVectorImpl<MachineMemOperand*>::const_iterator I =
+               MatchedMemRefs.begin(), E = MatchedMemRefs.end(); I != E; ++I) {
           if ((*I)->isLoad()) {
             if (mayLoad)
               *MemRefsPos++ = *I;
@@ -2821,7 +3040,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         if (RecNo & 128)
           RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
-        assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+        assert(RecNo < RecordedNodes.size() && "Invalid MarkGlueResults");
         GlueResultNodesMatched.push_back(RecordedNodes[RecNo].first.getNode());
       }
       continue;
@@ -2838,7 +3057,7 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
         if (ResSlot & 128)
           ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
-        assert(ResSlot < RecordedNodes.size() && "Invalid CheckSame");
+        assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
         SDValue Res = RecordedNodes[ResSlot].first;
         assert(i < NodeToMatch->getNumValues() &&
diff --git a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index f5fc66c..82b068d 100644
--- a/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/contrib/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -64,13 +64,29 @@ bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node,
   return isUsedByReturnOnly(Node, Chain);
 }
+/// \brief Set CallLoweringInfo attribute flags based on a call instruction
+/// and called function attributes.
+void TargetLowering::ArgListEntry::setAttributes(ImmutableCallSite *CS,
+                                                 unsigned AttrIdx) {
+  isSExt = CS->paramHasAttr(AttrIdx, Attribute::SExt);
+  isZExt = CS->paramHasAttr(AttrIdx, Attribute::ZExt);
+  isInReg = CS->paramHasAttr(AttrIdx, Attribute::InReg);
+  isSRet = CS->paramHasAttr(AttrIdx, Attribute::StructRet);
+  isNest = CS->paramHasAttr(AttrIdx, Attribute::Nest);
+  isByVal = CS->paramHasAttr(AttrIdx, Attribute::ByVal);
+  isReturned = CS->paramHasAttr(AttrIdx, Attribute::Returned);
+  Alignment = CS->getParamAlignment(AttrIdx);
+}
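The MarkGlueResults and CompleteMatch cases a little further up reload RecNo and ResSlot through GetVBR whenever the first table byte has its high bit set: matcher tables store small numbers in one byte and larger ones in a little-endian base-128 form, seven payload bits per byte with the top bit as a continuation flag. A sketch of a decoder for that scheme, under exactly that assumption (it mirrors what GetVBR does but is not copied from it):

    #include <cstdint>

    std::uint64_t decodeVBR(std::uint64_t First, const std::uint8_t *Table,
                            unsigned &Idx) {
      std::uint64_t Val = First & 127; // strip the continuation flag
      unsigned Shift = 7;
      std::uint8_t Next;
      do {
        Next = Table[Idx++];
        Val |= static_cast<std::uint64_t>(Next & 127) << Shift;
        Shift += 7;
      } while (Next & 128); // high bit set: more bytes follow
      return Val;
    }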
/// Generate a libcall taking the given operands as arguments and returning a
/// result of type RetVT.
-SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
-                                    RTLIB::Libcall LC, EVT RetVT,
-                                    const SDValue *Ops, unsigned NumOps,
-                                    bool isSigned, DebugLoc dl) const {
+std::pair<SDValue, SDValue>
+TargetLowering::makeLibCall(SelectionDAG &DAG,
+                            RTLIB::Libcall LC, EVT RetVT,
+                            const SDValue *Ops, unsigned NumOps,
+                            bool isSigned, SDLoc dl,
+                            bool doesNotReturn,
+                            bool isReturnValueUsed) const {
   TargetLowering::ArgListTy Args;
   Args.reserve(NumOps);
@@ -89,11 +105,9 @@ SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
   CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
                        false, 0, getLibcallCallingConv(LC),
                        /*isTailCall=*/false,
-                       /*doesNotReturn=*/false, /*isReturnValueUsed=*/true,
-                       Callee, Args, DAG, dl);
-  std::pair<SDValue, SDValue> CallInfo = LowerCallTo(CLI);
-
-  return CallInfo.first;
+                       doesNotReturn, isReturnValueUsed, Callee, Args,
+                       DAG, dl);
+  return LowerCallTo(CLI);
 }
@@ -102,7 +116,7 @@ SDValue TargetLowering::makeLibCall(SelectionDAG &DAG,
 void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
                                          SDValue &NewLHS, SDValue &NewRHS,
                                          ISD::CondCode &CCCode,
-                                         DebugLoc dl) const {
+                                         SDLoc dl) const {
   assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128)
          && "Unsupported setcc type!");
@@ -183,14 +197,18 @@ void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT,
   // Use the target specific return value for comparison lib calls.
   EVT RetVT = getCmpLibcallReturnType();
   SDValue Ops[2] = { NewLHS, NewRHS };
-  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
+  NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/,
+                       dl).first;
   NewRHS = DAG.getConstant(0, RetVT);
   CCCode = getCmpLibcallCC(LC1);
   if (LC2 != RTLIB::UNKNOWN_LIBCALL) {
-    SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT),
+    SDValue Tmp = DAG.getNode(ISD::SETCC, dl,
+                              getSetCCResultType(*DAG.getContext(), RetVT),
                               NewLHS, NewRHS, DAG.getCondCode(CCCode));
-    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl);
-    NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS,
+    NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/,
+                         dl).first;
+    NewLHS = DAG.getNode(ISD::SETCC, dl,
+                         getSetCCResultType(*DAG.getContext(), RetVT), NewLHS,
                          NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2)));
     NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS);
     NewRHS = SDValue();
@@ -262,7 +280,7 @@ TargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
 /// constant and return true.
 bool TargetLowering::TargetLoweringOpt::ShrinkDemandedConstant(SDValue Op,
                                                         const APInt &Demanded) {
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   // FIXME: ISD::SELECT, ISD::SELECT_CC
   switch (Op.getOpcode()) {
@@ -302,7 +320,7 @@ bool TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op,
                                                unsigned BitWidth,
                                                const APInt &Demanded,
-                                               DebugLoc dl) {
+                                               SDLoc dl) {
   assert(Op.getNumOperands() == 2 &&
          "ShrinkDemandedOp only supports binary operators!");
   assert(Op.getNode()->getNumValues() == 1 &&
@@ -356,7 +374,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
   assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
         "Mask size mismatches value type size!");
   APInt NewMask = DemandedMask;
-  DebugLoc dl = Op.getDebugLoc();
+  SDLoc dl(Op);
   // Don't know anything.
   KnownZero = KnownOne = APInt(BitWidth, 0);
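The hunk just below touches the (X | C1) ^ C2 --> (X | C1) & ~C2 rewrite, which holds whenever (C1 & C2) == C2, i.e. every bit cleared by the XOR was already forced to one by the OR. A small exhaustive check of the identity on 8-bit values:

    #include <cassert>
    #include <cstdint>

    int main() {
      const std::uint8_t C1 = 0xF3, C2 = 0xF0; // (C1 & C2) == C2 holds
      for (unsigned x = 0; x < 256; ++x) {
        std::uint8_t viaXor = static_cast<std::uint8_t>((x | C1) ^ C2);
        std::uint8_t viaAnd = static_cast<std::uint8_t>((x | C1) & ~C2);
        assert(viaXor == viaAnd); // the two forms agree for every input
      }
      return 0;
    }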
@@ -508,7 +526,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       // into an AND, as we know the bits will be cleared.
       //    e.g. (X | C1) ^ C2 --> (X | C1) & ~C2 iff (C1&C2) == C2
       // NB: it is okay if more bits are known than are requested
-      if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side 
+      if ((NewMask & (KnownZero|KnownOne)) == NewMask) { // all known on one side
         if (KnownOne == KnownOne2) { // set bits are the same on both sides
           EVT VT = Op.getValueType();
           SDValue ANDC = TLO.DAG.getConstant(~KnownOne & NewMask, VT);
@@ -630,6 +648,31 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
                                TLO.DAG.getNode(ISD::ANY_EXTEND, dl,
                                                Op.getValueType(),
                                                NarrowShl));
       }
+      // Repeat the SHL optimization above in cases where an extension
+      // intervenes: (shl (anyext (shr x, c1)), c2) to
+      // (shl (anyext x), c2-c1).  This requires that the bottom c1 bits
+      // aren't demanded (as above) and that the shifted upper c1 bits of
+      // x aren't demanded.
+      if (InOp.hasOneUse() &&
+          InnerOp.getOpcode() == ISD::SRL &&
+          InnerOp.hasOneUse() &&
+          isa<ConstantSDNode>(InnerOp.getOperand(1))) {
+        uint64_t InnerShAmt = cast<ConstantSDNode>(InnerOp.getOperand(1))
+          ->getZExtValue();
+        if (InnerShAmt < ShAmt &&
+            InnerShAmt < InnerBits &&
+            NewMask.lshr(InnerBits - InnerShAmt + ShAmt) == 0 &&
+            NewMask.trunc(ShAmt) == 0) {
+          SDValue NewSA =
+            TLO.DAG.getConstant(ShAmt - InnerShAmt,
+                                Op.getOperand(1).getValueType());
+          EVT VT = Op.getValueType();
+          SDValue NewExt = TLO.DAG.getNode(ISD::ANY_EXTEND, dl, VT,
+                                           InnerOp.getOperand(0));
+          return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SHL, dl, VT,
+                                                   NewExt, NewSA));
+        }
+      }
     }
     KnownZero <<= SA->getZExtValue();
@@ -720,13 +763,24 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
       // If the input sign bit is known to be zero, or if none of the top bits
       // are demanded, turn this into an unsigned shift right.
-      if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits) {
+      if (KnownZero.intersects(SignBit) || (HighBits & ~NewMask) == HighBits)
         return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
                                                  Op.getOperand(0),
                                                  Op.getOperand(1)));
-      } else if (KnownOne.intersects(SignBit)) { // New bits are known one.
-        KnownOne |= HighBits;
+
+      int Log2 = NewMask.exactLogBase2();
+      if (Log2 >= 0) {
+        // The bit must come from the sign.
+        SDValue NewSA =
+          TLO.DAG.getConstant(BitWidth - 1 - Log2,
+                              Op.getOperand(1).getValueType());
+        return TLO.CombineTo(Op, TLO.DAG.getNode(ISD::SRL, dl, VT,
+                                                 Op.getOperand(0), NewSA));
       }
+
+      if (KnownOne.intersects(SignBit))
+        // New bits are known one.
+        KnownOne |= HighBits;
     }
     break;
   case ISD::SIGN_EXTEND_INREG: {
@@ -1066,7 +1120,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
 SDValue
 TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
                               ISD::CondCode Cond, bool foldBooleans,
-                              DAGCombinerInfo &DCI, DebugLoc dl) const {
+                              DAGCombinerInfo &DCI, SDLoc dl) const {
   SelectionDAG &DAG = DCI.DAG;
   // These setcc operations always fold.
@@ -1075,13 +1129,20 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
   case ISD::SETFALSE:
   case ISD::SETFALSE2: return DAG.getConstant(0, VT);
   case ISD::SETTRUE:
-  case ISD::SETTRUE2:  return DAG.getConstant(1, VT);
+  case ISD::SETTRUE2: {
+    TargetLowering::BooleanContent Cnt = getBooleanContents(VT.isVector());
+    return DAG.getConstant(
+        Cnt == TargetLowering::ZeroOrNegativeOneBooleanContent ? -1ULL : 1, VT);
+  }
   }
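The new exactLogBase2 case above says: if exactly one bit of an arithmetic-shift-right result is demanded, and the earlier test has already established that it is one of the sign-extended high bits, then that bit equals the sign bit, so an unsigned shift by BitWidth - 1 - Log2 produces it directly. A numeric spot check (assuming the usual arithmetic >> on signed values, which is what the targets here implement):

    #include <cassert>
    #include <cstdint>

    int main() {
      const std::uint32_t DemandedMask = 1u << 30; // single demanded bit
      const std::int32_t TestVals[] = { -123456789, -1, 0, 987654321 };
      for (std::int32_t x : TestVals) {
        // Bit 30 of (x >> 24) is one of the sign-extended copies.
        std::uint32_t sra = static_cast<std::uint32_t>(x >> 24);
        // The rewrite: srl by BitWidth - 1 - Log2 = 31 - 30 = 1.
        std::uint32_t srl = static_cast<std::uint32_t>(x) >> (31 - 30);
        assert((sra & DemandedMask) == (srl & DemandedMask));
      }
      return 0;
    }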
   // Ensure that the constant occurs on the RHS, and fold constant
   // comparisons.
-  if (isa<ConstantSDNode>(N0.getNode()))
-    return DAG.getSetCC(dl, VT, N1, N0, ISD::getSetCCSwappedOperands(Cond));
+  ISD::CondCode SwappedCC = ISD::getSetCCSwappedOperands(Cond);
+  if (isa<ConstantSDNode>(N0.getNode()) &&
+      (DCI.isBeforeLegalizeOps() ||
+       isCondCodeLegal(SwappedCC, N0.getSimpleValueType())))
+    return DAG.getSetCC(dl, VT, N1, N0, SwappedCC);
   if (ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1.getNode())) {
     const APInt &C1 = N1C->getAPIntValue();
@@ -1160,7 +1221,8 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       }
       // Make sure we're not losing bits from the constant.
-      if (MinBits < C1.getBitWidth() && MinBits > C1.getActiveBits()) {
+      if (MinBits > 0 &&
+          MinBits < C1.getBitWidth() && MinBits >= C1.getActiveBits()) {
         EVT MinVT = EVT::getIntegerVT(*DAG.getContext(), MinBits);
         if (isTypeDesirableForOp(ISD::SETCC, MinVT)) {
           // Will get folded away.
@@ -1175,6 +1237,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
     // the test is for equality or unsigned, and all 1 bits of the const are
     // in the same partial word, see if we can shorten the load.
     if (DCI.isBeforeLegalize() &&
+        !ISD::isSignedIntSetCC(Cond) &&
         N0.getOpcode() == ISD::AND && C1 == 0 &&
         N0.getNode()->hasOneUse() &&
         isa<LoadSDNode>(N0.getOperand(0)) &&
@@ -1319,7 +1382,9 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
         ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
         CC = ISD::getSetCCInverse(CC,
                                   N0.getOperand(0).getValueType().isInteger());
-        return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
+        if (DCI.isBeforeLegalizeOps() ||
+            isCondCodeLegal(CC, N0.getOperand(0).getSimpleValueType()))
+          return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
       }
       if ((N0.getOpcode() == ISD::XOR ||
@@ -1756,16 +1821,22 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
       if (N0.getOperand(0) == N1 || N0.getOperand(1) == N1) {
         if (ValueHasExactlyOneBitSet(N1, DAG)) {
           Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
-          SDValue Zero = DAG.getConstant(0, N1.getValueType());
-          return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+          if (DCI.isBeforeLegalizeOps() ||
+              isCondCodeLegal(Cond, N0.getSimpleValueType())) {
+            SDValue Zero = DAG.getConstant(0, N1.getValueType());
+            return DAG.getSetCC(dl, VT, N0, Zero, Cond);
+          }
         }
       }
     if (N1.getOpcode() == ISD::AND)
       if (N1.getOperand(0) == N0 || N1.getOperand(1) == N0) {
         if (ValueHasExactlyOneBitSet(N0, DAG)) {
           Cond = ISD::getSetCCInverse(Cond, /*isInteger=*/true);
-          SDValue Zero = DAG.getConstant(0, N0.getValueType());
-          return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+          if (DCI.isBeforeLegalizeOps() ||
+              isCondCodeLegal(Cond, N1.getSimpleValueType())) {
+            SDValue Zero = DAG.getConstant(0, N0.getValueType());
+            return DAG.getSetCC(dl, VT, N1, Zero, Cond);
+          }
         }
       }
   }
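The guards added above all follow one pattern: a fold that materializes a new SETCC condition must either run before LegalizeOps or confirm that the condition code is legal for the operand type. A minimal model of that gate, with invented stand-in types:

    enum CondCode { SETEQ, SETNE, SETLT, SETGT, NUM_CONDCODES };

    struct ModelTarget {
      bool LegalCC[NUM_CONDCODES]; // which codes survive legalization
      bool isCondCodeLegal(CondCode CC) const { return LegalCC[CC]; }
    };

    // Mirror of the pattern: only rewrite when the result stays expressible.
    bool maySwapSetCC(const ModelTarget &T, bool beforeLegalizeOps,
                      CondCode Swapped) {
      return beforeLegalizeOps || T.isCondCodeLegal(Swapped);
    }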
@@ -1966,7 +2037,7 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
       int64_t Offs = GA->getOffset();
       if (C) Offs += C->getZExtValue();
       Ops.push_back(DAG.getTargetGlobalAddress(GA->getGlobal(),
-                                               C ? C->getDebugLoc() : DebugLoc(),
+                                               C ? SDLoc(C) : SDLoc(),
                                                Op.getValueType(), Offs));
       return;
     }
@@ -1989,8 +2060,8 @@ void TargetLowering::LowerAsmOperandForConstraint(SDValue Op,
 std::pair<unsigned, const TargetRegisterClass*> TargetLowering::
 getRegForInlineAsmConstraint(const std::string &Constraint,
-                             EVT VT) const {
-  if (Constraint[0] != '{')
+                             MVT VT) const {
+  if (Constraint.empty() || Constraint[0] != '{')
     return std::make_pair(0u, static_cast<const TargetRegisterClass*>(0));
   assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
@@ -2139,8 +2210,9 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints(
           break;
         }
-      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
-        OpInfo.ConstraintVT = MVT::getIntegerVT(
-            8*getDataLayout()->getPointerSize(PT->getAddressSpace()));
+      } else if (PointerType *PT = dyn_cast<PointerType>(OpTy)) {
+        unsigned PtrSize
+          = getDataLayout()->getPointerSizeInBits(PT->getAddressSpace());
+        OpInfo.ConstraintVT = MVT::getIntegerVT(PtrSize);
       } else {
         OpInfo.ConstraintVT = MVT::getVT(OpTy, true);
       }
@@ -2435,9 +2507,9 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo,
   }
 }
-/// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication
+/// \brief Given an exact SDIV by a constant, create a multiplication
 /// with the multiplicative inverse of the constant.
-SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
+SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, SDLoc dl,
                                        SelectionDAG &DAG) const {
   ConstantSDNode *C = cast<ConstantSDNode>(Op2);
   APInt d = C->getAPIntValue();
@@ -2461,7 +2533,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl,
   return DAG.getNode(ISD::MUL, dl, Op1.getValueType(), Op1, Op2);
 }
-/// BuildSDIVSequence - Given an ISD::SDIV node expressing a divide by constant,
+/// \brief Given an ISD::SDIV node expressing a divide by constant,
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number.  See:
 ///
@@ -2469,7 +2541,7 @@ SDValue TargetLowering::
 BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
           std::vector<SDNode*> *Created) const {
   EVT VT = N->getValueType(0);
-  DebugLoc dl= N->getDebugLoc();
+  SDLoc dl(N);
   // Check to see if we can do this.
   // FIXME: We should be more aggressive here.
@@ -2521,7 +2593,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
   return DAG.getNode(ISD::ADD, dl, VT, Q, T);
 }
-/// BuildUDIVSequence - Given an ISD::UDIV node expressing a divide by constant,
+/// \brief Given an ISD::UDIV node expressing a divide by constant,
 /// return a DAG expression to select that will generate the same value by
 /// multiplying by a magic number.  See:
 ///
@@ -2529,7 +2601,7 @@ SDValue TargetLowering::
 BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization,
           std::vector<SDNode*> *Created) const {
   EVT VT = N->getValueType(0);
-  DebugLoc dl = N->getDebugLoc();
+  SDLoc dl(N);
   // Check to see if we can do this.
   // FIXME: We should be more aggressive here.
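BuildExactSDIV relies on odd constants being invertible modulo 2^n: an exact divide by an odd d is just a multiply by d's multiplicative inverse (an even divisor first sheds its trailing zero bits with a shift). A standalone demonstration for n = 32, computing the inverse with Newton's iteration, which doubles the number of correct low bits per step:

    #include <cassert>
    #include <cstdint>

    std::uint32_t inverseMod2_32(std::uint32_t d) {
      // d must be odd; every odd d is its own inverse modulo 8, so start there.
      std::uint32_t x = d;
      for (int i = 0; i < 5; ++i)
        x *= 2 - d * x; // 3 -> 6 -> 12 -> 24 -> 48 correct bits
      return x;
    }

    int main() {
      std::uint32_t inv3 = inverseMod2_32(3); // 0xAAAAAAAB
      assert(inv3 * 3 == 1u);
      assert(123u * inv3 == 41u); // 123 / 3 computed as a multiply
      assert(static_cast<std::uint32_t>(-123) * inv3 ==
             static_cast<std::uint32_t>(-41)); // signs come along for free
      return 0;
    }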
diff --git a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp b/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
deleted file mode 100644
index 2feea59..0000000
--- a/contrib/llvm/lib/CodeGen/ShrinkWrapping.cpp
+++ /dev/null
@@ -1,1152 +0,0 @@
-//===-- ShrinkWrapping.cpp - Reduce spills/restores of callee-saved regs --===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file implements a shrink wrapping variant of prolog/epilog insertion:
-// - Spills and restores of callee-saved registers (CSRs) are placed in the
-//   machine CFG to tightly surround their uses so that execution paths that
-//   do not use CSRs do not pay the spill/restore penalty.
-//
-// - Avoiding placement of spills/restores in loops: if a CSR is used inside a
-//   loop the spills are placed in the loop preheader, and restores are
-//   placed in the loop exit nodes (the successors of loop _exiting_ nodes).
-//
-// - Covering paths without CSR uses:
-//   If a region in a CFG uses CSRs and has multiple entry and/or exit points,
-//   the use info for the CSRs inside the region is propagated outward in the
-//   CFG to ensure validity of the spill/restore placements. This decreases
-//   the effectiveness of shrink wrapping but does not require edge splitting
-//   in the machine CFG.
-//
-// This shrink wrapping implementation uses an iterative analysis to determine
-// which basic blocks require spills and restores for CSRs.
-//
-// This pass uses MachineDominators and MachineLoopInfo. Loop information
-// is used to prevent placement of callee-saved register spills/restores
-// in the bodies of loops.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "shrink-wrap"
-
-#include "PrologEpilogInserter.h"
-#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/PostOrderIterator.h"
-#include "llvm/ADT/STLExtras.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/CodeGen/MachineDominators.h"
-#include "llvm/CodeGen/MachineFrameInfo.h"
-#include "llvm/CodeGen/MachineInstr.h"
-#include "llvm/CodeGen/MachineLoopInfo.h"
-#include "llvm/CodeGen/MachineRegisterInfo.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Compiler.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Target/TargetMachine.h"
-#include "llvm/Target/TargetRegisterInfo.h"
-#include <sstream>
-
-using namespace llvm;
-
-STATISTIC(numSRReduced, "Number of CSR spills+restores reduced.");
-
-// Shrink Wrapping:
-static cl::opt<bool>
-ShrinkWrapping("shrink-wrap",
-               cl::desc("Shrink wrap callee-saved register spills/restores"));
-
-// Shrink wrap only the specified function, a debugging aid.
-static cl::opt<std::string>
-ShrinkWrapFunc("shrink-wrap-func", cl::Hidden,
-               cl::desc("Shrink wrap the specified function"),
-               cl::value_desc("funcname"),
-               cl::init(""));
-
-// Debugging level for shrink wrapping.
-enum ShrinkWrapDebugLevel {
-  Disabled, BasicInfo, Iterations, Details
-};
-
-static cl::opt<enum ShrinkWrapDebugLevel>
-ShrinkWrapDebugging("shrink-wrap-dbg", cl::Hidden,
-  cl::desc("Print shrink wrapping debugging information"),
-  cl::values(
-    clEnumVal(Disabled  , "disable debug output"),
-    clEnumVal(BasicInfo , "print basic DF sets"),
-    clEnumVal(Iterations, "print SR sets for each iteration"),
-    clEnumVal(Details   , "print all DF sets"),
-    clEnumValEnd));
-
-
-void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
-  AU.setPreservesCFG();
-  if (ShrinkWrapping || ShrinkWrapFunc != "") {
-    AU.addRequired<MachineLoopInfo>();
-    AU.addRequired<MachineDominatorTree>();
-  }
-  AU.addPreserved<MachineLoopInfo>();
-  AU.addPreserved<MachineDominatorTree>();
-  AU.addRequired<TargetPassConfig>();
-  MachineFunctionPass::getAnalysisUsage(AU);
-}
-
-//===----------------------------------------------------------------------===//
-//  ShrinkWrapping implementation
-//===----------------------------------------------------------------------===//
-
-// Conveniences for dealing with machine loops.
-MachineBasicBlock* PEI::getTopLevelLoopPreheader(MachineLoop* LP) { - assert(LP && "Machine loop is NULL."); - MachineBasicBlock* PHDR = LP->getLoopPreheader(); - MachineLoop* PLP = LP->getParentLoop(); - while (PLP) { - PHDR = PLP->getLoopPreheader(); - PLP = PLP->getParentLoop(); - } - return PHDR; -} - -MachineLoop* PEI::getTopLevelLoopParent(MachineLoop *LP) { - if (LP == 0) - return 0; - MachineLoop* PLP = LP->getParentLoop(); - while (PLP) { - LP = PLP; - PLP = PLP->getParentLoop(); - } - return LP; -} - -bool PEI::isReturnBlock(MachineBasicBlock* MBB) { - return (MBB && !MBB->empty() && MBB->back().isReturn()); -} - -// Initialize shrink wrapping DFA sets, called before iterations. -void PEI::clearAnticAvailSets() { - AnticIn.clear(); - AnticOut.clear(); - AvailIn.clear(); - AvailOut.clear(); -} - -// Clear all sets constructed by shrink wrapping. -void PEI::clearAllSets() { - ReturnBlocks.clear(); - clearAnticAvailSets(); - UsedCSRegs.clear(); - CSRUsed.clear(); - TLLoops.clear(); - CSRSave.clear(); - CSRRestore.clear(); -} - -// Initialize all shrink wrapping data. -void PEI::initShrinkWrappingInfo() { - clearAllSets(); - EntryBlock = 0; -#ifndef NDEBUG - HasFastExitPath = false; -#endif - ShrinkWrapThisFunction = ShrinkWrapping; - // DEBUG: enable or disable shrink wrapping for the current function - // via --shrink-wrap-func=. -#ifndef NDEBUG - if (ShrinkWrapFunc != "") { - std::string MFName = MF->getName().str(); - ShrinkWrapThisFunction = (MFName == ShrinkWrapFunc); - } -#endif -} - - -/// placeCSRSpillsAndRestores - determine which MBBs of the function -/// need save, restore code for callee-saved registers by doing a DF analysis -/// similar to the one used in code motion (GVNPRE). This produces maps of MBBs -/// to sets of registers (CSRs) for saves and restores. MachineLoopInfo -/// is used to ensure that CSR save/restore code is not placed inside loops. -/// This function computes the maps of MBBs -> CSRs to spill and restore -/// in CSRSave, CSRRestore. -/// -/// If shrink wrapping is not being performed, place all spills in -/// the entry block, all restores in return blocks. In this case, -/// CSRSave has a single mapping, CSRRestore has mappings for each -/// return block. -/// -void PEI::placeCSRSpillsAndRestores(MachineFunction &Fn) { - - DEBUG(MF = &Fn); - - initShrinkWrappingInfo(); - - DEBUG(if (ShrinkWrapThisFunction) { - dbgs() << "Place CSR spills/restores for " - << MF->getName() << "\n"; - }); - - if (calculateSets(Fn)) - placeSpillsAndRestores(Fn); -} - -/// calcAnticInOut - calculate the anticipated in/out reg sets -/// for the given MBB by looking forward in the MCFG at MBB's -/// successors. 
-///
-bool PEI::calcAnticInOut(MachineBasicBlock* MBB) {
-  bool changed = false;
-
-  // AnticOut[MBB] = INTERSECT(AnticIn[S] for S in SUCCESSORS(MBB))
-  SmallVector<MachineBasicBlock*, 4> successors;
-  for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
-         SE = MBB->succ_end(); SI != SE; ++SI) {
-    MachineBasicBlock* SUCC = *SI;
-    if (SUCC != MBB)
-      successors.push_back(SUCC);
-  }
-
-  unsigned i = 0, e = successors.size();
-  if (i != e) {
-    CSRegSet prevAnticOut = AnticOut[MBB];
-    MachineBasicBlock* SUCC = successors[i];
-
-    AnticOut[MBB] = AnticIn[SUCC];
-    for (++i; i != e; ++i) {
-      SUCC = successors[i];
-      AnticOut[MBB] &= AnticIn[SUCC];
-    }
-    if (prevAnticOut != AnticOut[MBB])
-      changed = true;
-  }
-
-  // AnticIn[MBB] = UNION(CSRUsed[MBB], AnticOut[MBB]);
-  CSRegSet prevAnticIn = AnticIn[MBB];
-  AnticIn[MBB] = CSRUsed[MBB] | AnticOut[MBB];
-  if (prevAnticIn != AnticIn[MBB])
-    changed = true;
-  return changed;
-}
-
-/// calcAvailInOut - calculate the available in/out reg sets
-/// for the given MBB by looking backward in the MCFG at MBB's
-/// predecessors.
-///
-bool PEI::calcAvailInOut(MachineBasicBlock* MBB) {
-  bool changed = false;
-
-  // AvailIn[MBB] = INTERSECT(AvailOut[P] for P in PREDECESSORS(MBB))
-  SmallVector<MachineBasicBlock*, 4> predecessors;
-  for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(),
-         PE = MBB->pred_end(); PI != PE; ++PI) {
-    MachineBasicBlock* PRED = *PI;
-    if (PRED != MBB)
-      predecessors.push_back(PRED);
-  }
-
-  unsigned i = 0, e = predecessors.size();
-  if (i != e) {
-    CSRegSet prevAvailIn = AvailIn[MBB];
-    MachineBasicBlock* PRED = predecessors[i];
-
-    AvailIn[MBB] = AvailOut[PRED];
-    for (++i; i != e; ++i) {
-      PRED = predecessors[i];
-      AvailIn[MBB] &= AvailOut[PRED];
-    }
-    if (prevAvailIn != AvailIn[MBB])
-      changed = true;
-  }
-
-  // AvailOut[MBB] = UNION(CSRUsed[MBB], AvailIn[MBB]);
-  CSRegSet prevAvailOut = AvailOut[MBB];
-  AvailOut[MBB] = CSRUsed[MBB] | AvailIn[MBB];
-  if (prevAvailOut != AvailOut[MBB])
-    changed = true;
-  return changed;
-}
-
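calcAnticInOut and calcAvailInOut above are the two halves of a standard bit-vector dataflow pair: anticipation flows backward (intersection over successors, union with local uses), availability forward (the mirror image over predecessors). A compact standalone model over a toy CFG, with self-loops ignored the same way the pass ignores them (set width and types invented):

    #include <bitset>
    #include <cstddef>
    #include <vector>

    typedef std::bitset<8> RegSet;

    struct Block {
      std::vector<int> succs;
      RegSet used, anticIn, anticOut;
    };

    // One update step of the backward problem:
    //   AnticOut[B] = INTERSECT(AnticIn[S] for S in SUCCESSORS(B))
    //   AnticIn[B]  = Used[B] | AnticOut[B]
    bool updateAntic(std::vector<Block> &cfg, int b) {
      Block &B = cfg[b];
      RegSet out;
      bool first = true;
      for (std::size_t i = 0; i < B.succs.size(); ++i) {
        int s = B.succs[i];
        if (s == b)
          continue; // skip self-loops, as the pass does
        out = first ? cfg[s].anticIn : (out & cfg[s].anticIn);
        first = false;
      }
      RegSet in = B.used | out;
      bool changed = in != B.anticIn || out != B.anticOut;
      B.anticIn = in;
      B.anticOut = out;
      return changed;
    }

Iterating this step (and its Avail counterpart) over every block until nothing changes is exactly the fixed point that calculateAnticAvail below computes.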
-/// calculateAnticAvail - build the sets anticipated and available
-/// registers in the MCFG of the current function iteratively,
-/// doing a combined forward and backward analysis.
-///
-void PEI::calculateAnticAvail(MachineFunction &Fn) {
-  // Initialize data flow sets.
-  clearAnticAvailSets();
-
-  // Calculate Antic{In,Out} and Avail{In,Out} iteratively on the MCFG.
-  bool changed = true;
-  unsigned iterations = 0;
-  while (changed) {
-    changed = false;
-    ++iterations;
-    for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
-         MBBI != MBBE; ++MBBI) {
-      MachineBasicBlock* MBB = MBBI;
-
-      // Calculate anticipated in, out regs at MBB from
-      // anticipated at successors of MBB.
-      changed |= calcAnticInOut(MBB);
-
-      // Calculate available in, out regs at MBB from
-      // available at predecessors of MBB.
-      changed |= calcAvailInOut(MBB);
-    }
-  }
-
-  DEBUG({
-      if (ShrinkWrapDebugging >= Details) {
-        dbgs()
-          << "-----------------------------------------------------------\n"
-          << " Antic/Avail Sets:\n"
-          << "-----------------------------------------------------------\n"
-          << "iterations = " << iterations << "\n"
-          << "-----------------------------------------------------------\n"
-          << "MBB | USED | ANTIC_IN | ANTIC_OUT | AVAIL_IN | AVAIL_OUT\n"
-          << "-----------------------------------------------------------\n";
-
-        for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end();
-             MBBI != MBBE; ++MBBI) {
-          MachineBasicBlock* MBB = MBBI;
-          dumpSets(MBB);
-        }
-
-        dbgs()
-          << "-----------------------------------------------------------\n";
-      }
-    });
-}
-
-/// propagateUsesAroundLoop - copy used register info from MBB to all blocks
-/// of the loop given by LP and its parent loops. This prevents spills/restores
-/// from being placed in the bodies of loops.
-///
-void PEI::propagateUsesAroundLoop(MachineBasicBlock* MBB, MachineLoop* LP) {
-  if (! MBB || !LP)
-    return;
-
-  std::vector<MachineBasicBlock*> loopBlocks = LP->getBlocks();
-  for (unsigned i = 0, e = loopBlocks.size(); i != e; ++i) {
-    MachineBasicBlock* LBB = loopBlocks[i];
-    if (LBB == MBB)
-      continue;
-    if (CSRUsed[LBB].contains(CSRUsed[MBB]))
-      continue;
-    CSRUsed[LBB] |= CSRUsed[MBB];
-  }
-}
-
-/// calculateSets - collect the CSRs used in this function, compute
-/// the DF sets that describe the initial minimal regions in the
-/// Machine CFG around which CSR spills and restores must be placed.
-///
-/// Additionally, this function decides if shrink wrapping should
-/// be disabled for the current function, checking the following:
-///  1. the current function has more than 500 MBBs: heuristic limit
-///     on function size to reduce compile time impact of the current
-///     iterative algorithm.
-///  2. all CSRs are used in the entry block.
-///  3. all CSRs are used in all immediate successors of the entry block.
-///  4. all CSRs are used in a subset of blocks, each of which dominates
-///     all return blocks. These blocks, taken as a subgraph of the MCFG,
-///     are equivalent to the entry block since all execution paths pass
-///     through them.
-///
-bool PEI::calculateSets(MachineFunction &Fn) {
-  // Sets used to compute spill, restore placement sets.
-  const std::vector<CalleeSavedInfo> CSI =
-    Fn.getFrameInfo()->getCalleeSavedInfo();
-
-  // If no CSRs used, we are done.
-  if (CSI.empty()) {
-    DEBUG(if (ShrinkWrapThisFunction)
-            dbgs() << "DISABLED: " << Fn.getName()
-                   << ": uses no callee-saved registers\n");
-    return false;
-  }
-
-  // Save refs to entry and return blocks.
-  EntryBlock = Fn.begin();
-  for (MachineFunction::iterator MBB = Fn.begin(), E = Fn.end();
-       MBB != E; ++MBB)
-    if (isReturnBlock(MBB))
-      ReturnBlocks.push_back(MBB);
-
-  // Determine if this function has fast exit paths.
-  DEBUG(if (ShrinkWrapThisFunction)
-          findFastExitPath());
-
-  // Limit shrink wrapping via the current iterative bit vector
-  // implementation to functions with <= 500 MBBs.
-  if (Fn.size() > 500) {
-    DEBUG(if (ShrinkWrapThisFunction)
-            dbgs() << "DISABLED: " << Fn.getName()
-                   << ": too large (" << Fn.size() << " MBBs)\n");
-    ShrinkWrapThisFunction = false;
-  }
-
-  // Return now if not shrink wrapping.
-  if (! ShrinkWrapThisFunction)
-    return false;
-
-  // Collect set of used CSRs.
- for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { - UsedCSRegs.set(inx); - } - - // Walk instructions in all MBBs, create CSRUsed[] sets, choose - // whether or not to shrink wrap this function. - MachineLoopInfo &LI = getAnalysis(); - MachineDominatorTree &DT = getAnalysis(); - const TargetRegisterInfo *TRI = Fn.getTarget().getRegisterInfo(); - - bool allCSRUsesInEntryBlock = true; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - for (MachineBasicBlock::iterator I = MBB->begin(); I != MBB->end(); ++I) { - for (unsigned inx = 0, e = CSI.size(); inx != e; ++inx) { - unsigned Reg = CSI[inx].getReg(); - // If instruction I reads or modifies Reg, add it to UsedCSRegs, - // CSRUsed map for the current block. - for (unsigned opInx = 0, opEnd = I->getNumOperands(); - opInx != opEnd; ++opInx) { - const MachineOperand &MO = I->getOperand(opInx); - if (! (MO.isReg() && (MO.isUse() || MO.isDef()))) - continue; - unsigned MOReg = MO.getReg(); - if (!MOReg) - continue; - if (MOReg == Reg || - (TargetRegisterInfo::isPhysicalRegister(MOReg) && - TargetRegisterInfo::isPhysicalRegister(Reg) && - TRI->isSubRegister(Reg, MOReg))) { - // CSR Reg is defined/used in block MBB. - CSRUsed[MBB].set(inx); - // Check for uses in EntryBlock. - if (MBB != EntryBlock) - allCSRUsesInEntryBlock = false; - } - } - } - } - - if (CSRUsed[MBB].empty()) - continue; - - // Propagate CSRUsed[MBB] in loops - if (MachineLoop* LP = LI.getLoopFor(MBB)) { - // Add top level loop to work list. - MachineBasicBlock* HDR = getTopLevelLoopPreheader(LP); - MachineLoop* PLP = getTopLevelLoopParent(LP); - - if (! HDR) { - HDR = PLP->getHeader(); - assert(HDR->pred_size() > 0 && "Loop header has no predecessors?"); - MachineBasicBlock::pred_iterator PI = HDR->pred_begin(); - HDR = *PI; - } - TLLoops[HDR] = PLP; - - // Push uses from inside loop to its parent loops, - // or to all other MBBs in its loop. - if (LP->getLoopDepth() > 1) { - for (MachineLoop* PLP = LP->getParentLoop(); PLP; - PLP = PLP->getParentLoop()) { - propagateUsesAroundLoop(MBB, PLP); - } - } else { - propagateUsesAroundLoop(MBB, LP); - } - } - } - - if (allCSRUsesInEntryBlock) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in EntryBlock\n"); - ShrinkWrapThisFunction = false; - } else { - bool allCSRsUsedInEntryFanout = true; - for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), - SE = EntryBlock->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (CSRUsed[SUCC] != UsedCSRegs) - allCSRsUsedInEntryFanout = false; - } - if (allCSRsUsedInEntryFanout) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in imm successors of EntryBlock\n"); - ShrinkWrapThisFunction = false; - } - } - - if (ShrinkWrapThisFunction) { - // Check if MBB uses CSRs and dominates all exit nodes. - // Such nodes are equiv. to the entry node w.r.t. - // CSR uses: every path through the function must - // pass through this node. If each CSR is used at least - // once by these nodes, shrink wrapping is disabled. - CSRegSet CSRUsedInChokePoints; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - if (MBB == EntryBlock || CSRUsed[MBB].empty() || MBB->succ_size() < 1) - continue; - bool dominatesExitNodes = true; - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) - if (! 
DT.dominates(MBB, ReturnBlocks[ri])) { - dominatesExitNodes = false; - break; - } - if (dominatesExitNodes) { - CSRUsedInChokePoints |= CSRUsed[MBB]; - if (CSRUsedInChokePoints == UsedCSRegs) { - DEBUG(dbgs() << "DISABLED: " << Fn.getName() - << ": all CSRs used in choke point(s) at " - << getBasicBlockName(MBB) << "\n"); - ShrinkWrapThisFunction = false; - break; - } - } - } - } - - // Return now if we have decided not to apply shrink wrapping - // to the current function. - if (! ShrinkWrapThisFunction) - return false; - - DEBUG({ - dbgs() << "ENABLED: " << Fn.getName(); - if (HasFastExitPath) - dbgs() << " (fast exit path)"; - dbgs() << "\n"; - if (ShrinkWrapDebugging >= BasicInfo) { - dbgs() << "------------------------------" - << "-----------------------------\n"; - dbgs() << "UsedCSRegs = " << stringifyCSRegSet(UsedCSRegs) << "\n"; - if (ShrinkWrapDebugging >= Details) { - dbgs() << "------------------------------" - << "-----------------------------\n"; - dumpAllUsed(); - } - } - }); - - // Build initial DF sets to determine minimal regions in the - // Machine CFG around which CSRs must be spilled and restored. - calculateAnticAvail(Fn); - - return true; -} - -/// addUsesForMEMERegion - add uses of CSRs spilled or restored in -/// multi-entry, multi-exit (MEME) regions so spill and restore -/// placement will not break code that enters or leaves a -/// shrink-wrapped region by inducing spills with no matching -/// restores or restores with no matching spills. A MEME region -/// is a subgraph of the MCFG with multiple entry edges, multiple -/// exit edges, or both. This code propagates use information -/// through the MCFG until all paths requiring spills and restores -/// _outside_ the computed minimal placement regions have been covered. -/// -bool PEI::addUsesForMEMERegion(MachineBasicBlock* MBB, - SmallVector& blks) { - if (MBB->succ_size() < 2 && MBB->pred_size() < 2) { - bool processThisBlock = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC->pred_size() > 1) { - processThisBlock = true; - break; - } - } - if (!CSRRestore[MBB].empty() && MBB->succ_size() > 0) { - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED->succ_size() > 1) { - processThisBlock = true; - break; - } - } - } - if (! processThisBlock) - return false; - } - - CSRegSet prop; - if (!CSRSave[MBB].empty()) - prop = CSRSave[MBB]; - else if (!CSRRestore[MBB].empty()) - prop = CSRRestore[MBB]; - else - prop = CSRUsed[MBB]; - if (prop.empty()) - return false; - - // Propagate selected bits to successors, predecessors of MBB. - bool addedUses = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - // Self-loop - if (SUCC == MBB) - continue; - if (! CSRUsed[SUCC].contains(prop)) { - CSRUsed[SUCC] |= prop; - addedUses = true; - blks.push_back(SUCC); - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(prop) << ")->" - << "successor " << getBasicBlockName(SUCC) << "\n"); - } - } - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - // Self-loop - if (PRED == MBB) - continue; - if (! 
CSRUsed[PRED].contains(prop)) { - CSRUsed[PRED] |= prop; - addedUses = true; - blks.push_back(PRED); - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(prop) << ")->" - << "predecessor " << getBasicBlockName(PRED) << "\n"); - } - } - return addedUses; -} - -/// addUsesForTopLevelLoops - add uses for CSRs used inside top -/// level loops to the exit blocks of those loops. -/// -bool PEI::addUsesForTopLevelLoops(SmallVector& blks) { - bool addedUses = false; - - // Place restores for top level loops where needed. - for (DenseMap::iterator - I = TLLoops.begin(), E = TLLoops.end(); I != E; ++I) { - MachineBasicBlock* MBB = I->first; - MachineLoop* LP = I->second; - MachineBasicBlock* HDR = LP->getHeader(); - SmallVector exitBlocks; - CSRegSet loopSpills; - - loopSpills = CSRSave[MBB]; - if (CSRSave[MBB].empty()) { - loopSpills = CSRUsed[HDR]; - assert(!loopSpills.empty() && "No CSRs used in loop?"); - } else if (CSRRestore[MBB].contains(CSRSave[MBB])) - continue; - - LP->getExitBlocks(exitBlocks); - assert(exitBlocks.size() > 0 && "Loop has no top level exit blocks?"); - for (unsigned i = 0, e = exitBlocks.size(); i != e; ++i) { - MachineBasicBlock* EXB = exitBlocks[i]; - if (! CSRUsed[EXB].contains(loopSpills)) { - CSRUsed[EXB] |= loopSpills; - addedUses = true; - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << "LOOP " << getBasicBlockName(MBB) - << "(" << stringifyCSRegSet(loopSpills) << ")->" - << getBasicBlockName(EXB) << "\n"); - if (EXB->succ_size() > 1 || EXB->pred_size() > 1) - blks.push_back(EXB); - } - } - } - return addedUses; -} - -/// calcSpillPlacements - determine which CSRs should be spilled -/// in MBB using AnticIn sets of MBB's predecessors, keeping track -/// of changes to spilled reg sets. Add MBB to the set of blocks -/// that need to be processed for propagating use info to cover -/// multi-entry/exit regions. -/// -bool PEI::calcSpillPlacements(MachineBasicBlock* MBB, - SmallVector &blks, - CSRegBlockMap &prevSpills) { - bool placedSpills = false; - // Intersect (CSRegs - AnticIn[P]) for P in Predecessors(MBB) - CSRegSet anticInPreds; - SmallVector predecessors; - for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), - PE = MBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock* PRED = *PI; - if (PRED != MBB) - predecessors.push_back(PRED); - } - unsigned i = 0, e = predecessors.size(); - if (i != e) { - MachineBasicBlock* PRED = predecessors[i]; - anticInPreds = UsedCSRegs - AnticIn[PRED]; - for (++i; i != e; ++i) { - PRED = predecessors[i]; - anticInPreds &= (UsedCSRegs - AnticIn[PRED]); - } - } else { - // Handle uses in entry blocks (which have no predecessors). - // This is necessary because the DFA formulation assumes the - // entry and (multiple) exit nodes cannot have CSR uses, which - // is not the case in the real world. - anticInPreds = UsedCSRegs; - } - // Compute spills required at MBB: - CSRSave[MBB] |= (AnticIn[MBB] - AvailIn[MBB]) & anticInPreds; - - if (! CSRSave[MBB].empty()) { - if (MBB == EntryBlock) { - for (unsigned ri = 0, re = ReturnBlocks.size(); ri != re; ++ri) - CSRRestore[ReturnBlocks[ri]] |= CSRSave[MBB]; - } else { - // Reset all regs spilled in MBB that are also spilled in EntryBlock. 
- if (CSRSave[EntryBlock].intersects(CSRSave[MBB])) { - CSRSave[MBB] = CSRSave[MBB] - CSRSave[EntryBlock]; - } - } - } - placedSpills = (CSRSave[MBB] != prevSpills[MBB]); - prevSpills[MBB] = CSRSave[MBB]; - // Remember this block for adding restores to successor - // blocks for multi-entry region. - if (placedSpills) - blks.push_back(MBB); - - DEBUG(if (! CSRSave[MBB].empty() && ShrinkWrapDebugging >= Iterations) - dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) << "\n"); - - return placedSpills; -} - -/// calcRestorePlacements - determine which CSRs should be restored -/// in MBB using AvailOut sets of MBB's succcessors, keeping track -/// of changes to restored reg sets. Add MBB to the set of blocks -/// that need to be processed for propagating use info to cover -/// multi-entry/exit regions. -/// -bool PEI::calcRestorePlacements(MachineBasicBlock* MBB, - SmallVector &blks, - CSRegBlockMap &prevRestores) { - bool placedRestores = false; - // Intersect (CSRegs - AvailOut[S]) for S in Successors(MBB) - CSRegSet availOutSucc; - SmallVector successors; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - if (SUCC != MBB) - successors.push_back(SUCC); - } - unsigned i = 0, e = successors.size(); - if (i != e) { - MachineBasicBlock* SUCC = successors[i]; - availOutSucc = UsedCSRegs - AvailOut[SUCC]; - for (++i; i != e; ++i) { - SUCC = successors[i]; - availOutSucc &= (UsedCSRegs - AvailOut[SUCC]); - } - } else { - if (! CSRUsed[MBB].empty() || ! AvailOut[MBB].empty()) { - // Handle uses in return blocks (which have no successors). - // This is necessary because the DFA formulation assumes the - // entry and (multiple) exit nodes cannot have CSR uses, which - // is not the case in the real world. - availOutSucc = UsedCSRegs; - } - } - // Compute restores required at MBB: - CSRRestore[MBB] |= (AvailOut[MBB] - AnticOut[MBB]) & availOutSucc; - - // Postprocess restore placements at MBB. - // Remove the CSRs that are restored in the return blocks. - // Lest this be confusing, note that: - // CSRSave[EntryBlock] == CSRRestore[B] for all B in ReturnBlocks. - if (MBB->succ_size() && ! CSRRestore[MBB].empty()) { - if (! CSRSave[EntryBlock].empty()) - CSRRestore[MBB] = CSRRestore[MBB] - CSRSave[EntryBlock]; - } - placedRestores = (CSRRestore[MBB] != prevRestores[MBB]); - prevRestores[MBB] = CSRRestore[MBB]; - // Remember this block for adding saves to predecessor - // blocks for multi-entry region. - if (placedRestores) - blks.push_back(MBB); - - DEBUG(if (! CSRRestore[MBB].empty() && ShrinkWrapDebugging >= Iterations) - dbgs() << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); - - return placedRestores; -} - -/// placeSpillsAndRestores - place spills and restores of CSRs -/// used in MBBs in minimal regions that contain the uses. -/// -void PEI::placeSpillsAndRestores(MachineFunction &Fn) { - CSRegBlockMap prevCSRSave; - CSRegBlockMap prevCSRRestore; - SmallVector cvBlocks, ncvBlocks; - bool changed = true; - unsigned iterations = 0; - - // Iterate computation of spill and restore placements in the MCFG until: - // 1. CSR use info has been fully propagated around the MCFG, and - // 2. computation of CSRSave[], CSRRestore[] reach fixed points. 
- while (changed) { - changed = false; - ++iterations; - - DEBUG(if (ShrinkWrapDebugging >= Iterations) - dbgs() << "iter " << iterations - << " --------------------------------------------------\n"); - - // Calculate CSR{Save,Restore} sets using Antic, Avail on the MCFG, - // which determines the placements of spills and restores. - // Keep track of changes to spills, restores in each iteration to - // minimize the total iterations. - bool SRChanged = false; - for (MachineFunction::iterator MBBI = Fn.begin(), MBBE = Fn.end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - - // Place spills for CSRs in MBB. - SRChanged |= calcSpillPlacements(MBB, cvBlocks, prevCSRSave); - - // Place restores for CSRs in MBB. - SRChanged |= calcRestorePlacements(MBB, cvBlocks, prevCSRRestore); - } - - // Add uses of CSRs used inside loops where needed. - changed |= addUsesForTopLevelLoops(cvBlocks); - - // Add uses for CSRs spilled or restored at branch, join points. - if (changed || SRChanged) { - while (! cvBlocks.empty()) { - MachineBasicBlock* MBB = cvBlocks.pop_back_val(); - changed |= addUsesForMEMERegion(MBB, ncvBlocks); - } - if (! ncvBlocks.empty()) { - cvBlocks = ncvBlocks; - ncvBlocks.clear(); - } - } - - if (changed) { - calculateAnticAvail(Fn); - CSRSave.clear(); - CSRRestore.clear(); - } - } - - // Check for effectiveness: - // SR0 = {r | r in CSRSave[EntryBlock], CSRRestore[RB], RB in ReturnBlocks} - // numSRReduced = |(UsedCSRegs - SR0)|, approx. SR0 by CSRSave[EntryBlock] - // Gives a measure of how many CSR spills have been moved from EntryBlock - // to minimal regions enclosing their uses. - CSRegSet notSpilledInEntryBlock = (UsedCSRegs - CSRSave[EntryBlock]); - unsigned numSRReducedThisFunc = notSpilledInEntryBlock.count(); - numSRReduced += numSRReducedThisFunc; - DEBUG(if (ShrinkWrapDebugging >= BasicInfo) { - dbgs() << "-----------------------------------------------------------\n"; - dbgs() << "total iterations = " << iterations << " ( " - << Fn.getName() - << " " << numSRReducedThisFunc - << " " << Fn.size() - << " )\n"; - dbgs() << "-----------------------------------------------------------\n"; - dumpSRSets(); - dbgs() << "-----------------------------------------------------------\n"; - if (numSRReducedThisFunc) - verifySpillRestorePlacement(); - }); -} - -// Debugging methods. -#ifndef NDEBUG -/// findFastExitPath - debugging method used to detect functions -/// with at least one path from the entry block to a return block -/// directly or which has a very small number of edges. -/// -void PEI::findFastExitPath() { - if (! EntryBlock) - return; - // Fina a path from EntryBlock to any return block that does not branch: - // Entry - // | ... - // v | - // B1<-----+ - // | - // v - // Return - for (MachineBasicBlock::succ_iterator SI = EntryBlock->succ_begin(), - SE = EntryBlock->succ_end(); SI != SE; ++SI) { - MachineBasicBlock* SUCC = *SI; - - // Assume positive, disprove existence of fast path. - HasFastExitPath = true; - - // Check the immediate successors. - if (isReturnBlock(SUCC)) { - if (ShrinkWrapDebugging >= BasicInfo) - dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) - << "->" << getBasicBlockName(SUCC) << "\n"; - break; - } - // Traverse df from SUCC, look for a branch block. - std::string exitPath = getBasicBlockName(SUCC); - for (df_iterator BI = df_begin(SUCC), - BE = df_end(SUCC); BI != BE; ++BI) { - MachineBasicBlock* SBB = *BI; - // Reject paths with branch nodes. 
- if (SBB->succ_size() > 1) { - HasFastExitPath = false; - break; - } - exitPath += "->" + getBasicBlockName(SBB); - } - if (HasFastExitPath) { - if (ShrinkWrapDebugging >= BasicInfo) - dbgs() << "Fast exit path: " << getBasicBlockName(EntryBlock) - << "->" << exitPath << "\n"; - break; - } - } -} - -/// verifySpillRestorePlacement - check the current spill/restore -/// sets for safety. Attempt to find spills without restores or -/// restores without spills. -/// Spills: walk df from each MBB in spill set ensuring that -/// all CSRs spilled at MMBB are restored on all paths -/// from MBB to all exit blocks. -/// Restores: walk idf from each MBB in restore set ensuring that -/// all CSRs restored at MBB are spilled on all paths -/// reaching MBB. -/// -void PEI::verifySpillRestorePlacement() { - unsigned numReturnBlocks = 0; - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - if (isReturnBlock(MBB) || MBB->succ_size() == 0) - ++numReturnBlocks; - } - for (CSRegBlockMap::iterator BI = CSRSave.begin(), - BE = CSRSave.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet spilled = BI->second; - CSRegSet restored; - - if (spilled.empty()) - continue; - - DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(spilled) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"); - - if (CSRRestore[MBB].intersects(spilled)) { - restored |= (CSRRestore[MBB] & spilled); - } - - // Walk depth first from MBB to find restores of all CSRs spilled at MBB: - // we must find restores for all spills w/no intervening spills on all - // paths from MBB to all return blocks. - for (df_iterator BI = df_begin(MBB), - BE = df_end(MBB); BI != BE; ++BI) { - MachineBasicBlock* SBB = *BI; - if (SBB == MBB) - continue; - // Stop when we encounter spills of any CSRs spilled at MBB that - // have not yet been seen to be restored. - if (CSRSave[SBB].intersects(spilled) && - !restored.contains(CSRSave[SBB] & spilled)) - break; - // Collect the CSRs spilled at MBB that are restored - // at this DF successor of MBB. - if (CSRRestore[SBB].intersects(spilled)) - restored |= (CSRRestore[SBB] & spilled); - // If we are at a retun block, check that the restores - // we have seen so far exhaust the spills at MBB, then - // reset the restores. - if (isReturnBlock(SBB) || SBB->succ_size() == 0) { - if (restored != spilled) { - CSRegSet notRestored = (spilled - restored); - DEBUG(dbgs() << MF->getName() << ": " - << stringifyCSRegSet(notRestored) - << " spilled at " << getBasicBlockName(MBB) - << " are never restored on path to return " - << getBasicBlockName(SBB) << "\n"); - } - restored.clear(); - } - } - } - - // Check restore placements. 
- for (CSRegBlockMap::iterator BI = CSRRestore.begin(), - BE = CSRRestore.end(); BI != BE; ++BI) { - MachineBasicBlock* MBB = BI->first; - CSRegSet restored = BI->second; - CSRegSet spilled; - - if (restored.empty()) - continue; - - DEBUG(dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]) - << " RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(restored) << "\n"); - - if (CSRSave[MBB].intersects(restored)) { - spilled |= (CSRSave[MBB] & restored); - } - // Walk inverse depth first from MBB to find spills of all - // CSRs restored at MBB: - for (idf_iterator BI = idf_begin(MBB), - BE = idf_end(MBB); BI != BE; ++BI) { - MachineBasicBlock* PBB = *BI; - if (PBB == MBB) - continue; - // Stop when we encounter restores of any CSRs restored at MBB that - // have not yet been seen to be spilled. - if (CSRRestore[PBB].intersects(restored) && - !spilled.contains(CSRRestore[PBB] & restored)) - break; - // Collect the CSRs restored at MBB that are spilled - // at this DF predecessor of MBB. - if (CSRSave[PBB].intersects(restored)) - spilled |= (CSRSave[PBB] & restored); - } - if (spilled != restored) { - CSRegSet notSpilled = (restored - spilled); - DEBUG(dbgs() << MF->getName() << ": " - << stringifyCSRegSet(notSpilled) - << " restored at " << getBasicBlockName(MBB) - << " are never spilled\n"); - } - } -} - -// Debugging print methods. -std::string PEI::getBasicBlockName(const MachineBasicBlock* MBB) { - if (!MBB) - return ""; - - if (MBB->getBasicBlock()) - return MBB->getBasicBlock()->getName().str(); - - std::ostringstream name; - name << "_MBB_" << MBB->getNumber(); - return name.str(); -} - -std::string PEI::stringifyCSRegSet(const CSRegSet& s) { - const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo(); - const std::vector CSI = - MF->getFrameInfo()->getCalleeSavedInfo(); - - std::ostringstream srep; - if (CSI.size() == 0) { - srep << "[]"; - return srep.str(); - } - srep << "["; - CSRegSet::iterator I = s.begin(), E = s.end(); - if (I != E) { - unsigned reg = CSI[*I].getReg(); - srep << TRI->getName(reg); - for (++I; I != E; ++I) { - reg = CSI[*I].getReg(); - srep << ","; - srep << TRI->getName(reg); - } - } - srep << "]"; - return srep.str(); -} - -void PEI::dumpSet(const CSRegSet& s) { - DEBUG(dbgs() << stringifyCSRegSet(s) << "\n"); -} - -void PEI::dumpUsed(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << "CSRUsed[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRUsed[MBB]) << "\n"; - }); -} - -void PEI::dumpAllUsed() { - for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpUsed(MBB); - } -} - -void PEI::dumpSets(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << "\n"; - }); -} - -void PEI::dumpSets1(MachineBasicBlock* MBB) { - DEBUG({ - if (MBB) - dbgs() << getBasicBlockName(MBB) << " | " - << stringifyCSRegSet(CSRUsed[MBB]) << " | " - << stringifyCSRegSet(AnticIn[MBB]) << " | " - << stringifyCSRegSet(AnticOut[MBB]) << " | " - << stringifyCSRegSet(AvailIn[MBB]) << " | " - << stringifyCSRegSet(AvailOut[MBB]) << " | " - << stringifyCSRegSet(CSRSave[MBB]) << " | " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - }); -} - -void PEI::dumpAllSets() { - for 
(MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end(); - MBBI != MBBE; ++MBBI) { - MachineBasicBlock* MBB = MBBI; - dumpSets1(MBB); - } -} - -void PEI::dumpSRSets() { - DEBUG({ - for (MachineFunction::iterator MBB = MF->begin(), E = MF->end(); - MBB != E; ++MBB) { - if (!CSRSave[MBB].empty()) { - dbgs() << "SAVE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRSave[MBB]); - if (CSRRestore[MBB].empty()) - dbgs() << '\n'; - } - - if (!CSRRestore[MBB].empty() && !CSRSave[MBB].empty()) - dbgs() << " " - << "RESTORE[" << getBasicBlockName(MBB) << "] = " - << stringifyCSRegSet(CSRRestore[MBB]) << "\n"; - } - }); -} -#endif diff --git a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp index 3903743..da2e710 100644 --- a/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp +++ b/contrib/llvm/lib/CodeGen/SjLjEHPrepare.cpp @@ -42,48 +42,47 @@ STATISTIC(NumInvokes, "Number of invokes replaced"); STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { - class SjLjEHPrepare : public FunctionPass { - const TargetLoweringBase *TLI; - Type *FunctionContextTy; - Constant *RegisterFn; - Constant *UnregisterFn; - Constant *BuiltinSetjmpFn; - Constant *FrameAddrFn; - Constant *StackAddrFn; - Constant *StackRestoreFn; - Constant *LSDAAddrFn; - Value *PersonalityFn; - Constant *CallSiteFn; - Constant *FuncCtxFn; - AllocaInst *FuncCtx; - public: - static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL) - : FunctionPass(ID), TLI(tli) { } - bool doInitialization(Module &M); - bool runOnFunction(Function &F); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const {} - const char *getPassName() const { - return "SJLJ Exception Handling preparation"; - } +class SjLjEHPrepare : public FunctionPass { + const TargetMachine *TM; + Type *FunctionContextTy; + Constant *RegisterFn; + Constant *UnregisterFn; + Constant *BuiltinSetjmpFn; + Constant *FrameAddrFn; + Constant *StackAddrFn; + Constant *StackRestoreFn; + Constant *LSDAAddrFn; + Value *PersonalityFn; + Constant *CallSiteFn; + Constant *FuncCtxFn; + AllocaInst *FuncCtx; + +public: + static char ID; // Pass identification, replacement for typeid + explicit SjLjEHPrepare(const TargetMachine *TM) : FunctionPass(ID), TM(TM) {} + bool doInitialization(Module &M); + bool runOnFunction(Function &F); + + virtual void getAnalysisUsage(AnalysisUsage &AU) const {} + const char *getPassName() const { + return "SJLJ Exception Handling preparation"; + } - private: - bool setupEntryBlockAndCallSites(Function &F); - void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, - Value *SelVal); - Value *setupFunctionContext(Function &F, ArrayRef LPads); - void lowerIncomingArguments(Function &F); - void lowerAcrossUnwindEdges(Function &F, ArrayRef Invokes); - void insertCallSiteStore(Instruction *I, int Number); - }; +private: + bool setupEntryBlockAndCallSites(Function &F); + void substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal); + Value *setupFunctionContext(Function &F, ArrayRef LPads); + void lowerIncomingArguments(Function &F); + void lowerAcrossUnwindEdges(Function &F, ArrayRef Invokes); + void insertCallSiteStore(Instruction *I, int Number); +}; } // end anonymous namespace char SjLjEHPrepare::ID = 0; // Public Interface To the SjLjEHPrepare pass. 
-FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) { - return new SjLjEHPrepare(TLI); +FunctionPass *llvm::createSjLjEHPreparePass(const TargetMachine *TM) { + return new SjLjEHPrepare(TM); } // doInitialization - Set up decalarations and types needed to process // exceptions. @@ -92,23 +91,19 @@ bool SjLjEHPrepare::doInitialization(Module &M) { // builtin_setjmp uses a five word jbuf Type *VoidPtrTy = Type::getInt8PtrTy(M.getContext()); Type *Int32Ty = Type::getInt32Ty(M.getContext()); - FunctionContextTy = - StructType::get(VoidPtrTy, // __prev - Int32Ty, // call_site - ArrayType::get(Int32Ty, 4), // __data - VoidPtrTy, // __personality - VoidPtrTy, // __lsda - ArrayType::get(VoidPtrTy, 5), // __jbuf - NULL); - RegisterFn = M.getOrInsertFunction("_Unwind_SjLj_Register", - Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), - (Type *)0); - UnregisterFn = - M.getOrInsertFunction("_Unwind_SjLj_Unregister", - Type::getVoidTy(M.getContext()), - PointerType::getUnqual(FunctionContextTy), - (Type *)0); + FunctionContextTy = StructType::get(VoidPtrTy, // __prev + Int32Ty, // call_site + ArrayType::get(Int32Ty, 4), // __data + VoidPtrTy, // __personality + VoidPtrTy, // __lsda + ArrayType::get(VoidPtrTy, 5), // __jbuf + NULL); + RegisterFn = M.getOrInsertFunction( + "_Unwind_SjLj_Register", Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), (Type *)0); + UnregisterFn = M.getOrInsertFunction( + "_Unwind_SjLj_Unregister", Type::getVoidTy(M.getContext()), + PointerType::getUnqual(FunctionContextTy), (Type *)0); FrameAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::frameaddress); StackAddrFn = Intrinsic::getDeclaration(&M, Intrinsic::stacksave); StackRestoreFn = Intrinsic::getDeclaration(&M, Intrinsic::stackrestore); @@ -134,16 +129,17 @@ void SjLjEHPrepare::insertCallSiteStore(Instruction *I, int Number) { Value *CallSite = Builder.CreateGEP(FuncCtx, Idxs, "call_site"); // Insert a store of the call-site number - ConstantInt *CallSiteNoC = ConstantInt::get(Type::getInt32Ty(I->getContext()), - Number); - Builder.CreateStore(CallSiteNoC, CallSite, true/*volatile*/); + ConstantInt *CallSiteNoC = + ConstantInt::get(Type::getInt32Ty(I->getContext()), Number); + Builder.CreateStore(CallSiteNoC, CallSite, true /*volatile*/); } /// MarkBlocksLiveIn - Insert BB and all of its predescessors into LiveBBs until /// we reach blocks we've already seen. static void MarkBlocksLiveIn(BasicBlock *BB, - SmallPtrSet &LiveBBs) { - if (!LiveBBs.insert(BB)) return; // already been here. + SmallPtrSet &LiveBBs) { + if (!LiveBBs.insert(BB)) + return; // already been here. for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) MarkBlocksLiveIn(*PI, LiveBBs); @@ -153,12 +149,14 @@ static void MarkBlocksLiveIn(BasicBlock *BB, /// instruction with those returned by the personality function. 
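For orientation on doInitialization above: FunctionContextTy is the per-function record that _Unwind_SjLj_Register and _Unwind_SjLj_Unregister chain together at run time. Here is a C++ mirror of its layout, using the field names from the comments in the code; this is an illustrative sketch, not the unwind runtime's actual header.

#include <cstdint>

struct SjLjFunctionContext {
  SjLjFunctionContext *Prev; // __prev: link in the chain of active contexts
  int32_t CallSite;          // call_site: set before each potentially
                             //   throwing call (see insertCallSiteStore)
  int32_t Data[4];           // __data: exception value and selector land here
  void *Personality;         // __personality: personality routine
  void *LSDA;                // __lsda: language-specific data area
  void *JBuf[5];             // __jbuf: five-word builtin_setjmp buffer
};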
void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, Value *SelVal) { - SmallVector UseWorkList(LPI->use_begin(), LPI->use_end()); + SmallVector UseWorkList(LPI->use_begin(), LPI->use_end()); while (!UseWorkList.empty()) { Value *Val = UseWorkList.pop_back_val(); ExtractValueInst *EVI = dyn_cast(Val); - if (!EVI) continue; - if (EVI->getNumIndices() != 1) continue; + if (!EVI) + continue; + if (EVI->getNumIndices() != 1) + continue; if (*EVI->idx_begin() == 0) EVI->replaceAllUsesWith(ExnVal); else if (*EVI->idx_begin() == 1) @@ -167,14 +165,15 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, EVI->eraseFromParent(); } - if (LPI->getNumUses() == 0) return; + if (LPI->getNumUses() == 0) + return; // There are still some uses of LPI. Construct an aggregate with the exception // values and replace the LPI with that aggregate. Type *LPadType = LPI->getType(); Value *LPadVal = UndefValue::get(LPadType); - IRBuilder<> - Builder(llvm::next(BasicBlock::iterator(cast(SelVal)))); + IRBuilder<> Builder( + llvm::next(BasicBlock::iterator(cast(SelVal)))); LPadVal = Builder.CreateInsertValue(LPadVal, ExnVal, 0, "lpad.val"); LPadVal = Builder.CreateInsertValue(LPadVal, SelVal, 1, "lpad.val"); @@ -183,17 +182,18 @@ void SjLjEHPrepare::substituteLPadValues(LandingPadInst *LPI, Value *ExnVal, /// setupFunctionContext - Allocate the function context on the stack and fill /// it with all of the data that we know at this point. -Value *SjLjEHPrepare:: -setupFunctionContext(Function &F, ArrayRef LPads) { +Value *SjLjEHPrepare::setupFunctionContext(Function &F, + ArrayRef LPads) { BasicBlock *EntryBB = F.begin(); // Create an alloca for the incoming jump buffer ptr and the new jump buffer // that needs to be restored on all exits from the function. This is an alloca // because the value needs to be added to the global context list. + const TargetLowering *TLI = TM->getTargetLowering(); unsigned Align = - TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); - FuncCtx = - new AllocaInst(FunctionContextTy, 0, Align, "fn_context", EntryBB->begin()); + TLI->getDataLayout()->getPrefTypeAlignment(FunctionContextTy); + FuncCtx = new AllocaInst(FunctionContextTy, 0, Align, "fn_context", + EntryBB->begin()); // Fill in the function context structure. for (unsigned I = 0, E = LPads.size(); I != E; ++I) { @@ -204,13 +204,13 @@ setupFunctionContext(Function &F, ArrayRef LPads) { Value *FCData = Builder.CreateConstGEP2_32(FuncCtx, 0, 2, "__data"); // The exception values come back in context->__data[0]. 
- Value *ExceptionAddr = Builder.CreateConstGEP2_32(FCData, 0, 0, - "exception_gep"); + Value *ExceptionAddr = + Builder.CreateConstGEP2_32(FCData, 0, 0, "exception_gep"); Value *ExnVal = Builder.CreateLoad(ExceptionAddr, true, "exn_val"); ExnVal = Builder.CreateIntToPtr(ExnVal, Builder.getInt8PtrTy()); - Value *SelectorAddr = Builder.CreateConstGEP2_32(FCData, 0, 1, - "exn_selector_gep"); + Value *SelectorAddr = + Builder.CreateConstGEP2_32(FCData, 0, 1, "exn_selector_gep"); Value *SelVal = Builder.CreateLoad(SelectorAddr, true, "exn_selector_val"); substituteLPadValues(LPI, ExnVal, SelVal); @@ -220,9 +220,11 @@ setupFunctionContext(Function &F, ArrayRef LPads) { IRBuilder<> Builder(EntryBB->getTerminator()); if (!PersonalityFn) PersonalityFn = LPads[0]->getPersonalityFn(); - Value *PersonalityFieldPtr = Builder.CreateConstGEP2_32(FuncCtx, 0, 3, - "pers_fn_gep"); - Builder.CreateStore(PersonalityFn, PersonalityFieldPtr, /*isVolatile=*/true); + Value *PersonalityFieldPtr = + Builder.CreateConstGEP2_32(FuncCtx, 0, 3, "pers_fn_gep"); + Builder.CreateStore( + Builder.CreateBitCast(PersonalityFn, Builder.getInt8PtrTy()), + PersonalityFieldPtr, /*isVolatile=*/true); // LSDA address Value *LSDA = Builder.CreateCall(LSDAAddrFn, "lsda_addr"); @@ -242,8 +244,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { isa(cast(AfterAllocaInsPt)->getArraySize())) ++AfterAllocaInsPt; - for (Function::arg_iterator - AI = F.arg_begin(), AE = F.arg_end(); AI != AE; ++AI) { + for (Function::arg_iterator AI = F.arg_begin(), AE = F.arg_end(); AI != AE; + ++AI) { Type *Ty = AI->getType(); // Aggregate types can't be cast, but are legal argument types, so we have @@ -262,9 +264,8 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { // This is always a no-op cast because we're casting AI to AI->getType() // so src and destination types are identical. BitCast is the only // possibility. - CastInst *NC = - new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", - AfterAllocaInsPt); + CastInst *NC = new BitCastInst(AI, AI->getType(), AI->getName() + ".tmp", + AfterAllocaInsPt); AI->replaceAllUsesWith(NC); // Set the operand of the cast instruction back to the AllocaInst. @@ -281,20 +282,21 @@ void SjLjEHPrepare::lowerIncomingArguments(Function &F) { /// lowerAcrossUnwindEdges - Find all variables which are alive across an unwind /// edge and spill them. void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, - ArrayRef Invokes) { + ArrayRef Invokes) { // Finally, scan the code looking for instructions with bad live ranges. - for (Function::iterator - BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { - for (BasicBlock::iterator - II = BB->begin(), IIE = BB->end(); II != IIE; ++II) { + for (Function::iterator BB = F.begin(), BBE = F.end(); BB != BBE; ++BB) { + for (BasicBlock::iterator II = BB->begin(), IIE = BB->end(); II != IIE; + ++II) { // Ignore obvious cases we don't have to handle. In particular, most // instructions either have no uses or only have a single use inside the // current block. Ignore them quickly. Instruction *Inst = II; - if (Inst->use_empty()) continue; + if (Inst->use_empty()) + continue; if (Inst->hasOneUse() && cast(Inst->use_back())->getParent() == BB && - !isa(Inst->use_back())) continue; + !isa(Inst->use_back())) + continue; // If this is an alloca in the entry block, it's not a real register // value. @@ -303,16 +305,16 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, continue; // Avoid iterator invalidation by copying users to a temporary vector. 
- SmallVector Users; - for (Value::use_iterator - UI = Inst->use_begin(), E = Inst->use_end(); UI != E; ++UI) { + SmallVector Users; + for (Value::use_iterator UI = Inst->use_begin(), E = Inst->use_end(); + UI != E; ++UI) { Instruction *User = cast(*UI); if (User->getParent() != BB || isa(User)) Users.push_back(User); } // Find all of the blocks that this value is live in. - SmallPtrSet LiveBBs; + SmallPtrSet LiveBBs; LiveBBs.insert(Inst->getParent()); while (!Users.empty()) { Instruction *U = Users.back(); @@ -336,7 +338,7 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, BasicBlock *UnwindBlock = Invokes[i]->getUnwindDest(); if (UnwindBlock != BB && LiveBBs.count(UnwindBlock)) { DEBUG(dbgs() << "SJLJ Spill: " << *Inst << " around " - << UnwindBlock->getName() << "\n"); + << UnwindBlock->getName() << "\n"); NeedsSpill = true; break; } @@ -359,15 +361,16 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, LandingPadInst *LPI = UnwindBlock->getLandingPadInst(); // Place PHIs into a set to avoid invalidating the iterator. - SmallPtrSet PHIsToDemote; - for (BasicBlock::iterator - PN = UnwindBlock->begin(); isa(PN); ++PN) + SmallPtrSet PHIsToDemote; + for (BasicBlock::iterator PN = UnwindBlock->begin(); isa(PN); ++PN) PHIsToDemote.insert(cast(PN)); - if (PHIsToDemote.empty()) continue; + if (PHIsToDemote.empty()) + continue; // Demote the PHIs to the stack. - for (SmallPtrSet::iterator - I = PHIsToDemote.begin(), E = PHIsToDemote.end(); I != E; ++I) + for (SmallPtrSet::iterator I = PHIsToDemote.begin(), + E = PHIsToDemote.end(); + I != E; ++I) DemotePHIToStack(*I); // Move the landingpad instruction back to the top of the landing pad block. @@ -379,9 +382,9 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { - SmallVector Returns; - SmallVector Invokes; - SmallSetVector LPads; + SmallVector Returns; + SmallVector Invokes; + SmallSetVector LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) @@ -401,7 +404,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { Returns.push_back(RI); } - if (Invokes.empty()) return false; + if (Invokes.empty()) + return false; NumInvokes += Invokes.size(); @@ -409,7 +413,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { lowerAcrossUnwindEdges(F, Invokes); Value *FuncCtx = - setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); + setupFunctionContext(F, makeArrayRef(LPads.begin(), LPads.end())); BasicBlock *EntryBB = F.begin(); IRBuilder<> Builder(EntryBB->getTerminator()); @@ -443,7 +447,7 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { insertCallSiteStore(Invokes[I], I + 1); ConstantInt *CallSiteNum = - ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); + ConstantInt::get(Type::getInt32Ty(F.getContext()), I + 1); // Record the call site value for the back end so it stays associated with // the invoke. 
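Taking stock of the numbering above: each invoke gets the 1-based index I + 1 stored, volatile, into the context's call_site field immediately before the call. The run-time shape of the result, sketched with the hypothetical SjLjFunctionContext mirror from earlier and made-up callee names (the volatility of the store is not modelled here):

void may_throw_1();
void may_throw_2();

// Sketch only: what the lowered function effectively does at run time.
void lowered(SjLjFunctionContext &FC) {
  FC.CallSite = 1;   // store emitted by insertCallSiteStore
  may_throw_1();     // invoke #1
  FC.CallSite = 2;
  may_throw_2();     // invoke #2
  // If one of these throws, the unwinder longjmps through __jbuf back into
  // this function, and the landing-pad dispatch switches on FC.CallSite.
}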
@@ -465,8 +469,8 @@ bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { } // Register the function context and make sure it's known to not throw - CallInst *Register = CallInst::Create(RegisterFn, FuncCtx, "", - EntryBB->getTerminator()); + CallInst *Register = + CallInst::Create(RegisterFn, FuncCtx, "", EntryBB->getTerminator()); Register->setDoesNotThrow(); // Following any allocas not in the entry block, update the saved SP in the diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp index c5bbba3..10a93b7 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.cpp +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.cpp @@ -31,8 +31,8 @@ #include "SpillPlacement.h" #include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/EdgeBundles.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" @@ -53,11 +53,16 @@ char &llvm::SpillPlacementID = SpillPlacement::ID; void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesAll(); + AU.addRequired(); AU.addRequiredTransitive(); AU.addRequiredTransitive(); MachineFunctionPass::getAnalysisUsage(AU); } +/// Decision threshold. A node gets the output value 0 if the weighted sum of +/// its inputs falls in the open interval (-Threshold;Threshold). +static const BlockFrequency Threshold = 2; + /// Node - Each edge bundle corresponds to a Hopfield node. /// /// The node contains precomputed frequency data that only depends on the CFG, @@ -68,31 +73,25 @@ void SpillPlacement::getAnalysisUsage(AnalysisUsage &AU) const { /// because all weights are positive. /// struct SpillPlacement::Node { - /// Scale - Inverse block frequency feeding into[0] or out of[1] the bundle. - /// Ideally, these two numbers should be identical, but inaccuracies in the - /// block frequency estimates means that we need to normalize ingoing and - /// outgoing frequencies separately so they are commensurate. - float Scale[2]; - - /// Bias - Normalized contributions from non-transparent blocks. - /// A bundle connected to a MustSpill block has a huge negative bias, - /// otherwise it is a number in the range [-2;2]. - float Bias; + /// BiasN - Sum of blocks that prefer a spill. + BlockFrequency BiasN; + /// BiasP - Sum of blocks that prefer a register. + BlockFrequency BiasP; /// Value - Output value of this node computed from the Bias and links. - /// This is always in the range [-1;1]. A positive number means the variable - /// should go in a register through this bundle. - float Value; + /// This is always on of the values {-1, 0, 1}. A positive number means the + /// variable should go in a register through this bundle. + int Value; - typedef SmallVector, 4> LinkVector; + typedef SmallVector, 4> LinkVector; /// Links - (Weight, BundleNo) for all transparent blocks connecting to other - /// bundles. The weights are all positive and add up to at most 2, weights - /// from ingoing and outgoing nodes separately add up to a most 1. The weight - /// sum can be less than 2 when the variable is not live into / out of some - /// connected basic blocks. + /// bundles. The weights are all positive block frequencies. LinkVector Links; + /// SumLinkWeights - Cached sum of the weights of all links + ThresHold. + BlockFrequency SumLinkWeights; + /// preferReg - Return true when this node prefers to be in a register. 
bool preferReg() const { // Undecided nodes (Value==0) go on the stack. @@ -101,28 +100,24 @@ struct SpillPlacement::Node { /// mustSpill - Return True if this node is so biased that it must spill. bool mustSpill() const { - // Actually, we must spill if Bias < sum(weights). - // It may be worth it to compute the weight sum here? - return Bias < -2.0f; - } - - /// Node - Create a blank Node. - Node() { - Scale[0] = Scale[1] = 0; + // We must spill if Bias < -sum(weights) or the MustSpill flag was set. + // BiasN is saturated when MustSpill is set, make sure this still returns + // true when the RHS saturates. Note that SumLinkWeights includes Threshold. + return BiasN >= BiasP + SumLinkWeights; } /// clear - Reset per-query data, but preserve frequencies that only depend on // the CFG. void clear() { - Bias = Value = 0; + BiasN = BiasP = Value = 0; + SumLinkWeights = Threshold; Links.clear(); } /// addLink - Add a link to bundle b with weight w. - /// out=0 for an ingoing link, and 1 for an outgoing link. - void addLink(unsigned b, float w, bool out) { - // Normalize w relative to all connected blocks from that direction. - w *= Scale[out]; + void addLink(unsigned b, BlockFrequency w) { + // Update cached sum. + SumLinkWeights += w; // There can be multiple links to the same bundle, add them up. for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) @@ -134,33 +129,48 @@ struct SpillPlacement::Node { Links.push_back(std::make_pair(w, b)); } - /// addBias - Bias this node from an ingoing[0] or outgoing[1] link. - /// Return the change to the total number of positive biases. - void addBias(float w, bool out) { - // Normalize w relative to all connected blocks from that direction. - w *= Scale[out]; - Bias += w; + /// addBias - Bias this node. + void addBias(BlockFrequency freq, BorderConstraint direction) { + switch (direction) { + default: + break; + case PrefReg: + BiasP += freq; + break; + case PrefSpill: + BiasN += freq; + break; + case MustSpill: + BiasN = BlockFrequency::getMaxFrequency(); + break; + } } /// update - Recompute Value from Bias and Links. Return true when node /// preference changes. bool update(const Node nodes[]) { // Compute the weighted sum of inputs. - float Sum = Bias; - for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) - Sum += I->first * nodes[I->second].Value; + BlockFrequency SumN = BiasN; + BlockFrequency SumP = BiasP; + for (LinkVector::iterator I = Links.begin(), E = Links.end(); I != E; ++I) { + if (nodes[I->second].Value == -1) + SumN += I->first; + else if (nodes[I->second].Value == 1) + SumP += I->first; + } - // The weighted sum is going to be in the range [-2;2]. Ideally, we should - // simply set Value = sign(Sum), but we will add a dead zone around 0 for - // two reasons: + // Each weighted sum is going to be less than the total frequency of the + // bundle. Ideally, we should simply set Value = sign(SumP - SumN), but we + // will add a dead zone around 0 for two reasons: + // // 1. It avoids arbitrary bias when all links are 0 as is possible during // initial iterations. // 2. It helps tame rounding errors when the links nominally sum to 0. 
- const float Thres = 1e-4f; + // bool Before = preferReg(); - if (Sum < -Thres) + if (SumN >= SumP + Threshold) Value = -1; - else if (Sum > Thres) + else if (SumP >= SumN + Threshold) Value = 1; else Value = 0; @@ -177,22 +187,13 @@ bool SpillPlacement::runOnMachineFunction(MachineFunction &mf) { nodes = new Node[bundles->getNumBundles()]; // Compute total ingoing and outgoing block frequencies for all bundles. - BlockFrequency.resize(mf.getNumBlockIDs()); + BlockFrequencies.resize(mf.getNumBlockIDs()); + MachineBlockFrequencyInfo &MBFI = getAnalysis(); for (MachineFunction::iterator I = mf.begin(), E = mf.end(); I != E; ++I) { - float Freq = LiveIntervals::getSpillWeight(true, false, - loops->getLoopDepth(I)); unsigned Num = I->getNumber(); - BlockFrequency[Num] = Freq; - nodes[bundles->getBundle(Num, 1)].Scale[0] += Freq; - nodes[bundles->getBundle(Num, 0)].Scale[1] += Freq; + BlockFrequencies[Num] = MBFI.getBlockFreq(I); } - // Scales are reciprocal frequencies. - for (unsigned i = 0, e = bundles->getNumBundles(); i != e; ++i) - for (unsigned d = 0; d != 2; ++d) - if (nodes[i].Scale[d] > 0) - nodes[i].Scale[d] = 1 / nodes[i].Scale[d]; - // We never change the function. return false; } @@ -213,12 +214,15 @@ void SpillPlacement::activate(unsigned n) { // landing pads, or loops with many 'continue' statements. It is difficult to // allocate registers when so many different blocks are involved. // - // Give a small negative bias to large bundles such that 1/32 of the - // connected blocks need to be interested before we consider expanding the - // region through the bundle. This helps compile time by limiting the number - // of blocks visited and the number of links in the Hopfield network. - if (bundles->getBlocks(n).size() > 100) - nodes[n].Bias = -0.0625f; + // Give a small negative bias to large bundles such that a substantial + // fraction of the connected blocks need to be interested before we consider + // expanding the region through the bundle. This helps compile time by + // limiting the number of blocks visited and the number of links in the + // Hopfield network. + if (bundles->getBlocks(n).size() > 100) { + nodes[n].BiasP = 0; + nodes[n].BiasN = (BlockFrequency::getEntryFrequency() / 16); + } } @@ -227,27 +231,20 @@ void SpillPlacement::activate(unsigned n) { void SpillPlacement::addConstraints(ArrayRef LiveBlocks) { for (ArrayRef::iterator I = LiveBlocks.begin(), E = LiveBlocks.end(); I != E; ++I) { - float Freq = getBlockFrequency(I->Number); - const float Bias[] = { - 0, // DontCare, - 1, // PrefReg, - -1, // PrefSpill - 0, // PrefBoth - -HUGE_VALF // MustSpill - }; + BlockFrequency Freq = BlockFrequencies[I->Number]; // Live-in to block? if (I->Entry != DontCare) { unsigned ib = bundles->getBundle(I->Number, 0); activate(ib); - nodes[ib].addBias(Freq * Bias[I->Entry], 1); + nodes[ib].addBias(Freq, I->Entry); } // Live-out from block? 
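Summing up the new update() above: the float bias and the 1e-4 epsilon are gone, and a node's output is now exactly one of {-1, 0, 1}, committed only when one side's BlockFrequency sum beats the other by Threshold. A compact sketch of that rule, with uint64_t standing in for llvm::BlockFrequency (so the real type's saturation on overflow is not modelled):

#include <cstdint>
#include <utility>
#include <vector>

// Three-valued Hopfield update: commit to "register" (+1) or "spill" (-1)
// only when the evidence on one side exceeds the other by Threshold; the gap
// in between is the dead zone described in the comments above.
int updateValue(uint64_t BiasN, uint64_t BiasP,
                const std::vector<std::pair<uint64_t, int> > &Links,
                uint64_t Threshold) {
  uint64_t SumN = BiasN, SumP = BiasP;
  for (size_t i = 0; i != Links.size(); ++i) {
    if (Links[i].second == -1)
      SumN += Links[i].first;   // neighbour currently prefers a spill
    else if (Links[i].second == 1)
      SumP += Links[i].first;   // neighbour currently prefers a register
  }
  if (SumN >= SumP + Threshold)
    return -1;                  // spill
  if (SumP >= SumN + Threshold)
    return 1;                   // register
  return 0;                     // undecided: value goes on the stack
}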
if (I->Exit != DontCare) { unsigned ob = bundles->getBundle(I->Number, 1); activate(ob); - nodes[ob].addBias(Freq * Bias[I->Exit], 0); + nodes[ob].addBias(Freq, I->Exit); } } } @@ -256,15 +253,15 @@ void SpillPlacement::addConstraints(ArrayRef LiveBlocks) { void SpillPlacement::addPrefSpill(ArrayRef Blocks, bool Strong) { for (ArrayRef::iterator I = Blocks.begin(), E = Blocks.end(); I != E; ++I) { - float Freq = getBlockFrequency(*I); + BlockFrequency Freq = BlockFrequencies[*I]; if (Strong) Freq += Freq; unsigned ib = bundles->getBundle(*I, 0); unsigned ob = bundles->getBundle(*I, 1); activate(ib); activate(ob); - nodes[ib].addBias(-Freq, 1); - nodes[ob].addBias(-Freq, 0); + nodes[ib].addBias(Freq, PrefSpill); + nodes[ob].addBias(Freq, PrefSpill); } } @@ -284,9 +281,9 @@ void SpillPlacement::addLinks(ArrayRef Links) { Linked.push_back(ib); if (nodes[ob].Links.empty() && !nodes[ob].mustSpill()) Linked.push_back(ob); - float Freq = getBlockFrequency(Number); - nodes[ib].addLink(ob, Freq, 1); - nodes[ob].addLink(ib, Freq, 0); + BlockFrequency Freq = BlockFrequencies[Number]; + nodes[ib].addLink(ob, Freq); + nodes[ob].addLink(ib, Freq); } } diff --git a/contrib/llvm/lib/CodeGen/SpillPlacement.h b/contrib/llvm/lib/CodeGen/SpillPlacement.h index fc412f8..105516b 100644 --- a/contrib/llvm/lib/CodeGen/SpillPlacement.h +++ b/contrib/llvm/lib/CodeGen/SpillPlacement.h @@ -30,6 +30,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/Support/BlockFrequency.h" namespace llvm { @@ -57,7 +58,7 @@ class SpillPlacement : public MachineFunctionPass { SmallVector RecentPositive; // Block frequencies are computed once. Indexed by block number. - SmallVector BlockFrequency; + SmallVector BlockFrequencies; public: static char ID; // Pass identification, replacement for typeid. @@ -139,8 +140,8 @@ public: /// getBlockFrequency - Return the estimated block execution frequency per /// function invocation. - float getBlockFrequency(unsigned Number) const { - return BlockFrequency[Number]; + BlockFrequency getBlockFrequency(unsigned Number) const { + return BlockFrequencies[Number]; } private: diff --git a/contrib/llvm/lib/CodeGen/Spiller.cpp b/contrib/llvm/lib/CodeGen/Spiller.cpp index 209792f..d5b3a4a 100644 --- a/contrib/llvm/lib/CodeGen/Spiller.cpp +++ b/contrib/llvm/lib/CodeGen/Spiller.cpp @@ -77,7 +77,7 @@ protected: DEBUG(dbgs() << "Spilling everywhere " << *li << "\n"); - assert(li->weight != HUGE_VALF && + assert(li->weight != llvm::huge_valf && "Attempting to spill already spilled value."); assert(!TargetRegisterInfo::isStackSlot(li->reg) && @@ -115,15 +115,14 @@ protected: indices.push_back(i); } - // Create a new vreg & interval for this instr. - LiveInterval *newLI = &LRE.create(); - newLI->weight = HUGE_VALF; + // Create a new virtual register for the load and/or store. + unsigned NewVReg = LRE.create(); // Update the reg operands & kill flags. for (unsigned i = 0; i < indices.size(); ++i) { unsigned mopIdx = indices[i]; MachineOperand &mop = mi->getOperand(mopIdx); - mop.setReg(newLI->reg); + mop.setReg(NewVReg); if (mop.isUse() && !mi->isRegTiedToDefOperand(mopIdx)) { mop.setIsKill(true); } @@ -133,28 +132,20 @@ protected: // Insert reload if necessary. 
MachineBasicBlock::iterator miItr(mi); if (hasUse) { - tii->loadRegFromStackSlot(*mi->getParent(), miItr, newLI->reg, ss, trc, + MachineInstrSpan MIS(miItr); + + tii->loadRegFromStackSlot(*mi->getParent(), miItr, NewVReg, ss, trc, tri); - MachineInstr *loadInstr(prior(miItr)); - SlotIndex loadIndex = - lis->InsertMachineInstrInMaps(loadInstr).getRegSlot(); - SlotIndex endIndex = loadIndex.getNextIndex(); - VNInfo *loadVNI = - newLI->getNextValue(loadIndex, lis->getVNInfoAllocator()); - newLI->addRange(LiveRange(loadIndex, endIndex, loadVNI)); + lis->InsertMachineInstrRangeInMaps(MIS.begin(), miItr); } // Insert store if necessary. if (hasDef) { - tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr),newLI->reg, + MachineInstrSpan MIS(miItr); + + tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), NewVReg, true, ss, trc, tri); - MachineInstr *storeInstr(llvm::next(miItr)); - SlotIndex storeIndex = - lis->InsertMachineInstrInMaps(storeInstr).getRegSlot(); - SlotIndex beginIndex = storeIndex.getPrevIndex(); - VNInfo *storeVNI = - newLI->getNextValue(beginIndex, lis->getVNInfoAllocator()); - newLI->addRange(LiveRange(beginIndex, storeIndex, storeVNI)); + lis->InsertMachineInstrRangeInMaps(llvm::next(miItr), MIS.end()); } } } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.cpp b/contrib/llvm/lib/CodeGen/SplitKit.cpp index 0a3818e..68a15f7 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.cpp +++ b/contrib/llvm/lib/CodeGen/SplitKit.cpp @@ -214,7 +214,7 @@ bool SplitAnalysis::calcLiveBlockInfo() { // When not live in, the first use should be a def. if (!BI.LiveIn) { - assert(LVI->start == LVI->valno->def && "Dangling LiveRange start"); + assert(LVI->start == LVI->valno->def && "Dangling Segment start"); assert(LVI->start == BI.FirstInstr && "First instr should be a def"); BI.FirstDef = BI.FirstInstr; } @@ -245,8 +245,8 @@ bool SplitAnalysis::calcLiveBlockInfo() { BI.FirstInstr = BI.FirstDef = LVI->start; } - // A LiveRange that starts in the middle of the block must be a def. - assert(LVI->start == LVI->valno->def && "Dangling LiveRange start"); + // A Segment that starts in the middle of the block must be a def. + assert(LVI->start == LVI->valno->def && "Dangling Segment start"); if (!BI.FirstDef) BI.FirstDef = LVI->start; } @@ -325,12 +325,14 @@ void SplitAnalysis::analyze(const LiveInterval *li) { SplitEditor::SplitEditor(SplitAnalysis &sa, LiveIntervals &lis, VirtRegMap &vrm, - MachineDominatorTree &mdt) + MachineDominatorTree &mdt, + MachineBlockFrequencyInfo &mbfi) : SA(sa), LIS(lis), VRM(vrm), MRI(vrm.getMachineFunction().getRegInfo()), MDT(mdt), TII(*vrm.getMachineFunction().getTarget().getInstrInfo()), TRI(*vrm.getMachineFunction().getTarget().getRegisterInfo()), + MBFI(mbfi), Edit(0), OpenIdx(0), SpillMode(SM_Partition), @@ -375,7 +377,7 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, assert(ParentVNI && "Mapping NULL value"); assert(Idx.isValid() && "Invalid SlotIndex"); assert(Edit->getParent().getVNInfoAt(Idx) == ParentVNI && "Bad Parent VNI"); - LiveInterval *LI = Edit->get(RegIdx); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); // Create a new value. VNInfo *VNI = LI->getNextValue(Idx, LIS.getVNInfoAllocator()); @@ -393,14 +395,14 @@ VNInfo *SplitEditor::defValue(unsigned RegIdx, // If the previous value was a simple mapping, add liveness for it now. 
if (VNInfo *OldVNI = InsP.first->second.getPointer()) { SlotIndex Def = OldVNI->def; - LI->addRange(LiveRange(Def, Def.getDeadSlot(), OldVNI)); + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), OldVNI)); // No longer a simple mapping. Switch to a complex, non-forced mapping. InsP.first->second = ValueForcePair(); } // This is a complex mapping, add liveness for VNI SlotIndex Def = VNI->def; - LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); return VNI; } @@ -420,7 +422,8 @@ void SplitEditor::forceRecompute(unsigned RegIdx, const VNInfo *ParentVNI) { // This was previously a single mapping. Make sure the old def is represented // by a trivial live range. SlotIndex Def = VNI->def; - Edit->get(RegIdx)->addRange(LiveRange(Def, Def.getDeadSlot(), VNI)); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); + LI->addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI)); // Mark as complex mapped, forced. VFP = ValueForcePair(0, true); } @@ -432,7 +435,7 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, MachineBasicBlock::iterator I) { MachineInstr *CopyMI = 0; SlotIndex Def; - LiveInterval *LI = Edit->get(RegIdx); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); // We may be trying to avoid interference that ends at a deleted instruction, // so always begin RegIdx 0 early and all others late. @@ -460,11 +463,11 @@ VNInfo *SplitEditor::defFromParent(unsigned RegIdx, unsigned SplitEditor::openIntv() { // Create the complement as index 0. if (Edit->empty()) - Edit->create(); + Edit->createEmptyInterval(); // Create the open interval. OpenIdx = Edit->size(); - Edit->create(); + Edit->createEmptyInterval(); return OpenIdx; } @@ -629,7 +632,7 @@ void SplitEditor::overlapIntv(SlotIndex Start, SlotIndex End) { //===----------------------------------------------------------------------===// void SplitEditor::removeBackCopies(SmallVectorImpl &Copies) { - LiveInterval *LI = Edit->get(0); + LiveInterval *LI = &LIS.getInterval(Edit->get(0)); DEBUG(dbgs() << "Removing " << Copies.size() << " back-copies.\n"); RegAssignMap::iterator AssignI; AssignI.setMap(RegAssign); @@ -728,7 +731,7 @@ SplitEditor::findShallowDominator(MachineBasicBlock *MBB, void SplitEditor::hoistCopiesForSize() { // Get the complement interval, always RegIdx 0. - LiveInterval *LI = Edit->get(0); + LiveInterval *LI = &LIS.getInterval(Edit->get(0)); LiveInterval *Parent = &Edit->getParent(); // Track the nearest common dominator for all back-copies for each ParentVNI, @@ -859,13 +862,13 @@ bool SplitEditor::transferValues() { // The interval [Start;End) is continuously mapped to RegIdx, ParentVNI. DEBUG(dbgs() << " [" << Start << ';' << End << ")=" << RegIdx); - LiveInterval *LI = Edit->get(RegIdx); + LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); // Check for a simply defined value that can be blitted directly. ValueForcePair VFP = Values.lookup(std::make_pair(RegIdx, ParentVNI->id)); if (VNInfo *VNI = VFP.getPointer()) { DEBUG(dbgs() << ':' << VNI->id); - LI->addRange(LiveRange(Start, End, VNI)); + LR.addSegment(LiveInterval::Segment(Start, End, VNI)); Start = End; continue; } @@ -889,7 +892,7 @@ bool SplitEditor::transferValues() { // The first block may be live-in, or it may have its own def. 
if (Start != BlockStart) { - VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped value"); DEBUG(dbgs() << ':' << VNI->id << "*BB#" << MBB->getNumber()); // MBB has its own def. Is it also live-out? @@ -909,7 +912,7 @@ bool SplitEditor::transferValues() { if (BlockStart == ParentVNI->def) { // This block has the def of a parent PHI, so it isn't live-in. assert(ParentVNI->isPHIDef() && "Non-phi defined at block start?"); - VNInfo *VNI = LI->extendInBlock(BlockStart, std::min(BlockEnd, End)); + VNInfo *VNI = LR.extendInBlock(BlockStart, std::min(BlockEnd, End)); assert(VNI && "Missing def for complex mapped parent PHI"); if (End >= BlockEnd) LRC.setLiveOutValue(MBB, VNI); // Live-out as well. @@ -917,10 +920,10 @@ bool SplitEditor::transferValues() { // This block needs a live-in value. The last block covered may not // be live-out. if (End < BlockEnd) - LRC.addLiveInBlock(LI, MDT[MBB], End); + LRC.addLiveInBlock(LR, MDT[MBB], End); else { // Live-through, and we don't know the value. - LRC.addLiveInBlock(LI, MDT[MBB]); + LRC.addLiveInBlock(LR, MDT[MBB]); LRC.setLiveOutValue(MBB, 0); } } @@ -947,7 +950,7 @@ void SplitEditor::extendPHIKillRanges() { if (PHIVNI->isUnused() || !PHIVNI->isPHIDef()) continue; unsigned RegIdx = RegAssign.lookup(PHIVNI->def); - LiveInterval *LI = Edit->get(RegIdx); + LiveRange &LR = LIS.getInterval(Edit->get(RegIdx)); LiveRangeCalc &LRC = getLRCalc(RegIdx); MachineBasicBlock *MBB = LIS.getMBBFromIndex(PHIVNI->def); for (MachineBasicBlock::pred_iterator PI = MBB->pred_begin(), @@ -959,7 +962,7 @@ void SplitEditor::extendPHIKillRanges() { if (Edit->getParent().liveAt(LastUse)) { assert(RegAssign.lookup(LastUse) == RegIdx && "Different register assignment in phi predecessor"); - LRC.extend(LI, End); + LRC.extend(LR, End); } } } @@ -988,7 +991,7 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { // Rewrite to the mapped register at Idx. unsigned RegIdx = RegAssign.lookup(Idx); - LiveInterval *LI = Edit->get(RegIdx); + LiveInterval *LI = &LIS.getInterval(Edit->get(RegIdx)); MO.setReg(LI->reg); DEBUG(dbgs() << " rewr BB#" << MI->getParent()->getNumber() << '\t' << Idx << ':' << RegIdx << '\t' << *MI); @@ -1009,14 +1012,14 @@ void SplitEditor::rewriteAssigned(bool ExtendRanges) { } else Idx = Idx.getRegSlot(true); - getLRCalc(RegIdx).extend(LI, Idx.getNextSlot()); + getLRCalc(RegIdx).extend(*LI, Idx.getNextSlot()); } } void SplitEditor::deleteRematVictims() { SmallVector Dead; for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I){ - LiveInterval *LI = *I; + LiveInterval *LI = &LIS.getInterval(*I); for (LiveInterval::const_iterator LII = LI->begin(), LIE = LI->end(); LII != LIE; ++LII) { // Dead defs end at the dead slot. @@ -1089,8 +1092,10 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { deleteRematVictims(); // Get rid of unused values and set phi-kill flags. - for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) - (*I)->RenumberValues(LIS); + for (LiveRangeEdit::iterator I = Edit->begin(), E = Edit->end(); I != E; ++I) { + LiveInterval &LI = LIS.getInterval(*I); + LI.RenumberValues(); + } // Provide a reverse mapping from original indices to Edit ranges. 
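One pattern accounts for most of this SplitKit churn: LiveRangeEdit::get(RegIdx) now returns a virtual register number rather than a LiveInterval pointer, so callers resolve the interval through LiveIntervals at the point of use, and LiveRange segments become LiveInterval::Segment. The recurring idiom, extracted from the hunks above (a fragment for illustration, not a standalone program):

// Before (3.3): LiveInterval *LI = Edit->get(RegIdx);
//               LI->addRange(LiveRange(Def, Def.getDeadSlot(), VNI));
// After (3.4), as used throughout this file:
unsigned VReg = Edit->get(RegIdx);            // now just a vreg number
LiveInterval &LI = LIS.getInterval(VReg);     // resolve on demand
LI.addSegment(LiveInterval::Segment(Def, Def.getDeadSlot(), VNI));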
if (LRMap) { @@ -1103,7 +1108,7 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { ConnectedVNInfoEqClasses ConEQ(LIS); for (unsigned i = 0, e = Edit->size(); i != e; ++i) { // Don't use iterators, they are invalidated by create() below. - LiveInterval *li = Edit->get(i); + LiveInterval *li = &LIS.getInterval(Edit->get(i)); unsigned NumComp = ConEQ.Classify(li); if (NumComp <= 1) continue; @@ -1111,7 +1116,7 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { SmallVector dups; dups.push_back(li); for (unsigned j = 1; j != NumComp; ++j) - dups.push_back(&Edit->create()); + dups.push_back(&Edit->createEmptyInterval()); ConEQ.Distribute(&dups[0], MRI); // The new intervals all map back to i. if (LRMap) @@ -1119,7 +1124,7 @@ void SplitEditor::finish(SmallVectorImpl *LRMap) { } // Calculate spill weight and allocation hints for new intervals. - Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops); + Edit->calculateRegClassAndHint(VRM.getMachineFunction(), SA.Loops, MBFI); assert(!LRMap || LRMap->size() == Edit->size()); } diff --git a/contrib/llvm/lib/CodeGen/SplitKit.h b/contrib/llvm/lib/CodeGen/SplitKit.h index 4005a3d..f029c73 100644 --- a/contrib/llvm/lib/CodeGen/SplitKit.h +++ b/contrib/llvm/lib/CodeGen/SplitKit.h @@ -27,6 +27,7 @@ class ConnectedVNInfoEqClasses; class LiveInterval; class LiveIntervals; class LiveRangeEdit; +class MachineBlockFrequencyInfo; class MachineInstr; class MachineLoopInfo; class MachineRegisterInfo; @@ -215,6 +216,7 @@ class SplitEditor { MachineDominatorTree &MDT; const TargetInstrInfo &TII; const TargetRegisterInfo &TRI; + const MachineBlockFrequencyInfo &MBFI; public: @@ -349,7 +351,7 @@ public: /// Create a new SplitEditor for editing the LiveInterval analyzed by SA. /// Newly created intervals will be appended to newIntervals. SplitEditor(SplitAnalysis &SA, LiveIntervals&, VirtRegMap&, - MachineDominatorTree&); + MachineDominatorTree&, MachineBlockFrequencyInfo &); /// reset - Prepare for a new split. void reset(LiveRangeEdit&, ComplementSpillMode = SM_Partition); diff --git a/contrib/llvm/lib/CodeGen/StackColoring.cpp b/contrib/llvm/lib/CodeGen/StackColoring.cpp index a789a25..3dbc050 100644 --- a/contrib/llvm/lib/CodeGen/StackColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackColoring.cpp @@ -42,6 +42,7 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/DebugInfo.h" #include "llvm/IR/Function.h" @@ -169,7 +170,7 @@ private: /// slots to use the joint slots. void remapInstructions(DenseMap &SlotRemap); - /// The input program may contain intructions which are not inside lifetime + /// The input program may contain instructions which are not inside lifetime /// markers. This can happen due to a bug in the compiler or due to a bug in /// user code (for example, returning a reference to a local variable). 
/// This procedure checks all of the instructions in the function and @@ -309,9 +310,9 @@ void StackColoring::calculateLocalLiveness() { SmallPtrSet NextBBSet; - for (SmallVector::iterator - PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); - PI != PE; ++PI) { + for (SmallVectorImpl::iterator + PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); + PI != PE; ++PI) { const MachineBasicBlock *BB = *PI; if (!BBSet.count(BB)) continue; @@ -428,17 +429,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { } // Create the interval of the blocks that we previously found to be 'alive'. - BitVector Alive = BlockLiveness[MBB].LiveIn; - Alive |= BlockLiveness[MBB].LiveOut; - - if (Alive.any()) { - for (int pos = Alive.find_first(); pos != -1; - pos = Alive.find_next(pos)) { - if (!Starts[pos].isValid()) - Starts[pos] = Indexes->getMBBStartIdx(MBB); - if (!Finishes[pos].isValid()) - Finishes[pos] = Indexes->getMBBEndIdx(MBB); - } + BlockLifetimeInfo &MBBLiveness = BlockLiveness[MBB]; + for (int pos = MBBLiveness.LiveIn.find_first(); pos != -1; + pos = MBBLiveness.LiveIn.find_next(pos)) { + Starts[pos] = Indexes->getMBBStartIdx(MBB); + } + for (int pos = MBBLiveness.LiveOut.find_first(); pos != -1; + pos = MBBLiveness.LiveOut.find_next(pos)) { + Finishes[pos] = Indexes->getMBBEndIdx(MBB); } for (unsigned i = 0; i < NumSlots; ++i) { @@ -452,14 +450,14 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { SlotIndex F = Finishes[i]; if (S < F) { // We have a single consecutive region. - Intervals[i]->addRange(LiveRange(S, F, ValNum)); + Intervals[i]->addSegment(LiveInterval::Segment(S, F, ValNum)); } else { // We have two non consecutive regions. This happens when // LIFETIME_START appears after the LIFETIME_END marker. SlotIndex NewStart = Indexes->getMBBStartIdx(MBB); SlotIndex NewFin = Indexes->getMBBEndIdx(MBB); - Intervals[i]->addRange(LiveRange(NewStart, F, ValNum)); - Intervals[i]->addRange(LiveRange(S, NewFin, ValNum)); + Intervals[i]->addSegment(LiveInterval::Segment(NewStart, F, ValNum)); + Intervals[i]->addSegment(LiveInterval::Segment(S, NewFin, ValNum)); } } } @@ -528,6 +526,10 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { if (!V) continue; + const PseudoSourceValue *PSV = dyn_cast(V); + if (PSV && PSV->isConstant(MFI)) + continue; + // Climb up and find the original alloca. V = GetUnderlyingObject(V); // If we did not find one, or if the one that we found is not in our @@ -761,7 +763,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Merge disjoint slots. 
if (!First->overlaps(*Second)) { Changed = true; - First->MergeRangesInAsValue(*Second, First->getValNumInfo(0)); + First->MergeSegmentsInAsValue(*Second, First->getValNumInfo(0)); SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; DEBUG(dbgs()<<"Merging #"< + +using namespace llvm; + +PatchPointOpers::PatchPointOpers(const MachineInstr *MI): + MI(MI), + HasDef(MI->getOperand(0).isReg() && MI->getOperand(0).isDef() && + !MI->getOperand(0).isImplicit()), + IsAnyReg(MI->getOperand(getMetaIdx(CCPos)).getImm() == CallingConv::AnyReg) { + +#ifndef NDEBUG + { + unsigned CheckStartIdx = 0, e = MI->getNumOperands(); + while (CheckStartIdx < e && MI->getOperand(CheckStartIdx).isReg() && + MI->getOperand(CheckStartIdx).isDef() && + !MI->getOperand(CheckStartIdx).isImplicit()) + ++CheckStartIdx; + + assert(getMetaIdx() == CheckStartIdx && + "Unexpected additonal definition in Patchpoint intrinsic."); + } +#endif +} + +unsigned PatchPointOpers::getNextScratchIdx(unsigned StartIdx) const { + if (!StartIdx) + StartIdx = getVarIdx(); + + // Find the next scratch register (implicit def and early clobber) + unsigned ScratchIdx = StartIdx, e = MI->getNumOperands(); + while (ScratchIdx < e && + !(MI->getOperand(ScratchIdx).isReg() && + MI->getOperand(ScratchIdx).isDef() && + MI->getOperand(ScratchIdx).isImplicit() && + MI->getOperand(ScratchIdx).isEarlyClobber())) + ++ScratchIdx; + + assert(ScratchIdx != e && "No scratch register available"); + return ScratchIdx; +} + +void StackMaps::recordStackMapOpers(const MachineInstr &MI, uint32_t ID, + MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE, + bool recordResult) { + + MCContext &OutContext = AP.OutStreamer.getContext(); + MCSymbol *MILabel = OutContext.CreateTempSymbol(); + AP.OutStreamer.EmitLabel(MILabel); + + LocationVec CallsiteLocs; + + if (recordResult) { + std::pair ParseResult = + OpParser(MI.operands_begin(), llvm::next(MI.operands_begin()), AP.TM); + + Location &Loc = ParseResult.first; + assert(Loc.LocType == Location::Register && + "Stackmap return location must be a register."); + CallsiteLocs.push_back(Loc); + } + + while (MOI != MOE) { + std::pair ParseResult = + OpParser(MOI, MOE, AP.TM); + + Location &Loc = ParseResult.first; + + // Move large constants into the constant pool. 
+ if (Loc.LocType == Location::Constant && (Loc.Offset & ~0xFFFFFFFFULL)) { + Loc.LocType = Location::ConstantIndex; + Loc.Offset = ConstPool.getConstantIndex(Loc.Offset); + } + + CallsiteLocs.push_back(Loc); + MOI = ParseResult.second; + } + + const MCExpr *CSOffsetExpr = MCBinaryExpr::CreateSub( + MCSymbolRefExpr::Create(MILabel, OutContext), + MCSymbolRefExpr::Create(AP.CurrentFnSym, OutContext), + OutContext); + + CSInfos.push_back(CallsiteInfo(CSOffsetExpr, ID, CallsiteLocs)); +} + +static MachineInstr::const_mop_iterator +getStackMapEndMOP(MachineInstr::const_mop_iterator MOI, + MachineInstr::const_mop_iterator MOE) { + for (; MOI != MOE; ++MOI) + if (MOI->isRegMask() || (MOI->isReg() && MOI->isImplicit())) + break; + + return MOI; +} + +void StackMaps::recordStackMap(const MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::STACKMAP && "exected stackmap"); + + int64_t ID = MI.getOperand(0).getImm(); + assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); + recordStackMapOpers(MI, ID, llvm::next(MI.operands_begin(), 2), + getStackMapEndMOP(MI.operands_begin(), + MI.operands_end())); +} + +void StackMaps::recordPatchPoint(const MachineInstr &MI) { + assert(MI.getOpcode() == TargetOpcode::PATCHPOINT && "exected stackmap"); + + PatchPointOpers opers(&MI); + int64_t ID = opers.getMetaOper(PatchPointOpers::IDPos).getImm(); + assert((int32_t)ID == ID && "Stack maps hold 32-bit IDs"); + MachineInstr::const_mop_iterator MOI = + llvm::next(MI.operands_begin(), opers.getStackMapStartIdx()); + recordStackMapOpers(MI, ID, MOI, getStackMapEndMOP(MOI, MI.operands_end()), + opers.isAnyReg() && opers.hasDef()); + +#ifndef NDEBUG + // verify anyregcc + LocationVec &Locations = CSInfos.back().Locations; + if (opers.isAnyReg()) { + unsigned NArgs = opers.getMetaOper(PatchPointOpers::NArgPos).getImm(); + for (unsigned i = 0, e = (opers.hasDef() ? NArgs+1 : NArgs); i != e; ++i) + assert(Locations[i].LocType == Location::Register && + "anyreg arg must be in reg."); + } +#endif +} + +/// serializeToStackMapSection conceptually populates the following fields: +/// +/// uint32 : Reserved (header) +/// uint32 : NumConstants +/// int64 : Constants[NumConstants] +/// uint32 : NumRecords +/// StkMapRecord[NumRecords] { +/// uint32 : PatchPoint ID +/// uint32 : Instruction Offset +/// uint16 : Reserved (record flags) +/// uint16 : NumLocations +/// Location[NumLocations] { +/// uint8 : Register | Direct | Indirect | Constant | ConstantIndex +/// uint8 : Size in Bytes +/// uint16 : Dwarf RegNum +/// int32 : Offset +/// } +/// } +/// +/// Location Encoding, Type, Value: +/// 0x1, Register, Reg (value in register) +/// 0x2, Direct, Reg + Offset (frame index) +/// 0x3, Indirect, [Reg + Offset] (spilled value) +/// 0x4, Constant, Offset (small constant) +/// 0x5, ConstIndex, Constants[Offset] (large constant) +/// +void StackMaps::serializeToStackMapSection() { + // Bail out if there's no stack map data. + if (CSInfos.empty()) + return; + + MCContext &OutContext = AP.OutStreamer.getContext(); + const TargetRegisterInfo *TRI = AP.TM.getRegisterInfo(); + + // Create the section. + const MCSection *StackMapSection = + OutContext.getObjectFileInfo()->getStackMapSection(); + AP.OutStreamer.SwitchSection(StackMapSection); + + // Emit a dummy symbol to force section inclusion. + AP.OutStreamer.EmitLabel( + OutContext.GetOrCreateSymbol(Twine("__LLVM_StackMaps"))); + + // Serialize data. 
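The serializeToStackMapSection comment above pins down the section's wire format. A reader-side restatement as plain C++ records follows, with field order and widths as documented; treat it as a description for orientation rather than a parser, since the emitter below writes each field individually and target endianness applies.

#include <cstdint>

// Section layout: uint32 Reserved; uint32 NumConstants;
// int64 Constants[NumConstants]; uint32 NumRecords; then NumRecords of:
struct StkMapRecordHeader {
  uint32_t PatchPointID;
  uint32_t InstructionOffset; // label offset from function start
  uint16_t ReservedFlags;
  uint16_t NumLocations;      // followed by NumLocations of:
};

struct StkMapLocation {
  uint8_t Type;   // 0x1 Register, 0x2 Direct, 0x3 Indirect,
                  // 0x4 Constant, 0x5 ConstantIndex
  uint8_t Size;   // size in bytes
  uint16_t DwarfRegNum;
  int32_t Offset; // offset, small constant, or constant-pool index, by Type
};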
+ const char *WSMP = "Stack Maps: "; + (void)WSMP; + const MCRegisterInfo &MCRI = *OutContext.getRegisterInfo(); + + DEBUG(dbgs() << "********** Stack Map Output **********\n"); + + // Header. + AP.OutStreamer.EmitIntValue(0, 4); + + // Num constants. + AP.OutStreamer.EmitIntValue(ConstPool.getNumConstants(), 4); + + // Constant pool entries. + for (unsigned i = 0; i < ConstPool.getNumConstants(); ++i) + AP.OutStreamer.EmitIntValue(ConstPool.getConstant(i), 8); + + DEBUG(dbgs() << WSMP << "#callsites = " << CSInfos.size() << "\n"); + AP.OutStreamer.EmitIntValue(CSInfos.size(), 4); + + for (CallsiteInfoList::const_iterator CSII = CSInfos.begin(), + CSIE = CSInfos.end(); + CSII != CSIE; ++CSII) { + + unsigned CallsiteID = CSII->ID; + const LocationVec &CSLocs = CSII->Locations; + + DEBUG(dbgs() << WSMP << "callsite " << CallsiteID << "\n"); + + // Verify stack map entry. It's better to communicate a problem to the + // runtime than crash in case of in-process compilation. Currently, we do + // simple overflow checks, but we may eventually communicate other + // compilation errors this way. + if (CSLocs.size() > UINT16_MAX) { + AP.OutStreamer.EmitIntValue(UINT32_MAX, 4); // Invalid ID. + AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); + AP.OutStreamer.EmitIntValue(0, 2); // Reserved. + AP.OutStreamer.EmitIntValue(0, 2); // 0 locations. + continue; + } + + AP.OutStreamer.EmitIntValue(CallsiteID, 4); + AP.OutStreamer.EmitValue(CSII->CSOffsetExpr, 4); + + // Reserved for flags. + AP.OutStreamer.EmitIntValue(0, 2); + + DEBUG(dbgs() << WSMP << " has " << CSLocs.size() << " locations\n"); + + AP.OutStreamer.EmitIntValue(CSLocs.size(), 2); + + unsigned operIdx = 0; + for (LocationVec::const_iterator LocI = CSLocs.begin(), LocE = CSLocs.end(); + LocI != LocE; ++LocI, ++operIdx) { + const Location &Loc = *LocI; + DEBUG( + dbgs() << WSMP << " Loc " << operIdx << ": "; + switch (Loc.LocType) { + case Location::Unprocessed: + dbgs() << ""; + break; + case Location::Register: + dbgs() << "Register " << MCRI.getName(Loc.Reg); + break; + case Location::Direct: + dbgs() << "Direct " << MCRI.getName(Loc.Reg); + if (Loc.Offset) + dbgs() << " + " << Loc.Offset; + break; + case Location::Indirect: + dbgs() << "Indirect " << MCRI.getName(Loc.Reg) + << " + " << Loc.Offset; + break; + case Location::Constant: + dbgs() << "Constant " << Loc.Offset; + break; + case Location::ConstantIndex: + dbgs() << "Constant Index " << Loc.Offset; + break; + } + dbgs() << "\n"; + ); + + unsigned RegNo = 0; + int Offset = Loc.Offset; + if(Loc.Reg) { + RegNo = MCRI.getDwarfRegNum(Loc.Reg, false); + for (MCSuperRegIterator SR(Loc.Reg, TRI); + SR.isValid() && (int)RegNo < 0; ++SR) { + RegNo = TRI->getDwarfRegNum(*SR, false); + } + // If this is a register location, put the subregister byte offset in + // the location offset. 
+ if (Loc.LocType == Location::Register) { + assert(!Loc.Offset && "Register location should have zero offset"); + unsigned LLVMRegNo = MCRI.getLLVMRegNum(RegNo, false); + unsigned SubRegIdx = MCRI.getSubRegIndex(LLVMRegNo, Loc.Reg); + if (SubRegIdx) + Offset = MCRI.getSubRegIdxOffset(SubRegIdx); + } + } + else { + assert(Loc.LocType != Location::Register && + "Missing location register"); + } + AP.OutStreamer.EmitIntValue(Loc.LocType, 1); + AP.OutStreamer.EmitIntValue(Loc.Size, 1); + AP.OutStreamer.EmitIntValue(RegNo, 2); + AP.OutStreamer.EmitIntValue(Offset, 4); + } + } + + AP.OutStreamer.AddBlankLine(); + + CSInfos.clear(); +} diff --git a/contrib/llvm/lib/CodeGen/StackProtector.cpp b/contrib/llvm/lib/CodeGen/StackProtector.cpp index fbef347..9020449 100644 --- a/contrib/llvm/lib/CodeGen/StackProtector.cpp +++ b/contrib/llvm/lib/CodeGen/StackProtector.cpp @@ -15,147 +15,120 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "stack-protector" +#include "llvm/CodeGen/StackProtector.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/Triple.h" #include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/Attributes.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/GlobalValue.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IRBuilder.h" #include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/IR/Intrinsics.h" #include "llvm/IR/Module.h" -#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Target/TargetLowering.h" +#include using namespace llvm; STATISTIC(NumFunProtected, "Number of functions protected"); STATISTIC(NumAddrTaken, "Number of local variables that have their address" " taken."); -namespace { - class StackProtector : public FunctionPass { - /// TLI - Keep a pointer of a TargetLowering to consult for determining - /// target type sizes. - const TargetLoweringBase *TLI; - - Function *F; - Module *M; - - DominatorTree *DT; - - /// VisitedPHIs - The set of PHI nodes visited when determining - /// if a variable's reference has been taken. This set - /// is maintained to ensure we don't visit the same PHI node multiple - /// times. - SmallPtrSet VisitedPHIs; - - /// InsertStackProtectors - Insert code into the prologue and epilogue of - /// the function. - /// - /// - The prologue code loads and stores the stack guard onto the stack. - /// - The epilogue checks the value stored in the prologue against the - /// original value. It calls __stack_chk_fail if they differ. - bool InsertStackProtectors(); - - /// CreateFailBB - Create a basic block to jump to when the stack protector - /// check fails. - BasicBlock *CreateFailBB(); - - /// ContainsProtectableArray - Check whether the type either is an array or - /// contains an array of sufficient size so that we need stack protectors - /// for it. - bool ContainsProtectableArray(Type *Ty, bool Strong = false, - bool InStruct = false) const; - - /// \brief Check whether a stack allocation has its address taken. - bool HasAddressTaken(const Instruction *AI); - - /// RequiresStackProtector - Check whether or not this function needs a - /// stack protector based upon the stack protector level. 
- bool RequiresStackProtector(); - public: - static char ID; // Pass identification, replacement for typeid. - StackProtector() : FunctionPass(ID), TLI(0) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } - StackProtector(const TargetLoweringBase *tli) - : FunctionPass(ID), TLI(tli) { - initializeStackProtectorPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addPreserved(); - } - - virtual bool runOnFunction(Function &Fn); - }; -} // end anonymous namespace +static cl::opt EnableSelectionDAGSP("enable-selectiondag-sp", + cl::init(true), cl::Hidden); char StackProtector::ID = 0; -INITIALIZE_PASS(StackProtector, "stack-protector", - "Insert stack protectors", false, false) +INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors", + false, true) -FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) { - return new StackProtector(tli); +FunctionPass *llvm::createStackProtectorPass(const TargetMachine *TM) { + return new StackProtector(TM); +} + +StackProtector::SSPLayoutKind +StackProtector::getSSPLayout(const AllocaInst *AI) const { + return AI ? Layout.lookup(AI) : SSPLK_None; } bool StackProtector::runOnFunction(Function &Fn) { F = &Fn; M = F->getParent(); DT = getAnalysisIfAvailable(); + TLI = TM->getTargetLowering(); - if (!RequiresStackProtector()) return false; + if (!RequiresStackProtector()) + return false; + + Attribute Attr = Fn.getAttributes().getAttribute( + AttributeSet::FunctionIndex, "stack-protector-buffer-size"); + if (Attr.isStringAttribute()) + Attr.getValueAsString().getAsInteger(10, SSPBufferSize); ++NumFunProtected; return InsertStackProtectors(); } -/// ContainsProtectableArray - Check whether the type either is an array or -/// contains a char array of sufficient size so that we need stack protectors -/// for it. -bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong, +/// \param [out] IsLarge is set to true if a protectable array is found and +/// it is "large" ( >= ssp-buffer-size). In the case of a structure with +/// multiple arrays, this gets set if any of them is large. +bool StackProtector::ContainsProtectableArray(Type *Ty, bool &IsLarge, + bool Strong, bool InStruct) const { - if (!Ty) return false; + if (!Ty) + return false; if (ArrayType *AT = dyn_cast(Ty)) { - // In strong mode any array, regardless of type and size, triggers a - // protector - if (Strong) - return true; - const TargetMachine &TM = TLI->getTargetMachine(); if (!AT->getElementType()->isIntegerTy(8)) { - Triple Trip(TM.getTargetTriple()); - // If we're on a non-Darwin platform or we're inside of a structure, don't // add stack protectors unless the array is a character array. - if (InStruct || !Trip.isOSDarwin()) - return false; + // However, in strong mode any array, regardless of type and size, + // triggers a protector. + if (!Strong && (InStruct || !Trip.isOSDarwin())) + return false; } // If an array has more than SSPBufferSize bytes of allocated space, then we // emit stack protectors. 
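ContainsProtectableArray now reports, via the IsLarge out-parameter, whether any protectable array meets the ssp-buffer-size bar, and RequiresStackProtector (further down in this hunk) records an SSPLK_* kind per alloca instead of answering a bare yes/no. A condensed sketch of that classification; the enumerator names come from this patch, but their order here is illustrative and the decision logic is simplified (the real code also walks struct elements and variable-size allocas).

enum SSPLayoutKind {
  SSPLK_None,       // does not trigger a protector
  SSPLK_LargeArray, // array >= ssp-buffer-size, or variable-size alloca
  SSPLK_SmallArray, // array below the bar, protected in strong mode
  SSPLK_AddrOf      // address-taken local, protected in strong mode
};

// Simplified restatement of the per-alloca rules in RequiresStackProtector.
SSPLayoutKind classify(bool IsProtectableArray, bool IsLarge, bool Strong,
                       bool AddressTaken) {
  if (IsProtectableArray)
    return IsLarge ? SSPLK_LargeArray : SSPLK_SmallArray;
  if (Strong && AddressTaken)
    return SSPLK_AddrOf;
  return SSPLK_None;
}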
- if (TM.Options.SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) + if (SSPBufferSize <= TLI->getDataLayout()->getTypeAllocSize(AT)) { + IsLarge = true; + return true; + } + + if (Strong) + // Require a protector for all arrays in strong mode return true; } const StructType *ST = dyn_cast(Ty); - if (!ST) return false; + if (!ST) + return false; + bool NeedsProtector = false; for (StructType::element_iterator I = ST->element_begin(), - E = ST->element_end(); I != E; ++I) - if (ContainsProtectableArray(*I, Strong, true)) - return true; + E = ST->element_end(); + I != E; ++I) + if (ContainsProtectableArray(*I, IsLarge, Strong, true)) { + // If the element is a protectable array and is large (>= SSPBufferSize) + // then we are done. If the protectable array is not large, then + // keep looking in case a subsequent element is a large array. + if (IsLarge) + return true; + NeedsProtector = true; + } - return false; + return NeedsProtector; } bool StackProtector::HasAddressTaken(const Instruction *AI) { for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); - UI != UE; ++UI) { + UI != UE; ++UI) { const User *U = *UI; if (const StoreInst *SI = dyn_cast(U)) { if (AI == SI->getValueOperand()) @@ -202,11 +175,13 @@ bool StackProtector::HasAddressTaken(const Instruction *AI) { /// address taken. bool StackProtector::RequiresStackProtector() { bool Strong = false; + bool NeedsProtector = false; if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectReq)) - return true; - else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, - Attribute::StackProtectStrong)) + Attribute::StackProtectReq)) { + NeedsProtector = true; + Strong = true; // Use the same heuristic as strong to determine SSPLayout + } else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectStrong)) Strong = true; else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, Attribute::StackProtect)) @@ -215,38 +190,156 @@ bool StackProtector::RequiresStackProtector() { for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; - for (BasicBlock::iterator - II = BB->begin(), IE = BB->end(); II != IE; ++II) { + for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); II != IE; + ++II) { if (AllocaInst *AI = dyn_cast(II)) { if (AI->isArrayAllocation()) { // SSP-Strong: Enable protectors for any call to alloca, regardless // of size. if (Strong) return true; - + if (const ConstantInt *CI = - dyn_cast(AI->getArraySize())) { - unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize; - if (CI->getLimitedValue(BufferSize) >= BufferSize) + dyn_cast(AI->getArraySize())) { + if (CI->getLimitedValue(SSPBufferSize) >= SSPBufferSize) { // A call to alloca with size >= SSPBufferSize requires // stack protectors. - return true; - } else // A call to alloca with a variable size requires protectors. - return true; + Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); + NeedsProtector = true; + } else if (Strong) { + // Require protectors for all alloca calls in strong mode. + Layout.insert(std::make_pair(AI, SSPLK_SmallArray)); + NeedsProtector = true; + } + } else { + // A call to alloca with a variable size requires protectors. 
+ Layout.insert(std::make_pair(AI, SSPLK_LargeArray)); + NeedsProtector = true; + } + continue; } - if (ContainsProtectableArray(AI->getAllocatedType(), Strong)) - return true; + bool IsLarge = false; + if (ContainsProtectableArray(AI->getAllocatedType(), IsLarge, Strong)) { + Layout.insert(std::make_pair(AI, IsLarge ? SSPLK_LargeArray + : SSPLK_SmallArray)); + NeedsProtector = true; + continue; + } if (Strong && HasAddressTaken(AI)) { - ++NumAddrTaken; - return true; + ++NumAddrTaken; + Layout.insert(std::make_pair(AI, SSPLK_AddrOf)); + NeedsProtector = true; } } } } - return false; + return NeedsProtector; +} + +static bool InstructionWillNotHaveChain(const Instruction *I) { + return !I->mayHaveSideEffects() && !I->mayReadFromMemory() && + isSafeToSpeculativelyExecute(I); +} + +/// Identify if RI has a previous instruction in the "Tail Position" and return +/// it. Otherwise return 0. +/// +/// This is based on the code in llvm::isInTailCallPosition. The difference +/// is that it inverts the first part of llvm::isInTailCallPosition since +/// isInTailCallPosition is checking if a call is in a tail call position, and +/// we are searching for an unknown tail call that might be in the tail call +/// position. Once we find the call, though, the code uses the same refactored +/// helper, returnTypeIsEligibleForTailCall. +static CallInst *FindPotentialTailCall(BasicBlock *BB, ReturnInst *RI, + const TargetLoweringBase *TLI) { + // Establish a reasonable upper bound on the number of instructions we + // will look through to find a tail call. + unsigned SearchCounter = 0; + const unsigned MaxSearch = 4; + bool NoInterposingChain = true; + + for (BasicBlock::reverse_iterator I = llvm::next(BB->rbegin()), + E = BB->rend(); + I != E && SearchCounter < MaxSearch; ++I) { + Instruction *Inst = &*I; + + // Skip over debug intrinsics and do not allow them to affect our MaxSearch + // counter. + if (isa(Inst)) + continue; + + // If we find a call and the following conditions are satisfied, then we + // have found a tail call that satisfies at least the target independent + // requirements of a tail call: + // + // 1. The call site has the tail marker. + // + // 2. The call site either will not cause the creation of a chain, or, if a + // chain is necessary, there are no instructions between the call site and + // the return which would create an interposing chain. + // + // 3. The return type of the function does not impede tail call + // optimization. + if (CallInst *CI = dyn_cast(Inst)) { + if (CI->isTailCall() && + (InstructionWillNotHaveChain(CI) || NoInterposingChain) && + returnTypeIsEligibleForTailCall(BB->getParent(), CI, RI, *TLI)) + return CI; + } + + // If we did not find a call see if we have an instruction that may create + // an interposing chain. + NoInterposingChain = + NoInterposingChain && InstructionWillNotHaveChain(Inst); + + // Increment the search counter. + SearchCounter++; + } + + return 0; +} + +/// Insert code into the entry block that stores the __stack_chk_guard +/// variable onto the stack: +/// +/// entry: +/// StackGuardSlot = alloca i8* +/// StackGuard = load __stack_chk_guard +/// call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) +/// +/// Returns true if the platform/triple supports the stackprotectorcreate pseudo +/// node.
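For orientation, the doc comment above shows the prologue half of the scheme; the matching IR-level epilogue check is built later in this hunk. A hedged sketch of that check using the 3.4-era IRBuilder API (function and parameter names are illustrative):

  #include "llvm/IR/IRBuilder.h"

  // Emit, at the end of BB:
  //   %1 = load the guard variable
  //   %2 = load the StackGuardSlot alloca
  //   br (icmp eq %1, %2), %SP_return, %CallStackCheckFailBlk
  static void emitEpilogueCheck(llvm::BasicBlock *BB, llvm::Value *GuardVar,
                                llvm::AllocaInst *Slot,
                                llvm::BasicBlock *RetBB,
                                llvm::BasicBlock *FailBB) {
    llvm::IRBuilder<> B(BB);
    llvm::LoadInst *L1 = B.CreateLoad(GuardVar);
    llvm::LoadInst *L2 = B.CreateLoad(Slot);
    llvm::Value *Cmp = B.CreateICmpEQ(L1, L2);
    B.CreateCondBr(Cmp, RetBB, FailBB);
  }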
+static bool CreatePrologue(Function *F, Module *M, ReturnInst *RI, + const TargetLoweringBase *TLI, const Triple &Trip, + AllocaInst *&AI, Value *&StackGuardVar) { + bool SupportsSelectionDAGSP = false; + PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); + unsigned AddressSpace, Offset; + if (TLI->getStackCookieLocation(AddressSpace, Offset)) { + Constant *OffsetVal = + ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); + + StackGuardVar = ConstantExpr::getIntToPtr( + OffsetVal, PointerType::get(PtrTy, AddressSpace)); + } else if (Trip.getOS() == llvm::Triple::OpenBSD) { + StackGuardVar = M->getOrInsertGlobal("__guard_local", PtrTy); + cast(StackGuardVar) + ->setVisibility(GlobalValue::HiddenVisibility); + } else { + SupportsSelectionDAGSP = true; + StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + } + + IRBuilder<> B(&F->getEntryBlock().front()); + AI = B.CreateAlloca(PtrTy, 0, "StackGuardSlot"); + LoadInst *LI = B.CreateLoad(StackGuardVar, "StackGuard"); + B.CreateCall2(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), LI, + AI); + + return SupportsSelectionDAGSP; } /// InsertStackProtectors - Insert code into the prologue and epilogue of the @@ -256,102 +349,102 @@ bool StackProtector::RequiresStackProtector() { /// - The epilogue checks the value stored in the prologue against the original /// value. It calls __stack_chk_fail if they differ. bool StackProtector::InsertStackProtectors() { - BasicBlock *FailBB = 0; // The basic block to jump to if check fails. - BasicBlock *FailBBDom = 0; // FailBB's dominator. - AllocaInst *AI = 0; // Place on stack that stores the stack guard. - Value *StackGuardVar = 0; // The stack guard variable. + bool HasPrologue = false; + bool SupportsSelectionDAGSP = + EnableSelectionDAGSP && !TM->Options.EnableFastISel; + AllocaInst *AI = 0; // Place on stack that stores the stack guard. + Value *StackGuardVar = 0; // The stack guard variable. - for (Function::iterator I = F->begin(), E = F->end(); I != E; ) { + for (Function::iterator I = F->begin(), E = F->end(); I != E;) { BasicBlock *BB = I++; ReturnInst *RI = dyn_cast(BB->getTerminator()); - if (!RI) continue; + if (!RI) + continue; - if (!FailBB) { - // Insert code into the entry block that stores the __stack_chk_guard - // variable onto the stack: - // - // entry: - // StackGuardSlot = alloca i8* - // StackGuard = load __stack_chk_guard - // call void @llvm.stackprotect.create(StackGuard, StackGuardSlot) - // - PointerType *PtrTy = Type::getInt8PtrTy(RI->getContext()); - unsigned AddressSpace, Offset; - if (TLI->getStackCookieLocation(AddressSpace, Offset)) { - Constant *OffsetVal = - ConstantInt::get(Type::getInt32Ty(RI->getContext()), Offset); - - StackGuardVar = ConstantExpr::getIntToPtr(OffsetVal, - PointerType::get(PtrTy, AddressSpace)); + if (!HasPrologue) { + HasPrologue = true; + SupportsSelectionDAGSP &= + CreatePrologue(F, M, RI, TLI, Trip, AI, StackGuardVar); + } + + if (SupportsSelectionDAGSP) { + // Since we have a potential tail call, insert the special stack check + // intrinsic. + Instruction *InsertionPt = 0; + if (CallInst *CI = FindPotentialTailCall(BB, RI, TLI)) { + InsertionPt = CI; } else { - StackGuardVar = M->getOrInsertGlobal("__stack_chk_guard", PtrTy); + InsertionPt = RI; + // At this point we know that BB has a return statement so it *DOES* + // have a terminator. 
+ assert(InsertionPt != 0 && "BB must have a terminator instruction at " + "this point."); } - BasicBlock &Entry = F->getEntryBlock(); - Instruction *InsPt = &Entry.front(); - - AI = new AllocaInst(PtrTy, "StackGuardSlot", InsPt); - LoadInst *LI = new LoadInst(StackGuardVar, "StackGuard", false, InsPt); + Function *Intrinsic = + Intrinsic::getDeclaration(M, Intrinsic::stackprotectorcheck); + CallInst::Create(Intrinsic, StackGuardVar, "", InsertionPt); - Value *Args[] = { LI, AI }; - CallInst:: - Create(Intrinsic::getDeclaration(M, Intrinsic::stackprotector), - Args, "", InsPt); - - // Create the basic block to jump to when the guard check fails. - FailBB = CreateFailBB(); - } + } else { + // If we do not support SelectionDAG based tail calls, generate IR level + // tail calls. + // + // For each block with a return instruction, convert this: + // + // return: + // ... + // ret ... + // + // into this: + // + // return: + // ... + // %1 = load __stack_chk_guard + // %2 = load StackGuardSlot + // %3 = cmp i1 %1, %2 + // br i1 %3, label %SP_return, label %CallStackCheckFailBlk + // + // SP_return: + // ret ... + // + // CallStackCheckFailBlk: + // call void @__stack_chk_fail() + // unreachable + + // Create the FailBB. We duplicate the BB every time since the MI tail + // merge pass will merge together all of the various BB into one including + // fail BB generated by the stack protector pseudo instruction. + BasicBlock *FailBB = CreateFailBB(); + + // Split the basic block before the return instruction. + BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + + // Update the dominator tree if we need to. + if (DT && DT->isReachableFromEntry(BB)) { + DT->addNewBlock(NewBB, BB); + DT->addNewBlock(FailBB, BB); + } - // For each block with a return instruction, convert this: - // - // return: - // ... - // ret ... - // - // into this: - // - // return: - // ... - // %1 = load __stack_chk_guard - // %2 = load StackGuardSlot - // %3 = cmp i1 %1, %2 - // br i1 %3, label %SP_return, label %CallStackCheckFailBlk - // - // SP_return: - // ret ... - // - // CallStackCheckFailBlk: - // call void @__stack_chk_fail() - // unreachable + // Remove default branch instruction to the new BB. + BB->getTerminator()->eraseFromParent(); - // Split the basic block before the return instruction. - BasicBlock *NewBB = BB->splitBasicBlock(RI, "SP_return"); + // Move the newly created basic block to the point right after the old + // basic block so that it's in the "fall through" position. + NewBB->moveAfter(BB); - if (DT && DT->isReachableFromEntry(BB)) { - DT->addNewBlock(NewBB, BB); - FailBBDom = FailBBDom ? DT->findNearestCommonDominator(FailBBDom, BB) :BB; + // Generate the stack protector instructions in the old basic block. + IRBuilder<> B(BB); + LoadInst *LI1 = B.CreateLoad(StackGuardVar); + LoadInst *LI2 = B.CreateLoad(AI); + Value *Cmp = B.CreateICmpEQ(LI1, LI2); + B.CreateCondBr(Cmp, NewBB, FailBB); } - - // Remove default branch instruction to the new BB. - BB->getTerminator()->eraseFromParent(); - - // Move the newly created basic block to the point right after the old basic - // block so that it's in the "fall through" position. - NewBB->moveAfter(BB); - - // Generate the stack protector instructions in the old basic block. - LoadInst *LI1 = new LoadInst(StackGuardVar, "", false, BB); - LoadInst *LI2 = new LoadInst(AI, "", true, BB); - ICmpInst *Cmp = new ICmpInst(*BB, CmpInst::ICMP_EQ, LI1, LI2, ""); - BranchInst::Create(NewBB, FailBB, Cmp, BB); } // Return if we didn't modify any basic blocks. 
I.e., there are no return // statements in the function. - if (!FailBB) return false; - - if (DT && FailBBDom) - DT->addNewBlock(FailBB, FailBBDom); + if (!HasPrologue) + return false; return true; } @@ -359,12 +452,20 @@ bool StackProtector::InsertStackProtectors() { /// CreateFailBB - Create a basic block to jump to when the stack protector /// check fails. BasicBlock *StackProtector::CreateFailBB() { - BasicBlock *FailBB = BasicBlock::Create(F->getContext(), - "CallStackCheckFailBlk", F); - Constant *StackChkFail = - M->getOrInsertFunction("__stack_chk_fail", - Type::getVoidTy(F->getContext()), NULL); - CallInst::Create(StackChkFail, "", FailBB); - new UnreachableInst(F->getContext(), FailBB); + LLVMContext &Context = F->getContext(); + BasicBlock *FailBB = BasicBlock::Create(Context, "CallStackCheckFailBlk", F); + IRBuilder<> B(FailBB); + if (Trip.getOS() == llvm::Triple::OpenBSD) { + Constant *StackChkFail = M->getOrInsertFunction( + "__stack_smash_handler", Type::getVoidTy(Context), + Type::getInt8PtrTy(Context), NULL); + + B.CreateCall(StackChkFail, B.CreateGlobalStringPtr(F->getName(), "SSH")); + } else { + Constant *StackChkFail = M->getOrInsertFunction( + "__stack_chk_fail", Type::getVoidTy(Context), NULL); + B.CreateCall(StackChkFail); + } + B.CreateUnreachable(); return FailBB; } diff --git a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp index f951561..9f44df8 100644 --- a/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp +++ b/contrib/llvm/lib/CodeGen/StackSlotColoring.cpp @@ -14,20 +14,20 @@ #define DEBUG_TYPE "stackslotcoloring" #include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include @@ -48,13 +48,16 @@ namespace { LiveStacks* LS; MachineFrameInfo *MFI; const TargetInstrInfo *TII; - const MachineLoopInfo *loopInfo; + const MachineBlockFrequencyInfo *MBFI; // SSIntervals - Spill slot intervals. std::vector SSIntervals; - // SSRefs - Keep a list of frame index references for each spill slot. - SmallVector, 16> SSRefs; + // SSRefs - Keep a list of MachineMemOperands for each spill slot. + // MachineMemOperands can be shared between instructions, so we need + // to be careful that renames like [FI0, FI1] -> [FI1, FI2] do not + // become FI0 -> FI1 -> FI2. + SmallVector, 16> SSRefs; // OrigAlignments - Alignments of stack objects before coloring. 
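The rewritten CreateFailBB above targets two different runtime hooks depending on the OS. Their C-level shapes, inferred from the call sites built above (the real definitions live in the platform C library, so treat these declarations as illustrative):

  extern "C" void __stack_chk_fail(void);           // common case: no args
  extern "C" void __stack_smash_handler(const char *Func); // OpenBSD: gets
                                                    // the failing function's
                                                    // name

Passing the function name lets OpenBSD's handler report which frame was smashed, which is why that branch of the fail block also materializes a global string from F->getName().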
SmallVector OrigAlignments; @@ -89,8 +92,8 @@ namespace { AU.addRequired(); AU.addPreserved(); AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); + AU.addRequired(); + AU.addPreserved(); AU.addPreservedID(MachineDominatorsID); MachineFunctionPass::getAnalysisUsage(AU); } @@ -103,7 +106,7 @@ namespace { bool OverlapWithAssignments(LiveInterval *li, int Color) const; int ColorSlot(LiveInterval *li); bool ColorSlots(MachineFunction &MF); - void RewriteInstruction(MachineInstr *MI, int OldFI, int NewFI, + void RewriteInstruction(MachineInstr *MI, SmallVectorImpl &SlotMapping, MachineFunction &MF); bool RemoveDeadStores(MachineBasicBlock* MBB); }; @@ -139,7 +142,7 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); MBBI != E; ++MBBI) { MachineBasicBlock *MBB = &*MBBI; - unsigned loopDepth = loopInfo->getLoopDepth(MBB); + BlockFrequency Freq = MBFI->getBlockFreq(MBB); for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); MII != EE; ++MII) { MachineInstr *MI = &*MII; @@ -154,8 +157,19 @@ void StackSlotColoring::ScanForSpillSlotRefs(MachineFunction &MF) { continue; LiveInterval &li = LS->getInterval(FI); if (!MI->isDebugValue()) - li.weight += LiveIntervals::getSpillWeight(false, true, loopDepth); - SSRefs[FI].push_back(MI); + li.weight += LiveIntervals::getSpillWeight(false, true, Freq); + } + for (MachineInstr::mmo_iterator MMOI = MI->memoperands_begin(), + EE = MI->memoperands_end(); MMOI != EE; ++MMOI) { + MachineMemOperand *MMO = *MMOI; + if (const Value *V = MMO->getValue()) { + if (const FixedStackPseudoSourceValue *FSV = + dyn_cast(V)) { + int FI = FSV->getFrameIndex(); + if (FI >= 0) + SSRefs[FI].push_back(MMO); + } + } } } } @@ -197,7 +211,7 @@ void StackSlotColoring::InitializeSlots() { /// LiveIntervals that have already been assigned to the specified color. bool StackSlotColoring::OverlapWithAssignments(LiveInterval *li, int Color) const { - const SmallVector &OtherLIs = Assignments[Color]; + const SmallVectorImpl &OtherLIs = Assignments[Color]; for (unsigned i = 0, e = OtherLIs.size(); i != e; ++i) { LiveInterval *OtherLI = OtherLIs[i]; if (OtherLI->overlaps(*li)) @@ -291,16 +305,26 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { if (!Changed) return false; - // Rewrite all MO_FrameIndex operands. - SmallVector, 4> NewDefs(MF.getNumBlockIDs()); + // Rewrite all MachineMemOperands. for (unsigned SS = 0, SE = SSRefs.size(); SS != SE; ++SS) { int NewFI = SlotMapping[SS]; if (NewFI == -1 || (NewFI == (int)SS)) continue; - SmallVector &RefMIs = SSRefs[SS]; - for (unsigned i = 0, e = RefMIs.size(); i != e; ++i) - RewriteInstruction(RefMIs[i], SS, NewFI, MF); + const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); + SmallVectorImpl &RefMMOs = SSRefs[SS]; + for (unsigned i = 0, e = RefMMOs.size(); i != e; ++i) + RefMMOs[i]->setValue(NewSV); + } + + // Rewrite all MO_FrameIndex operands. Look for dead stores. + for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end(); + MBBI != E; ++MBBI) { + MachineBasicBlock *MBB = &*MBBI; + for (MachineBasicBlock::iterator MII = MBB->begin(), EE = MBB->end(); + MII != EE; ++MII) + RewriteInstruction(MII, SlotMapping, MF); + RemoveDeadStores(MBB); } // Delete unused stack slots. @@ -315,28 +339,24 @@ bool StackSlotColoring::ColorSlots(MachineFunction &MF) { /// RewriteInstruction - Rewrite specified instruction by replacing references /// to old frame index with new one. 
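The comment on SSRefs above is the key invariant behind this rewrite: MachineMemOperands are shared, so every reference must be renamed exactly once, from its original index, through one mapping. A standalone sketch of the lookup, and of why pairwise renaming would chain (names illustrative):

  #include <vector>

  // SlotMapping[Old] is the new index, or -1 when the slot is unassigned.
  // One pass over all references turns [FI0, FI1] into [FI1, FI2]; renaming
  // pairwise (0->1 first, then 1->2) would instead drag FI0 all the way
  // to FI2.
  static int renameOnce(const std::vector<int> &SlotMapping, int OldFI) {
    int NewFI = SlotMapping[OldFI];
    return (NewFI == -1 || NewFI == OldFI) ? OldFI : NewFI;
  }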
-void StackSlotColoring::RewriteInstruction(MachineInstr *MI, int OldFI, - int NewFI, MachineFunction &MF) { +void StackSlotColoring::RewriteInstruction(MachineInstr *MI, + SmallVectorImpl &SlotMapping, + MachineFunction &MF) { // Update the operands. for (unsigned i = 0, ee = MI->getNumOperands(); i != ee; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isFI()) continue; - int FI = MO.getIndex(); - if (FI != OldFI) + int OldFI = MO.getIndex(); + if (OldFI < 0) + continue; + int NewFI = SlotMapping[OldFI]; + if (NewFI == -1 || NewFI == OldFI) continue; MO.setIndex(NewFI); } - // Update the memory references. This changes the MachineMemOperands - // directly. They may be in use by multiple instructions, however all - // instructions using OldFI are being rewritten to use NewFI. - const Value *OldSV = PseudoSourceValue::getFixedStack(OldFI); - const Value *NewSV = PseudoSourceValue::getFixedStack(NewFI); - for (MachineInstr::mmo_iterator I = MI->memoperands_begin(), - E = MI->memoperands_end(); I != E; ++I) - if ((*I)->getValue() == OldSV) - (*I)->setValue(NewSV); + // The MachineMemOperands have already been updated. } @@ -357,10 +377,19 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { if (DCELimit != -1 && (int)NumDead >= DCELimit) break; + int FirstSS, SecondSS; + if (TII->isStackSlotCopy(I, FirstSS, SecondSS) && + FirstSS == SecondSS && + FirstSS != -1) { + ++NumDead; + changed = true; + toErase.push_back(I); + continue; + } + MachineBasicBlock::iterator NextMI = llvm::next(I); if (NextMI == MBB->end()) continue; - int FirstSS, SecondSS; unsigned LoadReg = 0; unsigned StoreReg = 0; if (!(LoadReg = TII->isLoadFromStackSlot(I, FirstSS))) continue; @@ -379,7 +408,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) { ++I; } - for (SmallVector::iterator I = toErase.begin(), + for (SmallVectorImpl::iterator I = toErase.begin(), E = toErase.end(); I != E; ++I) (*I)->eraseFromParent(); @@ -396,7 +425,7 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { MFI = MF.getFrameInfo(); TII = MF.getTarget().getInstrInfo(); LS = &getAnalysis(); - loopInfo = &getAnalysis(); + MBFI = &getAnalysis(); bool Changed = false; @@ -430,10 +459,5 @@ bool StackSlotColoring::runOnMachineFunction(MachineFunction &MF) { Assignments[i].clear(); Assignments.clear(); - if (Changed) { - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= RemoveDeadStores(I); - } - return Changed; } diff --git a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp b/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp deleted file mode 100644 index b337c53..0000000 --- a/contrib/llvm/lib/CodeGen/StrongPHIElimination.cpp +++ /dev/null @@ -1,825 +0,0 @@ -//===- StrongPHIElimination.cpp - Eliminate PHI nodes by inserting copies -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This pass eliminates PHI instructions by aggressively coalescing the copies -// that would be inserted by a naive algorithm and only inserting the copies -// that are necessary. The coalescing technique initially assumes that all -// registers appearing in a PHI instruction do not interfere. 
It then eliminates -// proven interferences, using dominators to only perform a linear number of -// interference tests instead of the quadratic number of interference tests -// that this would naively require. This is a technique derived from: -// -// Budimlic, et al. Fast copy coalescing and live-range identification. -// In Proceedings of the ACM SIGPLAN 2002 Conference on Programming Language -// Design and Implementation (Berlin, Germany, June 17 - 19, 2002). -// PLDI '02. ACM, New York, NY, 25-32. -// -// The original implementation constructs a data structure they call a dominance -// forest for this purpose. The dominance forest was shown to be unnecessary, -// as it is possible to emulate the creation and traversal of a dominance forest -// by directly using the dominator tree, rather than actually constructing the -// dominance forest. This technique is explained in: -// -// Boissinot, et al. Revisiting Out-of-SSA Translation for Correctness, Code -// Quality and Efficiency, -// In Proceedings of the 7th annual IEEE/ACM International Symposium on Code -// Generation and Optimization (Seattle, Washington, March 22 - 25, 2009). -// CGO '09. IEEE, Washington, DC, 114-125. -// -// Careful implementation allows for all of the dominator forest interference -// checks to be performed at once in a single depth-first traversal of the -// dominator tree, which is what is implemented here. -// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "strongphielim" -#include "llvm/CodeGen/Passes.h" -#include "PHIEliminationUtils.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Target/TargetInstrInfo.h" -using namespace llvm; - -namespace { - class StrongPHIElimination : public MachineFunctionPass { - public: - static char ID; // Pass identification, replacement for typeid - StrongPHIElimination() : MachineFunctionPass(ID) { - initializeStrongPHIEliminationPass(*PassRegistry::getPassRegistry()); - } - - virtual void getAnalysisUsage(AnalysisUsage&) const; - bool runOnMachineFunction(MachineFunction&); - - private: - /// This struct represents a single node in the union-find data structure - /// representing the variable congruence classes. There is one difference - /// from a normal union-find data structure. We steal two bits from the parent - /// pointer . One of these bits is used to represent whether the register - /// itself has been isolated, and the other is used to represent whether the - /// PHI with that register as its destination has been isolated. - /// - /// Note that this leads to the strange situation where the leader of a - /// congruence class may no longer logically be a member, due to being - /// isolated. - struct Node { - enum Flags { - kRegisterIsolatedFlag = 1, - kPHIIsolatedFlag = 2 - }; - Node(unsigned v) : value(v), rank(0) { parent.setPointer(this); } - - Node *getLeader(); - - PointerIntPair parent; - unsigned value; - unsigned rank; - }; - - /// Add a register in a new congruence class containing only itself. - void addReg(unsigned); - - /// Join the congruence classes of two registers. This function is biased - /// towards the left argument, i.e. 
after - /// - /// addReg(r2); - /// unionRegs(r1, r2); - /// - /// the leader of the unioned congruence class is the same as the leader of - /// r1's congruence class prior to the union. This is actually relied upon - /// in the copy insertion code. - void unionRegs(unsigned, unsigned); - - /// Get the color of a register. The color is 0 if the register has been - /// isolated. - unsigned getRegColor(unsigned); - - // Isolate a register. - void isolateReg(unsigned); - - /// Get the color of a PHI. The color of a PHI is 0 if the PHI has been - /// isolated. Otherwise, it is the original color of its destination and - /// all of its operands (before they were isolated, if they were). - unsigned getPHIColor(MachineInstr*); - - /// Isolate a PHI. - void isolatePHI(MachineInstr*); - - /// Traverses a basic block, splitting any interferences found between - /// registers in the same congruence class. It takes two DenseMaps as - /// arguments that it also updates: CurrentDominatingParent, which maps - /// a color to the register in that congruence class whose definition was - /// most recently seen, and ImmediateDominatingParent, which maps a register - /// to the register in the same congruence class that most immediately - /// dominates it. - /// - /// This function assumes that it is being called in a depth-first traversal - /// of the dominator tree. - void SplitInterferencesForBasicBlock( - MachineBasicBlock&, - DenseMap &CurrentDominatingParent, - DenseMap &ImmediateDominatingParent); - - // Lowers a PHI instruction, inserting copies of the source and destination - // registers as necessary. - void InsertCopiesForPHI(MachineInstr*, MachineBasicBlock*); - - // Merges the live interval of Reg into NewReg and renames Reg to NewReg - // everywhere that Reg appears. Requires Reg and NewReg to have non- - // overlapping lifetimes. - void MergeLIsAndRename(unsigned Reg, unsigned NewReg); - - MachineRegisterInfo *MRI; - const TargetInstrInfo *TII; - MachineDominatorTree *DT; - LiveIntervals *LI; - - BumpPtrAllocator Allocator; - - DenseMap RegNodeMap; - - // Maps a basic block to a list of its defs of registers that appear as PHI - // sources. - DenseMap > PHISrcDefs; - - // Maps a color to a pair of a MachineInstr* and a virtual register, which - // is the operand of that PHI corresponding to the current basic block. - DenseMap > CurrentPHIForColor; - - // FIXME: Can these two data structures be combined? Would a std::multimap - // be any better? - - // Stores pairs of predecessor basic blocks and the source registers of - // inserted copy instructions. - typedef DenseSet > SrcCopySet; - SrcCopySet InsertedSrcCopySet; - - // Maps pairs of predecessor basic blocks and colors to their defining copy - // instructions. - typedef DenseMap, MachineInstr*> - SrcCopyMap; - SrcCopyMap InsertedSrcCopyMap; - - // Maps inserted destination copy registers to their defining copy - // instructions. 
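The congruence classes in this deleted pass are a textbook union-find, with the twist described above that two "isolated" flag bits are stolen from the parent pointer. A compilable toy of the same structure, holding the flags in a plain field instead of a PointerIntPair:

  #include <cstdint>
  #include <utility>

  struct UFNode {
    enum { RegIsolated = 1, PHIIsolated = 2 };
    UFNode *Parent;   // roots point at themselves
    uint8_t Flags;    // upstream: packed into the pointer's low bits
    unsigned Value;   // the virtual register this node stands for
    unsigned Rank;
    explicit UFNode(unsigned V) : Parent(this), Flags(0), Value(V), Rank(0) {}

    UFNode *getLeader() {            // find, with path compression
      if (Parent != this)
        Parent = Parent->getLeader();
      return Parent;
    }
  };

  // Union by rank; on equal ranks the left argument's leader survives,
  // the bias the copy-insertion code relies on.
  static void unionNodes(UFNode *A, UFNode *B) {
    A = A->getLeader();
    B = B->getLeader();
    if (A == B)
      return;
    if (A->Rank < B->Rank)
      std::swap(A, B);               // deeper tree becomes the root
    B->Parent = A;
    if (A->Rank == B->Rank)
      ++A->Rank;
  }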
- typedef DenseMap DestCopyMap; - DestCopyMap InsertedDestCopies; - }; - - struct MIIndexCompare { - MIIndexCompare(LiveIntervals *LiveIntervals) : LI(LiveIntervals) { } - - bool operator()(const MachineInstr *LHS, const MachineInstr *RHS) const { - return LI->getInstructionIndex(LHS) < LI->getInstructionIndex(RHS); - } - - LiveIntervals *LI; - }; -} // namespace - -STATISTIC(NumPHIsLowered, "Number of PHIs lowered"); -STATISTIC(NumDestCopiesInserted, "Number of destination copies inserted"); -STATISTIC(NumSrcCopiesInserted, "Number of source copies inserted"); - -char StrongPHIElimination::ID = 0; -INITIALIZE_PASS_BEGIN(StrongPHIElimination, "strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently", false, false) -INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_END(StrongPHIElimination, "strong-phi-node-elimination", - "Eliminate PHI nodes for register allocation, intelligently", false, false) - -char &llvm::StrongPHIEliminationID = StrongPHIElimination::ID; - -void StrongPHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - AU.addRequired(); - AU.addRequired(); - AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -static MachineOperand *findLastUse(MachineBasicBlock *MBB, unsigned Reg) { - // FIXME: This only needs to check from the first terminator, as only the - // first terminator can use a virtual register. - for (MachineBasicBlock::reverse_iterator RI = MBB->rbegin(); ; ++RI) { - assert (RI != MBB->rend()); - MachineInstr *MI = &*RI; - - for (MachineInstr::mop_iterator OI = MI->operands_begin(), - OE = MI->operands_end(); OI != OE; ++OI) { - MachineOperand &MO = *OI; - if (MO.isReg() && MO.isUse() && MO.getReg() == Reg) - return &MO; - } - } -} - -bool StrongPHIElimination::runOnMachineFunction(MachineFunction &MF) { - MRI = &MF.getRegInfo(); - TII = MF.getTarget().getInstrInfo(); - DT = &getAnalysis(); - LI = &getAnalysis(); - - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - unsigned DestReg = BBI->getOperand(0).getReg(); - addReg(DestReg); - PHISrcDefs[I].push_back(BBI); - - for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { - MachineOperand &SrcMO = BBI->getOperand(i); - unsigned SrcReg = SrcMO.getReg(); - addReg(SrcReg); - unionRegs(DestReg, SrcReg); - - MachineInstr *DefMI = MRI->getVRegDef(SrcReg); - if (DefMI) - PHISrcDefs[DefMI->getParent()].push_back(DefMI); - } - } - } - - // Perform a depth-first traversal of the dominator tree, splitting - // interferences amongst PHI-congruence classes. - DenseMap CurrentDominatingParent; - DenseMap ImmediateDominatingParent; - for (df_iterator DI = df_begin(DT->getRootNode()), - DE = df_end(DT->getRootNode()); DI != DE; ++DI) { - SplitInterferencesForBasicBlock(*DI->getBlock(), - CurrentDominatingParent, - ImmediateDominatingParent); - } - - // Insert copies for all PHI source and destination registers. 
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - InsertCopiesForPHI(BBI, I); - } - } - - // FIXME: Preserve the equivalence classes during copy insertion and use - // the preversed equivalence classes instead of recomputing them. - RegNodeMap.clear(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - for (MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - unsigned DestReg = BBI->getOperand(0).getReg(); - addReg(DestReg); - - for (unsigned i = 1; i < BBI->getNumOperands(); i += 2) { - unsigned SrcReg = BBI->getOperand(i).getReg(); - addReg(SrcReg); - unionRegs(DestReg, SrcReg); - } - } - } - - DenseMap RegRenamingMap; - bool Changed = false; - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); - I != E; ++I) { - MachineBasicBlock::iterator BBI = I->begin(), BBE = I->end(); - while (BBI != BBE && BBI->isPHI()) { - MachineInstr *PHI = BBI; - - assert(PHI->getNumOperands() > 0); - - unsigned SrcReg = PHI->getOperand(1).getReg(); - unsigned SrcColor = getRegColor(SrcReg); - unsigned NewReg = RegRenamingMap[SrcColor]; - if (!NewReg) { - NewReg = SrcReg; - RegRenamingMap[SrcColor] = SrcReg; - } - MergeLIsAndRename(SrcReg, NewReg); - - unsigned DestReg = PHI->getOperand(0).getReg(); - if (!InsertedDestCopies.count(DestReg)) - MergeLIsAndRename(DestReg, NewReg); - - for (unsigned i = 3; i < PHI->getNumOperands(); i += 2) { - unsigned SrcReg = PHI->getOperand(i).getReg(); - MergeLIsAndRename(SrcReg, NewReg); - } - - ++BBI; - LI->RemoveMachineInstrFromMaps(PHI); - PHI->eraseFromParent(); - Changed = true; - } - } - - // Due to the insertion of copies to split live ranges, the live intervals are - // guaranteed to not overlap, except in one case: an original PHI source and a - // PHI destination copy. In this case, they have the same value and thus don't - // truly intersect, so we merge them into the value live at that point. - // FIXME: Is there some better way we can handle this? - for (DestCopyMap::iterator I = InsertedDestCopies.begin(), - E = InsertedDestCopies.end(); I != E; ++I) { - unsigned DestReg = I->first; - unsigned DestColor = getRegColor(DestReg); - unsigned NewReg = RegRenamingMap[DestColor]; - - LiveInterval &DestLI = LI->getInterval(DestReg); - LiveInterval &NewLI = LI->getInterval(NewReg); - - assert(DestLI.ranges.size() == 1 - && "PHI destination copy's live interval should be a single live " - "range from the beginning of the BB to the copy instruction."); - LiveRange *DestLR = DestLI.begin(); - VNInfo *NewVNI = NewLI.getVNInfoAt(DestLR->start); - if (!NewVNI) { - NewVNI = NewLI.createValueCopy(DestLR->valno, LI->getVNInfoAllocator()); - MachineInstr *CopyInstr = I->second; - CopyInstr->getOperand(1).setIsKill(true); - } - - LiveRange NewLR(DestLR->start, DestLR->end, NewVNI); - NewLI.addRange(NewLR); - - LI->removeInterval(DestReg); - MRI->replaceRegWith(DestReg, NewReg); - } - - // Adjust the live intervals of all PHI source registers to handle the case - // where the PHIs in successor blocks were the only later uses of the source - // register. 
- for (SrcCopySet::iterator I = InsertedSrcCopySet.begin(), - E = InsertedSrcCopySet.end(); I != E; ++I) { - MachineBasicBlock *MBB = I->first; - unsigned SrcReg = I->second; - if (unsigned RenamedRegister = RegRenamingMap[getRegColor(SrcReg)]) - SrcReg = RenamedRegister; - - LiveInterval &SrcLI = LI->getInterval(SrcReg); - - bool isLiveOut = false; - for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), - SE = MBB->succ_end(); SI != SE; ++SI) { - if (SrcLI.liveAt(LI->getMBBStartIdx(*SI))) { - isLiveOut = true; - break; - } - } - - if (isLiveOut) - continue; - - MachineOperand *LastUse = findLastUse(MBB, SrcReg); - assert(LastUse); - SlotIndex LastUseIndex = LI->getInstructionIndex(LastUse->getParent()); - SrcLI.removeRange(LastUseIndex.getRegSlot(), LI->getMBBEndIdx(MBB)); - LastUse->setIsKill(true); - } - - Allocator.Reset(); - RegNodeMap.clear(); - PHISrcDefs.clear(); - InsertedSrcCopySet.clear(); - InsertedSrcCopyMap.clear(); - InsertedDestCopies.clear(); - - return Changed; -} - -void StrongPHIElimination::addReg(unsigned Reg) { - Node *&N = RegNodeMap[Reg]; - if (!N) - N = new (Allocator) Node(Reg); -} - -StrongPHIElimination::Node* -StrongPHIElimination::Node::getLeader() { - Node *N = this; - Node *Parent = parent.getPointer(); - Node *Grandparent = Parent->parent.getPointer(); - - while (Parent != Grandparent) { - N->parent.setPointer(Grandparent); - N = Grandparent; - Parent = Parent->parent.getPointer(); - Grandparent = Parent->parent.getPointer(); - } - - return Parent; -} - -unsigned StrongPHIElimination::getRegColor(unsigned Reg) { - DenseMap::iterator RI = RegNodeMap.find(Reg); - if (RI == RegNodeMap.end()) - return 0; - Node *Node = RI->second; - if (Node->parent.getInt() & Node::kRegisterIsolatedFlag) - return 0; - return Node->getLeader()->value; -} - -void StrongPHIElimination::unionRegs(unsigned Reg1, unsigned Reg2) { - Node *Node1 = RegNodeMap[Reg1]->getLeader(); - Node *Node2 = RegNodeMap[Reg2]->getLeader(); - - if (Node1->rank > Node2->rank) { - Node2->parent.setPointer(Node1->getLeader()); - } else if (Node1->rank < Node2->rank) { - Node1->parent.setPointer(Node2->getLeader()); - } else if (Node1 != Node2) { - Node2->parent.setPointer(Node1->getLeader()); - Node1->rank++; - } -} - -void StrongPHIElimination::isolateReg(unsigned Reg) { - Node *Node = RegNodeMap[Reg]; - Node->parent.setInt(Node->parent.getInt() | Node::kRegisterIsolatedFlag); -} - -unsigned StrongPHIElimination::getPHIColor(MachineInstr *PHI) { - assert(PHI->isPHI()); - - unsigned DestReg = PHI->getOperand(0).getReg(); - Node *DestNode = RegNodeMap[DestReg]; - if (DestNode->parent.getInt() & Node::kPHIIsolatedFlag) - return 0; - - for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { - unsigned SrcColor = getRegColor(PHI->getOperand(i).getReg()); - if (SrcColor) - return SrcColor; - } - return 0; -} - -void StrongPHIElimination::isolatePHI(MachineInstr *PHI) { - assert(PHI->isPHI()); - Node *Node = RegNodeMap[PHI->getOperand(0).getReg()]; - Node->parent.setInt(Node->parent.getInt() | Node::kPHIIsolatedFlag); -} - -/// SplitInterferencesForBasicBlock - traverses a basic block, splitting any -/// interferences found between registers in the same congruence class. It -/// takes two DenseMaps as arguments that it also updates: -/// -/// 1) CurrentDominatingParent, which maps a color to the register in that -/// congruence class whose definition was most recently seen. 
-/// -/// 2) ImmediateDominatingParent, which maps a register to the register in the -/// same congruence class that most immediately dominates it. -/// -/// This function assumes that it is being called in a depth-first traversal -/// of the dominator tree. -/// -/// The algorithm used here is a generalization of the dominance-based SSA test -/// for two variables. If there are variables a_1, ..., a_n such that -/// -/// def(a_1) dom ... dom def(a_n), -/// -/// then we can test for an interference between any two a_i by only using O(n) -/// interference tests between pairs of variables. If i < j and a_i and a_j -/// interfere, then a_i is alive at def(a_j), so it is also alive at def(a_i+1). -/// Thus, in order to test for an interference involving a_i, we need only check -/// for a potential interference with a_i+1. -/// -/// This method can be generalized to arbitrary sets of variables by performing -/// a depth-first traversal of the dominator tree. As we traverse down a branch -/// of the dominator tree, we keep track of the current dominating variable and -/// only perform an interference test with that variable. However, when we go to -/// another branch of the dominator tree, the definition of the current dominating -/// variable may no longer dominate the current block. In order to correct this, -/// we need to use a stack of past choices of the current dominating variable -/// and pop from this stack until we find a variable whose definition actually -/// dominates the current block. -/// -/// There will be one push on this stack for each variable that has become the -/// current dominating variable, so instead of using an explicit stack we can -/// simply associate the previous choice for a current dominating variable with -/// the new choice. This works better in our implementation, where we test for -/// interference in multiple distinct sets at once. -void -StrongPHIElimination::SplitInterferencesForBasicBlock( - MachineBasicBlock &MBB, - DenseMap &CurrentDominatingParent, - DenseMap &ImmediateDominatingParent) { - // Sort defs by their order in the original basic block, as the code below - // assumes that it is processing definitions in dominance order. - std::vector &DefInstrs = PHISrcDefs[&MBB]; - std::sort(DefInstrs.begin(), DefInstrs.end(), MIIndexCompare(LI)); - - for (std::vector::const_iterator BBI = DefInstrs.begin(), - BBE = DefInstrs.end(); BBI != BBE; ++BBI) { - for (MachineInstr::const_mop_iterator I = (*BBI)->operands_begin(), - E = (*BBI)->operands_end(); I != E; ++I) { - const MachineOperand &MO = *I; - - // FIXME: This would be faster if it were possible to bail out of checking - // an instruction's operands after the explicit defs, but this is incorrect - // for variadic instructions, which may appear before register allocation - // in the future. - if (!MO.isReg() || !MO.isDef()) - continue; - - unsigned DestReg = MO.getReg(); - if (!DestReg || !TargetRegisterInfo::isVirtualRegister(DestReg)) - continue; - - // If the virtual register being defined is not used in any PHI or has - // already been isolated, then there are no more interferences to check. - unsigned DestColor = getRegColor(DestReg); - if (!DestColor) - continue; - - // The input to this pass sometimes is not in SSA form in every basic - // block, as some virtual registers have redefinitions. We could eliminate - // this by fixing the passes that generate the non-SSA code, or we could - // handle it here by tracking defining machine instructions rather than - // virtual registers. 
For now, we just handle the situation conservatively - // in a way that will possibly lead to false interferences. - unsigned &CurrentParent = CurrentDominatingParent[DestColor]; - unsigned NewParent = CurrentParent; - if (NewParent == DestReg) - continue; - - // Pop registers from the stack represented by ImmediateDominatingParent - // until we find a parent that dominates the current instruction. - while (NewParent && (!DT->dominates(MRI->getVRegDef(NewParent), *BBI) - || !getRegColor(NewParent))) - NewParent = ImmediateDominatingParent[NewParent]; - - // If NewParent is nonzero, then its definition dominates the current - // instruction, so it is only necessary to check for the liveness of - // NewParent in order to check for an interference. - if (NewParent - && LI->getInterval(NewParent).liveAt(LI->getInstructionIndex(*BBI))) { - // If there is an interference, always isolate the new register. This - // could be improved by using a heuristic that decides which of the two - // registers to isolate. - isolateReg(DestReg); - CurrentParent = NewParent; - } else { - // If there is no interference, update ImmediateDominatingParent and set - // the CurrentDominatingParent for this color to the current register. - ImmediateDominatingParent[DestReg] = NewParent; - CurrentParent = DestReg; - } - } - } - - // We now walk the PHIs in successor blocks and check for interferences. This - // is necessary because the use of a PHI's operands are logically contained in - // the predecessor block. The def of a PHI's destination register is processed - // along with the other defs in a basic block. - - CurrentPHIForColor.clear(); - - for (MachineBasicBlock::succ_iterator SI = MBB.succ_begin(), - SE = MBB.succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::iterator BBI = (*SI)->begin(), BBE = (*SI)->end(); - BBI != BBE && BBI->isPHI(); ++BBI) { - MachineInstr *PHI = BBI; - - // If a PHI is already isolated, either by being isolated directly or - // having all of its operands isolated, ignore it. - unsigned Color = getPHIColor(PHI); - if (!Color) - continue; - - // Find the index of the PHI operand that corresponds to this basic block. - unsigned PredIndex; - for (PredIndex = 1; PredIndex < PHI->getNumOperands(); PredIndex += 2) { - if (PHI->getOperand(PredIndex + 1).getMBB() == &MBB) - break; - } - assert(PredIndex < PHI->getNumOperands()); - unsigned PredOperandReg = PHI->getOperand(PredIndex).getReg(); - - // Pop registers from the stack represented by ImmediateDominatingParent - // until we find a parent that dominates the current instruction. - unsigned &CurrentParent = CurrentDominatingParent[Color]; - unsigned NewParent = CurrentParent; - while (NewParent - && (!DT->dominates(MRI->getVRegDef(NewParent)->getParent(), &MBB) - || !getRegColor(NewParent))) - NewParent = ImmediateDominatingParent[NewParent]; - CurrentParent = NewParent; - - // If there is an interference with a register, always isolate the - // register rather than the PHI. It is also possible to isolate the - // PHI, but that introduces copies for all of the registers involved - // in that PHI. - if (NewParent && LI->isLiveOutOfMBB(LI->getInterval(NewParent), &MBB) - && NewParent != PredOperandReg) - isolateReg(NewParent); - - std::pair - &CurrentPHI = CurrentPHIForColor[Color]; - - // If two PHIs have the same operand from every shared predecessor, then - // they don't actually interfere. Otherwise, isolate the current PHI. This - // could possibly be improved, e.g. we could isolate the PHI with the - // fewest operands. 
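The walk above only ever runs one liveness test, against the nearest dominating definition in the same class. A compilable toy of that stack discipline, with the dominator-tree and live-interval queries stubbed out as std::function oracles (everything here is illustrative, including the use of 0 as the null register):

  #include <functional>
  #include <map>

  struct DomStackWalk {
    // Oracles standing in for DominatorTree and LiveIntervals.
    std::function<bool(unsigned Reg, unsigned Point)> DefDominates;
    std::function<bool(unsigned Reg, unsigned Point)> LiveAt;

    std::map<unsigned, unsigned> CurrentParent;   // color -> latest def seen
    std::map<unsigned, unsigned> ImmediateParent; // reg -> next reg up stack

    // Visit the definition of DestReg (in class Color) at point P; returns
    // true when DestReg interferes with its congruence class there.
    bool visitDef(unsigned Color, unsigned DestReg, unsigned P) {
      unsigned Cur = CurrentParent[Color];
      while (Cur && !DefDominates(Cur, P))
        Cur = ImmediateParent[Cur];   // pop defs that no longer dominate P
      if (Cur && LiveAt(Cur, P)) {
        CurrentParent[Color] = Cur;
        return true;                  // one liveness test suffices
      }
      ImmediateParent[DestReg] = Cur; // push DestReg on the stack
      CurrentParent[Color] = DestReg;
      return false;
    }
  };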
- if (CurrentPHI.first && CurrentPHI.second != PredOperandReg) - isolatePHI(PHI); - else - CurrentPHI = std::make_pair(PHI, PredOperandReg); - } - } -} - -void StrongPHIElimination::InsertCopiesForPHI(MachineInstr *PHI, - MachineBasicBlock *MBB) { - assert(PHI->isPHI()); - ++NumPHIsLowered; - unsigned PHIColor = getPHIColor(PHI); - - for (unsigned i = 1; i < PHI->getNumOperands(); i += 2) { - MachineOperand &SrcMO = PHI->getOperand(i); - - // If a source is defined by an implicit def, there is no need to insert a - // copy in the predecessor. - if (SrcMO.isUndef()) - continue; - - unsigned SrcReg = SrcMO.getReg(); - assert(TargetRegisterInfo::isVirtualRegister(SrcReg) && - "Machine PHI Operands must all be virtual registers!"); - - MachineBasicBlock *PredBB = PHI->getOperand(i + 1).getMBB(); - unsigned SrcColor = getRegColor(SrcReg); - - // If neither the PHI nor the operand were isolated, then we only need to - // set the phi-kill flag on the VNInfo at this PHI. - if (PHIColor && SrcColor == PHIColor) { - LiveInterval &SrcInterval = LI->getInterval(SrcReg); - SlotIndex PredIndex = LI->getMBBEndIdx(PredBB); - VNInfo *SrcVNI = SrcInterval.getVNInfoBefore(PredIndex); - (void)SrcVNI; - assert(SrcVNI); - continue; - } - - unsigned CopyReg = 0; - if (PHIColor) { - SrcCopyMap::const_iterator I - = InsertedSrcCopyMap.find(std::make_pair(PredBB, PHIColor)); - CopyReg - = I != InsertedSrcCopyMap.end() ? I->second->getOperand(0).getReg() : 0; - } - - if (!CopyReg) { - const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); - CopyReg = MRI->createVirtualRegister(RC); - - MachineBasicBlock::iterator - CopyInsertPoint = findPHICopyInsertPoint(PredBB, MBB, SrcReg); - unsigned SrcSubReg = SrcMO.getSubReg(); - MachineInstr *CopyInstr = BuildMI(*PredBB, - CopyInsertPoint, - PHI->getDebugLoc(), - TII->get(TargetOpcode::COPY), - CopyReg).addReg(SrcReg, 0, SrcSubReg); - LI->InsertMachineInstrInMaps(CopyInstr); - ++NumSrcCopiesInserted; - - // addLiveRangeToEndOfBlock() also adds the phikill flag to the VNInfo for - // the newly added range. - LI->addLiveRangeToEndOfBlock(CopyReg, CopyInstr); - InsertedSrcCopySet.insert(std::make_pair(PredBB, SrcReg)); - - addReg(CopyReg); - if (PHIColor) { - unionRegs(PHIColor, CopyReg); - assert(getRegColor(CopyReg) != CopyReg); - } else { - PHIColor = CopyReg; - assert(getRegColor(CopyReg) == CopyReg); - } - - // Insert into map if not already there. - InsertedSrcCopyMap.insert(std::make_pair(std::make_pair(PredBB, PHIColor), - CopyInstr)); - } - - SrcMO.setReg(CopyReg); - - // If SrcReg is not live beyond the PHI, trim its interval so that it is no - // longer live-in to MBB. Note that SrcReg may appear in other PHIs that are - // processed later, but this is still correct to do at this point because we - // never rely on LiveIntervals being correct while inserting copies. - // FIXME: Should this just count uses at PHIs like the normal PHIElimination - // pass does? - LiveInterval &SrcLI = LI->getInterval(SrcReg); - SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); - SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - SlotIndex NextInstrIndex = PHIIndex.getNextIndex(); - if (SrcLI.liveAt(MBBStartIndex) && SrcLI.expiredAt(NextInstrIndex)) - SrcLI.removeRange(MBBStartIndex, PHIIndex, true); - } - - unsigned DestReg = PHI->getOperand(0).getReg(); - unsigned DestColor = getRegColor(DestReg); - - if (PHIColor && DestColor == PHIColor) { - LiveInterval &DestLI = LI->getInterval(DestReg); - - // Set the phi-def flag for the VN at this PHI. 
- SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - VNInfo *DestVNI = DestLI.getVNInfoAt(PHIIndex.getRegSlot()); - assert(DestVNI); - - // Prior to PHI elimination, the live ranges of PHIs begin at their defining - // instruction. After PHI elimination, PHI instructions are replaced by VNs - // with the phi-def flag set, and the live ranges of these VNs start at the - // beginning of the basic block. - SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); - DestVNI->def = MBBStartIndex; - DestLI.addRange(LiveRange(MBBStartIndex, - PHIIndex.getRegSlot(), - DestVNI)); - return; - } - - const TargetRegisterClass *RC = MRI->getRegClass(DestReg); - unsigned CopyReg = MRI->createVirtualRegister(RC); - - MachineInstr *CopyInstr = BuildMI(*MBB, - MBB->SkipPHIsAndLabels(MBB->begin()), - PHI->getDebugLoc(), - TII->get(TargetOpcode::COPY), - DestReg).addReg(CopyReg); - LI->InsertMachineInstrInMaps(CopyInstr); - PHI->getOperand(0).setReg(CopyReg); - ++NumDestCopiesInserted; - - // Add the region from the beginning of MBB to the copy instruction to - // CopyReg's live interval, and give the VNInfo the phidef flag. - LiveInterval &CopyLI = LI->getOrCreateInterval(CopyReg); - SlotIndex MBBStartIndex = LI->getMBBStartIdx(MBB); - SlotIndex DestCopyIndex = LI->getInstructionIndex(CopyInstr); - VNInfo *CopyVNI = CopyLI.getNextValue(MBBStartIndex, - LI->getVNInfoAllocator()); - CopyLI.addRange(LiveRange(MBBStartIndex, - DestCopyIndex.getRegSlot(), - CopyVNI)); - - // Adjust DestReg's live interval to adjust for its new definition at - // CopyInstr. - LiveInterval &DestLI = LI->getOrCreateInterval(DestReg); - SlotIndex PHIIndex = LI->getInstructionIndex(PHI); - DestLI.removeRange(PHIIndex.getRegSlot(), DestCopyIndex.getRegSlot()); - - VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); - assert(DestVNI); - DestVNI->def = DestCopyIndex.getRegSlot(); - - InsertedDestCopies[CopyReg] = CopyInstr; -} - -void StrongPHIElimination::MergeLIsAndRename(unsigned Reg, unsigned NewReg) { - if (Reg == NewReg) - return; - - LiveInterval &OldLI = LI->getInterval(Reg); - LiveInterval &NewLI = LI->getInterval(NewReg); - - // Merge the live ranges of the two registers. - DenseMap VNMap; - for (LiveInterval::iterator LRI = OldLI.begin(), LRE = OldLI.end(); - LRI != LRE; ++LRI) { - LiveRange OldLR = *LRI; - VNInfo *OldVN = OldLR.valno; - - VNInfo *&NewVN = VNMap[OldVN]; - if (!NewVN) { - NewVN = NewLI.createValueCopy(OldVN, LI->getVNInfoAllocator()); - VNMap[OldVN] = NewVN; - } - - LiveRange LR(OldLR.start, OldLR.end, NewVN); - NewLI.addRange(LR); - } - - // Remove the LiveInterval for the register being renamed and replace all - // of its defs and uses with the new register. 
- LI->removeInterval(Reg); - MRI->replaceRegWith(Reg, NewReg); -} diff --git a/contrib/llvm/lib/CodeGen/TailDuplication.cpp b/contrib/llvm/lib/CodeGen/TailDuplication.cpp index 1ec8817..ff0181e 100644 --- a/contrib/llvm/lib/CodeGen/TailDuplication.cpp +++ b/contrib/llvm/lib/CodeGen/TailDuplication.cpp @@ -86,7 +86,7 @@ namespace { void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, DenseMap &LocalVRMap, - SmallVector, 4> &Copies, + SmallVectorImpl > &Copies, const DenseSet &UsedByPhi, bool Remove); void DuplicateInstruction(MachineInstr *MI, @@ -96,7 +96,7 @@ namespace { DenseMap &LocalVRMap, const DenseSet &UsedByPhi); void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVector &TDBBs, + SmallVectorImpl &TDBBs, SmallSetVector &Succs); bool TailDuplicateBlocks(MachineFunction &MF); bool shouldTailDuplicate(const MachineFunction &MF, @@ -104,14 +104,14 @@ namespace { bool isSimpleBB(MachineBasicBlock *TailBB); bool canCompletelyDuplicateBB(MachineBasicBlock &BB); bool duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVector &TDBBs, + SmallVectorImpl &TDBBs, const DenseSet &RegsUsedByPhi, - SmallVector &Copies); + SmallVectorImpl &Copies); bool TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, - SmallVector &TDBBs, - SmallVector &Copies); + SmallVectorImpl &TDBBs, + SmallVectorImpl &Copies); bool TailDuplicateAndUpdate(MachineBasicBlock *MBB, bool IsSimple, MachineFunction &MF); @@ -382,13 +382,11 @@ void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg, /// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB. /// Remember the source register that's contributed by PredBB and update SSA /// update map. -void TailDuplicatePass::ProcessPHI(MachineInstr *MI, - MachineBasicBlock *TailBB, - MachineBasicBlock *PredBB, - DenseMap &LocalVRMap, - SmallVector, 4> &Copies, - const DenseSet &RegsUsedByPhi, - bool Remove) { +void TailDuplicatePass::ProcessPHI( + MachineInstr *MI, MachineBasicBlock *TailBB, MachineBasicBlock *PredBB, + DenseMap &LocalVRMap, + SmallVectorImpl > &Copies, + const DenseSet &RegsUsedByPhi, bool Remove) { unsigned DefReg = MI->getOperand(0).getReg(); unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB); assert(SrcOpIdx && "Unable to find matching PHI source?"); @@ -452,7 +450,7 @@ void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI, /// instructions in them accordingly. 
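The TailDuplication hunk here is a pure API cleanup, repeated across several files in this import: helpers now take SmallVectorImpl<T>& instead of SmallVector<T, N>& with a hard-coded N. A minimal sketch of what that buys:

  #include "llvm/ADT/SmallVector.h"

  // SmallVectorImpl<T> is the size-erased base of every SmallVector<T, N>,
  // so one signature serves callers with any inline capacity and avoids one
  // instantiation of the callee per N.
  static void collect(llvm::SmallVectorImpl<int> &Out) { Out.push_back(1); }

  void demo() {
    llvm::SmallVector<int, 4> A;
    llvm::SmallVector<int, 16> B;
    collect(A); // both convert implicitly, no copies
    collect(B);
  }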
void TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, - SmallVector &TDBBs, + SmallVectorImpl &TDBBs, SmallSetVector &Succs) { for (SmallSetVector::iterator SI = Succs.begin(), SE = Succs.end(); SI != SE; ++SI) { @@ -640,8 +638,6 @@ bothUsedInPHI(const MachineBasicBlock &A, bool TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { - SmallPtrSet Succs(BB.succ_begin(), BB.succ_end()); - for (MachineBasicBlock::pred_iterator PI = BB.pred_begin(), PE = BB.pred_end(); PI != PE; ++PI) { MachineBasicBlock *PredBB = *PI; @@ -662,9 +658,9 @@ TailDuplicatePass::canCompletelyDuplicateBB(MachineBasicBlock &BB) { bool TailDuplicatePass::duplicateSimpleBB(MachineBasicBlock *TailBB, - SmallVector &TDBBs, - const DenseSet &UsedByPhi, - SmallVector &Copies) { + SmallVectorImpl &TDBBs, + const DenseSet &UsedByPhi, + SmallVectorImpl &Copies) { SmallPtrSet Succs(TailBB->succ_begin(), TailBB->succ_end()); SmallVector Preds(TailBB->pred_begin(), @@ -742,8 +738,8 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, bool IsSimple, MachineFunction &MF, - SmallVector &TDBBs, - SmallVector &Copies) { + SmallVectorImpl &TDBBs, + SmallVectorImpl &Copies) { DEBUG(dbgs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n'); DenseSet UsedByPhi; diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp index 20eb918..bf4fd65 100644 --- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp @@ -17,6 +17,7 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" @@ -276,6 +277,36 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, return false; } +bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC, + unsigned SubIdx, unsigned &Size, + unsigned &Offset, + const TargetMachine *TM) const { + if (!SubIdx) { + Size = RC->getSize(); + Offset = 0; + return true; + } + unsigned BitSize = TM->getRegisterInfo()->getSubRegIdxSize(SubIdx); + // Convert bit size to byte size to be consistent with + // MCRegisterClass::getSize(). + if (BitSize % 8) + return false; + + int BitOffset = TM->getRegisterInfo()->getSubRegIdxOffset(SubIdx); + if (BitOffset < 0 || BitOffset % 8) + return false; + + Size = BitSize /= 8; + Offset = (unsigned)BitOffset / 8; + + assert(RC->getSize() >= (Offset + Size) && "bad subregister range"); + + if (!TM->getDataLayout()->isLittleEndian()) { + Offset = RC->getSize() - (Offset + Size); + } + return true; +} + void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, unsigned DestReg, @@ -364,6 +395,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, // Ask the target to do the actual folding. if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end()); // Add a memory operand, foldMemoryOperandImpl doesn't do that. assert((!(Flags & MachineMemOperand::MOStore) || NewMI->mayStore()) && @@ -424,9 +456,19 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, NewMI = MBB.insert(MI, NewMI); // Copy the memoperands from the load to the folded instruction. 
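One subtlety in the new getStackSlotRange above: the byte offset of a sub-register inside its spill slot is flipped on big-endian targets, where the high-order bytes occupy the low addresses. A standalone sketch of the arithmetic (names illustrative):

  // Byte offset of a Size-byte sub-register within a SlotSize-byte slot.
  static unsigned subRegSlotOffset(unsigned SlotSize, unsigned Offset,
                                   unsigned Size, bool IsLittleEndian) {
    return IsLittleEndian ? Offset : SlotSize - (Offset + Size);
  }
  // e.g. the low 4 bytes of an 8-byte register: offset 0 little-endian,
  // offset 4 big-endian.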
diff --git a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
index 20eb918..bf4fd65 100644
--- a/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetInstrInfo.cpp
@@ -17,6 +17,7 @@
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/PseudoSourceValue.h"
 #include "llvm/CodeGen/ScoreboardHazardRecognizer.h"
+#include "llvm/IR/DataLayout.h"
 #include "llvm/MC/MCAsmInfo.h"
 #include "llvm/MC/MCInstrItineraries.h"
 #include "llvm/Support/CommandLine.h"
@@ -276,6 +277,36 @@ bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
   return false;
 }
 
+bool TargetInstrInfo::getStackSlotRange(const TargetRegisterClass *RC,
+                                        unsigned SubIdx, unsigned &Size,
+                                        unsigned &Offset,
+                                        const TargetMachine *TM) const {
+  if (!SubIdx) {
+    Size = RC->getSize();
+    Offset = 0;
+    return true;
+  }
+  unsigned BitSize = TM->getRegisterInfo()->getSubRegIdxSize(SubIdx);
+  // Convert bit size to byte size to be consistent with
+  // MCRegisterClass::getSize().
+  if (BitSize % 8)
+    return false;
+
+  int BitOffset = TM->getRegisterInfo()->getSubRegIdxOffset(SubIdx);
+  if (BitOffset < 0 || BitOffset % 8)
+    return false;
+
+  Size = BitSize /= 8;
+  Offset = (unsigned)BitOffset / 8;
+
+  assert(RC->getSize() >= (Offset + Size) && "bad subregister range");
+
+  if (!TM->getDataLayout()->isLittleEndian()) {
+    Offset = RC->getSize() - (Offset + Size);
+  }
+  return true;
+}
+
 void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator I,
                                     unsigned DestReg,
@@ -364,6 +395,7 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
 
   // Ask the target to do the actual folding.
   if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) {
+    NewMI->setMemRefs(MI->memoperands_begin(), MI->memoperands_end());
     // Add a memory operand, foldMemoryOperandImpl doesn't do that.
     assert((!(Flags & MachineMemOperand::MOStore) ||
             NewMI->mayStore()) &&
@@ -424,9 +456,19 @@ TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI,
   NewMI = MBB.insert(MI, NewMI);
 
   // Copy the memoperands from the load to the folded instruction.
-  NewMI->setMemRefs(LoadMI->memoperands_begin(),
-                    LoadMI->memoperands_end());
-
+  if (MI->memoperands_empty()) {
+    NewMI->setMemRefs(LoadMI->memoperands_begin(),
+                      LoadMI->memoperands_end());
+  }
+  else {
+    // Handle the rare case of folding multiple loads.
+    NewMI->setMemRefs(MI->memoperands_begin(),
+                      MI->memoperands_end());
+    for (MachineInstr::mmo_iterator I = LoadMI->memoperands_begin(),
+         E = LoadMI->memoperands_end(); I != E; ++I) {
+      NewMI->addMemOperand(MF, *I);
+    }
+  }
   return NewMI;
 }
 
@@ -630,6 +672,10 @@ unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel,
   return 1;
 }
 
+unsigned TargetInstrInfo::getPredicationCost(const MachineInstr *) const {
+  return 0;
+}
+
 unsigned TargetInstrInfo::
 getInstrLatency(const InstrItineraryData *ItinData,
                 const MachineInstr *MI,
@@ -668,27 +714,13 @@ getOperandLatency(const InstrItineraryData *ItinData,
 /// lookup, do so. Otherwise return -1.
 int TargetInstrInfo::computeDefOperandLatency(
   const InstrItineraryData *ItinData,
-  const MachineInstr *DefMI, bool FindMin) const {
+  const MachineInstr *DefMI) const {
 
   // Let the target hook getInstrLatency handle missing itineraries.
   if (!ItinData)
     return getInstrLatency(ItinData, DefMI);
 
-  // Return a latency based on the itinerary properties and defining instruction
-  // if possible. Some common subtargets don't require per-operand latency,
-  // especially for minimum latencies.
-  if (FindMin) {
-    // If MinLatency is valid, call getInstrLatency. This uses Stage latency if
-    // it exists before defaulting to MinLatency.
-    if (ItinData->SchedModel->MinLatency >= 0)
-      return getInstrLatency(ItinData, DefMI);
-
-    // If MinLatency is invalid, OperandLatency is interpreted as MinLatency.
-    // For empty itineraries, short-cirtuit the check and default to one cycle.
-    if (ItinData->isEmpty())
-      return 1;
-  }
-  else if(ItinData->isEmpty())
+  if(ItinData->isEmpty())
     return defaultDefLatency(ItinData->SchedModel, DefMI);
 
   // ...operand lookup required
@@ -709,10 +741,9 @@ int TargetInstrInfo::computeDefOperandLatency(
 unsigned TargetInstrInfo::
 computeOperandLatency(const InstrItineraryData *ItinData,
                       const MachineInstr *DefMI, unsigned DefIdx,
-                      const MachineInstr *UseMI, unsigned UseIdx,
-                      bool FindMin) const {
+                      const MachineInstr *UseMI, unsigned UseIdx) const {
 
-  int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin);
+  int DefLatency = computeDefOperandLatency(ItinData, DefMI);
   if (DefLatency >= 0)
     return DefLatency;
 
@@ -732,8 +763,7 @@ computeOperandLatency(const InstrItineraryData *ItinData,
   unsigned InstrLatency = getInstrLatency(ItinData, DefMI);
 
   // Expected latency is the max of the stage latency and itinerary props.
-  if (!FindMin)
-    InstrLatency = std::max(InstrLatency,
-                            defaultDefLatency(ItinData->SchedModel, DefMI));
+  InstrLatency = std::max(InstrLatency,
+                          defaultDefLatency(ItinData->SchedModel, DefMI));
   return InstrLatency;
 }
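The new getStackSlotRange above turns a sub-register's bit geometry into a byte range within the full register's spill slot, and mirrors the offset on big-endian targets. A standalone sketch of that arithmetic, with an illustrative struct rather than LLVM's real API:

    #include <cassert>
    #include <cstdio>

    struct SubRegGeom {
      unsigned RegBytes; // size of the full register's spill slot
      unsigned BitSize;  // sub-register size in bits
      int BitOffset;     // sub-register offset in bits
    };

    // Returns false when the sub-register is not byte-aligned (it cannot be
    // spilled as a contiguous byte range); otherwise computes the range.
    static bool stackSlotRange(const SubRegGeom &G, bool LittleEndian,
                               unsigned &Size, unsigned &Offset) {
      if (G.BitSize % 8 || G.BitOffset < 0 || G.BitOffset % 8)
        return false;
      Size = G.BitSize / 8;
      Offset = (unsigned)G.BitOffset / 8;
      assert(G.RegBytes >= Offset + Size && "bad subregister range");
      // Big-endian targets store the most significant bytes first, so the
      // sub-register's bytes sit at the mirrored offset within the slot.
      if (!LittleEndian)
        Offset = G.RegBytes - (Offset + Size);
      return true;
    }

    int main() {
      SubRegGeom Lo32 = {8, 32, 0}; // low 32 bits of a 64-bit register
      unsigned Size, Off;
      if (stackSlotRange(Lo32, /*LittleEndian=*/true, Size, Off))
        std::printf("LE: %u bytes at offset %u\n", Size, Off); // 4 at 0
      if (stackSlotRange(Lo32, /*LittleEndian=*/false, Size, Off))
        std::printf("BE: %u bytes at offset %u\n", Size, Off); // 4 at 4
      return 0;
    }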
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
index 8074d16..30305af 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringBase.cpp
@@ -191,6 +191,11 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
   Names[RTLIB::NEARBYINT_F80] = "nearbyintl";
   Names[RTLIB::NEARBYINT_F128] = "nearbyintl";
   Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl";
+  Names[RTLIB::ROUND_F32] = "roundf";
+  Names[RTLIB::ROUND_F64] = "round";
+  Names[RTLIB::ROUND_F80] = "roundl";
+  Names[RTLIB::ROUND_F128] = "roundl";
+  Names[RTLIB::ROUND_PPCF128] = "roundl";
   Names[RTLIB::FLOOR_F32] = "floorf";
   Names[RTLIB::FLOOR_F64] = "floor";
   Names[RTLIB::FLOOR_F80] = "floorl";
@@ -313,34 +318,62 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
   Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2";
   Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4";
   Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8";
+  Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_16] = "__sync_val_compare_and_swap_16";
   Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1";
   Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2";
   Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4";
   Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8";
+  Names[RTLIB::SYNC_LOCK_TEST_AND_SET_16] = "__sync_lock_test_and_set_16";
   Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1";
   Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2";
   Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4";
   Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8";
+  Names[RTLIB::SYNC_FETCH_AND_ADD_16] = "__sync_fetch_and_add_16";
   Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1";
   Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2";
   Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4";
   Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8";
+  Names[RTLIB::SYNC_FETCH_AND_SUB_16] = "__sync_fetch_and_sub_16";
   Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1";
   Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2";
   Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4";
   Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8";
+  Names[RTLIB::SYNC_FETCH_AND_AND_16] = "__sync_fetch_and_and_16";
   Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1";
   Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2";
   Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4";
   Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8";
+  Names[RTLIB::SYNC_FETCH_AND_OR_16] = "__sync_fetch_and_or_16";
   Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1";
   Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2";
   Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4";
   Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8";
+  Names[RTLIB::SYNC_FETCH_AND_XOR_16] = "__sync_fetch_and_xor_16";
   Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1";
   Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2";
   Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4";
   Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8";
+  Names[RTLIB::SYNC_FETCH_AND_NAND_16] = "__sync_fetch_and_nand_16";
+  Names[RTLIB::SYNC_FETCH_AND_MAX_1] = "__sync_fetch_and_max_1";
+  Names[RTLIB::SYNC_FETCH_AND_MAX_2] = "__sync_fetch_and_max_2";
+  Names[RTLIB::SYNC_FETCH_AND_MAX_4] = "__sync_fetch_and_max_4";
+  Names[RTLIB::SYNC_FETCH_AND_MAX_8] = "__sync_fetch_and_max_8";
+  Names[RTLIB::SYNC_FETCH_AND_MAX_16] = "__sync_fetch_and_max_16";
+  Names[RTLIB::SYNC_FETCH_AND_UMAX_1] = "__sync_fetch_and_umax_1";
+  Names[RTLIB::SYNC_FETCH_AND_UMAX_2] = "__sync_fetch_and_umax_2";
+  Names[RTLIB::SYNC_FETCH_AND_UMAX_4] = "__sync_fetch_and_umax_4";
+  Names[RTLIB::SYNC_FETCH_AND_UMAX_8] = "__sync_fetch_and_umax_8";
+  Names[RTLIB::SYNC_FETCH_AND_UMAX_16] = "__sync_fetch_and_umax_16";
+  Names[RTLIB::SYNC_FETCH_AND_MIN_1] = "__sync_fetch_and_min_1";
+  Names[RTLIB::SYNC_FETCH_AND_MIN_2] = "__sync_fetch_and_min_2";
+  Names[RTLIB::SYNC_FETCH_AND_MIN_4] = "__sync_fetch_and_min_4";
+  Names[RTLIB::SYNC_FETCH_AND_MIN_8] = "__sync_fetch_and_min_8";
+  Names[RTLIB::SYNC_FETCH_AND_MIN_16] = "__sync_fetch_and_min_16";
+  Names[RTLIB::SYNC_FETCH_AND_UMIN_1] = "__sync_fetch_and_umin_1";
+  Names[RTLIB::SYNC_FETCH_AND_UMIN_2] = "__sync_fetch_and_umin_2";
+  Names[RTLIB::SYNC_FETCH_AND_UMIN_4] = "__sync_fetch_and_umin_4";
+  Names[RTLIB::SYNC_FETCH_AND_UMIN_8] = "__sync_fetch_and_umin_8";
+  Names[RTLIB::SYNC_FETCH_AND_UMIN_16] = "__sync_fetch_and_umin_16";
 
   if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) {
     Names[RTLIB::SINCOS_F32] = "sincosf";
@@ -356,6 +389,13 @@ static void InitLibcallNames(const char **Names, const TargetMachine &TM) {
     Names[RTLIB::SINCOS_F128] = 0;
     Names[RTLIB::SINCOS_PPCF128] = 0;
   }
+
+  if (Triple(TM.getTargetTriple()).getOS() != Triple::OpenBSD) {
+    Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = "__stack_chk_fail";
+  } else {
+    // These are generally not available.
+    Names[RTLIB::STACKPROTECTOR_CHECK_FAIL] = 0;
+  }
 }
 
 /// InitLibcallCallingConvs - Set default libcall CallingConvs.
@@ -624,7 +664,6 @@ TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm,
 
   // Perform these initializations only once.
   IsLittleEndian = TD->isLittleEndian();
-  PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0));
   MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8;
   MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize
     = MaxStoresPerMemmoveOptSize = 4;
@@ -682,6 +721,14 @@ void TargetLoweringBase::initActions() {
     // These operations default to expand.
     setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand);
     setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand);
+
+    // These library functions default to expand.
+    setOperationAction(ISD::FROUND, (MVT::SimpleValueType)VT, Expand);
+
+    // These operations default to expand for vector types.
+    if (VT >= MVT::FIRST_VECTOR_VALUETYPE &&
+        VT <= MVT::LAST_VECTOR_VALUETYPE)
+      setOperationAction(ISD::FCOPYSIGN, (MVT::SimpleValueType)VT, Expand);
   }
 
   // Most targets ignore the @llvm.prefetch intrinsic.
@@ -747,6 +794,19 @@ void TargetLoweringBase::initActions() {
   setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand);
 }
 
+MVT TargetLoweringBase::getPointerTy(uint32_t AS) const {
+  return MVT::getIntegerVT(getPointerSizeInBits(AS));
+}
+
+unsigned TargetLoweringBase::getPointerSizeInBits(uint32_t AS) const {
+  return TD->getPointerSizeInBits(AS);
+}
+
+unsigned TargetLoweringBase::getPointerTypeSizeInBits(Type *Ty) const {
+  assert(Ty->isPointerTy());
+  return getPointerSizeInBits(Ty->getPointerAddressSpace());
+}
+
 MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const {
   return MVT::getIntegerVT(8*TD->getPointerSize(0));
 }
@@ -1033,7 +1093,7 @@ void TargetLoweringBase::computeRegisterProperties() {
   }
 }
 
-EVT TargetLoweringBase::getSetCCResultType(EVT VT) const {
+EVT TargetLoweringBase::getSetCCResultType(LLVMContext &, EVT VT) const {
   assert(!VT.isVector() && "No default SetCC type for vectors!");
   return getPointerTy(0).SimpleTy;
 }
@@ -1162,7 +1222,7 @@ void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr,
       Flags.setZExt();
 
     for (unsigned i = 0; i < NumParts; ++i)
-      Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0));
+      Outs.push_back(ISD::OutputArg(Flags, PartVT, VT, /*isFixed=*/true, 0, 0));
   }
 }
 
@@ -1228,6 +1288,7 @@ int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const {
   case PtrToInt:       return ISD::BITCAST;
   case IntToPtr:       return ISD::BITCAST;
   case BitCast:        return ISD::BITCAST;
+  case AddrSpaceCast:  return ISD::ADDRSPACECAST;
   case ICmp:           return ISD::SETCC;
   case FCmp:           return ISD::SETCC;
   case PHI:            return 0;
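The new getPointerTy(AS) and getPointerSizeInBits(AS) overloads above derive the pointer type per address space from the DataLayout instead of using one cached PointerTy for the whole target. A sketch of the lookup with a hypothetical table standing in for DataLayout:

    #include <cstdio>
    #include <map>

    struct FakeDataLayout {
      std::map<unsigned, unsigned> PtrBitsByAS; // addr space -> pointer bits
      unsigned pointerSizeInBits(unsigned AS) const {
        std::map<unsigned, unsigned>::const_iterator I = PtrBitsByAS.find(AS);
        // Fall back to address space 0, which is assumed to be present.
        return I != PtrBitsByAS.end() ? I->second : PtrBitsByAS.at(0);
      }
    };

    int main() {
      FakeDataLayout DL;
      DL.PtrBitsByAS[0] = 64; // default address space
      DL.PtrBitsByAS[1] = 32; // e.g. a segmented or device-local space
      // Lowering queries now depend on the pointer's address space.
      std::printf("AS0 pointer: i%u\n", DL.pointerSizeInBits(0));
      std::printf("AS1 pointer: i%u\n", DL.pointerSizeInBits(1));
      return 0;
    }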
diff --git a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 7e7359a..59d7b57 100644
--- a/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -52,10 +52,10 @@ TargetLoweringObjectFileELF::getCFIPersonalitySymbol(const GlobalValue *GV,
   default:
     report_fatal_error("We do not support this DWARF encoding yet!");
   case dwarf::DW_EH_PE_absptr:
-    return Mang->getSymbol(GV);
+    return getSymbol(*Mang, GV);
   case dwarf::DW_EH_PE_pcrel: {
     return getContext().GetOrCreateSymbol(StringRef("DW.ref.") +
-                                          Mang->getSymbol(GV)->getName());
+                                          getSymbol(*Mang, GV)->getName());
   }
   }
 }
@@ -104,7 +104,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
     MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
     MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym);
     if (StubSym.getPointer() == 0) {
-      MCSymbol *Sym = Mang->getSymbol(GV);
+      MCSymbol *Sym = getSymbol(*Mang, GV);
       StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
     }
@@ -252,7 +252,7 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
     Prefix = getSectionPrefixForGlobal(Kind);
 
   SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
-  MCSymbol *Sym = Mang->getSymbol(GV);
+  MCSymbol *Sym = getSymbol(*Mang, GV);
   Name.append(Sym->getName().begin(), Sym->getName().end());
   StringRef Group = "";
   unsigned Flags = getELFSectionFlags(Kind);
@@ -523,6 +523,11 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
 const MCSection *TargetLoweringObjectFileMachO::
 SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
                        Mangler *Mang, const TargetMachine &TM) const {
+
+  // Handle thread local data.
+  if (Kind.isThreadBSS()) return TLSBSSSection;
+  if (Kind.isThreadData()) return TLSDataSection;
+
   if (Kind.isText())
     return GV->isWeakForLinker() ? TextCoalSection : TextSection;
 
@@ -575,10 +580,6 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   if (Kind.isBSSLocal())
     return DataBSSSection;
 
-  // Handle thread local data.
-  if (Kind.isThreadBSS()) return TLSBSSSection;
-  if (Kind.isThreadData()) return TLSDataSection;
-
   // Otherwise, just drop the variable in the normal data section.
   return DataSection;
 }
@@ -613,7 +614,7 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const {
     // FIXME: ObjC metadata is currently emitted as internal symbols that have
     // \1L and \0l prefixes on them.  Fix them to be Private/LinkerPrivate and
     // this horrible hack can go away.
-    MCSymbol *Sym = Mang->getSymbol(GV);
+    MCSymbol *Sym = getSymbol(*Mang, GV);
     if (Sym->getName()[0] == 'L' || Sym->getName()[0] == 'l')
       return false;
   }
@@ -642,7 +643,7 @@ getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang,
       GV->hasHiddenVisibility() ? MachOMMI.getHiddenGVStubEntry(SSym) :
                                   MachOMMI.getGVStubEntry(SSym);
     if (StubSym.getPointer() == 0) {
-      MCSymbol *Sym = Mang->getSymbol(GV);
+      MCSymbol *Sym = getSymbol(*Mang, GV);
       StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
     }
@@ -671,7 +672,7 @@ getCFIPersonalitySymbol(const GlobalValue *GV, Mangler *Mang,
   MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str());
   MachineModuleInfoImpl::StubValueTy &StubSym = MachOMMI.getGVStubEntry(SSym);
   if (StubSym.getPointer() == 0) {
-    MCSymbol *Sym = Mang->getSymbol(GV);
+    MCSymbol *Sym = getSymbol(*Mang, GV);
     StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage());
   }
@@ -726,14 +727,14 @@ getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind,
   if (GV->isWeakForLinker()) {
     Selection = COFF::IMAGE_COMDAT_SELECT_ANY;
     Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
-    MCSymbol *Sym = Mang->getSymbol(GV);
     Name.append("$");
-    Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+    Mang->getNameWithPrefix(Name, GV, false, false);
   }
   return getContext().getCOFFSection(Name,
                                      Characteristics,
-                                     Selection,
-                                     Kind);
+                                     Kind,
+                                     "",
+                                     Selection);
 }
 
 static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) {
@@ -761,24 +762,29 @@ SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
   if (GV->isWeakForLinker()) {
     const char *Prefix = getCOFFSectionPrefixForUniqueGlobal(Kind);
     SmallString<128> Name(Prefix, Prefix+strlen(Prefix));
-    MCSymbol *Sym = Mang->getSymbol(GV);
-    Name.append(Sym->getName().begin() + 1, Sym->getName().end());
+    Mang->getNameWithPrefix(Name, GV, false, false);
 
     unsigned Characteristics = getCOFFSectionFlags(Kind);
 
     Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT;
 
     return getContext().getCOFFSection(Name.str(), Characteristics,
-                                       COFF::IMAGE_COMDAT_SELECT_ANY, Kind);
+                                       Kind, "", COFF::IMAGE_COMDAT_SELECT_ANY);
   }
 
   if (Kind.isText())
-    return getTextSection();
+    return TextSection;
 
   if (Kind.isThreadLocal())
-    return getTLSDataSection();
+    return TLSDataSection;
 
-  return getDataSection();
+  if (Kind.isReadOnly())
+    return ReadOnlySection;
+
+  if (Kind.isBSS())
+    return BSSSection;
+
+  return DataSection;
 }
 
 void TargetLoweringObjectFileCOFF::
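The Mach-O hunk above hoists the thread-local checks to the top of SelectSectionForGlobal: section dispatch is first-match-wins, so TLS kinds must be tested before the generic text/data/BSS cases can claim the global. A sketch of that ordering, with illustrative stand-ins for LLVM's SectionKind and MCSection:

    #include <cstdio>
    #include <string>

    enum Kind { Text, Data, BSSLocal, ThreadData, ThreadBSS };

    static std::string selectSection(Kind K) {
      // Handle thread-local data first; otherwise a ThreadBSS global could
      // fall through into the plain BSS/data cases below.
      if (K == ThreadBSS)  return "__thread_bss";
      if (K == ThreadData) return "__thread_data";
      if (K == Text)       return "__text";
      if (K == BSSLocal)   return "__bss";
      return "__data";
    }

    int main() {
      std::printf("ThreadBSS -> %s\n", selectSection(ThreadBSS).c_str());
      std::printf("Data      -> %s\n", selectSection(Data).c_str());
      return 0;
    }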
"llvm/CodeGen/MachineFrameInfo.h" #include "llvm/Target/TargetOptions.h" @@ -21,7 +22,8 @@ using namespace llvm; bool TargetOptions::DisableFramePointerElim(const MachineFunction &MF) const { // Check to see if we should eliminate non-leaf frame pointers and then // check to see if we should eliminate all frame pointers. - if (NoFramePointerElimNonLeaf && !NoFramePointerElim) { + if (MF.getFunction()->hasFnAttribute("no-frame-pointer-elim-non-leaf") && + !NoFramePointerElim) { const MachineFrameInfo *MFI = MF.getFrameInfo(); return MFI->hasCalls(); } @@ -49,30 +51,3 @@ bool TargetOptions::HonorSignDependentRoundingFPMath() const { StringRef TargetOptions::getTrapFunctionName() const { return TrapFuncName; } - -bool TargetOptions::operator==(const TargetOptions &TO) { -#define ARE_EQUAL(X) X == TO.X - return - ARE_EQUAL(UnsafeFPMath) && - ARE_EQUAL(NoInfsFPMath) && - ARE_EQUAL(NoNaNsFPMath) && - ARE_EQUAL(HonorSignDependentRoundingFPMathOption) && - ARE_EQUAL(UseSoftFloat) && - ARE_EQUAL(NoZerosInBSS) && - ARE_EQUAL(JITExceptionHandling) && - ARE_EQUAL(JITEmitDebugInfo) && - ARE_EQUAL(JITEmitDebugInfoToDisk) && - ARE_EQUAL(GuaranteedTailCallOpt) && - ARE_EQUAL(DisableTailCalls) && - ARE_EQUAL(StackAlignmentOverride) && - ARE_EQUAL(RealignStack) && - ARE_EQUAL(SSPBufferSize) && - ARE_EQUAL(EnableFastISel) && - ARE_EQUAL(PositionIndependentExecutable) && - ARE_EQUAL(EnableSegmentedStacks) && - ARE_EQUAL(UseInitArray) && - ARE_EQUAL(TrapFuncName) && - ARE_EQUAL(FloatABIType) && - ARE_EQUAL(AllowFPOpFusion); -#undef ARE_EQUAL -} diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp index 84b4bfc..5a15243 100644 --- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp +++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp @@ -23,10 +23,12 @@ using namespace llvm; TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, regclass_iterator RCB, regclass_iterator RCE, const char *const *SRINames, - const unsigned *SRILaneMasks) + const unsigned *SRILaneMasks, + unsigned SRICoveringLanes) : InfoDesc(ID), SubRegIndexNames(SRINames), SubRegIndexLaneMasks(SRILaneMasks), - RegClassBegin(RCB), RegClassEnd(RCE) { + RegClassBegin(RCB), RegClassEnd(RCE), + CoveringLanes(SRICoveringLanes) { } TargetRegisterInfo::~TargetRegisterInfo() {} @@ -71,6 +73,14 @@ void PrintRegUnit::print(raw_ostream &OS) const { OS << '~' << TRI->getName(*Roots); } +void PrintVRegOrUnit::print(raw_ostream &OS) const { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + return; + } + PrintRegUnit::print(OS); +} + /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. 
diff --git a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
index 84b4bfc..5a15243 100644
--- a/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetRegisterInfo.cpp
@@ -23,10 +23,12 @@ using namespace llvm;
 TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID,
                              regclass_iterator RCB, regclass_iterator RCE,
                              const char *const *SRINames,
-                             const unsigned *SRILaneMasks)
+                             const unsigned *SRILaneMasks,
+                             unsigned SRICoveringLanes)
   : InfoDesc(ID), SubRegIndexNames(SRINames),
     SubRegIndexLaneMasks(SRILaneMasks),
-    RegClassBegin(RCB), RegClassEnd(RCE) {
+    RegClassBegin(RCB), RegClassEnd(RCE),
+    CoveringLanes(SRICoveringLanes) {
 }
 
 TargetRegisterInfo::~TargetRegisterInfo() {}
@@ -71,6 +73,14 @@ void PrintRegUnit::print(raw_ostream &OS) const {
   OS << '~' << TRI->getName(*Roots);
 }
 
+void PrintVRegOrUnit::print(raw_ostream &OS) const {
+  if (TRI && TRI->isVirtualRegister(Unit)) {
+    OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit);
+    return;
+  }
+  PrintRegUnit::print(OS);
+}
+
 /// getAllocatableClass - Return the maximal subclass of the given register
 /// class that is alloctable, or NULL.
 const TargetRegisterClass *
@@ -83,7 +93,7 @@ TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const {
        Base < BaseE; Base += 32) {
     unsigned Idx = Base;
     for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) {
-      unsigned Offset = CountTrailingZeros_32(Mask);
+      unsigned Offset = countTrailingZeros(Mask);
       const TargetRegisterClass *SubRC = getRegClass(Idx + Offset);
       if (SubRC->isAllocatable())
         return SubRC;
@@ -153,7 +163,7 @@ const TargetRegisterClass *firstCommonClass(const uint32_t *A,
                                             const TargetRegisterInfo *TRI) {
   for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32)
     if (unsigned Common = *A++ & *B++)
-      return TRI->getRegClass(I + CountTrailingZeros_32(Common));
+      return TRI->getRegClass(I + countTrailingZeros(Common));
   return 0;
 }
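The rename above (CountTrailingZeros_32 to countTrailingZeros) is part of LLVM 3.4's bit-utility cleanup; the loop itself turns a 32-bit sub-class mask into register-class indices. A portable stand-in showing the same trailing-zero trick, here combined with the common clear-lowest-bit iteration rather than LLVM's shift loop:

    #include <cstdio>

    static unsigned countTrailingZeros32(unsigned V) {
      if (!V) return 32;
      unsigned N = 0;
      while (!(V & 1)) { V >>= 1; ++N; }
      return N;
    }

    int main() {
      // Bits set in the mask mark candidate classes within a 32-class block.
      unsigned Base = 32;   // this mask word covers classes [32, 64)
      unsigned Mask = 0x14; // classes at bit 2 and bit 4
      for (unsigned M = Mask; M; M &= M - 1) {
        unsigned Idx = Base + countTrailingZeros32(M);
        std::printf("candidate register class %u\n", Idx); // 34, then 36
      }
      return 0;
    }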
diff --git a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
index 1bf14db..b0f2ca6 100644
--- a/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
+++ b/contrib/llvm/lib/CodeGen/TargetSchedule.cpp
@@ -93,33 +93,10 @@ unsigned TargetSchedModel::getNumMicroOps(const MachineInstr *MI,
 // effectively means infinite latency. Since users of the TargetSchedule API
 // don't know how to handle this, we convert it to a very large latency that is
 // easy to distinguish when debugging the DAG but won't induce overflow.
-static unsigned convertLatency(int Cycles) {
+static unsigned capLatency(int Cycles) {
   return Cycles >= 0 ? Cycles : 1000;
 }
 
-/// If we can determine the operand latency from the def only, without machine
-/// model or itinerary lookup, do so. Otherwise return -1.
-int TargetSchedModel::getDefLatency(const MachineInstr *DefMI,
-                                    bool FindMin) const {
-
-  // Return a latency based on the itinerary properties and defining instruction
-  // if possible. Some common subtargets don't require per-operand latency,
-  // especially for minimum latencies.
-  if (FindMin) {
-    // If MinLatency is invalid, then use the itinerary for MinLatency. If no
-    // itinerary exists either, then use single cycle latency.
-    if (SchedModel.MinLatency < 0 && !hasInstrItineraries()) {
-      return 1;
-    }
-    return SchedModel.MinLatency;
-  }
-  else if (!hasInstrSchedModel() && !hasInstrItineraries()) {
-    return TII->defaultDefLatency(&SchedModel, DefMI);
-  }
-  // ...operand lookup required
-  return -1;
-}
-
 /// Return the MCSchedClassDesc for this instruction. Some SchedClasses require
 /// evaluation of predicates that depend on instruction operands or flags.
 const MCSchedClassDesc *TargetSchedModel::
@@ -177,18 +154,16 @@ static unsigned findUseIdx(const MachineInstr *MI, unsigned UseOperIdx) {
 // Top-level API for clients that know the operand indices.
 unsigned TargetSchedModel::computeOperandLatency(
   const MachineInstr *DefMI, unsigned DefOperIdx,
-  const MachineInstr *UseMI, unsigned UseOperIdx,
-  bool FindMin) const {
+  const MachineInstr *UseMI, unsigned UseOperIdx) const {
 
-  int DefLatency = getDefLatency(DefMI, FindMin);
-  if (DefLatency >= 0)
-    return DefLatency;
+  if (!hasInstrSchedModel() && !hasInstrItineraries())
+    return TII->defaultDefLatency(&SchedModel, DefMI);
 
   if (hasInstrItineraries()) {
     int OperLatency = 0;
     if (UseMI) {
-      OperLatency =
-        TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx, UseMI, UseOperIdx);
+      OperLatency = TII->getOperandLatency(&InstrItins, DefMI, DefOperIdx,
+                                           UseMI, UseOperIdx);
     }
     else {
       unsigned DefClass = DefMI->getDesc().getSchedClass();
@@ -205,13 +180,11 @@ unsigned TargetSchedModel::computeOperandLatency(
     // hook to allow subtargets to specialize latency. This hook is only
     // applicable to the InstrItins model. InstrSchedModel should model all
     // special cases without TII hooks.
-    if (!FindMin)
-      InstrLatency = std::max(InstrLatency,
-                              TII->defaultDefLatency(&SchedModel, DefMI));
+    InstrLatency = std::max(InstrLatency,
+                            TII->defaultDefLatency(&SchedModel, DefMI));
     return InstrLatency;
   }
-  assert(!FindMin && hasInstrSchedModel() &&
-         "Expected a SchedModel for this cpu");
+  // hasInstrSchedModel()
   const MCSchedClassDesc *SCDesc = resolveSchedClass(DefMI);
   unsigned DefIdx = findDefIdx(DefMI, DefOperIdx);
   if (DefIdx < SCDesc->NumWriteLatencyEntries) {
@@ -219,7 +192,7 @@ unsigned TargetSchedModel::computeOperandLatency(
     const MCWriteLatencyEntry *WLEntry =
       STI->getWriteLatencyEntry(SCDesc, DefIdx);
     unsigned WriteID = WLEntry->WriteResourceID;
-    unsigned Latency = convertLatency(WLEntry->Cycles);
+    unsigned Latency = capLatency(WLEntry->Cycles);
     if (!UseMI)
       return Latency;
 
@@ -228,13 +201,17 @@ unsigned TargetSchedModel::computeOperandLatency(
     if (UseDesc->NumReadAdvanceEntries == 0)
       return Latency;
     unsigned UseIdx = findUseIdx(UseMI, UseOperIdx);
-    return Latency - STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+    int Advance = STI->getReadAdvanceCycles(UseDesc, UseIdx, WriteID);
+    if (Advance > 0 && (unsigned)Advance > Latency) // unsigned wrap
+      return 0;
+    return Latency - Advance;
   }
   // If DefIdx does not exist in the model (e.g. implicit defs), then return
   // unit latency (defaultDefLatency may be too conservative).
 #ifndef NDEBUG
   if (SCDesc->isValid() && !DefMI->getOperand(DefOperIdx).isImplicit()
-      && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()) {
+      && !DefMI->getDesc().OpInfo[DefOperIdx].isOptionalDef()
+      && SchedModel.isComplete()) {
     std::string Err;
     raw_string_ostream ss(Err);
     ss << "DefIdx " << DefIdx << " exceeds machine model writes for "
@@ -248,10 +225,13 @@ unsigned TargetSchedModel::computeOperandLatency(
   return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI);
 }
 
-unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
+unsigned
+TargetSchedModel::computeInstrLatency(const MachineInstr *MI,
                                      bool UseDefaultDefLatency) const {
   // For the itinerary model, fall back to the old subtarget hook.
   // Allow subtargets to compute Bundle latencies outside the machine model.
-  if (hasInstrItineraries() || MI->isBundle())
+  if (hasInstrItineraries() || MI->isBundle() ||
+      (!hasInstrSchedModel() && !UseDefaultDefLatency))
    return TII->getInstrLatency(&InstrItins, MI);
 
   if (hasInstrSchedModel()) {
@@ -263,7 +243,7 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
       // Lookup the definition's write latency in SubtargetInfo.
       const MCWriteLatencyEntry *WLEntry =
         STI->getWriteLatencyEntry(SCDesc, DefIdx);
-      Latency = std::max(Latency, convertLatency(WLEntry->Cycles));
+      Latency = std::max(Latency, capLatency(WLEntry->Cycles));
     }
     return Latency;
   }
@@ -274,13 +254,10 @@ unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const {
 unsigned TargetSchedModel::
 computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
                      const MachineInstr *DepMI) const {
-  // MinLatency == -1 is for in-order processors that always have unit
-  // MinLatency. MinLatency > 0 is for in-order processors with varying min
-  // latencies, but since this is not a RAW dep, we always use unit latency.
-  if (SchedModel.MinLatency != 0)
+  if (SchedModel.MicroOpBufferSize <= 1)
     return 1;
 
-  // MinLatency == 0 indicates an out-of-order processor that can dispatch
+  // MicroOpBufferSize > 1 indicates an out-of-order processor that can dispatch
   // WAW dependencies in the same cycle.
 
   // Treat predication as a data dependency for out-of-order cpus. In-order
@@ -302,7 +279,7 @@ computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx,
   if (SCDesc->isValid()) {
     for (const MCWriteProcResEntry *PRI = STI->getWriteProcResBegin(SCDesc),
            *PRE = STI->getWriteProcResEnd(SCDesc); PRI != PRE; ++PRI) {
-      if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->IsBuffered)
+      if (!SchedModel.getProcResource(PRI->ProcResourceIdx)->BufferSize)
        return 1;
     }
   }
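Two small guards from the TargetSchedule changes above are worth spelling out: capLatency() turns the "invalid/variable" sentinel (-1) into a large but finite latency, and the new ReadAdvance adjustment is clamped so the unsigned subtraction cannot wrap below zero. A standalone sketch:

    #include <cstdio>

    static unsigned capLatency(int Cycles) {
      return Cycles >= 0 ? (unsigned)Cycles : 1000;
    }

    static unsigned operandLatency(int WriteCycles, int ReadAdvance) {
      unsigned Latency = capLatency(WriteCycles);
      // A forwarding network may let the use read the value ReadAdvance
      // cycles early; never let that push the latency below zero.
      if (ReadAdvance > 0 && (unsigned)ReadAdvance > Latency)
        return 0;
      return Latency - ReadAdvance;
    }

    int main() {
      std::printf("%u\n", operandLatency(4, 1));  // 3
      std::printf("%u\n", operandLatency(2, 5));  // clamped to 0, not wrapped
      std::printf("%u\n", operandLatency(-1, 0)); // sentinel capped to 1000
      return 0;
    }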
diff --git a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
index 7ca2bee..b9a6b47 100644
--- a/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/contrib/llvm/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1400,7 +1400,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
         VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator());
         SlotIndex endIdx =
           LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber);
-        LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI));
+        LI.addSegment(LiveInterval::Segment(LastCopyIdx, endIdx, VNI));
       }
     }
 
@@ -1457,7 +1457,7 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI,
         SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber);
         if (I->end == UseIdx)
-          LI.removeRange(LastCopyIdx, UseIdx);
+          LI.removeSegment(LastCopyIdx, UseIdx);
       }
     } else if (RemovedKillFlag) {
@@ -1539,7 +1539,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) {
       // transformations that may either eliminate the tied operands or
       // improve the opportunities for coalescing away the register copy.
       if (TiedOperands.size() == 1) {
-        SmallVector<std::pair<unsigned, unsigned>, 4> &TiedPairs
+        SmallVectorImpl<std::pair<unsigned, unsigned> > &TiedPairs
           = TiedOperands.begin()->second;
         if (TiedPairs.size() == 1) {
           unsigned SrcIdx = TiedPairs[0].first;
diff --git a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
index a95ebcd..f735ef2 100644
--- a/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
+++ b/contrib/llvm/lib/CodeGen/UnreachableBlockElim.cpp
@@ -24,7 +24,6 @@
 #include "llvm/ADT/DepthFirstIterator.h"
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/Analysis/Dominators.h"
-#include "llvm/Analysis/ProfileInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
@@ -50,7 +49,6 @@ namespace {
     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
       AU.addPreserved<DominatorTree>();
-      AU.addPreserved<ProfileInfo>();
     }
   };
 }
@@ -87,9 +85,7 @@ bool UnreachableBlockElim::runOnFunction(Function &F) {
   }
 
   // Actually remove the blocks now.
-  ProfileInfo *PI = getAnalysisIfAvailable<ProfileInfo>();
   for (unsigned i = 0, e = DeadBlocks.size(); i != e; ++i) {
-    if (PI) PI->removeBlock(DeadBlocks[i]);
     DeadBlocks[i]->eraseFromParent();
   }
 
diff --git a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
index cd012d2..e0aa405 100644
--- a/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
+++ b/contrib/llvm/lib/CodeGen/VirtRegMap.cpp
@@ -28,6 +28,7 @@
 #include "llvm/CodeGen/MachineInstrBuilder.h"
 #include "llvm/CodeGen/MachineRegisterInfo.h"
 #include "llvm/CodeGen/Passes.h"
+#include "llvm/IR/Function.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Compiler.h"
 #include "llvm/Support/Debug.h"
@@ -264,15 +265,36 @@ void VirtRegRewriter::rewrite() {
   SmallVector<unsigned, 8> SuperDeads;
   SmallVector<unsigned, 8> SuperDefs;
   SmallVector<unsigned, 8> SuperKills;
+  SmallPtrSet<const MachineInstr *, 4> NoReturnInsts;
 
   for (MachineFunction::iterator MBBI = MF->begin(), MBBE = MF->end();
        MBBI != MBBE; ++MBBI) {
     DEBUG(MBBI->print(dbgs(), Indexes));
+    bool IsExitBB = MBBI->succ_empty();
     for (MachineBasicBlock::instr_iterator
            MII = MBBI->instr_begin(), MIE = MBBI->instr_end(); MII != MIE;) {
       MachineInstr *MI = MII;
       ++MII;
 
+      // Check if this instruction is a call to a noreturn function.
+      // If so, all the definitions set by this instruction can be ignored.
+      if (IsExitBB && MI->isCall())
+        for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
+             MOE = MI->operands_end(); MOI != MOE; ++MOI) {
+          MachineOperand &MO = *MOI;
+          if (!MO.isGlobal())
+            continue;
+          const Function *Func = dyn_cast<Function>(MO.getGlobal());
+          if (!Func || !Func->hasFnAttribute(Attribute::NoReturn) ||
+              // We need to keep correct unwind information
+              // even if the function will not return, since the
+              // runtime may need it.
+              !Func->hasFnAttribute(Attribute::NoUnwind))
+            continue;
+          NoReturnInsts.insert(MI);
+          break;
+        }
+
       for (MachineInstr::mop_iterator MOI = MI->operands_begin(),
            MOE = MI->operands_end(); MOI != MOE; ++MOI) {
         MachineOperand &MO = *MOI;
@@ -353,7 +375,25 @@ void VirtRegRewriter::rewrite() {
   }
 
   // Tell MRI about physical registers in use.
-  for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
-    if (!MRI->reg_nodbg_empty(Reg))
-      MRI->setPhysRegUsed(Reg);
+  if (NoReturnInsts.empty()) {
+    for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg)
+      if (!MRI->reg_nodbg_empty(Reg))
+        MRI->setPhysRegUsed(Reg);
+  } else {
+    for (unsigned Reg = 1, RegE = TRI->getNumRegs(); Reg != RegE; ++Reg) {
+      if (MRI->reg_nodbg_empty(Reg))
+        continue;
+      // Check if this register has a use that will impact the rest of the
+      // code. Uses in debug and noreturn instructions do not impact the
+      // generated code.
+      for (MachineRegisterInfo::reg_nodbg_iterator It =
+             MRI->reg_nodbg_begin(Reg),
+             EndIt = MRI->reg_nodbg_end(); It != EndIt; ++It) {
+        if (!NoReturnInsts.count(&(*It))) {
+          MRI->setPhysRegUsed(Reg);
+          break;
+        }
+      }
+    }
+  }
 }
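The VirtRegMap hunk above teaches the rewriter to ignore register definitions made only by calls to noreturn functions in exit blocks. The subtlety the original comment preserves: a noreturn function that may still unwind must not be ignored, because the runtime needs correct unwind state. A sketch with stand-in types:

    #include <cstdio>

    struct Callee {
      bool NoReturn;
      bool NoUnwind;
    };

    static bool canIgnoreDefsOf(const Callee &F, bool InExitBlock) {
      // Only calls in blocks with no successors qualify at all.
      if (!InExitBlock)
        return false;
      // Defs are dead only if control never comes back *and* no unwinder
      // will ever inspect this frame.
      return F.NoReturn && F.NoUnwind;
    }

    int main() {
      Callee Abort = {true, true};   // e.g. abort(): noreturn + nounwind
      Callee Throws = {true, false}; // noreturn but may unwind
      std::printf("abort-like: %d\n", canIgnoreDefsOf(Abort, true));  // 1
      std::printf("throwing:   %d\n", canIgnoreDefsOf(Throws, true)); // 0
      return 0;
    }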
"yes" : "no") << '\n'; - for (unsigned i = 0, e = Attribute.size(); i != e; ++i) { + for (unsigned i = 0, e = Attributes.size(); i != e; ++i) { OS << '\t'; - const char *attrString = AttributeString(Attribute[i].getAttribute()); + const char *attrString = AttributeString(Attributes[i].Attr); if (attrString) OS << attrString; else - OS << format("DW_AT_Unknown_%x", Attribute[i].getAttribute()); + OS << format("DW_AT_Unknown_%x", Attributes[i].Attr); OS << '\t'; - const char *formString = FormEncodingString(Attribute[i].getForm()); + const char *formString = FormEncodingString(Attributes[i].Form); if (formString) OS << formString; else - OS << format("DW_FORM_Unknown_%x", Attribute[i].getForm()); + OS << format("DW_FORM_Unknown_%x", Attributes[i].Form); OS << '\n'; } OS << '\n'; @@ -75,8 +89,8 @@ void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const { uint32_t DWARFAbbreviationDeclaration::findAttributeIndex(uint16_t attr) const { - for (uint32_t i = 0, e = Attribute.size(); i != e; ++i) { - if (Attribute[i].getAttribute() == attr) + for (uint32_t i = 0, e = Attributes.size(); i != e; ++i) { + if (Attributes[i].Attr == attr) return i; } return -1U; diff --git a/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h index 9a3fcd8..e9b072e 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h +++ b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h @@ -10,7 +10,6 @@ #ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H #define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H -#include "DWARFAttribute.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Support/DataExtractor.h" @@ -22,31 +21,33 @@ class DWARFAbbreviationDeclaration { uint32_t Code; uint32_t Tag; bool HasChildren; - SmallVector Attribute; + + struct AttributeSpec { + AttributeSpec(uint16_t Attr, uint16_t Form) : Attr(Attr), Form(Form) {} + uint16_t Attr; + uint16_t Form; + }; + SmallVector Attributes; public: - enum { InvalidCode = 0 }; - DWARFAbbreviationDeclaration() - : Code(InvalidCode), Tag(0), HasChildren(0) {} + DWARFAbbreviationDeclaration(); uint32_t getCode() const { return Code; } uint32_t getTag() const { return Tag; } bool hasChildren() const { return HasChildren; } - uint32_t getNumAttributes() const { return Attribute.size(); } + uint32_t getNumAttributes() const { return Attributes.size(); } uint16_t getAttrByIndex(uint32_t idx) const { - return Attribute.size() > idx ? Attribute[idx].getAttribute() : 0; + return idx < Attributes.size() ? Attributes[idx].Attr : 0; } uint16_t getFormByIndex(uint32_t idx) const { - return Attribute.size() > idx ? Attribute[idx].getForm() : 0; + return idx < Attributes.size() ? 
diff --git a/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h
index 9a3fcd8..e9b072e 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h
+++ b/contrib/llvm/lib/DebugInfo/DWARFAbbreviationDeclaration.h
@@ -10,7 +10,6 @@
 #ifndef LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
 #define LLVM_DEBUGINFO_DWARFABBREVIATIONDECLARATION_H
 
-#include "DWARFAttribute.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Support/DataExtractor.h"
 
@@ -22,31 +21,33 @@ class DWARFAbbreviationDeclaration {
   uint32_t Code;
   uint32_t Tag;
   bool HasChildren;
-  SmallVector<DWARFAttribute, 8> Attribute;
+
+  struct AttributeSpec {
+    AttributeSpec(uint16_t Attr, uint16_t Form) : Attr(Attr), Form(Form) {}
+    uint16_t Attr;
+    uint16_t Form;
+  };
+  SmallVector<AttributeSpec, 8> Attributes;
 public:
-  enum { InvalidCode = 0 };
-  DWARFAbbreviationDeclaration()
-    : Code(InvalidCode), Tag(0), HasChildren(0) {}
+  DWARFAbbreviationDeclaration();
 
   uint32_t getCode() const { return Code; }
   uint32_t getTag() const { return Tag; }
   bool hasChildren() const { return HasChildren; }
-  uint32_t getNumAttributes() const { return Attribute.size(); }
+  uint32_t getNumAttributes() const { return Attributes.size(); }
   uint16_t getAttrByIndex(uint32_t idx) const {
-    return Attribute.size() > idx ? Attribute[idx].getAttribute() : 0;
+    return idx < Attributes.size() ? Attributes[idx].Attr : 0;
   }
   uint16_t getFormByIndex(uint32_t idx) const {
-    return Attribute.size() > idx ? Attribute[idx].getForm() : 0;
+    return idx < Attributes.size() ? Attributes[idx].Form : 0;
   }
 
   uint32_t findAttributeIndex(uint16_t attr) const;
-  bool extract(DataExtractor data, uint32_t* offset_ptr);
-  bool extract(DataExtractor data, uint32_t* offset_ptr, uint32_t code);
-  bool isValid() const { return Code != 0 && Tag != 0; }
+  bool extract(DataExtractor Data, uint32_t* OffsetPtr);
   void dump(raw_ostream &OS) const;
-  const SmallVectorImpl<DWARFAttribute> &getAttributes() const {
-    return Attribute;
-  }
+
+private:
+  void clear();
 };
 
 }
diff --git a/contrib/llvm/lib/DebugInfo/DWARFAttribute.h b/contrib/llvm/lib/DebugInfo/DWARFAttribute.h
deleted file mode 100644
index 6f49b63..0000000
--- a/contrib/llvm/lib/DebugInfo/DWARFAttribute.h
+++ /dev/null
@@ -1,30 +0,0 @@
-//===-- DWARFAttribute.h ----------------------------------------*- C++ -*-===//
-//
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-
-#ifndef LLVM_DEBUGINFO_DWARFATTRIBUTE_H
-#define LLVM_DEBUGINFO_DWARFATTRIBUTE_H
-
-#include "llvm/Support/DataTypes.h"
-
-namespace llvm {
-
-class DWARFAttribute {
-  uint16_t Attribute;
-  uint16_t Form;
-  public:
-  DWARFAttribute(uint16_t attr, uint16_t form)
-    : Attribute(attr), Form(form) {}
-
-  uint16_t getAttribute() const { return Attribute; }
-  uint16_t getForm() const { return Form; }
-};
-
-}
-
-#endif
diff --git a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp
index 4f0eed4..33869d8 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.cpp
@@ -8,96 +8,18 @@
 //===----------------------------------------------------------------------===//
 
 #include "DWARFCompileUnit.h"
-#include "DWARFContext.h"
-#include "llvm/DebugInfo/DWARFFormValue.h"
-#include "llvm/Support/Dwarf.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/raw_ostream.h"
 
-using namespace llvm;
-using namespace dwarf;
-
-DataExtractor DWARFCompileUnit::getDebugInfoExtractor() const {
-  return DataExtractor(InfoSection, isLittleEndian, AddrSize);
-}
-
-bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) {
-  clear();
-
-  Offset = *offset_ptr;
-
-  if (debug_info.isValidOffset(*offset_ptr)) {
-    uint64_t abbrOffset;
-    Length = debug_info.getU32(offset_ptr);
-    Version = debug_info.getU16(offset_ptr);
-    abbrOffset = debug_info.getU32(offset_ptr);
-    AddrSize = debug_info.getU8(offset_ptr);
-
-    bool lengthOK = debug_info.isValidOffset(getNextCompileUnitOffset()-1);
-    bool versionOK = DWARFContext::isSupportedVersion(Version);
-    bool abbrOffsetOK = AbbrevSection.size() > abbrOffset;
-    bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
-
-    if (lengthOK && versionOK && addrSizeOK && abbrOffsetOK && Abbrev != NULL) {
-      Abbrevs = Abbrev->getAbbreviationDeclarationSet(abbrOffset);
-      return true;
-    }
-
-    // reset the offset to where we tried to parse from if anything went wrong
-    *offset_ptr = Offset;
-  }
-
-  return false;
-}
-
-uint32_t
-DWARFCompileUnit::extract(uint32_t offset, DataExtractor debug_info_data,
-                          const DWARFAbbreviationDeclarationSet *abbrevs) {
-  clear();
-
-  Offset = offset;
-
-  if (debug_info_data.isValidOffset(offset)) {
-    Length = debug_info_data.getU32(&offset);
-    Version = debug_info_data.getU16(&offset);
-    bool abbrevsOK = debug_info_data.getU32(&offset) == abbrevs->getOffset();
-    Abbrevs = abbrevs;
-    AddrSize = debug_info_data.getU8(&offset);
-    bool versionOK = DWARFContext::isSupportedVersion(Version);
-    bool addrSizeOK = AddrSize == 4 || AddrSize == 8;
-
-    if (versionOK && addrSizeOK && abbrevsOK &&
-        debug_info_data.isValidOffset(offset))
-      return offset;
-  }
-  return 0;
-}
-
-bool DWARFCompileUnit::extractRangeList(uint32_t RangeListOffset,
-                                        DWARFDebugRangeList &RangeList) const {
-  // Require that compile unit is extracted.
-  assert(DieArray.size() > 0);
-  DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize);
-  return RangeList.extract(RangesData, &RangeListOffset);
-}
-
-void DWARFCompileUnit::clear() {
-  Offset = 0;
-  Length = 0;
-  Version = 0;
-  Abbrevs = 0;
-  AddrSize = 0;
-  BaseAddr = 0;
-  clearDIEs(false);
-}
+using namespace llvm;
 
 void DWARFCompileUnit::dump(raw_ostream &OS) {
-  OS << format("0x%08x", Offset) << ": Compile Unit:"
-     << " length = " << format("0x%08x", Length)
-     << " version = " << format("0x%04x", Version)
-     << " abbr_offset = " << format("0x%04x", Abbrevs->getOffset())
-     << " addr_size = " << format("0x%02x", AddrSize)
-     << " (next CU at " << format("0x%08x", getNextCompileUnitOffset())
+  OS << format("0x%08x", getOffset()) << ": Compile Unit:"
+     << " length = " << format("0x%08x", getLength())
+     << " version = " << format("0x%04x", getVersion())
+     << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset())
+     << " addr_size = " << format("0x%02x", getAddressByteSize())
+     << " (next unit at " << format("0x%08x", getNextUnitOffset())
      << ")\n";
 
   const DWARFDebugInfoEntryMinimal *CU = getCompileUnitDIE(false);
@@ -105,168 +27,6 @@ void DWARFCompileUnit::dump(raw_ostream &OS) {
     CU->dump(OS, this, -1U);
 }
 
-const char *DWARFCompileUnit::getCompilationDir() {
-  extractDIEsIfNeeded(true);
-  if (DieArray.empty())
-    return 0;
-  return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0);
-}
-
-void DWARFCompileUnit::setDIERelations() {
-  if (DieArray.empty())
-    return;
-  DWARFDebugInfoEntryMinimal *die_array_begin = &DieArray.front();
-  DWARFDebugInfoEntryMinimal *die_array_end = &DieArray.back();
-  DWARFDebugInfoEntryMinimal *curr_die;
-  // We purposely are skipping the last element in the array in the loop below
-  // so that we can always have a valid next item
-  for (curr_die = die_array_begin; curr_die < die_array_end; ++curr_die) {
-    // Since our loop doesn't include the last element, we can always
-    // safely access the next die in the array.
-    DWARFDebugInfoEntryMinimal *next_die = curr_die + 1;
-
-    const DWARFAbbreviationDeclaration *curr_die_abbrev =
-      curr_die->getAbbreviationDeclarationPtr();
-
-    if (curr_die_abbrev) {
-      // Normal DIE
-      if (curr_die_abbrev->hasChildren())
-        next_die->setParent(curr_die);
-      else
-        curr_die->setSibling(next_die);
-    } else {
-      // NULL DIE that terminates a sibling chain
-      DWARFDebugInfoEntryMinimal *parent = curr_die->getParent();
-      if (parent)
-        parent->setSibling(next_die);
-    }
-  }
-
-  // Since we skipped the last element, we need to fix it up!
-  if (die_array_begin < die_array_end)
-    curr_die->setParent(die_array_begin);
-}
-
-size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) {
-  const size_t initial_die_array_size = DieArray.size();
-  if ((cu_die_only && initial_die_array_size > 0) ||
-      initial_die_array_size > 1)
-    return 0; // Already parsed
-
-  // Set the offset to that of the first DIE and calculate the start of the
-  // next compilation unit header.
-  uint32_t offset = getFirstDIEOffset();
-  uint32_t next_cu_offset = getNextCompileUnitOffset();
-
-  DWARFDebugInfoEntryMinimal die;
-  // Keep a flat array of the DIE for binary lookup by DIE offset
-  uint32_t depth = 0;
-  // We are in our compile unit, parse starting at the offset
-  // we were told to parse
-
-  const uint8_t *fixed_form_sizes =
-    DWARFFormValue::getFixedFormSizes(getAddressByteSize(), getVersion());
-
-  while (offset < next_cu_offset &&
-         die.extractFast(this, fixed_form_sizes, &offset)) {
-
-    if (depth == 0) {
-      uint64_t base_addr =
-        die.getAttributeValueAsUnsigned(this, DW_AT_low_pc, -1U);
-      if (base_addr == -1U)
-        base_addr = die.getAttributeValueAsUnsigned(this, DW_AT_entry_pc, 0);
-      setBaseAddress(base_addr);
-    }
-
-    if (cu_die_only) {
-      addDIE(die);
-      return 1;
-    }
-    else if (depth == 0 && initial_die_array_size == 1)
-      // Don't append the CU die as we already did that
-      ;
-    else
-      addDIE(die);
-
-    const DWARFAbbreviationDeclaration *abbrDecl =
-      die.getAbbreviationDeclarationPtr();
-    if (abbrDecl) {
-      // Normal DIE
-      if (abbrDecl->hasChildren())
-        ++depth;
-    } else {
-      // NULL DIE.
-      if (depth > 0)
-        --depth;
-      if (depth == 0)
-        break;  // We are done with this compile unit!
-    }
-
-  }
-
-  // Give a little bit of info if we encounter corrupt DWARF (our offset
-  // should always terminate at or before the start of the next compilation
-  // unit header).
-  if (offset > next_cu_offset)
-    fprintf(stderr, "warning: DWARF compile unit extends beyond its "
-            "bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), offset);
-
-  setDIERelations();
-  return DieArray.size();
-}
-
-void DWARFCompileUnit::clearDIEs(bool keep_compile_unit_die) {
-  if (DieArray.size() > (unsigned)keep_compile_unit_die) {
-    // std::vectors never get any smaller when resized to a smaller size,
-    // or when clear() or erase() are called, the size will report that it
-    // is smaller, but the memory allocated remains intact (call capacity()
-    // to see this). So we need to create a temporary vector and swap the
-    // contents which will cause just the internal pointers to be swapped
-    // so that when "tmp_array" goes out of scope, it will destroy the
-    // contents.
-
-    // Save at least the compile unit DIE
-    std::vector<DWARFDebugInfoEntryMinimal> tmpArray;
-    DieArray.swap(tmpArray);
-    if (keep_compile_unit_die)
-      DieArray.push_back(tmpArray.front());
-  }
-}
-
-void
-DWARFCompileUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
-                                         bool clear_dies_if_already_not_parsed){
-  // This function is usually called if there in no .debug_aranges section
-  // in order to produce a compile unit level set of address ranges that
-  // is accurate. If the DIEs weren't parsed, then we don't want all dies for
-  // all compile units to stay loaded when they weren't needed. So we can end
-  // up parsing the DWARF and then throwing them all away to keep memory usage
-  // down.
-  const bool clear_dies = extractDIEsIfNeeded(false) > 1 &&
-                          clear_dies_if_already_not_parsed;
-  DieArray[0].buildAddressRangeTable(this, debug_aranges);
-
-  // Keep memory down by clearing DIEs if this generate function
-  // caused them to be parsed.
-  if (clear_dies)
-    clearDIEs(true);
-}
-
-DWARFDebugInfoEntryMinimal::InlinedChain
-DWARFCompileUnit::getInlinedChainForAddress(uint64_t Address) {
-  // First, find a subprogram that contains the given address (the root
-  // of inlined chain).
-  extractDIEsIfNeeded(false);
-  const DWARFDebugInfoEntryMinimal *SubprogramDIE = 0;
-  for (size_t i = 0, n = DieArray.size(); i != n; i++) {
-    if (DieArray[i].isSubprogramDIE() &&
-        DieArray[i].addressRangeContainsAddress(this, Address)) {
-      SubprogramDIE = &DieArray[i];
-      break;
-    }
-  }
-  // Get inlined chain rooted at this subprogram DIE.
-  if (!SubprogramDIE)
-    return DWARFDebugInfoEntryMinimal::InlinedChain();
-  return SubprogramDIE->getInlinedChainForAddress(this, Address);
+// VTable anchor.
+DWARFCompileUnit::~DWARFCompileUnit() {
 }
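The accessors removed from DWARFCompileUnit (now inherited from DWARFUnit) encode the DWARF32 compile-unit header arithmetic: an 11-byte header (4-byte length, 2-byte version, 4-byte abbrev offset, 1-byte address size), where the length field counts the bytes after itself. A sketch of that arithmetic:

    #include <cstdint>
    #include <cstdio>

    struct CUHeader {
      uint32_t Offset; // where this unit starts in .debug_info
      uint32_t Length; // DWARF32 unit length (excludes the length field)
    };

    static uint32_t headerSize() { return 4 + 2 + 4 + 1; } // 11 bytes

    static uint32_t firstDIEOffset(const CUHeader &H) {
      return H.Offset + headerSize();
    }

    static uint32_t nextUnitOffset(const CUHeader &H) {
      return H.Offset + H.Length + 4; // +4 re-adds the length field itself
    }

    int main() {
      CUHeader H = {0x40, 0x9a};
      std::printf("first DIE at 0x%x, next unit at 0x%x\n",
                  firstDIEOffset(H), nextUnitOffset(H)); // 0x4b, 0xde
      return 0;
    }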
diff --git a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h
index 2a74605..1c9573b 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h
+++ b/contrib/llvm/lib/DebugInfo/DWARFCompileUnit.h
@@ -10,132 +10,19 @@
 #ifndef LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H
 #define LLVM_DEBUGINFO_DWARFCOMPILEUNIT_H
 
-#include "DWARFDebugAbbrev.h"
-#include "DWARFDebugInfoEntry.h"
-#include "DWARFDebugRangeList.h"
-#include "DWARFRelocMap.h"
-#include <vector>
+#include "DWARFUnit.h"
 
 namespace llvm {
 
-class DWARFDebugAbbrev;
-class StringRef;
-class raw_ostream;
-
-class DWARFCompileUnit {
-  const DWARFDebugAbbrev *Abbrev;
-  StringRef InfoSection;
-  StringRef AbbrevSection;
-  StringRef RangeSection;
-  StringRef StringSection;
-  StringRef StringOffsetSection;
-  StringRef AddrOffsetSection;
-  const RelocAddrMap *RelocMap;
-  bool isLittleEndian;
-
-  uint32_t Offset;
-  uint32_t Length;
-  uint16_t Version;
-  const DWARFAbbreviationDeclarationSet *Abbrevs;
-  uint8_t AddrSize;
-  uint64_t BaseAddr;
-  // The compile unit debug information entry item.
-  std::vector<DWARFDebugInfoEntryMinimal> DieArray;
+class DWARFCompileUnit : public DWARFUnit {
 public:
   DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS,
                    StringRef RS, StringRef SS, StringRef SOS, StringRef AOS,
-                   const RelocAddrMap *M, bool LE) :
-    Abbrev(DA), InfoSection(IS), AbbrevSection(AS),
-    RangeSection(RS), StringSection(SS), StringOffsetSection(SOS),
-    AddrOffsetSection(AOS), RelocMap(M), isLittleEndian(LE) {
-    clear();
-  }
-
-  StringRef getStringSection() const { return StringSection; }
-  StringRef getStringOffsetSection() const { return StringOffsetSection; }
-  StringRef getAddrOffsetSection() const { return AddrOffsetSection; }
-  const RelocAddrMap *getRelocMap() const { return RelocMap; }
-  DataExtractor getDebugInfoExtractor() const;
-
-  bool extract(DataExtractor debug_info, uint32_t* offset_ptr);
-  uint32_t extract(uint32_t offset, DataExtractor debug_info_data,
-                   const DWARFAbbreviationDeclarationSet *abbrevs);
-
-  /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it
-  /// hasn't already been done. Returns the number of DIEs parsed at this call.
-  size_t extractDIEsIfNeeded(bool cu_die_only);
-  /// extractRangeList - extracts the range list referenced by this compile
-  /// unit from .debug_ranges section. Returns true on success.
-  /// Requires that compile unit is already extracted.
-  bool extractRangeList(uint32_t RangeListOffset,
-                        DWARFDebugRangeList &RangeList) const;
-  void clear();
+                   const RelocAddrMap *M, bool LE)
+      : DWARFUnit(DA, IS, AS, RS, SS, SOS, AOS, M, LE) {}
 
   void dump(raw_ostream &OS);
-  uint32_t getOffset() const { return Offset; }
-  /// Size in bytes of the compile unit header.
-  uint32_t getSize() const { return 11; }
-  bool containsDIEOffset(uint32_t die_offset) const {
-    return die_offset >= getFirstDIEOffset() &&
-           die_offset < getNextCompileUnitOffset();
-  }
-  uint32_t getFirstDIEOffset() const { return Offset + getSize(); }
-  uint32_t getNextCompileUnitOffset() const { return Offset + Length + 4; }
-  /// Size in bytes of the .debug_info data associated with this compile unit.
-  size_t getDebugInfoSize() const { return Length + 4 - getSize(); }
-  uint32_t getLength() const { return Length; }
-  uint16_t getVersion() const { return Version; }
-  const DWARFAbbreviationDeclarationSet *getAbbreviations() const {
-    return Abbrevs;
-  }
-  uint8_t getAddressByteSize() const { return AddrSize; }
-  uint64_t getBaseAddress() const { return BaseAddr; }
-
-  void setBaseAddress(uint64_t base_addr) {
-    BaseAddr = base_addr;
-  }
-
-  const DWARFDebugInfoEntryMinimal *
-  getCompileUnitDIE(bool extract_cu_die_only = true) {
-    extractDIEsIfNeeded(extract_cu_die_only);
-    if (DieArray.empty())
-      return NULL;
-    return &DieArray[0];
-  }
-
-  const char *getCompilationDir();
-
-  /// setDIERelations - We read in all of the DIE entries into our flat list
-  /// of DIE entries and now we need to go back through all of them and set the
-  /// parent, sibling and child pointers for quick DIE navigation.
-  void setDIERelations();
-
-  void addDIE(DWARFDebugInfoEntryMinimal &die) {
-    // The average bytes per DIE entry has been seen to be
-    // around 14-20 so lets pre-reserve the needed memory for
-    // our DIE entries accordingly. Search forward for "Compute
-    // average bytes per DIE" to see #if'ed out code that does
-    // that determination.
-
-    // Only reserve the memory if we are adding children of
-    // the main compile unit DIE. The compile unit DIE is always
-    // the first entry, so if our size is 1, then we are adding
-    // the first compile unit child DIE and should reserve
-    // the memory.
-    if (DieArray.empty())
-      DieArray.reserve(getDebugInfoSize() / 14);
-    DieArray.push_back(die);
-  }
-
-  void clearDIEs(bool keep_compile_unit_die);
-
-  void buildAddressRangeTable(DWARFDebugAranges *debug_aranges,
-                              bool clear_dies_if_already_not_parsed);
-
-  /// getInlinedChainForAddress - fetches inlined chain for a given address.
-  /// Returns empty chain if there is no subprogram containing address.
-  DWARFDebugInfoEntryMinimal::InlinedChain getInlinedChainForAddress(
-      uint64_t Address);
+
+  // VTable anchor.
+  ~DWARFCompileUnit() LLVM_OVERRIDE;
 };
 
 }
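The slimmed-down DWARFCompileUnit above also picks up the "vtable anchor" idiom: one virtual member (here the destructor) is defined out-of-line in the .cpp file so the compiler emits the class's vtable in exactly one object file instead of weakly in every translation unit. A minimal sketch:

    #include <cstdio>

    // header ------------------------------------------------------------
    struct Unit {
      virtual ~Unit();
      virtual const char *name() const { return "unit"; }
    };
    struct CompileUnit : Unit {
      ~CompileUnit(); // out-of-line on purpose: the anchor
      const char *name() const { return "compile unit"; }
    };

    // one .cpp file -----------------------------------------------------
    Unit::~Unit() {}
    CompileUnit::~CompileUnit() {} // vtable for CompileUnit lives here

    int main() {
      CompileUnit CU;
      Unit &U = CU;
      std::printf("%s\n", U.name());
      return 0;
    }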
diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp
index 9f52133..e477190 100644
--- a/contrib/llvm/lib/DebugInfo/DWARFContext.cpp
+++ b/contrib/llvm/lib/DebugInfo/DWARFContext.cpp
@@ -19,9 +19,45 @@
 #include <algorithm>
 using namespace llvm;
 using namespace dwarf;
+using namespace object;
 
 typedef DWARFDebugLine::LineTable DWARFLineTable;
 
+DWARFContext::~DWARFContext() {
+  DeleteContainerPointers(CUs);
+  DeleteContainerPointers(TUs);
+  DeleteContainerPointers(DWOCUs);
+}
+
+static void dumpPubSection(raw_ostream &OS, StringRef Name, StringRef Data,
+                           bool LittleEndian, bool GnuStyle) {
+  OS << "\n." << Name << " contents:\n";
+  DataExtractor pubNames(Data, LittleEndian, 0);
+  uint32_t offset = 0;
+  OS << "length = " << format("0x%08x", pubNames.getU32(&offset));
+  OS << " version = " << format("0x%04x", pubNames.getU16(&offset));
+  OS << " unit_offset = " << format("0x%08x", pubNames.getU32(&offset));
+  OS << " unit_size = " << format("0x%08x", pubNames.getU32(&offset)) << '\n';
+  if (GnuStyle)
+    OS << "Offset     Linkage  Kind     Name\n";
+  else
+    OS << "Offset     Name\n";
+
+  while (offset < Data.size()) {
+    uint32_t dieRef = pubNames.getU32(&offset);
+    if (dieRef == 0)
+      break;
+    OS << format("0x%8.8x ", dieRef);
+    if (GnuStyle) {
+      PubIndexEntryDescriptor desc(pubNames.getU8(&offset));
+      OS << format("%-8s", dwarf::GDBIndexEntryLinkageString(desc.Linkage))
+         << ' ' << format("%-8s", dwarf::GDBIndexEntryKindString(desc.Kind))
+         << ' ';
+    }
+    OS << '\"' << pubNames.getCStr(&offset) << "\"\n";
+  }
+}
+
 void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
   if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) {
     OS << ".debug_abbrev contents:\n";
@@ -34,6 +70,17 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
       getCompileUnitAtIndex(i)->dump(OS);
   }
 
+  if (DumpType == DIDT_All || DumpType == DIDT_Types) {
+    OS << "\n.debug_types contents:\n";
+    for (unsigned i = 0, e = getNumTypeUnits(); i != e; ++i)
+      getTypeUnitAtIndex(i)->dump(OS);
+  }
+
+  if (DumpType == DIDT_All || DumpType == DIDT_Loc) {
+    OS << "\n.debug_loc contents:\n";
+    getDebugLoc()->dump(OS);
+  }
+
   if (DumpType == DIDT_All || DumpType == DIDT_Frames) {
     OS << "\n.debug_frame contents:\n";
     getDebugFrame()->dump(OS);
@@ -55,13 +102,13 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
       DWARFCompileUnit *cu = getCompileUnitAtIndex(i);
       savedAddressByteSize = cu->getAddressByteSize();
       unsigned stmtOffset =
-        cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list,
-                                                             -1U);
+        cu->getCompileUnitDIE()->getAttributeValueAsSectionOffset(
+            cu, DW_AT_stmt_list, -1U);
       if (stmtOffset != -1U) {
-        DataExtractor lineData(getLineSection(), isLittleEndian(),
+        DataExtractor lineData(getLineSection().Data, isLittleEndian(),
                                savedAddressByteSize);
         DWARFDebugLine::DumpingState state(OS);
-        DWARFDebugLine::parseStatementTable(lineData, &lineRelocMap(), &stmtOffset, state);
+        DWARFDebugLine::parseStatementTable(lineData, &getLineSection().Relocs, &stmtOffset, state);
       }
     }
   }
@@ -91,23 +138,21 @@ void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) {
     rangeList.dump(OS);
   }
 
-  if (DumpType == DIDT_All || DumpType == DIDT_Pubnames) {
-    OS << "\n.debug_pubnames contents:\n";
-    DataExtractor pubNames(getPubNamesSection(), isLittleEndian(), 0);
-    offset = 0;
-    OS << "Length:                " << pubNames.getU32(&offset) << "\n";
-    OS << "Version:               " << pubNames.getU16(&offset) << "\n";
-    OS << "Offset in .debug_info: " << pubNames.getU32(&offset) << "\n";
-    OS << "Size:                  " << pubNames.getU32(&offset) << "\n";
-    OS << "\n  Offset    Name\n";
-    while (offset < getPubNamesSection().size()) {
-      uint32_t n = pubNames.getU32(&offset);
-      if (n == 0)
-        break;
-      OS << format("%8x    ", n);
-      OS << pubNames.getCStr(&offset) << "\n";
-    }
-  }
+  if (DumpType == DIDT_All || DumpType == DIDT_Pubnames)
+    dumpPubSection(OS, "debug_pubnames", getPubNamesSection(),
+                   isLittleEndian(), false);
+
+  if (DumpType == DIDT_All || DumpType == DIDT_Pubtypes)
+    dumpPubSection(OS, "debug_pubtypes", getPubTypesSection(),
+                   isLittleEndian(), false);
getGnuPubNamesSection(), + isLittleEndian(), true /* GnuStyle */); + + if (DumpType == DIDT_All || DumpType == DIDT_GnuPubtypes) + dumpPubSection(OS, "debug_gnu_pubtypes", getGnuPubTypesSection(), + isLittleEndian(), true /* GnuStyle */); if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) { const DWARFDebugAbbrev *D = getDebugAbbrevDWO(); @@ -170,17 +215,23 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrevDWO() { return AbbrevDWO.get(); } +const DWARFDebugLoc *DWARFContext::getDebugLoc() { + if (Loc) + return Loc.get(); + + DataExtractor LocData(getLocSection().Data, isLittleEndian(), 0); + Loc.reset(new DWARFDebugLoc(getLocSection().Relocs)); + // assume all compile units have the same address byte size + if (getNumCompileUnits()) + Loc->parse(LocData, getCompileUnitAtIndex(0)->getAddressByteSize()); + return Loc.get(); +} + const DWARFDebugAranges *DWARFContext::getDebugAranges() { if (Aranges) return Aranges.get(); - DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0); - Aranges.reset(new DWARFDebugAranges()); - Aranges->extract(arangesData); - // Generate aranges from DIEs: even if .debug_aranges section is present, - // it may describe only a small subset of compilation units, so we need to - // manually build aranges for the rest of them. Aranges->generate(this); return Aranges.get(); } @@ -208,11 +259,11 @@ const DWARFDebugFrame *DWARFContext::getDebugFrame() { const DWARFLineTable * DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { if (!Line) - Line.reset(new DWARFDebugLine(&lineRelocMap())); + Line.reset(new DWARFDebugLine(&getLineSection().Relocs)); unsigned stmtOffset = - cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, - -1U); + cu->getCompileUnitDIE()->getAttributeValueAsSectionOffset( + cu, DW_AT_stmt_list, -1U); if (stmtOffset == -1U) return 0; // No line table for this compile unit. @@ -221,64 +272,79 @@ DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { return lt; // We have to parse it first. 
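The pub sections handled by dumpPubSection above share one simple layout: a fixed header (set length, version, and the offset and size of the corresponding .debug_info unit) followed by (DIE offset, name) pairs terminated by a zero offset, with the GNU variants adding a one-byte linkage/kind descriptor before each name. A minimal standalone sketch of that decoding, assuming a little-endian buffer and using illustrative names rather than the LLVM API (bounds checks omitted for brevity):

#include <cstdint>
#include <cstdio>
#include <cstring>
#include <string>
#include <vector>

// Little-endian read helper, standing in for DataExtractor::getU16/getU32.
template <typename T>
static T readLE(const std::vector<uint8_t> &Data, uint32_t &Off) {
  T V = 0;
  std::memcpy(&V, &Data[Off], sizeof(T)); // assumes a little-endian host
  Off += sizeof(T);
  return V;
}

// Decodes one .debug_pubnames / .debug_pubtypes style set.
static void dumpPubSet(const std::vector<uint8_t> &Data, bool GnuStyle) {
  uint32_t Off = 0;
  uint32_t Length = readLE<uint32_t>(Data, Off);
  uint16_t Version = readLE<uint16_t>(Data, Off);
  uint32_t UnitOffset = readLE<uint32_t>(Data, Off);
  uint32_t UnitSize = readLE<uint32_t>(Data, Off);
  std::printf("length=0x%08x version=0x%04x unit_offset=0x%08x "
              "unit_size=0x%08x\n", Length, Version, UnitOffset, UnitSize);
  while (Off < Data.size()) {
    uint32_t DieRef = readLE<uint32_t>(Data, Off);
    if (DieRef == 0) // a zero DIE offset terminates the set
      break;
    if (GnuStyle)
      (void)readLE<uint8_t>(Data, Off); // linkage/kind descriptor byte
    std::string Name(reinterpret_cast<const char *>(&Data[Off]));
    Off += Name.size() + 1; // step past the trailing NUL
    std::printf("0x%08x \"%s\"\n", DieRef, Name.c_str());
  }
}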
- DataExtractor lineData(getLineSection(), isLittleEndian(), + DataExtractor lineData(getLineSection().Data, isLittleEndian(), cu->getAddressByteSize()); return Line->getOrParseLineTable(lineData, stmtOffset); } void DWARFContext::parseCompileUnits() { uint32_t offset = 0; - const DataExtractor &DIData = DataExtractor(getInfoSection(), + const DataExtractor &DIData = DataExtractor(getInfoSection().Data, isLittleEndian(), 0); while (DIData.isValidOffset(offset)) { - CUs.push_back(DWARFCompileUnit(getDebugAbbrev(), getInfoSection(), - getAbbrevSection(), getRangeSection(), - getStringSection(), StringRef(), - getAddrSection(), - &infoRelocMap(), - isLittleEndian())); - if (!CUs.back().extract(DIData, &offset)) { - CUs.pop_back(); + OwningPtr CU(new DWARFCompileUnit( + getDebugAbbrev(), getInfoSection().Data, getAbbrevSection(), + getRangeSection(), getStringSection(), StringRef(), getAddrSection(), + &getInfoSection().Relocs, isLittleEndian())); + if (!CU->extract(DIData, &offset)) { break; } + CUs.push_back(CU.take()); + offset = CUs.back()->getNextUnitOffset(); + } +} - offset = CUs.back().getNextCompileUnitOffset(); +void DWARFContext::parseTypeUnits() { + const std::map &Sections = getTypesSections(); + for (std::map::const_iterator + I = Sections.begin(), + E = Sections.end(); + I != E; ++I) { + uint32_t offset = 0; + const DataExtractor &DIData = + DataExtractor(I->second.Data, isLittleEndian(), 0); + while (DIData.isValidOffset(offset)) { + OwningPtr TU(new DWARFTypeUnit( + getDebugAbbrev(), I->second.Data, getAbbrevSection(), + getRangeSection(), getStringSection(), StringRef(), getAddrSection(), + &I->second.Relocs, isLittleEndian())); + if (!TU->extract(DIData, &offset)) + break; + TUs.push_back(TU.take()); + offset = TUs.back()->getNextUnitOffset(); + } } } void DWARFContext::parseDWOCompileUnits() { uint32_t offset = 0; - const DataExtractor &DIData = DataExtractor(getInfoDWOSection(), - isLittleEndian(), 0); + const DataExtractor &DIData = + DataExtractor(getInfoDWOSection().Data, isLittleEndian(), 0); while (DIData.isValidOffset(offset)) { - DWOCUs.push_back(DWARFCompileUnit(getDebugAbbrevDWO(), getInfoDWOSection(), - getAbbrevDWOSection(), - getRangeDWOSection(), - getStringDWOSection(), - getStringOffsetDWOSection(), - getAddrSection(), - &infoDWORelocMap(), - isLittleEndian())); - if (!DWOCUs.back().extract(DIData, &offset)) { - DWOCUs.pop_back(); + OwningPtr DWOCU(new DWARFCompileUnit( + getDebugAbbrevDWO(), getInfoDWOSection().Data, getAbbrevDWOSection(), + getRangeDWOSection(), getStringDWOSection(), + getStringOffsetDWOSection(), getAddrSection(), + &getInfoDWOSection().Relocs, isLittleEndian())); + if (!DWOCU->extract(DIData, &offset)) { break; } - - offset = DWOCUs.back().getNextCompileUnitOffset(); + DWOCUs.push_back(DWOCU.take()); + offset = DWOCUs.back()->getNextUnitOffset(); } } namespace { struct OffsetComparator { - bool operator()(const DWARFCompileUnit &LHS, - const DWARFCompileUnit &RHS) const { - return LHS.getOffset() < RHS.getOffset(); + bool operator()(const DWARFCompileUnit *LHS, + const DWARFCompileUnit *RHS) const { + return LHS->getOffset() < RHS->getOffset(); } - bool operator()(const DWARFCompileUnit &LHS, uint32_t RHS) const { - return LHS.getOffset() < RHS; + bool operator()(const DWARFCompileUnit *LHS, uint32_t RHS) const { + return LHS->getOffset() < RHS; } - bool operator()(uint32_t LHS, const DWARFCompileUnit &RHS) const { - return LHS < RHS.getOffset(); + bool operator()(uint32_t LHS, const DWARFCompileUnit *RHS) const { + return LHS < 
RHS->getOffset(); } }; } @@ -287,10 +353,11 @@ DWARFCompileUnit *DWARFContext::getCompileUnitForOffset(uint32_t Offset) { if (CUs.empty()) parseCompileUnits(); - DWARFCompileUnit *CU = std::lower_bound(CUs.begin(), CUs.end(), Offset, - OffsetComparator()); - if (CU != CUs.end()) - return &*CU; + DWARFCompileUnit **CU = + std::lower_bound(CUs.begin(), CUs.end(), Offset, OffsetComparator()); + if (CU != CUs.end()) { + return *CU; + } return 0; } @@ -358,11 +425,11 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address, // The address may correspond to instruction in some inlined function, // so we have to build the chain of inlined functions and take the // name of the topmost function in it. - const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain = + const DWARFDebugInfoEntryInlinedChain &InlinedChain = CU->getInlinedChainForAddress(Address); - if (InlinedChain.size() > 0) { - const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain[0]; - if (const char *Name = TopFunctionDIE.getSubroutineName(CU)) + if (InlinedChain.DIEs.size() > 0) { + const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0]; + if (const char *Name = TopFunctionDIE.getSubroutineName(InlinedChain.U)) FunctionName = Name; } } @@ -391,23 +458,20 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address, // The address may correspond to instruction in some inlined function, // so we have to build the chain of inlined functions and take the // name of the topmost function in it. - const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain = + const DWARFDebugInfoEntryInlinedChain &InlinedChain = CU->getInlinedChainForAddress(Address); - if (InlinedChain.size() > 0) { - const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain[0]; - if (const char *Name = TopFunctionDIE.getSubroutineName(CU)) + if (InlinedChain.DIEs.size() > 0) { + const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain.DIEs[0]; + if (const char *Name = TopFunctionDIE.getSubroutineName(InlinedChain.U)) FunctionName = Name; } } - StringRef FuncNameRef = StringRef(FunctionName); - // If the Specifier says we don't need FileLineInfo, just // return the top-most function at the starting address. 
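The chain consumed here is produced by getInlinedChainForAddress: starting at the compile unit DIE, descend into whichever child covers the address, collect every subprogram or inlined_subroutine DIE met on the way, then reverse so the deepest inlined frame ends up at index 0 (which is why DIEs[0] supplies the function name). A hedged model of that walk over a toy DIE type (not the real DWARFDebugInfoEntryMinimal):

#include <algorithm>
#include <cstdint>
#include <vector>

struct ToyDIE {
  bool IsSubroutine;              // subprogram or inlined_subroutine
  uint64_t LowPC, HighPC;         // covers [LowPC, HighPC)
  std::vector<const ToyDIE *> Children;
};

// Collects the subroutine DIEs whose ranges contain Addr, deepest
// (most inlined) frame first.
static std::vector<const ToyDIE *> inlinedChain(const ToyDIE *Root,
                                                uint64_t Addr) {
  std::vector<const ToyDIE *> Chain;
  for (const ToyDIE *DIE = Root; DIE;) {
    if (DIE->IsSubroutine)
      Chain.push_back(DIE);
    const ToyDIE *Next = 0;
    for (size_t i = 0; i < DIE->Children.size(); ++i) {
      const ToyDIE *C = DIE->Children[i];
      if (C->LowPC <= Addr && Addr < C->HighPC) {
        Next = C; // assume at most one child covers the address
        break;
      }
    }
    DIE = Next;
  }
  std::reverse(Chain.begin(), Chain.end());
  return Chain; // Chain.front() is the frame a symbolizer reports
}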
if (!Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { - Lines.push_back(std::make_pair(Address, - DILineInfo(StringRef(""), - FuncNameRef, 0, 0))); + Lines.push_back( + std::make_pair(Address, DILineInfo("", FunctionName, 0, 0))); return Lines; } @@ -428,9 +492,8 @@ DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address, std::string FileName = ""; getFileNameForCompileUnit(CU, LineTable, Row.File, NeedsAbsoluteFilePath, FileName); - Lines.push_back(std::make_pair(Row.Address, - DILineInfo(StringRef(FileName), - FuncNameRef, Row.Line, Row.Column))); + Lines.push_back(std::make_pair( + Row.Address, DILineInfo(FileName, FunctionName, Row.Line, Row.Column))); } return Lines; @@ -442,23 +505,23 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, if (!CU) return DIInliningInfo(); - const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain = + const DWARFDebugInfoEntryInlinedChain &InlinedChain = CU->getInlinedChainForAddress(Address); - if (InlinedChain.size() == 0) + if (InlinedChain.DIEs.size() == 0) return DIInliningInfo(); DIInliningInfo InliningInfo; uint32_t CallFile = 0, CallLine = 0, CallColumn = 0; const DWARFLineTable *LineTable = 0; - for (uint32_t i = 0, n = InlinedChain.size(); i != n; i++) { - const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain[i]; + for (uint32_t i = 0, n = InlinedChain.DIEs.size(); i != n; i++) { + const DWARFDebugInfoEntryMinimal &FunctionDIE = InlinedChain.DIEs[i]; std::string FileName = ""; std::string FunctionName = ""; uint32_t Line = 0; uint32_t Column = 0; // Get function name if necessary. if (Specifier.needs(DILineInfoSpecifier::FunctionName)) { - if (const char *Name = FunctionDIE.getSubroutineName(CU)) + if (const char *Name = FunctionDIE.getSubroutineName(InlinedChain.U)) FunctionName = Name; } if (Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { @@ -482,7 +545,8 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, } // Get call file/line/column of a current DIE. if (i + 1 < n) { - FunctionDIE.getCallerFrame(CU, CallFile, CallLine, CallColumn); + FunctionDIE.getCallerFrame(InlinedChain.U, CallFile, CallLine, + CallColumn); } } DILineInfo Frame(StringRef(FileName), StringRef(FunctionName), @@ -538,43 +602,67 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : UncompressedSections.push_back(UncompressedSection.take()); } - StringRef *Section = StringSwitch(name) - .Case("debug_info", &InfoSection) - .Case("debug_abbrev", &AbbrevSection) - .Case("debug_line", &LineSection) - .Case("debug_aranges", &ARangeSection) - .Case("debug_frame", &DebugFrameSection) - .Case("debug_str", &StringSection) - .Case("debug_ranges", &RangeSection) - .Case("debug_pubnames", &PubNamesSection) - .Case("debug_info.dwo", &InfoDWOSection) - .Case("debug_abbrev.dwo", &AbbrevDWOSection) - .Case("debug_str.dwo", &StringDWOSection) - .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) - .Case("debug_addr", &AddrSection) - // Any more debug info sections go here. - .Default(0); - if (!Section) - continue; - *Section = data; - if (name == "debug_ranges") { - // FIXME: Use the other dwo range section when we emit it. 
- RangeDWOSection = data; + StringRef *Section = + StringSwitch(name) + .Case("debug_info", &InfoSection.Data) + .Case("debug_abbrev", &AbbrevSection) + .Case("debug_loc", &LocSection.Data) + .Case("debug_line", &LineSection.Data) + .Case("debug_aranges", &ARangeSection) + .Case("debug_frame", &DebugFrameSection) + .Case("debug_str", &StringSection) + .Case("debug_ranges", &RangeSection) + .Case("debug_pubnames", &PubNamesSection) + .Case("debug_pubtypes", &PubTypesSection) + .Case("debug_gnu_pubnames", &GnuPubNamesSection) + .Case("debug_gnu_pubtypes", &GnuPubTypesSection) + .Case("debug_info.dwo", &InfoDWOSection.Data) + .Case("debug_abbrev.dwo", &AbbrevDWOSection) + .Case("debug_str.dwo", &StringDWOSection) + .Case("debug_str_offsets.dwo", &StringOffsetDWOSection) + .Case("debug_addr", &AddrSection) + // Any more debug info sections go here. + .Default(0); + if (Section) { + *Section = data; + if (name == "debug_ranges") { + // FIXME: Use the other dwo range section when we emit it. + RangeDWOSection = data; + } + } else if (name == "debug_types") { + // Find debug_types data by section rather than name as there are + // multiple, comdat grouped, debug_types sections. + TypesSections[*i].Data = data; } + section_iterator RelocatedSection = i->getRelocatedSection(); + if (RelocatedSection == Obj->end_sections()) + continue; + + StringRef RelSecName; + RelocatedSection->getName(RelSecName); + RelSecName = RelSecName.substr( + RelSecName.find_first_not_of("._")); // Skip . and _ prefixes. + // TODO: Add support for relocations in other sections as needed. // Record relocations for the debug_info and debug_line sections. - RelocAddrMap *Map = StringSwitch(name) - .Case("debug_info", &InfoRelocMap) - .Case("debug_info.dwo", &InfoDWORelocMap) - .Case("debug_line", &LineRelocMap) + RelocAddrMap *Map = StringSwitch(RelSecName) + .Case("debug_info", &InfoSection.Relocs) + .Case("debug_loc", &LocSection.Relocs) + .Case("debug_info.dwo", &InfoDWOSection.Relocs) + .Case("debug_line", &LineSection.Relocs) .Default(0); - if (!Map) - continue; + if (!Map) { + if (RelSecName != "debug_types") + continue; + // Find debug_types relocs by section rather than name as there are + // multiple, comdat grouped, debug_types sections. 
+ Map = &TypesSections[*RelocatedSection].Relocs; + } if (i->begin_relocations() != i->end_relocations()) { uint64_t SectionSize; - i->getSize(SectionSize); + RelocatedSection->getSize(SectionSize); for (object::relocation_iterator reloc_i = i->begin_relocations(), reloc_e = i->end_relocations(); reloc_i != reloc_e; reloc_i.increment(ec)) { @@ -585,9 +673,8 @@ DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : uint64_t SymAddr = 0; // ELF relocations may need the symbol address if (Obj->isELF()) { - object::SymbolRef Sym; - reloc_i->getSymbol(Sym); - Sym.getAddress(SymAddr); + object::symbol_iterator Sym = reloc_i->getSymbol(); + Sym->getAddress(SymAddr); } object::RelocVisitor V(Obj->getFileFormatName()); diff --git a/contrib/llvm/lib/DebugInfo/DWARFContext.h b/contrib/llvm/lib/DebugInfo/DWARFContext.h index 78c18e6..03863ab 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFContext.h +++ b/contrib/llvm/lib/DebugInfo/DWARFContext.h @@ -14,7 +14,9 @@ #include "DWARFDebugAranges.h" #include "DWARFDebugFrame.h" #include "DWARFDebugLine.h" +#include "DWARFDebugLoc.h" #include "DWARFDebugRangeList.h" +#include "DWARFTypeUnit.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" #include "llvm/DebugInfo/DIContext.h" @@ -26,13 +28,15 @@ namespace llvm { /// information parsing. The actual data is supplied through pure virtual /// methods that a concrete implementation provides. class DWARFContext : public DIContext { - SmallVector CUs; + SmallVector CUs; + SmallVector TUs; OwningPtr Abbrev; + OwningPtr Loc; OwningPtr Aranges; OwningPtr Line; OwningPtr DebugFrame; - SmallVector DWOCUs; + SmallVector DWOCUs; OwningPtr AbbrevDWO; DWARFContext(DWARFContext &) LLVM_DELETED_FUNCTION; @@ -41,12 +45,26 @@ class DWARFContext : public DIContext { /// Read compile units from the debug_info section and store them in CUs. void parseCompileUnits(); + /// Read type units from the debug_types sections and store them in CUs. + void parseTypeUnits(); + /// Read compile units from the debug_info.dwo section and store them in /// DWOCUs. void parseDWOCompileUnits(); public: - DWARFContext() {} + struct Section { + StringRef Data; + RelocAddrMap Relocs; + }; + + DWARFContext() : DIContext(CK_DWARF) {} + virtual ~DWARFContext(); + + static bool classof(const DIContext *DICtx) { + return DICtx->getKind() == CK_DWARF; + } + virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All); /// Get the number of compile units in this context. @@ -56,6 +74,13 @@ public: return CUs.size(); } + /// Get the number of compile units in this context. + unsigned getNumTypeUnits() { + if (TUs.empty()) + parseTypeUnits(); + return TUs.size(); + } + /// Get the number of compile units in the DWO context. unsigned getNumDWOCompileUnits() { if (DWOCUs.empty()) @@ -67,19 +92,29 @@ public: DWARFCompileUnit *getCompileUnitAtIndex(unsigned index) { if (CUs.empty()) parseCompileUnits(); - return &CUs[index]; + return CUs[index]; + } + + /// Get the type unit at the specified index for this compile unit. + DWARFTypeUnit *getTypeUnitAtIndex(unsigned index) { + if (TUs.empty()) + parseTypeUnits(); + return TUs[index]; } /// Get the compile unit at the specified index for the DWO compile units. DWARFCompileUnit *getDWOCompileUnitAtIndex(unsigned index) { if (DWOCUs.empty()) parseDWOCompileUnits(); - return &DWOCUs[index]; + return DWOCUs[index]; } /// Get a pointer to the parsed DebugAbbrev object. const DWARFDebugAbbrev *getDebugAbbrev(); + /// Get a pointer to the parsed DebugLoc object. 
+ const DWARFDebugLoc *getDebugLoc(); + /// Get a pointer to the parsed dwo abbreviations object. const DWARFDebugAbbrev *getDebugAbbrevDWO(); @@ -102,28 +137,30 @@ public: virtual bool isLittleEndian() const = 0; virtual uint8_t getAddressSize() const = 0; - virtual const RelocAddrMap &infoRelocMap() const = 0; - virtual const RelocAddrMap &lineRelocMap() const = 0; - virtual StringRef getInfoSection() = 0; + virtual const Section &getInfoSection() = 0; + virtual const std::map &getTypesSections() = 0; virtual StringRef getAbbrevSection() = 0; + virtual const Section &getLocSection() = 0; virtual StringRef getARangeSection() = 0; virtual StringRef getDebugFrameSection() = 0; - virtual StringRef getLineSection() = 0; + virtual const Section &getLineSection() = 0; virtual StringRef getStringSection() = 0; virtual StringRef getRangeSection() = 0; virtual StringRef getPubNamesSection() = 0; + virtual StringRef getPubTypesSection() = 0; + virtual StringRef getGnuPubNamesSection() = 0; + virtual StringRef getGnuPubTypesSection() = 0; // Sections for DWARF5 split dwarf proposal. - virtual StringRef getInfoDWOSection() = 0; + virtual const Section &getInfoDWOSection() = 0; virtual StringRef getAbbrevDWOSection() = 0; virtual StringRef getStringDWOSection() = 0; virtual StringRef getStringOffsetDWOSection() = 0; virtual StringRef getRangeDWOSection() = 0; virtual StringRef getAddrSection() = 0; - virtual const RelocAddrMap &infoDWORelocMap() const = 0; static bool isSupportedVersion(unsigned version) { - return version == 2 || version == 3; + return version == 2 || version == 3 || version == 4; } private: /// Return the compile unit that includes an offset (relative to .debug_info). @@ -141,20 +178,22 @@ class DWARFContextInMemory : public DWARFContext { virtual void anchor(); bool IsLittleEndian; uint8_t AddressSize; - RelocAddrMap InfoRelocMap; - RelocAddrMap LineRelocMap; - StringRef InfoSection; + Section InfoSection; + std::map TypesSections; StringRef AbbrevSection; + Section LocSection; StringRef ARangeSection; StringRef DebugFrameSection; - StringRef LineSection; + Section LineSection; StringRef StringSection; StringRef RangeSection; StringRef PubNamesSection; + StringRef PubTypesSection; + StringRef GnuPubNamesSection; + StringRef GnuPubTypesSection; // Sections for DWARF5 split dwarf proposal. 
- RelocAddrMap InfoDWORelocMap; - StringRef InfoDWOSection; + Section InfoDWOSection; StringRef AbbrevDWOSection; StringRef StringDWOSection; StringRef StringOffsetDWOSection; @@ -168,19 +207,24 @@ public: ~DWARFContextInMemory(); virtual bool isLittleEndian() const { return IsLittleEndian; } virtual uint8_t getAddressSize() const { return AddressSize; } - virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; } - virtual const RelocAddrMap &lineRelocMap() const { return LineRelocMap; } - virtual StringRef getInfoSection() { return InfoSection; } + virtual const Section &getInfoSection() { return InfoSection; } + virtual const std::map &getTypesSections() { + return TypesSections; + } virtual StringRef getAbbrevSection() { return AbbrevSection; } + virtual const Section &getLocSection() { return LocSection; } virtual StringRef getARangeSection() { return ARangeSection; } virtual StringRef getDebugFrameSection() { return DebugFrameSection; } - virtual StringRef getLineSection() { return LineSection; } + virtual const Section &getLineSection() { return LineSection; } virtual StringRef getStringSection() { return StringSection; } virtual StringRef getRangeSection() { return RangeSection; } virtual StringRef getPubNamesSection() { return PubNamesSection; } + virtual StringRef getPubTypesSection() { return PubTypesSection; } + virtual StringRef getGnuPubNamesSection() { return GnuPubNamesSection; } + virtual StringRef getGnuPubTypesSection() { return GnuPubTypesSection; } // Sections for DWARF5 split dwarf proposal. - virtual StringRef getInfoDWOSection() { return InfoDWOSection; } + virtual const Section &getInfoDWOSection() { return InfoDWOSection; } virtual StringRef getAbbrevDWOSection() { return AbbrevDWOSection; } virtual StringRef getStringDWOSection() { return StringDWOSection; } virtual StringRef getStringOffsetDWOSection() { @@ -190,9 +234,6 @@ public: virtual StringRef getAddrSection() { return AddrSection; } - virtual const RelocAddrMap &infoDWORelocMap() const { - return InfoDWORelocMap; - } }; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp index 7dff9ff..229376e 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.cpp @@ -20,32 +20,6 @@ void DWARFDebugArangeSet::clear() { ArangeDescriptors.clear(); } -void DWARFDebugArangeSet::compact() { - if (ArangeDescriptors.empty()) - return; - - // Iterate through all arange descriptors and combine any ranges that - // overlap or have matching boundaries. The ArangeDescriptors are assumed - // to be in ascending order. - uint32_t i = 0; - while (i + 1 < ArangeDescriptors.size()) { - if (ArangeDescriptors[i].getEndAddress() >= ArangeDescriptors[i+1].Address){ - // The current range ends at or exceeds the start of the next address - // range. Compute the max end address between the two and use that to - // make the new length. - const uint64_t max_end_addr = - std::max(ArangeDescriptors[i].getEndAddress(), - ArangeDescriptors[i+1].getEndAddress()); - ArangeDescriptors[i].Length = max_end_addr - ArangeDescriptors[i].Address; - // Now remove the next entry as it was just combined with the previous one - ArangeDescriptors.erase(ArangeDescriptors.begin()+i+1); - } else { - // Discontiguous address range, just proceed to the next one. 
- ++i; - } - } -} - bool DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { if (data.isValidOffset(*offset_ptr)) { @@ -126,26 +100,3 @@ void DWARFDebugArangeSet::dump(raw_ostream &OS) const { << format(" 0x%*.*" PRIx64 ")\n", hex_width, hex_width, pos->getEndAddress()); } - - -namespace { - class DescriptorContainsAddress { - const uint64_t Address; - public: - DescriptorContainsAddress(uint64_t address) : Address(address) {} - bool operator()(const DWARFDebugArangeSet::Descriptor &desc) const { - return Address >= desc.Address && Address < (desc.Address + desc.Length); - } - }; -} - -uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const { - DescriptorConstIter end = ArangeDescriptors.end(); - DescriptorConstIter pos = - std::find_if(ArangeDescriptors.begin(), end, // Range - DescriptorContainsAddress(address)); // Predicate - if (pos != end) - return HeaderData.CuOffset; - - return -1U; -} diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h index d768676..49a7132 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugArangeSet.h @@ -44,7 +44,6 @@ public: private: typedef std::vector DescriptorColl; - typedef DescriptorColl::iterator DescriptorIter; typedef DescriptorColl::const_iterator DescriptorConstIter; uint32_t Offset; @@ -54,15 +53,11 @@ private: public: DWARFDebugArangeSet() { clear(); } void clear(); - void compact(); bool extract(DataExtractor data, uint32_t *offset_ptr); void dump(raw_ostream &OS) const; uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; } - uint32_t getOffsetOfNextEntry() const { return Offset + HeaderData.Length + 4; } - uint32_t findAddress(uint64_t address) const; uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); } - const struct Header &getHeader() const { return HeaderData; } const Descriptor *getDescriptor(uint32_t i) const { if (i < ArangeDescriptors.size()) return &ArangeDescriptors[i]; diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp index f79862d..591d4bd 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.cpp @@ -16,128 +16,79 @@ #include using namespace llvm; -// Compare function DWARFDebugAranges::Range structures -static bool RangeLessThan(const DWARFDebugAranges::Range &range1, - const DWARFDebugAranges::Range &range2) { - return range1.LoPC < range2.LoPC; -} - -namespace { - class CountArangeDescriptors { - public: - CountArangeDescriptors(uint32_t &count_ref) : Count(count_ref) {} - void operator()(const DWARFDebugArangeSet &Set) { - Count += Set.getNumDescriptors(); - } - uint32_t &Count; - }; - - class AddArangeDescriptors { - public: - AddArangeDescriptors(DWARFDebugAranges::RangeColl &Ranges, - DWARFDebugAranges::ParsedCUOffsetColl &CUOffsets) - : RangeCollection(Ranges), - CUOffsetCollection(CUOffsets) {} - void operator()(const DWARFDebugArangeSet &Set) { - DWARFDebugAranges::Range Range; - Range.Offset = Set.getCompileUnitDIEOffset(); - CUOffsetCollection.insert(Range.Offset); - - for (uint32_t i = 0, n = Set.getNumDescriptors(); i < n; ++i) { - const DWARFDebugArangeSet::Descriptor *ArangeDescPtr = - Set.getDescriptor(i); - Range.LoPC = ArangeDescPtr->Address; - Range.Length = ArangeDescPtr->Length; - - // Insert each item in increasing address order so binary searching - // can later be done! 
- DWARFDebugAranges::RangeColl::iterator InsertPos = - std::lower_bound(RangeCollection.begin(), RangeCollection.end(), - Range, RangeLessThan); - RangeCollection.insert(InsertPos, Range); - } - - } - DWARFDebugAranges::RangeColl &RangeCollection; - DWARFDebugAranges::ParsedCUOffsetColl &CUOffsetCollection; - }; -} - -bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) { - if (debug_aranges_data.isValidOffset(0)) { - uint32_t offset = 0; - - typedef std::vector SetCollection; - SetCollection sets; - - DWARFDebugArangeSet set; - Range range; - while (set.extract(debug_aranges_data, &offset)) - sets.push_back(set); - - uint32_t count = 0; - - std::for_each(sets.begin(), sets.end(), CountArangeDescriptors(count)); - - if (count > 0) { - Aranges.reserve(count); - AddArangeDescriptors range_adder(Aranges, ParsedCUOffsets); - std::for_each(sets.begin(), sets.end(), range_adder); - } +void DWARFDebugAranges::extract(DataExtractor DebugArangesData) { + if (!DebugArangesData.isValidOffset(0)) + return; + uint32_t Offset = 0; + typedef std::vector RangeSetColl; + RangeSetColl Sets; + DWARFDebugArangeSet Set; + uint32_t TotalRanges = 0; + + while (Set.extract(DebugArangesData, &Offset)) { + Sets.push_back(Set); + TotalRanges += Set.getNumDescriptors(); } - return false; -} + if (TotalRanges == 0) + return; -bool DWARFDebugAranges::generate(DWARFContext *ctx) { - if (ctx) { - const uint32_t num_compile_units = ctx->getNumCompileUnits(); - for (uint32_t cu_idx = 0; cu_idx < num_compile_units; ++cu_idx) { - if (DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx)) { - uint32_t CUOffset = cu->getOffset(); - if (ParsedCUOffsets.insert(CUOffset).second) - cu->buildAddressRangeTable(this, true); - } + Aranges.reserve(TotalRanges); + for (RangeSetColl::const_iterator I = Sets.begin(), E = Sets.end(); I != E; + ++I) { + uint32_t CUOffset = I->getCompileUnitDIEOffset(); + + for (uint32_t i = 0, n = I->getNumDescriptors(); i < n; ++i) { + const DWARFDebugArangeSet::Descriptor *ArangeDescPtr = + I->getDescriptor(i); + uint64_t LowPC = ArangeDescPtr->Address; + uint64_t HighPC = LowPC + ArangeDescPtr->Length; + appendRange(CUOffset, LowPC, HighPC); } } - sort(true, /* overlap size */ 0); - return !isEmpty(); } -void DWARFDebugAranges::dump(raw_ostream &OS) const { - const uint32_t num_ranges = getNumRanges(); - for (uint32_t i = 0; i < num_ranges; ++i) { - const Range &range = Aranges[i]; - OS << format("0x%8.8x: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n", - range.Offset, (uint64_t)range.LoPC, (uint64_t)range.HiPC()); +void DWARFDebugAranges::generate(DWARFContext *CTX) { + clear(); + if (!CTX) + return; + + // Extract aranges from .debug_aranges section. + DataExtractor ArangesData(CTX->getARangeSection(), CTX->isLittleEndian(), 0); + extract(ArangesData); + + // Generate aranges from DIEs: even if .debug_aranges section is present, + // it may describe only a small subset of compilation units, so we need to + // manually build aranges for the rest of them. 
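The loop below leans on appendRange, whose new definition follows; it keeps the table compact while it is being filled, because a range starting exactly where the previous range of the same CU ended simply extends that entry. A standalone sketch of the same fast path over a simplified Range type (names are illustrative):

#include <cstdint>
#include <vector>

struct SimpleRange {
  uint64_t LowPC, HighPC; // [LowPC, HighPC)
  uint32_t CUOffset;
};

// Mirrors DWARFDebugAranges::appendRange: extend the last entry when the
// new range is contiguous with it and belongs to the same compile unit.
static void appendRange(std::vector<SimpleRange> &Aranges, uint32_t CUOffset,
                        uint64_t LowPC, uint64_t HighPC) {
  if (!Aranges.empty() && Aranges.back().CUOffset == CUOffset &&
      Aranges.back().HighPC == LowPC) {
    Aranges.back().HighPC = HighPC; // coalesce with the previous range
    return;
  }
  SimpleRange R = {LowPC, HighPC, CUOffset};
  Aranges.push_back(R);
}

Called with [0x100,0x180) and then [0x180,0x1c0) for one CU, this leaves a single [0x100,0x1c0) entry; ranges that arrive out of order are left for sortAndMinimize to fold.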
+ for (uint32_t i = 0, n = CTX->getNumCompileUnits(); i < n; ++i) { + if (DWARFCompileUnit *CU = CTX->getCompileUnitAtIndex(i)) { + uint32_t CUOffset = CU->getOffset(); + if (ParsedCUOffsets.insert(CUOffset).second) + CU->buildAddressRangeTable(this, true, CUOffset); + } } -} -void DWARFDebugAranges::Range::dump(raw_ostream &OS) const { - OS << format("{0x%8.8x}: [0x%8.8" PRIx64 " - 0x%8.8" PRIx64 ")\n", - Offset, LoPC, HiPC()); + sortAndMinimize(); } -void DWARFDebugAranges::appendRange(uint32_t offset, uint64_t low_pc, - uint64_t high_pc) { +void DWARFDebugAranges::appendRange(uint32_t CUOffset, uint64_t LowPC, + uint64_t HighPC) { if (!Aranges.empty()) { - if (Aranges.back().Offset == offset && Aranges.back().HiPC() == low_pc) { - Aranges.back().setHiPC(high_pc); + if (Aranges.back().CUOffset == CUOffset && + Aranges.back().HighPC() == LowPC) { + Aranges.back().setHighPC(HighPC); return; } } - Aranges.push_back(Range(low_pc, high_pc, offset)); + Aranges.push_back(Range(LowPC, HighPC, CUOffset)); } -void DWARFDebugAranges::sort(bool minimize, uint32_t n) { +void DWARFDebugAranges::sortAndMinimize() { const size_t orig_arange_size = Aranges.size(); // Size of one? If so, no sorting is needed if (orig_arange_size <= 1) return; // Sort our address range entries - std::stable_sort(Aranges.begin(), Aranges.end(), RangeLessThan); - - if (!minimize) - return; + std::stable_sort(Aranges.begin(), Aranges.end()); // Most address ranges are contiguous from function to function // so our new ranges will likely be smaller. We calculate the size @@ -151,7 +102,7 @@ void DWARFDebugAranges::sort(bool minimize, uint32_t n) { // copy the new minimal stuff over to the new collection. size_t minimal_size = 1; for (size_t i = 1; i < orig_arange_size; ++i) { - if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i], n)) + if (!Range::SortedOverlapCheck(Aranges[i-1], Aranges[i])) ++minimal_size; } @@ -166,14 +117,14 @@ void DWARFDebugAranges::sort(bool minimize, uint32_t n) { uint32_t j = 0; minimal_aranges[j] = Aranges[0]; for (size_t i = 1; i < orig_arange_size; ++i) { - if(Range::SortedOverlapCheck (minimal_aranges[j], Aranges[i], n)) { - minimal_aranges[j].setHiPC (Aranges[i].HiPC()); + if (Range::SortedOverlapCheck(minimal_aranges[j], Aranges[i])) { + minimal_aranges[j].setHighPC(Aranges[i].HighPC()); } else { // Only increment j if we aren't merging minimal_aranges[++j] = Aranges[i]; } } - assert (j+1 == minimal_size); + assert(j+1 == minimal_size); // Now swap our new minimal aranges into place. 
The local // minimal_aranges will then contain the old big collection @@ -181,50 +132,21 @@ minimal_aranges.swap(Aranges); } -uint32_t DWARFDebugAranges::findAddress(uint64_t address) const { +uint32_t DWARFDebugAranges::findAddress(uint64_t Address) const { if (!Aranges.empty()) { - Range range(address); + Range range(Address); RangeCollIterator begin = Aranges.begin(); RangeCollIterator end = Aranges.end(); - RangeCollIterator pos = std::lower_bound(begin, end, range, RangeLessThan); + RangeCollIterator pos = + std::lower_bound(begin, end, range); - if (pos != end && pos->LoPC <= address && address < pos->HiPC()) { - return pos->Offset; + if (pos != end && pos->containsAddress(Address)) { + return pos->CUOffset; } else if (pos != begin) { --pos; - if (pos->LoPC <= address && address < pos->HiPC()) - return (*pos).Offset; + if (pos->containsAddress(Address)) + return pos->CUOffset; } } return -1U; } - -bool -DWARFDebugAranges::allRangesAreContiguous(uint64_t &LoPC, uint64_t &HiPC) const{ - if (Aranges.empty()) - return false; - - uint64_t next_addr = 0; - RangeCollIterator begin = Aranges.begin(); - for (RangeCollIterator pos = begin, end = Aranges.end(); pos != end; - ++pos) { - if (pos != begin && pos->LoPC != next_addr) - return false; - next_addr = pos->HiPC(); - } - // We checked for empty at the start of function so front() will be valid. - LoPC = Aranges.front().LoPC; - // We checked for empty at the start of function so back() will be valid. - HiPC = Aranges.back().HiPC(); - return true; -} - -bool DWARFDebugAranges::getMaxRange(uint64_t &LoPC, uint64_t &HiPC) const { - if (Aranges.empty()) - return false; - // We checked for empty at the start of function so front() will be valid. - LoPC = Aranges.front().LoPC; - // We checked for empty at the start of function so back() will be valid. - HiPC = Aranges.back().HiPC(); - return true; -} diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h index 1509ffa..35ad8e5 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugAranges.h @@ -20,81 +20,61 @@ class DWARFContext; class DWARFDebugAranges { public: + void clear() { + Aranges.clear(); + ParsedCUOffsets.clear(); + } + + void generate(DWARFContext *CTX); + + // Use appendRange multiple times and then call sortAndMinimize.
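sortAndMinimize, referred to here, is a stable sort by start address followed by one folding pass that merges sorted neighbours from the same CU whose ranges touch or overlap. A hedged standalone equivalent (a single in-place pass instead of the counted pre-allocation the real method performs; the std::max guards the nested-range case):

#include <algorithm>
#include <cstdint>
#include <vector>

struct SimpleRange {
  uint64_t LowPC, HighPC; // [LowPC, HighPC)
  uint32_t CUOffset;
};

static bool lowPCLess(const SimpleRange &L, const SimpleRange &R) {
  return L.LowPC < R.LowPC;
}

// Sort by start address, then fold each range into its predecessor when
// both describe the same CU and the ranges touch or overlap.
static void sortAndMinimize(std::vector<SimpleRange> &Aranges) {
  if (Aranges.size() <= 1)
    return;
  std::stable_sort(Aranges.begin(), Aranges.end(), lowPCLess);
  std::vector<SimpleRange> Minimal;
  Minimal.push_back(Aranges[0]);
  for (size_t i = 1; i < Aranges.size(); ++i) {
    SimpleRange &Last = Minimal.back();
    if (Last.CUOffset == Aranges[i].CUOffset &&
        Last.HighPC >= Aranges[i].LowPC)
      Last.HighPC = std::max(Last.HighPC, Aranges[i].HighPC);
    else
      Minimal.push_back(Aranges[i]);
  }
  Aranges.swap(Minimal);
}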
+ void appendRange(uint32_t CUOffset, uint64_t LowPC, uint64_t HighPC); + + uint32_t findAddress(uint64_t Address) const; + +private: + void extract(DataExtractor DebugArangesData); + void sortAndMinimize(); + struct Range { - explicit Range(uint64_t lo = -1ULL, uint64_t hi = -1ULL, - uint32_t off = -1U) - : LoPC(lo), Length(hi-lo), Offset(off) {} - - void clear() { - LoPC = -1ULL; - Length = 0; - Offset = -1U; - } + explicit Range(uint64_t LowPC = -1ULL, uint64_t HighPC = -1ULL, + uint32_t CUOffset = -1U) + : LowPC(LowPC), Length(HighPC - LowPC), CUOffset(CUOffset) {} - void setHiPC(uint64_t HiPC) { - if (HiPC == -1ULL || HiPC <= LoPC) + void setHighPC(uint64_t HighPC) { + if (HighPC == -1ULL || HighPC <= LowPC) Length = 0; else - Length = HiPC - LoPC; + Length = HighPC - LowPC; } - uint64_t HiPC() const { + uint64_t HighPC() const { if (Length) - return LoPC + Length; + return LowPC + Length; return -1ULL; } - bool isValidRange() const { return Length > 0; } + bool containsAddress(uint64_t Address) const { + return LowPC <= Address && Address < HighPC(); + } - static bool SortedOverlapCheck(const Range &curr_range, - const Range &next_range, uint32_t n) { - if (curr_range.Offset != next_range.Offset) - return false; - return curr_range.HiPC() + n >= next_range.LoPC; + bool operator <(const Range &other) const { + return LowPC < other.LowPC; } - bool contains(const Range &range) const { - return LoPC <= range.LoPC && range.HiPC() <= HiPC(); + static bool SortedOverlapCheck(const Range &Left, const Range &Right) { + if (Left.CUOffset != Right.CUOffset) + return false; + return Left.HighPC() >= Right.LowPC; } - void dump(raw_ostream &OS) const; - uint64_t LoPC; // Start of address range - uint32_t Length; // End of address range (not including this address) - uint32_t Offset; // Offset of the compile unit or die + uint64_t LowPC; // Start of address range. + uint32_t Length; // End of address range (not including this address). + uint32_t CUOffset; // Offset of the compile unit or die. 
}; - void clear() { - Aranges.clear(); - ParsedCUOffsets.clear(); - } - bool allRangesAreContiguous(uint64_t& LoPC, uint64_t& HiPC) const; - bool getMaxRange(uint64_t& LoPC, uint64_t& HiPC) const; - bool extract(DataExtractor debug_aranges_data); - bool generate(DWARFContext *ctx); - - // Use append range multiple times and then call sort - void appendRange(uint32_t cu_offset, uint64_t low_pc, uint64_t high_pc); - void sort(bool minimize, uint32_t n); - - const Range *rangeAtIndex(uint32_t idx) const { - if (idx < Aranges.size()) - return &Aranges[idx]; - return NULL; - } - void dump(raw_ostream &OS) const; - uint32_t findAddress(uint64_t address) const; - bool isEmpty() const { return Aranges.empty(); } - uint32_t getNumRanges() const { return Aranges.size(); } - - uint32_t offsetAtIndex(uint32_t idx) const { - if (idx < Aranges.size()) - return Aranges[idx].Offset; - return -1U; - } - typedef std::vector RangeColl; typedef RangeColl::const_iterator RangeCollIterator; typedef DenseSet ParsedCUOffsetColl; -private: RangeColl Aranges; ParsedCUOffsetColl ParsedCUOffsets; }; diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp index 10be7b4..babfd2e 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -19,11 +19,10 @@ using namespace llvm; using namespace dwarf; -void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, - const DWARFCompileUnit *cu, +void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, const DWARFUnit *u, unsigned recurseDepth, unsigned indent) const { - DataExtractor debug_info_data = cu->getDebugInfoExtractor(); + DataExtractor debug_info_data = u->getDebugInfoExtractor(); uint32_t offset = Offset; if (debug_info_data.isValidOffset(offset)) { @@ -45,13 +44,13 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, for (uint32_t i = 0; i != numAttributes; ++i) { uint16_t attr = AbbrevDecl->getAttrByIndex(i); uint16_t form = AbbrevDecl->getFormByIndex(i); - dumpAttribute(OS, cu, &offset, attr, form, indent); + dumpAttribute(OS, u, &offset, attr, form, indent); } const DWARFDebugInfoEntryMinimal *child = getFirstChild(); if (recurseDepth > 0 && child) { while (child) { - child->dump(OS, cu, recurseDepth-1, indent+2); + child->dump(OS, u, recurseDepth-1, indent+2); child = child->getSibling(); } } @@ -66,12 +65,11 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, } void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS, - const DWARFCompileUnit *cu, - uint32_t* offset_ptr, - uint16_t attr, - uint16_t form, + const DWARFUnit *u, + uint32_t *offset_ptr, + uint16_t attr, uint16_t form, unsigned indent) const { - OS << format("0x%8.8x: ", *offset_ptr); + OS << " "; OS.indent(indent+2); const char *attrString = AttributeString(attr); if (attrString) @@ -86,57 +84,20 @@ void DWARFDebugInfoEntryMinimal::dumpAttribute(raw_ostream &OS, DWARFFormValue formValue(form); - if (!formValue.extractValue(cu->getDebugInfoExtractor(), offset_ptr, cu)) + if (!formValue.extractValue(u->getDebugInfoExtractor(), offset_ptr, u)) return; OS << "\t("; - formValue.dump(OS, cu); + formValue.dump(OS, u); OS << ")\n"; } -bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *CU, - const uint8_t *FixedFormSizes, +bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFUnit *U, uint32_t *OffsetPtr) { Offset = *OffsetPtr; - DataExtractor DebugInfoData = CU->getDebugInfoExtractor(); - uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr); - if (0 
== AbbrCode) { - // NULL debug tag entry. - AbbrevDecl = NULL; - return true; - } - AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode); - assert(AbbrevDecl); - assert(FixedFormSizes); // For best performance this should be specified! - - // Skip all data in the .debug_info for the attributes - for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) { - uint16_t Form = AbbrevDecl->getFormByIndex(i); - - // FIXME: Currently we're checking if this is less than the last - // entry in the fixed_form_sizes table, but this should be changed - // to use dynamic dispatch. - uint8_t FixedFormSize = - (Form < DW_FORM_ref_sig8) ? FixedFormSizes[Form] : 0; - if (FixedFormSize) - *OffsetPtr += FixedFormSize; - else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, - CU)) { - // Restore the original offset. - *OffsetPtr = Offset; - return false; - } - } - return true; -} - -bool -DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *CU, - uint32_t *OffsetPtr) { - DataExtractor DebugInfoData = CU->getDebugInfoExtractor(); - const uint32_t CUEndOffset = CU->getNextCompileUnitOffset(); - Offset = *OffsetPtr; - if ((Offset >= CUEndOffset) || !DebugInfoData.isValidOffset(Offset)) + DataExtractor DebugInfoData = U->getDebugInfoExtractor(); + uint32_t UEndOffset = U->getNextUnitOffset(); + if (Offset >= UEndOffset || !DebugInfoData.isValidOffset(Offset)) return false; uint64_t AbbrCode = DebugInfoData.getULEB128(OffsetPtr); if (0 == AbbrCode) { @@ -144,31 +105,25 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *CU, AbbrevDecl = NULL; return true; } - AbbrevDecl = CU->getAbbreviations()->getAbbreviationDeclaration(AbbrCode); + AbbrevDecl = U->getAbbreviations()->getAbbreviationDeclaration(AbbrCode); if (0 == AbbrevDecl) { // Restore the original offset. *OffsetPtr = Offset; return false; } - bool IsCompileUnitTag = (AbbrevDecl->getTag() == DW_TAG_compile_unit); - if (IsCompileUnitTag) - const_cast(CU)->setBaseAddress(0); + ArrayRef FixedFormSizes = DWARFFormValue::getFixedFormSizes( + U->getAddressByteSize(), U->getVersion()); + assert(FixedFormSizes.size() > 0); // Skip all data in the .debug_info for the attributes for (uint32_t i = 0, n = AbbrevDecl->getNumAttributes(); i < n; ++i) { - uint16_t Attr = AbbrevDecl->getAttrByIndex(i); uint16_t Form = AbbrevDecl->getFormByIndex(i); - if (IsCompileUnitTag && - ((Attr == DW_AT_entry_pc) || (Attr == DW_AT_low_pc))) { - DWARFFormValue FormValue(Form); - if (FormValue.extractValue(DebugInfoData, OffsetPtr, CU)) { - if (Attr == DW_AT_low_pc || Attr == DW_AT_entry_pc) - const_cast(CU) - ->setBaseAddress(FormValue.getUnsigned()); - } - } else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, - CU)) { + uint8_t FixedFormSize = + (Form < FixedFormSizes.size()) ? FixedFormSizes[Form] : 0; + if (FixedFormSize) + *OffsetPtr += FixedFormSize; + else if (!DWARFFormValue::skipValue(Form, DebugInfoData, OffsetPtr, U)) { // Restore the original offset. 
*OffsetPtr = Offset; return false; @@ -187,203 +142,191 @@ bool DWARFDebugInfoEntryMinimal::isSubroutineDIE() const { Tag == DW_TAG_inlined_subroutine; } -uint32_t -DWARFDebugInfoEntryMinimal::getAttributeValue(const DWARFCompileUnit *cu, - const uint16_t attr, - DWARFFormValue &form_value, - uint32_t *end_attr_offset_ptr) - const { - if (AbbrevDecl) { - uint32_t attr_idx = AbbrevDecl->findAttributeIndex(attr); - - if (attr_idx != -1U) { - uint32_t offset = getOffset(); +bool DWARFDebugInfoEntryMinimal::getAttributeValue( + const DWARFUnit *U, const uint16_t Attr, DWARFFormValue &FormValue) const { + if (!AbbrevDecl) + return false; - DataExtractor debug_info_data = cu->getDebugInfoExtractor(); + uint32_t AttrIdx = AbbrevDecl->findAttributeIndex(Attr); + if (AttrIdx == -1U) + return false; - // Skip the abbreviation code so we are at the data for the attributes - debug_info_data.getULEB128(&offset); + DataExtractor DebugInfoData = U->getDebugInfoExtractor(); + uint32_t DebugInfoOffset = getOffset(); - uint32_t idx = 0; - while (idx < attr_idx) - DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(idx++), - debug_info_data, &offset, cu); + // Skip the abbreviation code so we are at the data for the attributes + DebugInfoData.getULEB128(&DebugInfoOffset); - const uint32_t attr_offset = offset; - form_value = DWARFFormValue(AbbrevDecl->getFormByIndex(idx)); - if (form_value.extractValue(debug_info_data, &offset, cu)) { - if (end_attr_offset_ptr) - *end_attr_offset_ptr = offset; - return attr_offset; - } - } + // Skip preceding attribute values. + for (uint32_t i = 0; i < AttrIdx; ++i) { + DWARFFormValue::skipValue(AbbrevDecl->getFormByIndex(i), + DebugInfoData, &DebugInfoOffset, U); } - return 0; + FormValue = DWARFFormValue(AbbrevDecl->getFormByIndex(AttrIdx)); + return FormValue.extractValue(DebugInfoData, &DebugInfoOffset, U); } -const char* -DWARFDebugInfoEntryMinimal::getAttributeValueAsString( - const DWARFCompileUnit* cu, - const uint16_t attr, - const char* fail_value) - const { - DWARFFormValue form_value; - if (getAttributeValue(cu, attr, form_value)) { - DataExtractor stringExtractor(cu->getStringSection(), false, 0); - return form_value.getAsCString(&stringExtractor); - } - return fail_value; +const char *DWARFDebugInfoEntryMinimal::getAttributeValueAsString( + const DWARFUnit *U, const uint16_t Attr, const char *FailValue) const { + DWARFFormValue FormValue; + if (!getAttributeValue(U, Attr, FormValue)) + return FailValue; + Optional Result = FormValue.getAsCString(U); + return Result.hasValue() ? Result.getValue() : FailValue; +} + +uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsAddress( + const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const { + DWARFFormValue FormValue; + if (!getAttributeValue(U, Attr, FormValue)) + return FailValue; + Optional Result = FormValue.getAsAddress(U); + return Result.hasValue() ? Result.getValue() : FailValue; } -uint64_t -DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsigned( - const DWARFCompileUnit* cu, - const uint16_t attr, - uint64_t fail_value) const { - DWARFFormValue form_value; - if (getAttributeValue(cu, attr, form_value)) - return form_value.getUnsigned(); - return fail_value; +uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsUnsignedConstant( + const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const { + DWARFFormValue FormValue; + if (!getAttributeValue(U, Attr, FormValue)) + return FailValue; + Optional Result = FormValue.getAsUnsignedConstant(); + return Result.hasValue() ? 
Result.getValue() : FailValue; } -int64_t -DWARFDebugInfoEntryMinimal::getAttributeValueAsSigned( - const DWARFCompileUnit* cu, - const uint16_t attr, - int64_t fail_value) const { - DWARFFormValue form_value; - if (getAttributeValue(cu, attr, form_value)) - return form_value.getSigned(); - return fail_value; +uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsReference( + const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const { + DWARFFormValue FormValue; + if (!getAttributeValue(U, Attr, FormValue)) + return FailValue; + Optional Result = FormValue.getAsReference(U); + return Result.hasValue() ? Result.getValue() : FailValue; } -uint64_t -DWARFDebugInfoEntryMinimal::getAttributeValueAsReference( - const DWARFCompileUnit* cu, - const uint16_t attr, - uint64_t fail_value) - const { - DWARFFormValue form_value; - if (getAttributeValue(cu, attr, form_value)) - return form_value.getReference(cu); - return fail_value; +uint64_t DWARFDebugInfoEntryMinimal::getAttributeValueAsSectionOffset( + const DWARFUnit *U, const uint16_t Attr, uint64_t FailValue) const { + DWARFFormValue FormValue; + if (!getAttributeValue(U, Attr, FormValue)) + return FailValue; + Optional Result = FormValue.getAsSectionOffset(); + return Result.hasValue() ? Result.getValue() : FailValue; } -bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFCompileUnit *CU, +bool DWARFDebugInfoEntryMinimal::getLowAndHighPC(const DWARFUnit *U, uint64_t &LowPC, uint64_t &HighPC) const { - HighPC = -1ULL; - LowPC = getAttributeValueAsUnsigned(CU, DW_AT_low_pc, -1ULL); - if (LowPC != -1ULL) - HighPC = getAttributeValueAsUnsigned(CU, DW_AT_high_pc, -1ULL); + LowPC = getAttributeValueAsAddress(U, DW_AT_low_pc, -1ULL); + if (LowPC == -1ULL) + return false; + HighPC = getAttributeValueAsAddress(U, DW_AT_high_pc, -1ULL); + if (HighPC == -1ULL) { + // Since DWARF4, DW_AT_high_pc may also be of class constant, in which case + // it represents function size. + HighPC = getAttributeValueAsUnsignedConstant(U, DW_AT_high_pc, -1ULL); + if (HighPC != -1ULL) + HighPC += LowPC; + } return (HighPC != -1ULL); } -void -DWARFDebugInfoEntryMinimal::buildAddressRangeTable(const DWARFCompileUnit *CU, - DWARFDebugAranges *DebugAranges) - const { +void DWARFDebugInfoEntryMinimal::buildAddressRangeTable( + const DWARFUnit *U, DWARFDebugAranges *DebugAranges, + uint32_t UOffsetInAranges) const { if (AbbrevDecl) { if (isSubprogramDIE()) { uint64_t LowPC, HighPC; - if (getLowAndHighPC(CU, LowPC, HighPC)) { - DebugAranges->appendRange(CU->getOffset(), LowPC, HighPC); - } + if (getLowAndHighPC(U, LowPC, HighPC)) + DebugAranges->appendRange(UOffsetInAranges, LowPC, HighPC); // FIXME: try to append ranges from .debug_ranges section. 
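The getLowAndHighPC change above reflects DWARF 4, where DW_AT_high_pc may be of class address or of class constant; in the constant case it encodes the function size, so the end address is DW_AT_low_pc plus that value. A self-contained sketch of just this fallback, with the two reader parameters standing in for getAttributeValueAsAddress and getAttributeValueAsUnsignedConstant (both assumed to return -1ULL when the attribute is absent or of the other class):

#include <cstdint>
#include <utility>

static const uint64_t kInvalid = -1ULL;

// Returns the {LowPC, HighPC} pair, or {kInvalid, kInvalid} when the
// DIE carries no usable DW_AT_low_pc.
static std::pair<uint64_t, uint64_t>
lowAndHighPC(uint64_t LowPC, uint64_t HighAsAddress,
             uint64_t HighAsConstant) {
  if (LowPC == kInvalid)
    return std::make_pair(kInvalid, kInvalid);
  uint64_t HighPC = HighAsAddress;
  if (HighPC == kInvalid && HighAsConstant != kInvalid)
    HighPC = LowPC + HighAsConstant; // DWARF 4 constant class = code size
  return std::make_pair(LowPC, HighPC);
}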
} - const DWARFDebugInfoEntryMinimal *child = getFirstChild(); - while (child) { - child->buildAddressRangeTable(CU, DebugAranges); - child = child->getSibling(); + const DWARFDebugInfoEntryMinimal *Child = getFirstChild(); + while (Child) { + Child->buildAddressRangeTable(U, DebugAranges, UOffsetInAranges); + Child = Child->getSibling(); } } } -bool -DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( - const DWARFCompileUnit *CU, - const uint64_t Address) - const { +bool DWARFDebugInfoEntryMinimal::addressRangeContainsAddress( + const DWARFUnit *U, const uint64_t Address) const { if (isNULL()) return false; uint64_t LowPC, HighPC; - if (getLowAndHighPC(CU, LowPC, HighPC)) + if (getLowAndHighPC(U, LowPC, HighPC)) return (LowPC <= Address && Address <= HighPC); // Try to get address ranges from .debug_ranges section. - uint32_t RangesOffset = getAttributeValueAsReference(CU, DW_AT_ranges, -1U); + uint32_t RangesOffset = + getAttributeValueAsSectionOffset(U, DW_AT_ranges, -1U); if (RangesOffset != -1U) { DWARFDebugRangeList RangeList; - if (CU->extractRangeList(RangesOffset, RangeList)) - return RangeList.containsAddress(CU->getBaseAddress(), Address); + if (U->extractRangeList(RangesOffset, RangeList)) + return RangeList.containsAddress(U->getBaseAddress(), Address); } return false; } -const char* -DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFCompileUnit *CU) - const { +const char * +DWARFDebugInfoEntryMinimal::getSubroutineName(const DWARFUnit *U) const { if (!isSubroutineDIE()) return 0; // Try to get mangled name if possible. if (const char *name = - getAttributeValueAsString(CU, DW_AT_MIPS_linkage_name, 0)) + getAttributeValueAsString(U, DW_AT_MIPS_linkage_name, 0)) return name; - if (const char *name = getAttributeValueAsString(CU, DW_AT_linkage_name, 0)) + if (const char *name = getAttributeValueAsString(U, DW_AT_linkage_name, 0)) return name; - if (const char *name = getAttributeValueAsString(CU, DW_AT_name, 0)) + if (const char *name = getAttributeValueAsString(U, DW_AT_name, 0)) return name; // Try to get name from specification DIE. uint32_t spec_ref = - getAttributeValueAsReference(CU, DW_AT_specification, -1U); + getAttributeValueAsReference(U, DW_AT_specification, -1U); if (spec_ref != -1U) { DWARFDebugInfoEntryMinimal spec_die; - if (spec_die.extract(CU, &spec_ref)) { - if (const char *name = spec_die.getSubroutineName(CU)) + if (spec_die.extractFast(U, &spec_ref)) { + if (const char *name = spec_die.getSubroutineName(U)) return name; } } // Try to get name from abstract origin DIE. 
uint32_t abs_origin_ref = - getAttributeValueAsReference(CU, DW_AT_abstract_origin, -1U); + getAttributeValueAsReference(U, DW_AT_abstract_origin, -1U); if (abs_origin_ref != -1U) { DWARFDebugInfoEntryMinimal abs_origin_die; - if (abs_origin_die.extract(CU, &abs_origin_ref)) { - if (const char *name = abs_origin_die.getSubroutineName(CU)) + if (abs_origin_die.extractFast(U, &abs_origin_ref)) { + if (const char *name = abs_origin_die.getSubroutineName(U)) return name; } } return 0; } -void DWARFDebugInfoEntryMinimal::getCallerFrame(const DWARFCompileUnit *CU, +void DWARFDebugInfoEntryMinimal::getCallerFrame(const DWARFUnit *U, uint32_t &CallFile, uint32_t &CallLine, uint32_t &CallColumn) const { - CallFile = getAttributeValueAsUnsigned(CU, DW_AT_call_file, 0); - CallLine = getAttributeValueAsUnsigned(CU, DW_AT_call_line, 0); - CallColumn = getAttributeValueAsUnsigned(CU, DW_AT_call_column, 0); + CallFile = getAttributeValueAsUnsignedConstant(U, DW_AT_call_file, 0); + CallLine = getAttributeValueAsUnsignedConstant(U, DW_AT_call_line, 0); + CallColumn = getAttributeValueAsUnsignedConstant(U, DW_AT_call_column, 0); } -DWARFDebugInfoEntryMinimal::InlinedChain +DWARFDebugInfoEntryInlinedChain DWARFDebugInfoEntryMinimal::getInlinedChainForAddress( - const DWARFCompileUnit *CU, - const uint64_t Address) - const { - DWARFDebugInfoEntryMinimal::InlinedChain InlinedChain; + const DWARFUnit *U, const uint64_t Address) const { + DWARFDebugInfoEntryInlinedChain InlinedChain; + InlinedChain.U = U; if (isNULL()) return InlinedChain; for (const DWARFDebugInfoEntryMinimal *DIE = this; DIE; ) { // Append current DIE to inlined chain only if it has correct tag // (e.g. it is not a lexical block). if (DIE->isSubroutineDIE()) { - InlinedChain.push_back(*DIE); + InlinedChain.DIEs.push_back(*DIE); } // Try to get child which also contains provided address. const DWARFDebugInfoEntryMinimal *Child = DIE->getFirstChild(); while (Child) { - if (Child->addressRangeContainsAddress(CU, Address)) { + if (Child->addressRangeContainsAddress(U, Address)) { // Assume there is only one such child. break; } @@ -392,6 +335,6 @@ DWARFDebugInfoEntryMinimal::getInlinedChainForAddress( DIE = Child; } // Reverse the obtained chain to make the root of inlined chain last. - std::reverse(InlinedChain.begin(), InlinedChain.end()); + std::reverse(InlinedChain.DIEs.begin(), InlinedChain.DIEs.end()); return InlinedChain; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h index 9003591..aa61056 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugInfoEntry.h @@ -18,9 +18,10 @@ namespace llvm { class DWARFDebugAranges; class DWARFCompileUnit; +class DWARFUnit; class DWARFContext; class DWARFFormValue; -class DWARFInlinedSubroutineChain; +struct DWARFDebugInfoEntryInlinedChain; /// DWARFDebugInfoEntryMinimal - A DIE with only the minimum required data. 
class DWARFDebugInfoEntryMinimal { @@ -39,23 +40,15 @@ public: DWARFDebugInfoEntryMinimal() : Offset(0), ParentIdx(0), SiblingIdx(0), AbbrevDecl(0) {} - void dump(raw_ostream &OS, const DWARFCompileUnit *cu, - unsigned recurseDepth, unsigned indent = 0) const; - void dumpAttribute(raw_ostream &OS, const DWARFCompileUnit *cu, - uint32_t *offset_ptr, uint16_t attr, uint16_t form, - unsigned indent = 0) const; + void dump(raw_ostream &OS, const DWARFUnit *u, unsigned recurseDepth, + unsigned indent = 0) const; + void dumpAttribute(raw_ostream &OS, const DWARFUnit *u, uint32_t *offset_ptr, + uint16_t attr, uint16_t form, unsigned indent = 0) const; - /// Extracts a debug info entry, which is a child of a given compile unit, + /// Extracts a debug info entry, which is a child of a given unit, /// starting at a given offset. If DIE can't be extracted, returns false and /// doesn't change OffsetPtr. - bool extractFast(const DWARFCompileUnit *CU, const uint8_t *FixedFormSizes, - uint32_t *OffsetPtr); - - /// Extract a debug info entry for a given compile unit from the - /// .debug_info and .debug_abbrev data starting at the given offset. - /// If compile unit can't be parsed, returns false and doesn't change - /// OffsetPtr. - bool extract(const DWARFCompileUnit *CU, uint32_t *OffsetPtr); + bool extractFast(const DWARFUnit *U, uint32_t *OffsetPtr); uint32_t getTag() const { return AbbrevDecl ? AbbrevDecl->getTag() : 0; } bool isNULL() const { return AbbrevDecl == 0; } @@ -120,60 +113,65 @@ public: return AbbrevDecl; } - uint32_t getAttributeValue(const DWARFCompileUnit *cu, - const uint16_t attr, DWARFFormValue &formValue, - uint32_t *end_attr_offset_ptr = 0) const; + bool getAttributeValue(const DWARFUnit *U, const uint16_t Attr, + DWARFFormValue &FormValue) const; + + const char *getAttributeValueAsString(const DWARFUnit *U, const uint16_t Attr, + const char *FailValue) const; - const char* getAttributeValueAsString(const DWARFCompileUnit* cu, - const uint16_t attr, - const char *fail_value) const; + uint64_t getAttributeValueAsAddress(const DWARFUnit *U, const uint16_t Attr, + uint64_t FailValue) const; - uint64_t getAttributeValueAsUnsigned(const DWARFCompileUnit *cu, - const uint16_t attr, - uint64_t fail_value) const; + uint64_t getAttributeValueAsUnsignedConstant(const DWARFUnit *U, + const uint16_t Attr, + uint64_t FailValue) const; - uint64_t getAttributeValueAsReference(const DWARFCompileUnit *cu, - const uint16_t attr, - uint64_t fail_value) const; + uint64_t getAttributeValueAsReference(const DWARFUnit *U, const uint16_t Attr, + uint64_t FailValue) const; - int64_t getAttributeValueAsSigned(const DWARFCompileUnit* cu, - const uint16_t attr, - int64_t fail_value) const; + uint64_t getAttributeValueAsSectionOffset(const DWARFUnit *U, + const uint16_t Attr, + uint64_t FailValue) const; /// Retrieves DW_AT_low_pc and DW_AT_high_pc from CU. /// Returns true if both attributes are present. 
- bool getLowAndHighPC(const DWARFCompileUnit *CU, - uint64_t &LowPC, uint64_t &HighPC) const; + bool getLowAndHighPC(const DWARFUnit *U, uint64_t &LowPC, + uint64_t &HighPC) const; - void buildAddressRangeTable(const DWARFCompileUnit *CU, - DWARFDebugAranges *DebugAranges) const; + void buildAddressRangeTable(const DWARFUnit *U, + DWARFDebugAranges *DebugAranges, + uint32_t CUOffsetInAranges) const; - bool addressRangeContainsAddress(const DWARFCompileUnit *CU, + bool addressRangeContainsAddress(const DWARFUnit *U, const uint64_t Address) const; /// If a DIE represents a subprogram (or inlined subroutine), /// returns its mangled name (or short name, if mangled is missing). /// This name may be fetched from specification or abstract origin /// for this subprogram. Returns null if no name is found. - const char* getSubroutineName(const DWARFCompileUnit *CU) const; + const char *getSubroutineName(const DWARFUnit *U) const; /// Retrieves values of DW_AT_call_file, DW_AT_call_line and /// DW_AT_call_column from DIE (or zeroes if they are missing). - void getCallerFrame(const DWARFCompileUnit *CU, uint32_t &CallFile, + void getCallerFrame(const DWARFUnit *U, uint32_t &CallFile, uint32_t &CallLine, uint32_t &CallColumn) const; - /// InlinedChain - represents a chain of inlined_subroutine - /// DIEs, (possibly ending with subprogram DIE), all of which are contained - /// in some concrete inlined instance tree. Address range for each DIE - /// (except the last DIE) in this chain is contained in address - /// range for next DIE in the chain. - typedef SmallVector<DWARFDebugInfoEntryMinimal, 4> InlinedChain; - /// Get inlined chain for a given address, rooted at the current DIE. /// Returns empty chain if address is not contained in address range /// of current DIE. - InlinedChain getInlinedChainForAddress(const DWARFCompileUnit *CU, - const uint64_t Address) const; + DWARFDebugInfoEntryInlinedChain + getInlinedChainForAddress(const DWARFUnit *U, const uint64_t Address) const; +}; + +/// DWARFDebugInfoEntryInlinedChain - represents a chain of inlined_subroutine +/// DIEs, (possibly ending with subprogram DIE), all of which are contained +/// in some concrete inlined instance tree. Address range for each DIE +/// (except the last DIE) in this chain is contained in address +/// range for next DIE in the chain. +struct DWARFDebugInfoEntryInlinedChain { + DWARFDebugInfoEntryInlinedChain() : U(0) {} + SmallVector<DWARFDebugInfoEntryMinimal, 4> DIEs; + const DWARFUnit *U; }; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp index 192381c..13d09dd 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugLine.cpp @@ -211,7 +211,7 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data, if (*offset_ptr != end_prologue_offset) { fprintf(stderr, "warning: parsing line table prologue at 0x%8.8x should" - " have ended at 0x%8.8x but it ended ad 0x%8.8x\n", + " have ended at 0x%8.8x but it ended at 0x%8.8x\n", prologue_offset, end_prologue_offset, *offset_ptr); return false; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugLoc.cpp b/contrib/llvm/lib/DebugInfo/DWARFDebugLoc.cpp new file mode 100644 index 0000000..3895ffa --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugLoc.cpp @@ -0,0 +1,74 @@ +//===-- DWARFDebugLoc.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "DWARFDebugLoc.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +void DWARFDebugLoc::dump(raw_ostream &OS) const { + for (LocationLists::const_iterator I = Locations.begin(), E = Locations.end(); I != E; ++I) { + OS << format("0x%8.8x: ", I->Offset); + const unsigned Indent = 12; + for (SmallVectorImpl<Entry>::const_iterator I2 = I->Entries.begin(), E2 = I->Entries.end(); I2 != E2; ++I2) { + if (I2 != I->Entries.begin()) + OS.indent(Indent); + OS << "Beginning address offset: " << format("0x%016" PRIx64, I2->Begin) + << '\n'; + OS.indent(Indent) << " Ending address offset: " + << format("0x%016" PRIx64, I2->End) << '\n'; + OS.indent(Indent) << " Location description: "; + for (SmallVectorImpl<unsigned char>::const_iterator I3 = I2->Loc.begin(), E3 = I2->Loc.end(); I3 != E3; ++I3) { + OS << format("%2.2x ", *I3); + } + OS << "\n\n"; + } + } +} + +void DWARFDebugLoc::parse(DataExtractor data, unsigned AddressSize) { + uint32_t Offset = 0; + while (data.isValidOffset(Offset)) { + Locations.resize(Locations.size() + 1); + LocationList &Loc = Locations.back(); + Loc.Offset = Offset; + // 2.6.2 Location Lists + // A location list entry consists of: + while (true) { + Entry E; + RelocAddrMap::const_iterator AI = RelocMap.find(Offset); + // 1. A beginning address offset. ... + E.Begin = data.getUnsigned(&Offset, AddressSize); + if (AI != RelocMap.end()) + E.Begin += AI->second.second; + + AI = RelocMap.find(Offset); + // 2. An ending address offset. ... + E.End = data.getUnsigned(&Offset, AddressSize); + if (AI != RelocMap.end()) + E.End += AI->second.second; + + // The end of any given location list is marked by an end of list entry, + // which consists of a 0 for the beginning address offset and a 0 for the + // ending address offset. + if (E.Begin == 0 && E.End == 0) + break; + + unsigned Bytes = data.getU16(&Offset); + // A single location description describing the location of the object... + StringRef str = data.getData().substr(Offset, Bytes); + Offset += Bytes; + E.Loc.reserve(str.size()); + std::copy(str.begin(), str.end(), std::back_inserter(E.Loc)); + Loc.Entries.push_back(llvm_move(E)); + } + } +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFDebugLoc.h b/contrib/llvm/lib/DebugInfo/DWARFDebugLoc.h new file mode 100644 index 0000000..d31aaaa --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFDebugLoc.h @@ -0,0 +1,60 @@ +//===-- DWARFDebugLoc.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDEBUGLOC_H +#define LLVM_DEBUGINFO_DWARFDEBUGLOC_H + +#include "DWARFRelocMap.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Support/DataExtractor.h" + +namespace llvm { + +class raw_ostream; + +class DWARFDebugLoc { + /// A single location within a location list. + struct Entry { + /// The beginning address of the instruction range. + uint64_t Begin; + /// The ending address of the instruction range. + uint64_t End; + /// The location of the variable within the specified range. + SmallVector<unsigned char, 4> Loc; + }; + + /// A list of locations that contain one variable.
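+  /// (As the parse() loop above decodes it: a sequence of begin/end
+  /// address-offset pairs, each followed by a 2-byte length and that many
+  /// raw expression bytes, terminated by a (0, 0) pair of offsets.)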
+ struct LocationList { + /// The beginning offset where this location list is stored in the debug_loc + /// section. + unsigned Offset; + /// All the locations in which the variable is stored. + SmallVector<Entry, 2> Entries; + }; + + typedef SmallVector<LocationList, 4> LocationLists; + + /// A list of all the variables in the debug_loc section, each one describing + /// the locations in which the variable is stored. + LocationLists Locations; + + /// A map used to resolve binary relocations. + const RelocAddrMap &RelocMap; + +public: + DWARFDebugLoc(const RelocAddrMap &LocRelocMap) : RelocMap(LocRelocMap) {} + /// Print the location lists found within the debug_loc section. + void dump(raw_ostream &OS) const; + /// Parse the debug_loc section accessible via the 'data' parameter using the + /// specified address size to interpret the address ranges. + void parse(DataExtractor data, unsigned AddressSize); +}; +} + +#endif diff --git a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp index c5583f9..da71fb3 100644 --- a/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp +++ b/contrib/llvm/lib/DebugInfo/DWARFFormValue.cpp @@ -10,6 +10,8 @@ #include "llvm/DebugInfo/DWARFFormValue.h" #include "DWARFCompileUnit.h" #include "DWARFContext.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" @@ -19,64 +21,114 @@ using namespace llvm; using namespace dwarf; namespace { -template <uint8_t AddrSize, uint8_t RefAddrSize> struct FixedFormSizes { - static const uint8_t sizes[]; -}; +uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) { + // FIXME: Support DWARF64. + return (Version == 2) ? AddrSize : 4; } template <uint8_t AddrSize, uint8_t RefAddrSize> -const uint8_t FixedFormSizes<AddrSize, RefAddrSize>::sizes[] = { - 0, // 0x00 unused - AddrSize, // 0x01 DW_FORM_addr - 0, // 0x02 unused - 0, // 0x03 DW_FORM_block2 - 0, // 0x04 DW_FORM_block4 - 2, // 0x05 DW_FORM_data2 - 4, // 0x06 DW_FORM_data4 - 8, // 0x07 DW_FORM_data8 - 0, // 0x08 DW_FORM_string - 0, // 0x09 DW_FORM_block - 0, // 0x0a DW_FORM_block1 - 1, // 0x0b DW_FORM_data1 - 1, // 0x0c DW_FORM_flag - 0, // 0x0d DW_FORM_sdata - 4, // 0x0e DW_FORM_strp - 0, // 0x0f DW_FORM_udata - RefAddrSize, // 0x10 DW_FORM_ref_addr - 1, // 0x11 DW_FORM_ref1 - 2, // 0x12 DW_FORM_ref2 - 4, // 0x13 DW_FORM_ref4 - 8, // 0x14 DW_FORM_ref8 - 0, // 0x15 DW_FORM_ref_udata - 0, // 0x16 DW_FORM_indirect - 4, // 0x17 DW_FORM_sec_offset - 0, // 0x18 DW_FORM_exprloc - 0, // 0x19 DW_FORM_flag_present - 8, // 0x20 DW_FORM_ref_sig8 -}; - -static uint8_t getRefAddrSize(uint8_t AddrSize, uint16_t Version) { - // FIXME: Support DWARF64. - return (Version == 2) ?
AddrSize : 4; +ArrayRef<uint8_t> makeFixedFormSizesArrayRef() { + static const uint8_t sizes[] = { + 0, // 0x00 unused + AddrSize, // 0x01 DW_FORM_addr + 0, // 0x02 unused + 0, // 0x03 DW_FORM_block2 + 0, // 0x04 DW_FORM_block4 + 2, // 0x05 DW_FORM_data2 + 4, // 0x06 DW_FORM_data4 + 8, // 0x07 DW_FORM_data8 + 0, // 0x08 DW_FORM_string + 0, // 0x09 DW_FORM_block + 0, // 0x0a DW_FORM_block1 + 1, // 0x0b DW_FORM_data1 + 1, // 0x0c DW_FORM_flag + 0, // 0x0d DW_FORM_sdata + 4, // 0x0e DW_FORM_strp + 0, // 0x0f DW_FORM_udata + RefAddrSize, // 0x10 DW_FORM_ref_addr + 1, // 0x11 DW_FORM_ref1 + 2, // 0x12 DW_FORM_ref2 + 4, // 0x13 DW_FORM_ref4 + 8, // 0x14 DW_FORM_ref8 + 0, // 0x15 DW_FORM_ref_udata + 0, // 0x16 DW_FORM_indirect + 4, // 0x17 DW_FORM_sec_offset + 0, // 0x18 DW_FORM_exprloc + 0, // 0x19 DW_FORM_flag_present + }; + return makeArrayRef(sizes); +} } -const uint8_t * -DWARFFormValue::getFixedFormSizes(uint8_t AddrSize, uint16_t Version) { +ArrayRef<uint8_t> DWARFFormValue::getFixedFormSizes(uint8_t AddrSize, + uint16_t Version) { uint8_t RefAddrSize = getRefAddrSize(AddrSize, Version); if (AddrSize == 4 && RefAddrSize == 4) - return FixedFormSizes<4, 4>::sizes; + return makeFixedFormSizesArrayRef<4, 4>(); if (AddrSize == 4 && RefAddrSize == 8) - return FixedFormSizes<4, 8>::sizes; + return makeFixedFormSizesArrayRef<4, 8>(); if (AddrSize == 8 && RefAddrSize == 4) - return FixedFormSizes<8, 4>::sizes; + return makeFixedFormSizesArrayRef<8, 4>(); if (AddrSize == 8 && RefAddrSize == 8) - return FixedFormSizes<8, 8>::sizes; - return 0; + return makeFixedFormSizesArrayRef<8, 8>(); + return None; } -bool -DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, - const DWARFCompileUnit *cu) { +static const DWARFFormValue::FormClass DWARF4FormClasses[] = { + DWARFFormValue::FC_Unknown, // 0x0 + DWARFFormValue::FC_Address, // 0x01 DW_FORM_addr + DWARFFormValue::FC_Unknown, // 0x02 unused + DWARFFormValue::FC_Block, // 0x03 DW_FORM_block2 + DWARFFormValue::FC_Block, // 0x04 DW_FORM_block4 + DWARFFormValue::FC_Constant, // 0x05 DW_FORM_data2 + // --- These can be FC_SectionOffset in DWARF3 and below: + DWARFFormValue::FC_Constant, // 0x06 DW_FORM_data4 + DWARFFormValue::FC_Constant, // 0x07 DW_FORM_data8 + // --- + DWARFFormValue::FC_String, // 0x08 DW_FORM_string + DWARFFormValue::FC_Block, // 0x09 DW_FORM_block + DWARFFormValue::FC_Block, // 0x0a DW_FORM_block1 + DWARFFormValue::FC_Constant, // 0x0b DW_FORM_data1 + DWARFFormValue::FC_Flag, // 0x0c DW_FORM_flag + DWARFFormValue::FC_Constant, // 0x0d DW_FORM_sdata + DWARFFormValue::FC_String, // 0x0e DW_FORM_strp + DWARFFormValue::FC_Constant, // 0x0f DW_FORM_udata + DWARFFormValue::FC_Reference, // 0x10 DW_FORM_ref_addr + DWARFFormValue::FC_Reference, // 0x11 DW_FORM_ref1 + DWARFFormValue::FC_Reference, // 0x12 DW_FORM_ref2 + DWARFFormValue::FC_Reference, // 0x13 DW_FORM_ref4 + DWARFFormValue::FC_Reference, // 0x14 DW_FORM_ref8 + DWARFFormValue::FC_Reference, // 0x15 DW_FORM_ref_udata + DWARFFormValue::FC_Indirect, // 0x16 DW_FORM_indirect + DWARFFormValue::FC_SectionOffset, // 0x17 DW_FORM_sec_offset + DWARFFormValue::FC_Exprloc, // 0x18 DW_FORM_exprloc + DWARFFormValue::FC_Flag, // 0x19 DW_FORM_flag_present +}; + +bool DWARFFormValue::isFormClass(DWARFFormValue::FormClass FC) const { + // First, check DWARF4 form classes. + if (Form < ArrayRef<FormClass>(DWARF4FormClasses).size() && + DWARF4FormClasses[Form] == FC) + return true; + // Check DW_FORM_ref_sig8 from DWARF4.
+ if (Form == DW_FORM_ref_sig8) + return (FC == FC_Reference); + // Check for some DWARF5 forms. + if (Form == DW_FORM_GNU_addr_index) + return (FC == FC_Address); + if (Form == DW_FORM_GNU_str_index) + return (FC == FC_String); + // In DWARF3 DW_FORM_data4 and DW_FORM_data8 served also as a section offset. + // Don't check for DWARF version here, as some producers may still do this + // by mistake. + if ((Form == DW_FORM_data4 || Form == DW_FORM_data8) && + FC == FC_SectionOffset) + return true; + return false; +} + +bool DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, + const DWARFUnit *cu) { bool indirect = false; bool is_block = false; Value.data = NULL; @@ -126,9 +178,13 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, Value.uval = data.getU16(offset_ptr); break; case DW_FORM_data4: - case DW_FORM_ref4: + case DW_FORM_ref4: { + RelocAddrMap::const_iterator AI = cu->getRelocMap()->find(*offset_ptr); Value.uval = data.getU32(offset_ptr); + if (AI != cu->getRelocMap()->end()) + Value.uval += AI->second.second; break; + } case DW_FORM_data8: case DW_FORM_ref8: Value.uval = data.getU64(offset_ptr); @@ -152,10 +208,6 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, break; case DW_FORM_string: Value.cstr = data.getCStr(offset_ptr); - // Set the string value to also be the data for inlined cstr form - // values only so we can tell the differnence between DW_FORM_string - // and DW_FORM_strp form values - Value.data = (const uint8_t*)Value.cstr; break; case DW_FORM_indirect: Form = data.getULEB128(offset_ptr); @@ -179,8 +231,6 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, Value.uval = data.getU64(offset_ptr); break; case DW_FORM_GNU_addr_index: - Value.uval = data.getULEB128(offset_ptr); - break; case DW_FORM_GNU_str_index: Value.uval = data.getULEB128(offset_ptr); break; @@ -203,13 +253,13 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, bool DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr, - const DWARFCompileUnit *cu) const { + const DWARFUnit *cu) const { return DWARFFormValue::skipValue(Form, debug_info_data, offset_ptr, cu); } bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, - uint32_t *offset_ptr, const DWARFCompileUnit *cu) { + uint32_t *offset_ptr, const DWARFUnit *cu) { bool indirect = false; do { switch (form) { @@ -309,21 +359,20 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, } void -DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { +DWARFFormValue::dump(raw_ostream &OS, const DWARFUnit *cu) const { DataExtractor debug_str_data(cu->getStringSection(), true, 0); DataExtractor debug_str_offset_data(cu->getStringOffsetSection(), true, 0); - uint64_t uvalue = getUnsigned(); + uint64_t uvalue = Value.uval; bool cu_relative_offset = false; switch (Form) { case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_GNU_addr_index: { - StringRef AddrOffsetSec = cu->getAddrOffsetSection(); OS << format(" indexed (%8.8x) address = ", (uint32_t)uvalue); - if (AddrOffsetSec.size() != 0) { - DataExtractor DA(AddrOffsetSec, true, cu->getAddressByteSize()); - OS << format("0x%016" PRIx64, getIndirectAddress(&DA, cu)); - } else + uint64_t Address; + if (cu->getAddrOffsetSectionItem(uvalue, Address)) + OS << format("0x%016" PRIx64, Address); + else OS << ""; break; } @@ -336,7 +385,7 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit 
*cu) const { case DW_FORM_data8: OS << format("0x%016" PRIx64, uvalue); break; case DW_FORM_string: OS << '"'; - OS.write_escaped(getAsCString(NULL)); + OS.write_escaped(Value.cstr); OS << '"'; break; case DW_FORM_exprloc: @@ -368,25 +417,24 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { } break; - case DW_FORM_sdata: OS << getSigned(); break; - case DW_FORM_udata: OS << getUnsigned(); break; + case DW_FORM_sdata: OS << Value.sval; break; + case DW_FORM_udata: OS << Value.uval; break; case DW_FORM_strp: { OS << format(" .debug_str[0x%8.8x] = ", (uint32_t)uvalue); - const char* dbg_str = getAsCString(&debug_str_data); - if (dbg_str) { + Optional<const char *> DbgStr = getAsCString(cu); + if (DbgStr.hasValue()) { OS << '"'; - OS.write_escaped(dbg_str); + OS.write_escaped(DbgStr.getValue()); OS << '"'; } break; } case DW_FORM_GNU_str_index: { OS << format(" indexed (%8.8x) string = ", (uint32_t)uvalue); - const char *dbg_str = getIndirectCString(&debug_str_data, - &debug_str_offset_data); - if (dbg_str) { + Optional<const char *> DbgStr = getAsCString(cu); + if (DbgStr.hasValue()) { OS << '"'; - OS.write_escaped(dbg_str); + OS.write_escaped(DbgStr.getValue()); OS << '"'; } break; @@ -435,97 +483,67 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { OS << format(" => {0x%8.8" PRIx64 "}", uvalue + (cu ? cu->getOffset() : 0)); } -const char* -DWARFFormValue::getAsCString(const DataExtractor *debug_str_data_ptr) const { - if (isInlinedCStr()) { +Optional<const char *> DWARFFormValue::getAsCString(const DWARFUnit *U) const { + if (!isFormClass(FC_String)) + return None; + if (Form == DW_FORM_string) return Value.cstr; - } else if (debug_str_data_ptr) { - uint32_t offset = Value.uval; - return debug_str_data_ptr->getCStr(&offset); + if (U == 0) + return None; + uint32_t Offset = Value.uval; + if (Form == DW_FORM_GNU_str_index) { + uint32_t StrOffset; + if (!U->getStringOffsetSectionItem(Offset, StrOffset)) + return None; + Offset = StrOffset; } - return NULL; -} - -const char* -DWARFFormValue::getIndirectCString(const DataExtractor *DS, - const DataExtractor *DSO) const { - if (!DS || !DSO) return NULL; - - uint32_t offset = Value.uval * 4; - uint32_t soffset = DSO->getU32(&offset); - return DS->getCStr(&soffset); -} - -uint64_t -DWARFFormValue::getIndirectAddress(const DataExtractor *DA, - const DWARFCompileUnit *cu) const { - if (!DA) return 0; - - uint32_t offset = Value.uval * cu->getAddressByteSize(); - return DA->getAddress(&offset); + if (const char *Str = U->getStringExtractor().getCStr(&Offset)) { + return Str; + } + return None; } -uint64_t DWARFFormValue::getReference(const DWARFCompileUnit *cu) const { - uint64_t die_offset = Value.uval; - switch (Form) { - case DW_FORM_ref1: - case DW_FORM_ref2: - case DW_FORM_ref4: - case DW_FORM_ref8: - case DW_FORM_ref_udata: - die_offset += (cu ?
cu->getOffset() : 0); - break; - default: - break; +Optional<uint64_t> DWARFFormValue::getAsAddress(const DWARFUnit *U) const { + if (!isFormClass(FC_Address)) + return None; + if (Form == DW_FORM_GNU_addr_index) { + uint32_t Index = Value.uval; + uint64_t Result; + if (U == 0 || !U->getAddrOffsetSectionItem(Index, Result)) + return None; + return Result; } - - return die_offset; + return Value.uval; } -bool -DWARFFormValue::resolveCompileUnitReferences(const DWARFCompileUnit *cu) { +Optional<uint64_t> DWARFFormValue::getAsReference(const DWARFUnit *U) const { + if (!isFormClass(FC_Reference)) + return None; switch (Form) { case DW_FORM_ref1: case DW_FORM_ref2: case DW_FORM_ref4: case DW_FORM_ref8: case DW_FORM_ref_udata: - Value.uval += cu->getOffset(); - Form = DW_FORM_ref_addr; - return true; + if (U == 0) + return None; + return Value.uval + U->getOffset(); + case DW_FORM_ref_addr: + return Value.uval; + // FIXME: Add proper support for DW_FORM_ref_sig8 default: - break; + return Value.uval; } - return false; -} - -const uint8_t *DWARFFormValue::BlockData() const { - if (!isInlinedCStr()) - return Value.data; - return NULL; } -bool DWARFFormValue::isBlockForm(uint16_t form) { - switch (form) { - case DW_FORM_exprloc: - case DW_FORM_block: - case DW_FORM_block1: - case DW_FORM_block2: - case DW_FORM_block4: - return true; - } - return false; +Optional<uint64_t> DWARFFormValue::getAsSectionOffset() const { + if (!isFormClass(FC_SectionOffset)) + return None; + return Value.uval; } -bool DWARFFormValue::isDataForm(uint16_t form) { - switch (form) { - case DW_FORM_sdata: - case DW_FORM_udata: - case DW_FORM_data1: - case DW_FORM_data2: - case DW_FORM_data4: - case DW_FORM_data8: - return true; - } - return false; +Optional<uint64_t> DWARFFormValue::getAsUnsignedConstant() const { + if (!isFormClass(FC_Constant) || Form == DW_FORM_sdata) + return None; + return Value.uval; } diff --git a/contrib/llvm/lib/DebugInfo/DWARFTypeUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARFTypeUnit.cpp new file mode 100644 index 0000000..303bf70 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFTypeUnit.cpp @@ -0,0 +1,39 @@ +//===-- DWARFTypeUnit.cpp -------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "DWARFTypeUnit.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +bool DWARFTypeUnit::extractImpl(DataExtractor debug_info, + uint32_t *offset_ptr) { + if (!DWARFUnit::extractImpl(debug_info, offset_ptr)) + return false; + TypeHash = debug_info.getU64(offset_ptr); + TypeOffset = debug_info.getU32(offset_ptr); + return TypeOffset < getLength(); +} + +void DWARFTypeUnit::dump(raw_ostream &OS) { + OS << format("0x%08x", getOffset()) << ": Type Unit:" + << " length = " << format("0x%08x", getLength()) + << " version = " << format("0x%04x", getVersion()) + << " abbr_offset = " << format("0x%04x", getAbbreviations()->getOffset()) + << " addr_size = " << format("0x%02x", getAddressByteSize()) + << " type_signature = " << format("0x%16" PRIx64, TypeHash) + << " type_offset = " << format("0x%04x", TypeOffset) + << " (next unit at " << format("0x%08x", getNextUnitOffset()) + << ")\n"; + + const DWARFDebugInfoEntryMinimal *CU = getCompileUnitDIE(false); + assert(CU && "Null Compile Unit?"); + CU->dump(OS, this, -1U); +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFTypeUnit.h b/contrib/llvm/lib/DebugInfo/DWARFTypeUnit.h new file mode 100644 index 0000000..7a0dab2 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFTypeUnit.h @@ -0,0 +1,35 @@ +//===-- DWARFTypeUnit.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFTYPEUNIT_H +#define LLVM_DEBUGINFO_DWARFTYPEUNIT_H + +#include "DWARFUnit.h" + +namespace llvm { + +class DWARFTypeUnit : public DWARFUnit { +private: + uint64_t TypeHash; + uint32_t TypeOffset; +public: + DWARFTypeUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS, + StringRef RS, StringRef SS, StringRef SOS, StringRef AOS, + const RelocAddrMap *M, bool LE) + : DWARFUnit(DA, IS, AS, RS, SS, SOS, AOS, M, LE) {} + uint32_t getSize() const LLVM_OVERRIDE { return DWARFUnit::getSize() + 12; } + void dump(raw_ostream &OS); +protected: + bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) LLVM_OVERRIDE; +}; + +} + +#endif + diff --git a/contrib/llvm/lib/DebugInfo/DWARFUnit.cpp b/contrib/llvm/lib/DebugInfo/DWARFUnit.cpp new file mode 100644 index 0000000..5167eb9 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFUnit.cpp @@ -0,0 +1,365 @@ +//===-- DWARFUnit.cpp -----------------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "DWARFUnit.h" +#include "DWARFContext.h" +#include "llvm/DebugInfo/DWARFFormValue.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Path.h" +#include <cstdio> + +using namespace llvm; +using namespace dwarf; + +DWARFUnit::DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS, + StringRef RS, StringRef SS, StringRef SOS, StringRef AOS, + const RelocAddrMap *M, bool LE) + : Abbrev(DA), InfoSection(IS), AbbrevSection(AS), RangeSection(RS), + StringSection(SS), StringOffsetSection(SOS), AddrOffsetSection(AOS), + RelocMap(M), isLittleEndian(LE) { + clear(); +} + +DWARFUnit::~DWARFUnit() { +} + +bool DWARFUnit::getAddrOffsetSectionItem(uint32_t Index, + uint64_t &Result) const { + uint32_t Offset = AddrOffsetSectionBase + Index * AddrSize; + if (AddrOffsetSection.size() < Offset + AddrSize) + return false; + DataExtractor DA(AddrOffsetSection, isLittleEndian, AddrSize); + Result = DA.getAddress(&Offset); + return true; +} + +bool DWARFUnit::getStringOffsetSectionItem(uint32_t Index, + uint32_t &Result) const { + // FIXME: string offset section entries are 8-byte for DWARF64. + const uint32_t ItemSize = 4; + uint32_t Offset = Index * ItemSize; + if (StringOffsetSection.size() < Offset + ItemSize) + return false; + DataExtractor DA(StringOffsetSection, isLittleEndian, 0); + Result = DA.getU32(&Offset); + return true; +} + +bool DWARFUnit::extractImpl(DataExtractor debug_info, uint32_t *offset_ptr) { + Length = debug_info.getU32(offset_ptr); + Version = debug_info.getU16(offset_ptr); + uint64_t abbrOffset = debug_info.getU32(offset_ptr); + AddrSize = debug_info.getU8(offset_ptr); + + bool lengthOK = debug_info.isValidOffset(getNextUnitOffset() - 1); + bool versionOK = DWARFContext::isSupportedVersion(Version); + bool abbrOffsetOK = AbbrevSection.size() > abbrOffset; + bool addrSizeOK = AddrSize == 4 || AddrSize == 8; + + if (!lengthOK || !versionOK || !addrSizeOK || !abbrOffsetOK) + return false; + + Abbrevs = Abbrev->getAbbreviationDeclarationSet(abbrOffset); + return true; +} + +bool DWARFUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) { + clear(); + + Offset = *offset_ptr; + + if (debug_info.isValidOffset(*offset_ptr)) { + if (extractImpl(debug_info, offset_ptr)) + return true; + + // reset the offset to where we tried to parse from if anything went wrong + *offset_ptr = Offset; + } + + return false; +} + +bool DWARFUnit::extractRangeList(uint32_t RangeListOffset, + DWARFDebugRangeList &RangeList) const { + // Require that compile unit is extracted.
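+  // RangeSectionBase defaults to zero; extractDIEsIfNeeded() below seeds it
+  // from DW_AT_GNU_ranges_base (if present), so RangeListOffset is applied
+  // relative to that base here.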
+ assert(DieArray.size() > 0); + DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize); + uint32_t ActualRangeListOffset = RangeSectionBase + RangeListOffset; + return RangeList.extract(RangesData, &ActualRangeListOffset); +} + +void DWARFUnit::clear() { + Offset = 0; + Length = 0; + Version = 0; + Abbrevs = 0; + AddrSize = 0; + BaseAddr = 0; + RangeSectionBase = 0; + AddrOffsetSectionBase = 0; + clearDIEs(false); + DWO.reset(); +} + +const char *DWARFUnit::getCompilationDir() { + extractDIEsIfNeeded(true); + if (DieArray.empty()) + return 0; + return DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0); +} + +uint64_t DWARFUnit::getDWOId() { + extractDIEsIfNeeded(true); + const uint64_t FailValue = -1ULL; + if (DieArray.empty()) + return FailValue; + return DieArray[0] + .getAttributeValueAsUnsignedConstant(this, DW_AT_GNU_dwo_id, FailValue); +} + +void DWARFUnit::setDIERelations() { + if (DieArray.empty()) + return; + DWARFDebugInfoEntryMinimal *die_array_begin = &DieArray.front(); + DWARFDebugInfoEntryMinimal *die_array_end = &DieArray.back(); + DWARFDebugInfoEntryMinimal *curr_die; + // We purposely are skipping the last element in the array in the loop below + // so that we can always have a valid next item + for (curr_die = die_array_begin; curr_die < die_array_end; ++curr_die) { + // Since our loop doesn't include the last element, we can always + // safely access the next die in the array. + DWARFDebugInfoEntryMinimal *next_die = curr_die + 1; + + const DWARFAbbreviationDeclaration *curr_die_abbrev = + curr_die->getAbbreviationDeclarationPtr(); + + if (curr_die_abbrev) { + // Normal DIE + if (curr_die_abbrev->hasChildren()) + next_die->setParent(curr_die); + else + curr_die->setSibling(next_die); + } else { + // NULL DIE that terminates a sibling chain + DWARFDebugInfoEntryMinimal *parent = curr_die->getParent(); + if (parent) + parent->setSibling(next_die); + } + } + + // Since we skipped the last element, we need to fix it up! + if (die_array_begin < die_array_end) + curr_die->setParent(die_array_begin); +} + +void DWARFUnit::extractDIEsToVector( + bool AppendCUDie, bool AppendNonCUDies, + std::vector<DWARFDebugInfoEntryMinimal> &Dies) const { + if (!AppendCUDie && !AppendNonCUDies) + return; + + // Set the offset to that of the first DIE and calculate the start of the + // next compilation unit header. + uint32_t Offset = getFirstDIEOffset(); + uint32_t NextCUOffset = getNextUnitOffset(); + DWARFDebugInfoEntryMinimal DIE; + uint32_t Depth = 0; + bool IsCUDie = true; + + while (Offset < NextCUOffset && DIE.extractFast(this, &Offset)) { + if (IsCUDie) { + if (AppendCUDie) + Dies.push_back(DIE); + if (!AppendNonCUDies) + break; + // The average bytes per DIE entry has been seen to be + // around 14-20 so let's pre-reserve the needed memory for + // our DIE entries accordingly. + Dies.reserve(Dies.size() + getDebugInfoSize() / 14); + IsCUDie = false; + } else { + Dies.push_back(DIE); + } + + const DWARFAbbreviationDeclaration *AbbrDecl = + DIE.getAbbreviationDeclarationPtr(); + if (AbbrDecl) { + // Normal DIE + if (AbbrDecl->hasChildren()) + ++Depth; + } else { + // NULL DIE. + if (Depth > 0) + --Depth; + if (Depth == 0) + break; // We are done with this compile unit! + } + } + + // Give a little bit of info if we encounter corrupt DWARF (our offset + // should always terminate at or before the start of the next compilation + // unit header).
+ if (Offset > NextCUOffset) + fprintf(stderr, "warning: DWARF compile unit extends beyond its " + "bounds cu 0x%8.8x at 0x%8.8x\n", getOffset(), Offset); +} + +size_t DWARFUnit::extractDIEsIfNeeded(bool CUDieOnly) { + if ((CUDieOnly && DieArray.size() > 0) || + DieArray.size() > 1) + return 0; // Already parsed. + + bool HasCUDie = DieArray.size() > 0; + extractDIEsToVector(!HasCUDie, !CUDieOnly, DieArray); + + if (DieArray.empty()) + return 0; + + // If CU DIE was just parsed, copy several attribute values from it. + if (!HasCUDie) { + uint64_t BaseAddr = + DieArray[0].getAttributeValueAsAddress(this, DW_AT_low_pc, -1ULL); + if (BaseAddr == -1ULL) + BaseAddr = DieArray[0].getAttributeValueAsAddress(this, DW_AT_entry_pc, 0); + setBaseAddress(BaseAddr); + AddrOffsetSectionBase = DieArray[0].getAttributeValueAsSectionOffset( + this, DW_AT_GNU_addr_base, 0); + RangeSectionBase = DieArray[0].getAttributeValueAsSectionOffset( + this, DW_AT_GNU_ranges_base, 0); + } + + setDIERelations(); + return DieArray.size(); +} + +DWARFUnit::DWOHolder::DWOHolder(object::ObjectFile *DWOFile) + : DWOFile(DWOFile), + DWOContext(cast<DWARFContext>(DIContext::getDWARFContext(DWOFile))), + DWOU(0) { + if (DWOContext->getNumDWOCompileUnits() > 0) + DWOU = DWOContext->getDWOCompileUnitAtIndex(0); +} + +bool DWARFUnit::parseDWO() { + if (DWO.get() != 0) + return false; + extractDIEsIfNeeded(true); + if (DieArray.empty()) + return false; + const char *DWOFileName = + DieArray[0].getAttributeValueAsString(this, DW_AT_GNU_dwo_name, 0); + if (DWOFileName == 0) + return false; + const char *CompilationDir = + DieArray[0].getAttributeValueAsString(this, DW_AT_comp_dir, 0); + SmallString<16> AbsolutePath; + if (sys::path::is_relative(DWOFileName) && CompilationDir != 0) { + sys::path::append(AbsolutePath, CompilationDir); + } + sys::path::append(AbsolutePath, DWOFileName); + object::ObjectFile *DWOFile = + object::ObjectFile::createObjectFile(AbsolutePath); + if (!DWOFile) + return false; + // Reset DWOHolder. + DWO.reset(new DWOHolder(DWOFile)); + DWARFUnit *DWOCU = DWO->getUnit(); + // Verify that compile unit in .dwo file is valid. + if (DWOCU == 0 || DWOCU->getDWOId() != getDWOId()) { + DWO.reset(); + return false; + } + // Share .debug_addr and .debug_ranges section with compile unit in .dwo + DWOCU->setAddrOffsetSection(AddrOffsetSection, AddrOffsetSectionBase); + DWOCU->setRangesSection(RangeSection, RangeSectionBase); + return true; +} + +void DWARFUnit::clearDIEs(bool KeepCUDie) { + if (DieArray.size() > (unsigned)KeepCUDie) { + // std::vectors never get any smaller when resized to a smaller size, + // or when clear() or erase() are called, the size will report that it + // is smaller, but the memory allocated remains intact (call capacity() + // to see this). So we need to create a temporary vector and swap the + // contents which will cause just the internal pointers to be swapped + // so that when temporary vector goes out of scope, it will destroy the + // contents. + std::vector<DWARFDebugInfoEntryMinimal> TmpArray; + DieArray.swap(TmpArray); + // Save at least the compile unit DIE + if (KeepCUDie) + DieArray.push_back(TmpArray.front()); + } +} + +void +DWARFUnit::buildAddressRangeTable(DWARFDebugAranges *debug_aranges, + bool clear_dies_if_already_not_parsed, + uint32_t CUOffsetInAranges) { + // This function is usually called if there is no .debug_aranges section + // in order to produce a compile unit level set of address ranges that + // is accurate.
If the DIEs weren't parsed, then we don't want all dies for + // all compile units to stay loaded when they weren't needed. So we can end + // up parsing the DWARF and then throwing them all away to keep memory usage + // down. + const bool clear_dies = extractDIEsIfNeeded(false) > 1 && + clear_dies_if_already_not_parsed; + DieArray[0].buildAddressRangeTable(this, debug_aranges, CUOffsetInAranges); + bool DWOCreated = parseDWO(); + if (DWO.get()) { + // If there is a .dwo file for this compile unit, then skeleton CU DIE + // doesn't have children, and we should instead build address range table + // from DIEs in the .debug_info.dwo section of .dwo file. + DWO->getUnit()->buildAddressRangeTable( + debug_aranges, clear_dies_if_already_not_parsed, CUOffsetInAranges); + } + if (DWOCreated && clear_dies_if_already_not_parsed) + DWO.reset(); + + // Keep memory down by clearing DIEs if this function + // caused them to be parsed. + if (clear_dies) + clearDIEs(true); +} + +const DWARFDebugInfoEntryMinimal * +DWARFUnit::getSubprogramForAddress(uint64_t Address) { + extractDIEsIfNeeded(false); + for (size_t i = 0, n = DieArray.size(); i != n; i++) + if (DieArray[i].isSubprogramDIE() && + DieArray[i].addressRangeContainsAddress(this, Address)) { + return &DieArray[i]; + } + return 0; +} + +DWARFDebugInfoEntryInlinedChain +DWARFUnit::getInlinedChainForAddress(uint64_t Address) { + // First, find a subprogram that contains the given address (the root + // of the inlined chain). + const DWARFUnit *ChainCU = 0; + const DWARFDebugInfoEntryMinimal *SubprogramDIE = + getSubprogramForAddress(Address); + if (SubprogramDIE) { + ChainCU = this; + } else { + // Try to look for subprogram DIEs in the DWO file. + parseDWO(); + if (DWO.get()) { + SubprogramDIE = DWO->getUnit()->getSubprogramForAddress(Address); + if (SubprogramDIE) + ChainCU = DWO->getUnit(); + } + } + + // Get inlined chain rooted at this subprogram DIE. + if (!SubprogramDIE) + return DWARFDebugInfoEntryInlinedChain(); + return SubprogramDIE->getInlinedChainForAddress(ChainCU, Address); +} diff --git a/contrib/llvm/lib/DebugInfo/DWARFUnit.h b/contrib/llvm/lib/DebugInfo/DWARFUnit.h new file mode 100644 index 0000000..bd768a6 --- /dev/null +++ b/contrib/llvm/lib/DebugInfo/DWARFUnit.h @@ -0,0 +1,168 @@ +//===-- DWARFUnit.h ---------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFUNIT_H +#define LLVM_DEBUGINFO_DWARFUNIT_H + +#include "llvm/ADT/OwningPtr.h" +#include "DWARFDebugAbbrev.h" +#include "DWARFDebugInfoEntry.h" +#include "DWARFDebugRangeList.h" +#include "DWARFRelocMap.h" +#include <vector> + +namespace llvm { + +namespace object { +class ObjectFile; +} + +class DWARFDebugAbbrev; +class StringRef; +class raw_ostream; + +class DWARFUnit { + const DWARFDebugAbbrev *Abbrev; + StringRef InfoSection; + StringRef AbbrevSection; + StringRef RangeSection; + uint32_t RangeSectionBase; + StringRef StringSection; + StringRef StringOffsetSection; + StringRef AddrOffsetSection; + uint32_t AddrOffsetSectionBase; + const RelocAddrMap *RelocMap; + bool isLittleEndian; + + uint32_t Offset; + uint32_t Length; + uint16_t Version; + const DWARFAbbreviationDeclarationSet *Abbrevs; + uint8_t AddrSize; + uint64_t BaseAddr; + // The compile unit debug information entry items.
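+  // Parsed lazily by extractDIEsIfNeeded(): DieArray[0] is the unit DIE and
+  // any remaining entries are its descendants in .debug_info order.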
+ std::vector<DWARFDebugInfoEntryMinimal> DieArray; + + class DWOHolder { + OwningPtr<object::ObjectFile> DWOFile; + OwningPtr<DWARFContext> DWOContext; + DWARFUnit *DWOU; + public: + DWOHolder(object::ObjectFile *DWOFile); + DWARFUnit *getUnit() const { return DWOU; } + }; + OwningPtr<DWOHolder> DWO; + +protected: + virtual bool extractImpl(DataExtractor debug_info, uint32_t *offset_ptr); + +public: + + DWARFUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS, + StringRef RS, StringRef SS, StringRef SOS, StringRef AOS, + const RelocAddrMap *M, bool LE); + + virtual ~DWARFUnit(); + + StringRef getStringSection() const { return StringSection; } + StringRef getStringOffsetSection() const { return StringOffsetSection; } + void setAddrOffsetSection(StringRef AOS, uint32_t Base) { + AddrOffsetSection = AOS; + AddrOffsetSectionBase = Base; + } + void setRangesSection(StringRef RS, uint32_t Base) { + RangeSection = RS; + RangeSectionBase = Base; + } + + bool getAddrOffsetSectionItem(uint32_t Index, uint64_t &Result) const; + // FIXME: Result should be uint64_t in DWARF64. + bool getStringOffsetSectionItem(uint32_t Index, uint32_t &Result) const; + + DataExtractor getDebugInfoExtractor() const { + return DataExtractor(InfoSection, isLittleEndian, AddrSize); + } + DataExtractor getStringExtractor() const { + return DataExtractor(StringSection, false, 0); + } + + const RelocAddrMap *getRelocMap() const { return RelocMap; } + + bool extract(DataExtractor debug_info, uint32_t* offset_ptr); + + /// extractRangeList - extracts the range list referenced by this compile + /// unit from .debug_ranges section. Returns true on success. + /// Requires that compile unit is already extracted. + bool extractRangeList(uint32_t RangeListOffset, + DWARFDebugRangeList &RangeList) const; + void clear(); + uint32_t getOffset() const { return Offset; } + /// Size in bytes of the compile unit header. + virtual uint32_t getSize() const { return 11; } + uint32_t getFirstDIEOffset() const { return Offset + getSize(); } + uint32_t getNextUnitOffset() const { return Offset + Length + 4; } + /// Size in bytes of the .debug_info data associated with this compile unit. + size_t getDebugInfoSize() const { return Length + 4 - getSize(); } + uint32_t getLength() const { return Length; } + uint16_t getVersion() const { return Version; } + const DWARFAbbreviationDeclarationSet *getAbbreviations() const { + return Abbrevs; + } + uint8_t getAddressByteSize() const { return AddrSize; } + uint64_t getBaseAddress() const { return BaseAddr; } + + void setBaseAddress(uint64_t base_addr) { + BaseAddr = base_addr; + } + + const DWARFDebugInfoEntryMinimal * + getCompileUnitDIE(bool extract_cu_die_only = true) { + extractDIEsIfNeeded(extract_cu_die_only); + return DieArray.empty() ? NULL : &DieArray[0]; + } + + const char *getCompilationDir(); + uint64_t getDWOId(); + + void buildAddressRangeTable(DWARFDebugAranges *debug_aranges, + bool clear_dies_if_already_not_parsed, + uint32_t CUOffsetInAranges); + + /// getInlinedChainForAddress - fetches inlined chain for a given address. + /// Returns empty chain if there is no subprogram containing address. The + /// chain is valid as long as parsed compile unit DIEs are not cleared. + DWARFDebugInfoEntryInlinedChain getInlinedChainForAddress(uint64_t Address); + +private: + /// extractDIEsIfNeeded - Parses a compile unit and indexes its DIEs if it + /// hasn't already been done. Returns the number of DIEs parsed at this call. + size_t extractDIEsIfNeeded(bool CUDieOnly); + /// extractDIEsToVector - Appends all parsed DIEs to a vector.
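+  /// If AppendCUDie is false the unit DIE itself is skipped, and if
+  /// AppendNonCUDIEs is false parsing stops right after the unit DIE.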
+ void extractDIEsToVector(bool AppendCUDie, bool AppendNonCUDIEs, + std::vector<DWARFDebugInfoEntryMinimal> &DIEs) const; + /// setDIERelations - We read in all of the DIE entries into our flat list + /// of DIE entries and now we need to go back through all of them and set the + /// parent, sibling and child pointers for quick DIE navigation. + void setDIERelations(); + /// clearDIEs - Clear parsed DIEs to keep memory usage low. + void clearDIEs(bool KeepCUDie); + + /// parseDWO - Parses .dwo file for current compile unit. Returns true if + /// it was actually constructed. + bool parseDWO(); + + /// getSubprogramForAddress - Returns subprogram DIE with address range + /// encompassing the provided address. The pointer is alive as long as parsed + /// compile unit DIEs are not cleared. + const DWARFDebugInfoEntryMinimal *getSubprogramForAddress(uint64_t Address); +}; + +} + +#endif diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp index e43ba4f..2a610d5 100644 --- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngine.cpp @@ -14,6 +14,8 @@ #define DEBUG_TYPE "jit" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/JITMemoryManager.h" +#include "llvm/ExecutionEngine/ObjectCache.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" #include "llvm/ExecutionEngine/GenericValue.h" @@ -38,6 +40,11 @@ using namespace llvm; STATISTIC(NumInitBytes, "Number of bytes of global vars initialized"); STATISTIC(NumGlobals , "Number of global vars initialized"); +// Pin the vtable to this file. +void ObjectCache::anchor() {} +void ObjectBuffer::anchor() {} +void ObjectBufferStream::anchor() {} + ExecutionEngine *(*ExecutionEngine::JITCtor)( Module *M, std::string *ErrorStr, @@ -47,7 +54,7 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)( ExecutionEngine *(*ExecutionEngine::MCJITCtor)( Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MCJMM, bool GVsWithCode, TargetMachine *TM) = 0; ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, @@ -55,9 +62,7 @@ ExecutionEngine *(*ExecutionEngine::InterpCtor)(Module *M, ExecutionEngine::ExecutionEngine(Module *M) : EEState(*this), - LazyFunctionCreator(0), - ExceptionTableRegister(0), - ExceptionTableDeregister(0) { + LazyFunctionCreator(0) { CompilingLazily = false; GVCompilationDisabled = false; SymbolSearchingDisabled = false; @@ -71,16 +76,6 @@ ExecutionEngine::~ExecutionEngine() { delete Modules[i]; } -void ExecutionEngine::DeregisterAllTables() { - if (ExceptionTableDeregister) { - DenseMap<const Function*, void*>::iterator it = AllExceptionTables.begin(); - DenseMap<const Function*, void*>::iterator ite = AllExceptionTables.end(); - for (; it != ite; ++it) - ExceptionTableDeregister(it->second); - AllExceptionTables.clear(); - } -} - namespace { /// \brief Helper class which uses a value handler to automatically delete the /// memory block when the GlobalVariable is destroyed.
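With ExecutionEngine::MCJITCtor now taking an RTDyldMemoryManager instead of a JITMemoryManager, MCJIT clients can hand their memory manager straight to EngineBuilder. A minimal caller-side sketch (M is assumed to be an existing Module; error handling elided):

  #include "llvm/ExecutionEngine/ExecutionEngine.h"
  #include "llvm/ExecutionEngine/SectionMemoryManager.h"

  std::string Err;
  ExecutionEngine *EE = EngineBuilder(M)
                            .setErrorStr(&Err)
                            .setUseMCJIT(true) // the legacy JIT rejects an MCJMM
                            .setMCJITMemoryManager(new SectionMemoryManager())
                            .create();

The EngineBuilder hunk below makes the pairing explicit: requesting the legacy JIT together with a runtime-dyld memory manager now fails with a clear error.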
@@ -117,7 +112,7 @@ char *ExecutionEngine::getMemoryForGV(const GlobalVariable *GV) { } bool ExecutionEngine::removeModule(Module *M) { - for(SmallVector<Module *, 1>::iterator I = Modules.begin(), + for(SmallVectorImpl<Module *>::iterator I = Modules.begin(), E = Modules.end(); I != E; ++I) { Module *Found = *I; if (Found == M) { @@ -455,10 +450,12 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr)) return 0; + assert(!(JMM && MCJMM)); + // If the user specified a memory manager but didn't specify which engine to // create, we assume they only want the JIT, and we fail if they only want // the interpreter. - if (JMM) { + if (JMM || MCJMM) { if (WhichEngine & EngineKind::JIT) WhichEngine = EngineKind::JIT; else { @@ -467,6 +464,14 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { return 0; } } + + if (MCJMM && ! UseMCJIT) { + if (ErrorStr) + *ErrorStr = + "Cannot create a legacy JIT with a runtime dyld memory " + "manager."; + return 0; + } // Unless the interpreter was explicitly selected or the JIT is not linked, // try making a JIT. @@ -480,7 +485,7 @@ ExecutionEngine *EngineBuilder::create(TargetMachine *TM) { if (UseMCJIT && ExecutionEngine::MCJITCtor) { ExecutionEngine *EE = - ExecutionEngine::MCJITCtor(M, ErrorStr, JMM, + ExecutionEngine::MCJITCtor(M, ErrorStr, MCJMM ? MCJMM : JMM, AllocateGVsWithCode, TheTM.take()); if (EE) return EE; } else if (ExecutionEngine::JITCtor) { @@ -545,6 +550,24 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { // with the correct bit width. Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0); break; + case Type::StructTyID: { + // if the whole struct is 'undef' just reserve memory for the value. + if(StructType *STy = dyn_cast<StructType>(C->getType())) { + unsigned int elemNum = STy->getNumElements(); + Result.AggregateVal.resize(elemNum); + for (unsigned int i = 0; i < elemNum; ++i) { + Type *ElemTy = STy->getElementType(i); + if (ElemTy->isIntegerTy()) + Result.AggregateVal[i].IntVal = + APInt(ElemTy->getPrimitiveSizeInBits(), 0); + else if (ElemTy->isAggregateType()) { + const Constant *ElemUndef = UndefValue::get(ElemTy); + Result.AggregateVal[i] = getConstantValue(ElemUndef); + } + } + } + } + break; case Type::VectorTyID: // if the whole vector is 'undef' just reserve memory for the value. const VectorType* VTy = dyn_cast<VectorType>(C->getType()); @@ -553,7 +576,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { Result.AggregateVal.resize(elemNum); if (ElemTy->isIntegerTy()) for (unsigned int i = 0; i < elemNum; ++i) - Result.AggregateVal[i].IntVal = + Result.AggregateVal[i].IntVal = APInt(ElemTy->getPrimitiveSizeInBits(), 0); break; } @@ -1272,6 +1295,10 @@ void ExecutionEngine::EmitGlobalVariable(const GlobalVariable *GV) { if (GA == 0) { // If it's not already specified, allocate memory for the global. GA = getMemoryForGV(GV); + + // If we failed to allocate memory for this global, return.
+ if (GA == 0) return; + addGlobalMapping(GV, GA); } diff --git a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp index f9b08a0..2d34eea 100644 --- a/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/contrib/llvm/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -15,6 +15,7 @@ #include "llvm-c/ExecutionEngine.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" @@ -157,10 +158,8 @@ LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT, void LLVMInitializeMCJITCompilerOptions(LLVMMCJITCompilerOptions *PassedOptions, size_t SizeOfPassedOptions) { LLVMMCJITCompilerOptions options; - options.OptLevel = 0; + memset(&options, 0, sizeof(options)); // Most fields are zero by default. options.CodeModel = LLVMCodeModelJITDefault; - options.NoFramePointerElim = false; - options.EnableFastISel = false; memcpy(PassedOptions, &options, std::min(sizeof(options), SizeOfPassedOptions)); @@ -199,6 +198,8 @@ LLVMBool LLVMCreateMCJITCompilerForModule( .setOptLevel((CodeGenOpt::Level)options.OptLevel) .setCodeModel(unwrap(options.CodeModel)) .setTargetOptions(targetOptions); + if (options.MCJMM) + builder.setMCJITMemoryManager(unwrap(options.MCJMM)); if (ExecutionEngine *JIT = builder.create()) { *OutJIT = wrap(JIT); return 0; @@ -332,3 +333,107 @@ void *LLVMGetPointerToGlobal(LLVMExecutionEngineRef EE, LLVMValueRef Global) { return unwrap(EE)->getPointerToGlobal(unwrap(Global)); } + +/*===-- Operations on memory managers -------------------------------------===*/ + +namespace { + +struct SimpleBindingMMFunctions { + LLVMMemoryManagerAllocateCodeSectionCallback AllocateCodeSection; + LLVMMemoryManagerAllocateDataSectionCallback AllocateDataSection; + LLVMMemoryManagerFinalizeMemoryCallback FinalizeMemory; + LLVMMemoryManagerDestroyCallback Destroy; +}; + +class SimpleBindingMemoryManager : public RTDyldMemoryManager { +public: + SimpleBindingMemoryManager(const SimpleBindingMMFunctions& Functions, + void *Opaque); + virtual ~SimpleBindingMemoryManager(); + + virtual uint8_t *allocateCodeSection( + uintptr_t Size, unsigned Alignment, unsigned SectionID, + StringRef SectionName); + + virtual uint8_t *allocateDataSection( + uintptr_t Size, unsigned Alignment, unsigned SectionID, + StringRef SectionName, bool isReadOnly); + + virtual bool finalizeMemory(std::string *ErrMsg); + +private: + SimpleBindingMMFunctions Functions; + void *Opaque; +}; + +SimpleBindingMemoryManager::SimpleBindingMemoryManager( + const SimpleBindingMMFunctions& Functions, + void *Opaque) + : Functions(Functions), Opaque(Opaque) { + assert(Functions.AllocateCodeSection && + "No AllocateCodeSection function provided!"); + assert(Functions.AllocateDataSection && + "No AllocateDataSection function provided!"); + assert(Functions.FinalizeMemory && + "No FinalizeMemory function provided!"); + assert(Functions.Destroy && + "No Destroy function provided!"); +} + +SimpleBindingMemoryManager::~SimpleBindingMemoryManager() { + Functions.Destroy(Opaque); +} + +uint8_t *SimpleBindingMemoryManager::allocateCodeSection( + uintptr_t Size, unsigned Alignment, unsigned SectionID, + StringRef SectionName) { + return Functions.AllocateCodeSection(Opaque, Size, Alignment, SectionID, + SectionName.str().c_str()); +} + +uint8_t 
*SimpleBindingMemoryManager::allocateDataSection( + uintptr_t Size, unsigned Alignment, unsigned SectionID, + StringRef SectionName, bool isReadOnly) { + return Functions.AllocateDataSection(Opaque, Size, Alignment, SectionID, + SectionName.str().c_str(), + isReadOnly); +} + +bool SimpleBindingMemoryManager::finalizeMemory(std::string *ErrMsg) { + char *errMsgCString = 0; + bool result = Functions.FinalizeMemory(Opaque, &errMsgCString); + assert((result || !errMsgCString) && + "Did not expect an error message if FinalizeMemory succeeded"); + if (errMsgCString) { + if (ErrMsg) + *ErrMsg = errMsgCString; + free(errMsgCString); + } + return result; +} + +} // anonymous namespace + +LLVMMCJITMemoryManagerRef LLVMCreateSimpleMCJITMemoryManager( + void *Opaque, + LLVMMemoryManagerAllocateCodeSectionCallback AllocateCodeSection, + LLVMMemoryManagerAllocateDataSectionCallback AllocateDataSection, + LLVMMemoryManagerFinalizeMemoryCallback FinalizeMemory, + LLVMMemoryManagerDestroyCallback Destroy) { + + if (!AllocateCodeSection || !AllocateDataSection || !FinalizeMemory || + !Destroy) + return NULL; + + SimpleBindingMMFunctions functions; + functions.AllocateCodeSection = AllocateCodeSection; + functions.AllocateDataSection = AllocateDataSection; + functions.FinalizeMemory = FinalizeMemory; + functions.Destroy = Destroy; + return wrap(new SimpleBindingMemoryManager(functions, Opaque)); +} + +void LLVMDisposeMCJITMemoryManager(LLVMMCJITMemoryManagerRef MM) { + delete unwrap(MM); +} + diff --git a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h index 3d9ff535..777d0f1 100644 --- a/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h +++ b/contrib/llvm/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h @@ -61,7 +61,7 @@ public: GetNewMethodIDFunc(GetNewMethodIDImpl) { } - // Sends an event anncouncing that a function has been emitted + // Sends an event announcing that a function has been emitted // return values are event-specific. See Intel documentation for details. int iJIT_NotifyEvent(iJIT_JVM_EVENT EventType, void *EventSpecificData) { if (!NotifyEventFunc) diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp index b95a9e8..5de0659 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -786,20 +786,31 @@ void Interpreter::visitBinaryOperator(BinaryOperator &I) { } static GenericValue executeSelectInst(GenericValue Src1, GenericValue Src2, - GenericValue Src3) { - return Src1.IntVal == 0 ? Src3 : Src2; + GenericValue Src3, const Type *Ty) { + GenericValue Dest; + if(Ty->isVectorTy()) { + assert(Src1.AggregateVal.size() == Src2.AggregateVal.size()); + assert(Src2.AggregateVal.size() == Src3.AggregateVal.size()); + Dest.AggregateVal.resize( Src1.AggregateVal.size() ); + for (size_t i = 0; i < Src1.AggregateVal.size(); ++i) + Dest.AggregateVal[i] = (Src1.AggregateVal[i].IntVal == 0) ? + Src3.AggregateVal[i] : Src2.AggregateVal[i]; + } else { + Dest = (Src1.IntVal == 0) ? 
Src3 : Src2; + } + return Dest; } void Interpreter::visitSelectInst(SelectInst &I) { ExecutionContext &SF = ECStack.back(); + const Type * Ty = I.getOperand(0)->getType(); GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Src3 = getOperandValue(I.getOperand(2), SF); - GenericValue R = executeSelectInst(Src1, Src2, Src3); + GenericValue R = executeSelectInst(Src1, Src2, Src3, Ty); SetValue(&I, R, SF); } - //===----------------------------------------------------------------------===// // Terminator Instruction Implementations //===----------------------------------------------------------------------===// @@ -887,40 +898,11 @@ void Interpreter::visitSwitchInst(SwitchInst &I) { // Check to see if any of the cases match... BasicBlock *Dest = 0; for (SwitchInst::CaseIt i = I.case_begin(), e = I.case_end(); i != e; ++i) { - IntegersSubset& Case = i.getCaseValueEx(); - if (Case.isSingleNumber()) { - // FIXME: Currently work with ConstantInt based numbers. - const ConstantInt *CI = Case.getSingleNumber(0).toConstantInt(); - GenericValue Val = getOperandValue(const_cast<ConstantInt*>(CI), SF); - if (executeICMP_EQ(Val, CondVal, ElTy).IntVal != 0) { - Dest = cast<BasicBlock>(i.getCaseSuccessor()); - break; - } + GenericValue CaseVal = getOperandValue(i.getCaseValue(), SF); + if (executeICMP_EQ(CondVal, CaseVal, ElTy).IntVal != 0) { + Dest = cast<BasicBlock>(i.getCaseSuccessor()); + break; } - if (Case.isSingleNumbersOnly()) { - for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) { - // FIXME: Currently work with ConstantInt based numbers. - const ConstantInt *CI = Case.getSingleNumber(n).toConstantInt(); - GenericValue Val = getOperandValue(const_cast<ConstantInt*>(CI), SF); - if (executeICMP_EQ(Val, CondVal, ElTy).IntVal != 0) { - Dest = cast<BasicBlock>(i.getCaseSuccessor()); - break; - } - } - } else - for (unsigned n = 0, en = Case.getNumItems(); n != en; ++n) { - IntegersSubset::Range r = Case.getItem(n); - // FIXME: Currently work with ConstantInt based numbers. - const ConstantInt *LowCI = r.getLow().toConstantInt(); - const ConstantInt *HighCI = r.getHigh().toConstantInt(); - GenericValue Low = getOperandValue(const_cast<ConstantInt*>(LowCI), SF); - GenericValue High = getOperandValue(const_cast<ConstantInt*>(HighCI), SF); - if (executeICMP_ULE(Low, CondVal, ElTy).IntVal != 0 && - executeICMP_ULE(CondVal, High, ElTy).IntVal != 0) { - Dest = cast<BasicBlock>(i.getCaseSuccessor()); - break; - } - } } if (!Dest) Dest = I.getDefaultDest(); // No cases matched: use default SwitchToNewBasicBlock(Dest, SF); @@ -1138,16 +1120,42 @@ void Interpreter::visitCallSite(CallSite CS) { callFunction((Function*)GVTOP(SRC), ArgVals); } +// auxiliary function for shift operations +static unsigned getShiftAmount(uint64_t orgShiftAmount, + llvm::APInt valueToShift) { + unsigned valueWidth = valueToShift.getBitWidth(); + if (orgShiftAmount < (uint64_t)valueWidth) + return orgShiftAmount; + // according to the llvm documentation, if orgShiftAmount > valueWidth, + // the result is undefined.
but we do shift by this rule: + return (NextPowerOf2(valueWidth-1) - 1) & orgShiftAmount; +} + + void Interpreter::visitShl(BinaryOperator &I) { ExecutionContext &SF = ECStack.back(); GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) - Dest.IntVal = Src1.IntVal.shl(Src2.IntVal.getZExtValue()); - else - Dest.IntVal = Src1.IntVal; - + const Type *Ty = I.getType(); + + if (Ty->isVectorTy()) { + uint32_t src1Size = uint32_t(Src1.AggregateVal.size()); + assert(src1Size == Src2.AggregateVal.size()); + for (unsigned i = 0; i < src1Size; i++) { + GenericValue Result; + uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal; + Result.IntVal = valueToShift.shl(getShiftAmount(shiftAmount, valueToShift)); + Dest.AggregateVal.push_back(Result); + } + } else { + // scalar + uint64_t shiftAmount = Src2.IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.IntVal; + Dest.IntVal = valueToShift.shl(getShiftAmount(shiftAmount, valueToShift)); + } + SetValue(&I, Dest, SF); } @@ -1156,11 +1164,25 @@ void Interpreter::visitLShr(BinaryOperator &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) - Dest.IntVal = Src1.IntVal.lshr(Src2.IntVal.getZExtValue()); - else - Dest.IntVal = Src1.IntVal; - + const Type *Ty = I.getType(); + + if (Ty->isVectorTy()) { + uint32_t src1Size = uint32_t(Src1.AggregateVal.size()); + assert(src1Size == Src2.AggregateVal.size()); + for (unsigned i = 0; i < src1Size; i++) { + GenericValue Result; + uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal; + Result.IntVal = valueToShift.lshr(getShiftAmount(shiftAmount, valueToShift)); + Dest.AggregateVal.push_back(Result); + } + } else { + // scalar + uint64_t shiftAmount = Src2.IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.IntVal; + Dest.IntVal = valueToShift.lshr(getShiftAmount(shiftAmount, valueToShift)); + } + SetValue(&I, Dest, SF); } @@ -1169,110 +1191,273 @@ void Interpreter::visitAShr(BinaryOperator &I) { GenericValue Src1 = getOperandValue(I.getOperand(0), SF); GenericValue Src2 = getOperandValue(I.getOperand(1), SF); GenericValue Dest; - if (Src2.IntVal.getZExtValue() < Src1.IntVal.getBitWidth()) - Dest.IntVal = Src1.IntVal.ashr(Src2.IntVal.getZExtValue()); - else - Dest.IntVal = Src1.IntVal; - + const Type *Ty = I.getType(); + + if (Ty->isVectorTy()) { + size_t src1Size = Src1.AggregateVal.size(); + assert(src1Size == Src2.AggregateVal.size()); + for (unsigned i = 0; i < src1Size; i++) { + GenericValue Result; + uint64_t shiftAmount = Src2.AggregateVal[i].IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.AggregateVal[i].IntVal; + Result.IntVal = valueToShift.ashr(getShiftAmount(shiftAmount, valueToShift)); + Dest.AggregateVal.push_back(Result); + } + } else { + // scalar + uint64_t shiftAmount = Src2.IntVal.getZExtValue(); + llvm::APInt valueToShift = Src1.IntVal; + Dest.IntVal = valueToShift.ashr(getShiftAmount(shiftAmount, valueToShift)); + } + SetValue(&I, Dest, SF); } GenericValue Interpreter::executeTruncInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - IntegerType *DITy = cast(DstTy); - unsigned DBitWidth = 
DITy->getBitWidth(); - Dest.IntVal = Src.IntVal.trunc(DBitWidth); + Type *SrcTy = SrcVal->getType(); + if (SrcTy->isVectorTy()) { + Type *DstVecTy = DstTy->getScalarType(); + unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth(); + unsigned NumElts = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal + Dest.AggregateVal.resize(NumElts); + for (unsigned i = 0; i < NumElts; i++) + Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.trunc(DBitWidth); + } else { + IntegerType *DITy = cast<IntegerType>(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.trunc(DBitWidth); + } return Dest; } GenericValue Interpreter::executeSExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { + const Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - IntegerType *DITy = cast<IntegerType>(DstTy); - unsigned DBitWidth = DITy->getBitWidth(); - Dest.IntVal = Src.IntVal.sext(DBitWidth); + if (SrcTy->isVectorTy()) { + const Type *DstVecTy = DstTy->getScalarType(); + unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth(); + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal. + Dest.AggregateVal.resize(size); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.sext(DBitWidth); + } else { + const IntegerType *DITy = cast<IntegerType>(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.sext(DBitWidth); + } return Dest; } GenericValue Interpreter::executeZExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { + const Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - IntegerType *DITy = cast<IntegerType>(DstTy); - unsigned DBitWidth = DITy->getBitWidth(); - Dest.IntVal = Src.IntVal.zext(DBitWidth); + if (SrcTy->isVectorTy()) { + const Type *DstVecTy = DstTy->getScalarType(); + unsigned DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth(); + + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal. + Dest.AggregateVal.resize(size); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = Src.AggregateVal[i].IntVal.zext(DBitWidth); + } else { + const IntegerType *DITy = cast<IntegerType>(DstTy); + unsigned DBitWidth = DITy->getBitWidth(); + Dest.IntVal = Src.IntVal.zext(DBitWidth); + } return Dest; } GenericValue Interpreter::executeFPTruncInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() && - "Invalid FPTrunc instruction"); - Dest.FloatVal = (float) Src.DoubleVal; + + if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { + assert(SrcVal->getType()->getScalarType()->isDoubleTy() && + DstTy->getScalarType()->isFloatTy() && + "Invalid FPTrunc instruction"); + + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal.
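
The Trunc/SExt/ZExt handlers above all follow one pattern: take the lane width from the destination's scalar type, then apply the scalar APInt conversion once per lane. A minimal standalone sketch of that pattern, assuming only llvm/ADT/APInt.h; the helper name is illustrative, not part of this patch:

#include "llvm/ADT/APInt.h"
#include <vector>

// Truncate every lane of a vector value to DstBits, the way executeTruncInst
// does for the vector case. APInt::trunc requires DstBits to be strictly
// narrower than the source width.
static std::vector<llvm::APInt> truncLanes(const std::vector<llvm::APInt> &Src,
                                           unsigned DstBits) {
  std::vector<llvm::APInt> Dst;
  Dst.reserve(Src.size());
  for (size_t i = 0; i < Src.size(); ++i)
    Dst.push_back(Src[i].trunc(DstBits)); // lane count is preserved
  return Dst;
}
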
+ Dest.AggregateVal.resize(size); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].FloatVal = (float)Src.AggregateVal[i].DoubleVal; + } else { + assert(SrcVal->getType()->isDoubleTy() && DstTy->isFloatTy() && + "Invalid FPTrunc instruction"); + Dest.FloatVal = (float)Src.DoubleVal; + } + return Dest; } GenericValue Interpreter::executeFPExtInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() && - "Invalid FPTrunc instruction"); - Dest.DoubleVal = (double) Src.FloatVal; + + if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { + assert(SrcVal->getType()->getScalarType()->isFloatTy() && + DstTy->getScalarType()->isDoubleTy() && "Invalid FPExt instruction"); + + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal. + Dest.AggregateVal.resize(size); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].DoubleVal = (double)Src.AggregateVal[i].FloatVal; + } else { + assert(SrcVal->getType()->isFloatTy() && DstTy->isDoubleTy() && + "Invalid FPExt instruction"); + Dest.DoubleVal = (double)Src.FloatVal; + } + return Dest; } GenericValue Interpreter::executeFPToUIInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { Type *SrcTy = SrcVal->getType(); - uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction"); - if (SrcTy->getTypeID() == Type::FloatTyID) - Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); - else - Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + if (SrcTy->getTypeID() == Type::VectorTyID) { + const Type *DstVecTy = DstTy->getScalarType(); + const Type *SrcVecTy = SrcTy->getScalarType(); + uint32_t DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth(); + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal.
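
The FP-to-integer conversions reuse the same shape: the destination width comes from the destination's scalar type, and APIntOps does the per-lane rounding. A sketch under the same assumptions (LLVM headers available; the wrapper name is made up for illustration):

#include "llvm/ADT/APInt.h"
#include <vector>

// Convert a vector of doubles to Bits-wide integers the way the interpreter's
// vector branch does: one APIntOps::RoundDoubleToAPInt call per lane.
static std::vector<llvm::APInt> fpToIntLanes(const std::vector<double> &Lanes,
                                             unsigned Bits) {
  std::vector<llvm::APInt> Out;
  Out.reserve(Lanes.size());
  for (size_t i = 0; i < Lanes.size(); ++i)
    Out.push_back(llvm::APIntOps::RoundDoubleToAPInt(Lanes[i], Bits));
  return Out;
}
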
+ Dest.AggregateVal.resize(size); + + if (SrcVecTy->getTypeID() == Type::FloatTyID) { + assert(SrcVecTy->isFloatingPointTy() && "Invalid FPToUI instruction"); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = APIntOps::RoundFloatToAPInt( + Src.AggregateVal[i].FloatVal, DBitWidth); + } else { + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = APIntOps::RoundDoubleToAPInt( + Src.AggregateVal[i].DoubleVal, DBitWidth); + } + } else { + // scalar + uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToUI instruction"); + + if (SrcTy->getTypeID() == Type::FloatTyID) + Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); + else { + Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + } + } + return Dest; } GenericValue Interpreter::executeFPToSIInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { Type *SrcTy = SrcVal->getType(); - uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction"); - if (SrcTy->getTypeID() == Type::FloatTyID) - Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); - else - Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + if (SrcTy->getTypeID() == Type::VectorTyID) { + const Type *DstVecTy = DstTy->getScalarType(); + const Type *SrcVecTy = SrcTy->getScalarType(); + uint32_t DBitWidth = cast<IntegerType>(DstVecTy)->getBitWidth(); + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal + Dest.AggregateVal.resize(size); + + if (SrcVecTy->getTypeID() == Type::FloatTyID) { + assert(SrcVecTy->isFloatingPointTy() && "Invalid FPToSI instruction"); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = APIntOps::RoundFloatToAPInt( + Src.AggregateVal[i].FloatVal, DBitWidth); + } else { + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].IntVal = APIntOps::RoundDoubleToAPInt( + Src.AggregateVal[i].DoubleVal, DBitWidth); + } + } else { + // scalar + unsigned DBitWidth = cast<IntegerType>(DstTy)->getBitWidth(); + assert(SrcTy->isFloatingPointTy() && "Invalid FPToSI instruction"); + + if (SrcTy->getTypeID() == Type::FloatTyID) + Dest.IntVal = APIntOps::RoundFloatToAPInt(Src.FloatVal, DBitWidth); + else { + Dest.IntVal = APIntOps::RoundDoubleToAPInt(Src.DoubleVal, DBitWidth); + } + } return Dest; } GenericValue Interpreter::executeUIToFPInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPointTy() && "Invalid UIToFP instruction"); - if (DstTy->getTypeID() == Type::FloatTyID) - Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal); - else - Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal); + if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { + const Type *DstVecTy = DstTy->getScalarType(); + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal + Dest.AggregateVal.resize(size); + + if (DstVecTy->getTypeID() == Type::FloatTyID) { + assert(DstVecTy->isFloatingPointTy() && "Invalid UIToFP instruction"); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].FloatVal = + APIntOps::RoundAPIntToFloat(Src.AggregateVal[i].IntVal); + } else { + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].DoubleVal = + APIntOps::RoundAPIntToDouble(Src.AggregateVal[i].IntVal); + } + } else { + // scalar + assert(DstTy->isFloatingPointTy() && "Invalid
UIToFP instruction"); + if (DstTy->getTypeID() == Type::FloatTyID) + Dest.FloatVal = APIntOps::RoundAPIntToFloat(Src.IntVal); + else { + Dest.DoubleVal = APIntOps::RoundAPIntToDouble(Src.IntVal); + } + } return Dest; } GenericValue Interpreter::executeSIToFPInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { GenericValue Dest, Src = getOperandValue(SrcVal, SF); - assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction"); - if (DstTy->getTypeID() == Type::FloatTyID) - Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal); - else - Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal); - return Dest; + if (SrcVal->getType()->getTypeID() == Type::VectorTyID) { + const Type *DstVecTy = DstTy->getScalarType(); + unsigned size = Src.AggregateVal.size(); + // the sizes of src and dst vectors must be equal + Dest.AggregateVal.resize(size); + + if (DstVecTy->getTypeID() == Type::FloatTyID) { + assert(DstVecTy->isFloatingPointTy() && "Invalid SIToFP instruction"); + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].FloatVal = + APIntOps::RoundSignedAPIntToFloat(Src.AggregateVal[i].IntVal); + } else { + for (unsigned i = 0; i < size; i++) + Dest.AggregateVal[i].DoubleVal = + APIntOps::RoundSignedAPIntToDouble(Src.AggregateVal[i].IntVal); + } + } else { + // scalar + assert(DstTy->isFloatingPointTy() && "Invalid SIToFP instruction"); + + if (DstTy->getTypeID() == Type::FloatTyID) + Dest.FloatVal = APIntOps::RoundSignedAPIntToFloat(Src.IntVal); + else { + Dest.DoubleVal = APIntOps::RoundSignedAPIntToDouble(Src.IntVal); + } + } + return Dest; } GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, Type *DstTy, @@ -1300,33 +1485,167 @@ GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, Type *DstTy, GenericValue Interpreter::executeBitCastInst(Value *SrcVal, Type *DstTy, ExecutionContext &SF) { - + + // This instruction supports bitwise conversion of vectors to integers and + // to vectors of other types (as long as they have the same size) Type *SrcTy = SrcVal->getType(); GenericValue Dest, Src = getOperandValue(SrcVal, SF); - if (DstTy->isPointerTy()) { - assert(SrcTy->isPointerTy() && "Invalid BitCast"); - Dest.PointerVal = Src.PointerVal; - } else if (DstTy->isIntegerTy()) { - if (SrcTy->isFloatTy()) { - Dest.IntVal = APInt::floatToBits(Src.FloatVal); - } else if (SrcTy->isDoubleTy()) { - Dest.IntVal = APInt::doubleToBits(Src.DoubleVal); - } else if (SrcTy->isIntegerTy()) { - Dest.IntVal = Src.IntVal; - } else + + if ((SrcTy->getTypeID() == Type::VectorTyID) || + (DstTy->getTypeID() == Type::VectorTyID)) { + // vector src bitcast to vector dst or vector src bitcast to scalar dst or + // scalar src bitcast to vector dst + bool isLittleEndian = TD.isLittleEndian(); + GenericValue TempDst, TempSrc, SrcVec; + const Type *SrcElemTy; + const Type *DstElemTy; + unsigned SrcBitSize; + unsigned DstBitSize; + unsigned SrcNum; + unsigned DstNum; + + if (SrcTy->getTypeID() == Type::VectorTyID) { + SrcElemTy = SrcTy->getScalarType(); + SrcBitSize = SrcTy->getScalarSizeInBits(); + SrcNum = Src.AggregateVal.size(); + SrcVec = Src; + } else { + // if src is scalar value, make it vector <1 x type> + SrcElemTy = SrcTy; + SrcBitSize = SrcTy->getPrimitiveSizeInBits(); + SrcNum = 1; + SrcVec.AggregateVal.push_back(Src); + } + + if (DstTy->getTypeID() == Type::VectorTyID) { + DstElemTy = DstTy->getScalarType(); + DstBitSize = DstTy->getScalarSizeInBits(); + DstNum = (SrcNum * SrcBitSize) / DstBitSize; + } else { + DstElemTy = DstTy; + DstBitSize = 
DstTy->getPrimitiveSizeInBits(); + DstNum = 1; + } + + if (SrcNum * SrcBitSize != DstNum * DstBitSize) llvm_unreachable("Invalid BitCast"); - } else if (DstTy->isFloatTy()) { - if (SrcTy->isIntegerTy()) - Dest.FloatVal = Src.IntVal.bitsToFloat(); - else - Dest.FloatVal = Src.FloatVal; - } else if (DstTy->isDoubleTy()) { - if (SrcTy->isIntegerTy()) - Dest.DoubleVal = Src.IntVal.bitsToDouble(); - else - Dest.DoubleVal = Src.DoubleVal; - } else - llvm_unreachable("Invalid Bitcast"); + + // If src is floating point, cast to integer first. + TempSrc.AggregateVal.resize(SrcNum); + if (SrcElemTy->isFloatTy()) { + for (unsigned i = 0; i < SrcNum; i++) + TempSrc.AggregateVal[i].IntVal = + APInt::floatToBits(SrcVec.AggregateVal[i].FloatVal); + + } else if (SrcElemTy->isDoubleTy()) { + for (unsigned i = 0; i < SrcNum; i++) + TempSrc.AggregateVal[i].IntVal = + APInt::doubleToBits(SrcVec.AggregateVal[i].DoubleVal); + } else if (SrcElemTy->isIntegerTy()) { + for (unsigned i = 0; i < SrcNum; i++) + TempSrc.AggregateVal[i].IntVal = SrcVec.AggregateVal[i].IntVal; + } else { + // Pointers are not allowed as the element type of vector. + llvm_unreachable("Invalid Bitcast"); + } + + // now TempSrc is integer type vector + if (DstNum < SrcNum) { + // Example: bitcast <4 x i32> to <2 x i64> + unsigned Ratio = SrcNum / DstNum; + unsigned SrcElt = 0; + for (unsigned i = 0; i < DstNum; i++) { + GenericValue Elt; + Elt.IntVal = 0; + Elt.IntVal = Elt.IntVal.zext(DstBitSize); + unsigned ShiftAmt = isLittleEndian ? 0 : SrcBitSize * (Ratio - 1); + for (unsigned j = 0; j < Ratio; j++) { + APInt Tmp; + Tmp = Tmp.zext(SrcBitSize); + Tmp = TempSrc.AggregateVal[SrcElt++].IntVal; + Tmp = Tmp.zext(DstBitSize); + Tmp = Tmp.shl(ShiftAmt); + ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; + Elt.IntVal |= Tmp; + } + TempDst.AggregateVal.push_back(Elt); + } + } else { + // Example: bitcast <2 x i64> to <4 x i32> + unsigned Ratio = DstNum / SrcNum; + for (unsigned i = 0; i < SrcNum; i++) { + unsigned ShiftAmt = isLittleEndian ? 0 : DstBitSize * (Ratio - 1); + for (unsigned j = 0; j < Ratio; j++) { + GenericValue Elt; + Elt.IntVal = Elt.IntVal.zext(SrcBitSize); + Elt.IntVal = TempSrc.AggregateVal[i].IntVal; + Elt.IntVal = Elt.IntVal.lshr(ShiftAmt); + // it could be DstBitSize == SrcBitSize, so check it + if (DstBitSize < SrcBitSize) + Elt.IntVal = Elt.IntVal.trunc(DstBitSize); + ShiftAmt += isLittleEndian ? 
DstBitSize : -DstBitSize; + TempDst.AggregateVal.push_back(Elt); + } + } + } + + // convert result from integer to specified type + if (DstTy->getTypeID() == Type::VectorTyID) { + if (DstElemTy->isDoubleTy()) { + Dest.AggregateVal.resize(DstNum); + for (unsigned i = 0; i < DstNum; i++) + Dest.AggregateVal[i].DoubleVal = + TempDst.AggregateVal[i].IntVal.bitsToDouble(); + } else if (DstElemTy->isFloatTy()) { + Dest.AggregateVal.resize(DstNum); + for (unsigned i = 0; i < DstNum; i++) + Dest.AggregateVal[i].FloatVal = + TempDst.AggregateVal[i].IntVal.bitsToFloat(); + } else { + Dest = TempDst; + } + } else { + if (DstElemTy->isDoubleTy()) + Dest.DoubleVal = TempDst.AggregateVal[0].IntVal.bitsToDouble(); + else if (DstElemTy->isFloatTy()) { + Dest.FloatVal = TempDst.AggregateVal[0].IntVal.bitsToFloat(); + } else { + Dest.IntVal = TempDst.AggregateVal[0].IntVal; + } + } + } else { // if ((SrcTy->getTypeID() == Type::VectorTyID) || + // (DstTy->getTypeID() == Type::VectorTyID)) + + // scalar src bitcast to scalar dst + if (DstTy->isPointerTy()) { + assert(SrcTy->isPointerTy() && "Invalid BitCast"); + Dest.PointerVal = Src.PointerVal; + } else if (DstTy->isIntegerTy()) { + if (SrcTy->isFloatTy()) + Dest.IntVal = APInt::floatToBits(Src.FloatVal); + else if (SrcTy->isDoubleTy()) { + Dest.IntVal = APInt::doubleToBits(Src.DoubleVal); + } else if (SrcTy->isIntegerTy()) { + Dest.IntVal = Src.IntVal; + } else { + llvm_unreachable("Invalid BitCast"); + } + } else if (DstTy->isFloatTy()) { + if (SrcTy->isIntegerTy()) + Dest.FloatVal = Src.IntVal.bitsToFloat(); + else { + Dest.FloatVal = Src.FloatVal; + } + } else if (DstTy->isDoubleTy()) { + if (SrcTy->isIntegerTy()) + Dest.DoubleVal = Src.IntVal.bitsToDouble(); + else { + Dest.DoubleVal = Src.DoubleVal; + } + } else { + llvm_unreachable("Invalid Bitcast"); + } + } return Dest; } @@ -1456,10 +1775,204 @@ void Interpreter::visitExtractElementInst(ExtractElementInst &I) { SetValue(&I, Dest, SF); } +void Interpreter::visitInsertElementInst(InsertElementInst &I) { + ExecutionContext &SF = ECStack.back(); + Type *Ty = I.getType(); + + if(!(Ty->isVectorTy()) ) + llvm_unreachable("Unhandled dest type for insertelement instruction"); + + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Src3 = getOperandValue(I.getOperand(2), SF); + GenericValue Dest; + + Type *TyContained = Ty->getContainedType(0); + + const unsigned indx = unsigned(Src3.IntVal.getZExtValue()); + Dest.AggregateVal = Src1.AggregateVal; + + if(Src1.AggregateVal.size() <= indx) + llvm_unreachable("Invalid index in insertelement instruction"); + switch (TyContained->getTypeID()) { + default: + llvm_unreachable("Unhandled dest type for insertelement instruction"); + case Type::IntegerTyID: + Dest.AggregateVal[indx].IntVal = Src2.IntVal; + break; + case Type::FloatTyID: + Dest.AggregateVal[indx].FloatVal = Src2.FloatVal; + break; + case Type::DoubleTyID: + Dest.AggregateVal[indx].DoubleVal = Src2.DoubleVal; + break; + } + SetValue(&I, Dest, SF); +} + +void Interpreter::visitShuffleVectorInst(ShuffleVectorInst &I){ + ExecutionContext &SF = ECStack.back(); + + Type *Ty = I.getType(); + if(!(Ty->isVectorTy())) + llvm_unreachable("Unhandled dest type for shufflevector instruction"); + + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Src3 = getOperandValue(I.getOperand(2), SF); + GenericValue Dest; + + // There is no need to check types of 
src1 and src2, because the compiled + // bytecode can't contain different types for src1 and src2 for a + // shufflevector instruction. + + Type *TyContained = Ty->getContainedType(0); + unsigned src1Size = (unsigned)Src1.AggregateVal.size(); + unsigned src2Size = (unsigned)Src2.AggregateVal.size(); + unsigned src3Size = (unsigned)Src3.AggregateVal.size(); + + Dest.AggregateVal.resize(src3Size); + + switch (TyContained->getTypeID()) { + default: + llvm_unreachable("Unhandled dest type for insertelement instruction"); + break; + case Type::IntegerTyID: + for( unsigned i=0; i<src3Size; i++) { + unsigned j = Src3.AggregateVal[i].IntVal.getZExtValue(); + if(j < src1Size) + Dest.AggregateVal[i].IntVal = Src1.AggregateVal[j].IntVal; + else if(j < src1Size + src2Size) + Dest.AggregateVal[i].IntVal = Src2.AggregateVal[j-src1Size].IntVal; + else + // The selector may not be greater than sum of lengths of first and + // second operands and llasm should not allow situation like + // %tmp = shufflevector <2 x i32> <i32 3, i32 4>, <2 x i32> undef, + // <2 x i32> < i32 0, i32 5 >, + // where i32 5 is invalid, but let it be additional check here: + llvm_unreachable("Invalid mask in shufflevector instruction"); + } + break; + case Type::FloatTyID: + for( unsigned i=0; i<src3Size; i++) { + unsigned j = Src3.AggregateVal[i].IntVal.getZExtValue(); + if(j < src1Size) + Dest.AggregateVal[i].FloatVal = Src1.AggregateVal[j].FloatVal; + else if(j < src1Size + src2Size) + Dest.AggregateVal[i].FloatVal = Src2.AggregateVal[j-src1Size].FloatVal; + else + llvm_unreachable("Invalid mask in shufflevector instruction"); + } + break; + case Type::DoubleTyID: + for( unsigned i=0; i<src3Size; i++) { + unsigned j = Src3.AggregateVal[i].IntVal.getZExtValue(); + if(j < src1Size) + Dest.AggregateVal[i].DoubleVal = Src1.AggregateVal[j].DoubleVal; + else if(j < src1Size + src2Size) + Dest.AggregateVal[i].DoubleVal = Src2.AggregateVal[j-src1Size].DoubleVal; + else + llvm_unreachable("Invalid mask in shufflevector instruction"); + } + break; + } + SetValue(&I, Dest, SF); +} + +void Interpreter::visitExtractValueInst(ExtractValueInst &I) { + ExecutionContext &SF = ECStack.back(); + Value *Agg = I.getAggregateOperand(); + GenericValue Dest; + GenericValue Src = getOperandValue(Agg, SF); + + ExtractValueInst::idx_iterator IdxBegin = I.idx_begin(); + unsigned Num = I.getNumIndices(); + GenericValue *pSrc = &Src; + + for (unsigned i = 0 ; i < Num; ++i) { + pSrc = &pSrc->AggregateVal[*IdxBegin]; + ++IdxBegin; + } + + Type *IndexedType = ExtractValueInst::getIndexedType(Agg->getType(), I.getIndices()); + switch (IndexedType->getTypeID()) { + default: + llvm_unreachable("Unhandled dest type for extractelement instruction"); + break; + case Type::IntegerTyID: + Dest.IntVal = pSrc->IntVal; + break; + case Type::FloatTyID: + Dest.FloatVal = pSrc->FloatVal; + break; + case Type::DoubleTyID: + Dest.DoubleVal = pSrc->DoubleVal; + break; + case Type::ArrayTyID: + case Type::StructTyID: + case Type::VectorTyID: + Dest.AggregateVal = pSrc->AggregateVal; + break; + case Type::PointerTyID: + Dest.PointerVal = pSrc->PointerVal; + break; + } + + SetValue(&I, Dest, SF); +} + +void Interpreter::visitInsertValueInst(InsertValueInst &I) { + + ExecutionContext &SF = ECStack.back(); + Value *Agg = I.getAggregateOperand(); + + GenericValue Src1 = getOperandValue(Agg, SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Dest = Src1; // Dest is a slightly changed Src1 + + ExtractValueInst::idx_iterator IdxBegin = I.idx_begin(); + unsigned Num = I.getNumIndices(); + + GenericValue *pDest = &Dest; + for (unsigned i = 0 ; i < Num; ++i) { + pDest = &pDest->AggregateVal[*IdxBegin]; + ++IdxBegin; + } + // pDest points to the target value in the Dest now + + Type *IndexedType = ExtractValueInst::getIndexedType(Agg->getType(), I.getIndices()); + + switch (IndexedType->getTypeID()) { + default: + llvm_unreachable("Unhandled dest type for insertelement instruction"); + break; + case Type::IntegerTyID: + pDest->IntVal = Src2.IntVal; + break; + case Type::FloatTyID: + pDest->FloatVal = Src2.FloatVal; + break; + case Type::DoubleTyID: + pDest->DoubleVal = Src2.DoubleVal; + break; + case Type::ArrayTyID: + case Type::StructTyID: + case Type::VectorTyID: + pDest->AggregateVal = Src2.AggregateVal; + break; + case Type::PointerTyID: + pDest->PointerVal = Src2.PointerVal; + break; + } + + SetValue(&I, Dest, SF); +} + GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, ExecutionContext &SF) { switch (CE->getOpcode()) { - case Instruction::Trunc: + case Instruction::Trunc: return executeTruncInst(CE->getOperand(0), CE->getType(), SF); case Instruction::ZExt: return executeZExtInst(CE->getOperand(0), CE->getType(), SF); @@ -1495,7 +2008,8 @@ GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, case Instruction::Select: return executeSelectInst(getOperandValue(CE->getOperand(0), SF), getOperandValue(CE->getOperand(1), SF), - getOperandValue(CE->getOperand(2), SF)); + getOperandValue(CE->getOperand(2), SF), + CE->getOperand(0)->getType()); default : break; } diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index bef4bbf..a03c7f5 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -406,6 +406,7 @@ GenericValue lle_X_sprintf(FunctionType *FT, break; } } + return GV; } // int printf(const char *, ...) - a very rough implementation to make output @@ -434,7 +435,7 @@ GenericValue lle_X_sscanf(FunctionType *FT, GenericValue GV; GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4], - Args[5], Args[6], Args[7], Args[8], Args[9])); + Args[5], Args[6], Args[7], Args[8], Args[9])); return GV; } @@ -450,7 +451,7 @@ GenericValue lle_X_scanf(FunctionType *FT, GenericValue GV; GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4], - Args[5], Args[6], Args[7], Args[8], Args[9])); + Args[5], Args[6], Args[7], Args[8], Args[9])); return GV; } @@ -470,6 +471,30 @@ GenericValue lle_X_fprintf(FunctionType *FT, return GV; } +static GenericValue lle_X_memset(FunctionType *FT, + const std::vector<GenericValue> &Args) { + int val = (int)Args[1].IntVal.getSExtValue(); + size_t len = (size_t)Args[2].IntVal.getZExtValue(); + memset((void *)GVTOP(Args[0]), val, len); + // llvm.memset.* returns void, lle_X_* returns GenericValue, + // so here we return GenericValue with IntVal set to zero + GenericValue GV; + GV.IntVal = 0; + return GV; +} + +static GenericValue lle_X_memcpy(FunctionType *FT, + const std::vector<GenericValue> &Args) { + memcpy(GVTOP(Args[0]), GVTOP(Args[1]), + (size_t)(Args[2].IntVal.getLimitedValue())); + + // llvm.memcpy* returns void, lle_X_* returns GenericValue, + // so here we return GenericValue with IntVal set to zero + GenericValue GV; + GV.IntVal = 0; + return GV; +} + void Interpreter::initializeExternalFunctions() { sys::ScopedLock Writer(*FunctionsLock); FuncNames["lle_X_atexit"] = lle_X_atexit; @@ -481,4 +506,6 @@ void Interpreter::initializeExternalFunctions() { FuncNames["lle_X_sscanf"] = lle_X_sscanf; FuncNames["lle_X_scanf"] = lle_X_scanf; FuncNames["lle_X_fprintf"] = lle_X_fprintf; + FuncNames["lle_X_memset"] = lle_X_memset; + FuncNames["lle_X_memcpy"] = lle_X_memcpy; } diff --git a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h index 2952d7e..98269ef 100644 --- a/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/contrib/llvm/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -179,6 +179,12 @@ public: void visitVAArgInst(VAArgInst &I); void visitExtractElementInst(ExtractElementInst &I); + void visitInsertElementInst(InsertElementInst &I); + void visitShuffleVectorInst(ShuffleVectorInst &I); + + void visitExtractValueInst(ExtractValueInst &I); + void visitInsertValueInst(InsertValueInst &I); + void visitInstruction(Instruction &I) { errs() << I << "\n"; llvm_unreachable("Instruction not interpretable yet!"); diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp index 53ea0a2..246a675 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JIT.cpp @@ -67,140 +67,6 @@ static struct RegisterJIT { extern "C" void LLVMLinkInJIT() { } -// Determine whether we can register EH tables.
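
The block removed below existed because a JIT that emits DWARF unwind info must hand each frame description to the unwinding runtime, which libgcc exposes as __register_frame/__deregister_frame. A minimal sketch of that contract, with the caveat that libgcc and LLVM's libunwind disagree on the exact argument (libgcc takes the whole .eh_frame section, libunwind a single FDE); the guard type is illustrative:

extern "C" void __register_frame(void *);
extern "C" void __deregister_frame(void *);

// Tie frame registration to an object's lifetime: register the JIT-emitted
// eh_frame data on creation, deregister it before the memory is freed.
struct EHFrameGuard {
  void *Frame;
  explicit EHFrameGuard(void *EhFrame) : Frame(EhFrame) {
    __register_frame(Frame);
  }
  ~EHFrameGuard() { __deregister_frame(Frame); }
};
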
-#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \ - !defined(__USING_SJLJ_EXCEPTIONS__)) -#define HAVE_EHTABLE_SUPPORT 1 -#else -#define HAVE_EHTABLE_SUPPORT 0 -#endif - -#if HAVE_EHTABLE_SUPPORT - -// libgcc defines the __register_frame function to dynamically register new -// dwarf frames for exception handling. This functionality is not portable -// across compilers and is only provided by GCC. We use the __register_frame -// function here so that code generated by the JIT cooperates with the unwinding -// runtime of libgcc. When JITting with exception handling enable, LLVM -// generates dwarf frames and registers it to libgcc with __register_frame. -// -// The __register_frame function works with Linux. -// -// Unfortunately, this functionality seems to be in libgcc after the unwinding -// library of libgcc for darwin was written. The code for darwin overwrites the -// value updated by __register_frame with a value fetched with "keymgr". -// "keymgr" is an obsolete functionality, which should be rewritten some day. -// In the meantime, since "keymgr" is on all libgccs shipped with apple-gcc, we -// need a workaround in LLVM which uses the "keymgr" to dynamically modify the -// values of an opaque key, used by libgcc to find dwarf tables. - -extern "C" void __register_frame(void*); -extern "C" void __deregister_frame(void*); - -#if defined(__APPLE__) && MAC_OS_X_VERSION_MAX_ALLOWED <= 1050 -# define USE_KEYMGR 1 -#else -# define USE_KEYMGR 0 -#endif - -#if USE_KEYMGR - -namespace { - -// LibgccObject - This is the structure defined in libgcc. There is no #include -// provided for this structure, so we also define it here. libgcc calls it -// "struct object". The structure is undocumented in libgcc. -struct LibgccObject { - void *unused1; - void *unused2; - void *unused3; - - /// frame - Pointer to the exception table. - void *frame; - - /// encoding - The encoding of the object? - union { - struct { - unsigned long sorted : 1; - unsigned long from_array : 1; - unsigned long mixed_encoding : 1; - unsigned long encoding : 8; - unsigned long count : 21; - } b; - size_t i; - } encoding; - - /// fde_end - libgcc defines this field only if some macro is defined. We - /// include this field even if it may not there, to make libgcc happy. - char *fde_end; - - /// next - At least we know it's a chained list! - struct LibgccObject *next; -}; - -// "kemgr" stuff. Apparently, all frame tables are stored there. -extern "C" void _keymgr_set_and_unlock_processwide_ptr(int, void *); -extern "C" void *_keymgr_get_and_lock_processwide_ptr(int); -#define KEYMGR_GCC3_DW2_OBJ_LIST 302 /* Dwarf2 object list */ - -/// LibgccObjectInfo - libgcc defines this struct as km_object_info. It -/// probably contains all dwarf tables that are loaded. -struct LibgccObjectInfo { - - /// seenObjects - LibgccObjects already parsed by the unwinding runtime. - /// - struct LibgccObject* seenObjects; - - /// unseenObjects - LibgccObjects not parsed yet by the unwinding runtime. - /// - struct LibgccObject* unseenObjects; - - unsigned unused[2]; -}; - -/// darwin_register_frame - Since __register_frame does not work with darwin's -/// libgcc,we provide our own function, which "tricks" libgcc by modifying the -/// "Dwarf2 object list" key. -void DarwinRegisterFrame(void* FrameBegin) { - // Get the key. 
- LibgccObjectInfo* LOI = (struct LibgccObjectInfo*) - _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); - assert(LOI && "This should be preallocated by the runtime"); - - // Allocate a new LibgccObject to represent this frame. Deallocation of this - // object may be impossible: since darwin code in libgcc was written after - // the ability to dynamically register frames, things may crash if we - // deallocate it. - struct LibgccObject* ob = (struct LibgccObject*) - malloc(sizeof(struct LibgccObject)); - - // Do like libgcc for the values of the field. - ob->unused1 = (void *)-1; - ob->unused2 = 0; - ob->unused3 = 0; - ob->frame = FrameBegin; - ob->encoding.i = 0; - ob->encoding.b.encoding = llvm::dwarf::DW_EH_PE_omit; - - // Put the info on both places, as libgcc uses the first or the second - // field. Note that we rely on having two pointers here. If fde_end was a - // char, things would get complicated. - ob->fde_end = (char*)LOI->unseenObjects; - ob->next = LOI->unseenObjects; - - // Update the key's unseenObjects list. - LOI->unseenObjects = ob; - - // Finally update the "key". Apparently, libgcc requires it. - _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, - LOI); - -} - -} -#endif // __APPLE__ -#endif // HAVE_EHTABLE_SUPPORT - /// createJIT - This is the factory method for creating a JIT for the current /// machine, it does not fall back to the interpreter. This takes ownership /// of the module. @@ -293,33 +159,11 @@ JIT::JIT(Module *M, TargetMachine &tm, TargetJITInfo &tji, report_fatal_error("Target does not support machine code emission!"); } - // Register routine for informing unwinding runtime about new EH frames -#if HAVE_EHTABLE_SUPPORT -#if USE_KEYMGR - struct LibgccObjectInfo* LOI = (struct LibgccObjectInfo*) - _keymgr_get_and_lock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST); - - // The key is created on demand, and libgcc creates it the first time an - // exception occurs. Since we need the key to register frames, we create - // it now. - if (!LOI) - LOI = (LibgccObjectInfo*)calloc(sizeof(struct LibgccObjectInfo), 1); - _keymgr_set_and_unlock_processwide_ptr(KEYMGR_GCC3_DW2_OBJ_LIST, LOI); - InstallExceptionTableRegister(DarwinRegisterFrame); - // Not sure about how to deregister on Darwin. -#else - InstallExceptionTableRegister(__register_frame); - InstallExceptionTableDeregister(__deregister_frame); -#endif // __APPLE__ -#endif // HAVE_EHTABLE_SUPPORT - // Initialize passes. PM.doInitialization(); } JIT::~JIT() { - // Unregister all exception tables registered by this JIT. - DeregisterAllTables(); // Cleanup. AllJits->Remove(this); delete jitstate; diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp deleted file mode 100644 index 35d2b8b..0000000 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ /dev/null @@ -1,596 +0,0 @@ -//===----- JITDwarfEmitter.cpp - Write dwarf tables into memory -----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a JITDwarfEmitter object that is used by the JIT to -// write dwarf tables to memory. 
-// -//===----------------------------------------------------------------------===// - -#include "JITDwarfEmitter.h" -#include "JIT.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/IR/DataLayout.h" -#include "llvm/IR/Function.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCSymbol.h" -#include "llvm/MC/MachineLocation.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -using namespace llvm; - -JITDwarfEmitter::JITDwarfEmitter(JIT& theJit) : MMI(0), Jit(theJit) {} - - -unsigned char* JITDwarfEmitter::EmitDwarfTable(MachineFunction& F, - JITCodeEmitter& jce, - unsigned char* StartFunction, - unsigned char* EndFunction, - unsigned char* &EHFramePtr) { - assert(MMI && "MachineModuleInfo not registered!"); - - const TargetMachine& TM = F.getTarget(); - TD = TM.getDataLayout(); - stackGrowthDirection = TM.getFrameLowering()->getStackGrowthDirection(); - RI = TM.getRegisterInfo(); - MAI = TM.getMCAsmInfo(); - JCE = &jce; - - unsigned char* ExceptionTable = EmitExceptionTable(&F, StartFunction, - EndFunction); - - unsigned char* Result = 0; - - const std::vector Personalities = MMI->getPersonalities(); - EHFramePtr = EmitCommonEHFrame(Personalities[MMI->getPersonalityIndex()]); - - Result = EmitEHFrame(Personalities[MMI->getPersonalityIndex()], EHFramePtr, - StartFunction, EndFunction, ExceptionTable); - - return Result; -} - - -void -JITDwarfEmitter::EmitFrameMoves(intptr_t BaseLabelPtr, - const std::vector &Moves) const { - unsigned PointerSize = TD->getPointerSize(); - int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ? - PointerSize : -PointerSize; - MCSymbol *BaseLabel = 0; - - for (unsigned i = 0, N = Moves.size(); i < N; ++i) { - const MachineMove &Move = Moves[i]; - MCSymbol *Label = Move.getLabel(); - - // Throw out move if the label is invalid. - if (Label && (*JCE->getLabelLocations())[Label] == 0) - continue; - - intptr_t LabelPtr = 0; - if (Label) LabelPtr = JCE->getLabelAddress(Label); - - const MachineLocation &Dst = Move.getDestination(); - const MachineLocation &Src = Move.getSource(); - - // Advance row if new location. - if (BaseLabelPtr && Label && BaseLabel != Label) { - JCE->emitByte(dwarf::DW_CFA_advance_loc4); - JCE->emitInt32(LabelPtr - BaseLabelPtr); - - BaseLabel = Label; - BaseLabelPtr = LabelPtr; - } - - // If advancing cfa. 
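
The deleted emitter writes CFI opcodes by hand, and most operands use LEB128 variable-length encoding (the JCE->emitULEB128Bytes calls). The encoding itself is small; a sketch, independent of LLVM:

#include <cstdint>
#include <vector>

// Unsigned LEB128: 7 payload bits per byte, high bit set while more bytes
// follow. DW_CFA_def_cfa's register number and offset, for example, are
// emitted in this form.
static void emitULEB128(uint64_t Value, std::vector<uint8_t> &Out) {
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0)
      Byte |= 0x80; // continuation bit
    Out.push_back(Byte);
  } while (Value != 0);
}
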
- if (Dst.isReg() && Dst.getReg() == MachineLocation::VirtualFP) { - if (!Src.isReg()) { - if (Src.getReg() == MachineLocation::VirtualFP) { - JCE->emitByte(dwarf::DW_CFA_def_cfa_offset); - } else { - JCE->emitByte(dwarf::DW_CFA_def_cfa); - JCE->emitULEB128Bytes(RI->getDwarfRegNum(Src.getReg(), true)); - } - - JCE->emitULEB128Bytes(-Src.getOffset()); - } else { - llvm_unreachable("Machine move not supported yet."); - } - } else if (Src.isReg() && - Src.getReg() == MachineLocation::VirtualFP) { - if (Dst.isReg()) { - JCE->emitByte(dwarf::DW_CFA_def_cfa_register); - JCE->emitULEB128Bytes(RI->getDwarfRegNum(Dst.getReg(), true)); - } else { - llvm_unreachable("Machine move not supported yet."); - } - } else { - unsigned Reg = RI->getDwarfRegNum(Src.getReg(), true); - int Offset = Dst.getOffset() / stackGrowth; - - if (Offset < 0) { - JCE->emitByte(dwarf::DW_CFA_offset_extended_sf); - JCE->emitULEB128Bytes(Reg); - JCE->emitSLEB128Bytes(Offset); - } else if (Reg < 64) { - JCE->emitByte(dwarf::DW_CFA_offset + Reg); - JCE->emitULEB128Bytes(Offset); - } else { - JCE->emitByte(dwarf::DW_CFA_offset_extended); - JCE->emitULEB128Bytes(Reg); - JCE->emitULEB128Bytes(Offset); - } - } - } -} - -/// SharedTypeIds - How many leading type ids two landing pads have in common. -static unsigned SharedTypeIds(const LandingPadInfo *L, - const LandingPadInfo *R) { - const std::vector &LIds = L->TypeIds, &RIds = R->TypeIds; - unsigned LSize = LIds.size(), RSize = RIds.size(); - unsigned MinSize = LSize < RSize ? LSize : RSize; - unsigned Count = 0; - - for (; Count != MinSize; ++Count) - if (LIds[Count] != RIds[Count]) - return Count; - - return Count; -} - - -/// PadLT - Order landing pads lexicographically by type id. -static bool PadLT(const LandingPadInfo *L, const LandingPadInfo *R) { - const std::vector &LIds = L->TypeIds, &RIds = R->TypeIds; - unsigned LSize = LIds.size(), RSize = RIds.size(); - unsigned MinSize = LSize < RSize ? LSize : RSize; - - for (unsigned i = 0; i != MinSize; ++i) - if (LIds[i] != RIds[i]) - return LIds[i] < RIds[i]; - - return LSize < RSize; -} - -namespace { - -/// ActionEntry - Structure describing an entry in the actions table. -struct ActionEntry { - int ValueForTypeID; // The value to write - may not be equal to the type id. - int NextAction; - struct ActionEntry *Previous; -}; - -/// PadRange - Structure holding a try-range and the associated landing pad. -struct PadRange { - // The index of the landing pad. - unsigned PadIndex; - // The index of the begin and end labels in the landing pad's label lists. - unsigned RangeIndex; -}; - -typedef DenseMap RangeMapType; - -/// CallSiteEntry - Structure describing an entry in the call-site table. -struct CallSiteEntry { - MCSymbol *BeginLabel; // zero indicates the start of the function. - MCSymbol *EndLabel; // zero indicates the end of the function. - MCSymbol *PadLabel; // zero indicates that there is no landing pad. - unsigned Action; -}; - -} - -unsigned char* JITDwarfEmitter::EmitExceptionTable(MachineFunction* MF, - unsigned char* StartFunction, - unsigned char* EndFunction) const { - assert(MMI && "MachineModuleInfo not registered!"); - - // Map all labels and get rid of any dead landing pads. - MMI->TidyLandingPads(JCE->getLabelLocations()); - - const std::vector &TypeInfos = MMI->getTypeInfos(); - const std::vector &FilterIds = MMI->getFilterIds(); - const std::vector &PadInfos = MMI->getLandingPads(); - if (PadInfos.empty()) return 0; - - // Sort the landing pads in order of their type ids. 
This is used to fold - // duplicate actions. - SmallVector LandingPads; - LandingPads.reserve(PadInfos.size()); - for (unsigned i = 0, N = PadInfos.size(); i != N; ++i) - LandingPads.push_back(&PadInfos[i]); - std::sort(LandingPads.begin(), LandingPads.end(), PadLT); - - // Negative type ids index into FilterIds, positive type ids index into - // TypeInfos. The value written for a positive type id is just the type - // id itself. For a negative type id, however, the value written is the - // (negative) byte offset of the corresponding FilterIds entry. The byte - // offset is usually equal to the type id, because the FilterIds entries - // are written using a variable width encoding which outputs one byte per - // entry as long as the value written is not too large, but can differ. - // This kind of complication does not occur for positive type ids because - // type infos are output using a fixed width encoding. - // FilterOffsets[i] holds the byte offset corresponding to FilterIds[i]. - SmallVector FilterOffsets; - FilterOffsets.reserve(FilterIds.size()); - int Offset = -1; - for(std::vector::const_iterator I = FilterIds.begin(), - E = FilterIds.end(); I != E; ++I) { - FilterOffsets.push_back(Offset); - Offset -= MCAsmInfo::getULEB128Size(*I); - } - - // Compute the actions table and gather the first action index for each - // landing pad site. - SmallVector Actions; - SmallVector FirstActions; - FirstActions.reserve(LandingPads.size()); - - int FirstAction = 0; - unsigned SizeActions = 0; - for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LP = LandingPads[i]; - const std::vector &TypeIds = LP->TypeIds; - const unsigned NumShared = i ? SharedTypeIds(LP, LandingPads[i-1]) : 0; - unsigned SizeSiteActions = 0; - - if (NumShared < TypeIds.size()) { - unsigned SizeAction = 0; - ActionEntry *PrevAction = 0; - - if (NumShared) { - const unsigned SizePrevIds = LandingPads[i-1]->TypeIds.size(); - assert(Actions.size()); - PrevAction = &Actions.back(); - SizeAction = MCAsmInfo::getSLEB128Size(PrevAction->NextAction) + - MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); - for (unsigned j = NumShared; j != SizePrevIds; ++j) { - SizeAction -= MCAsmInfo::getSLEB128Size(PrevAction->ValueForTypeID); - SizeAction += -PrevAction->NextAction; - PrevAction = PrevAction->Previous; - } - } - - // Compute the actions. - for (unsigned I = NumShared, M = TypeIds.size(); I != M; ++I) { - int TypeID = TypeIds[I]; - assert(-1-TypeID < (int)FilterOffsets.size() && "Unknown filter id!"); - int ValueForTypeID = TypeID < 0 ? FilterOffsets[-1 - TypeID] : TypeID; - unsigned SizeTypeID = MCAsmInfo::getSLEB128Size(ValueForTypeID); - - int NextAction = SizeAction ? -(SizeAction + SizeTypeID) : 0; - SizeAction = SizeTypeID + MCAsmInfo::getSLEB128Size(NextAction); - SizeSiteActions += SizeAction; - - ActionEntry Action = {ValueForTypeID, NextAction, PrevAction}; - Actions.push_back(Action); - - PrevAction = &Actions.back(); - } - - // Record the first action of the landing pad site. - FirstAction = SizeActions + SizeSiteActions - SizeAction + 1; - } // else identical - re-use previous FirstAction - - FirstActions.push_back(FirstAction); - - // Compute this sites contribution to size. - SizeActions += SizeSiteActions; - } - - // Compute the call-site table. Entries must be ordered by address. 
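
Each record in the call-site table assembled below pairs a code range with its landing pad and an index into the action table just computed. A simplified sketch of the record and of the merge rule used there (adjacent ranges sharing pad and action are coalesced); the field types are illustrative:

#include <cstdint>
#include <vector>

struct CallSite {
  uint32_t Start;      // offset of the range, relative to function start
  uint32_t Length;     // length of the range
  uint32_t LandingPad; // 0 = no landing pad for this range
  unsigned Action;     // index into the action table, 0 = no actions
};

// Append a call site, extending the previous one when it covers the
// immediately preceding range with the same landing pad and action.
static void addCallSite(std::vector<CallSite> &Sites, const CallSite &S) {
  if (!Sites.empty()) {
    CallSite &Prev = Sites.back();
    if (Prev.LandingPad == S.LandingPad && Prev.Action == S.Action &&
        Prev.Start + Prev.Length == S.Start) {
      Prev.Length += S.Length; // merge with the previous entry
      return;
    }
  }
  Sites.push_back(S);
}
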
- SmallVector CallSites; - - RangeMapType PadMap; - for (unsigned i = 0, N = LandingPads.size(); i != N; ++i) { - const LandingPadInfo *LandingPad = LandingPads[i]; - for (unsigned j=0, E = LandingPad->BeginLabels.size(); j != E; ++j) { - MCSymbol *BeginLabel = LandingPad->BeginLabels[j]; - assert(!PadMap.count(BeginLabel) && "Duplicate landing pad labels!"); - PadRange P = { i, j }; - PadMap[BeginLabel] = P; - } - } - - bool MayThrow = false; - MCSymbol *LastLabel = 0; - for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); - I != E; ++I) { - for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end(); - MI != E; ++MI) { - if (!MI->isLabel()) { - MayThrow |= MI->isCall(); - continue; - } - - MCSymbol *BeginLabel = MI->getOperand(0).getMCSymbol(); - assert(BeginLabel && "Invalid label!"); - - if (BeginLabel == LastLabel) - MayThrow = false; - - RangeMapType::iterator L = PadMap.find(BeginLabel); - - if (L == PadMap.end()) - continue; - - PadRange P = L->second; - const LandingPadInfo *LandingPad = LandingPads[P.PadIndex]; - - assert(BeginLabel == LandingPad->BeginLabels[P.RangeIndex] && - "Inconsistent landing pad map!"); - - // If some instruction between the previous try-range and this one may - // throw, create a call-site entry with no landing pad for the region - // between the try-ranges. - if (MayThrow) { - CallSiteEntry Site = {LastLabel, BeginLabel, 0, 0}; - CallSites.push_back(Site); - } - - LastLabel = LandingPad->EndLabels[P.RangeIndex]; - CallSiteEntry Site = {BeginLabel, LastLabel, - LandingPad->LandingPadLabel, FirstActions[P.PadIndex]}; - - assert(Site.BeginLabel && Site.EndLabel && Site.PadLabel && - "Invalid landing pad!"); - - // Try to merge with the previous call-site. - if (CallSites.size()) { - CallSiteEntry &Prev = CallSites.back(); - if (Site.PadLabel == Prev.PadLabel && Site.Action == Prev.Action) { - // Extend the range of the previous entry. - Prev.EndLabel = Site.EndLabel; - continue; - } - } - - // Otherwise, create a new call-site. - CallSites.push_back(Site); - } - } - // If some instruction between the previous try-range and the end of the - // function may throw, create a call-site entry with no landing pad for the - // region following the try-range. - if (MayThrow) { - CallSiteEntry Site = {LastLabel, 0, 0, 0}; - CallSites.push_back(Site); - } - - // Final tallies. - unsigned SizeSites = CallSites.size() * (sizeof(int32_t) + // Site start. - sizeof(int32_t) + // Site length. - sizeof(int32_t)); // Landing pad. - for (unsigned i = 0, e = CallSites.size(); i < e; ++i) - SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action); - - unsigned SizeTypes = TypeInfos.size() * TD->getPointerSize(); - - unsigned TypeOffset = sizeof(int8_t) + // Call site format - // Call-site table length - MCAsmInfo::getULEB128Size(SizeSites) + - SizeSites + SizeActions + SizeTypes; - - // Begin the exception table. - JCE->emitAlignmentWithFill(4, 0); - // Asm->EOL("Padding"); - - unsigned char* DwarfExceptionTable = (unsigned char*)JCE->getCurrentPCValue(); - - // Emit the header. - JCE->emitByte(dwarf::DW_EH_PE_omit); - // Asm->EOL("LPStart format (DW_EH_PE_omit)"); - JCE->emitByte(dwarf::DW_EH_PE_absptr); - // Asm->EOL("TType format (DW_EH_PE_absptr)"); - JCE->emitULEB128Bytes(TypeOffset); - // Asm->EOL("TType base offset"); - JCE->emitByte(dwarf::DW_EH_PE_udata4); - // Asm->EOL("Call site format (DW_EH_PE_udata4)"); - JCE->emitULEB128Bytes(SizeSites); - // Asm->EOL("Call-site table length"); - - // Emit the landing pad site information. 
- for (unsigned i = 0; i < CallSites.size(); ++i) { - CallSiteEntry &S = CallSites[i]; - intptr_t BeginLabelPtr = 0; - intptr_t EndLabelPtr = 0; - - if (!S.BeginLabel) { - BeginLabelPtr = (intptr_t)StartFunction; - JCE->emitInt32(0); - } else { - BeginLabelPtr = JCE->getLabelAddress(S.BeginLabel); - JCE->emitInt32(BeginLabelPtr - (intptr_t)StartFunction); - } - - // Asm->EOL("Region start"); - - if (!S.EndLabel) - EndLabelPtr = (intptr_t)EndFunction; - else - EndLabelPtr = JCE->getLabelAddress(S.EndLabel); - - JCE->emitInt32(EndLabelPtr - BeginLabelPtr); - //Asm->EOL("Region length"); - - if (!S.PadLabel) { - JCE->emitInt32(0); - } else { - unsigned PadLabelPtr = JCE->getLabelAddress(S.PadLabel); - JCE->emitInt32(PadLabelPtr - (intptr_t)StartFunction); - } - // Asm->EOL("Landing pad"); - - JCE->emitULEB128Bytes(S.Action); - // Asm->EOL("Action"); - } - - // Emit the actions. - for (unsigned I = 0, N = Actions.size(); I != N; ++I) { - ActionEntry &Action = Actions[I]; - - JCE->emitSLEB128Bytes(Action.ValueForTypeID); - //Asm->EOL("TypeInfo index"); - JCE->emitSLEB128Bytes(Action.NextAction); - //Asm->EOL("Next action"); - } - - // Emit the type ids. - for (unsigned M = TypeInfos.size(); M; --M) { - const GlobalVariable *GV = TypeInfos[M - 1]; - - if (GV) { - if (TD->getPointerSize() == sizeof(int32_t)) - JCE->emitInt32((intptr_t)Jit.getOrEmitGlobalVariable(GV)); - else - JCE->emitInt64((intptr_t)Jit.getOrEmitGlobalVariable(GV)); - } else { - if (TD->getPointerSize() == sizeof(int32_t)) - JCE->emitInt32(0); - else - JCE->emitInt64(0); - } - // Asm->EOL("TypeInfo"); - } - - // Emit the filter typeids. - for (unsigned j = 0, M = FilterIds.size(); j < M; ++j) { - unsigned TypeID = FilterIds[j]; - JCE->emitULEB128Bytes(TypeID); - //Asm->EOL("Filter TypeInfo index"); - } - - JCE->emitAlignmentWithFill(4, 0); - - return DwarfExceptionTable; -} - -unsigned char* -JITDwarfEmitter::EmitCommonEHFrame(const Function* Personality) const { - unsigned PointerSize = TD->getPointerSize(); - int stackGrowth = stackGrowthDirection == TargetFrameLowering::StackGrowsUp ? - PointerSize : -PointerSize; - - unsigned char* StartCommonPtr = (unsigned char*)JCE->getCurrentPCValue(); - // EH Common Frame header - JCE->allocateSpace(4, 0); - unsigned char* FrameCommonBeginPtr = (unsigned char*)JCE->getCurrentPCValue(); - JCE->emitInt32((int)0); - JCE->emitByte(dwarf::DW_CIE_VERSION); - JCE->emitString(Personality ? "zPLR" : "zR"); - JCE->emitULEB128Bytes(1); - JCE->emitSLEB128Bytes(stackGrowth); - JCE->emitByte(RI->getDwarfRegNum(RI->getRARegister(), true)); - - if (Personality) { - // Augmentation Size: 3 small ULEBs of one byte each, and the personality - // function which size is PointerSize. - JCE->emitULEB128Bytes(3 + PointerSize); - - // We set the encoding of the personality as direct encoding because we use - // the function pointer. The encoding is not relative because the current - // PC value may be bigger than the personality function pointer. 
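
The encoding bytes chosen in this CIE are the standard DWARF EH pointer encodings; their values are fixed by the format and are reproduced here for reference (a combined byte such as pcrel|sdata4 means "4-byte signed value, relative to the emission point"):

#include <cstdint>

enum : uint8_t {
  DW_EH_PE_absptr = 0x00, // raw machine pointer
  DW_EH_PE_udata4 = 0x03, // 4-byte unsigned data
  DW_EH_PE_sdata4 = 0x0b, // 4-byte signed data
  DW_EH_PE_sdata8 = 0x0c, // 8-byte signed data
  DW_EH_PE_pcrel  = 0x10, // value is relative to the current PC
  DW_EH_PE_omit   = 0xff, // value is not present
};

// The LSDA encoding picked below: pc-relative signed data sized to the target.
inline uint8_t lsdaEncoding(unsigned PointerSize) {
  return DW_EH_PE_pcrel |
         (PointerSize == 4 ? DW_EH_PE_sdata4 : DW_EH_PE_sdata8);
}
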
- if (PointerSize == 4) { - JCE->emitByte(dwarf::DW_EH_PE_sdata4); - JCE->emitInt32(((intptr_t)Jit.getPointerToGlobal(Personality))); - } else { - JCE->emitByte(dwarf::DW_EH_PE_sdata8); - JCE->emitInt64(((intptr_t)Jit.getPointerToGlobal(Personality))); - } - - // LSDA encoding: This must match the encoding used in EmitEHFrame () - if (PointerSize == 4) - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - else - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata8); - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - } else { - JCE->emitULEB128Bytes(1); - JCE->emitULEB128Bytes(dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_sdata4); - } - - EmitFrameMoves(0, MAI->getInitialFrameState()); - - JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop); - - JCE->emitInt32At((uintptr_t*)StartCommonPtr, - (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() - - FrameCommonBeginPtr)); - - return StartCommonPtr; -} - - -unsigned char* -JITDwarfEmitter::EmitEHFrame(const Function* Personality, - unsigned char* StartCommonPtr, - unsigned char* StartFunction, - unsigned char* EndFunction, - unsigned char* ExceptionTable) const { - unsigned PointerSize = TD->getPointerSize(); - - // EH frame header. - unsigned char* StartEHPtr = (unsigned char*)JCE->getCurrentPCValue(); - JCE->allocateSpace(4, 0); - unsigned char* FrameBeginPtr = (unsigned char*)JCE->getCurrentPCValue(); - // FDE CIE Offset - JCE->emitInt32(FrameBeginPtr - StartCommonPtr); - JCE->emitInt32(StartFunction - (unsigned char*)JCE->getCurrentPCValue()); - JCE->emitInt32(EndFunction - StartFunction); - - // If there is a personality and landing pads then point to the language - // specific data area in the exception table. - if (Personality) { - JCE->emitULEB128Bytes(PointerSize == 4 ? 4 : 8); - - if (PointerSize == 4) { - if (!MMI->getLandingPads().empty()) - JCE->emitInt32(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue()); - else - JCE->emitInt32((int)0); - } else { - if (!MMI->getLandingPads().empty()) - JCE->emitInt64(ExceptionTable-(unsigned char*)JCE->getCurrentPCValue()); - else - JCE->emitInt64((int)0); - } - } else { - JCE->emitULEB128Bytes(0); - } - - // Indicate locations of function specific callee saved registers in - // frame. - EmitFrameMoves((intptr_t)StartFunction, MMI->getFrameMoves()); - - JCE->emitAlignmentWithFill(PointerSize, dwarf::DW_CFA_nop); - - // Indicate the size of the table - JCE->emitInt32At((uintptr_t*)StartEHPtr, - (uintptr_t)((unsigned char*)JCE->getCurrentPCValue() - - StartEHPtr)); - - // Double zeroes for the unwind runtime - if (PointerSize == 8) { - JCE->emitInt64(0); - JCE->emitInt64(0); - } else { - JCE->emitInt32(0); - JCE->emitInt32(0); - } - - return StartEHPtr; -} diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h deleted file mode 100644 index 98ac340..0000000 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITDwarfEmitter.h +++ /dev/null @@ -1,77 +0,0 @@ -//===------ JITDwarfEmitter.h - Write dwarf tables into memory ------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines a JITDwarfEmitter object that is used by the JIT to -// write dwarf tables to memory. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H -#define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H - -#include "llvm/Support/DataTypes.h" -#include - -namespace llvm { - -class Function; -class JIT; -class JITCodeEmitter; -class MachineFunction; -class MachineModuleInfo; -class MachineMove; -class MCAsmInfo; -class DataLayout; -class TargetMachine; -class TargetRegisterInfo; - -class JITDwarfEmitter { - const DataLayout* TD; - JITCodeEmitter* JCE; - const TargetRegisterInfo* RI; - const MCAsmInfo *MAI; - MachineModuleInfo* MMI; - JIT& Jit; - bool stackGrowthDirection; - - unsigned char* EmitExceptionTable(MachineFunction* MF, - unsigned char* StartFunction, - unsigned char* EndFunction) const; - - void EmitFrameMoves(intptr_t BaseLabelPtr, - const std::vector &Moves) const; - - unsigned char* EmitCommonEHFrame(const Function* Personality) const; - - unsigned char* EmitEHFrame(const Function* Personality, - unsigned char* StartBufferPtr, - unsigned char* StartFunction, - unsigned char* EndFunction, - unsigned char* ExceptionTable) const; - -public: - - JITDwarfEmitter(JIT& jit); - - unsigned char* EmitDwarfTable(MachineFunction& F, - JITCodeEmitter& JCE, - unsigned char* StartFunction, - unsigned char* EndFunction, - unsigned char* &EHFramePtr); - - - void setModuleInfo(MachineModuleInfo* Info) { - MMI = Info; - } -}; - - -} // end namespace llvm - -#endif // LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp index c273876..acbbfa1 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "jit" #include "JIT.h" -#include "JITDwarfEmitter.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallPtrSet.h" @@ -325,9 +324,6 @@ namespace { /// Resolver - This contains info about the currently resolved functions. JITResolver Resolver; - /// DE - The dwarf emitter for the jit. - OwningPtr DE; - /// LabelLocations - This vector is a mapping from Label ID's to their /// address. DenseMap LabelLocations; @@ -363,22 +359,16 @@ namespace { /// Instance of the JIT JIT *TheJIT; - bool JITExceptionHandling; - public: JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM) : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0), - EmittedFunctions(this), TheJIT(&jit), - JITExceptionHandling(TM.Options.JITExceptionHandling) { + EmittedFunctions(this), TheJIT(&jit) { MemMgr = JMM ? 
JMM : JITMemoryManager::CreateDefaultMemManager(); if (jit.getJITInfo().needsGOT()) { MemMgr->AllocateGOT(); DEBUG(dbgs() << "JIT is managing a GOT\n"); } - if (JITExceptionHandling) { - DE.reset(new JITDwarfEmitter(jit)); - } } ~JITEmitter() { delete MemMgr; @@ -460,7 +450,6 @@ namespace { virtual void setModuleInfo(MachineModuleInfo* Info) { MMI = Info; - if (DE.get()) DE->setModuleInfo(Info); } private: @@ -964,40 +953,6 @@ bool JITEmitter::finishFunction(MachineFunction &F) { } }); - if (JITExceptionHandling) { - uintptr_t ActualSize = 0; - SavedBufferBegin = BufferBegin; - SavedBufferEnd = BufferEnd; - SavedCurBufferPtr = CurBufferPtr; - uint8_t *FrameRegister; - - while (true) { - BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), - ActualSize); - BufferEnd = BufferBegin+ActualSize; - EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin; - uint8_t *EhStart; - FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, EhStart); - - // If the buffer was large enough to hold the table then we are done. - if (CurBufferPtr != BufferEnd) - break; - - // Try again with twice as much space. - ActualSize = (CurBufferPtr - BufferBegin) * 2; - MemMgr->deallocateExceptionTable(BufferBegin); - } - MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, - FrameRegister); - BufferBegin = SavedBufferBegin; - BufferEnd = SavedBufferEnd; - CurBufferPtr = SavedCurBufferPtr; - - if (JITExceptionHandling) { - TheJIT->RegisterTable(F.getFunction(), FrameRegister); - } - } - if (MMI) MMI->EndFunction(); @@ -1027,15 +982,10 @@ void JITEmitter::deallocateMemForFunction(const Function *F) { Emitted = EmittedFunctions.find(F); if (Emitted != EmittedFunctions.end()) { MemMgr->deallocateFunctionBody(Emitted->second.FunctionBody); - MemMgr->deallocateExceptionTable(Emitted->second.ExceptionTable); TheJIT->NotifyFreeingMachineCode(Emitted->second.Code); EmittedFunctions.erase(Emitted); } - - if (JITExceptionHandling) { - TheJIT->DeregisterTable(F); - } } diff --git a/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 66aeb77..f58d31b 100644 --- a/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/contrib/llvm/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -464,11 +464,15 @@ namespace { /// allocateCodeSection - Allocate memory for a code section. uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID) { + unsigned SectionID, StringRef SectionName) { // Grow the required block size to account for the block header Size += sizeof(*CurBlock); - // FIXME: Alignement handling. + // Alignment handling. + if (!Alignment) + Alignment = 16; + Size += Alignment - 1; + FreeRangeHeader* candidateBlock = FreeMemoryList; FreeRangeHeader* head = FreeMemoryList; FreeRangeHeader* iter = head->Next; @@ -500,39 +504,21 @@ namespace { FreeMemoryList = candidateBlock->AllocateBlock(); // Release the memory at the end of this block that isn't needed. FreeMemoryList = CurBlock->TrimAllocationToSize(FreeMemoryList, Size); - return (uint8_t *)(CurBlock + 1); + uintptr_t unalignedAddr = (uintptr_t)CurBlock + sizeof(*CurBlock); + return (uint8_t*)RoundUpToAlignment((uint64_t)unalignedAddr, Alignment); } /// allocateDataSection - Allocate memory for a data section. 
uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID, bool IsReadOnly) { + unsigned SectionID, StringRef SectionName, + bool IsReadOnly) { return (uint8_t*)DataAllocator.Allocate(Size, Alignment); } - bool applyPermissions(std::string *ErrMsg) { + bool finalizeMemory(std::string *ErrMsg) { return false; } - /// startExceptionTable - Use startFunctionBody to allocate memory for the - /// function's exception table. - uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) { - return startFunctionBody(F, ActualSize); - } - - /// endExceptionTable - The exception table of F is now allocated, - /// and takes the memory in the range [TableStart,TableEnd). - void endExceptionTable(const Function *F, uint8_t *TableStart, - uint8_t *TableEnd, uint8_t* FrameRegister) { - assert(TableEnd > TableStart); - assert(TableStart == (uint8_t *)(CurBlock+1) && - "Mismatched table start/end!"); - - uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock; - - // Release the memory at the end of this block that isn't needed. - FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize); - } - uint8_t *getGOTBase() const { return GOTBase; } @@ -557,12 +543,6 @@ namespace { if (Body) deallocateBlock(Body); } - /// deallocateExceptionTable - Deallocate memory for the specified - /// exception table. - void deallocateExceptionTable(void *ET) { - if (ET) deallocateBlock(ET); - } - /// setMemoryWritable - When code generation is in progress, /// the code pages may need permissions changed. void setMemoryWritable() @@ -814,7 +794,7 @@ static void runAtExitHandlers() { // not inlined, and hiding their real definitions in a separate archive file // that the dynamic linker can't see. For more info, search for // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. -#if defined(__linux__) +#if defined(__linux__) && defined(__GLIBC__) /* stat functions are redirecting to __xstat with a version number. On x86-64 * linking with libc_nonshared.a and -Wl,--export-dynamic doesn't make 'stat' * available as an exported symbol, so we have to add it explicitly. diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 38aa547..195c458 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -14,10 +14,12 @@ #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/ObjectBuffer.h" #include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/PassManager.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/Function.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" @@ -39,7 +41,7 @@ extern "C" void LLVMLinkInMCJIT() { ExecutionEngine *MCJIT::createJIT(Module *M, std::string *ErrorStr, - JITMemoryManager *JMM, + RTDyldMemoryManager *MemMgr, bool GVsWithCode, TargetMachine *TM) { // Try to register the program as a source of symbols to resolve against. @@ -47,43 +49,69 @@ ExecutionEngine *MCJIT::createJIT(Module *M, // FIXME: Don't do this here. sys::DynamicLibrary::LoadLibraryPermanently(0, NULL); - return new MCJIT(M, TM, JMM ? JMM : new SectionMemoryManager(), GVsWithCode); + return new MCJIT(M, TM, MemMgr ? 
MemMgr : new SectionMemoryManager(),
+ GVsWithCode);
}

MCJIT::MCJIT(Module *m, TargetMachine *tm, RTDyldMemoryManager *MM,
bool AllocateGVsWithCode)
- : ExecutionEngine(m), TM(tm), Ctx(0),
- MemMgr(MM ? MM : new SectionMemoryManager()), Dyld(MemMgr),
- IsLoaded(false), M(m), ObjCache(0) {
+ : ExecutionEngine(m), TM(tm), Ctx(0), MemMgr(this, MM), Dyld(&MemMgr),
+ ObjCache(0) {
+ OwnedModules.addModule(m);
setDataLayout(TM->getDataLayout());
}

MCJIT::~MCJIT() {
- if (LoadedObject)
- NotifyFreeingObject(*LoadedObject.get());
- delete MemMgr;
+ MutexGuard locked(lock);
+ // FIXME: We are managing our modules, so we do not want the base class
+ // ExecutionEngine to manage them as well. To avoid double destruction
+ // of the first (and only) module added in the ExecutionEngine constructor,
+ // we remove it from EE and destruct it ourselves.
+ //
+ // It may make sense to move our module manager (based on SmallPtrSet) back
+ // into EE if the JIT and Interpreter can live with it.
+ // If so, additional functions: addModule, removeModule, FindFunctionNamed,
+ // runStaticConstructorsDestructors could be moved back to EE as well.
+ //
+ Modules.clear();
+ Dyld.deregisterEHFrames();
+
+ LoadedObjectMap::iterator it, end = LoadedObjects.end();
+ for (it = LoadedObjects.begin(); it != end; ++it) {
+ ObjectImage *Obj = it->second;
+ if (Obj) {
+ NotifyFreeingObject(*Obj);
+ delete Obj;
+ }
+ }
+ LoadedObjects.clear();
delete TM;
}

+void MCJIT::addModule(Module *M) {
+ MutexGuard locked(lock);
+ OwnedModules.addModule(M);
+}
+
+bool MCJIT::removeModule(Module *M) {
+ MutexGuard locked(lock);
+ return OwnedModules.removeModule(M);
+}
+
+
+
void MCJIT::setObjectCache(ObjectCache* NewCache) {
+ MutexGuard locked(lock);
ObjCache = NewCache;
}

-ObjectBufferStream* MCJIT::emitObject(Module *m) {
- /// Currently, MCJIT only supports a single module and the module passed to
- /// this function call is expected to be the contained module. The module
- /// is passed as a parameter here to prepare for multiple module support in
- /// the future.
- assert(M == m);
-
- // Get a thread lock to make sure we aren't trying to compile multiple times
+ObjectBufferStream* MCJIT::emitObject(Module *M) {
MutexGuard locked(lock);
- // FIXME: Track compilation state on a per-module basis when multiple modules
- // are supported.
- // Re-compilation is not supported
- assert(!IsLoaded);
+ // This must be a module which has already been added but not loaded to this
+ // MCJIT instance, since these conditions are tested by our caller,
+ // generateCodeForModule.

PassManager PM;
@@ -99,7 +127,7 @@ ObjectBufferStream* MCJIT::emitObject(Module *m) {
}

// Initialize passes.
- PM.run(*m);
+ PM.run(*M);

// Flush the output buffer to get the generated code into memory
CompiledObject->flush();
@@ -109,27 +137,28 @@ ObjectBufferStream* MCJIT::emitObject(Module *m) {
// MemoryBuffer is a thin wrapper around the actual memory, so it's OK
// to create a temporary object here and delete it after the call.
OwningPtr<MemoryBuffer> MB(CompiledObject->getMemBuffer());
- ObjCache->notifyObjectCompiled(m, MB.get());
+ ObjCache->notifyObjectCompiled(M, MB.get());
}

return CompiledObject.take();
}

-void MCJIT::loadObject(Module *M) {
-
+void MCJIT::generateCodeForModule(Module *M) {
// Get a thread lock to make sure we aren't trying to load multiple times
MutexGuard locked(lock);

- // FIXME: Track compilation state on a per-module basis when multiple modules
- // are supported.
+ // This must be a module which has already been added to this MCJIT instance.
+ assert(OwnedModules.ownsModule(M) && + "MCJIT::generateCodeForModule: Unknown module."); + // Re-compilation is not supported - if (IsLoaded) + if (OwnedModules.hasModuleBeenLoaded(M)) return; OwningPtr ObjectToLoad; // Try to load the pre-compiled object from cache if possible if (0 != ObjCache) { - OwningPtr PreCompiledObject(ObjCache->getObjectCopy(M)); + OwningPtr PreCompiledObject(ObjCache->getObject(M)); if (0 != PreCompiledObject.get()) ObjectToLoad.reset(new ObjectBuffer(PreCompiledObject.take())); } @@ -141,59 +170,137 @@ void MCJIT::loadObject(Module *M) { } // Load the object into the dynamic linker. - // handing off ownership of the buffer - LoadedObject.reset(Dyld.loadObject(ObjectToLoad.take())); + // MCJIT now owns the ObjectImage pointer (via its LoadedObjects map). + ObjectImage *LoadedObject = Dyld.loadObject(ObjectToLoad.take()); + LoadedObjects[M] = LoadedObject; if (!LoadedObject) report_fatal_error(Dyld.getErrorString()); - // Resolve any relocations. - Dyld.resolveRelocations(); - // FIXME: Make this optional, maybe even move it to a JIT event listener LoadedObject->registerWithDebugger(); NotifyObjectEmitted(*LoadedObject); - // FIXME: Add support for per-module compilation state - IsLoaded = true; + OwnedModules.markModuleAsLoaded(M); } -// FIXME: Add a parameter to identify which object is being finalized when -// MCJIT supports multiple modules. -// FIXME: Provide a way to separate code emission, relocations and page -// protection in the interface. +void MCJIT::finalizeLoadedModules() { + MutexGuard locked(lock); + + // Resolve any outstanding relocations. + Dyld.resolveRelocations(); + + OwnedModules.markAllLoadedModulesAsFinalized(); + + // Register EH frame data for any module we own which has been loaded + Dyld.registerEHFrames(); + + // Set page permissions. + MemMgr.finalizeMemory(); +} + +// FIXME: Rename this. void MCJIT::finalizeObject() { - // If the module hasn't been compiled, just do that. - if (!IsLoaded) { - // If the call to Dyld.resolveRelocations() is removed from loadObject() - // we'll need to do that here. - loadObject(M); - } else { - // Resolve any relocations. - Dyld.resolveRelocations(); + MutexGuard locked(lock); + + for (ModulePtrSet::iterator I = OwnedModules.begin_added(), + E = OwnedModules.end_added(); + I != E; ++I) { + Module *M = *I; + generateCodeForModule(M); } - StringRef EHData = Dyld.getEHFrameSection(); - if (!EHData.empty()) - MemMgr->registerEHFrames(EHData); + finalizeLoadedModules(); +} - // Set page permissions. - MemMgr->applyPermissions(); +void MCJIT::finalizeModule(Module *M) { + MutexGuard locked(lock); + + // This must be a module which has already been added to this MCJIT instance. + assert(OwnedModules.ownsModule(M) && "MCJIT::finalizeModule: Unknown module."); + + // If the module hasn't been compiled, just do that. + if (!OwnedModules.hasModuleBeenLoaded(M)) + generateCodeForModule(M); + + finalizeLoadedModules(); } void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { report_fatal_error("not yet implemented"); } -void *MCJIT::getPointerToFunction(Function *F) { - // FIXME: This should really return a uint64_t since it's a pointer in the - // target address space, not our local address space. That's part of the - // ExecutionEngine interface, though. Fix that when the old JIT finally - // dies. +uint64_t MCJIT::getExistingSymbolAddress(const std::string &Name) { + // Check with the RuntimeDyld to see if we already have this symbol. 
+ if (Name[0] == '\1') + return Dyld.getSymbolLoadAddress(Name.substr(1)); + return Dyld.getSymbolLoadAddress((TM->getMCAsmInfo()->getGlobalPrefix() + + Name)); +} + +Module *MCJIT::findModuleForSymbol(const std::string &Name, + bool CheckFunctionsOnly) { + MutexGuard locked(lock); + + // If it hasn't already been generated, see if it's in one of our modules. + for (ModulePtrSet::iterator I = OwnedModules.begin_added(), + E = OwnedModules.end_added(); + I != E; ++I) { + Module *M = *I; + Function *F = M->getFunction(Name); + if (F && !F->isDeclaration()) + return M; + if (!CheckFunctionsOnly) { + GlobalVariable *G = M->getGlobalVariable(Name); + if (G && !G->isDeclaration()) + return M; + // FIXME: Do we need to worry about global aliases? + } + } + // We didn't find the symbol in any of our modules. + return NULL; +} + +uint64_t MCJIT::getSymbolAddress(const std::string &Name, + bool CheckFunctionsOnly) +{ + MutexGuard locked(lock); + + // First, check to see if we already have this symbol. + uint64_t Addr = getExistingSymbolAddress(Name); + if (Addr) + return Addr; + + // If it hasn't already been generated, see if it's in one of our modules. + Module *M = findModuleForSymbol(Name, CheckFunctionsOnly); + if (!M) + return 0; + + generateCodeForModule(M); + + // Check the RuntimeDyld table again, it should be there now. + return getExistingSymbolAddress(Name); +} - // FIXME: Add support for per-module compilation state - if (!IsLoaded) - loadObject(M); +uint64_t MCJIT::getGlobalValueAddress(const std::string &Name) { + MutexGuard locked(lock); + uint64_t Result = getSymbolAddress(Name, false); + if (Result != 0) + finalizeLoadedModules(); + return Result; +} + +uint64_t MCJIT::getFunctionAddress(const std::string &Name) { + MutexGuard locked(lock); + uint64_t Result = getSymbolAddress(Name, true); + if (Result != 0) + finalizeLoadedModules(); + return Result; +} + +// Deprecated. Use getFunctionAddress instead. +void *MCJIT::getPointerToFunction(Function *F) { + MutexGuard locked(lock); if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) { bool AbortOnFailure = !F->hasExternalWeakLinkage(); @@ -202,6 +309,16 @@ void *MCJIT::getPointerToFunction(Function *F) { return Addr; } + Module *M = F->getParent(); + bool HasBeenAddedButNotLoaded = OwnedModules.hasModuleBeenAddedButNotLoaded(M); + + // Make sure the relevant module has been compiled and loaded. + if (HasBeenAddedButNotLoaded) + generateCodeForModule(M); + else if (!OwnedModules.hasModuleBeenLoaded(M)) + // If this function doesn't belong to one of our modules, we're done. + return NULL; + // FIXME: Should the Dyld be retaining module information? Probably not. // FIXME: Should we be using the mangler for this? Probably. // @@ -222,6 +339,45 @@ void MCJIT::freeMachineCodeForFunction(Function *F) { report_fatal_error("not yet implemented"); } +void MCJIT::runStaticConstructorsDestructorsInModulePtrSet( + bool isDtors, ModulePtrSet::iterator I, ModulePtrSet::iterator E) { + for (; I != E; ++I) { + ExecutionEngine::runStaticConstructorsDestructors(*I, isDtors); + } +} + +void MCJIT::runStaticConstructorsDestructors(bool isDtors) { + // Execute global ctors/dtors for each module in the program. 
+ runStaticConstructorsDestructorsInModulePtrSet(
+ isDtors, OwnedModules.begin_added(), OwnedModules.end_added());
+ runStaticConstructorsDestructorsInModulePtrSet(
+ isDtors, OwnedModules.begin_loaded(), OwnedModules.end_loaded());
+ runStaticConstructorsDestructorsInModulePtrSet(
+ isDtors, OwnedModules.begin_finalized(), OwnedModules.end_finalized());
+}
+
+Function *MCJIT::FindFunctionNamedInModulePtrSet(const char *FnName,
+ ModulePtrSet::iterator I,
+ ModulePtrSet::iterator E) {
+ for (; I != E; ++I) {
+ if (Function *F = (*I)->getFunction(FnName))
+ return F;
+ }
+ return 0;
+}
+
+Function *MCJIT::FindFunctionNamed(const char *FnName) {
+ Function *F = FindFunctionNamedInModulePtrSet(
+ FnName, OwnedModules.begin_added(), OwnedModules.end_added());
+ if (!F)
+ F = FindFunctionNamedInModulePtrSet(FnName, OwnedModules.begin_loaded(),
+ OwnedModules.end_loaded());
+ if (!F)
+ F = FindFunctionNamedInModulePtrSet(FnName, OwnedModules.begin_finalized(),
+ OwnedModules.end_finalized());
+ return F;
+}
+
GenericValue MCJIT::runFunction(Function *F,
const std::vector<GenericValue> &ArgValues) {
assert(F && "Function *F was null at entry to run()");
@@ -324,12 +480,8 @@ GenericValue MCJIT::runFunction(Function *F,
void *MCJIT::getPointerToNamedFunction(const std::string &Name,
bool AbortOnFailure) {
- // FIXME: Add support for per-module compilation state
- if (!IsLoaded)
- loadObject(M);
-
- if (!isSymbolSearchingDisabled() && MemMgr) {
- void *ptr = MemMgr->getPointerToNamedFunction(Name, false);
+ if (!isSymbolSearchingDisabled()) {
+ void *ptr = MemMgr.getPointerToNamedFunction(Name, false);
if (ptr)
return ptr;
}
@@ -365,6 +517,7 @@ void MCJIT::UnregisterJITEventListener(JITEventListener *L) {
}
void MCJIT::NotifyObjectEmitted(const ObjectImage& Obj) {
MutexGuard locked(lock);
+ MemMgr.notifyObjectLoaded(this, &Obj);
for (unsigned I = 0, S = EventListeners.size(); I < S; ++I) {
EventListeners[I]->NotifyObjectEmitted(Obj);
}
@@ -375,3 +528,14 @@ void MCJIT::NotifyFreeingObject(const ObjectImage& Obj) {
EventListeners[I]->NotifyFreeingObject(Obj);
}
}
+
+uint64_t LinkingMemoryManager::getSymbolAddress(const std::string &Name) {
+ uint64_t Result = ParentEngine->getSymbolAddress(Name, false);
+ // If the symbol wasn't found and it begins with an underscore, try again
+ // without the underscore.
+ if (!Result && Name[0] == '_')
+ Result = ParentEngine->getSymbolAddress(Name.substr(1), false);
+ if (Result)
+ return Result;
+ return ClientMM->getSymbolAddress(Name);
+}
diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
index 8c4bf6e..86b478b 100644
--- a/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
+++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/MCJIT.h
@@ -10,49 +10,253 @@
#ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H
#define LLVM_LIB_EXECUTIONENGINE_MCJIT_H
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ExecutionEngine/ExecutionEngine.h"
#include "llvm/ExecutionEngine/ObjectCache.h"
+#include "llvm/ExecutionEngine/ObjectImage.h"
#include "llvm/ExecutionEngine/RuntimeDyld.h"
-#include "llvm/PassManager.h"
+#include "llvm/IR/Module.h"

namespace llvm {
+class MCJIT;

-class ObjectImage;

+// This is a helper class that the MCJIT execution engine uses for linking
+// functions across modules that it owns. It aggregates the memory manager
+// that is passed in to the MCJIT constructor and defers most functionality
+// to that object.
+class LinkingMemoryManager : public RTDyldMemoryManager {
+public:
+ LinkingMemoryManager(MCJIT *Parent, RTDyldMemoryManager *MM)
+ : ParentEngine(Parent), ClientMM(MM) {}
+
+ virtual uint64_t getSymbolAddress(const std::string &Name);
+
+ // Functions deferred to client memory manager
+ virtual uint8_t *allocateCodeSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName) {
+ return ClientMM->allocateCodeSection(Size, Alignment, SectionID, SectionName);
+ }
+
+ virtual uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment,
+ unsigned SectionID, StringRef SectionName,
+ bool IsReadOnly) {
+ return ClientMM->allocateDataSection(Size, Alignment,
+ SectionID, SectionName, IsReadOnly);
+ }
+
+ virtual void notifyObjectLoaded(ExecutionEngine *EE,
+ const ObjectImage *Obj) {
+ ClientMM->notifyObjectLoaded(EE, Obj);
+ }
+
+ virtual void registerEHFrames(uint8_t *Addr, uint64_t LoadAddr, size_t Size) {
+ ClientMM->registerEHFrames(Addr, LoadAddr, Size);
+ }
+
+ virtual void deregisterEHFrames(uint8_t *Addr,
+ uint64_t LoadAddr,
+ size_t Size) {
+ ClientMM->deregisterEHFrames(Addr, LoadAddr, Size);
+ }
+
+ virtual bool finalizeMemory(std::string *ErrMsg = 0) {
+ return ClientMM->finalizeMemory(ErrMsg);
+ }

-// FIXME: This makes all kinds of horrible assumptions for the time being,
-// like only having one module, not needing to worry about multi-threading,
-// blah blah. Purely in get-it-up-and-limping mode for now.
+private:
+ MCJIT *ParentEngine;
+ OwningPtr<RTDyldMemoryManager> ClientMM;
+};
+
+// About Module states: added->loaded->finalized.
+//
+// The purpose of the "added" state is to have modules on standby (added =
+// known but not yet compiled). The idea is that you can add a module to
+// provide function definitions, but if nothing in that module is referenced
+// by a module in which a function is executed (note the wording here,
+// because it's not exactly the ideal case) then the module never gets
+// compiled. This is a form of lazy compilation.
+//
+// The purpose of the "loaded" state (loaded = compiled and required sections
+// copied into local memory but not yet ready for execution) is to have an
+// intermediate state wherein clients can remap the addresses of sections, using
+// MCJIT::mapSectionAddress, (in preparation for later copying to a new location
+// or an external process) before relocations and page permissions are applied.
+//
+// It might not be obvious at first glance, but the "remote-mcjit" case in the
+// lli tool does this. In that case, the intermediate action is taken by the
+// RemoteMemoryManager in response to the notifyObjectLoaded function being
+// called.
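To make the state machine concrete, here is a minimal client-side sketch of the interface this change introduces. It is illustrative only: EE is assumed to be an already-constructed, MCJIT-backed ExecutionEngine, M2 a second already-created Module, and "foo" a hypothetical function defined in M2 (symbol-name mangling is ignored).

// Illustrative sketch, not part of the patch.
EE->addModule(M2);          // M2 enters "added": on standby, not compiled.
uint64_t Addr =
    EE->getFunctionAddress("foo"); // findModuleForSymbol locates M2;
                                   // generateCodeForModule moves it to
                                   // "loaded", and finalizeLoadedModules
                                   // then marks it "finalized".
EE->finalizeObject();       // Alternatively: eagerly compile, load and
                            // finalize every remaining added module.
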
class MCJIT : public ExecutionEngine { MCJIT(Module *M, TargetMachine *tm, RTDyldMemoryManager *MemMgr, bool AllocateGVsWithCode); + typedef llvm::SmallPtrSet ModulePtrSet; + + class OwningModuleContainer { + public: + OwningModuleContainer() { + } + ~OwningModuleContainer() { + freeModulePtrSet(AddedModules); + freeModulePtrSet(LoadedModules); + freeModulePtrSet(FinalizedModules); + } + + ModulePtrSet::iterator begin_added() { return AddedModules.begin(); } + ModulePtrSet::iterator end_added() { return AddedModules.end(); } + + ModulePtrSet::iterator begin_loaded() { return LoadedModules.begin(); } + ModulePtrSet::iterator end_loaded() { return LoadedModules.end(); } + + ModulePtrSet::iterator begin_finalized() { return FinalizedModules.begin(); } + ModulePtrSet::iterator end_finalized() { return FinalizedModules.end(); } + + void addModule(Module *M) { + AddedModules.insert(M); + } + + bool removeModule(Module *M) { + return AddedModules.erase(M) || LoadedModules.erase(M) || + FinalizedModules.erase(M); + } + + bool hasModuleBeenAddedButNotLoaded(Module *M) { + return AddedModules.count(M) != 0; + } + + bool hasModuleBeenLoaded(Module *M) { + // If the module is in either the "loaded" or "finalized" sections it + // has been loaded. + return (LoadedModules.count(M) != 0 ) || (FinalizedModules.count(M) != 0); + } + + bool hasModuleBeenFinalized(Module *M) { + return FinalizedModules.count(M) != 0; + } + + bool ownsModule(Module* M) { + return (AddedModules.count(M) != 0) || (LoadedModules.count(M) != 0) || + (FinalizedModules.count(M) != 0); + } + + void markModuleAsLoaded(Module *M) { + // This checks against logic errors in the MCJIT implementation. + // This function should never be called with either a Module that MCJIT + // does not own or a Module that has already been loaded and/or finalized. + assert(AddedModules.count(M) && + "markModuleAsLoaded: Module not found in AddedModules"); + + // Remove the module from the "Added" set. + AddedModules.erase(M); + + // Add the Module to the "Loaded" set. + LoadedModules.insert(M); + } + + void markModuleAsFinalized(Module *M) { + // This checks against logic errors in the MCJIT implementation. + // This function should never be called with either a Module that MCJIT + // does not own, a Module that has not been loaded or a Module that has + // already been finalized. + assert(LoadedModules.count(M) && + "markModuleAsFinalized: Module not found in LoadedModules"); + + // Remove the module from the "Loaded" section of the list. + LoadedModules.erase(M); + + // Add the Module to the "Finalized" section of the list by inserting it + // before the 'end' iterator. + FinalizedModules.insert(M); + } + + void markAllLoadedModulesAsFinalized() { + for (ModulePtrSet::iterator I = LoadedModules.begin(), + E = LoadedModules.end(); + I != E; ++I) { + Module *M = *I; + FinalizedModules.insert(M); + } + LoadedModules.clear(); + } + + private: + ModulePtrSet AddedModules; + ModulePtrSet LoadedModules; + ModulePtrSet FinalizedModules; + + void freeModulePtrSet(ModulePtrSet& MPS) { + // Go through the module set and delete everything. 
+ for (ModulePtrSet::iterator I = MPS.begin(), E = MPS.end(); I != E; ++I) {
+ Module *M = *I;
+ delete M;
+ }
+ MPS.clear();
+ }
+ };
+
TargetMachine *TM;
MCContext *Ctx;
- RTDyldMemoryManager *MemMgr;
+ LinkingMemoryManager MemMgr;
RuntimeDyld Dyld;
SmallVector<JITEventListener*, 2> EventListeners;

- // FIXME: Add support for multiple modules
- bool IsLoaded;
- Module *M;
- OwningPtr<ObjectImage> LoadedObject;
+ OwningModuleContainer OwnedModules;
+
+ typedef DenseMap<Module *, ObjectImage *> LoadedObjectMap;
+ LoadedObjectMap LoadedObjects;

// An optional ObjectCache to be notified of compiled objects and used to
// perform lookup of pre-compiled code to avoid re-compilation.
ObjectCache *ObjCache;

+ Function *FindFunctionNamedInModulePtrSet(const char *FnName,
+ ModulePtrSet::iterator I,
+ ModulePtrSet::iterator E);
+
+ void runStaticConstructorsDestructorsInModulePtrSet(bool isDtors,
+ ModulePtrSet::iterator I,
+ ModulePtrSet::iterator E);
+
public:
~MCJIT();

/// @name ExecutionEngine interface implementation
/// @{
+ virtual void addModule(Module *M);
+ virtual bool removeModule(Module *M);
+
+ /// FindFunctionNamed - Search all of the active modules to find the one that
+ /// defines FnName. This is a very slow operation and shouldn't be used for
+ /// general code.
+ virtual Function *FindFunctionNamed(const char *FnName);

/// Sets the object manager that MCJIT should use to avoid compilation.
virtual void setObjectCache(ObjectCache *manager);

+ virtual void generateCodeForModule(Module *M);
+
+ /// finalizeObject - ensure the module is fully processed and is usable.
+ ///
+ /// It is the user-level function for completing the process of making the
+ /// object usable for execution. It should be called after sections within an
+ /// object have been relocated using mapSectionAddress. When this method is
+ /// called the MCJIT execution engine will reapply relocations for a loaded
+ /// object.
+ /// It is OK to finalize a set of modules, add more modules, and finalize
+ /// again.
+ // FIXME: Do we really need both of these?
virtual void finalizeObject();
+ virtual void finalizeModule(Module *);
+ void finalizeLoadedModules();
+
+ /// runStaticConstructorsDestructors - This method is used to execute all of
+ /// the static constructors or destructors for a program.
+ ///
+ /// \param isDtors - Run the destructors instead of constructors.
+ void runStaticConstructorsDestructors(bool isDtors);

virtual void *getPointerToBasicBlock(BasicBlock *BB);
@@ -84,10 +288,15 @@ public:
uint64_t TargetAddress) {
Dyld.mapSectionAddress(LocalAddress, TargetAddress);
}
-
virtual void RegisterJITEventListener(JITEventListener *L);
virtual void UnregisterJITEventListener(JITEventListener *L);

+ // If successful, these functions will implicitly finalize all loaded objects.
+ // To get a function address within MCJIT without causing a finalize, use
+ // getSymbolAddress.
+ virtual uint64_t getGlobalValueAddress(const std::string &Name);
+ virtual uint64_t getFunctionAddress(const std::string &Name);
+
/// @}
/// @name (Private) Registration Interfaces
/// @{
@@ -98,12 +307,17 @@ public:

static ExecutionEngine *createJIT(Module *M,
std::string *ErrorStr,
- JITMemoryManager *JMM,
+ RTDyldMemoryManager *MemMgr,
bool GVsWithCode,
TargetMachine *TM);

// @}

+ // This is not directly exposed via the ExecutionEngine API, but it is
+ // used by the LinkingMemoryManager.
+ uint64_t getSymbolAddress(const std::string &Name, + bool CheckFunctionsOnly); + protected: /// emitObject -- Generate a JITed object in memory from the specified module /// Currently, MCJIT only supports a single module and the module passed to @@ -112,10 +326,12 @@ protected: /// the future. ObjectBufferStream* emitObject(Module *M); - void loadObject(Module *M); - void NotifyObjectEmitted(const ObjectImage& Obj); void NotifyFreeingObject(const ObjectImage& Obj); + + uint64_t getExistingSymbolAddress(const std::string &Name); + Module *findModuleForSymbol(const std::string &Name, + bool CheckFunctionsOnly); }; } // End llvm namespace diff --git a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp index bac77ce..cf90e77 100644 --- a/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp +++ b/contrib/llvm/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp @@ -14,25 +14,15 @@ #include "llvm/Config/config.h" #include "llvm/ExecutionEngine/SectionMemoryManager.h" -#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/MathExtras.h" -#ifdef __linux__ - // These includes used by SectionMemoryManager::getPointerToNamedFunction() - // for Glibc trickery. See comments in this function for more information. - #ifdef HAVE_SYS_STAT_H - #include - #endif - #include - #include -#endif - namespace llvm { uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size, - unsigned Alignment, - unsigned SectionID, - bool IsReadOnly) { + unsigned Alignment, + unsigned SectionID, + StringRef SectionName, + bool IsReadOnly) { if (IsReadOnly) return allocateSection(RODataMem, Size, Alignment); return allocateSection(RWDataMem, Size, Alignment); @@ -40,7 +30,8 @@ uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size, uint8_t *SectionMemoryManager::allocateCodeSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID) { + unsigned SectionID, + StringRef SectionName) { return allocateSection(CodeMem, Size, Alignment); } @@ -111,11 +102,14 @@ uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, return (uint8_t*)Addr; } -bool SectionMemoryManager::applyPermissions(std::string *ErrMsg) +bool SectionMemoryManager::finalizeMemory(std::string *ErrMsg) { // FIXME: Should in-progress permissions be reverted if an error occurs? error_code ec; + // Don't allow free memory blocks to be used after setting protection flags. + CodeMem.FreeMem.clear(); + // Make code memory executable. ec = applyMemoryGroupPermissions(CodeMem, sys::Memory::MF_READ | sys::Memory::MF_EXEC); @@ -126,6 +120,9 @@ bool SectionMemoryManager::applyPermissions(std::string *ErrMsg) return true; } + // Don't allow free memory blocks to be used after setting protection flags. + RODataMem.FreeMem.clear(); + // Make read-only data memory read-only. ec = applyMemoryGroupPermissions(RODataMem, sys::Memory::MF_READ | sys::Memory::MF_EXEC); @@ -146,38 +143,6 @@ bool SectionMemoryManager::applyPermissions(std::string *ErrMsg) return false; } -// Determine whether we can register EH tables. 
-#if (defined(__GNUC__) && !defined(__ARM_EABI__) && \ - !defined(__USING_SJLJ_EXCEPTIONS__)) -#define HAVE_EHTABLE_SUPPORT 1 -#else -#define HAVE_EHTABLE_SUPPORT 0 -#endif - -#if HAVE_EHTABLE_SUPPORT -extern "C" void __register_frame(void*); - -static const char *processFDE(const char *Entry) { - const char *P = Entry; - uint32_t Length = *((uint32_t*)P); - P += 4; - uint32_t Offset = *((uint32_t*)P); - if (Offset != 0) - __register_frame((void*)Entry); - return P + Length; -} -#endif - -void SectionMemoryManager::registerEHFrames(StringRef SectionData) { -#if HAVE_EHTABLE_SUPPORT - const char *P = SectionData.data(); - const char *End = SectionData.data() + SectionData.size(); - do { - P = processFDE(P); - } while(P != End); -#endif -} - error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup, unsigned Permissions) { @@ -199,57 +164,6 @@ void SectionMemoryManager::invalidateInstructionCache() { CodeMem.AllocatedMem[i].size()); } -static int jit_noop() { - return 0; -} - -void *SectionMemoryManager::getPointerToNamedFunction(const std::string &Name, - bool AbortOnFailure) { -#if defined(__linux__) - //===--------------------------------------------------------------------===// - // Function stubs that are invoked instead of certain library calls - // - // Force the following functions to be linked in to anything that uses the - // JIT. This is a hack designed to work around the all-too-clever Glibc - // strategy of making these functions work differently when inlined vs. when - // not inlined, and hiding their real definitions in a separate archive file - // that the dynamic linker can't see. For more info, search for - // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. - if (Name == "stat") return (void*)(intptr_t)&stat; - if (Name == "fstat") return (void*)(intptr_t)&fstat; - if (Name == "lstat") return (void*)(intptr_t)&lstat; - if (Name == "stat64") return (void*)(intptr_t)&stat64; - if (Name == "fstat64") return (void*)(intptr_t)&fstat64; - if (Name == "lstat64") return (void*)(intptr_t)&lstat64; - if (Name == "atexit") return (void*)(intptr_t)&atexit; - if (Name == "mknod") return (void*)(intptr_t)&mknod; -#endif // __linux__ - - // We should not invoke parent's ctors/dtors from generated main()! - // On Mingw and Cygwin, the symbol __main is resolved to - // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors - // (and register wrong callee's dtors with atexit(3)). - // We expect ExecutionEngine::runStaticConstructorsDestructors() - // is called before ExecutionEngine::runFunctionAsMain() is called. - if (Name == "__main") return (void*)(intptr_t)&jit_noop; - - const char *NameStr = Name.c_str(); - void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); - if (Ptr) return Ptr; - - // If it wasn't found and if it starts with an underscore ('_') character, - // try again without the underscore. 
- if (NameStr[0] == '_') { - Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); - if (Ptr) return Ptr; - } - - if (AbortOnFailure) - report_fatal_error("Program used external function '" + Name + - "' which could not be resolved!"); - return 0; -} - SectionMemoryManager::~SectionMemoryManager() { for (unsigned i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i) sys::Memory::releaseMappedMemory(CodeMem.AllocatedMem[i]); diff --git a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 38867ec..f11df82 100644 --- a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -20,7 +20,9 @@ #include "llvm/IR/Function.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/ExecutionEngine/ObjectImage.h" #include "llvm/ExecutionEngine/OProfileWrapper.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Errno.h" @@ -52,6 +54,10 @@ public: const JITEvent_EmittedFunctionDetails &Details); virtual void NotifyFreeingMachineCode(void *OldPtr); + + virtual void NotifyObjectEmitted(const ObjectImage &Obj); + + virtual void NotifyFreeingObject(const ObjectImage &Obj); }; void OProfileJITEventListener::initialize() { @@ -159,6 +165,66 @@ void OProfileJITEventListener::NotifyFreeingMachineCode(void *FnStart) { } } +void OProfileJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { + if (!Wrapper.isAgentAvailable()) { + return; + } + + // Use symbol info to iterate functions in the object. + error_code ec; + for (object::symbol_iterator I = Obj.begin_symbols(), + E = Obj.end_symbols(); + I != E && !ec; + I.increment(ec)) { + object::SymbolRef::Type SymType; + if (I->getType(SymType)) continue; + if (SymType == object::SymbolRef::ST_Function) { + StringRef Name; + uint64_t Addr; + uint64_t Size; + if (I->getName(Name)) continue; + if (I->getAddress(Addr)) continue; + if (I->getSize(Size)) continue; + + if (Wrapper.op_write_native_code(Name.data(), Addr, (void*)Addr, Size) + == -1) { + DEBUG(dbgs() << "Failed to tell OProfile about native function " + << Name << " at [" + << (void*)Addr << "-" << ((char*)Addr + Size) << "]\n"); + continue; + } + // TODO: support line number info (similar to IntelJITEventListener.cpp) + } + } +} + +void OProfileJITEventListener::NotifyFreeingObject(const ObjectImage &Obj) { + if (!Wrapper.isAgentAvailable()) { + return; + } + + // Use symbol info to iterate functions in the object. + error_code ec; + for (object::symbol_iterator I = Obj.begin_symbols(), + E = Obj.end_symbols(); + I != E && !ec; + I.increment(ec)) { + object::SymbolRef::Type SymType; + if (I->getType(SymType)) continue; + if (SymType == object::SymbolRef::ST_Function) { + uint64_t Addr; + if (I->getAddress(Addr)) continue; + + if (Wrapper.op_unload_native_code(Addr) == -1) { + DEBUG(dbgs() + << "Failed to tell OProfile about unload of native function at " + << (void*)Addr << "\n"); + continue; + } + } + } +} + } // anonymous namespace. 
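The two object-level notifications implemented above are ordinary JITEventListener virtuals, so the same hooks are available to any listener registered through RegisterJITEventListener. A minimal sketch follows (illustrative only; MyListener is a hypothetical name, and the empty bodies stand in for real profiling or bookkeeping logic):

#include "llvm/ExecutionEngine/JITEventListener.h"
#include "llvm/ExecutionEngine/ObjectImage.h"

class MyListener : public llvm::JITEventListener {
public:
  // Called once per object after MCJIT emits it; symbol iteration via
  // Obj.begin_symbols()/end_symbols() is valid here, as the OProfile
  // listener above demonstrates.
  virtual void NotifyObjectEmitted(const llvm::ObjectImage &Obj) {}

  // Called before the ObjectImage is freed, mirroring the unload path above.
  virtual void NotifyFreeingObject(const llvm::ObjectImage &Obj) {}
};
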
namespace llvm { diff --git a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp index 7c0d395..61d8dc2 100644 --- a/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp +++ b/contrib/llvm/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp @@ -13,22 +13,20 @@ // //===----------------------------------------------------------------------===// -#include "llvm/ExecutionEngine/OProfileWrapper.h" - #define DEBUG_TYPE "oprofile-wrapper" +#include "llvm/ExecutionEngine/OProfileWrapper.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/MutexGuard.h" #include "llvm/ADT/SmallString.h" - -#include #include -#include #include -#include #include +#include +#include +#include #include namespace { @@ -143,6 +141,10 @@ bool OProfileWrapper::checkForOProfileProcEntry() { close(CmdLineFD); ssize_t Idx = 0; + if (ExeName[0] != '/') { + BaseName = ExeName; + } + // Find the terminator for the first string while (Idx < NumRead-1 && ExeName[Idx] != 0) { Idx++; @@ -161,7 +163,8 @@ bool OProfileWrapper::checkForOProfileProcEntry() { } // Test this to see if it is the oprofile daemon - if (BaseName != 0 && !strcmp("oprofiled", BaseName)) { + if (BaseName != 0 && (!strcmp("oprofiled", BaseName) || + !strcmp("operf", BaseName))) { // If it is, we're done closedir(ProcDir); return true; diff --git a/contrib/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp b/contrib/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp new file mode 100644 index 0000000..26e1fdd --- /dev/null +++ b/contrib/llvm/lib/ExecutionEngine/RTDyldMemoryManager.cpp @@ -0,0 +1,282 @@ +//===-- RTDyldMemoryManager.cpp - Memory manager for MC-JIT -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// Implementation of the runtime dynamic memory manager base class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" +#include "llvm/ExecutionEngine/RTDyldMemoryManager.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/ErrorHandling.h" + +#include + +#ifdef __linux__ + // These includes used by RTDyldMemoryManager::getPointerToNamedFunction() + // for Glibc trickery. See comments in this function for more information. + #ifdef HAVE_SYS_STAT_H + #include + #endif + #include + #include +#endif + +namespace llvm { + +RTDyldMemoryManager::~RTDyldMemoryManager() {} + +// Determine whether we can register EH tables. +#if (defined(__GNUC__) && !defined(__ARM_EABI__) && !defined(__ia64__) && \ + !defined(__SEH__) && !defined(__USING_SJLJ_EXCEPTIONS__)) +#define HAVE_EHTABLE_SUPPORT 1 +#else +#define HAVE_EHTABLE_SUPPORT 0 +#endif + +#if HAVE_EHTABLE_SUPPORT +extern "C" void __register_frame(void*); +extern "C" void __deregister_frame(void*); +#else +// The building compiler does not have __(de)register_frame but +// it may be found at runtime in a dynamically-loaded library. +// For example, this happens when building LLVM with Visual C++ +// but using the MingW runtime. 
+void __register_frame(void *p) { + static bool Searched = false; + static void *rf = 0; + + if (!Searched) { + Searched = true; + rf = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol( + "__register_frame"); + } + if (rf) + ((void (*)(void *))rf)(p); +} + +void __deregister_frame(void *p) { + static bool Searched = false; + static void *df = 0; + + if (!Searched) { + Searched = true; + df = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol( + "__deregister_frame"); + } + if (df) + ((void (*)(void *))df)(p); +} +#endif + +#ifdef __APPLE__ + +static const char *processFDE(const char *Entry, bool isDeregister) { + const char *P = Entry; + uint32_t Length = *((const uint32_t *)P); + P += 4; + uint32_t Offset = *((const uint32_t *)P); + if (Offset != 0) { + if (isDeregister) + __deregister_frame(const_cast(Entry)); + else + __register_frame(const_cast(Entry)); + } + return P + Length; +} + +// This implementation handles frame registration for local targets. +// Memory managers for remote targets should re-implement this function +// and use the LoadAddr parameter. +void RTDyldMemoryManager::registerEHFrames(uint8_t *Addr, + uint64_t LoadAddr, + size_t Size) { + // On OS X OS X __register_frame takes a single FDE as an argument. + // See http://lists.cs.uiuc.edu/pipermail/llvmdev/2013-April/061768.html + const char *P = (const char *)Addr; + const char *End = P + Size; + do { + P = processFDE(P, false); + } while(P != End); +} + +void RTDyldMemoryManager::deregisterEHFrames(uint8_t *Addr, + uint64_t LoadAddr, + size_t Size) { + const char *P = (const char *)Addr; + const char *End = P + Size; + do { + P = processFDE(P, true); + } while(P != End); +} + +#else + +void RTDyldMemoryManager::registerEHFrames(uint8_t *Addr, + uint64_t LoadAddr, + size_t Size) { + // On Linux __register_frame takes a single argument: + // a pointer to the start of the .eh_frame section. + + // How can it find the end? Because crtendS.o is linked + // in and it has an .eh_frame section with four zero chars. + __register_frame(Addr); +} + +void RTDyldMemoryManager::deregisterEHFrames(uint8_t *Addr, + uint64_t LoadAddr, + size_t Size) { + __deregister_frame(Addr); +} + +#endif + +static int jit_noop() { + return 0; +} + +// ARM math functions are statically linked on Android from libgcc.a, but not +// available at runtime for dynamic linking. On Linux these are usually placed +// in libgcc_s.so so can be found by normal dynamic lookup. +#if defined(__BIONIC__) && defined(__arm__) +// List of functions which are statically linked on Android and can be generated +// by LLVM. This is done as a nested macro which is used once to declare the +// imported functions with ARM_MATH_DECL and once to compare them to the +// user-requested symbol in getSymbolAddress with ARM_MATH_CHECK. The test +// assumes that all functions start with __aeabi_ and getSymbolAddress must be +// modified if that changes. 
+#define ARM_MATH_IMPORTS(PP) \ + PP(__aeabi_d2f) \ + PP(__aeabi_d2iz) \ + PP(__aeabi_d2lz) \ + PP(__aeabi_d2uiz) \ + PP(__aeabi_d2ulz) \ + PP(__aeabi_dadd) \ + PP(__aeabi_dcmpeq) \ + PP(__aeabi_dcmpge) \ + PP(__aeabi_dcmpgt) \ + PP(__aeabi_dcmple) \ + PP(__aeabi_dcmplt) \ + PP(__aeabi_dcmpun) \ + PP(__aeabi_ddiv) \ + PP(__aeabi_dmul) \ + PP(__aeabi_dsub) \ + PP(__aeabi_f2d) \ + PP(__aeabi_f2iz) \ + PP(__aeabi_f2lz) \ + PP(__aeabi_f2uiz) \ + PP(__aeabi_f2ulz) \ + PP(__aeabi_fadd) \ + PP(__aeabi_fcmpeq) \ + PP(__aeabi_fcmpge) \ + PP(__aeabi_fcmpgt) \ + PP(__aeabi_fcmple) \ + PP(__aeabi_fcmplt) \ + PP(__aeabi_fcmpun) \ + PP(__aeabi_fdiv) \ + PP(__aeabi_fmul) \ + PP(__aeabi_fsub) \ + PP(__aeabi_i2d) \ + PP(__aeabi_i2f) \ + PP(__aeabi_idiv) \ + PP(__aeabi_idivmod) \ + PP(__aeabi_l2d) \ + PP(__aeabi_l2f) \ + PP(__aeabi_lasr) \ + PP(__aeabi_ldivmod) \ + PP(__aeabi_llsl) \ + PP(__aeabi_llsr) \ + PP(__aeabi_lmul) \ + PP(__aeabi_ui2d) \ + PP(__aeabi_ui2f) \ + PP(__aeabi_uidiv) \ + PP(__aeabi_uidivmod) \ + PP(__aeabi_ul2d) \ + PP(__aeabi_ul2f) \ + PP(__aeabi_uldivmod) + +// Declare statically linked math functions on ARM. The function declarations +// here do not have the correct prototypes for each function in +// ARM_MATH_IMPORTS, but it doesn't matter because only the symbol addresses are +// needed. In particular the __aeabi_*divmod functions do not have calling +// conventions which match any C prototype. +#define ARM_MATH_DECL(name) extern "C" void name(); +ARM_MATH_IMPORTS(ARM_MATH_DECL) +#undef ARM_MATH_DECL +#endif + +uint64_t RTDyldMemoryManager::getSymbolAddress(const std::string &Name) { + // This implementation assumes that the host program is the target. + // Clients generating code for a remote target should implement their own + // memory manager. +#if defined(__linux__) && defined(__GLIBC__) + //===--------------------------------------------------------------------===// + // Function stubs that are invoked instead of certain library calls + // + // Force the following functions to be linked in to anything that uses the + // JIT. This is a hack designed to work around the all-too-clever Glibc + // strategy of making these functions work differently when inlined vs. when + // not inlined, and hiding their real definitions in a separate archive file + // that the dynamic linker can't see. For more info, search for + // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. + if (Name == "stat") return (uint64_t)&stat; + if (Name == "fstat") return (uint64_t)&fstat; + if (Name == "lstat") return (uint64_t)&lstat; + if (Name == "stat64") return (uint64_t)&stat64; + if (Name == "fstat64") return (uint64_t)&fstat64; + if (Name == "lstat64") return (uint64_t)&lstat64; + if (Name == "atexit") return (uint64_t)&atexit; + if (Name == "mknod") return (uint64_t)&mknod; +#endif // __linux__ && __GLIBC__ + + // See ARM_MATH_IMPORTS definition for explanation +#if defined(__BIONIC__) && defined(__arm__) + if (Name.compare(0, 8, "__aeabi_") == 0) { + // Check if the user has requested any of the functions listed in + // ARM_MATH_IMPORTS, and if so redirect to the statically linked symbol. +#define ARM_MATH_CHECK(fn) if (Name == #fn) return (uint64_t)&fn; + ARM_MATH_IMPORTS(ARM_MATH_CHECK) +#undef ARM_MATH_CHECK + } +#endif + + // We should not invoke parent's ctors/dtors from generated main()! + // On Mingw and Cygwin, the symbol __main is resolved to + // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors + // (and register wrong callee's dtors with atexit(3)). 
+ // We expect ExecutionEngine::runStaticConstructorsDestructors() + // is called before ExecutionEngine::runFunctionAsMain() is called. + if (Name == "__main") return (uint64_t)&jit_noop; + + const char *NameStr = Name.c_str(); + void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); + if (Ptr) + return (uint64_t)Ptr; + + // If it wasn't found and if it starts with an underscore ('_') character, + // try again without the underscore. + if (NameStr[0] == '_') { + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); + if (Ptr) + return (uint64_t)Ptr; + } + return 0; +} + +void *RTDyldMemoryManager::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure) { + uint64_t Addr = getSymbolAddress(Name); + + if (!Addr && AbortOnFailure) + report_fatal_error("Program used external function '" + Name + + "' which could not be resolved!"); + return (void*)Addr; +} + +} // namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h index 69e9dbe..6a514ea 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/JITRegistrar.h @@ -16,6 +16,7 @@ namespace llvm { /// Global access point for the JIT debugging interface. class JITRegistrar { + virtual void anchor(); public: /// Instantiates the JIT service. JITRegistrar() {} diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h index 89350cc..9cbde5d 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h @@ -23,6 +23,7 @@ namespace llvm { class ObjectImageCommon : public ObjectImage { ObjectImageCommon(); // = delete ObjectImageCommon(const ObjectImageCommon &other); // = delete + virtual void anchor(); protected: object::ObjectFile *ObjFile; diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index a08b508..161135a 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -13,45 +13,60 @@ #define DEBUG_TYPE "dyld" #include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "JITRegistrar.h" #include "ObjectImageCommon.h" #include "RuntimeDyldELF.h" #include "RuntimeDyldImpl.h" #include "RuntimeDyldMachO.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/Path.h" +#include "llvm/Support/MutexGuard.h" +#include "llvm/Object/ELF.h" using namespace llvm; using namespace llvm::object; // Empty out-of-line virtual destructor as the key function. -RTDyldMemoryManager::~RTDyldMemoryManager() {} -void RTDyldMemoryManager::registerEHFrames(StringRef SectionData) {} RuntimeDyldImpl::~RuntimeDyldImpl() {} +// Pin the JITRegistrar's and ObjectImage*'s vtables to this file. +void JITRegistrar::anchor() {} +void ObjectImage::anchor() {} +void ObjectImageCommon::anchor() {} + namespace llvm { -StringRef RuntimeDyldImpl::getEHFrameSection() { - return StringRef(); +void RuntimeDyldImpl::registerEHFrames() { +} + +void RuntimeDyldImpl::deregisterEHFrames() { } // Resolve the relocations for all symbols we currently know about. void RuntimeDyldImpl::resolveRelocations() { + MutexGuard locked(lock); + // First, resolve relocations associated with external symbols. 
resolveExternalSymbols();

// Just iterate over the sections we have and resolve all the relocations
// in them. Gross overkill, but it gets the job done.
for (int i = 0, e = Sections.size(); i != e; ++i) {
+ // The Section here (Sections[i]) refers to the section in which the
+ // symbol for the relocation is located. The SectionID in the relocation
+ // entry provides the section to which the relocation will be applied.
uint64_t Addr = Sections[i].LoadAddress;
DEBUG(dbgs() << "Resolving relocations Section #" << i
<< "\t" << format("%p", (uint8_t *)Addr)
<< "\n");
resolveRelocationList(Relocations[i], Addr);
+ Relocations.erase(i);
}
}

void RuntimeDyldImpl::mapSectionAddress(const void *LocalAddress,
uint64_t TargetAddress) {
+ MutexGuard locked(lock);
for (unsigned i = 0, e = Sections.size(); i != e; ++i) {
if (Sections[i].Address == LocalAddress) {
reassignSectionAddress(i, TargetAddress);
@@ -68,11 +83,15 @@ ObjectImage *RuntimeDyldImpl::createObjectImage(ObjectBuffer *InputBuffer) {
}

ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
+ MutexGuard locked(lock);
+
OwningPtr<ObjectImage> obj(createObjectImage(InputBuffer));
if (!obj)
report_fatal_error("Unable to create object image from memory buffer!");

+ // Save information about our target
Arch = (Triple::ArchType)obj->getArch();
+ IsTargetLittleEndian = obj->getObjectFile()->isLittleEndian();

// Symbols found in this object
StringMap<SymbolLoc> LocalSymbols;
@@ -148,6 +167,7 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) {
bool isFirstRelocation = true;
unsigned SectionID = 0;
StubMap Stubs;
+ section_iterator RelocatedSection = si->getRelocatedSection();
for (relocation_iterator i = si->begin_relocations(),
e = si->end_relocations(); i != e; i.increment(err)) {

// If it's the first relocation in this section, find its SectionID
if (isFirstRelocation) {
- SectionID = findOrEmitSection(*obj, *si, true, LocalSections);
+ SectionID =
+ findOrEmitSection(*obj, *RelocatedSection, true, LocalSections);
DEBUG(dbgs() << "\tSectionID: " << SectionID << "\n");
isFirstRelocation = false;
}
@@ -165,6 +186,9 @@
}
}

+ // Give the subclasses a chance to tie up any loose ends.
+ finalizeLoad(LocalSections);
+
return obj.take();
}

@@ -174,8 +198,8 @@
void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj,
SymbolTableMap &SymbolTable) {
// Allocate memory for the section
unsigned SectionID = Sections.size();
- uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*),
- SectionID, false);
+ uint8_t *Addr = MemMgr->allocateDataSection(
+ TotalSize, sizeof(void*), SectionID, StringRef(), false);
if (!Addr)
report_fatal_error("Unable to allocate memory for common symbols!");
uint64_t Offset = 0;
@@ -216,11 +240,25 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj,
unsigned StubBufSize = 0, StubSize = getMaxStubSize();
error_code err;
+ const ObjectFile *ObjFile = Obj.getObjectFile();
+ // FIXME: this is an inefficient way to handle this. We should compute the
+ // necessary section allocation size in loadObject by walking all the sections
+ // once.
if (StubSize > 0) { - for (relocation_iterator i = Section.begin_relocations(), - e = Section.end_relocations(); i != e; i.increment(err), Check(err)) - StubBufSize += StubSize; + for (section_iterator SI = ObjFile->begin_sections(), + SE = ObjFile->end_sections(); + SI != SE; SI.increment(err), Check(err)) { + section_iterator RelSecI = SI->getRelocatedSection(); + if (!(RelSecI == Section)) + continue; + + for (relocation_iterator I = SI->begin_relocations(), + E = SI->end_relocations(); I != E; I.increment(err), Check(err)) { + StubBufSize += StubSize; + } + } } + StringRef data; uint64_t Alignment64; Check(Section.getContents(data)); @@ -232,6 +270,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, bool IsZeroInit; bool IsReadOnly; uint64_t DataSize; + unsigned PaddingSize = 0; StringRef Name; Check(Section.isRequiredForExecution(IsRequired)); Check(Section.isVirtual(IsVirtual)); @@ -246,6 +285,12 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, StubBufSize += StubAlignment - EndAlignment; } + // The .eh_frame section (at least on Linux) needs an extra four bytes padded + // with zeroes added at the end. For MachO objects, this section has a + // slightly different name, so this won't have any effect for MachO objects. + if (Name == ".eh_frame") + PaddingSize = 4; + unsigned Allocate; unsigned SectionID = Sections.size(); uint8_t *Addr; @@ -254,10 +299,11 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, // Some sections, such as debug info, don't need to be loaded for execution. // Leave those where they are. if (IsRequired) { - Allocate = DataSize + StubBufSize; + Allocate = DataSize + PaddingSize + StubBufSize; Addr = IsCode - ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID) - : MemMgr->allocateDataSection(Allocate, Alignment, SectionID, IsReadOnly); + ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID, Name) + : MemMgr->allocateDataSection(Allocate, Alignment, SectionID, Name, + IsReadOnly); if (!Addr) report_fatal_error("Unable to allocate section memory!"); @@ -271,6 +317,13 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, else memcpy(Addr, pData, DataSize); + // Fill in any extra bytes we allocated for padding + if (PaddingSize != 0) { + memset(Addr + DataSize, 0, PaddingSize); + // Update the DataSize variable so that the stub offset is set correctly. + DataSize += PaddingSize; + } + DEBUG(dbgs() << "emitSection SectionID: " << SectionID << " Name: " << Name << " obj addr: " << format("%p", pData) @@ -381,7 +434,7 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { StubAddr++; *StubAddr = NopInstr; return Addr; - } else if (Arch == Triple::ppc64) { + } else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) { // PowerPC64 stub: the address points to a function descriptor // instead of the function itself. Load the function address // on r11 and sets it to control register. 
Also loads the function @@ -406,6 +459,10 @@ uint8_t *RuntimeDyldImpl::createStubFunction(uint8_t *Addr) { writeInt16BE(Addr+6, 0x07F1); // brc 15,%r1 // 8-byte address stored at Addr + 8 return Addr; + } else if (Arch == Triple::x86_64) { + *Addr = 0xFF; // jmp + *(Addr+1) = 0x25; // rip + // 32-bit PC-relative address of the GOT entry will be stored at Addr+2 } return Addr; } @@ -439,30 +496,52 @@ void RuntimeDyldImpl::resolveRelocationList(const RelocationList &Relocs, } void RuntimeDyldImpl::resolveExternalSymbols() { - StringMap::iterator i = ExternalSymbolRelocations.begin(), - e = ExternalSymbolRelocations.end(); - for (; i != e; i++) { + while(!ExternalSymbolRelocations.empty()) { + StringMap::iterator i = ExternalSymbolRelocations.begin(); + StringRef Name = i->first(); - RelocationList &Relocs = i->second; - SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(Name); - if (Loc == GlobalSymbolTable.end()) { - if (Name.size() == 0) { - // This is an absolute symbol, use an address of zero. - DEBUG(dbgs() << "Resolving absolute relocations." << "\n"); - resolveRelocationList(Relocs, 0); + if (Name.size() == 0) { + // This is an absolute symbol, use an address of zero. + DEBUG(dbgs() << "Resolving absolute relocations." << "\n"); + RelocationList &Relocs = i->second; + resolveRelocationList(Relocs, 0); + } else { + uint64_t Addr = 0; + SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(Name); + if (Loc == GlobalSymbolTable.end()) { + // This is an external symbol, try to get its address from + // MemoryManager. + Addr = MemMgr->getSymbolAddress(Name.data()); + // The call to getSymbolAddress may have caused additional modules to + // be loaded, which may have added new entries to the + // ExternalSymbolRelocations map. Consequently, we need to update our + // iterator. This is also why retrieval of the relocation list + // associated with this symbol is deferred until below this point. + // New entries may have been added to the relocation list. + i = ExternalSymbolRelocations.find(Name); } else { - // This is an external symbol, try to get its address from - // MemoryManager. - uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(), - true); - DEBUG(dbgs() << "Resolving relocations Name: " << Name - << "\t" << format("%p", Addr) - << "\n"); - resolveRelocationList(Relocs, (uintptr_t)Addr); + // We found the symbol in our global table. It was probably in a + // Module that we loaded previously. + SymbolLoc SymLoc = Loc->second; + Addr = getSectionLoadAddress(SymLoc.first) + SymLoc.second; } - } else { - report_fatal_error("Expected external symbol"); + + // FIXME: Implement error handling that doesn't kill the host program! + if (!Addr) + report_fatal_error("Program used external function '" + Name + + "' which could not be resolved!"); + + updateGOTEntries(Name, Addr); + DEBUG(dbgs() << "Resolving relocations Name: " << Name + << "\t" << format("0x%lx", Addr) + << "\n"); + // This list may have been updated when we called getSymbolAddress, so + // don't change this code to get the list earlier.
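// Reduced to its essentials, the loop above follows this pattern (resolve()
// is a stand-in for the memory-manager/global-table lookup, not a real API):
//
//   while (!ExternalSymbolRelocations.empty()) {
//     i = ExternalSymbolRelocations.begin();
//     StringRef Name = i->first();
//     uint64_t Addr = resolve(Name);            // may grow the map
//     i = ExternalSymbolRelocations.find(Name); // so re-fetch the iterator
//     resolveRelocationList(i->second, Addr);
//     ExternalSymbolRelocations.erase(i);
//   }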
+ RelocationList &Relocs = i->second; + resolveRelocationList(Relocs, Addr); } + + ExternalSymbolRelocations.erase(i); } } @@ -486,33 +565,36 @@ RuntimeDyld::~RuntimeDyld() { ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) { if (!Dyld) { - sys::LLVMFileType type = sys::IdentifyFileType( - InputBuffer->getBufferStart(), - static_cast(InputBuffer->getBufferSize())); - switch (type) { - case sys::ELF_Relocatable_FileType: - case sys::ELF_Executable_FileType: - case sys::ELF_SharedObject_FileType: - case sys::ELF_Core_FileType: - Dyld = new RuntimeDyldELF(MM); - break; - case sys::Mach_O_Object_FileType: - case sys::Mach_O_Executable_FileType: - case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: - case sys::Mach_O_Core_FileType: - case sys::Mach_O_PreloadExecutable_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: - case sys::Mach_O_DynamicLinker_FileType: - case sys::Mach_O_Bundle_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: - case sys::Mach_O_DSYMCompanion_FileType: - Dyld = new RuntimeDyldMachO(MM); - break; - case sys::Unknown_FileType: - case sys::Bitcode_FileType: - case sys::Archive_FileType: - case sys::COFF_FileType: - report_fatal_error("Incompatible object format!"); + sys::fs::file_magic Type = + sys::fs::identify_magic(InputBuffer->getBuffer()); + switch (Type) { + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::elf_executable: + case sys::fs::file_magic::elf_shared_object: + case sys::fs::file_magic::elf_core: + Dyld = new RuntimeDyldELF(MM); + break; + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::macho_executable: + case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: + case sys::fs::file_magic::macho_core: + case sys::fs::file_magic::macho_preload_executable: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib: + case sys::fs::file_magic::macho_dynamic_linker: + case sys::fs::file_magic::macho_bundle: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: + case sys::fs::file_magic::macho_dsym_companion: + Dyld = new RuntimeDyldMachO(MM); + break; + case sys::fs::file_magic::unknown: + case sys::fs::file_magic::bitcode: + case sys::fs::file_magic::archive: + case sys::fs::file_magic::coff_object: + case sys::fs::file_magic::coff_import_library: + case sys::fs::file_magic::pecoff_executable: + case sys::fs::file_magic::macho_universal_binary: + case sys::fs::file_magic::windows_resource: + report_fatal_error("Incompatible object format!"); } } else { if (!Dyld->isCompatibleFormat(InputBuffer)) @@ -523,10 +605,14 @@ ObjectImage *RuntimeDyld::loadObject(ObjectBuffer *InputBuffer) { } void *RuntimeDyld::getSymbolAddress(StringRef Name) { + if (!Dyld) + return NULL; return Dyld->getSymbolAddress(Name); } uint64_t RuntimeDyld::getSymbolLoadAddress(StringRef Name) { + if (!Dyld) + return 0; return Dyld->getSymbolLoadAddress(Name); } @@ -548,8 +634,14 @@ StringRef RuntimeDyld::getErrorString() { return Dyld->getErrorString(); } -StringRef RuntimeDyld::getEHFrameSection() { - return Dyld->getEHFrameSection(); +void RuntimeDyld::registerEHFrames() { + if (Dyld) + Dyld->registerEHFrames(); +} + +void RuntimeDyld::deregisterEHFrames() { + if (Dyld) + Dyld->deregisterEHFrames(); } } // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index d4d84d3..f2c69fc 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ 
b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -22,7 +22,7 @@ #include "llvm/ADT/Triple.h" #include "llvm/ExecutionEngine/ObjectBuffer.h" #include "llvm/ExecutionEngine/ObjectImage.h" -#include "llvm/Object/ELF.h" +#include "llvm/Object/ELFObjectFile.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/ELF.h" using namespace llvm; @@ -151,12 +151,31 @@ void DyldELFObject::updateSymbolAddress(const SymbolRef &SymRef, namespace llvm { -StringRef RuntimeDyldELF::getEHFrameSection() { - for (int i = 0, e = Sections.size(); i != e; ++i) { - if (Sections[i].Name == ".eh_frame") - return StringRef((const char*)Sections[i].Address, Sections[i].Size); +void RuntimeDyldELF::registerEHFrames() { + if (!MemMgr) + return; + for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { + SID EHFrameSID = UnregisteredEHFrameSections[i]; + uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; + size_t EHFrameSize = Sections[EHFrameSID].Size; + MemMgr->registerEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); + RegisteredEHFrameSections.push_back(EHFrameSID); } - return StringRef(); + UnregisteredEHFrameSections.clear(); +} + +void RuntimeDyldELF::deregisterEHFrames() { + if (!MemMgr) + return; + for (int i = 0, e = RegisteredEHFrameSections.size(); i != e; ++i) { + SID EHFrameSID = RegisteredEHFrameSections[i]; + uint8_t *EHFrameAddr = Sections[EHFrameSID].Address; + uint64_t EHFrameLoadAddr = Sections[EHFrameSID].LoadAddress; + size_t EHFrameSize = Sections[EHFrameSID].Size; + MemMgr->deregisterEHFrames(EHFrameAddr, EHFrameLoadAddr, EHFrameSize); + } + RegisteredEHFrameSections.clear(); } ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) { @@ -202,7 +221,8 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, - int64_t Addend) { + int64_t Addend, + uint64_t SymOffset) { switch (Type) { default: llvm_unreachable("Relocation type not implemented yet!"); @@ -227,6 +247,21 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, << " at " << format("%p\n",Target)); break; } + case ELF::R_X86_64_GOTPCREL: { + // findGOTEntry returns the 'G + GOT' part of the relocation calculation + // based on the load/target address of the GOT (not the current/local addr). + uint64_t GOTAddr = findGOTEntry(Value, SymOffset); + uint32_t *Target = reinterpret_cast(Section.Address + Offset); + uint64_t FinalAddress = Section.LoadAddress + Offset; + // The processRelocationRef method combines the symbol offset and the addend + // and in most cases that's what we want. For this relocation type, we need + // the raw addend, so we subtract the symbol offset to get it. 
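// Worked example with assumed numbers for the computation below: in the
// standard form G + GOT + A - P, suppose the symbol's GOT slot will live at
// 0x2000 in the target (GOTAddr), the raw addend A is -4, and the relocated
// field sits at FinalAddress 0x1000. The stored 32-bit value is then
// 0x2000 + (-4) - 0x1000 = 0xFFC.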
+ int64_t RealOffset = GOTAddr + Addend - SymOffset - FinalAddress; + assert(RealOffset <= INT32_MAX && RealOffset >= INT32_MIN); + int32_t TruncOffset = (RealOffset & 0xFFFFFFFF); + *Target = TruncOffset; + break; + } case ELF::R_X86_64_PC32: { // Get the placeholder value from the generated object since // a previous relocation attempt may have overwritten the loaded version @@ -240,6 +275,16 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, *Target = TruncOffset; break; } + case ELF::R_X86_64_PC64: { + // Get the placeholder value from the generated object since + // a previous relocation attempt may have overwritten the loaded version + uint64_t *Placeholder = reinterpret_cast(Section.ObjAddress + + Offset); + uint64_t *Target = reinterpret_cast(Section.Address + Offset); + uint64_t FinalAddress = Section.LoadAddress + Offset; + *Target = *Placeholder + Value + Addend - FinalAddress; + break; + } } } @@ -302,9 +347,9 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, *TargetPtr = Value + Addend; break; } - case ELF::R_AARCH64_PREL32: { // test-shift.ll (.eh_frame) + case ELF::R_AARCH64_PREL32: { uint64_t Result = Value + Addend - FinalAddress; - assert(static_cast(Result) >= INT32_MIN && + assert(static_cast(Result) >= INT32_MIN && static_cast(Result) <= UINT32_MAX); *TargetPtr = static_cast(Result & 0xffffffffU); break; @@ -316,41 +361,62 @@ void RuntimeDyldELF::resolveAArch64Relocation(const SectionEntry &Section, uint64_t BranchImm = Value + Addend - FinalAddress; // "Check that -2^27 <= result < 2^27". - assert(-(1LL << 27) <= static_cast(BranchImm) && + assert(-(1LL << 27) <= static_cast(BranchImm) && static_cast(BranchImm) < (1LL << 27)); + + // AArch64 code is emitted with .rela relocations. The data already in any + // bits affected by the relocation on entry is garbage. + *TargetPtr &= 0xfc000000U; // Immediate goes in bits 25:0 of B and BL. *TargetPtr |= static_cast(BranchImm & 0xffffffcU) >> 2; break; } case ELF::R_AARCH64_MOVW_UABS_G3: { uint64_t Result = Value + Addend; + + // AArch64 code is emitted with .rela relocations. The data already in any + // bits affected by the relocation on entry is garbage. + *TargetPtr &= 0xffe0001fU; // Immediate goes in bits 20:5 of MOVZ/MOVK instruction *TargetPtr |= Result >> (48 - 5); - // Shift is "lsl #48", in bits 22:21 - *TargetPtr |= 3 << 21; + // Shift must be "lsl #48", in bits 22:21 + assert((*TargetPtr >> 21 & 0x3) == 3 && "invalid shift for relocation"); break; } case ELF::R_AARCH64_MOVW_UABS_G2_NC: { uint64_t Result = Value + Addend; + + // AArch64 code is emitted with .rela relocations. The data already in any + // bits affected by the relocation on entry is garbage. + *TargetPtr &= 0xffe0001fU; // Immediate goes in bits 20:5 of MOVZ/MOVK instruction *TargetPtr |= ((Result & 0xffff00000000ULL) >> (32 - 5)); - // Shift is "lsl #32", in bits 22:21 - *TargetPtr |= 2 << 21; + // Shift must be "lsl #32", in bits 22:21 + assert((*TargetPtr >> 21 & 0x3) == 2 && "invalid shift for relocation"); break; } case ELF::R_AARCH64_MOVW_UABS_G1_NC: { uint64_t Result = Value + Addend; + + // AArch64 code is emitted with .rela relocations. The data already in any + // bits affected by the relocation on entry is garbage. 
+ *TargetPtr &= 0xffe0001fU; // Immediate goes in bits 20:5 of MOVZ/MOVK instruction *TargetPtr |= ((Result & 0xffff0000U) >> (16 - 5)); - // Shift is "lsl #16", in bits 22:21 - *TargetPtr |= 1 << 21; + // Shift must be "lsl #16", in bits 22:21 + assert((*TargetPtr >> 21 & 0x3) == 1 && "invalid shift for relocation"); break; } case ELF::R_AARCH64_MOVW_UABS_G0_NC: { uint64_t Result = Value + Addend; + + // AArch64 code is emitted with .rela relocations. The data already in any + // bits affected by the relocation on entry is garbage. + *TargetPtr &= 0xffe0001fU; // Immediate goes in bits 20:5 of MOVZ/MOVK instruction *TargetPtr |= ((Result & 0xffffU) << 5); - // Shift is "lsl #0", in bits 22:21. No action needed. + // Shift must be "lsl #0", in bits 22:21. + assert((*TargetPtr >> 21 & 0x3) == 0 && "invalid shift for relocation"); break; } } @@ -362,6 +428,8 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, uint32_t Type, int32_t Addend) { // TODO: Add Thumb relocations. + uint32_t *Placeholder = reinterpret_cast(Section.ObjAddress + + Offset); uint32_t* TargetPtr = (uint32_t*)(Section.Address + Offset); uint32_t FinalAddress = ((Section.LoadAddress + Offset) & 0xFFFFFFFF); Value += Addend; @@ -380,44 +448,51 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, // Write a 32bit value to relocation address, taking into account the // implicit addend encoded in the target. - case ELF::R_ARM_TARGET1 : - case ELF::R_ARM_ABS32 : - *TargetPtr += Value; + case ELF::R_ARM_TARGET1: + case ELF::R_ARM_ABS32: + *TargetPtr = *Placeholder + Value; break; - // Write first 16 bit of 32 bit value to the mov instruction. // Last 4 bit should be shifted. - case ELF::R_ARM_MOVW_ABS_NC : + case ELF::R_ARM_MOVW_ABS_NC: // We are not expecting any other addend in the relocation address. // Using 0x000F0FFF because MOVW has its 16 bit immediate split into 2 // non-contiguous fields. - assert((*TargetPtr & 0x000F0FFF) == 0); Value = Value & 0xFFFF; - *TargetPtr |= Value & 0xFFF; + assert((*Placeholder & 0x000F0FFF) == 0); Value = Value & 0xFFFF; - *TargetPtr |= Value & 0xFFF; + *TargetPtr = *Placeholder | (Value & 0xFFF); *TargetPtr |= ((Value >> 12) & 0xF) << 16; break; - // Write last 16 bit of 32 bit value to the mov instruction. // Last 4 bit should be shifted. - case ELF::R_ARM_MOVT_ABS : + case ELF::R_ARM_MOVT_ABS: // We are not expecting any other addend in the relocation address. // Use 0x000F0FFF for the same reason as R_ARM_MOVW_ABS_NC. - assert((*TargetPtr & 0x000F0FFF) == 0); + assert((*Placeholder & 0x000F0FFF) == 0); + Value = (Value >> 16) & 0xFFFF; - *TargetPtr |= Value & 0xFFF; + *TargetPtr = *Placeholder | (Value & 0xFFF); *TargetPtr |= ((Value >> 12) & 0xF) << 16; break; - // Write 24 bit relative value to the branch instruction. case ELF::R_ARM_PC24 : // Fall through. case ELF::R_ARM_CALL : // Fall through. - case ELF::R_ARM_JUMP24 : + case ELF::R_ARM_JUMP24: { int32_t RelValue = static_cast(Value - FinalAddress - 8); RelValue = (RelValue & 0x03FFFFFC) >> 2; + assert((*TargetPtr & 0xFFFFFF) == 0xFFFFFE); *TargetPtr &= 0xFF000000; *TargetPtr |= RelValue; break; } + case ELF::R_ARM_PRIVATE_0: + // This relocation is reserved by the ARM ELF ABI for internal use. We + // appropriate it here to act as an R_ARM_ABS32 without any addend for use + // in the stubs created during JIT (which can't put an addend into the + // original object file).
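// Side by side with the R_ARM_ABS32 case above, the only difference is the
// dropped implicit addend:
//
//   R_ARM_ABS32:     *TargetPtr = *Placeholder + Value;  // S + A
//   R_ARM_PRIVATE_0: *TargetPtr = Value;                 // S only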
+ *TargetPtr = Value; + break; + } } void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, @@ -425,6 +500,8 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, uint32_t Value, uint32_t Type, int32_t Addend) { + uint32_t *Placeholder = reinterpret_cast(Section.ObjAddress + + Offset); uint32_t* TargetPtr = (uint32_t*)(Section.Address + Offset); Value += Addend; @@ -442,19 +519,30 @@ void RuntimeDyldELF::resolveMIPSRelocation(const SectionEntry &Section, llvm_unreachable("Not implemented relocation type!"); break; case ELF::R_MIPS_32: - *TargetPtr = Value + (*TargetPtr); + *TargetPtr = Value + (*Placeholder); break; case ELF::R_MIPS_26: - *TargetPtr = ((*TargetPtr) & 0xfc000000) | (( Value & 0x0fffffff) >> 2); + *TargetPtr = ((*Placeholder) & 0xfc000000) | (( Value & 0x0fffffff) >> 2); break; case ELF::R_MIPS_HI16: // Get the higher 16-bits. Also add 1 if bit 15 is 1. - Value += ((*TargetPtr) & 0x0000ffff) << 16; + Value += ((*Placeholder) & 0x0000ffff) << 16; + *TargetPtr = ((*Placeholder) & 0xffff0000) | + (((Value + 0x8000) >> 16) & 0xffff); + break; + case ELF::R_MIPS_LO16: + Value += ((*Placeholder) & 0x0000ffff); + *TargetPtr = ((*Placeholder) & 0xffff0000) | (Value & 0xffff); + break; + case ELF::R_MIPS_UNUSED1: + // Similar to ELF::R_ARM_PRIVATE_0, R_MIPS_UNUSED1 and R_MIPS_UNUSED2 + // are used for internal JIT purposes. These relocations are similar to + // R_MIPS_HI16 and R_MIPS_LO16, but they do not take any addend into + // account. *TargetPtr = ((*TargetPtr) & 0xffff0000) | (((Value + 0x8000) >> 16) & 0xffff); break; - case ELF::R_MIPS_LO16: - Value += ((*TargetPtr) & 0x0000ffff); + case ELF::R_MIPS_UNUSED2: *TargetPtr = ((*TargetPtr) & 0xffff0000) | (Value & 0xffff); break; } @@ -499,9 +587,13 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj, error_code err; for (section_iterator si = Obj.begin_sections(), se = Obj.end_sections(); si != se; si.increment(err)) { - StringRef SectionName; - check(si->getName(SectionName)); - if (SectionName != ".opd") + section_iterator RelSecI = si->getRelocatedSection(); + if (RelSecI == Obj.end_sections()) + continue; + + StringRef RelSectionName; + check(RelSecI->getName(RelSectionName)); + if (RelSectionName != ".opd") continue; for (relocation_iterator i = si->begin_relocations(), @@ -517,12 +609,11 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj, continue; } - SymbolRef TargetSymbol; uint64_t TargetSymbolOffset; - int64_t TargetAdditionalInfo; - check(i->getSymbol(TargetSymbol)); + symbol_iterator TargetSymbol = i->getSymbol(); check(i->getOffset(TargetSymbolOffset)); - check(i->getAdditionalInfo(TargetAdditionalInfo)); + int64_t Addend; + check(getELFRelocationAddend(*i, Addend)); i = i.increment(err); if (i == e) @@ -538,13 +629,13 @@ void RuntimeDyldELF::findOPDEntrySection(ObjectImage &Obj, // Finally compares the Symbol value and the target symbol offset // to check if this .opd entry refers to the symbol the relocation // points to.
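// Worked example for the %hi/%lo rounding in the MIPS cases above: the
// +0x8000 pre-compensates for sign-extension of the low half. A value of
// 0x00018FFC splits as %hi = 0x0002 and %lo = 0x8FFC, recombining as
// 0x00020000 + (-0x7004) = 0x00018FFC; a plain Value >> 16 would leave the
// high part off by one whenever bit 15 of the low half is set.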
- if (Rel.Addend != (intptr_t)TargetSymbolOffset) + if (Rel.Addend != (int64_t)TargetSymbolOffset) continue; section_iterator tsi(Obj.end_sections()); - check(TargetSymbol.getSection(tsi)); + check(TargetSymbol->getSection(tsi)); Rel.SectionID = findOrEmitSection(Obj, (*tsi), true, LocalSections); - Rel.Addend = (intptr_t)TargetAdditionalInfo; + Rel.Addend = (intptr_t)Addend; return; } } @@ -688,20 +779,42 @@ void RuntimeDyldELF::resolveSystemZRelocation(const SectionEntry &Section, } } +// The target location for the relocation is described by RE.SectionID and +// RE.Offset. RE.SectionID can be used to find the SectionEntry. Each +// SectionEntry has three members describing its location. +// SectionEntry::Address is the address at which the section has been loaded +// into memory in the current (host) process. SectionEntry::LoadAddress is the +// address that the section will have in the target process. +// SectionEntry::ObjAddress is the address of the bits for this section in the +// original emitted object image (also in the current address space). +// +// Relocations will be applied as if the section were loaded at +// SectionEntry::LoadAddress, but they will be applied at an address based +// on SectionEntry::Address. SectionEntry::ObjAddress will be used to refer to +// Target memory contents if they are required for value calculations. +// +// The Value parameter here is the load address of the symbol for the +// relocation to be applied. For relocations which refer to symbols in the +// current object Value will be the LoadAddress of the section in which +// the symbol resides (RE.Addend provides additional information about the +// symbol location). For external symbols, Value will be the address of the +// symbol in the target address space. void RuntimeDyldELF::resolveRelocation(const RelocationEntry &RE, - uint64_t Value) { + uint64_t Value) { const SectionEntry &Section = Sections[RE.SectionID]; - return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend); + return resolveRelocation(Section, RE.Offset, Value, RE.RelType, RE.Addend, + RE.SymOffset); } void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, - int64_t Addend) { + int64_t Addend, + uint64_t SymOffset) { switch (Arch) { case Triple::x86_64: - resolveX86_64Relocation(Section, Offset, Value, Type, Addend); + resolveX86_64Relocation(Section, Offset, Value, Type, Addend, SymOffset); break; case Triple::x86: resolveX86Relocation(Section, Offset, @@ -723,7 +836,8 @@ void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, (uint32_t)(Value & 0xffffffffL), Type, (uint32_t)(Addend & 0xffffffffL)); break; - case Triple::ppc64: + case Triple::ppc64: // Fall through. 
+ case Triple::ppc64le: resolvePPC64Relocation(Section, Offset, Value, Type, Addend); break; case Triple::systemz: @@ -742,31 +856,37 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID, uint64_t RelType; Check(RelI.getType(RelType)); int64_t Addend; - Check(RelI.getAdditionalInfo(Addend)); - SymbolRef Symbol; - Check(RelI.getSymbol(Symbol)); + Check(getELFRelocationAddend(RelI, Addend)); + symbol_iterator Symbol = RelI.getSymbol(); // Obtain the symbol name which is referenced in the relocation StringRef TargetName; - Symbol.getName(TargetName); + if (Symbol != Obj.end_symbols()) + Symbol->getName(TargetName); DEBUG(dbgs() << "\t\tRelType: " << RelType << " Addend: " << Addend << " TargetName: " << TargetName << "\n"); RelocationValueRef Value; // First search for the symbol in the local symbol table - SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data()); - SymbolRef::Type SymType; - Symbol.getType(SymType); + SymbolTableMap::const_iterator lsi = Symbols.end(); + SymbolRef::Type SymType = SymbolRef::ST_Unknown; + if (Symbol != Obj.end_symbols()) { + lsi = Symbols.find(TargetName.data()); + Symbol->getType(SymType); + } if (lsi != Symbols.end()) { Value.SectionID = lsi->second.first; + Value.Offset = lsi->second.second; Value.Addend = lsi->second.second + Addend; } else { // Search for the symbol in the global symbol table - SymbolTableMap::const_iterator gsi = - GlobalSymbolTable.find(TargetName.data()); + SymbolTableMap::const_iterator gsi = GlobalSymbolTable.end(); + if (Symbol != Obj.end_symbols()) + gsi = GlobalSymbolTable.find(TargetName.data()); if (gsi != GlobalSymbolTable.end()) { Value.SectionID = gsi->second.first; + Value.Offset = gsi->second.second; Value.Addend = gsi->second.second + Addend; } else { switch (SymType) { @@ -775,7 +895,7 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID, // and can be changed by other developers. Maybe the best way is to add // a new symbol type ST_Section to SymbolRef and use it. section_iterator si(Obj.end_sections()); - Symbol.getSection(si); + Symbol->getSection(si); if (si == Obj.end_sections()) llvm_unreachable("Symbol section not found, bad object file format!"); DEBUG(dbgs() << "\t\tThis is section symbol\n"); @@ -789,9 +909,17 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID, Value.Addend = Addend; break; } + case SymbolRef::ST_Data: case SymbolRef::ST_Unknown: { Value.SymbolName = TargetName.data(); Value.Addend = Addend; + + // Absolute relocations will have a zero symbol ID (STN_UNDEF), which + // will manifest here as a NULL symbol name. + // We can set this as a valid (but empty) symbol name, and rely + // on addRelocationForSymbol to handle this. + if (!Value.SymbolName) + Value.SymbolName = ""; break; } default: @@ -876,7 +1004,7 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID, uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, - ELF::R_ARM_ABS32, Value.Addend); + ELF::R_ARM_PRIVATE_0, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else @@ -903,8 +1031,8 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID, // Look up for existing stub.
StubMap::const_iterator i = Stubs.find(Value); if (i != Stubs.end()) { - resolveRelocation(Section, Offset, - (uint64_t)Section.Address + i->second, RelType, 0); + RelocationEntry RE(SectionID, Offset, RelType, i->second); + addRelocationForSection(RE, SectionID); DEBUG(dbgs() << " Stub function found\n"); } else { // Create a new stub function. @@ -916,10 +1044,10 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID, // Creating Hi and Lo relocations for the filled stub instructions. RelocationEntry REHi(SectionID, StubTargetAddr - Section.Address, - ELF::R_MIPS_HI16, Value.Addend); + ELF::R_MIPS_UNUSED1, Value.Addend); RelocationEntry RELo(SectionID, StubTargetAddr - Section.Address + 4, - ELF::R_MIPS_LO16, Value.Addend); + ELF::R_MIPS_UNUSED2, Value.Addend); if (Value.SymbolName) { addRelocationForSymbol(REHi, Value.SymbolName); @@ -929,12 +1057,11 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID, addRelocationForSection(RELo, Value.SectionID); } - resolveRelocation(Section, Offset, - (uint64_t)Section.Address + Section.StubOffset, - RelType, 0); + RelocationEntry RE(SectionID, Offset, RelType, Section.StubOffset); + addRelocationForSection(RE, SectionID); Section.StubOffset += getMaxStubSize(); } - } else if (Arch == Triple::ppc64) { + } else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) { if (RelType == ELF::R_PPC64_REL24) { // A PPC branch relocation will need a stub function if the target is // an external symbol (Symbol::ST_Unknown) or if the target address @@ -1017,7 +1144,10 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID, RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); // Extra check to avoid relocation against empty symbols (usually // the R_PPC64_TOC). - if (Value.SymbolName && !TargetName.empty()) + if (SymType != SymbolRef::ST_Unknown && TargetName.empty()) + Value.SymbolName = NULL; + + if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else addRelocationForSection(RE, Value.SectionID); @@ -1069,8 +1199,67 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID, ELF::R_390_PC32DBL, Addend); else resolveRelocation(Section, Offset, StubAddress, RelType, Addend); + } else if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_PLT32) { + // The way the PLT relocations normally work is that the linker allocates the + // PLT and this relocation makes a PC-relative call into the PLT. The PLT + // entry will then jump to an address provided by the GOT. On first call, the + // GOT address will point back into PLT code that resolves the symbol. After + // the first call, the GOT entry points to the actual function. + // + // For local functions we're ignoring all of that here and just replacing + // the PLT32 relocation type with PC32, which will translate the relocation + // into a PC-relative call directly to the function. For external symbols we + // can't be sure the function will be within 2^32 bytes of the call site, so + // we need to create a stub, which calls into the GOT. This case is + // equivalent to the usual PLT implementation except that we use the stub + // mechanism in RuntimeDyld (which puts stubs at the end of the section) + // rather than allocating a PLT section. + if (Value.SymbolName) { + // This is a call to an external function. + // Look for an existing stub.
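// With assumed addresses, the control flow this arranges for an external
// call looks like (stub bytes as emitted by createStubFunction for x86-64):
//
//   0x1000: e8 fb 0f 00 00     call 0x2000       ; R_X86_64_PC32 -> stub
//   0x2000: ff 25 xx xx xx xx  jmp *disp32(%rip) ; R_X86_64_GOTPCREL -> GOT
//   GOT slot: 8 bytes, zeroed in finalizeLoad and written with the symbol's
//             address once it is resolved (updateGOTEntries/findGOTEntry).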
+ SectionEntry &Section = Sections[SectionID]; + StubMap::const_iterator i = Stubs.find(Value); + uintptr_t StubAddress; + if (i != Stubs.end()) { + StubAddress = uintptr_t(Section.Address) + i->second; + DEBUG(dbgs() << " Stub function found\n"); + } else { + // Create a new stub function (equivalent to a PLT entry). + DEBUG(dbgs() << " Create a new stub function\n"); + + uintptr_t BaseAddress = uintptr_t(Section.Address); + uintptr_t StubAlignment = getStubAlignment(); + StubAddress = (BaseAddress + Section.StubOffset + + StubAlignment - 1) & -StubAlignment; + unsigned StubOffset = StubAddress - BaseAddress; + Stubs[Value] = StubOffset; + createStubFunction((uint8_t *)StubAddress); + + // Create a GOT entry for the external function. + GOTEntries.push_back(Value); + + // Make our stub function a relative call to the GOT entry. + RelocationEntry RE(SectionID, StubOffset + 2, + ELF::R_X86_64_GOTPCREL, -4); + addRelocationForSymbol(RE, Value.SymbolName); + + // Bump our stub offset counter + Section.StubOffset = StubOffset + getMaxStubSize(); + } + + // Make the target call a call into the stub table. + resolveRelocation(Section, Offset, StubAddress, + ELF::R_X86_64_PC32, Addend); + } else { + RelocationEntry RE(SectionID, Offset, ELF::R_X86_64_PC32, Value.Addend, + Value.Offset); + addRelocationForSection(RE, Value.SectionID); + } } else { - RelocationEntry RE(SectionID, Offset, RelType, Value.Addend); + if (Arch == Triple::x86_64 && RelType == ELF::R_X86_64_GOTPCREL) { + GOTEntries.push_back(Value); + } + RelocationEntry RE(SectionID, Offset, RelType, Value.Addend, Value.Offset); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else @@ -1078,6 +1267,137 @@ void RuntimeDyldELF::processRelocationRef(unsigned SectionID, } } +void RuntimeDyldELF::updateGOTEntries(StringRef Name, uint64_t Addr) { + + SmallVectorImpl >::iterator it; + SmallVectorImpl >::iterator end = GOTs.end(); + + for (it = GOTs.begin(); it != end; ++it) { + GOTRelocations &GOTEntries = it->second; + for (int i = 0, e = GOTEntries.size(); i != e; ++i) { + if (GOTEntries[i].SymbolName != 0 && GOTEntries[i].SymbolName == Name) { + GOTEntries[i].Offset = Addr; + } + } + } +} + +size_t RuntimeDyldELF::getGOTEntrySize() { + // We don't use the GOT in all of these cases, but it's essentially free + // to put them all here. + size_t Result = 0; + switch (Arch) { + case Triple::x86_64: + case Triple::aarch64: + case Triple::ppc64: + case Triple::ppc64le: + case Triple::systemz: + Result = sizeof(uint64_t); + break; + case Triple::x86: + case Triple::arm: + case Triple::thumb: + case Triple::mips: + case Triple::mipsel: + Result = sizeof(uint32_t); + break; + default: llvm_unreachable("Unsupported CPU type!"); + } + return Result; +} + +uint64_t RuntimeDyldELF::findGOTEntry(uint64_t LoadAddress, + uint64_t Offset) { + + const size_t GOTEntrySize = getGOTEntrySize(); + + SmallVectorImpl >::const_iterator it; + SmallVectorImpl >::const_iterator end = GOTs.end(); + + int GOTIndex = -1; + for (it = GOTs.begin(); it != end; ++it) { + SID GOTSectionID = it->first; + const GOTRelocations &GOTEntries = it->second; + + // Find the matching entry in our vector. 
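// Once a match is found, the entry's target-space address is plain array
// arithmetic, as computed at the bottom of this function: e.g. a GOT section
// loaded at 0x5000 with 8-byte entries puts entry 3 at
// 0x5000 + 3 * 8 = 0x5018.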
+ uint64_t SymbolOffset = 0; + for (int i = 0, e = GOTEntries.size(); i != e; ++i) { + if (GOTEntries[i].SymbolName == 0) { + if (getSectionLoadAddress(GOTEntries[i].SectionID) == LoadAddress && + GOTEntries[i].Offset == Offset) { + GOTIndex = i; + SymbolOffset = GOTEntries[i].Offset; + break; + } + } else { + // GOT entries for external symbols use the addend as the address when + // the external symbol has been resolved. + if (GOTEntries[i].Offset == LoadAddress) { + GOTIndex = i; + // Don't use the Addend here. The relocation handler will use it. + break; + } + } + } + + if (GOTIndex != -1) { + if (GOTEntrySize == sizeof(uint64_t)) { + uint64_t *LocalGOTAddr = (uint64_t*)getSectionAddress(GOTSectionID); + // Fill in this entry with the address of the symbol being referenced. + LocalGOTAddr[GOTIndex] = LoadAddress + SymbolOffset; + } else { + uint32_t *LocalGOTAddr = (uint32_t*)getSectionAddress(GOTSectionID); + // Fill in this entry with the address of the symbol being referenced. + LocalGOTAddr[GOTIndex] = (uint32_t)(LoadAddress + SymbolOffset); + } + + // Calculate the load address of this entry + return getSectionLoadAddress(GOTSectionID) + (GOTIndex * GOTEntrySize); + } + } + + assert(GOTIndex != -1 && "Unable to find requested GOT entry."); + return 0; +} + +void RuntimeDyldELF::finalizeLoad(ObjSectionToIDMap &SectionMap) { + // If necessary, allocate the global offset table + if (MemMgr) { + // Allocate the GOT if necessary + size_t numGOTEntries = GOTEntries.size(); + if (numGOTEntries != 0) { + // Allocate memory for the section + unsigned SectionID = Sections.size(); + size_t TotalSize = numGOTEntries * getGOTEntrySize(); + uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, getGOTEntrySize(), + SectionID, ".got", false); + if (!Addr) + report_fatal_error("Unable to allocate memory for GOT!"); + + GOTs.push_back(std::make_pair(SectionID, GOTEntries)); + Sections.push_back(SectionEntry(".got", Addr, TotalSize, 0)); + // For now, initialize all GOT entries to zero. We'll fill them in as + // needed when GOT-based relocations are applied. + memset(Addr, 0, TotalSize); + } + } + else { + report_fatal_error("Unable to allocate memory for GOT!"); + } + + // Look for and record the EH frame section. 
+ ObjSectionToIDMap::iterator i, e; + for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) { + const SectionRef &Section = i->first; + StringRef Name; + Section.getName(Name); + if (Name == ".eh_frame") { + UnregisteredEHFrameSections.push_back(i->second); + break; + } + } +} + bool RuntimeDyldELF::isCompatibleFormat(const ObjectBuffer *Buffer) const { if (Buffer->getBufferSize() < strlen(ELF::ElfMagic)) return false; diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h index 794c7ec..3adf827 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.h @@ -15,6 +15,7 @@ #define LLVM_RUNTIME_DYLD_ELF_H #include "RuntimeDyldImpl.h" +#include "llvm/ADT/DenseMap.h" using namespace llvm; @@ -35,13 +36,15 @@ class RuntimeDyldELF : public RuntimeDyldImpl { uint64_t Offset, uint64_t Value, uint32_t Type, - int64_t Addend); + int64_t Addend, + uint64_t SymOffset=0); void resolveX86_64Relocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, uint32_t Type, - int64_t Addend); + int64_t Addend, + uint64_t SymOffset); void resolveX86Relocation(const SectionEntry &Section, uint64_t Offset, @@ -79,13 +82,55 @@ class RuntimeDyldELF : public RuntimeDyldImpl { uint32_t Type, int64_t Addend); + unsigned getMaxStubSize() { + if (Arch == Triple::aarch64) + return 20; // movz; movk; movk; movk; br + if (Arch == Triple::arm || Arch == Triple::thumb) + return 8; // 32-bit instruction and 32-bit address + else if (Arch == Triple::mipsel || Arch == Triple::mips) + return 16; + else if (Arch == Triple::ppc64 || Arch == Triple::ppc64le) + return 44; + else if (Arch == Triple::x86_64) + return 6; // 2-byte jmp instruction + 32-bit relative address + else if (Arch == Triple::systemz) + return 16; + else + return 0; + } + + unsigned getStubAlignment() { + if (Arch == Triple::systemz) + return 8; + else + return 1; + } + uint64_t findPPC64TOC() const; void findOPDEntrySection(ObjectImage &Obj, ObjSectionToIDMap &LocalSections, RelocationValueRef &Rel); + uint64_t findGOTEntry(uint64_t LoadAddr, uint64_t Offset); + size_t getGOTEntrySize(); + + virtual void updateGOTEntries(StringRef Name, uint64_t Addr); + + // Relocation entries for symbols whose position-independent offset is + // updated in a global offset table. + typedef SmallVector GOTRelocations; + GOTRelocations GOTEntries; // List of entries requiring finalization. + SmallVector, 8> GOTs; // Allocated tables. + + // When a module is loaded we save the SectionID of the EH frame section + // in a table until we receive a request to register all unregistered + // EH frame sections with the memory manager.
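// From a client's perspective the intended lifecycle is roughly as follows
// (a sketch; MemMgr stands for any RTDyldMemoryManager implementation):
//
//   RuntimeDyld Dyld(&MemMgr);
//   Dyld.loadObject(Buffer);    // finalizeLoad records the .eh_frame SID
//   Dyld.resolveRelocations();
//   Dyld.registerEHFrames();    // drains UnregisteredEHFrameSections
//   ...
//   Dyld.deregisterEHFrames();  // before the code is unloaded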
+ SmallVector UnregisteredEHFrameSections; + SmallVector RegisteredEHFrameSections; + public: - RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} + RuntimeDyldELF(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) + {} virtual void resolveRelocation(const RelocationEntry &RE, uint64_t Value); virtual void processRelocationRef(unsigned SectionID, @@ -96,7 +141,9 @@ public: StubMap &Stubs); virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const; virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); - virtual StringRef getEHFrameSection(); + virtual void registerEHFrames(); + virtual void deregisterEHFrames(); + virtual void finalizeLoad(ObjSectionToIDMap &SectionMap); virtual ~RuntimeDyldELF(); }; diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 383ffab..3014b30 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -25,6 +25,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" +#include "llvm/Support/Mutex.h" #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" @@ -80,14 +81,18 @@ public: unsigned SectionID; /// Offset - offset into the section. - uintptr_t Offset; + uint64_t Offset; /// RelType - relocation type. uint32_t RelType; /// Addend - the relocation addend encoded in the instruction itself. Also /// used to make a relocation section relative instead of symbol relative. - intptr_t Addend; + int64_t Addend; + + /// SymOffset - Section offset of the relocation entry's symbol (used for GOT + /// lookup). + uint64_t SymOffset; /// True if this is a PCRel relocation (MachO specific). 
bool IsPCRel; @@ -97,26 +102,39 @@ public: RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend) : SectionID(id), Offset(offset), RelType(type), Addend(addend), - IsPCRel(false), Size(0) {} + SymOffset(0), IsPCRel(false), Size(0) {} + + RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend, + uint64_t symoffset) + : SectionID(id), Offset(offset), RelType(type), Addend(addend), + SymOffset(symoffset), IsPCRel(false), Size(0) {} RelocationEntry(unsigned id, uint64_t offset, uint32_t type, int64_t addend, bool IsPCRel, unsigned Size) : SectionID(id), Offset(offset), RelType(type), Addend(addend), - IsPCRel(IsPCRel), Size(Size) {} + SymOffset(0), IsPCRel(IsPCRel), Size(Size) {} }; class RelocationValueRef { public: unsigned SectionID; - intptr_t Addend; + uint64_t Offset; + int64_t Addend; const char *SymbolName; - RelocationValueRef(): SectionID(0), Addend(0), SymbolName(0) {} + RelocationValueRef(): SectionID(0), Offset(0), Addend(0), SymbolName(0) {} inline bool operator==(const RelocationValueRef &Other) const { - return std::memcmp(this, &Other, sizeof(RelocationValueRef)) == 0; + return SectionID == Other.SectionID && Offset == Other.Offset && + Addend == Other.Addend && SymbolName == Other.SymbolName; } inline bool operator <(const RelocationValueRef &Other) const { - return std::memcmp(this, &Other, sizeof(RelocationValueRef)) < 0; + if (SectionID != Other.SectionID) + return SectionID < Other.SectionID; + if (Offset != Other.Offset) + return Offset < Other.Offset; + if (Addend != Other.Addend) + return Addend < Other.Addend; + return SymbolName < Other.SymbolName; } }; @@ -130,6 +148,9 @@ protected: typedef SmallVector SectionList; SectionList Sections; + typedef unsigned SID; // Type for SectionIDs + #define RTDYLD_INVALID_SECTION_ID ((SID)(-1)) + // Keep a map of sections from object file to the SectionID which // references it. typedef std::map ObjSectionToIDMap; @@ -164,30 +185,22 @@ protected: typedef std::map StubMap; Triple::ArchType Arch; - - inline unsigned getMaxStubSize() { - if (Arch == Triple::aarch64) - return 20; // movz; movk; movk; movk; br - if (Arch == Triple::arm || Arch == Triple::thumb) - return 8; // 32-bit instruction and 32-bit address - else if (Arch == Triple::mipsel || Arch == Triple::mips) - return 16; - else if (Arch == Triple::ppc64) - return 44; - else if (Arch == Triple::x86_64) - return 8; // GOT - else if (Arch == Triple::systemz) - return 16; - else - return 0; - } - - inline unsigned getStubAlignment() { - if (Arch == Triple::systemz) - return 8; - else - return 1; - } + bool IsTargetLittleEndian; + + // This mutex prevents simultaneously loading objects from two different + // threads. This keeps us from having to protect individual data structures + // and guarantees that section allocation requests to the memory manager + // won't be interleaved between modules. It is also used in mapSectionAddress + // and resolveRelocations to protect write access to internal data structures. + // + // loadObject may be called on the same thread during the handling of + // processRelocations, and that's OK. The handling of the relocation lists + // is written in such a way as to work correctly if new elements are added to + // the end of the list while the list is being processed.
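// The lock is taken scope-wise at each entry point, as seen earlier in this
// patch at the top of loadObject and mapSectionAddress:
//
//   MutexGuard locked(lock); // released automatically on scope exit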
+ sys::Mutex lock; + + virtual unsigned getMaxStubSize() = 0; + virtual unsigned getStubAlignment() = 0; bool HasError; std::string ErrorStr; @@ -208,14 +221,14 @@ protected: } void writeInt16BE(uint8_t *Addr, uint16_t Value) { - if (sys::IsLittleEndianHost) + if (IsTargetLittleEndian) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 8) & 0xFF; *(Addr+1) = Value & 0xFF; } void writeInt32BE(uint8_t *Addr, uint32_t Value) { - if (sys::IsLittleEndianHost) + if (IsTargetLittleEndian) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 24) & 0xFF; *(Addr+1) = (Value >> 16) & 0xFF; @@ -224,7 +237,7 @@ protected: } void writeInt64BE(uint8_t *Addr, uint64_t Value) { - if (sys::IsLittleEndianHost) + if (IsTargetLittleEndian) Value = sys::SwapByteOrder(Value); *Addr = (Value >> 56) & 0xFF; *(Addr+1) = (Value >> 48) & 0xFF; @@ -292,6 +305,11 @@ protected: /// \brief Resolve relocations to external symbols. void resolveExternalSymbols(); + + /// \brief Update GOT entries for external symbols. + // The base class does nothing. ELF overrides this. + virtual void updateGOTEntries(StringRef Name, uint64_t Addr) {} + virtual ObjectImage *createObjectImage(ObjectBuffer *InputBuffer); public: RuntimeDyldImpl(RTDyldMemoryManager *mm) : MemMgr(mm), HasError(false) {} @@ -303,18 +321,20 @@ public: void *getSymbolAddress(StringRef Name) { // FIXME: Just look up as a function for now. Overly simple of course. // Work in progress. - if (GlobalSymbolTable.find(Name) == GlobalSymbolTable.end()) + SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name); + if (pos == GlobalSymbolTable.end()) return 0; - SymbolLoc Loc = GlobalSymbolTable.lookup(Name); + SymbolLoc Loc = pos->second; return getSectionAddress(Loc.first) + Loc.second; } uint64_t getSymbolLoadAddress(StringRef Name) { // FIXME: Just look up as a function for now. Overly simple of course. // Work in progress. 
- if (GlobalSymbolTable.find(Name) == GlobalSymbolTable.end()) + SymbolTableMap::const_iterator pos = GlobalSymbolTable.find(Name); + if (pos == GlobalSymbolTable.end()) return 0; - SymbolLoc Loc = GlobalSymbolTable.lookup(Name); + SymbolLoc Loc = pos->second; return getSectionLoadAddress(Loc.first) + Loc.second; } @@ -335,7 +355,11 @@ public: virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const = 0; - virtual StringRef getEHFrameSection(); + virtual void registerEHFrames(); + + virtual void deregisterEHFrames(); + + virtual void finalizeLoad(ObjSectionToIDMap &SectionMap) {} }; } // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 01a3fd9..5b92867 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -55,35 +55,80 @@ static intptr_t computeDelta(SectionEntry *A, SectionEntry *B) { return ObjDistance - MemDistance; } -StringRef RuntimeDyldMachO::getEHFrameSection() { - SectionEntry *Text = NULL; - SectionEntry *EHFrame = NULL; - SectionEntry *ExceptTab = NULL; - for (int i = 0, e = Sections.size(); i != e; ++i) { - if (Sections[i].Name == "__eh_frame") - EHFrame = &Sections[i]; - else if (Sections[i].Name == "__text") - Text = &Sections[i]; - else if (Sections[i].Name == "__gcc_except_tab") - ExceptTab = &Sections[i]; +void RuntimeDyldMachO::registerEHFrames() { + + if (!MemMgr) + return; + for (int i = 0, e = UnregisteredEHFrameSections.size(); i != e; ++i) { + EHFrameRelatedSections &SectionInfo = UnregisteredEHFrameSections[i]; + if (SectionInfo.EHFrameSID == RTDYLD_INVALID_SECTION_ID || + SectionInfo.TextSID == RTDYLD_INVALID_SECTION_ID) + continue; + SectionEntry *Text = &Sections[SectionInfo.TextSID]; + SectionEntry *EHFrame = &Sections[SectionInfo.EHFrameSID]; + SectionEntry *ExceptTab = NULL; + if (SectionInfo.ExceptTabSID != RTDYLD_INVALID_SECTION_ID) + ExceptTab = &Sections[SectionInfo.ExceptTabSID]; + + intptr_t DeltaForText = computeDelta(Text, EHFrame); + intptr_t DeltaForEH = 0; + if (ExceptTab) + DeltaForEH = computeDelta(ExceptTab, EHFrame); + + unsigned char *P = EHFrame->Address; + unsigned char *End = P + EHFrame->Size; + do { + P = processFDE(P, DeltaForText, DeltaForEH); + } while(P != End); + + MemMgr->registerEHFrames(EHFrame->Address, + EHFrame->LoadAddress, + EHFrame->Size); } - if (Text == NULL || EHFrame == NULL) - return StringRef(); - - intptr_t DeltaForText = computeDelta(Text, EHFrame); - intptr_t DeltaForEH = 0; - if (ExceptTab) - DeltaForEH = computeDelta(ExceptTab, EHFrame); - - unsigned char *P = EHFrame->Address; - unsigned char *End = P + EHFrame->Size; - do { - P = processFDE(P, DeltaForText, DeltaForEH); - } while(P != End); + UnregisteredEHFrameSections.clear(); +} - return StringRef((char*)EHFrame->Address, EHFrame->Size); +void RuntimeDyldMachO::finalizeLoad(ObjSectionToIDMap &SectionMap) { + unsigned EHFrameSID = RTDYLD_INVALID_SECTION_ID; + unsigned TextSID = RTDYLD_INVALID_SECTION_ID; + unsigned ExceptTabSID = RTDYLD_INVALID_SECTION_ID; + ObjSectionToIDMap::iterator i, e; + for (i = SectionMap.begin(), e = SectionMap.end(); i != e; ++i) { + const SectionRef &Section = i->first; + StringRef Name; + Section.getName(Name); + if (Name == "__eh_frame") + EHFrameSID = i->second; + else if (Name == "__text") + TextSID = i->second; + else if (Name == "__gcc_except_tab") + ExceptTabSID = i->second; + } + 
UnregisteredEHFrameSections.push_back(EHFrameRelatedSections(EHFrameSID, + TextSID, + ExceptTabSID)); } +// The target location for the relocation is described by RE.SectionID and +// RE.Offset. RE.SectionID can be used to find the SectionEntry. Each +// SectionEntry has three members describing its location. +// SectionEntry::Address is the address at which the section has been loaded +// into memory in the current (host) process. SectionEntry::LoadAddress is the +// address that the section will have in the target process. +// SectionEntry::ObjAddress is the address of the bits for this section in the +// original emitted object image (also in the current address space). +// +// Relocations will be applied as if the section were loaded at +// SectionEntry::LoadAddress, but they will be applied at an address based +// on SectionEntry::Address. SectionEntry::ObjAddress will be used to refer to +// Target memory contents if they are required for value calculations. +// +// The Value parameter here is the load address of the symbol for the +// relocation to be applied. For relocations which refer to symbols in the +// current object Value will be the LoadAddress of the section in which +// the symbol resides (RE.Addend provides additional information about the +// symbol location). For external symbols, Value will be the address of the +// symbol in the target address space. void RuntimeDyldMachO::resolveRelocation(const RelocationEntry &RE, uint64_t Value) { const SectionEntry &Section = Sections[RE.SectionID]; @@ -160,7 +205,7 @@ bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress, switch (Type) { default: llvm_unreachable("Invalid relocation type!"); - case macho::RIT_Vanilla: { + case MachO::GENERIC_RELOC_VANILLA: { uint8_t *p = LocalAddress; uint64_t ValueToWrite = Value + Addend; for (unsigned i = 0; i < Size; ++i) { @@ -169,9 +214,9 @@ bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress, } return false; } - case macho::RIT_Difference: - case macho::RIT_Generic_LocalDifference: - case macho::RIT_Generic_PreboundLazyPointer: + case MachO::GENERIC_RELOC_SECTDIFF: + case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: + case MachO::GENERIC_RELOC_PB_LA_PTR: return Error("Relocation type not implemented yet!"); } } @@ -193,12 +238,12 @@ bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress, switch(Type) { default: llvm_unreachable("Invalid relocation type!"); - case macho::RIT_X86_64_Signed1: - case macho::RIT_X86_64_Signed2: - case macho::RIT_X86_64_Signed4: - case macho::RIT_X86_64_Signed: - case macho::RIT_X86_64_Unsigned: - case macho::RIT_X86_64_Branch: { + case MachO::X86_64_RELOC_SIGNED_1: + case MachO::X86_64_RELOC_SIGNED_2: + case MachO::X86_64_RELOC_SIGNED_4: + case MachO::X86_64_RELOC_SIGNED: + case MachO::X86_64_RELOC_UNSIGNED: + case MachO::X86_64_RELOC_BRANCH: { Value += Addend; // Mask in the target value a byte at a time (we don't have an alignment // guarantee for the target address, so this is safest). 
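// For example, writing 0x11223344 through this loop stores the bytes 44 33
// 22 11 at successive addresses; because every store is a single byte, no
// aligned multi-byte access is ever issued and the target address may be
// arbitrarily aligned.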
@@ -209,10 +254,10 @@ bool RuntimeDyldMachO::resolveX86_64Relocation(uint8_t *LocalAddress, } return false; } - case macho::RIT_X86_64_GOTLoad: - case macho::RIT_X86_64_GOT: - case macho::RIT_X86_64_Subtractor: - case macho::RIT_X86_64_TLV: + case MachO::X86_64_RELOC_GOT_LOAD: + case MachO::X86_64_RELOC_GOT: + case MachO::X86_64_RELOC_SUBTRACTOR: + case MachO::X86_64_RELOC_TLV: return Error("Relocation type not implemented yet!"); } } @@ -237,7 +282,7 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, switch(Type) { default: llvm_unreachable("Invalid relocation type!"); - case macho::RIT_Vanilla: { + case MachO::ARM_RELOC_VANILLA: { // Mask in the target value a byte at a time (we don't have an alignment // guarantee for the target address, so this is safest). uint8_t *p = (uint8_t*)LocalAddress; @@ -247,7 +292,7 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, } break; } - case macho::RIT_ARM_Branch24Bit: { + case MachO::ARM_RELOC_BR24: { // Mask the value into the target address. We know instructions are // 32-bit aligned, so we can do it all at once. uint32_t *p = (uint32_t*)LocalAddress; @@ -263,14 +308,14 @@ bool RuntimeDyldMachO::resolveARMRelocation(uint8_t *LocalAddress, *p = (*p & ~0xffffff) | Value; break; } - case macho::RIT_ARM_ThumbBranch22Bit: - case macho::RIT_ARM_ThumbBranch32Bit: - case macho::RIT_ARM_Half: - case macho::RIT_ARM_HalfDifference: - case macho::RIT_Pair: - case macho::RIT_Difference: - case macho::RIT_ARM_LocalDifference: - case macho::RIT_ARM_PreboundLazyPointer: + case MachO::ARM_THUMB_RELOC_BR22: + case MachO::ARM_THUMB_32BIT_BRANCH: + case MachO::ARM_RELOC_HALF: + case MachO::ARM_RELOC_HALF_SECTDIFF: + case MachO::ARM_RELOC_PAIR: + case MachO::ARM_RELOC_SECTDIFF: + case MachO::ARM_RELOC_LOCAL_SECTDIFF: + case MachO::ARM_RELOC_PB_LA_PTR: return Error("Relocation type not implemented yet!"); } return false; @@ -284,9 +329,19 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID, StubMap &Stubs) { const ObjectFile *OF = Obj.getObjectFile(); const MachOObjectFile *MachO = static_cast(OF); - macho::RelocationEntry RE = MachO->getRelocation(RelI.getRawDataRefImpl()); + MachO::any_relocation_info RE= MachO->getRelocation(RelI.getRawDataRefImpl()); uint32_t RelType = MachO->getAnyRelocationType(RE); + + // FIXME: Properly handle scattered relocations. + // For now, optimistically skip these: they can often be ignored, as + // the static linker will already have applied the relocation, and it + // only needs to be reapplied if symbols move relative to one another. + // Note: This will fail horribly where the relocations *do* need to be + // applied, but that was already the case. 
+ if (MachO->isRelocationScattered(RE)) + return; + RelocationValueRef Value; SectionEntry &Section = Sections[SectionID]; @@ -302,10 +357,9 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID, if (isExtern) { // Obtain the symbol name which is referenced in the relocation - SymbolRef Symbol; - RelI.getSymbol(Symbol); + symbol_iterator Symbol = RelI.getSymbol(); StringRef TargetName; - Symbol.getName(TargetName); + Symbol->getName(TargetName); // First search for the symbol in the local symbol table SymbolTableMap::const_iterator lsi = Symbols.find(TargetName.data()); if (lsi != Symbols.end()) { @@ -330,7 +384,8 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID, Value.Addend = Addend - Addr; } - if (Arch == Triple::x86_64 && RelType == macho::RIT_X86_64_GOT) { + if (Arch == Triple::x86_64 && (RelType == MachO::X86_64_RELOC_GOT || + RelType == MachO::X86_64_RELOC_GOT_LOAD)) { assert(IsPCRel); assert(Size == 2); StubMap::const_iterator i = Stubs.find(Value); @@ -341,8 +396,7 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID, Stubs[Value] = Section.StubOffset; uint8_t *GOTEntry = Section.Address + Section.StubOffset; RelocationEntry RE(SectionID, Section.StubOffset, - macho::RIT_X86_64_Unsigned, Value.Addend - 4, false, - 3); + MachO::X86_64_RELOC_UNSIGNED, 0, false, 3); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else @@ -351,9 +405,9 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID, Addr = GOTEntry; } resolveRelocation(Section, Offset, (uint64_t)Addr, - macho::RIT_X86_64_Unsigned, 4, true, 2); + MachO::X86_64_RELOC_UNSIGNED, Value.Addend, true, 2); } else if (Arch == Triple::arm && - (RelType & 0xf) == macho::RIT_ARM_Branch24Bit) { + (RelType & 0xf) == MachO::ARM_RELOC_BR24) { // This is an ARM branch relocation, need to use a stub function. // Look up for existing stub. @@ -368,7 +422,7 @@ void RuntimeDyldMachO::processRelocationRef(unsigned SectionID, uint8_t *StubTargetAddr = createStubFunction(Section.Address + Section.StubOffset); RelocationEntry RE(SectionID, StubTargetAddr - Section.Address, - macho::RIT_Vanilla, Value.Addend); + MachO::GENERIC_RELOC_VANILLA, Value.Addend); if (Value.SymbolName) addRelocationForSymbol(RE, Value.SymbolName); else diff --git a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index df8d3bb..bbf6aa9 100644 --- a/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/contrib/llvm/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -54,6 +54,35 @@ class RuntimeDyldMachO : public RuntimeDyldImpl { int64_t Addend, bool isPCRel, unsigned Size); + + unsigned getMaxStubSize() { + if (Arch == Triple::arm || Arch == Triple::thumb) + return 8; // 32-bit instruction and 32-bit address + else if (Arch == Triple::x86_64) + return 8; // GOT entry + else + return 0; + } + + unsigned getStubAlignment() { + return 1; + } + + struct EHFrameRelatedSections { + EHFrameRelatedSections() : EHFrameSID(RTDYLD_INVALID_SECTION_ID), + TextSID(RTDYLD_INVALID_SECTION_ID), + ExceptTabSID(RTDYLD_INVALID_SECTION_ID) {} + EHFrameRelatedSections(SID EH, SID T, SID Ex) + : EHFrameSID(EH), TextSID(T), ExceptTabSID(Ex) {} + SID EHFrameSID; + SID TextSID; + SID ExceptTabSID; + }; + + // When a module is loaded we save the SectionID of the EH frame section + // in a table until we receive a request to register all unregistered + // EH frame sections with the memory manager. 
+ SmallVector UnregisteredEHFrameSections; public: RuntimeDyldMachO(RTDyldMemoryManager *mm) : RuntimeDyldImpl(mm) {} @@ -65,7 +94,8 @@ public: const SymbolTableMap &Symbols, StubMap &Stubs); virtual bool isCompatibleFormat(const ObjectBuffer *Buffer) const; - virtual StringRef getEHFrameSection(); + virtual void registerEHFrames(); + virtual void finalizeLoad(ObjSectionToIDMap &SectionMap); }; } // end namespace llvm diff --git a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp index ca4330f..9b7d348 100644 --- a/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp +++ b/contrib/llvm/lib/ExecutionEngine/TargetSelect.cpp @@ -88,6 +88,14 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, FeaturesStr = Features.getString(); } + // FIXME: non-iOS ARM FastISel is broken with MCJIT. + if (UseMCJIT && + TheTriple.getArch() == Triple::arm && + !TheTriple.isiOS() && + OptLevel == CodeGenOpt::None) { + OptLevel = CodeGenOpt::Less; + } + // Allocate a target... TargetMachine *Target = TheTarget->createTargetMachine(TheTriple.getTriple(), MCPU, FeaturesStr, diff --git a/contrib/llvm/lib/IR/AsmWriter.cpp b/contrib/llvm/lib/IR/AsmWriter.cpp index a78b19c..7decffd 100644 --- a/contrib/llvm/lib/IR/AsmWriter.cpp +++ b/contrib/llvm/lib/IR/AsmWriter.cpp @@ -14,6 +14,8 @@ // //===----------------------------------------------------------------------===// +#include "AsmWriter.h" + #include "llvm/Assembly/Writer.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/STLExtras.h" @@ -38,6 +40,7 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/MathExtras.h" + #include #include using namespace llvm; @@ -71,6 +74,8 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { default: Out << "cc" << cc; break; case CallingConv::Fast: Out << "fastcc"; break; case CallingConv::Cold: Out << "coldcc"; break; + case CallingConv::WebKit_JS: Out << "webkit_jscc"; break; + case CallingConv::AnyReg: Out << "anyregcc"; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; @@ -155,35 +160,8 @@ static void PrintLLVMName(raw_ostream &OS, const Value *V) { isa(V) ? GlobalPrefix : LocalPrefix); } -//===----------------------------------------------------------------------===// -// TypePrinting Class: Type printing machinery -//===----------------------------------------------------------------------===// - -/// TypePrinting - Type printing machinery. -namespace { -class TypePrinting { - TypePrinting(const TypePrinting &) LLVM_DELETED_FUNCTION; - void operator=(const TypePrinting&) LLVM_DELETED_FUNCTION; -public: - - /// NamedTypes - The named types that are used by the current module. - TypeFinder NamedTypes; - - /// NumberedTypes - The numbered types, along with their value. - DenseMap NumberedTypes; - - - TypePrinting() {} - ~TypePrinting() {} - - void incorporateTypes(const Module &M); - - void print(Type *Ty, raw_ostream &OS); - - void printStructBody(StructType *Ty, raw_ostream &OS); -}; -} // end anonymous namespace. 
+namespace llvm { void TypePrinting::incorporateTypes(const Module &M) { NamedTypes.run(M, false); @@ -315,14 +293,9 @@ void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { OS << '>'; } - - //===----------------------------------------------------------------------===// // SlotTracker Class: Enumerate slot numbers for unnamed values //===----------------------------------------------------------------------===// - -namespace { - /// This class provides computation of slot numbers for LLVM Assembly writing. /// class SlotTracker { @@ -420,8 +393,9 @@ private: void operator=(const SlotTracker &) LLVM_DELETED_FUNCTION; }; -} // end anonymous namespace - +SlotTracker *createSlotTracker(const Module *M) { + return new SlotTracker(M); +} static SlotTracker *createSlotTracker(const Value *V) { if (const Argument *FA = dyn_cast(V)) @@ -1202,8 +1176,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V, Out << ""; } -void llvm::WriteAsOperand(raw_ostream &Out, const Value *V, - bool PrintType, const Module *Context) { +void WriteAsOperand(raw_ostream &Out, const Value *V, + bool PrintType, const Module *Context) { // Fast path: Don't construct and populate a TypePrinting object if we // won't be needing any types printed. @@ -1227,50 +1201,27 @@ void llvm::WriteAsOperand(raw_ostream &Out, const Value *V, WriteAsOperandInternal(Out, V, &TypePrinter, 0, Context); } -namespace { - -class AssemblyWriter { - formatted_raw_ostream &Out; - SlotTracker &Machine; - const Module *TheModule; - TypePrinting TypePrinter; - AssemblyAnnotationWriter *AnnotationWriter; - -public: - inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, - const Module *M, - AssemblyAnnotationWriter *AAW) - : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) { - if (M) - TypePrinter.incorporateTypes(*M); - } - - void printMDNodeBody(const MDNode *MD); - void printNamedMDNode(const NamedMDNode *NMD); - - void printModule(const Module *M); +void AssemblyWriter::init() { + if (TheModule) + TypePrinter.incorporateTypes(*TheModule); +} - void writeOperand(const Value *Op, bool PrintType); - void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx); - void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); - void writeAllMDNodes(); - void writeAllAttributeGroups(); +AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, + const Module *M, + AssemblyAnnotationWriter *AAW) + : Out(o), TheModule(M), Machine(Mac), AnnotationWriter(AAW) { + init(); +} - void printTypeIdentities(); - void printGlobal(const GlobalVariable *GV); - void printAlias(const GlobalAlias *GV); - void printFunction(const Function *F); - void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx); - void printBasicBlock(const BasicBlock *BB); - void printInstruction(const Instruction &I); +AssemblyWriter::AssemblyWriter(formatted_raw_ostream &o, const Module *M, + AssemblyAnnotationWriter *AAW) + : Out(o), TheModule(M), ModuleSlotTracker(createSlotTracker(M)), + Machine(*ModuleSlotTracker), AnnotationWriter(AAW) { + init(); +} -private: - // printInfoComment - Print a little comment after the instruction indicating - // which slot it occupies. 
- void printInfoComment(const Value &V); -}; -} // end of anonymous namespace +AssemblyWriter::~AssemblyWriter() { } void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) { if (Operand == 0) { @@ -1445,9 +1396,6 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT, case GlobalValue::InternalLinkage: Out << "internal "; break; case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break; case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break; - case GlobalValue::LinkOnceODRAutoHideLinkage: - Out << "linkonce_odr_auto_hide "; - break; case GlobalValue::WeakAnyLinkage: Out << "weak "; break; case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break; case GlobalValue::CommonLinkage: Out << "common "; break; @@ -1698,6 +1646,10 @@ void AssemblyWriter::printFunction(const Function *F) { Out << " align " << F->getAlignment(); if (F->hasGC()) Out << " gc \"" << F->getGC() << '"'; + if (F->hasPrefixData()) { + Out << " prefix "; + writeOperand(F->getPrefixData(), true); + } if (F->isDeclaration()) { Out << '\n'; } else { @@ -1774,13 +1726,18 @@ void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { // Output all of the instructions in the basic block... for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - printInstruction(*I); - Out << '\n'; + printInstructionLine(*I); } if (AnnotationWriter) AnnotationWriter->emitBasicBlockEndAnnot(BB, Out); } +/// printInstructionLine - Print an instruction and a newline character. +void AssemblyWriter::printInstructionLine(const Instruction &I) { + printInstruction(I); + Out << '\n'; +} + /// printInfoComment - Print a little comment after the instruction indicating /// which slot it occupies. /// @@ -2095,9 +2052,9 @@ void AssemblyWriter::printInstruction(const Instruction &I) { unsigned Kind = InstMD[i].first; if (Kind < MDNames.size()) { Out << ", !" << MDNames[Kind]; - } else { - Out << ", !"; - } + } else { + Out << ", !"; + } Out << ' '; WriteAsOperandInternal(Out, InstMD[i].second, &TypePrinter, &Machine, TheModule); @@ -2129,6 +2086,11 @@ static void WriteMDNodeComment(const MDNode *Node, } } +void AssemblyWriter::writeMDNode(unsigned Slot, const MDNode *Node) { + Out << '!' << Slot << " = metadata "; + printMDNodeBody(Node); +} + void AssemblyWriter::writeAllMDNodes() { SmallVector Nodes; Nodes.resize(Machine.mdn_size()); @@ -2137,8 +2099,7 @@ void AssemblyWriter::writeAllMDNodes() { Nodes[I->second] = cast(I->first); for (unsigned i = 0, e = Nodes.size(); i != e; ++i) { - Out << '!' << i << " = metadata "; - printMDNodeBody(Nodes[i]); + writeMDNode(i, Nodes[i]); } } @@ -2162,6 +2123,8 @@ void AssemblyWriter::writeAllAttributeGroups() { << I->first.getAsString(AttributeSet::FunctionIndex, true) << " }\n"; } +} // namespace llvm + //===----------------------------------------------------------------------===// // External Interface declarations //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/IR/AsmWriter.h b/contrib/llvm/lib/IR/AsmWriter.h new file mode 100644 index 0000000..8f4a377 --- /dev/null +++ b/contrib/llvm/lib/IR/AsmWriter.h @@ -0,0 +1,118 @@ +//===-- llvm/IR/AsmWriter.h - Printing LLVM IR as an assembly file - C++ --===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This files defines the interface for the AssemblyWriter class used to print +// LLVM IR and various helper classes that are used in printing. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_IR_ASSEMBLYWRITER_H +#define LLVM_IR_ASSEMBLYWRITER_H + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/Support/FormattedStream.h" + +namespace llvm { + +class BasicBlock; +class Function; +class GlobalValue; +class Module; +class NamedMDNode; +class Value; +class SlotTracker; + +/// Create a new SlotTracker for a Module +SlotTracker *createSlotTracker(const Module *M); + +//===----------------------------------------------------------------------===// +// TypePrinting Class: Type printing machinery +//===----------------------------------------------------------------------===// + +class TypePrinting { + TypePrinting(const TypePrinting &) LLVM_DELETED_FUNCTION; + void operator=(const TypePrinting&) LLVM_DELETED_FUNCTION; +public: + + /// NamedTypes - The named types that are used by the current module. + TypeFinder NamedTypes; + + /// NumberedTypes - The numbered types, along with their value. + DenseMap NumberedTypes; + + + TypePrinting() {} + ~TypePrinting() {} + + void incorporateTypes(const Module &M); + + void print(Type *Ty, raw_ostream &OS); + + void printStructBody(StructType *Ty, raw_ostream &OS); +}; + +class AssemblyWriter { +protected: + formatted_raw_ostream &Out; + const Module *TheModule; + +private: + OwningPtr ModuleSlotTracker; + SlotTracker &Machine; + TypePrinting TypePrinter; + AssemblyAnnotationWriter *AnnotationWriter; + +public: + /// Construct an AssemblyWriter with an external SlotTracker + AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac, + const Module *M, AssemblyAnnotationWriter *AAW); + + /// Construct an AssemblyWriter with an internally allocated SlotTracker + AssemblyWriter(formatted_raw_ostream &o, const Module *M, + AssemblyAnnotationWriter *AAW); + + virtual ~AssemblyWriter(); + + void printMDNodeBody(const MDNode *MD); + void printNamedMDNode(const NamedMDNode *NMD); + + void printModule(const Module *M); + + void writeOperand(const Value *Op, bool PrintType); + void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx); + void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope); + + void writeAllMDNodes(); + void writeMDNode(unsigned Slot, const MDNode *Node); + void writeAllAttributeGroups(); + + void printTypeIdentities(); + void printGlobal(const GlobalVariable *GV); + void printAlias(const GlobalAlias *GV); + void printFunction(const Function *F); + void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx); + void printBasicBlock(const BasicBlock *BB); + void printInstructionLine(const Instruction &I); + void printInstruction(const Instruction &I); + +private: + void init(); + + // printInfoComment - Print a little comment after the instruction indicating + // which slot it occupies. 
+ void printInfoComment(const Value &V); +}; + +} // namespace llvm + +#endif //LLVM_IR_ASMWRITER_H diff --git a/contrib/llvm/lib/IR/AttributeImpl.h b/contrib/llvm/lib/IR/AttributeImpl.h index 0b6228b..ea954ac 100644 --- a/contrib/llvm/lib/IR/AttributeImpl.h +++ b/contrib/llvm/lib/IR/AttributeImpl.h @@ -27,97 +27,30 @@ class LLVMContext; //===----------------------------------------------------------------------===// /// \class -/// \brief A set of classes that contain the kind and (optional) value of the -/// attribute object. There are three main categories: enum attribute entries, -/// represented by Attribute::AttrKind; alignment attribute entries; and string -/// attribute enties, which are for target-dependent attributes. -class AttributeEntry { - unsigned char KindID; +/// \brief This class represents a single, uniqued attribute. That attribute +/// could be a single enum, a tuple, or a string. +class AttributeImpl : public FoldingSetNode { + unsigned char KindID; ///< Holds the AttrEntryKind of the attribute + + // AttributesImpl is uniqued, these should not be publicly available. + void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION; + AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION; + protected: enum AttrEntryKind { EnumAttrEntry, AlignAttrEntry, StringAttrEntry }; -public: - AttributeEntry(AttrEntryKind Kind) - : KindID(Kind) {} - virtual ~AttributeEntry() {} - unsigned getKindID() const { return KindID; } + AttributeImpl(AttrEntryKind KindID) : KindID(KindID) {} - static inline bool classof(const AttributeEntry *) { return true; } -}; - -class EnumAttributeEntry : public AttributeEntry { - Attribute::AttrKind Kind; public: - EnumAttributeEntry(Attribute::AttrKind Kind) - : AttributeEntry(EnumAttrEntry), Kind(Kind) {} - - Attribute::AttrKind getEnumKind() const { return Kind; } - - static inline bool classof(const AttributeEntry *AE) { - return AE->getKindID() == EnumAttrEntry; - } - static inline bool classof(const EnumAttributeEntry *) { return true; } -}; + virtual ~AttributeImpl(); -class AlignAttributeEntry : public AttributeEntry { - Attribute::AttrKind Kind; - unsigned Align; -public: - AlignAttributeEntry(Attribute::AttrKind Kind, unsigned Align) - : AttributeEntry(AlignAttrEntry), Kind(Kind), Align(Align) {} - - Attribute::AttrKind getEnumKind() const { return Kind; } - unsigned getAlignment() const { return Align; } - - static inline bool classof(const AttributeEntry *AE) { - return AE->getKindID() == AlignAttrEntry; - } - static inline bool classof(const AlignAttributeEntry *) { return true; } -}; - -class StringAttributeEntry : public AttributeEntry { - std::string Kind; - std::string Val; -public: - StringAttributeEntry(StringRef Kind, StringRef Val = StringRef()) - : AttributeEntry(StringAttrEntry), Kind(Kind), Val(Val) {} - - StringRef getStringKind() const { return Kind; } - StringRef getStringValue() const { return Val; } - - static inline bool classof(const AttributeEntry *AE) { - return AE->getKindID() == StringAttrEntry; - } - static inline bool classof(const StringAttributeEntry *) { return true; } -}; - -//===----------------------------------------------------------------------===// -/// \class -/// \brief This class represents a single, uniqued attribute. That attribute -/// could be a single enum, a tuple, or a string. 
-class AttributeImpl : public FoldingSetNode { - LLVMContext &Context; ///< Global context for uniquing objects - - AttributeEntry *Entry; ///< Holds the kind and value of the attribute - - // AttributesImpl is uniqued, these should not be publicly available. - void operator=(const AttributeImpl &) LLVM_DELETED_FUNCTION; - AttributeImpl(const AttributeImpl &) LLVM_DELETED_FUNCTION; -public: - AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind); - AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind, unsigned Align); - AttributeImpl(LLVMContext &C, StringRef Kind, StringRef Val = StringRef()); - ~AttributeImpl(); - - LLVMContext &getContext() { return Context; } - - bool isEnumAttribute() const; - bool isAlignAttribute() const; - bool isStringAttribute() const; + bool isEnumAttribute() const { return KindID == EnumAttrEntry; } + bool isAlignAttribute() const { return KindID == AlignAttrEntry; } + bool isStringAttribute() const { return KindID == StringAttrEntry; } bool hasAttribute(Attribute::AttrKind A) const; bool hasAttribute(StringRef Kind) const; @@ -155,13 +88,66 @@ public: //===----------------------------------------------------------------------===// /// \class +/// \brief A set of classes that contain the value of the +/// attribute object. There are three main categories: enum attribute entries, +/// represented by Attribute::AttrKind; alignment attribute entries; and string +/// attribute enties, which are for target-dependent attributes. + +class EnumAttributeImpl : public AttributeImpl { + virtual void anchor(); + Attribute::AttrKind Kind; + +protected: + EnumAttributeImpl(AttrEntryKind ID, Attribute::AttrKind Kind) + : AttributeImpl(ID), Kind(Kind) {} + +public: + EnumAttributeImpl(Attribute::AttrKind Kind) + : AttributeImpl(EnumAttrEntry), Kind(Kind) {} + + Attribute::AttrKind getEnumKind() const { return Kind; } +}; + +class AlignAttributeImpl : public EnumAttributeImpl { + virtual void anchor(); + unsigned Align; + +public: + AlignAttributeImpl(Attribute::AttrKind Kind, unsigned Align) + : EnumAttributeImpl(AlignAttrEntry, Kind), Align(Align) { + assert( + (Kind == Attribute::Alignment || Kind == Attribute::StackAlignment) && + "Wrong kind for alignment attribute!"); + } + + unsigned getAlignment() const { return Align; } +}; + +class StringAttributeImpl : public AttributeImpl { + virtual void anchor(); + std::string Kind; + std::string Val; + +public: + StringAttributeImpl(StringRef Kind, StringRef Val = StringRef()) + : AttributeImpl(StringAttrEntry), Kind(Kind), Val(Val) {} + + StringRef getStringKind() const { return Kind; } + StringRef getStringValue() const { return Val; } +}; + +//===----------------------------------------------------------------------===// +/// \class /// \brief This class represents a group of attributes that apply to one /// element: function, return type, or parameter. class AttributeSetNode : public FoldingSetNode { - SmallVector AttrList; + unsigned NumAttrs; ///< Number of attributes in this node. - AttributeSetNode(ArrayRef Attrs) - : AttrList(Attrs.begin(), Attrs.end()) {} + AttributeSetNode(ArrayRef Attrs) : NumAttrs(Attrs.size()) { + // There's memory after the node where we can store the entries in. + std::copy(Attrs.begin(), Attrs.end(), + reinterpret_cast(this + 1)); + } // AttributesSetNode is uniqued, these should not be publicly available. 
void operator=(const AttributeSetNode &) LLVM_DELETED_FUNCTION; @@ -171,7 +157,7 @@ public: bool hasAttribute(Attribute::AttrKind Kind) const; bool hasAttribute(StringRef Kind) const; - bool hasAttributes() const { return !AttrList.empty(); } + bool hasAttributes() const { return NumAttrs != 0; } Attribute getAttribute(Attribute::AttrKind Kind) const; Attribute getAttribute(StringRef Kind) const; @@ -180,17 +166,12 @@ public: unsigned getStackAlignment() const; std::string getAsString(bool InAttrGrp) const; - typedef SmallVectorImpl::iterator iterator; - typedef SmallVectorImpl::const_iterator const_iterator; - - iterator begin() { return AttrList.begin(); } - iterator end() { return AttrList.end(); } - - const_iterator begin() const { return AttrList.begin(); } - const_iterator end() const { return AttrList.end(); } + typedef const Attribute *iterator; + iterator begin() const { return reinterpret_cast(this + 1); } + iterator end() const { return begin() + NumAttrs; } void Profile(FoldingSetNodeID &ID) const { - Profile(ID, AttrList); + Profile(ID, makeArrayRef(begin(), end())); } static void Profile(FoldingSetNodeID &ID, ArrayRef AttrList) { for (unsigned I = 0, E = AttrList.size(); I != E; ++I) @@ -208,58 +189,67 @@ class AttributeSetImpl : public FoldingSetNode { LLVMContext &Context; typedef std::pair IndexAttrPair; - SmallVector AttrNodes; + unsigned NumAttrs; ///< Number of entries in this set. + + /// \brief Return a pointer to the IndexAttrPair for the specified slot. + const IndexAttrPair *getNode(unsigned Slot) const { + return reinterpret_cast(this + 1) + Slot; + } // AttributesSet is uniqued, these should not be publicly available. void operator=(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; AttributeSetImpl(const AttributeSetImpl &) LLVM_DELETED_FUNCTION; public: AttributeSetImpl(LLVMContext &C, - ArrayRef > attrs) - : Context(C), AttrNodes(attrs.begin(), attrs.end()) {} + ArrayRef > Attrs) + : Context(C), NumAttrs(Attrs.size()) { +#ifndef NDEBUG + if (Attrs.size() >= 2) { + for (const std::pair *i = Attrs.begin() + 1, + *e = Attrs.end(); + i != e; ++i) { + assert((i-1)->first <= i->first && "Attribute set not ordered!"); + } + } +#endif + // There's memory after the node where we can store the entries in. + std::copy(Attrs.begin(), Attrs.end(), + reinterpret_cast(this + 1)); + } /// \brief Get the context that created this AttributeSetImpl. LLVMContext &getContext() { return Context; } /// \brief Return the number of attributes this AttributeSet contains. - unsigned getNumAttributes() const { return AttrNodes.size(); } + unsigned getNumAttributes() const { return NumAttrs; } /// \brief Get the index of the given "slot" in the AttrNodes list. This index /// is the index of the return, parameter, or function object that the /// attributes are applied to, not the index into the AttrNodes list where the /// attributes reside. unsigned getSlotIndex(unsigned Slot) const { - return AttrNodes[Slot].first; + return getNode(Slot)->first; } /// \brief Retrieve the attributes for the given "slot" in the AttrNode list. /// \p Slot is an index into the AttrNodes list, not the index of the return / /// parameter/ function which the attributes apply to. AttributeSet getSlotAttributes(unsigned Slot) const { - return AttributeSet::get(Context, AttrNodes[Slot]); + return AttributeSet::get(Context, *getNode(Slot)); } /// \brief Retrieve the attribute set node for the given "slot" in the /// AttrNode list. 
AttributeSetNode *getSlotNode(unsigned Slot) const { - return AttrNodes[Slot].second; + return getNode(Slot)->second; } - typedef AttributeSetNode::iterator iterator; - typedef AttributeSetNode::const_iterator const_iterator; - - iterator begin(unsigned Slot) - { return AttrNodes[Slot].second->begin(); } - iterator end(unsigned Slot) - { return AttrNodes[Slot].second->end(); } - - const_iterator begin(unsigned Slot) const - { return AttrNodes[Slot].second->begin(); } - const_iterator end(unsigned Slot) const - { return AttrNodes[Slot].second->end(); } + typedef AttributeSetNode::iterator iterator; + iterator begin(unsigned Slot) const { return getSlotNode(Slot)->begin(); } + iterator end(unsigned Slot) const { return getSlotNode(Slot)->end(); } void Profile(FoldingSetNodeID &ID) const { - Profile(ID, AttrNodes); + Profile(ID, makeArrayRef(getNode(0), getNumAttributes())); } static void Profile(FoldingSetNodeID &ID, ArrayRef > Nodes) { @@ -271,6 +261,8 @@ public: // FIXME: This atrocity is temporary. uint64_t Raw(unsigned Index) const; + + void dump() const; }; } // end llvm namespace diff --git a/contrib/llvm/lib/IR/Attributes.cpp b/contrib/llvm/lib/IR/Attributes.cpp index 4fe6f9d..0f2b7a0 100644 --- a/contrib/llvm/lib/IR/Attributes.cpp +++ b/contrib/llvm/lib/IR/Attributes.cpp @@ -43,9 +43,10 @@ Attribute Attribute::get(LLVMContext &Context, Attribute::AttrKind Kind, if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = !Val ? - new AttributeImpl(Context, Kind) : - new AttributeImpl(Context, Kind, Val); + if (!Val) + PA = new EnumAttributeImpl(Kind); + else + PA = new AlignAttributeImpl(Kind, Val); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -65,7 +66,7 @@ Attribute Attribute::get(LLVMContext &Context, StringRef Kind, StringRef Val) { if (!PA) { // If we didn't find any existing attributes of the same shape then create a // new one and insert it. - PA = new AttributeImpl(Context, Kind, Val); + PA = new StringAttributeImpl(Kind, Val); pImpl->AttrsSet.InsertNode(PA, InsertPoint); } @@ -103,24 +104,28 @@ bool Attribute::isStringAttribute() const { } Attribute::AttrKind Attribute::getKindAsEnum() const { + if (!pImpl) return None; assert((isEnumAttribute() || isAlignAttribute()) && "Invalid attribute type to get the kind as an enum!"); return pImpl ? pImpl->getKindAsEnum() : None; } uint64_t Attribute::getValueAsInt() const { + if (!pImpl) return 0; assert(isAlignAttribute() && "Expected the attribute to be an alignment attribute!"); return pImpl ? pImpl->getValueAsInt() : 0; } StringRef Attribute::getKindAsString() const { + if (!pImpl) return StringRef(); assert(isStringAttribute() && "Invalid attribute type to get the kind as a string!"); return pImpl ? pImpl->getKindAsString() : StringRef(); } StringRef Attribute::getValueAsString() const { + if (!pImpl) return StringRef(); assert(isStringAttribute() && "Invalid attribute type to get the value as a string!"); return pImpl ? 
pImpl->getValueAsString() : StringRef(); @@ -157,6 +162,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "sanitize_address"; if (hasAttribute(Attribute::AlwaysInline)) return "alwaysinline"; + if (hasAttribute(Attribute::Builtin)) + return "builtin"; if (hasAttribute(Attribute::ByVal)) return "byval"; if (hasAttribute(Attribute::InlineHint)) @@ -189,6 +196,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "noreturn"; if (hasAttribute(Attribute::NoUnwind)) return "nounwind"; + if (hasAttribute(Attribute::OptimizeNone)) + return "optnone"; if (hasAttribute(Attribute::OptimizeForSize)) return "optsize"; if (hasAttribute(Attribute::ReadNone)) @@ -217,6 +226,8 @@ std::string Attribute::getAsString(bool InAttrGrp) const { return "uwtable"; if (hasAttribute(Attribute::ZExt)) return "zeroext"; + if (hasAttribute(Attribute::Cold)) + return "cold"; // FIXME: These should be output like this: // @@ -275,35 +286,11 @@ bool Attribute::operator<(Attribute A) const { // AttributeImpl Definition //===----------------------------------------------------------------------===// -AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind) - : Context(C), Entry(new EnumAttributeEntry(Kind)) {} - -AttributeImpl::AttributeImpl(LLVMContext &C, Attribute::AttrKind Kind, - unsigned Align) - : Context(C) { - assert((Kind == Attribute::Alignment || Kind == Attribute::StackAlignment) && - "Wrong kind for alignment attribute!"); - Entry = new AlignAttributeEntry(Kind, Align); -} - -AttributeImpl::AttributeImpl(LLVMContext &C, StringRef Kind, StringRef Val) - : Context(C), Entry(new StringAttributeEntry(Kind, Val)) {} - -AttributeImpl::~AttributeImpl() { - delete Entry; -} - -bool AttributeImpl::isEnumAttribute() const { - return isa(Entry); -} - -bool AttributeImpl::isAlignAttribute() const { - return isa(Entry); -} - -bool AttributeImpl::isStringAttribute() const { - return isa(Entry); -} +// Pin the vtabels to this file. 
+AttributeImpl::~AttributeImpl() {} +void EnumAttributeImpl::anchor() {} +void AlignAttributeImpl::anchor() {} +void StringAttributeImpl::anchor() {} bool AttributeImpl::hasAttribute(Attribute::AttrKind A) const { if (isStringAttribute()) return false; @@ -316,21 +303,23 @@ bool AttributeImpl::hasAttribute(StringRef Kind) const { } Attribute::AttrKind AttributeImpl::getKindAsEnum() const { - if (EnumAttributeEntry *E = dyn_cast(Entry)) - return E->getEnumKind(); - return cast(Entry)->getEnumKind(); + assert(isEnumAttribute() || isAlignAttribute()); + return static_cast(this)->getEnumKind(); } uint64_t AttributeImpl::getValueAsInt() const { - return cast(Entry)->getAlignment(); + assert(isAlignAttribute()); + return static_cast(this)->getAlignment(); } StringRef AttributeImpl::getKindAsString() const { - return cast(Entry)->getStringKind(); + assert(isStringAttribute()); + return static_cast(this)->getStringKind(); } StringRef AttributeImpl::getValueAsString() const { - return cast(Entry)->getStringValue(); + assert(isStringAttribute()); + return static_cast(this)->getStringValue(); } bool AttributeImpl::operator<(const AttributeImpl &AI) const { @@ -396,6 +385,9 @@ uint64_t AttributeImpl::getAttrMask(Attribute::AttrKind Val) { case Attribute::SanitizeMemory: return 1ULL << 37; case Attribute::NoBuiltin: return 1ULL << 38; case Attribute::Returned: return 1ULL << 39; + case Attribute::Cold: return 1ULL << 40; + case Attribute::Builtin: return 1ULL << 41; + case Attribute::OptimizeNone: return 1ULL << 42; } llvm_unreachable("Unsupported attribute type"); } @@ -427,7 +419,10 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, // If we didn't find any existing attributes of the same shape then create a // new one and insert it. if (!PA) { - PA = new AttributeSetNode(SortedAttrs); + // Coallocate entries after the AttributeSetNode itself. 
+ void *Mem = ::operator new(sizeof(AttributeSetNode) + + sizeof(Attribute) * SortedAttrs.size()); + PA = new (Mem) AttributeSetNode(SortedAttrs); pImpl->AttrsSetNodes.InsertNode(PA, InsertPoint); } @@ -436,48 +431,42 @@ AttributeSetNode *AttributeSetNode::get(LLVMContext &C, } bool AttributeSetNode::hasAttribute(Attribute::AttrKind Kind) const { - for (SmallVectorImpl::const_iterator I = AttrList.begin(), - E = AttrList.end(); I != E; ++I) + for (iterator I = begin(), E = end(); I != E; ++I) if (I->hasAttribute(Kind)) return true; return false; } bool AttributeSetNode::hasAttribute(StringRef Kind) const { - for (SmallVectorImpl::const_iterator I = AttrList.begin(), - E = AttrList.end(); I != E; ++I) + for (iterator I = begin(), E = end(); I != E; ++I) if (I->hasAttribute(Kind)) return true; return false; } Attribute AttributeSetNode::getAttribute(Attribute::AttrKind Kind) const { - for (SmallVectorImpl::const_iterator I = AttrList.begin(), - E = AttrList.end(); I != E; ++I) + for (iterator I = begin(), E = end(); I != E; ++I) if (I->hasAttribute(Kind)) return *I; return Attribute(); } Attribute AttributeSetNode::getAttribute(StringRef Kind) const { - for (SmallVectorImpl::const_iterator I = AttrList.begin(), - E = AttrList.end(); I != E; ++I) + for (iterator I = begin(), E = end(); I != E; ++I) if (I->hasAttribute(Kind)) return *I; return Attribute(); } unsigned AttributeSetNode::getAlignment() const { - for (SmallVectorImpl::const_iterator I = AttrList.begin(), - E = AttrList.end(); I != E; ++I) + for (iterator I = begin(), E = end(); I != E; ++I) if (I->hasAttribute(Attribute::Alignment)) return I->getAlignment(); return 0; } unsigned AttributeSetNode::getStackAlignment() const { - for (SmallVectorImpl::const_iterator I = AttrList.begin(), - E = AttrList.end(); I != E; ++I) + for (iterator I = begin(), E = end(); I != E; ++I) if (I->hasAttribute(Attribute::StackAlignment)) return I->getStackAlignment(); return 0; @@ -485,9 +474,8 @@ unsigned AttributeSetNode::getStackAlignment() const { std::string AttributeSetNode::getAsString(bool InAttrGrp) const { std::string Str; - for (SmallVectorImpl::const_iterator I = AttrList.begin(), - E = AttrList.end(); I != E; ++I) { - if (I != AttrList.begin()) + for (iterator I = begin(), E = end(); I != E; ++I) { + if (I != begin()) Str += ' '; Str += I->getAsString(InAttrGrp); } @@ -501,10 +489,10 @@ std::string AttributeSetNode::getAsString(bool InAttrGrp) const { uint64_t AttributeSetImpl::Raw(unsigned Index) const { for (unsigned I = 0, E = getNumAttributes(); I != E; ++I) { if (getSlotIndex(I) != Index) continue; - const AttributeSetNode *ASN = AttrNodes[I].second; + const AttributeSetNode *ASN = getSlotNode(I); uint64_t Mask = 0; - for (AttributeSetNode::const_iterator II = ASN->begin(), + for (AttributeSetNode::iterator II = ASN->begin(), IE = ASN->end(); II != IE; ++II) { Attribute Attr = *II; @@ -527,6 +515,10 @@ uint64_t AttributeSetImpl::Raw(unsigned Index) const { return 0; } +void AttributeSetImpl::dump() const { + AttributeSet(const_cast(this)).dump(); +} + //===----------------------------------------------------------------------===// // AttributeSet Construction and Mutation Methods //===----------------------------------------------------------------------===// @@ -544,7 +536,11 @@ AttributeSet::getImpl(LLVMContext &C, // If we didn't find any existing attributes of the same shape then // create a new one and insert it. if (!PA) { - PA = new AttributeSetImpl(C, Attrs); + // Coallocate entries after the AttributeSetImpl itself. 
+ void *Mem = ::operator new(sizeof(AttributeSetImpl) + + sizeof(std::pair) * + Attrs.size()); + PA = new (Mem) AttributeSetImpl(C, Attrs); pImpl->AttrsLists.InsertNode(PA, InsertPoint); } @@ -636,12 +632,30 @@ AttributeSet AttributeSet::get(LLVMContext &C, unsigned Index, AttributeSet AttributeSet::get(LLVMContext &C, ArrayRef Attrs) { if (Attrs.empty()) return AttributeSet(); + if (Attrs.size() == 1) return Attrs[0]; SmallVector, 8> AttrNodeVec; - for (unsigned I = 0, E = Attrs.size(); I != E; ++I) { - AttributeSet AS = Attrs[I]; - if (!AS.pImpl) continue; - AttrNodeVec.append(AS.pImpl->AttrNodes.begin(), AS.pImpl->AttrNodes.end()); + AttributeSetImpl *A0 = Attrs[0].pImpl; + if (A0) + AttrNodeVec.append(A0->getNode(0), A0->getNode(A0->getNumAttributes())); + // Copy all attributes from Attrs into AttrNodeVec while keeping AttrNodeVec + // ordered by index. Because we know that each list in Attrs is ordered by + // index we only need to merge each successive list in rather than doing a + // full sort. + for (unsigned I = 1, E = Attrs.size(); I != E; ++I) { + AttributeSetImpl *AS = Attrs[I].pImpl; + if (!AS) continue; + SmallVector, 8>::iterator + ANVI = AttrNodeVec.begin(), ANVE; + for (const AttributeSetImpl::IndexAttrPair + *AI = AS->getNode(0), + *AE = AS->getNode(AS->getNumAttributes()); + AI != AE; ++AI) { + ANVE = AttrNodeVec.end(); + while (ANVI != ANVE && ANVI->first <= AI->first) + ++ANVI; + ANVI = AttrNodeVec.insert(ANVI, *AI) + 1; + } } return getImpl(C, AttrNodeVec); @@ -660,6 +674,13 @@ AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index, return addAttributes(C, Index, AttributeSet::get(C, Index, B)); } +AttributeSet AttributeSet::addAttribute(LLVMContext &C, unsigned Index, + StringRef Kind, StringRef Value) const { + llvm::AttrBuilder B; + B.addAttribute(Kind, Value); + return addAttributes(C, Index, AttributeSet::get(C, Index, B)); +} + AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index, AttributeSet Attrs) const { if (!pImpl) return Attrs; @@ -694,7 +715,7 @@ AttributeSet AttributeSet::addAttributes(LLVMContext &C, unsigned Index, for (unsigned I = 0, E = Attrs.pImpl->getNumAttributes(); I != E; ++I) if (Attrs.getSlotIndex(I) == Index) { - for (AttributeSetImpl::const_iterator II = Attrs.pImpl->begin(I), + for (AttributeSetImpl::iterator II = Attrs.pImpl->begin(I), IE = Attrs.pImpl->end(I); II != IE; ++II) B.addAttribute(*II); break; @@ -815,7 +836,7 @@ bool AttributeSet::hasAttrSomewhere(Attribute::AttrKind Attr) const { if (pImpl == 0) return false; for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) - for (AttributeSetImpl::const_iterator II = pImpl->begin(I), + for (AttributeSetImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I); II != IE; ++II) if (II->hasAttribute(Attr)) return true; @@ -931,7 +952,7 @@ AttrBuilder::AttrBuilder(AttributeSet AS, unsigned Index) for (unsigned I = 0, E = pImpl->getNumAttributes(); I != E; ++I) { if (pImpl->getSlotIndex(I) != Index) continue; - for (AttributeSetImpl::const_iterator II = pImpl->begin(I), + for (AttributeSetImpl::iterator II = pImpl->begin(I), IE = pImpl->end(I); II != IE; ++II) addAttribute(*II); @@ -1151,6 +1172,8 @@ AttributeSet AttributeFuncs::typeIncompatible(Type *Ty, uint64_t Index) { .addAttribute(Attribute::Nest) .addAttribute(Attribute::NoAlias) .addAttribute(Attribute::NoCapture) + .addAttribute(Attribute::ReadNone) + .addAttribute(Attribute::ReadOnly) .addAttribute(Attribute::StructRet); return AttributeSet::get(Ty->getContext(), Index, Incompatible); 
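The AttributeImpl.h and Attributes.cpp hunks above replace the SmallVector
members of AttributeSetNode and AttributeSetImpl with a bare element count
plus entries stored in memory directly after the object itself (the
"Coallocate entries after the AttributeSetNode itself" comment and its
AttributeSetImpl twin). A minimal sketch of that trailing-array idiom,
using hypothetical names rather than the real LLVM classes:

  #include <algorithm>
  #include <new>

  class Node {
    unsigned NumElems;

    Node(const int *Elems, unsigned N) : NumElems(N) {
      // create() reserved room for the elements right after this object;
      // copy them into that trailing memory. Only safe for trivially
      // copyable element types (int here, the pointer-sized Attribute in
      // the hunks above).
      std::copy(Elems, Elems + N, reinterpret_cast<int *>(this + 1));
    }

  public:
    static Node *create(const int *Elems, unsigned N) {
      // A single allocation holds both the header and the trailing array.
      void *Mem = ::operator new(sizeof(Node) + sizeof(int) * N);
      return new (Mem) Node(Elems, N);
    }

    // Iteration treats "one past this object" as the start of the array.
    const int *begin() const { return reinterpret_cast<const int *>(this + 1); }
    const int *end() const { return begin() + NumElems; }
  };

The payoff is fewer heap allocations and better locality for each uniqued
node; the cost is that the object's true size is no longer sizeof(Node),
so construction has to go through a factory such as create().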
diff --git a/contrib/llvm/lib/IR/AutoUpgrade.cpp b/contrib/llvm/lib/IR/AutoUpgrade.cpp index f237537..d12bf7b 100644 --- a/contrib/llvm/lib/IR/AutoUpgrade.cpp +++ b/contrib/llvm/lib/IR/AutoUpgrade.cpp @@ -7,11 +7,12 @@ // //===----------------------------------------------------------------------===// // -// This file implements the auto-upgrade helper functions +// This file implements the auto-upgrade helper functions // //===----------------------------------------------------------------------===// #include "llvm/AutoUpgrade.h" +#include "llvm/DebugInfo.h" #include "llvm/IR/Constants.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" @@ -55,14 +56,14 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { case 'a': { if (Name.startswith("arm.neon.vclz")) { Type* args[2] = { - F->arg_begin()->getType(), + F->arg_begin()->getType(), Type::getInt1Ty(F->getContext()) }; // Can't use Intrinsic::getDeclaration here as it adds a ".i1" to // the end of the name. Change name from llvm.arm.neon.vclz.* to // llvm.ctlz.* FunctionType* fType = FunctionType::get(F->getReturnType(), args, false); - NewFn = Function::Create(fType, F->getLinkage(), + NewFn = Function::Create(fType, F->getLinkage(), "llvm.ctlz." + Name.substr(14), F->getParent()); return true; } @@ -88,6 +89,20 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { } break; } + case 'o': + // We only need to change the name to match the mangling including the + // address space. + if (F->arg_size() == 2 && Name.startswith("objectsize.")) { + Type *Tys[2] = { F->getReturnType(), F->arg_begin()->getType() }; + if (F->getName() != Intrinsic::getName(Intrinsic::objectsize, Tys)) { + F->setName(Name + ".old"); + NewFn = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::objectsize, Tys); + return true; + } + } + break; + case 'x': { if (Name.startswith("x86.sse2.pcmpeq.") || Name.startswith("x86.sse2.pcmpgt.") || @@ -97,6 +112,7 @@ static bool UpgradeIntrinsicFunction1(Function *F, Function *&NewFn) { Name == "x86.avx.movnt.dq.256" || Name == "x86.avx.movnt.pd.256" || Name == "x86.avx.movnt.ps.256" || + Name == "x86.sse42.crc32.64.8" || (Name.startswith("x86.xop.vpcom") && F->arg_size() == 2)) { NewFn = 0; return true; @@ -257,6 +273,12 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { Function *VPCOM = Intrinsic::getDeclaration(F->getParent(), intID); Rep = Builder.CreateCall3(VPCOM, CI->getArgOperand(0), CI->getArgOperand(1), Builder.getInt8(Imm)); + } else if (Name == "llvm.x86.sse42.crc32.64.8") { + Function *CRC32 = Intrinsic::getDeclaration(F->getParent(), + Intrinsic::x86_sse42_crc32_32_8); + Value *Trunc0 = Builder.CreateTrunc(CI->getArgOperand(0), Type::getInt32Ty(C)); + Rep = Builder.CreateCall2(CRC32, Trunc0, CI->getArgOperand(1)); + Rep = Builder.CreateZExt(Rep, CI->getType(), ""); } else { bool PD128 = false, PD256 = false, PS128 = false, PS256 = false; if (Name == "llvm.x86.avx.vpermil.pd.256") @@ -317,6 +339,14 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { CI->eraseFromParent(); return; + case Intrinsic::objectsize: + CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, + CI->getArgOperand(0), + CI->getArgOperand(1), + Name)); + CI->eraseFromParent(); + return; + case Intrinsic::arm_neon_vclz: { // Change name from llvm.arm.neon.vclz.* to llvm.ctlz.* CI->replaceAllUsesWith(Builder.CreateCall2(NewFn, CI->getArgOperand(0), @@ -369,8 +399,8 @@ void llvm::UpgradeIntrinsicCall(CallInst *CI, Function *NewFn) { } } -// This tests each Function 
to determine if it needs upgrading. When we find -// one we are interested in, we then upgrade all calls to reflect the new +// This tests each Function to determine if it needs upgrading. When we find +// one we are interested in, we then upgrade all calls to reflect the new // function. void llvm::UpgradeCallsToIntrinsic(Function* F) { assert(F && "Illegal attempt to upgrade a non-existent intrinsic."); @@ -391,3 +421,81 @@ void llvm::UpgradeCallsToIntrinsic(Function* F) { } } +void llvm::UpgradeInstWithTBAATag(Instruction *I) { + MDNode *MD = I->getMetadata(LLVMContext::MD_tbaa); + assert(MD && "UpgradeInstWithTBAATag should have a TBAA tag"); + // Check if the tag uses struct-path aware TBAA format. + if (isa(MD->getOperand(0)) && MD->getNumOperands() >= 3) + return; + + if (MD->getNumOperands() == 3) { + Value *Elts[] = { + MD->getOperand(0), + MD->getOperand(1) + }; + MDNode *ScalarType = MDNode::get(I->getContext(), Elts); + // Create a MDNode + Value *Elts2[] = { + ScalarType, ScalarType, + Constant::getNullValue(Type::getInt64Ty(I->getContext())), + MD->getOperand(2) + }; + I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts2)); + } else { + // Create a MDNode + Value *Elts[] = {MD, MD, + Constant::getNullValue(Type::getInt64Ty(I->getContext()))}; + I->setMetadata(LLVMContext::MD_tbaa, MDNode::get(I->getContext(), Elts)); + } +} + +Instruction *llvm::UpgradeBitCastInst(unsigned Opc, Value *V, Type *DestTy, + Instruction *&Temp) { + if (Opc != Instruction::BitCast) + return 0; + + Temp = 0; + Type *SrcTy = V->getType(); + if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && + SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { + LLVMContext &Context = V->getContext(); + + // We have no information about target data layout, so we assume that + // the maximum pointer size is 64bit. + Type *MidTy = Type::getInt64Ty(Context); + Temp = CastInst::Create(Instruction::PtrToInt, V, MidTy); + + return CastInst::Create(Instruction::IntToPtr, Temp, DestTy); + } + + return 0; +} + +Value *llvm::UpgradeBitCastExpr(unsigned Opc, Constant *C, Type *DestTy) { + if (Opc != Instruction::BitCast) + return 0; + + Type *SrcTy = C->getType(); + if (SrcTy->isPtrOrPtrVectorTy() && DestTy->isPtrOrPtrVectorTy() && + SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace()) { + LLVMContext &Context = C->getContext(); + + // We have no information about target data layout, so we assume that + // the maximum pointer size is 64bit. + Type *MidTy = Type::getInt64Ty(Context); + + return ConstantExpr::getIntToPtr(ConstantExpr::getPtrToInt(C, MidTy), + DestTy); + } + + return 0; +} + +/// Check the debug info version number, if it is out-dated, drop the debug +/// info. Return true if module is modified. 
+bool llvm::UpgradeDebugInfo(Module &M) { + if (getDebugMetadataVersionFromModule(M) == DEBUG_METADATA_VERSION) + return false; + + return StripDebugInfo(M); +} diff --git a/contrib/llvm/lib/IR/ConstantFold.cpp b/contrib/llvm/lib/IR/ConstantFold.cpp index bf93d4f..f5e225c 100644 --- a/contrib/llvm/lib/IR/ConstantFold.cpp +++ b/contrib/llvm/lib/IR/ConstantFold.cpp @@ -75,7 +75,7 @@ static unsigned foldConstantCastPair( unsigned opc, ///< opcode of the second cast constant expression ConstantExpr *Op, ///< the first cast constant expression - Type *DstTy ///< desintation type of the first cast + Type *DstTy ///< destination type of the first cast ) { assert(Op && Op->isCast() && "Can't fold cast of cast without a cast!"); assert(DstTy && DstTy->isFirstClassType() && "Invalid cast destination type"); @@ -87,13 +87,14 @@ foldConstantCastPair( Instruction::CastOps firstOp = Instruction::CastOps(Op->getOpcode()); Instruction::CastOps secondOp = Instruction::CastOps(opc); - // Assume that pointers are never more than 64 bits wide. + // Assume that pointers are never more than 64 bits wide, and only use this + // for the middle type. Otherwise we could end up folding away illegal + // bitcasts between address spaces with different sizes. IntegerType *FakeIntPtrTy = Type::getInt64Ty(DstTy->getContext()); // Let CastInst::isEliminableCastPair do the heavy lifting. return CastInst::isEliminableCastPair(firstOp, secondOp, SrcTy, MidTy, DstTy, - FakeIntPtrTy, FakeIntPtrTy, - FakeIntPtrTy); + 0, FakeIntPtrTy, 0); } static Constant *FoldBitCast(Constant *V, Type *DestTy) { @@ -688,6 +689,8 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V, } case Instruction::BitCast: return FoldBitCast(V, DestTy); + case Instruction::AddrSpaceCast: + return 0; } } @@ -1857,9 +1860,9 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred, if (CE1Inverse == CE1Op0) { // Check whether we can safely truncate the right hand side. Constant *C2Inverse = ConstantExpr::getTrunc(C2, CE1Op0->getType()); - if (ConstantExpr::getZExt(C2Inverse, C2->getType()) == C2) { + if (ConstantExpr::getCast(CE1->getOpcode(), C2Inverse, + C2->getType()) == C2) return ConstantExpr::getICmp(pred, CE1Inverse, C2Inverse); - } } } } @@ -1896,6 +1899,37 @@ static bool isInBoundsIndices(ArrayRef Idxs) { return true; } +/// \brief Test whether a given ConstantInt is in-range for a SequentialType. +static bool isIndexInRangeOfSequentialType(const SequentialType *STy, + const ConstantInt *CI) { + if (const PointerType *PTy = dyn_cast(STy)) + // Only handle pointers to sized types, not pointers to functions. + return PTy->getElementType()->isSized(); + + uint64_t NumElements = 0; + // Determine the number of elements in our sequential type. + if (const ArrayType *ATy = dyn_cast(STy)) + NumElements = ATy->getNumElements(); + else if (const VectorType *VTy = dyn_cast(STy)) + NumElements = VTy->getNumElements(); + + assert((isa(STy) || NumElements > 0) && + "didn't expect non-array type to have zero elements!"); + + // We cannot bounds check the index if it doesn't fit in an int64_t. + if (CI->getValue().getActiveBits() > 64) + return false; + + // A negative index or an index past the end of our sequential type is + // considered out-of-range. + int64_t IndexVal = CI->getSExtValue(); + if (IndexVal < 0 || (NumElements > 0 && (uint64_t)IndexVal >= NumElements)) + return false; + + // Otherwise, it is in-range. 
+ return true; +} + template static Constant *ConstantFoldGetElementPtrImpl(Constant *C, bool inBounds, @@ -1939,7 +1973,32 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, I != E; ++I) LastTy = *I; - if ((LastTy && isa(LastTy)) || Idx0->isNullValue()) { + // We cannot combine indices if doing so would take us outside of an + // array or vector. Doing otherwise could trick us if we evaluated such a + // GEP as part of a load. + // + // e.g. Consider if the original GEP was: + // i8* getelementptr ({ [2 x i8], i32, i8, [3 x i8] }* @main.c, + // i32 0, i32 0, i64 0) + // + // If we then tried to offset it by '8' to get to the third element, + // an i8, we should *not* get: + // i8* getelementptr ({ [2 x i8], i32, i8, [3 x i8] }* @main.c, + // i32 0, i32 0, i64 8) + // + // This GEP tries to index array element '8 which runs out-of-bounds. + // Subsequent evaluation would get confused and produce erroneous results. + // + // The following prohibits such a GEP from being formed by checking to see + // if the index is in-range with respect to an array or vector. + bool PerformFold = false; + if (Idx0->isNullValue()) + PerformFold = true; + else if (SequentialType *STy = dyn_cast_or_null(LastTy)) + if (ConstantInt *CI = dyn_cast(Idx0)) + PerformFold = isIndexInRangeOfSequentialType(STy, CI); + + if (PerformFold) { SmallVector NewIndices; NewIndices.reserve(Idxs.size() + CE->getNumOperands()); for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i) @@ -1999,8 +2058,8 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, } // Check to see if any array indices are not within the corresponding - // notional array bounds. If so, try to determine if they can be factored - // out into preceding dimensions. + // notional array or vector bounds. If so, try to determine if they can be + // factored out into preceding dimensions. bool Unknown = false; SmallVector NewIdxs; Type *Ty = C->getType(); @@ -2008,16 +2067,20 @@ static Constant *ConstantFoldGetElementPtrImpl(Constant *C, for (unsigned i = 0, e = Idxs.size(); i != e; Prev = Ty, Ty = cast(Ty)->getTypeAtIndex(Idxs[i]), ++i) { if (ConstantInt *CI = dyn_cast(Idxs[i])) { - if (ArrayType *ATy = dyn_cast(Ty)) - if (ATy->getNumElements() <= INT64_MAX && - ATy->getNumElements() != 0 && - CI->getSExtValue() >= (int64_t)ATy->getNumElements()) { + if (isa(Ty) || isa(Ty)) + if (CI->getSExtValue() > 0 && + !isIndexInRangeOfSequentialType(cast(Ty), CI)) { if (isa(Prev)) { // It's out of range, but we can factor it into the prior // dimension. NewIdxs.resize(Idxs.size()); - ConstantInt *Factor = ConstantInt::get(CI->getType(), - ATy->getNumElements()); + uint64_t NumElements = 0; + if (const ArrayType *ATy = dyn_cast(Ty)) + NumElements = ATy->getNumElements(); + else + NumElements = cast(Ty)->getNumElements(); + + ConstantInt *Factor = ConstantInt::get(CI->getType(), NumElements); NewIdxs[i] = ConstantExpr::getSRem(CI, Factor); Constant *PrevIdx = cast(Idxs[i-1]); diff --git a/contrib/llvm/lib/IR/Constants.cpp b/contrib/llvm/lib/IR/Constants.cpp index 2c6971c..690ac59 100644 --- a/contrib/llvm/lib/IR/Constants.cpp +++ b/contrib/llvm/lib/IR/Constants.cpp @@ -483,8 +483,8 @@ ConstantInt *ConstantInt::get(LLVMContext &Context, const APInt &V) { // Get the corresponding integer type for the bit width of the value. 
IntegerType *ITy = IntegerType::get(Context, V.getBitWidth()); // get an existing value or the insertion position - DenseMapAPIntKeyInfo::KeyTy Key(V, ITy); - ConstantInt *&Slot = Context.pImpl->IntConstants[Key]; + LLVMContextImpl *pImpl = Context.pImpl; + ConstantInt *&Slot = pImpl->IntConstants[DenseMapAPIntKeyInfo::KeyTy(V, ITy)]; if (!Slot) Slot = new ConstantInt(ITy, V); return Slot; } @@ -608,11 +608,9 @@ Constant *ConstantFP::getZeroValueForNegation(Type *Ty) { // ConstantFP accessors. ConstantFP* ConstantFP::get(LLVMContext &Context, const APFloat& V) { - DenseMapAPFloatKeyInfo::KeyTy Key(V); - LLVMContextImpl* pImpl = Context.pImpl; - ConstantFP *&Slot = pImpl->FPConstants[Key]; + ConstantFP *&Slot = pImpl->FPConstants[DenseMapAPFloatKeyInfo::KeyTy(V)]; if (!Slot) { Type *Ty; @@ -1128,6 +1126,7 @@ getWithOperands(ArrayRef Ops, Type *Ty) const { case Instruction::PtrToInt: case Instruction::IntToPtr: case Instruction::BitCast: + case Instruction::AddrSpaceCast: return ConstantExpr::getCast(getOpcode(), Ops[0], Ty); case Instruction::Select: return ConstantExpr::getSelect(Ops[0], Ops[1], Ops[2]); @@ -1391,7 +1390,7 @@ void BlockAddress::replaceUsesOfWithOnConstant(Value *From, Value *To, Use *U) { BasicBlock *NewBB = getBasicBlock(); if (U == &Op<0>()) - NewF = cast(To); + NewF = cast(To->stripPointerCasts()); else NewBB = cast(To); @@ -1463,6 +1462,7 @@ Constant *ConstantExpr::getCast(unsigned oc, Constant *C, Type *Ty) { case Instruction::PtrToInt: return getPtrToInt(C, Ty); case Instruction::IntToPtr: return getIntToPtr(C, Ty); case Instruction::BitCast: return getBitCast(C, Ty); + case Instruction::AddrSpaceCast: return getAddrSpaceCast(C, Ty); } } @@ -1491,10 +1491,26 @@ Constant *ConstantExpr::getPointerCast(Constant *S, Type *Ty) { if (Ty->isIntOrIntVectorTy()) return getPtrToInt(S, Ty); + + unsigned SrcAS = S->getType()->getPointerAddressSpace(); + if (Ty->isPtrOrPtrVectorTy() && SrcAS != Ty->getPointerAddressSpace()) + return getAddrSpaceCast(S, Ty); + + return getBitCast(S, Ty); +} + +Constant *ConstantExpr::getPointerBitCastOrAddrSpaceCast(Constant *S, + Type *Ty) { + assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast"); + assert(Ty->isPtrOrPtrVectorTy() && "Invalid cast"); + + if (S->getType()->getPointerAddressSpace() != Ty->getPointerAddressSpace()) + return getAddrSpaceCast(S, Ty); + return getBitCast(S, Ty); } -Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty, +Constant *ConstantExpr::getIntegerCast(Constant *C, Type *Ty, bool isSigned) { assert(C->getType()->isIntOrIntVectorTy() && Ty->isIntOrIntVectorTy() && "Invalid cast"); @@ -1664,6 +1680,13 @@ Constant *ConstantExpr::getBitCast(Constant *C, Type *DstTy) { return getFoldedCast(Instruction::BitCast, C, DstTy); } +Constant *ConstantExpr::getAddrSpaceCast(Constant *C, Type *DstTy) { + assert(CastInst::castIsValid(Instruction::AddrSpaceCast, C, DstTy) && + "Invalid constantexpr addrspacecast!"); + + return getFoldedCast(Instruction::AddrSpaceCast, C, DstTy); +} + Constant *ConstantExpr::get(unsigned Opcode, Constant *C1, Constant *C2, unsigned Flags) { // Check the operands for consistency first. 
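Several of the Constants.cpp hunks above serve one rule: a pointer cast may
only be a bitcast when source and destination live in the same address
space, otherwise it must be the new addrspacecast. getCast gains an
AddrSpaceCast case, getPointerCast and getPointerBitCastOrAddrSpaceCast
pick between the two, and foldConstantCastPair now supplies a pointer-sized
type only for the middle type so that cast pairs crossing address spaces of
different sizes are not folded away. The selection rule, reduced to a
sketch over a hypothetical simplified type rather than the real llvm::Type
API:

  #include <cassert>

  // Hypothetical stand-in for llvm::Type; just enough for the dispatch.
  struct Ty {
    bool IsPointer;
    unsigned AddrSpace;
  };

  enum CastOp { PtrToInt, BitCast, AddrSpaceCast };

  CastOp pickPointerCast(const Ty &Src, const Ty &Dst) {
    assert(Src.IsPointer && "source of a pointer cast must be a pointer");
    if (!Dst.IsPointer)
      return PtrToInt;       // pointer-to-integer conversion
    if (Src.AddrSpace != Dst.AddrSpace)
      return AddrSpaceCast;  // pointers in different address spaces
    return BitCast;          // same address space: plain reinterpretation
  }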
@@ -1956,14 +1979,22 @@ Constant *ConstantExpr::getShuffleVector(Constant *V1, Constant *V2, Constant *ConstantExpr::getInsertValue(Constant *Agg, Constant *Val, ArrayRef Idxs) { + assert(Agg->getType()->isFirstClassType() && + "Non-first-class type for constant insertvalue expression"); + assert(ExtractValueInst::getIndexedType(Agg->getType(), Idxs) == Val->getType() && "insertvalue indices invalid!"); - assert(Agg->getType()->isFirstClassType() && - "Non-first-class type for constant insertvalue expression"); - Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs); - assert(FC && "insertvalue constant expr couldn't be folded!"); - return FC; + Type *ReqTy = Val->getType(); + + if (Constant *FC = ConstantFoldInsertValueInstruction(Agg, Val, Idxs)) + return FC; + + Constant *ArgVec[] = { Agg, Val }; + const ExprMapKeyType Key(Instruction::InsertValue, ArgVec, 0, 0, Idxs); + + LLVMContextImpl *pImpl = Agg->getContext().pImpl; + return pImpl->ExprConstants.getOrCreate(ReqTy, Key); } Constant *ConstantExpr::getExtractValue(Constant *Agg, @@ -1977,9 +2008,14 @@ Constant *ConstantExpr::getExtractValue(Constant *Agg, assert(Agg->getType()->isFirstClassType() && "Non-first-class type for constant extractvalue expression"); - Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs); - assert(FC && "ExtractValue constant expr couldn't be folded!"); - return FC; + if (Constant *FC = ConstantFoldExtractValueInstruction(Agg, Idxs)) + return FC; + + Constant *ArgVec[] = { Agg }; + const ExprMapKeyType Key(Instruction::ExtractValue, ArgVec, 0, 0, Idxs); + + LLVMContextImpl *pImpl = Agg->getContext().pImpl; + return pImpl->ExprConstants.getOrCreate(ReqTy, Key); } Constant *ConstantExpr::getNeg(Constant *C, bool HasNUW, bool HasNSW) { diff --git a/contrib/llvm/lib/IR/Core.cpp b/contrib/llvm/lib/IR/Core.cpp index 889d574..c70f459 100644 --- a/contrib/llvm/lib/IR/Core.cpp +++ b/contrib/llvm/lib/IR/Core.cpp @@ -58,6 +58,10 @@ void LLVMShutdown() { /*===-- Error handling ----------------------------------------------------===*/ +char *LLVMCreateMessage(const char *Message) { + return strdup(Message); +} + void LLVMDisposeMessage(char *Message) { free(Message); } @@ -93,7 +97,7 @@ LLVMModuleRef LLVMModuleCreateWithName(const char *ModuleID) { return wrap(new Module(ModuleID, getGlobalContext())); } -LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID, +LLVMModuleRef LLVMModuleCreateWithNameInContext(const char *ModuleID, LLVMContextRef C) { return wrap(new Module(ModuleID, *unwrap(C))); } @@ -143,6 +147,16 @@ LLVMBool LLVMPrintModuleToFile(LLVMModuleRef M, const char *Filename, return false; } +char *LLVMPrintModuleToString(LLVMModuleRef M) { + std::string buf; + raw_string_ostream os(buf); + + unwrap(M)->print(os, NULL); + os.flush(); + + return strdup(buf.c_str()); +} + /*--.. Operations on inline assembler ......................................--*/ void LLVMSetModuleInlineAsm(LLVMModuleRef M, const char *Asm) { unwrap(M)->setModuleInlineAsm(StringRef(Asm)); @@ -206,6 +220,20 @@ LLVMContextRef LLVMGetTypeContext(LLVMTypeRef Ty) { return wrap(&unwrap(Ty)->getContext()); } +void LLVMDumpType(LLVMTypeRef Ty) { + return unwrap(Ty)->dump(); +} + +char *LLVMPrintTypeToString(LLVMTypeRef Ty) { + std::string buf; + raw_string_ostream os(buf); + + unwrap(Ty)->print(os); + os.flush(); + + return strdup(buf.c_str()); +} + /*--.. 
Operations on integer types .........................................--*/ LLVMTypeRef LLVMInt1TypeInContext(LLVMContextRef C) { @@ -446,6 +474,16 @@ void LLVMDumpValue(LLVMValueRef Val) { unwrap(Val)->dump(); } +char* LLVMPrintValueToString(LLVMValueRef Val) { + std::string buf; + raw_string_ostream os(buf); + + unwrap(Val)->print(os); + os.flush(); + + return strdup(buf.c_str()); +} + void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) { unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal)); } @@ -677,7 +715,7 @@ LLVMValueRef LLVMConstStringInContext(LLVMContextRef C, const char *Str, return wrap(ConstantDataArray::getString(*unwrap(C), StringRef(Str, Length), DontNullTerminate == 0)); } -LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, +LLVMValueRef LLVMConstStructInContext(LLVMContextRef C, LLVMValueRef *ConstantVals, unsigned Count, LLVMBool Packed) { Constant **Elements = unwrap(ConstantVals, Count); @@ -995,6 +1033,12 @@ LLVMValueRef LLVMConstBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { unwrap(ToType))); } +LLVMValueRef LLVMConstAddrSpaceCast(LLVMValueRef ConstantVal, + LLVMTypeRef ToType) { + return wrap(ConstantExpr::getAddrSpaceCast(unwrap(ConstantVal), + unwrap(ToType))); +} + LLVMValueRef LLVMConstZExtOrBitCast(LLVMValueRef ConstantVal, LLVMTypeRef ToType) { return wrap(ConstantExpr::getZExtOrBitCast(unwrap(ConstantVal), @@ -1106,8 +1150,6 @@ LLVMLinkage LLVMGetLinkage(LLVMValueRef Global) { return LLVMLinkOnceAnyLinkage; case GlobalValue::LinkOnceODRLinkage: return LLVMLinkOnceODRLinkage; - case GlobalValue::LinkOnceODRAutoHideLinkage: - return LLVMLinkOnceODRAutoHideLinkage; case GlobalValue::WeakAnyLinkage: return LLVMWeakAnyLinkage; case GlobalValue::WeakODRLinkage: @@ -1152,7 +1194,8 @@ void LLVMSetLinkage(LLVMValueRef Global, LLVMLinkage Linkage) { GV->setLinkage(GlobalValue::LinkOnceODRLinkage); break; case LLVMLinkOnceODRAutoHideLinkage: - GV->setLinkage(GlobalValue::LinkOnceODRAutoHideLinkage); + DEBUG(errs() << "LLVMSetLinkage(): LLVMLinkOnceODRAutoHideLinkage is no " + "longer supported."); break; case LLVMWeakAnyLinkage: GV->setLinkage(GlobalValue::WeakAnyLinkage); @@ -1212,12 +1255,30 @@ void LLVMSetVisibility(LLVMValueRef Global, LLVMVisibility Viz) { ->setVisibility(static_cast(Viz)); } -unsigned LLVMGetAlignment(LLVMValueRef Global) { - return unwrap(Global)->getAlignment(); +/*--.. Operations on global variables, load and store instructions .........--*/ + +unsigned LLVMGetAlignment(LLVMValueRef V) { + Value *P = unwrap(V); + if (GlobalValue *GV = dyn_cast(P)) + return GV->getAlignment(); + if (LoadInst *LI = dyn_cast(P)) + return LI->getAlignment(); + if (StoreInst *SI = dyn_cast(P)) + return SI->getAlignment(); + + llvm_unreachable("only GlobalValue, LoadInst and StoreInst have alignment"); } -void LLVMSetAlignment(LLVMValueRef Global, unsigned Bytes) { - unwrap(Global)->setAlignment(Bytes); +void LLVMSetAlignment(LLVMValueRef V, unsigned Bytes) { + Value *P = unwrap(V); + if (GlobalValue *GV = dyn_cast(P)) + GV->setAlignment(Bytes); + else if (LoadInst *LI = dyn_cast(P)) + LI->setAlignment(Bytes); + else if (StoreInst *SI = dyn_cast(P)) + SI->setAlignment(Bytes); + else + llvm_unreachable("only GlobalValue, LoadInst and StoreInst have alignment"); } /*--.. Operations on global variables ......................................--*/ @@ -1549,7 +1610,7 @@ LLVMAttribute LLVMGetAttribute(LLVMValueRef Arg) { return (LLVMAttribute)A->getParent()->getAttributes(). 
Raw(A->getArgNo()+1); } - + void LLVMSetParamAlignment(LLVMValueRef Arg, unsigned align) { Argument *A = unwrap(Arg); @@ -1741,7 +1802,7 @@ void LLVMSetInstructionCallConv(LLVMValueRef Instr, unsigned CC) { llvm_unreachable("LLVMSetInstructionCallConv applies only to call and invoke!"); } -void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, +void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute PA) { CallSite Call = CallSite(unwrap(Instr)); AttrBuilder B(PA); @@ -1751,7 +1812,7 @@ void LLVMAddInstrAttribute(LLVMValueRef Instr, unsigned index, index, B))); } -void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, +void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, LLVMAttribute PA) { CallSite Call = CallSite(unwrap(Instr)); AttrBuilder B(PA); @@ -1761,7 +1822,7 @@ void LLVMRemoveInstrAttribute(LLVMValueRef Instr, unsigned index, index, B))); } -void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, +void LLVMSetInstrParamAlignment(LLVMValueRef Instr, unsigned index, unsigned align) { CallSite Call = CallSite(unwrap(Instr)); AttrBuilder B; @@ -2115,8 +2176,8 @@ LLVMValueRef LLVMBuildMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty)); AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy); - Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), - ITy, unwrap(Ty), AllocSize, + Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), + ITy, unwrap(Ty), AllocSize, 0, 0, ""); return wrap(unwrap(B)->Insert(Malloc, Twine(Name))); } @@ -2126,8 +2187,8 @@ LLVMValueRef LLVMBuildArrayMalloc(LLVMBuilderRef B, LLVMTypeRef Ty, Type* ITy = Type::getInt32Ty(unwrap(B)->GetInsertBlock()->getContext()); Constant* AllocSize = ConstantExpr::getSizeOf(unwrap(Ty)); AllocSize = ConstantExpr::getTruncOrBitCast(AllocSize, ITy); - Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), - ITy, unwrap(Ty), AllocSize, + Instruction* Malloc = CallInst::CreateMalloc(unwrap(B)->GetInsertBlock(), + ITy, unwrap(Ty), AllocSize, unwrap(Val), 0, ""); return wrap(unwrap(B)->Insert(Malloc, Twine(Name))); } @@ -2153,7 +2214,7 @@ LLVMValueRef LLVMBuildLoad(LLVMBuilderRef B, LLVMValueRef PointerVal, return wrap(unwrap(B)->CreateLoad(unwrap(PointerVal), Name)); } -LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val, +LLVMValueRef LLVMBuildStore(LLVMBuilderRef B, LLVMValueRef Val, LLVMValueRef PointerVal) { return wrap(unwrap(B)->CreateStore(unwrap(Val), unwrap(PointerVal))); } @@ -2263,6 +2324,11 @@ LLVMValueRef LLVMBuildBitCast(LLVMBuilderRef B, LLVMValueRef Val, return wrap(unwrap(B)->CreateBitCast(unwrap(Val), unwrap(DestTy), Name)); } +LLVMValueRef LLVMBuildAddrSpaceCast(LLVMBuilderRef B, LLVMValueRef Val, + LLVMTypeRef DestTy, const char *Name) { + return wrap(unwrap(B)->CreateAddrSpaceCast(unwrap(Val), unwrap(DestTy), Name)); +} + LLVMValueRef LLVMBuildZExtOrBitCast(LLVMBuilderRef B, LLVMValueRef Val, LLVMTypeRef DestTy, const char *Name) { return wrap(unwrap(B)->CreateZExtOrBitCast(unwrap(Val), unwrap(DestTy), @@ -2392,9 +2458,9 @@ LLVMValueRef LLVMBuildPtrDiff(LLVMBuilderRef B, LLVMValueRef LHS, return wrap(unwrap(B)->CreatePtrDiff(unwrap(LHS), unwrap(RHS), Name)); } -LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, - LLVMValueRef PTR, LLVMValueRef Val, - LLVMAtomicOrdering ordering, +LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef 
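LLVMBuildAddrSpaceCast above, together with LLVMConstAddrSpaceCast from the previous hunk, exposes the new addrspacecast instruction through the C API. A sketch casting an i8* into address space 1 (the target space is an arbitrary example):

    #include <llvm-c/Core.h>

    LLVMValueRef castToSpace1(LLVMBuilderRef B, LLVMValueRef Ptr) {
      LLVMTypeRef I8PtrAS1 = LLVMPointerType(LLVMInt8Type(), 1);
      return LLVMBuildAddrSpaceCast(B, Ptr, I8PtrAS1, "asc");
    }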
B,LLVMAtomicRMWBinOp op, + LLVMValueRef PTR, LLVMValueRef Val, + LLVMAtomicOrdering ordering, LLVMBool singleThread) { AtomicRMWInst::BinOp intop; switch (op) { @@ -2417,14 +2483,14 @@ LLVMValueRef LLVMBuildAtomicRMW(LLVMBuilderRef B,LLVMAtomicRMWBinOp op, case LLVMAtomicOrderingMonotonic: intordering = Monotonic; break; case LLVMAtomicOrderingAcquire: intordering = Acquire; break; case LLVMAtomicOrderingRelease: intordering = Release; break; - case LLVMAtomicOrderingAcquireRelease: - intordering = AcquireRelease; + case LLVMAtomicOrderingAcquireRelease: + intordering = AcquireRelease; break; - case LLVMAtomicOrderingSequentiallyConsistent: - intordering = SequentiallyConsistent; + case LLVMAtomicOrderingSequentiallyConsistent: + intordering = SequentiallyConsistent; break; } - return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), + return wrap(unwrap(B)->CreateAtomicRMW(intop, unwrap(PTR), unwrap(Val), intordering, singleThread ? SingleThread : CrossThread)); } diff --git a/contrib/llvm/lib/IR/DIBuilder.cpp b/contrib/llvm/lib/IR/DIBuilder.cpp index 0980e80..c4a9f41 100644 --- a/contrib/llvm/lib/IR/DIBuilder.cpp +++ b/contrib/llvm/lib/IR/DIBuilder.cpp @@ -30,17 +30,24 @@ static Constant *GetTagConstant(LLVMContext &VMContext, unsigned Tag) { } DIBuilder::DIBuilder(Module &m) - : M(m), VMContext(M.getContext()), TheCU(0), TempEnumTypes(0), - TempRetainTypes(0), TempSubprograms(0), TempGVs(0), DeclareFn(0), - ValueFn(0) -{} + : M(m), VMContext(M.getContext()), TempEnumTypes(0), TempRetainTypes(0), + TempSubprograms(0), TempGVs(0), DeclareFn(0), ValueFn(0) {} /// finalize - Construct any deferred debug info descriptors. void DIBuilder::finalize() { DIArray Enums = getOrCreateArray(AllEnumTypes); DIType(TempEnumTypes).replaceAllUsesWith(Enums); - DIArray RetainTypes = getOrCreateArray(AllRetainTypes); + SmallVector RetainValues; + // Declarations and definitions of the same type may be retained. Some + // clients RAUW these pairs, leaving duplicates in the retained types + // list. Use a set to remove the duplicates while we transform the + // TrackingVHs back into Values. + SmallPtrSet RetainSet; + for (unsigned I = 0, E = AllRetainTypes.size(); I < E; I++) + if (RetainSet.insert(AllRetainTypes[I])) + RetainValues.push_back(AllRetainTypes[I]); + DIArray RetainTypes = getOrCreateArray(RetainValues); DIType(TempRetainTypes).replaceAllUsesWith(RetainTypes); DIArray SPs = getOrCreateArray(AllSubprograms); @@ -79,17 +86,18 @@ static MDNode *createFilePathPair(LLVMContext &VMContext, StringRef Filename, assert(!Filename.empty() && "Unable to create file without name"); Value *Pair[] = { MDString::get(VMContext, Filename), - MDString::get(VMContext, Directory), + MDString::get(VMContext, Directory) }; return MDNode::get(VMContext, Pair); } /// createCompileUnit - A CompileUnit provides an anchor for all debugging /// information generated during this instance of compilation. 
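The LLVMBuildAtomicRMW hunk above maps the C-level ordering enum onto the C++ AtomicOrdering values. A sketch issuing a sequentially consistent fetch-add (pointer and operand supplied by the caller):

    #include <llvm-c/Core.h>

    LLVMValueRef fetchAdd(LLVMBuilderRef B, LLVMValueRef Ptr, LLVMValueRef V) {
      return LLVMBuildAtomicRMW(B, LLVMAtomicRMWBinOpAdd, Ptr, V,
                                LLVMAtomicOrderingSequentiallyConsistent,
                                /*singleThread=*/0);
    }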
-void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, - StringRef Directory, StringRef Producer, - bool isOptimized, StringRef Flags, - unsigned RunTimeVer, StringRef SplitName) { +DICompileUnit DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, + StringRef Directory, + StringRef Producer, bool isOptimized, + StringRef Flags, unsigned RunTimeVer, + StringRef SplitName) { assert(((Lang <= dwarf::DW_LANG_Python && Lang >= dwarf::DW_LANG_C89) || (Lang <= dwarf::DW_LANG_hi_user && Lang >= dwarf::DW_LANG_lo_user)) && "Invalid Language tag"); @@ -121,23 +129,70 @@ void DIBuilder::createCompileUnit(unsigned Lang, StringRef Filename, TempImportedModules, MDString::get(VMContext, SplitName) }; - TheCU = DICompileUnit(MDNode::get(VMContext, Elts)); + + MDNode *CUNode = MDNode::get(VMContext, Elts); // Create a named metadata so that it is easier to find cu in a module. NamedMDNode *NMD = M.getOrInsertNamedMetadata("llvm.dbg.cu"); - NMD->addOperand(TheCU); + NMD->addOperand(CUNode); + + return DICompileUnit(CUNode); +} + +static DIImportedEntity +createImportedModule(LLVMContext &C, DIScope Context, DIDescriptor NS, + unsigned Line, StringRef Name, + SmallVectorImpl &AllImportedModules) { + const MDNode *R; + if (Name.empty()) { + Value *Elts[] = { + GetTagConstant(C, dwarf::DW_TAG_imported_module), + Context, + NS, + ConstantInt::get(Type::getInt32Ty(C), Line), + }; + R = MDNode::get(C, Elts); + } else { + Value *Elts[] = { + GetTagConstant(C, dwarf::DW_TAG_imported_module), + Context, + NS, + ConstantInt::get(Type::getInt32Ty(C), Line), + MDString::get(C, Name) + }; + R = MDNode::get(C, Elts); + } + DIImportedEntity M(R); + assert(M.Verify() && "Imported module should be valid"); + AllImportedModules.push_back(M); + return M; } -DIImportedModule DIBuilder::createImportedModule(DIScope Context, - DINameSpace NS, - unsigned Line) { +DIImportedEntity DIBuilder::createImportedModule(DIScope Context, + DINameSpace NS, unsigned Line, + StringRef Name) { + return ::createImportedModule(VMContext, Context, NS, Line, Name, + AllImportedModules); +} + +DIImportedEntity DIBuilder::createImportedModule(DIScope Context, + DIImportedEntity NS, + unsigned Line, + StringRef Name) { + return ::createImportedModule(VMContext, Context, NS, Line, Name, + AllImportedModules); +} + +DIImportedEntity DIBuilder::createImportedDeclaration(DIScope Context, + DIDescriptor Decl, + unsigned Line) { Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_imported_module), + GetTagConstant(VMContext, dwarf::DW_TAG_imported_declaration), Context, - NS, + Decl, ConstantInt::get(Type::getInt32Ty(VMContext), Line), }; - DIImportedModule M(MDNode::get(VMContext, Elts)); + DIImportedEntity M(MDNode::get(VMContext, Elts)); assert(M.Verify() && "Imported module should be valid"); AllImportedModules.push_back(M); return M; @@ -154,7 +209,7 @@ DIFile DIBuilder::createFile(StringRef Filename, StringRef Directory) { } /// createEnumerator - Create a single enumerator value. -DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { +DIEnumerator DIBuilder::createEnumerator(StringRef Name, int64_t Val) { assert(!Name.empty() && "Unable to create enumerator without name"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumerator), @@ -164,15 +219,15 @@ DIEnumerator DIBuilder::createEnumerator(StringRef Name, uint64_t Val) { return DIEnumerator(MDNode::get(VMContext, Elts)); } -/// createNullPtrType - Create C++0x nullptr type. 
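createCompileUnit no longer caches TheCU inside DIBuilder; it now returns the unit so front ends keep their own handle, and createImportedModule gains an optional alias name (the C++ "namespace A = B;" case). A sketch against the 3.4 headers; file names and the producer string are illustrative:

    #include "llvm/DIBuilder.h"
    #include "llvm/DebugInfo.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Support/Dwarf.h"
    using namespace llvm;

    void emitDebugSkeleton(Module &M) {
      DIBuilder DIB(M);
      DICompileUnit CU = DIB.createCompileUnit(
          dwarf::DW_LANG_C_plus_plus, "t.cpp", "/tmp", "example producer",
          /*isOptimized=*/false, /*Flags=*/"", /*RunTimeVer=*/0,
          /*SplitName=*/"");
      DIFile File = DIB.createFile("t.cpp", "/tmp");
      DINameSpace B = DIB.createNameSpace(CU, "B", File, /*LineNo=*/1);
      DIB.createImportedModule(CU, B, /*Line=*/2, "A");  // namespace A = B;
      DIB.finalize();
    }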
-DIType DIBuilder::createNullPtrType(StringRef Name) { +/// \brief Create a DWARF unspecified type. +DIBasicType DIBuilder::createUnspecifiedType(StringRef Name) { assert(!Name.empty() && "Unable to create type without name"); - // nullptr is encoded in DIBasicType format. Line number, filename, - // ,size, alignment, offset and flags are always empty here. + // Unspecified types are encoded in DIBasicType format. Line number, filename, + // size, alignment, offset and flags are always empty here. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_unspecified_type), NULL, // Filename - NULL, //TheCU, + NULL, // Unused MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size @@ -181,7 +236,12 @@ DIType DIBuilder::createNullPtrType(StringRef Name) { ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags; ConstantInt::get(Type::getInt32Ty(VMContext), 0) // Encoding }; - return DIType(MDNode::get(VMContext, Elts)); + return DIBasicType(MDNode::get(VMContext, Elts)); +} + +/// \brief Create C++11 nullptr type. +DIBasicType DIBuilder::createNullPtrType() { + return createUnspecifiedType("decltype(nullptr)"); } /// createBasicType - Create debugging information entry for a basic @@ -195,7 +255,7 @@ DIBuilder::createBasicType(StringRef Name, uint64_t SizeInBits, Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_base_type), NULL, // File/directory name - NULL, //TheCU, + NULL, // Unused MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), @@ -214,14 +274,14 @@ DIDerivedType DIBuilder::createQualifiedType(unsigned Tag, DIType FromTy) { Value *Elts[] = { GetTagConstant(VMContext, Tag), NULL, // Filename - NULL, //TheCU, + NULL, // Unused MDString::get(VMContext, StringRef()), // Empty name. 
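createNullPtrType is now a thin wrapper over the more general createUnspecifiedType, and both return DIBasicType instead of plain DIType. A fragment reusing the includes from the sketch above; emitting "auto" this way is a plausible client use, not something this hunk does itself:

    void emitUnspecifiedTypes(llvm::DIBuilder &DIB) {
      llvm::DIBasicType NullPtrTy = DIB.createNullPtrType(); // decltype(nullptr)
      llvm::DIBasicType AutoTy = DIB.createUnspecifiedType("auto");
      (void)NullPtrTy; (void)AutoTy;
    }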
ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - FromTy + FromTy.getRef() }; return DIDerivedType(MDNode::get(VMContext, Elts)); } @@ -234,14 +294,14 @@ DIBuilder::createPointerType(DIType PointeeTy, uint64_t SizeInBits, Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_pointer_type), NULL, // Filename - NULL, //TheCU, + NULL, // Unused MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - PointeeTy + PointeeTy.getRef() }; return DIDerivedType(MDNode::get(VMContext, Elts)); } @@ -252,15 +312,15 @@ DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_ptr_to_member_type), NULL, // Filename - NULL, //TheCU, + NULL, // Unused NULL, ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line - ConstantInt::get(Type::getInt64Ty(VMContext), 0), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - PointeeTy, - Base + PointeeTy.getRef(), + Base.getRef() }; return DIDerivedType(MDNode::get(VMContext, Elts)); } @@ -268,7 +328,7 @@ DIDerivedType DIBuilder::createMemberPointerType(DIType PointeeTy, /// createReferenceType - Create debugging information entry for a reference /// type. DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { - assert(RTy.Verify() && "Unable to create reference type"); + assert(RTy.isType() && "Unable to create reference type"); // References are encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, Tag), @@ -280,7 +340,7 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - RTy + RTy.getRef() }; return DIDerivedType(MDNode::get(VMContext, Elts)); } @@ -289,117 +349,120 @@ DIDerivedType DIBuilder::createReferenceType(unsigned Tag, DIType RTy) { DIDerivedType DIBuilder::createTypedef(DIType Ty, StringRef Name, DIFile File, unsigned LineNo, DIDescriptor Context) { // typedefs are encoded in DIDerivedType format. 
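From here on, type operands are stored through getRef() rather than as raw nodes, so a type carrying a unique identifier is referenced by MDString instead of by pointer; this is what enables type uniquing across units. The creator signatures themselves are unchanged, e.g.:

    llvm::DIDerivedType emitIntPtr(llvm::DIBuilder &DIB) {
      llvm::DIBasicType IntTy =
          DIB.createBasicType("int", 32, 32, llvm::dwarf::DW_ATE_signed);
      // const int, then int*; both now reference IntTy via getRef().
      llvm::DIDerivedType ConstInt =
          DIB.createQualifiedType(llvm::dwarf::DW_TAG_const_type, IntTy);
      (void)ConstInt;
      return DIB.createPointerType(IntTy, 64, 64, "int*");
    }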
- assert(Ty.Verify() && "Invalid typedef type!"); + assert(Ty.isType() && "Invalid typedef type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_typedef), File.getFileNode(), - getNonCompileUnitScope(Context), + DIScope(getNonCompileUnitScope(Context)).getRef(), MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - Ty + Ty.getRef() }; return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createFriend - Create debugging information entry for a 'friend'. -DIType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { +DIDerivedType DIBuilder::createFriend(DIType Ty, DIType FriendTy) { // typedefs are encoded in DIDerivedType format. - assert(Ty.Verify() && "Invalid type!"); - assert(FriendTy.Verify() && "Invalid friend type!"); + assert(Ty.isType() && "Invalid type!"); + assert(FriendTy.isType() && "Invalid friend type!"); Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_friend), NULL, - Ty, + Ty.getRef(), NULL, // Name ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags - FriendTy + FriendTy.getRef() }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createInheritance - Create debugging information entry to establish /// inheritance relationship between two types. -DIDerivedType DIBuilder::createInheritance( - DIType Ty, DIType BaseTy, uint64_t BaseOffset, unsigned Flags) { - assert(Ty.Verify() && "Unable to create inheritance"); +DIDerivedType DIBuilder::createInheritance(DIType Ty, DIType BaseTy, + uint64_t BaseOffset, + unsigned Flags) { + assert(Ty.isType() && "Unable to create inheritance"); // TAG_inheritance is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_inheritance), NULL, - Ty, + Ty.getRef(), NULL, // Name ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align ConstantInt::get(Type::getInt64Ty(VMContext), BaseOffset), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - BaseTy + BaseTy.getRef() }; return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createMemberType - Create debugging information entry for a member. -DIDerivedType DIBuilder::createMemberType( - DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, uint64_t OffsetInBits, - unsigned Flags, DIType Ty) { +DIDerivedType DIBuilder::createMemberType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty) { // TAG_member is encoded in DIDerivedType format. 
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), File.getFileNode(), - getNonCompileUnitScope(Scope), + DIScope(getNonCompileUnitScope(Scope)).getRef(), MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt64Ty(VMContext), OffsetInBits), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - Ty + Ty.getRef() }; return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createStaticMemberType - Create debugging information entry for a /// C++ static data member. -DIType DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name, - DIFile File, unsigned LineNumber, - DIType Ty, unsigned Flags, - llvm::Value *Val) { +DIDerivedType +DIBuilder::createStaticMemberType(DIDescriptor Scope, StringRef Name, + DIFile File, unsigned LineNumber, + DIType Ty, unsigned Flags, + llvm::Value *Val) { // TAG_member is encoded in DIDerivedType format. Flags |= DIDescriptor::FlagStaticMember; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), File.getFileNode(), - getNonCompileUnitScope(Scope), + DIScope(getNonCompileUnitScope(Scope)).getRef(), MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), - ConstantInt::get(Type::getInt64Ty(VMContext), 0/*SizeInBits*/), - ConstantInt::get(Type::getInt64Ty(VMContext), 0/*AlignInBits*/), - ConstantInt::get(Type::getInt64Ty(VMContext), 0/*OffsetInBits*/), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - Ty, + Ty.getRef(), Val }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createObjCIVar - Create debugging information entry for Objective-C /// instance variable. -DIType DIBuilder::createObjCIVar(StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType Ty, StringRef PropertyName, - StringRef GetterName, StringRef SetterName, - unsigned PropertyAttributes) { +DIDerivedType +DIBuilder::createObjCIVar(StringRef Name, DIFile File, unsigned LineNumber, + uint64_t SizeInBits, uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, DIType Ty, + StringRef PropertyName, StringRef GetterName, + StringRef SetterName, unsigned PropertyAttributes) { // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), @@ -417,16 +480,17 @@ DIType DIBuilder::createObjCIVar(StringRef Name, MDString::get(VMContext, SetterName), ConstantInt::get(Type::getInt32Ty(VMContext), PropertyAttributes) }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createObjCIVar - Create debugging information entry for Objective-C /// instance variable. 
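createMemberType and createStaticMemberType now return DIDerivedType and reference scope and type through getRef(); the member layout arguments are unchanged. A sketch (scope, file and type supplied by the caller):

    llvm::DIDerivedType emitField(llvm::DIBuilder &DIB, llvm::DIDescriptor Scope,
                                  llvm::DIFile File, llvm::DIType IntTy) {
      return DIB.createMemberType(Scope, "field", File, /*LineNumber=*/3,
                                  /*SizeInBits=*/32, /*AlignInBits=*/32,
                                  /*OffsetInBits=*/0, /*Flags=*/0, IntTy);
    }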
-DIType DIBuilder::createObjCIVar(StringRef Name, - DIFile File, unsigned LineNumber, - uint64_t SizeInBits, uint64_t AlignInBits, - uint64_t OffsetInBits, unsigned Flags, - DIType Ty, MDNode *PropertyNode) { +DIDerivedType DIBuilder::createObjCIVar(StringRef Name, DIFile File, + unsigned LineNumber, + uint64_t SizeInBits, + uint64_t AlignInBits, + uint64_t OffsetInBits, unsigned Flags, + DIType Ty, MDNode *PropertyNode) { // TAG_member is encoded in DIDerivedType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_member), @@ -441,17 +505,15 @@ DIType DIBuilder::createObjCIVar(StringRef Name, Ty, PropertyNode }; - return DIType(MDNode::get(VMContext, Elts)); + return DIDerivedType(MDNode::get(VMContext, Elts)); } /// createObjCProperty - Create debugging information entry for Objective-C /// property. -DIObjCProperty DIBuilder::createObjCProperty(StringRef Name, - DIFile File, unsigned LineNumber, - StringRef GetterName, - StringRef SetterName, - unsigned PropertyAttributes, - DIType Ty) { +DIObjCProperty +DIBuilder::createObjCProperty(StringRef Name, DIFile File, unsigned LineNumber, + StringRef GetterName, StringRef SetterName, + unsigned PropertyAttributes, DIType Ty) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_APPLE_property), MDString::get(VMContext, Name), @@ -473,9 +535,9 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, unsigned ColumnNo) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_template_type_parameter), - getNonCompileUnitScope(Context), + DIScope(getNonCompileUnitScope(Context)).getRef(), MDString::get(VMContext, Name), - Ty, + Ty.getRef(), File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) @@ -483,19 +545,18 @@ DIBuilder::createTemplateTypeParameter(DIDescriptor Context, StringRef Name, return DITemplateTypeParameter(MDNode::get(VMContext, Elts)); } -/// createTemplateValueParameter - Create debugging information for template -/// value parameter. DITemplateValueParameter -DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, - DIType Ty, uint64_t Val, - MDNode *File, unsigned LineNo, +DIBuilder::createTemplateValueParameter(unsigned Tag, DIDescriptor Context, + StringRef Name, DIType Ty, + Value *Val, MDNode *File, + unsigned LineNo, unsigned ColumnNo) { Value *Elts[] = { - GetTagConstant(VMContext, dwarf::DW_TAG_template_value_parameter), - getNonCompileUnitScope(Context), + GetTagConstant(VMContext, Tag), + DIScope(getNonCompileUnitScope(Context)).getRef(), MDString::get(VMContext, Name), - Ty, - ConstantInt::get(Type::getInt64Ty(VMContext), Val), + Ty.getRef(), + Val, File, ConstantInt::get(Type::getInt32Ty(VMContext), LineNo), ConstantInt::get(Type::getInt32Ty(VMContext), ColumnNo) @@ -503,6 +564,38 @@ DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, return DITemplateValueParameter(MDNode::get(VMContext, Elts)); } +/// createTemplateValueParameter - Create debugging information for template +/// value parameter. 
+DITemplateValueParameter +DIBuilder::createTemplateValueParameter(DIDescriptor Context, StringRef Name, + DIType Ty, Value *Val, + MDNode *File, unsigned LineNo, + unsigned ColumnNo) { + return createTemplateValueParameter(dwarf::DW_TAG_template_value_parameter, + Context, Name, Ty, Val, File, LineNo, + ColumnNo); +} + +DITemplateValueParameter +DIBuilder::createTemplateTemplateParameter(DIDescriptor Context, StringRef Name, + DIType Ty, StringRef Val, + MDNode *File, unsigned LineNo, + unsigned ColumnNo) { + return createTemplateValueParameter( + dwarf::DW_TAG_GNU_template_template_param, Context, Name, Ty, + MDString::get(VMContext, Val), File, LineNo, ColumnNo); +} + +DITemplateValueParameter +DIBuilder::createTemplateParameterPack(DIDescriptor Context, StringRef Name, + DIType Ty, DIArray Val, + MDNode *File, unsigned LineNo, + unsigned ColumnNo) { + return createTemplateValueParameter(dwarf::DW_TAG_GNU_template_parameter_pack, + Context, Name, Ty, Val, File, LineNo, + ColumnNo); +} + /// createClassType - Create debugging information entry for a class. DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, DIFile File, unsigned LineNumber, @@ -511,29 +604,34 @@ DICompositeType DIBuilder::createClassType(DIDescriptor Context, StringRef Name, uint64_t OffsetInBits, unsigned Flags, DIType DerivedFrom, DIArray Elements, - MDNode *VTableHolder, - MDNode *TemplateParams) { - assert((!Context || Context.Verify()) && + DIType VTableHolder, + MDNode *TemplateParams, + StringRef UniqueIdentifier) { + assert((!Context || Context.isScope() || Context.isType()) && "createClassType should be called with a valid Context"); // TAG_class_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_class_type), File.getFileNode(), - getNonCompileUnitScope(Context), + DIScope(getNonCompileUnitScope(Context)).getRef(), MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt32Ty(VMContext), OffsetInBits), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom, + DerivedFrom.getRef(), Elements, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - VTableHolder, - TemplateParams + VTableHolder.getRef(), + TemplateParams, + UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier) }; DICompositeType R(MDNode::get(VMContext, Elts)); - assert(R.Verify() && "createClassType should return a verifiable DIType"); + assert(R.isCompositeType() && + "createClassType should return a DICompositeType"); + if (!UniqueIdentifier.empty()) + retainType(R); return R; } @@ -546,26 +644,31 @@ DICompositeType DIBuilder::createStructType(DIDescriptor Context, unsigned Flags, DIType DerivedFrom, DIArray Elements, unsigned RunTimeLang, - MDNode *VTableHolder) { + DIType VTableHolder, + StringRef UniqueIdentifier) { // TAG_structure_type is encoded in DICompositeType format. 
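The composite-type creators now accept an optional UniqueIdentifier (typically the mangled name); when present it becomes the trailing MDString operand and the type is retained so identifier-based references stay resolvable. A sketch for a one-field class; "_ZTS1S" is an illustrative mangled name:

    llvm::DICompositeType emitClass(llvm::DIBuilder &DIB,
                                    llvm::DIDescriptor Scope,
                                    llvm::DIFile File,
                                    llvm::DIDerivedType Field) {
      llvm::Value *Ops[] = { Field };
      llvm::DIArray Members = DIB.getOrCreateArray(Ops);
      return DIB.createClassType(
          Scope, "S", File, /*LineNumber=*/2, /*SizeInBits=*/32,
          /*AlignInBits=*/32, /*OffsetInBits=*/0, /*Flags=*/0,
          /*DerivedFrom=*/llvm::DIType(), Members,
          /*VTableHolder=*/llvm::DIType(), /*TemplateParams=*/0,
          /*UniqueIdentifier=*/"_ZTS1S");
    }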
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_structure_type), File.getFileNode(), - getNonCompileUnitScope(Context), + DIScope(getNonCompileUnitScope(Context)).getRef(), MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), ConstantInt::get(Type::getInt32Ty(VMContext), 0), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), - DerivedFrom, + DerivedFrom.getRef(), Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), - VTableHolder, + VTableHolder.getRef(), NULL, + UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier) }; DICompositeType R(MDNode::get(VMContext, Elts)); - assert(R.Verify() && "createStructType should return a verifiable DIType"); + assert(R.isCompositeType() && + "createStructType should return a DICompositeType"); + if (!UniqueIdentifier.empty()) + retainType(R); return R; } @@ -575,45 +678,52 @@ DICompositeType DIBuilder::createUnionType(DIDescriptor Scope, StringRef Name, uint64_t SizeInBits, uint64_t AlignInBits, unsigned Flags, DIArray Elements, - unsigned RunTimeLang) { + unsigned RunTimeLang, + StringRef UniqueIdentifier) { // TAG_union_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_union_type), File.getFileNode(), - getNonCompileUnitScope(Scope), + DIScope(getNonCompileUnitScope(Scope)).getRef(), MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), Flags), NULL, Elements, ConstantInt::get(Type::getInt32Ty(VMContext), RunTimeLang), - Constant::getNullValue(Type::getInt32Ty(VMContext)), - NULL + NULL, + NULL, + UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier) }; - return DICompositeType(MDNode::get(VMContext, Elts)); + DICompositeType R(MDNode::get(VMContext, Elts)); + if (!UniqueIdentifier.empty()) + retainType(R); + return R; } /// createSubroutineType - Create subroutine type. -DICompositeType -DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { +DICompositeType DIBuilder::createSubroutineType(DIFile File, + DIArray ParameterTypes) { // TAG_subroutine_type is encoded in DICompositeType format. 
Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subroutine_type), Constant::getNullValue(Type::getInt32Ty(VMContext)), - Constant::getNullValue(Type::getInt32Ty(VMContext)), + NULL, MDString::get(VMContext, ""), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), - ConstantInt::get(Type::getInt64Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Size + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Align + ConstantInt::get(Type::getInt64Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags NULL, ParameterTypes, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - Constant::getNullValue(Type::getInt32Ty(VMContext)) + NULL, + NULL, + NULL // Type Identifer }; return DICompositeType(MDNode::get(VMContext, Elts)); } @@ -623,26 +733,30 @@ DIBuilder::createSubroutineType(DIFile File, DIArray ParameterTypes) { DICompositeType DIBuilder::createEnumerationType( DIDescriptor Scope, StringRef Name, DIFile File, unsigned LineNumber, uint64_t SizeInBits, uint64_t AlignInBits, DIArray Elements, - DIType UnderlyingType) { + DIType UnderlyingType, StringRef UniqueIdentifier) { // TAG_enumeration_type is encoded in DICompositeType format. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_enumeration_type), File.getFileNode(), - getNonCompileUnitScope(Scope), + DIScope(getNonCompileUnitScope(Scope)).getRef(), MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), LineNumber), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - UnderlyingType, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + UnderlyingType.getRef(), Elements, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - Constant::getNullValue(Type::getInt32Ty(VMContext)) + NULL, + NULL, + UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier) }; - MDNode *Node = MDNode::get(VMContext, Elts); - AllEnumTypes.push_back(Node); - return DICompositeType(Node); + DICompositeType CTy(MDNode::get(VMContext, Elts)); + AllEnumTypes.push_back(CTy); + if (!UniqueIdentifier.empty()) + retainType(CTy); + return CTy; } /// createArrayType - Create debugging information entry for an array. 
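createEnumerator now takes int64_t (earlier hunk), so negative enumerators round-trip, and createEnumerationType threads the same UniqueIdentifier scheme through while recording the node in AllEnumTypes. A sketch with illustrative names:

    llvm::DICompositeType emitEnum(llvm::DIBuilder &DIB,
                                   llvm::DIDescriptor Scope,
                                   llvm::DIFile File, llvm::DIType IntTy) {
      llvm::Value *Enumerators[] = {
        DIB.createEnumerator("Failure", -1),  // negatives now representable
        DIB.createEnumerator("Success", 0)
      };
      return DIB.createEnumerationType(
          Scope, "Status", File, /*LineNumber=*/5, /*SizeInBits=*/32,
          /*AlignInBits=*/32, DIB.getOrCreateArray(Enumerators),
          /*UnderlyingType=*/IntTy, /*UniqueIdentifier=*/"_ZTS6Status");
    }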
@@ -652,42 +766,45 @@ DICompositeType DIBuilder::createArrayType(uint64_t Size, uint64_t AlignInBits, Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), NULL, // Filename/Directory, - NULL, //TheCU, + NULL, // Unused MDString::get(VMContext, ""), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), Size), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - Ty, + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Flags + Ty.getRef(), Subscripts, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - Constant::getNullValue(Type::getInt32Ty(VMContext)) + NULL, + NULL, + NULL // Type Identifer }; return DICompositeType(MDNode::get(VMContext, Elts)); } /// createVectorType - Create debugging information entry for a vector. -DIType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, - DIType Ty, DIArray Subscripts) { - +DICompositeType DIBuilder::createVectorType(uint64_t Size, uint64_t AlignInBits, + DIType Ty, DIArray Subscripts) { // A vector is an array type with the FlagVector flag applied. Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_array_type), NULL, // Filename/Directory, - NULL, //TheCU, + NULL, // Unused MDString::get(VMContext, ""), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Line ConstantInt::get(Type::getInt64Ty(VMContext), Size), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset ConstantInt::get(Type::getInt32Ty(VMContext), DIType::FlagVector), - Ty, + Ty.getRef(), Subscripts, ConstantInt::get(Type::getInt32Ty(VMContext), 0), - Constant::getNullValue(Type::getInt32Ty(VMContext)) + NULL, + NULL, + NULL // Type Identifer }; - return DIType(MDNode::get(VMContext, Elts)); + return DICompositeType(MDNode::get(VMContext, Elts)); } /// createArtificialType - Create a new DIType with "artificial" flag set. @@ -698,17 +815,14 @@ DIType DIBuilder::createArtificialType(DIType Ty) { SmallVector Elts; MDNode *N = Ty; assert (N && "Unexpected input DIType!"); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - if (Value *V = N->getOperand(i)) - Elts.push_back(V); - else - Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))); - } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Elts.push_back(N->getOperand(i)); unsigned CurFlags = Ty.getFlags(); CurFlags = CurFlags | DIType::FlagArtificial; // Flags are stored at this slot. + // FIXME: Add an enum for this magic value. 
Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags); return DIType(MDNode::get(VMContext, Elts)); @@ -723,17 +837,14 @@ DIType DIBuilder::createObjectPointerType(DIType Ty) { SmallVector Elts; MDNode *N = Ty; assert (N && "Unexpected input DIType!"); - for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) { - if (Value *V = N->getOperand(i)) - Elts.push_back(V); - else - Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))); - } + for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i) + Elts.push_back(N->getOperand(i)); unsigned CurFlags = Ty.getFlags(); CurFlags = CurFlags | (DIType::FlagObjectPointer | DIType::FlagArtificial); // Flags are stored at this slot. + // FIXME: Add an enum for this magic value. Elts[8] = ConstantInt::get(Type::getInt32Ty(VMContext), CurFlags); return DIType(MDNode::get(VMContext, Elts)); @@ -742,7 +853,7 @@ DIType DIBuilder::createObjectPointerType(DIType Ty) { /// retainType - Retain DIType in a module even if it is not referenced /// through debug info anchors. void DIBuilder::retainType(DIType T) { - AllRetainTypes.push_back(T); + AllRetainTypes.push_back(TrackingVH(T)); } /// createUnspecifiedParameter - Create unspeicified type descriptor @@ -756,31 +867,36 @@ DIDescriptor DIBuilder::createUnspecifiedParameter() { /// createForwardDecl - Create a temporary forward-declared type that /// can be RAUW'd if the full type is seen. -DIType DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, - DIDescriptor Scope, DIFile F, - unsigned Line, unsigned RuntimeLang, - uint64_t SizeInBits, - uint64_t AlignInBits) { +DICompositeType +DIBuilder::createForwardDecl(unsigned Tag, StringRef Name, DIDescriptor Scope, + DIFile F, unsigned Line, unsigned RuntimeLang, + uint64_t SizeInBits, uint64_t AlignInBits, + StringRef UniqueIdentifier) { // Create a temporary MDNode. Value *Elts[] = { GetTagConstant(VMContext, Tag), F.getFileNode(), - getNonCompileUnitScope(Scope), + DIScope(getNonCompileUnitScope(Scope)).getRef(), MDString::get(VMContext, Name), ConstantInt::get(Type::getInt32Ty(VMContext), Line), ConstantInt::get(Type::getInt64Ty(VMContext), SizeInBits), ConstantInt::get(Type::getInt64Ty(VMContext), AlignInBits), - ConstantInt::get(Type::getInt32Ty(VMContext), 0), - ConstantInt::get(Type::getInt32Ty(VMContext), - DIDescriptor::FlagFwdDecl), + ConstantInt::get(Type::getInt32Ty(VMContext), 0), // Offset + ConstantInt::get(Type::getInt32Ty(VMContext), DIDescriptor::FlagFwdDecl), NULL, DIArray(), - ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang) + ConstantInt::get(Type::getInt32Ty(VMContext), RuntimeLang), + NULL, + NULL, //TemplateParams + UniqueIdentifier.empty() ? NULL : MDString::get(VMContext, UniqueIdentifier) }; MDNode *Node = MDNode::getTemporary(VMContext, Elts); - assert(DIType(Node).Verify() && - "createForwardDecl result should be verifiable"); - return DIType(Node); + DICompositeType RetTy(Node); + assert(RetTy.isCompositeType() && + "createForwardDecl result should be a DIType"); + if (!UniqueIdentifier.empty()) + retainType(RetTy); + return RetTy; } /// getOrCreateArray - Get a DIArray, create one if required. @@ -805,10 +921,11 @@ DISubrange DIBuilder::getOrCreateSubrange(int64_t Lo, int64_t Count) { } /// \brief Create a new descriptor for the specified global. 
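createForwardDecl hands back a temporary node that the front end later RAUWs with the finished definition, and retainType now stores a TrackingVH so that replacement is observed by the retained-types list. A sketch of the declare/complete pair:

    llvm::DICompositeType declareStruct(llvm::DIBuilder &DIB,
                                        llvm::DIDescriptor Scope,
                                        llvm::DIFile File) {
      return DIB.createForwardDecl(llvm::dwarf::DW_TAG_structure_type, "S",
                                   Scope, File, /*Line=*/1, /*RuntimeLang=*/0,
                                   /*SizeInBits=*/0, /*AlignInBits=*/0,
                                   /*UniqueIdentifier=*/"_ZTS1S");
    }

    // Once the full definition Def has been built:
    void completeStruct(llvm::DICompositeType Fwd, llvm::DICompositeType Def) {
      Fwd.replaceAllUsesWith(Def);  // only temporary nodes may be RAUW'd
    }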
-DIGlobalVariable DIBuilder:: -createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F, - unsigned LineNumber, DIType Ty, bool isLocalToUnit, - Value *Val) { +DIGlobalVariable DIBuilder::createGlobalVariable(StringRef Name, + StringRef LinkageName, + DIFile F, unsigned LineNumber, + DIType Ty, bool isLocalToUnit, + Value *Val) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -830,19 +947,22 @@ createGlobalVariable(StringRef Name, StringRef LinkageName, DIFile F, } /// \brief Create a new descriptor for the specified global. -DIGlobalVariable DIBuilder:: -createGlobalVariable(StringRef Name, DIFile F, unsigned LineNumber, - DIType Ty, bool isLocalToUnit, Value *Val) { +DIGlobalVariable DIBuilder::createGlobalVariable(StringRef Name, DIFile F, + unsigned LineNumber, DIType Ty, + bool isLocalToUnit, + Value *Val) { return createGlobalVariable(Name, Name, F, LineNumber, Ty, isLocalToUnit, Val); } /// createStaticVariable - Create a new descriptor for the specified static /// variable. -DIGlobalVariable DIBuilder:: -createStaticVariable(DIDescriptor Context, StringRef Name, - StringRef LinkageName, DIFile F, unsigned LineNumber, - DIType Ty, bool isLocalToUnit, Value *Val, MDNode *Decl) { +DIGlobalVariable DIBuilder::createStaticVariable(DIDescriptor Context, + StringRef Name, + StringRef LinkageName, + DIFile F, unsigned LineNumber, + DIType Ty, bool isLocalToUnit, + Value *Val, MDNode *Decl) { Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_variable), Constant::getNullValue(Type::getInt32Ty(VMContext)), @@ -870,9 +990,9 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, bool AlwaysPreserve, unsigned Flags, unsigned ArgNo) { DIDescriptor Context(getNonCompileUnitScope(Scope)); - assert((!Context || Context.Verify()) && + assert((!Context || Context.isScope()) && "createLocalVariable should be called with a valid Context"); - assert(Ty.Verify() && + assert(Ty.isType() && "createLocalVariable should be called with a valid type"); Value *Elts[] = { GetTagConstant(VMContext, Tag), @@ -893,9 +1013,10 @@ DIVariable DIBuilder::createLocalVariable(unsigned Tag, DIDescriptor Scope, NamedMDNode *FnLocals = getOrInsertFnSpecificMDNode(M, Fn); FnLocals->addOperand(Node); } - assert(DIVariable(Node).Verify() && - "createLocalVariable should return a verifiable DIVariable"); - return DIVariable(Node); + DIVariable RetVar(Node); + assert(RetVar.isVariable() && + "createLocalVariable should return a valid DIVariable"); + return RetVar; } /// createComplexVariable - Create a new descriptor for the specified variable @@ -921,22 +1042,38 @@ DIVariable DIBuilder::createComplexVariable(unsigned Tag, DIDescriptor Scope, } /// createFunction - Create a new descriptor for the specified function. -DISubprogram DIBuilder::createFunction(DIDescriptor Context, - StringRef Name, - StringRef LinkageName, - DIFile File, unsigned LineNo, - DIType Ty, +/// FIXME: this is added for dragonegg. Once we update dragonegg +/// to call resolve function, this will be removed. +DISubprogram DIBuilder::createFunction(DIScopeRef Context, StringRef Name, + StringRef LinkageName, DIFile File, + unsigned LineNo, DICompositeType Ty, + bool isLocalToUnit, bool isDefinition, + unsigned ScopeLine, unsigned Flags, + bool isOptimized, Function *Fn, + MDNode *TParams, MDNode *Decl) { + // dragonegg does not generate identifier for types, so using an empty map + // to resolve the context should be fine. 
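The createGlobalVariable overloads above are only reflowed; behavior is unchanged. A sketch attaching debug info to a real IR global (initializer, linkage and the name "g" are arbitrary):

    #include "llvm/IR/Constants.h"
    #include "llvm/IR/GlobalVariable.h"

    void emitGlobal(llvm::Module &M, llvm::DIBuilder &DIB, llvm::DIFile File,
                    llvm::DIType IntTy) {
      llvm::Type *I32 = llvm::Type::getInt32Ty(M.getContext());
      llvm::GlobalVariable *GV = new llvm::GlobalVariable(
          M, I32, /*isConstant=*/false, llvm::GlobalValue::ExternalLinkage,
          llvm::ConstantInt::get(I32, 0), "g");
      DIB.createGlobalVariable("g", File, /*LineNumber=*/1, IntTy,
                               /*isLocalToUnit=*/false, GV);
    }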
+ DITypeIdentifierMap EmptyMap; + return createFunction(Context.resolve(EmptyMap), Name, LinkageName, File, + LineNo, Ty, isLocalToUnit, isDefinition, ScopeLine, + Flags, isOptimized, Fn, TParams, Decl); +} + +/// createFunction - Create a new descriptor for the specified function. +DISubprogram DIBuilder::createFunction(DIDescriptor Context, StringRef Name, + StringRef LinkageName, DIFile File, + unsigned LineNo, DICompositeType Ty, bool isLocalToUnit, bool isDefinition, - unsigned ScopeLine, - unsigned Flags, bool isOptimized, - Function *Fn, - MDNode *TParams, - MDNode *Decl) { + unsigned ScopeLine, unsigned Flags, + bool isOptimized, Function *Fn, + MDNode *TParams, MDNode *Decl) { + assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type && + "function types should be subroutines"); Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), File.getFileNode(), - getNonCompileUnitScope(Context), + DIScope(getNonCompileUnitScope(Context)).getRef(), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), @@ -961,29 +1098,29 @@ DISubprogram DIBuilder::createFunction(DIDescriptor Context, if (isDefinition) AllSubprograms.push_back(Node); DISubprogram S(Node); - assert(S.Verify() && "createFunction should return a valid DISubprogram"); + assert(S.isSubprogram() && "createFunction should return a valid DISubprogram"); return S; } /// createMethod - Create a new descriptor for the specified C++ method. -DISubprogram DIBuilder::createMethod(DIDescriptor Context, - StringRef Name, - StringRef LinkageName, - DIFile F, - unsigned LineNo, DIType Ty, - bool isLocalToUnit, - bool isDefinition, +DISubprogram DIBuilder::createMethod(DIDescriptor Context, StringRef Name, + StringRef LinkageName, DIFile F, + unsigned LineNo, DICompositeType Ty, + bool isLocalToUnit, bool isDefinition, unsigned VK, unsigned VIndex, - MDNode *VTableHolder, - unsigned Flags, - bool isOptimized, - Function *Fn, + DIType VTableHolder, unsigned Flags, + bool isOptimized, Function *Fn, MDNode *TParam) { + assert(Ty.getTag() == dwarf::DW_TAG_subroutine_type && + "function types should be subroutines"); + assert(getNonCompileUnitScope(Context) && + "Methods should have both a Context and a context that isn't " + "the compile unit."); Value *TElts[] = { GetTagConstant(VMContext, DW_TAG_base_type) }; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_subprogram), F.getFileNode(), - getNonCompileUnitScope(Context), + DIScope(Context).getRef(), MDString::get(VMContext, Name), MDString::get(VMContext, Name), MDString::get(VMContext, LinkageName), @@ -991,9 +1128,9 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, Ty, ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit), ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition), - ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK), + ConstantInt::get(Type::getInt32Ty(VMContext), VK), ConstantInt::get(Type::getInt32Ty(VMContext), VIndex), - VTableHolder, + VTableHolder.getRef(), ConstantInt::get(Type::getInt32Ty(VMContext), Flags), ConstantInt::get(Type::getInt1Ty(VMContext), isOptimized), Fn, @@ -1007,7 +1144,7 @@ DISubprogram DIBuilder::createMethod(DIDescriptor Context, if (isDefinition) AllSubprograms.push_back(Node); DISubprogram S(Node); - assert(S.Verify() && "createMethod should return a valid DISubprogram"); + assert(S.isSubprogram() && "createMethod should return a valid DISubprogram"); return S; } @@ -1046,7 +1183,7 @@ 
DILexicalBlockFile DIBuilder::createLexicalBlockFile(DIDescriptor Scope, DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, unsigned Line, unsigned Col) { - // Defeat MDNode uniqing for lexical blocks by using unique id. + // Defeat MDNode uniquing for lexical blocks by using unique id. static unsigned int unique_id = 0; Value *Elts[] = { GetTagConstant(VMContext, dwarf::DW_TAG_lexical_block), @@ -1066,7 +1203,8 @@ DILexicalBlock DIBuilder::createLexicalBlock(DIDescriptor Scope, DIFile File, Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, Instruction *InsertBefore) { assert(Storage && "no storage passed to dbg.declare"); - assert(VarInfo.Verify() && "empty DIVariable passed to dbg.declare"); + assert(VarInfo.isVariable() && + "empty or invalid DIVariable passed to dbg.declare"); if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); @@ -1078,7 +1216,8 @@ Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, Instruction *DIBuilder::insertDeclare(Value *Storage, DIVariable VarInfo, BasicBlock *InsertAtEnd) { assert(Storage && "no storage passed to dbg.declare"); - assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.declare"); + assert(VarInfo.isVariable() && + "empty or invalid DIVariable passed to dbg.declare"); if (!DeclareFn) DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare); @@ -1097,7 +1236,8 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, DIVariable VarInfo, Instruction *InsertBefore) { assert(V && "no value passed to dbg.value"); - assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value"); + assert(VarInfo.isVariable() && + "empty or invalid DIVariable passed to dbg.value"); if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); @@ -1112,7 +1252,8 @@ Instruction *DIBuilder::insertDbgValueIntrinsic(Value *V, uint64_t Offset, DIVariable VarInfo, BasicBlock *InsertAtEnd) { assert(V && "no value passed to dbg.value"); - assert(VarInfo.Verify() && "invalid DIVariable passed to dbg.value"); + assert(VarInfo.isVariable() && + "empty or invalid DIVariable passed to dbg.value"); if (!ValueFn) ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value); diff --git a/contrib/llvm/lib/IR/DataLayout.cpp b/contrib/llvm/lib/IR/DataLayout.cpp index 5658f56..6bdc09e 100644 --- a/contrib/llvm/lib/IR/DataLayout.cpp +++ b/contrib/llvm/lib/IR/DataLayout.cpp @@ -200,9 +200,7 @@ static unsigned inBytes(unsigned Bits) { } void DataLayout::parseSpecifier(StringRef Desc) { - while (!Desc.empty()) { - // Split at '-'. std::pair Split = split(Desc, '-'); Desc = Split.second; @@ -482,7 +480,7 @@ std::string DataLayout::getStringRepresentation() const { addrSpaces.push_back(pib->first); } std::sort(addrSpaces.begin(), addrSpaces.end()); - for (SmallVector::iterator asb = addrSpaces.begin(), + for (SmallVectorImpl::iterator asb = addrSpaces.begin(), ase = addrSpaces.end(); asb != ase; ++asb) { const PointerAlignElem &PI = Pointers.find(*asb)->second; OS << "-p"; @@ -509,6 +507,15 @@ std::string DataLayout::getStringRepresentation() const { return OS.str(); } +unsigned DataLayout::getPointerTypeSizeInBits(Type *Ty) const { + assert(Ty->isPtrOrPtrVectorTy() && + "This should only be called with a pointer or pointer vector type"); + + if (Ty->isPointerTy()) + return getTypeSizeInBits(Ty); + + return getTypeSizeInBits(Ty->getScalarType()); +} /*! \param abi_or_pref Flag that determines which alignment is returned. 
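The new DataLayout::getPointerTypeSizeInBits looks through vectors of pointers to the scalar element, so both queries below report the width from the p-spec (the layout string is an illustrative 64-bit one):

    #include "llvm/IR/DataLayout.h"
    #include "llvm/IR/DerivedTypes.h"
    #include "llvm/IR/LLVMContext.h"

    void pointerSizes() {
      llvm::LLVMContext Ctx;
      llvm::DataLayout DL("e-p:64:64:64");
      llvm::Type *I8Ptr = llvm::Type::getInt8PtrTy(Ctx);
      llvm::Type *VecOfPtr = llvm::VectorType::get(I8Ptr, 4);
      unsigned A = DL.getPointerTypeSizeInBits(I8Ptr);    // 64
      unsigned B = DL.getPointerTypeSizeInBits(VecOfPtr); // 64 as well
      (void)A; (void)B;
    }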
true @@ -582,7 +589,6 @@ unsigned DataLayout::getABIIntegerTypeAlignment(unsigned BitWidth) const { return getAlignmentInfo(INTEGER_ALIGN, BitWidth, true, 0); } - unsigned DataLayout::getCallFrameTypeAlignment(Type *Ty) const { for (unsigned i = 0, e = Alignments.size(); i != e; ++i) if (Alignments[i].AlignType == STACK_ALIGN) @@ -601,16 +607,11 @@ unsigned DataLayout::getPreferredTypeAlignmentShift(Type *Ty) const { return Log2_32(Align); } -/// getIntPtrType - Return an integer type with size at least as big as that -/// of a pointer in the given address space. IntegerType *DataLayout::getIntPtrType(LLVMContext &C, unsigned AddressSpace) const { return IntegerType::get(C, getPointerSizeInBits(AddressSpace)); } -/// getIntPtrType - Return an integer (vector of integer) type with size at -/// least as big as that of a pointer of the given pointer (vector of pointer) -/// type. Type *DataLayout::getIntPtrType(Type *Ty) const { assert(Ty->isPtrOrPtrVectorTy() && "Expected a pointer or pointer vector type."); @@ -628,6 +629,13 @@ Type *DataLayout::getSmallestLegalIntType(LLVMContext &C, unsigned Width) const return 0; } +unsigned DataLayout::getLargestLegalIntTypeSize() const { + unsigned MaxWidth = 0; + for (unsigned i = 0, e = (unsigned)LegalIntWidths.size(); i != e; ++i) + MaxWidth = std::max(MaxWidth, LegalIntWidths[i]); + return MaxWidth; +} + uint64_t DataLayout::getIndexedOffset(Type *ptrTy, ArrayRef Indices) const { Type *Ty = ptrTy; diff --git a/contrib/llvm/lib/IR/DebugInfo.cpp b/contrib/llvm/lib/IR/DebugInfo.cpp index ec83dca..70a756f 100644 --- a/contrib/llvm/lib/IR/DebugInfo.cpp +++ b/contrib/llvm/lib/IR/DebugInfo.cpp @@ -34,24 +34,6 @@ using namespace llvm::dwarf; // DIDescriptor //===----------------------------------------------------------------------===// -DIDescriptor::DIDescriptor(const DIFile F) : DbgNode(F.DbgNode) { -} - -DIDescriptor::DIDescriptor(const DISubprogram F) : DbgNode(F.DbgNode) { -} - -DIDescriptor::DIDescriptor(const DILexicalBlockFile F) : DbgNode(F.DbgNode) { -} - -DIDescriptor::DIDescriptor(const DILexicalBlock F) : DbgNode(F.DbgNode) { -} - -DIDescriptor::DIDescriptor(const DIVariable F) : DbgNode(F.DbgNode) { -} - -DIDescriptor::DIDescriptor(const DIType F) : DbgNode(F.DbgNode) { -} - bool DIDescriptor::Verify() const { return DbgNode && (DIDerivedType(DbgNode).Verify() || @@ -65,7 +47,7 @@ bool DIDescriptor::Verify() const { DIObjCProperty(DbgNode).Verify() || DITemplateTypeParameter(DbgNode).Verify() || DITemplateValueParameter(DbgNode).Verify() || - DIImportedModule(DbgNode).Verify()); + DIImportedEntity(DbgNode).Verify()); } static Value *getField(const MDNode *DbgNode, unsigned Elt) { @@ -74,10 +56,8 @@ static Value *getField(const MDNode *DbgNode, unsigned Elt) { return DbgNode->getOperand(Elt); } -static const MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) { - if (const MDNode *R = dyn_cast_or_null(getField(DbgNode, Elt))) - return R; - return 0; +static MDNode *getNodeField(const MDNode *DbgNode, unsigned Elt) { + return dyn_cast_or_null(getField(DbgNode, Elt)); } static StringRef getStringField(const MDNode *DbgNode, unsigned Elt) { @@ -95,8 +75,8 @@ uint64_t DIDescriptor::getUInt64Field(unsigned Elt) const { return 0; if (Elt < DbgNode->getNumOperands()) - if (ConstantInt *CI - = dyn_cast_or_null(DbgNode->getOperand(Elt))) + if (ConstantInt *CI = + dyn_cast_or_null(DbgNode->getOperand(Elt))) return CI->getZExtValue(); return 0; @@ -107,21 +87,16 @@ int64_t DIDescriptor::getInt64Field(unsigned Elt) const { return 0; if (Elt < 
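getLargestLegalIntTypeSize scans the layout's native integer widths (the n-spec) and returns the maximum, or 0 when none were declared. A sketch with an illustrative layout string:

    void largestLegalInt() {
      llvm::DataLayout DL("e-n8:16:32:64");            // illustrative n-spec
      unsigned Max = DL.getLargestLegalIntTypeSize();  // 64
      (void)Max;
    }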
DbgNode->getNumOperands()) - if (ConstantInt *CI - = dyn_cast_or_null(DbgNode->getOperand(Elt))) + if (ConstantInt *CI = + dyn_cast_or_null(DbgNode->getOperand(Elt))) return CI->getSExtValue(); return 0; } DIDescriptor DIDescriptor::getDescriptorField(unsigned Elt) const { - if (DbgNode == 0) - return DIDescriptor(); - - if (Elt < DbgNode->getNumOperands()) - return - DIDescriptor(dyn_cast_or_null(DbgNode->getOperand(Elt))); - return DIDescriptor(); + MDNode *Field = getNodeField(DbgNode, Elt); + return DIDescriptor(Field); } GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { @@ -129,7 +104,7 @@ GlobalVariable *DIDescriptor::getGlobalVariableField(unsigned Elt) const { return 0; if (Elt < DbgNode->getNumOperands()) - return dyn_cast_or_null(DbgNode->getOperand(Elt)); + return dyn_cast_or_null(DbgNode->getOperand(Elt)); return 0; } @@ -138,7 +113,7 @@ Constant *DIDescriptor::getConstantField(unsigned Elt) const { return 0; if (Elt < DbgNode->getNumOperands()) - return dyn_cast_or_null(DbgNode->getOperand(Elt)); + return dyn_cast_or_null(DbgNode->getOperand(Elt)); return 0; } @@ -147,7 +122,7 @@ Function *DIDescriptor::getFunctionField(unsigned Elt) const { return 0; if (Elt < DbgNode->getNumOperands()) - return dyn_cast_or_null(DbgNode->getOperand(Elt)); + return dyn_cast_or_null(DbgNode->getOperand(Elt)); return 0; } @@ -156,19 +131,17 @@ void DIDescriptor::replaceFunctionField(unsigned Elt, Function *F) { return; if (Elt < DbgNode->getNumOperands()) { - MDNode *Node = const_cast(DbgNode); + MDNode *Node = const_cast(DbgNode); Node->replaceOperandWith(Elt, F); } } unsigned DIVariable::getNumAddrElements() const { - return DbgNode->getNumOperands()-8; + return DbgNode->getNumOperands() - 8; } /// getInlinedAt - If this variable is inlined then return inline location. -MDNode *DIVariable::getInlinedAt() const { - return dyn_cast_or_null(DbgNode->getOperand(7)); -} +MDNode *DIVariable::getInlinedAt() const { return getNodeField(DbgNode, 7); } //===----------------------------------------------------------------------===// // Predicates @@ -177,7 +150,8 @@ MDNode *DIVariable::getInlinedAt() const { /// isBasicType - Return true if the specified tag is legal for /// DIBasicType. bool DIDescriptor::isBasicType() const { - if (!DbgNode) return false; + if (!DbgNode) + return false; switch (getTag()) { case dwarf::DW_TAG_base_type: case dwarf::DW_TAG_unspecified_type: @@ -189,7 +163,8 @@ bool DIDescriptor::isBasicType() const { /// isDerivedType - Return true if the specified tag is legal for DIDerivedType. bool DIDescriptor::isDerivedType() const { - if (!DbgNode) return false; + if (!DbgNode) + return false; switch (getTag()) { case dwarf::DW_TAG_typedef: case dwarf::DW_TAG_pointer_type: @@ -212,7 +187,8 @@ bool DIDescriptor::isDerivedType() const { /// isCompositeType - Return true if the specified tag is legal for /// DICompositeType. bool DIDescriptor::isCompositeType() const { - if (!DbgNode) return false; + if (!DbgNode) + return false; switch (getTag()) { case dwarf::DW_TAG_array_type: case dwarf::DW_TAG_structure_type: @@ -228,7 +204,8 @@ bool DIDescriptor::isCompositeType() const { /// isVariable - Return true if the specified tag is legal for DIVariable. 
bool DIDescriptor::isVariable() const { - if (!DbgNode) return false; + if (!DbgNode) + return false; switch (getTag()) { case dwarf::DW_TAG_auto_variable: case dwarf::DW_TAG_arg_variable: @@ -256,11 +233,6 @@ bool DIDescriptor::isGlobalVariable() const { getTag() == dwarf::DW_TAG_constant); } -/// isGlobal - Return true if the specified tag is legal for DIGlobal. -bool DIDescriptor::isGlobal() const { - return isGlobalVariable(); -} - /// isUnspecifiedParmeter - Return true if the specified tag is /// DW_TAG_unspecified_parameters. bool DIDescriptor::isUnspecifiedParameter() const { @@ -270,17 +242,19 @@ bool DIDescriptor::isUnspecifiedParameter() const { /// isScope - Return true if the specified tag is one of the scope /// related tag. bool DIDescriptor::isScope() const { - if (!DbgNode) return false; + if (!DbgNode) + return false; switch (getTag()) { case dwarf::DW_TAG_compile_unit: case dwarf::DW_TAG_lexical_block: case dwarf::DW_TAG_subprogram: case dwarf::DW_TAG_namespace: + case dwarf::DW_TAG_file_type: return true; default: break; } - return false; + return isType(); } /// isTemplateTypeParameter - Return true if the specified tag is @@ -292,7 +266,9 @@ bool DIDescriptor::isTemplateTypeParameter() const { /// isTemplateValueParameter - Return true if the specified tag is /// DW_TAG_template_value_parameter. bool DIDescriptor::isTemplateValueParameter() const { - return DbgNode && getTag() == dwarf::DW_TAG_template_value_parameter; + return DbgNode && (getTag() == dwarf::DW_TAG_template_value_parameter || + getTag() == dwarf::DW_TAG_GNU_template_template_param || + getTag() == dwarf::DW_TAG_GNU_template_parameter_pack); } /// isCompileUnit - Return true if the specified tag is DW_TAG_compile_unit. @@ -314,13 +290,13 @@ bool DIDescriptor::isNameSpace() const { /// lexical block with an extra file. bool DIDescriptor::isLexicalBlockFile() const { return DbgNode && getTag() == dwarf::DW_TAG_lexical_block && - (DbgNode->getNumOperands() == 3); + (DbgNode->getNumOperands() == 3); } /// isLexicalBlock - Return true if the specified tag is DW_TAG_lexical_block. bool DIDescriptor::isLexicalBlock() const { return DbgNode && getTag() == dwarf::DW_TAG_lexical_block && - (DbgNode->getNumOperands() > 3); + (DbgNode->getNumOperands() > 3); } /// isSubrange - Return true if the specified tag is DW_TAG_subrange_type. @@ -338,33 +314,28 @@ bool DIDescriptor::isObjCProperty() const { return DbgNode && getTag() == dwarf::DW_TAG_APPLE_property; } -/// \brief Return true if the specified tag is DW_TAG_imported_module. -bool DIDescriptor::isImportedModule() const { - return DbgNode && getTag() == dwarf::DW_TAG_imported_module; +/// \brief Return true if the specified tag is DW_TAG_imported_module or +/// DW_TAG_imported_declaration. +bool DIDescriptor::isImportedEntity() const { + return DbgNode && (getTag() == dwarf::DW_TAG_imported_module || + getTag() == dwarf::DW_TAG_imported_declaration); } //===----------------------------------------------------------------------===// // Simple Descriptor Constructors and other Methods //===----------------------------------------------------------------------===// -DIType::DIType(const MDNode *N) : DIScope(N) { - if (!N) return; - if (!isBasicType() && !isDerivedType() && !isCompositeType()) { - DbgNode = 0; - } -} - unsigned DIArray::getNumElements() const { if (!DbgNode) return 0; return DbgNode->getNumOperands(); } -/// replaceAllUsesWith - Replace all uses of debug info referenced by -/// this descriptor. 
+/// replaceAllUsesWith - Replace all uses of the MDNode used by this
+/// type with the one in the passed descriptor.
 void DIType::replaceAllUsesWith(DIDescriptor &D) {
-  if (!DbgNode)
-    return;
+
+  assert(DbgNode && "Trying to replace an unverified type!");
 
   // Since we use a TrackingVH for the node, its easy for clients to manufacture
   // legitimate situations where they want to replaceAllUsesWith() on something
@@ -372,19 +343,19 @@ void DIType::replaceAllUsesWith(DIDescriptor &D) {
   // this detail by allowing a value to be replaced with replaceAllUsesWith()
   // itself.
   if (DbgNode != D) {
-    MDNode *Node = const_cast<MDNode*>(DbgNode);
+    MDNode *Node = const_cast<MDNode *>(DbgNode);
     const MDNode *DN = D;
     const Value *V = cast_or_null<Value>(DN);
-    Node->replaceAllUsesWith(const_cast<Value*>(V));
+    Node->replaceAllUsesWith(const_cast<Value *>(V));
     MDNode::deleteTemporary(Node);
   }
 }
 
-/// replaceAllUsesWith - Replace all uses of debug info referenced by
-/// this descriptor.
+/// replaceAllUsesWith - Replace all uses of the MDNode used by this
+/// type with the one in D.
 void DIType::replaceAllUsesWith(MDNode *D) {
-  if (!DbgNode)
-    return;
+
+  assert(DbgNode && "Trying to replace an unverified type!");
 
   // Since we use a TrackingVH for the node, its easy for clients to manufacture
   // legitimate situations where they want to replaceAllUsesWith() on something
@@ -392,39 +363,24 @@ void DIType::replaceAllUsesWith(MDNode *D) {
   // this detail by allowing a value to be replaced with replaceAllUsesWith()
   // itself.
   if (DbgNode != D) {
-    MDNode *Node = const_cast<MDNode*>(DbgNode);
+    MDNode *Node = const_cast<MDNode *>(DbgNode);
     const MDNode *DN = D;
     const Value *V = cast_or_null<Value>(DN);
-    Node->replaceAllUsesWith(const_cast<Value*>(V));
+    Node->replaceAllUsesWith(const_cast<Value *>(V));
     MDNode::deleteTemporary(Node);
   }
 }
 
-/// isUnsignedDIType - Return true if type encoding is unsigned.
-bool DIType::isUnsignedDIType() {
-  DIDerivedType DTy(DbgNode);
-  if (DTy.Verify())
-    return DTy.getTypeDerivedFrom().isUnsignedDIType();
-
-  DIBasicType BTy(DbgNode);
-  if (BTy.Verify()) {
-    unsigned Encoding = BTy.getEncoding();
-    if (Encoding == dwarf::DW_ATE_unsigned ||
-        Encoding == dwarf::DW_ATE_unsigned_char ||
-        Encoding == dwarf::DW_ATE_boolean)
-      return true;
-  }
-  return false;
-}
-
 /// Verify - Verify that a compile unit is well formed.
 bool DICompileUnit::Verify() const {
   if (!isCompileUnit())
     return false;
-  StringRef N = getFilename();
-  if (N.empty())
+
+  // Don't bother verifying the compilation directory or producer string
+  // as those could be empty.
+  if (getFilename().empty())
     return false;
-  // It is possible that directory and produce string is empty.
+
   return DbgNode->getNumOperands() == 13;
 }
 
@@ -433,31 +389,85 @@ bool DIObjCProperty::Verify() const {
   if (!isObjCProperty())
     return false;
 
-  DIType Ty = getType();
-  if (!Ty.Verify()) return false;
-
   // Don't worry about the rest of the strings for now.
   return DbgNode->getNumOperands() == 8;
 }
 
+/// Check if a field at position Elt of a MDNode is a MDNode.
+/// We currently allow an empty string and an integer.
+/// But we don't allow a non-empty string in a MDNode field.
+static bool fieldIsMDNode(const MDNode *DbgNode, unsigned Elt) {
+  // FIXME: This function should return true, if the field is null or the field
+  // is indeed a MDNode: return !Fld || isa<MDNode>(Fld).
+  Value *Fld = getField(DbgNode, Elt);
+  if (Fld && isa<MDString>(Fld) && !cast<MDString>(Fld)->getString().empty())
+    return false;
+  return true;
+}
+
+/// Check if a field at position Elt of a MDNode is a MDString.
+static bool fieldIsMDString(const MDNode *DbgNode, unsigned Elt) {
+  Value *Fld = getField(DbgNode, Elt);
+  return !Fld || isa<MDString>(Fld);
+}
+
+/// Check if a value can be a reference to a type.
+static bool isTypeRef(const Value *Val) {
+  return !Val ||
+         (isa<MDString>(Val) && !cast<MDString>(Val)->getString().empty()) ||
+         (isa<MDNode>(Val) && DIType(cast<MDNode>(Val)).isType());
+}
+
+/// Check if a field at position Elt of a MDNode can be a reference to a type.
+static bool fieldIsTypeRef(const MDNode *DbgNode, unsigned Elt) {
+  Value *Fld = getField(DbgNode, Elt);
+  return isTypeRef(Fld);
+}
+
+/// Check if a value can be a ScopeRef.
+static bool isScopeRef(const Value *Val) {
+  return !Val ||
+         (isa<MDString>(Val) && !cast<MDString>(Val)->getString().empty()) ||
+         (isa<MDNode>(Val) && DIScope(cast<MDNode>(Val)).isScope());
+}
+
+/// Check if a field at position Elt of a MDNode can be a ScopeRef.
+static bool fieldIsScopeRef(const MDNode *DbgNode, unsigned Elt) {
+  Value *Fld = getField(DbgNode, Elt);
+  return isScopeRef(Fld);
+}
+
 /// Verify - Verify that a type descriptor is well formed.
 bool DIType::Verify() const {
   if (!isType())
     return false;
-  if (getContext() && !getContext().Verify())
+  // Make sure Context @ field 2 is MDNode.
+  if (!fieldIsScopeRef(DbgNode, 2))
     return false;
-  unsigned Tag = getTag();
+
+  // FIXME: Sink this into the various subclass verifies.
+  uint16_t Tag = getTag();
   if (!isBasicType() && Tag != dwarf::DW_TAG_const_type &&
       Tag != dwarf::DW_TAG_volatile_type && Tag != dwarf::DW_TAG_pointer_type &&
       Tag != dwarf::DW_TAG_ptr_to_member_type &&
      Tag != dwarf::DW_TAG_reference_type &&
      Tag != dwarf::DW_TAG_rvalue_reference_type &&
-      Tag != dwarf::DW_TAG_restrict_type &&
-      Tag != dwarf::DW_TAG_array_type &&
+      Tag != dwarf::DW_TAG_restrict_type && Tag != dwarf::DW_TAG_array_type &&
       Tag != dwarf::DW_TAG_enumeration_type &&
       Tag != dwarf::DW_TAG_subroutine_type &&
+      Tag != dwarf::DW_TAG_inheritance && Tag != dwarf::DW_TAG_friend &&
       getFilename().empty())
     return false;
+  // DIType is abstract, it should be a BasicType, a DerivedType or
+  // a CompositeType.
+  if (isBasicType())
+    DIBasicType(DbgNode).Verify();
+  else if (isCompositeType())
+    DICompositeType(DbgNode).Verify();
+  else if (isDerivedType())
+    DIDerivedType(DbgNode).Verify();
+  else
+    return false;
   return true;
 }
 
@@ -468,6 +478,14 @@ bool DIBasicType::Verify() const {
 
 /// Verify - Verify that a derived type descriptor is well formed.
 bool DIDerivedType::Verify() const {
+  // Make sure DerivedFrom @ field 9 is TypeRef.
+  if (!fieldIsTypeRef(DbgNode, 9))
+    return false;
+  if (getTag() == dwarf::DW_TAG_ptr_to_member_type)
+    // Make sure ClassType @ field 10 is a TypeRef.
+    if (!fieldIsTypeRef(DbgNode, 10))
+      return false;
+
   return isDerivedType() && DbgNode->getNumOperands() >= 10 &&
          DbgNode->getNumOperands() <= 14;
 }
 
@@ -476,10 +494,18 @@ bool DIDerivedType::Verify() const {
 bool DICompositeType::Verify() const {
   if (!isCompositeType())
     return false;
-  if (getContext() && !getContext().Verify())
+
+  // Make sure DerivedFrom @ field 9 and ContainingType @ field 12 are TypeRef.
+  if (!fieldIsTypeRef(DbgNode, 9))
+    return false;
+  if (!fieldIsTypeRef(DbgNode, 12))
+    return false;
+
+  // Make sure the type identifier at field 14 is MDString, it can be null.
+  if (!fieldIsMDString(DbgNode, 14))
     return false;
 
-  return DbgNode->getNumOperands() >= 10 && DbgNode->getNumOperands() <= 14;
+  return DbgNode->getNumOperands() == 15;
 }
 
 /// Verify - Verify that a subprogram descriptor is well formed.
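The ScopeRef/TypeRef helpers above encode the reference convention behind the new
type uniquing: a reference field may be null, a non-empty MDString identifier, or
a direct MDNode that verifies as the right descriptor. A minimal sketch of the
convention (illustrative only; isTypeRef is file-local and the identifier string
is hypothetical):

    // Assumes an existing LLVMContext Ctx.
    Value *Absent = 0;                            // missing reference: accepted
    Value *ByName = MDString::get(Ctx, "_ZTS1S"); // identifier reference: accepted
    Value *Empty  = MDString::get(Ctx, "");       // empty identifier: rejected
    // isTypeRef(Absent) and isTypeRef(ByName) hold; isTypeRef(Empty) does not.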
@@ -487,11 +513,13 @@ bool DISubprogram::Verify() const {
   if (!isSubprogram())
     return false;
 
-  if (getContext() && !getContext().Verify())
+  // Make sure context @ field 2 is a ScopeRef and type @ field 7 is a MDNode.
+  if (!fieldIsScopeRef(DbgNode, 2))
     return false;
-
-  DICompositeType Ty = getType();
-  if (!Ty.Verify())
+  if (!fieldIsMDNode(DbgNode, 7))
+    return false;
+  // Containing type @ field 12.
+  if (!fieldIsTypeRef(DbgNode, 12))
     return false;
   return DbgNode->getNumOperands() == 20;
 }
 
@@ -503,15 +531,13 @@ bool DIGlobalVariable::Verify() const {
 
   if (getDisplayName().empty())
     return false;
-
-  if (getContext() && !getContext().Verify())
+  // Make sure context @ field 2 and type @ field 8 are MDNodes.
+  if (!fieldIsMDNode(DbgNode, 2))
     return false;
-
-  DIType Ty = getType();
-  if (!Ty.Verify())
+  if (!fieldIsMDNode(DbgNode, 8))
     return false;
-
-  if (!getGlobal() && !getConstant())
+  // Make sure StaticDataMemberDeclaration @ field 12 is MDNode.
+  if (!fieldIsMDNode(DbgNode, 12))
     return false;
 
   return DbgNode->getNumOperands() == 13;
@@ -522,13 +548,11 @@ bool DIVariable::Verify() const {
   if (!isVariable())
     return false;
 
-  if (getContext() && !getContext().Verify())
+  // Make sure context @ field 1 and type @ field 5 are MDNodes.
+  if (!fieldIsMDNode(DbgNode, 1))
     return false;
-
-  DIType Ty = getType();
-  if (!Ty.Verify())
+  if (!fieldIsMDNode(DbgNode, 5))
     return false;
-
   return DbgNode->getNumOperands() >= 8;
 }
 
@@ -548,9 +572,7 @@ bool DINameSpace::Verify() const {
 }
 
 /// \brief Retrieve the MDNode for the directory/file pair.
-MDNode *DIFile::getFileNode() const {
-  return const_cast<MDNode*>(getNodeField(DbgNode, 1));
-}
+MDNode *DIFile::getFileNode() const { return getNodeField(DbgNode, 1); }
 
 /// \brief Verify that the file descriptor is well formed.
 bool DIFile::Verify() const {
@@ -588,62 +610,82 @@ bool DITemplateValueParameter::Verify() const {
 }
 
 /// \brief Verify that the imported module descriptor is well formed.
-bool DIImportedModule::Verify() const {
-  return isImportedModule() && DbgNode->getNumOperands() == 4;
+bool DIImportedEntity::Verify() const {
+  return isImportedEntity() &&
+         (DbgNode->getNumOperands() == 4 || DbgNode->getNumOperands() == 5);
 }
 
-/// getOriginalTypeSize - If this type is derived from a base type then
-/// return base type size.
-uint64_t DIDerivedType::getOriginalTypeSize() const {
-  unsigned Tag = getTag();
-
-  if (Tag != dwarf::DW_TAG_member && Tag != dwarf::DW_TAG_typedef &&
-      Tag != dwarf::DW_TAG_const_type && Tag != dwarf::DW_TAG_volatile_type &&
-      Tag != dwarf::DW_TAG_restrict_type)
-    return getSizeInBits();
-
-  DIType BaseType = getTypeDerivedFrom();
-
-  // If this type is not derived from any type then take conservative approach.
-  if (!BaseType.isValid())
-    return getSizeInBits();
-
-  // If this is a derived type, go ahead and get the base type, unless it's a
-  // reference then it's just the size of the field. Pointer types have no need
-  // of this since they're a different type of qualification on the type.
-  if (BaseType.getTag() == dwarf::DW_TAG_reference_type ||
-      BaseType.getTag() == dwarf::DW_TAG_rvalue_reference_type)
-    return getSizeInBits();
-
-  if (BaseType.isDerivedType())
-    return DIDerivedType(BaseType).getOriginalTypeSize();
+/// getObjCProperty - Return property node, if this ivar is associated with one.
+MDNode *DIDerivedType::getObjCProperty() const {
+  return getNodeField(DbgNode, 10);
+}
 
-  return BaseType.getSizeInBits();
+MDString *DICompositeType::getIdentifier() const {
+  return cast_or_null<MDString>(getField(DbgNode, 14));
 }
 
-/// getObjCProperty - Return property node, if this ivar is associated with one.
-MDNode *DIDerivedType::getObjCProperty() const {
-  if (DbgNode->getNumOperands() <= 10)
-    return NULL;
-  return dyn_cast_or_null<MDNode>(DbgNode->getOperand(10));
+#ifndef NDEBUG
+static void VerifySubsetOf(const MDNode *LHS, const MDNode *RHS) {
+  for (unsigned i = 0; i != LHS->getNumOperands(); ++i) {
+    // Skip the 'empty' list (that's a single i32 0, rather than truly empty).
+    if (i == 0 && isa<ConstantInt>(LHS->getOperand(i)))
+      continue;
+    const MDNode *E = cast<MDNode>(LHS->getOperand(i));
+    bool found = false;
+    for (unsigned j = 0; !found && j != RHS->getNumOperands(); ++j)
+      found = E == RHS->getOperand(j);
+    assert(found && "Losing a member during member list replacement");
+  }
 }
+#endif
 
 /// \brief Set the array of member DITypes.
 void DICompositeType::setTypeArray(DIArray Elements, DIArray TParams) {
-  assert((!TParams || DbgNode->getNumOperands() == 14) &&
+  assert((!TParams || DbgNode->getNumOperands() == 15) &&
          "If you're setting the template parameters this should include a slot "
         "for that!");
   TrackingVH<MDNode> N(*this);
-  N->replaceOperandWith(10, Elements);
+  if (Elements) {
+#ifndef NDEBUG
+    // Check that the new list of members contains all the old members as well.
+    if (const MDNode *El = cast_or_null<MDNode>(N->getOperand(10)))
+      VerifySubsetOf(El, Elements);
+#endif
+    N->replaceOperandWith(10, Elements);
+  }
   if (TParams)
     N->replaceOperandWith(13, TParams);
   DbgNode = N;
 }
 
+void DICompositeType::addMember(DIDescriptor D) {
+  SmallVector<Value *, 16> M;
+  DIArray OrigM = getTypeArray();
+  unsigned Elements = OrigM.getNumElements();
+  if (Elements == 1 && !OrigM.getElement(0))
+    Elements = 0;
+  M.reserve(Elements + 1);
+  for (unsigned i = 0; i != Elements; ++i)
+    M.push_back(OrigM.getElement(i));
+  M.push_back(D);
+  setTypeArray(DIArray(MDNode::get(DbgNode->getContext(), M)));
+}
+
+/// Generate a reference to this DIType. Uses the type identifier instead
+/// of the actual MDNode if possible, to help type uniquing.
+DIScopeRef DIScope::getRef() const {
+  if (!isCompositeType())
+    return DIScopeRef(*this);
+  DICompositeType DTy(DbgNode);
+  if (!DTy.getIdentifier())
+    return DIScopeRef(*this);
+  return DIScopeRef(DTy.getIdentifier());
+}
+
 /// \brief Set the containing type.
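getRef above is the producer side of the reference convention: a composite type
that carries an identifier is referenced by the MDString, everything else by its
own node. A sketch of the resulting behavior (variable names and the identifier
are hypothetical; assumes nodes that verify as the named descriptors):

    DICompositeType CT(StructNode);  // suppose getIdentifier() is "_ZTS1S"
    DIScopeRef R1 = CT.getRef();     // wraps the MDString "_ZTS1S"
    DIScope NS(NamespaceNode);       // not a composite type
    DIScopeRef R2 = NS.getRef();     // wraps NamespaceNode itself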
 void DICompositeType::setContainingType(DICompositeType ContainingType) {
   TrackingVH<MDNode> N(*this);
-  N->replaceOperandWith(12, ContainingType);
+  N->replaceOperandWith(12, ContainingType.getRef());
   DbgNode = N;
 }
 
@@ -673,24 +715,59 @@ bool DISubprogram::describes(const Function *F) {
 }
 
 unsigned DISubprogram::isOptimized() const {
-  assert (DbgNode && "Invalid subprogram descriptor!");
+  assert(DbgNode && "Invalid subprogram descriptor!");
   if (DbgNode->getNumOperands() == 15)
     return getUnsignedField(14);
   return 0;
 }
 
 MDNode *DISubprogram::getVariablesNodes() const {
-  if (!DbgNode || DbgNode->getNumOperands() <= 18)
-    return NULL;
-  return dyn_cast_or_null<MDNode>(DbgNode->getOperand(18));
+  return getNodeField(DbgNode, 18);
 }
 
 DIArray DISubprogram::getVariables() const {
-  if (!DbgNode || DbgNode->getNumOperands() <= 18)
-    return DIArray();
-  if (MDNode *T = dyn_cast_or_null<MDNode>(DbgNode->getOperand(18)))
-    return DIArray(T);
-  return DIArray();
+  return DIArray(getNodeField(DbgNode, 18));
+}
+
+Value *DITemplateValueParameter::getValue() const {
+  return getField(DbgNode, 4);
+}
+
+// If the current node has a parent scope then return that,
+// else return an empty scope.
+DIScopeRef DIScope::getContext() const {
+
+  if (isType())
+    return DIType(DbgNode).getContext();
+
+  if (isSubprogram())
+    return DIScopeRef(DISubprogram(DbgNode).getContext());
+
+  if (isLexicalBlock())
+    return DIScopeRef(DILexicalBlock(DbgNode).getContext());
+
+  if (isLexicalBlockFile())
+    return DIScopeRef(DILexicalBlockFile(DbgNode).getContext());
+
+  if (isNameSpace())
+    return DIScopeRef(DINameSpace(DbgNode).getContext());
+
+  assert((isFile() || isCompileUnit()) && "Unhandled type of scope.");
+  return DIScopeRef(NULL);
+}
+
+// If the scope node has a name, return that, else return an empty string.
+StringRef DIScope::getName() const {
+  if (isType())
+    return DIType(DbgNode).getName();
+  if (isSubprogram())
+    return DISubprogram(DbgNode).getName();
+  if (isNameSpace())
+    return DINameSpace(DbgNode).getName();
+  assert((isLexicalBlock() || isLexicalBlockFile() || isFile() ||
+          isCompileUnit()) &&
+         "Unhandled type of scope.");
+  return StringRef();
+}
 
 StringRef DIScope::getFilename() const {
@@ -709,54 +786,51 @@ DIArray DICompileUnit::getEnumTypes() const {
   if (!DbgNode || DbgNode->getNumOperands() < 13)
     return DIArray();
 
-  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(7)))
-    return DIArray(N);
-  return DIArray();
+  return DIArray(getNodeField(DbgNode, 7));
 }
 
 DIArray DICompileUnit::getRetainedTypes() const {
   if (!DbgNode || DbgNode->getNumOperands() < 13)
     return DIArray();
 
-  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(8)))
-    return DIArray(N);
-  return DIArray();
+  return DIArray(getNodeField(DbgNode, 8));
 }
 
 DIArray DICompileUnit::getSubprograms() const {
   if (!DbgNode || DbgNode->getNumOperands() < 13)
     return DIArray();
 
-  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(9)))
-    return DIArray(N);
-  return DIArray();
+  return DIArray(getNodeField(DbgNode, 9));
 }
 
-
 DIArray DICompileUnit::getGlobalVariables() const {
   if (!DbgNode || DbgNode->getNumOperands() < 13)
     return DIArray();
 
-  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(10)))
-    return DIArray(N);
-  return DIArray();
+  return DIArray(getNodeField(DbgNode, 10));
 }
 
-DIArray DICompileUnit::getImportedModules() const {
+DIArray DICompileUnit::getImportedEntities() const {
   if (!DbgNode || DbgNode->getNumOperands() < 13)
     return DIArray();
 
-  if (MDNode *N = dyn_cast_or_null<MDNode>(DbgNode->getOperand(11)))
-    return DIArray(N);
-  return DIArray();
+  return DIArray(getNodeField(DbgNode, 11));
 }
 
-/// fixupObjcLikeName - Replace contains special characters used
+/// fixupSubprogramName - Replace contains special characters used
 /// in a typical Objective-C names with '.' in a given string.
-static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
+static void fixupSubprogramName(DISubprogram Fn, SmallVectorImpl<char> &Out) {
+  StringRef FName =
+      Fn.getFunction() ? Fn.getFunction()->getName() : Fn.getName();
+  FName = Function::getRealLinkageName(FName);
+
+  StringRef Prefix("llvm.dbg.lv.");
+  Out.reserve(FName.size() + Prefix.size());
+  Out.append(Prefix.begin(), Prefix.end());
+
   bool isObjCLike = false;
-  for (size_t i = 0, e = Str.size(); i < e; ++i) {
-    char C = Str[i];
+  for (size_t i = 0, e = FName.size(); i < e; ++i) {
+    char C = FName[i];
     if (C == '[')
       isObjCLike = true;
 
@@ -771,33 +845,16 @@ static void fixupObjcLikeName(StringRef Str, SmallVectorImpl<char> &Out) {
 /// getFnSpecificMDNode - Return a NameMDNode, if available, that is
 /// suitable to hold function specific information.
 NamedMDNode *llvm::getFnSpecificMDNode(const Module &M, DISubprogram Fn) {
-  SmallString<32> Name = StringRef("llvm.dbg.lv.");
-  StringRef FName = "fn";
-  if (Fn.getFunction())
-    FName = Fn.getFunction()->getName();
-  else
-    FName = Fn.getName();
-  char One = '\1';
-  if (FName.startswith(StringRef(&One, 1)))
-    FName = FName.substr(1);
-  fixupObjcLikeName(FName, Name);
+  SmallString<32> Name;
+  fixupSubprogramName(Fn, Name);
   return M.getNamedMetadata(Name.str());
 }
 
 /// getOrInsertFnSpecificMDNode - Return a NameMDNode that is suitable
 /// to hold function specific information.
 NamedMDNode *llvm::getOrInsertFnSpecificMDNode(Module &M, DISubprogram Fn) {
-  SmallString<32> Name = StringRef("llvm.dbg.lv.");
-  StringRef FName = "fn";
-  if (Fn.getFunction())
-    FName = Fn.getFunction()->getName();
-  else
-    FName = Fn.getName();
-  char One = '\1';
-  if (FName.startswith(StringRef(&One, 1)))
-    FName = FName.substr(1);
-  fixupObjcLikeName(FName, Name);
-
+  SmallString<32> Name;
+  fixupSubprogramName(Fn, Name);
   return M.getOrInsertNamedMetadata(Name.str());
 }
 
@@ -810,8 +867,7 @@ DIVariable llvm::createInlinedVariable(MDNode *DV, MDNode *InlinedScope,
   SmallVector<Value *, 16> Elts;
   // Insert inlined scope as 7th element.
   for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
-    i == 7 ? Elts.push_back(InlinedScope) :
-             Elts.push_back(DV->getOperand(i));
+    i == 7 ? Elts.push_back(InlinedScope) : Elts.push_back(DV->getOperand(i));
   return DIVariable(MDNode::get(VMContext, Elts));
 }
 
@@ -820,9 +876,8 @@ DIVariable llvm::cleanseInlinedVariable(MDNode *DV, LLVMContext &VMContext) {
   SmallVector<Value *, 16> Elts;
   // Insert inlined scope as 7th element.
   for (unsigned i = 0, e = DV->getNumOperands(); i != e; ++i)
-    i == 7 ?
-      Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext))):
-      Elts.push_back(DV->getOperand(i));
+    i == 7 ? Elts.push_back(Constant::getNullValue(Type::getInt32Ty(VMContext)))
           : Elts.push_back(DV->getOperand(i));
   return DIVariable(MDNode::get(VMContext, Elts));
 }
 
@@ -846,31 +901,70 @@ DICompositeType llvm::getDICompositeType(DIType T) {
   if (T.isCompositeType())
     return DICompositeType(T);
 
-  if (T.isDerivedType())
-    return getDICompositeType(DIDerivedType(T).getTypeDerivedFrom());
+  if (T.isDerivedType()) {
+    // This function is currently used by dragonegg and dragonegg does
+    // not generate identifier for types, so using an empty map to resolve
+    // DerivedFrom should be fine.
+    DITypeIdentifierMap EmptyMap;
+    return getDICompositeType(
+        DIDerivedType(T).getTypeDerivedFrom().resolve(EmptyMap));
+  }
 
   return DICompositeType();
 }
 
-/// isSubprogramContext - Return true if Context is either a subprogram
-/// or another context nested inside a subprogram.
-bool llvm::isSubprogramContext(const MDNode *Context) {
-  if (!Context)
-    return false;
-  DIDescriptor D(Context);
-  if (D.isSubprogram())
-    return true;
-  if (D.isType())
-    return isSubprogramContext(DIType(Context).getContext());
-  return false;
+/// Update DITypeIdentifierMap by going through retained types of each CU.
+DITypeIdentifierMap
+llvm::generateDITypeIdentifierMap(const NamedMDNode *CU_Nodes) {
+  DITypeIdentifierMap Map;
+  for (unsigned CUi = 0, CUe = CU_Nodes->getNumOperands(); CUi != CUe; ++CUi) {
+    DICompileUnit CU(CU_Nodes->getOperand(CUi));
+    DIArray Retain = CU.getRetainedTypes();
+    for (unsigned Ti = 0, Te = Retain.getNumElements(); Ti != Te; ++Ti) {
+      if (!Retain.getElement(Ti).isCompositeType())
+        continue;
+      DICompositeType Ty(Retain.getElement(Ti));
+      if (MDString *TypeId = Ty.getIdentifier()) {
+        // Definition has priority over declaration.
+        // Try to insert (TypeId, Ty) to Map.
+        std::pair<DITypeIdentifierMap::iterator, bool> P =
+            Map.insert(std::make_pair(TypeId, Ty));
+        // If TypeId already exists in Map and this is a definition, replace
+        // whatever we had (declaration or definition) with the definition.
+        if (!P.second && !Ty.isForwardDecl())
+          P.first->second = Ty;
+      }
+    }
+  }
+  return Map;
 }
 
 //===----------------------------------------------------------------------===//
 // DebugInfoFinder implementations.
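A sketch of the intended call pattern for generateDITypeIdentifierMap, mirroring
what DebugInfoFinder::InitializeTypeMap does in the hunk that follows (M is an
existing Module):

    if (NamedMDNode *CUs = M.getNamedMetadata("llvm.dbg.cu")) {
      DITypeIdentifierMap Map = generateDITypeIdentifierMap(CUs);
      // With the map built, an identifier-based DITypeRef can be resolved
      // back to its defining node, e.g. DIType T = SomeRef.resolve(Map);
    }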
 //===----------------------------------------------------------------------===//
 
+void DebugInfoFinder::reset() {
+  CUs.clear();
+  SPs.clear();
+  GVs.clear();
+  TYs.clear();
+  Scopes.clear();
+  NodesSeen.clear();
+  TypeIdentifierMap.clear();
+  TypeMapInitialized = false;
+}
+
+void DebugInfoFinder::InitializeTypeMap(const Module &M) {
+  if (!TypeMapInitialized)
+    if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
+      TypeIdentifierMap = generateDITypeIdentifierMap(CU_Nodes);
+      TypeMapInitialized = true;
+    }
+}
+
 /// processModule - Process entire module and collect debug info.
 void DebugInfoFinder::processModule(const Module &M) {
+  InitializeTypeMap(M);
   if (NamedMDNode *CU_Nodes = M.getNamedMetadata("llvm.dbg.cu")) {
     for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) {
       DICompileUnit CU(CU_Nodes->getOperand(i));
@@ -878,8 +972,10 @@ void DebugInfoFinder::processModule(const Module &M) {
       DIArray GVs = CU.getGlobalVariables();
       for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) {
         DIGlobalVariable DIG(GVs.getElement(i));
-        if (addGlobalVariable(DIG))
+        if (addGlobalVariable(DIG)) {
+          processScope(DIG.getContext());
           processType(DIG.getType());
+        }
       }
       DIArray SPs = CU.getSubprograms();
       for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i)
@@ -890,36 +986,38 @@ void DebugInfoFinder::processModule(const Module &M) {
       DIArray RetainedTypes = CU.getRetainedTypes();
       for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i)
         processType(DIType(RetainedTypes.getElement(i)));
-      // FIXME: We really shouldn't be bailing out after visiting just one CU
-      return;
+      DIArray Imports = CU.getImportedEntities();
+      for (unsigned i = 0, e = Imports.getNumElements(); i != e; ++i) {
+        DIImportedEntity Import = DIImportedEntity(Imports.getElement(i));
+        DIDescriptor Entity = Import.getEntity();
+        if (Entity.isType())
+          processType(DIType(Entity));
+        else if (Entity.isSubprogram())
+          processSubprogram(DISubprogram(Entity));
+        else if (Entity.isNameSpace())
+          processScope(DINameSpace(Entity).getContext());
+      }
     }
   }
 }
 
 /// processLocation - Process DILocation.
-void DebugInfoFinder::processLocation(DILocation Loc) {
-  if (!Loc.Verify()) return;
-  DIDescriptor S(Loc.getScope());
-  if (S.isCompileUnit())
-    addCompileUnit(DICompileUnit(S));
-  else if (S.isSubprogram())
-    processSubprogram(DISubprogram(S));
-  else if (S.isLexicalBlock())
-    processLexicalBlock(DILexicalBlock(S));
-  else if (S.isLexicalBlockFile()) {
-    DILexicalBlockFile DBF = DILexicalBlockFile(S);
-    processLexicalBlock(DILexicalBlock(DBF.getScope()));
-  }
-  processLocation(Loc.getOrigLocation());
+void DebugInfoFinder::processLocation(const Module &M, DILocation Loc) {
+  if (!Loc)
+    return;
+  InitializeTypeMap(M);
+  processScope(Loc.getScope());
+  processLocation(M, Loc.getOrigLocation());
 }
 
 /// processType - Process DIType.
 void DebugInfoFinder::processType(DIType DT) {
   if (!addType(DT))
     return;
+  processScope(DT.getContext().resolve(TypeIdentifierMap));
   if (DT.isCompositeType()) {
     DICompositeType DCT(DT);
-    processType(DCT.getTypeDerivedFrom());
+    processType(DCT.getTypeDerivedFrom().resolve(TypeIdentifierMap));
     DIArray DA = DCT.getTypeArray();
     for (unsigned i = 0, e = DA.getNumElements(); i != e; ++i) {
       DIDescriptor D = DA.getElement(i);
@@ -930,7 +1028,35 @@ void DebugInfoFinder::processType(DIType DT) {
     }
   } else if (DT.isDerivedType()) {
     DIDerivedType DDT(DT);
-    processType(DDT.getTypeDerivedFrom());
+    processType(DDT.getTypeDerivedFrom().resolve(TypeIdentifierMap));
+  }
+}
+
+void DebugInfoFinder::processScope(DIScope Scope) {
+  if (Scope.isType()) {
+    DIType Ty(Scope);
+    processType(Ty);
+    return;
+  }
+  if (Scope.isCompileUnit()) {
+    addCompileUnit(DICompileUnit(Scope));
+    return;
+  }
+  if (Scope.isSubprogram()) {
+    processSubprogram(DISubprogram(Scope));
+    return;
+  }
+  if (!addScope(Scope))
+    return;
+  if (Scope.isLexicalBlock()) {
+    DILexicalBlock LB(Scope);
+    processScope(LB.getContext());
+  } else if (Scope.isLexicalBlockFile()) {
+    DILexicalBlockFile LBF = DILexicalBlockFile(Scope);
+    processScope(LBF.getScope());
+  } else if (Scope.isNameSpace()) {
+    DINameSpace NS(Scope);
+    processScope(NS.getContext());
   }
 }
 
@@ -942,8 +1068,7 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
   else if (Context.isLexicalBlockFile()) {
     DILexicalBlockFile DBF = DILexicalBlockFile(Context);
     return processLexicalBlock(DILexicalBlock(DBF.getScope()));
-  }
-  else
+  } else
     return processSubprogram(DISubprogram(Context));
 }
 
@@ -951,13 +1076,30 @@ void DebugInfoFinder::processLexicalBlock(DILexicalBlock LB) {
 void DebugInfoFinder::processSubprogram(DISubprogram SP) {
   if (!addSubprogram(SP))
     return;
+  processScope(SP.getContext().resolve(TypeIdentifierMap));
   processType(SP.getType());
+  DIArray TParams = SP.getTemplateParams();
+  for (unsigned I = 0, E = TParams.getNumElements(); I != E; ++I) {
+    DIDescriptor Element = TParams.getElement(I);
+    if (Element.isTemplateTypeParameter()) {
+      DITemplateTypeParameter TType(Element);
+      processScope(TType.getContext().resolve(TypeIdentifierMap));
+      processType(TType.getType().resolve(TypeIdentifierMap));
+    } else if (Element.isTemplateValueParameter()) {
+      DITemplateValueParameter TVal(Element);
+      processScope(TVal.getContext().resolve(TypeIdentifierMap));
+      processType(TVal.getType().resolve(TypeIdentifierMap));
+    }
+  }
 }
 
 /// processDeclare - Process DbgDeclareInst.
-void DebugInfoFinder::processDeclare(const DbgDeclareInst *DDI) {
+void DebugInfoFinder::processDeclare(const Module &M,
+                                     const DbgDeclareInst *DDI) {
   MDNode *N = dyn_cast<MDNode>(DDI->getVariable());
-  if (!N) return;
+  if (!N)
+    return;
+  InitializeTypeMap(M);
 
   DIDescriptor DV(N);
   if (!DV.isVariable())
@@ -965,12 +1107,29 @@ void DebugInfoFinder::processDeclare(const DbgDeclareInst *DDI) {
 
   if (!NodesSeen.insert(DV))
     return;
+  processScope(DIVariable(N).getContext());
+  processType(DIVariable(N).getType());
+}
+
+void DebugInfoFinder::processValue(const Module &M, const DbgValueInst *DVI) {
+  MDNode *N = dyn_cast<MDNode>(DVI->getVariable());
+  if (!N)
+    return;
+  InitializeTypeMap(M);
+
+  DIDescriptor DV(N);
+  if (!DV.isVariable())
+    return;
+
+  if (!NodesSeen.insert(DV))
+    return;
+  processScope(DIVariable(N).getContext());
   processType(DIVariable(N).getType());
 }
 
 /// addType - Add type into Tys.
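With the Module parameter threaded through processLocation/processDeclare/
processValue, a client now drives the finder roughly as below (a sketch; assumes
the 3.4 DebugInfoFinder accessors such as compile_unit_count(), and requires
llvm/IntrinsicInst.h for DbgDeclareInst):

    DebugInfoFinder Finder;
    Finder.processModule(M); // collects CUs, subprograms, globals, types, scopes
    for (Module::const_iterator F = M.begin(), FE = M.end(); F != FE; ++F)
      for (Function::const_iterator B = F->begin(), BE = F->end(); B != BE; ++B)
        for (BasicBlock::const_iterator I = B->begin(), IE = B->end(); I != IE; ++I)
          if (const DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(I))
            Finder.processDeclare(M, DDI); // note the new Module argument
    unsigned NumCUs = Finder.compile_unit_count();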
 bool DebugInfoFinder::addType(DIType DT) {
-  if (!DT.isValid())
+  if (!DT)
     return false;
 
   if (!NodesSeen.insert(DT))
@@ -982,9 +1141,8 @@ bool DebugInfoFinder::addType(DIType DT) {
 
 /// addCompileUnit - Add compile unit into CUs.
 bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
-  if (!CU.Verify())
+  if (!CU)
     return false;
-
   if (!NodesSeen.insert(CU))
     return false;
 
@@ -994,7 +1152,7 @@ bool DebugInfoFinder::addCompileUnit(DICompileUnit CU) {
 
 /// addGlobalVariable - Add global variable into GVs.
 bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
-  if (!DIDescriptor(DIG).isGlobalVariable())
+  if (!DIG)
     return false;
 
   if (!NodesSeen.insert(DIG))
@@ -1006,7 +1164,7 @@ bool DebugInfoFinder::addGlobalVariable(DIGlobalVariable DIG) {
 
 // addSubprogram - Add subprgoram into SPs.
 bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
-  if (!DIDescriptor(SP).isSubprogram())
+  if (!SP)
     return false;
 
   if (!NodesSeen.insert(SP))
@@ -1016,18 +1174,33 @@ bool DebugInfoFinder::addSubprogram(DISubprogram SP) {
   return true;
 }
 
+bool DebugInfoFinder::addScope(DIScope Scope) {
+  if (!Scope)
+    return false;
+  // FIXME: Ocaml binding generates a scope with no content, we treat it
+  // as null for now.
+  if (Scope->getNumOperands() == 0)
+    return false;
+  if (!NodesSeen.insert(Scope))
+    return false;
+  Scopes.push_back(Scope);
+  return true;
+}
+
 //===----------------------------------------------------------------------===//
 // DIDescriptor: dump routines for all descriptors.
 //===----------------------------------------------------------------------===//
 
 /// dump - Print descriptor to dbgs() with a newline.
 void DIDescriptor::dump() const {
-  print(dbgs()); dbgs() << '\n';
+  print(dbgs());
+  dbgs() << '\n';
 }
 
 /// print - Print descriptor.
 void DIDescriptor::print(raw_ostream &OS) const {
-  if (!DbgNode) return;
+  if (!DbgNode)
+    return;
 
   if (const char *Tag = dwarf::TagString(getTag()))
     OS << "[ " << Tag << " ]";
@@ -1089,7 +1262,8 @@ void DIEnumerator::printInternal(raw_ostream &OS) const {
 }
 
 void DIType::printInternal(raw_ostream &OS) const {
-  if (!DbgNode) return;
+  if (!DbgNode)
+    return;
 
   StringRef Res = getName();
   if (!Res.empty())
@@ -1097,13 +1271,11 @@ void DIType::printInternal(raw_ostream &OS) const {
 
   // TODO: Print context?
-  OS << " [line " << getLineNumber()
-     << ", size " << getSizeInBits()
-     << ", align " << getAlignInBits()
-     << ", offset " << getOffsetInBits();
+  OS << " [line " << getLineNumber() << ", size " << getSizeInBits()
+     << ", align " << getAlignInBits() << ", offset " << getOffsetInBits();
   if (isBasicType())
     if (const char *Enc =
-        dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding()))
+            dwarf::AttributeEncodingString(DIBasicType(DbgNode).getEncoding()))
       OS << ", enc " << Enc;
   OS << "]";
 
@@ -1116,7 +1288,12 @@ void DIType::printInternal(raw_ostream &OS) const {
     OS << " [artificial]";
 
   if (isForwardDecl())
-    OS << " [fwd]";
+    OS << " [decl]";
+  else if (getTag() == dwarf::DW_TAG_structure_type ||
+           getTag() == dwarf::DW_TAG_union_type ||
+           getTag() == dwarf::DW_TAG_enumeration_type ||
+           getTag() == dwarf::DW_TAG_class_type)
+    OS << " [def]";
   if (isVector())
     OS << " [vector]";
   if (isStaticMember())
@@ -1194,19 +1371,17 @@ void DIObjCProperty::printInternal(raw_ostream &OS) const {
   if (!Name.empty())
     OS << " [" << Name << ']';
 
-  OS << " [line " << getLineNumber()
-     << ", properties " << getUnsignedField(6) << ']';
+  OS << " [line " << getLineNumber() << ", properties " << getUnsignedField(6)
     << ']';
 }
 
 static void printDebugLoc(DebugLoc DL, raw_ostream &CommentOS,
                           const LLVMContext &Ctx) {
-  if (!DL.isUnknown()) {          // Print source line info.
+  if (!DL.isUnknown()) { // Print source line info.
     DIScope Scope(DL.getScope(Ctx));
+    assert(Scope.isScope() && "Scope of a DebugLoc should be a DIScope.");
     // Omit the directory, because it's likely to be long and uninteresting.
-    if (Scope.Verify())
-      CommentOS << Scope.getFilename();
-    else
-      CommentOS << "<unknown>";
+    CommentOS << Scope.getFilename();
     CommentOS << ':' << DL.getLine();
     if (DL.getCol() != 0)
       CommentOS << ':' << DL.getCol();
@@ -1233,3 +1408,81 @@ void DIVariable::printExtendedName(raw_ostream &OS) const {
     }
   }
 }
+
+/// Specialize constructor to make sure it has the correct type.
+template <> DIRef<DIScope>::DIRef(const Value *V) : Val(V) {
+  assert(isScopeRef(V) && "DIScopeRef should be a MDString or MDNode");
+}
+template <> DIRef<DIType>::DIRef(const Value *V) : Val(V) {
+  assert(isTypeRef(V) && "DITypeRef should be a MDString or MDNode");
+}
+
+/// Specialize getFieldAs to handle fields that are references to DIScopes.
+template <>
+DIScopeRef DIDescriptor::getFieldAs<DIScopeRef>(unsigned Elt) const {
+  return DIScopeRef(getField(DbgNode, Elt));
+}
+/// Specialize getFieldAs to handle fields that are references to DITypes.
+template <> DITypeRef DIDescriptor::getFieldAs<DITypeRef>(unsigned Elt) const {
+  return DITypeRef(getField(DbgNode, Elt));
+}
+
+/// Strip debug info in the module if it exists.
+/// To do this, we remove all calls to the debugger intrinsics and any named
+/// metadata for debugging. We also remove debug locations for instructions.
+/// Return true if module is modified.
+bool llvm::StripDebugInfo(Module &M) {
+
+  bool Changed = false;
+
+  // Remove all of the calls to the debugger intrinsics, and remove them from
+  // the module.
+  if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
+    while (!Declare->use_empty()) {
+      CallInst *CI = cast<CallInst>(Declare->use_back());
+      CI->eraseFromParent();
+    }
+    Declare->eraseFromParent();
+    Changed = true;
+  }
+
+  if (Function *DbgVal = M.getFunction("llvm.dbg.value")) {
+    while (!DbgVal->use_empty()) {
+      CallInst *CI = cast<CallInst>(DbgVal->use_back());
+      CI->eraseFromParent();
+    }
+    DbgVal->eraseFromParent();
+    Changed = true;
+  }
+
+  for (Module::named_metadata_iterator NMI = M.named_metadata_begin(),
+         NME = M.named_metadata_end(); NMI != NME;) {
+    NamedMDNode *NMD = NMI;
+    ++NMI;
+    if (NMD->getName().startswith("llvm.dbg.")) {
+      NMD->eraseFromParent();
+      Changed = true;
+    }
+  }
+
+  for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+    for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
+         ++FI)
+      for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+           ++BI) {
+        if (!BI->getDebugLoc().isUnknown()) {
+          Changed = true;
+          BI->setDebugLoc(DebugLoc());
+        }
+      }
+
+  return Changed;
+}
+
+/// Return Debug Info Metadata Version by checking module flags.
+unsigned llvm::getDebugMetadataVersionFromModule(const Module &M) {
+  Value *Val = M.getModuleFlag("Debug Info Version");
+  if (!Val)
+    return 0;
+  return cast<ConstantInt>(Val)->getZExtValue();
+}
diff --git a/contrib/llvm/lib/IR/Function.cpp b/contrib/llvm/lib/IR/Function.cpp
index 7f7efab..e8a2402 100644
--- a/contrib/llvm/lib/IR/Function.cpp
+++ b/contrib/llvm/lib/IR/Function.cpp
@@ -131,6 +131,15 @@ bool Argument::hasReturnedAttr() const {
     hasAttribute(getArgNo()+1, Attribute::Returned);
 }
 
+/// Return true if this argument has the readonly or readnone attribute on it
+/// in its containing function.
+bool Argument::onlyReadsMemory() const {
+  return getParent()->getAttributes().
+      hasAttribute(getArgNo()+1, Attribute::ReadOnly) ||
+      getParent()->getAttributes().
+      hasAttribute(getArgNo()+1, Attribute::ReadNone);
+}
+
 /// addAttr - Add attributes to an argument.
 void Argument::addAttr(AttributeSet AS) {
   assert(AS.getNumSlots() <= 1 &&
@@ -267,6 +276,9 @@ void Function::dropAllReferences() {
   // blockaddresses, but BasicBlock's destructor takes care of those.
   while (!BasicBlocks.empty())
     BasicBlocks.begin()->eraseFromParent();
+
+  // Prefix data is stored in a side table.
+  setPrefixData(0);
 }
 
 void Function::addAttribute(unsigned i, Attribute::AttrKind attr) {
@@ -342,6 +354,10 @@ void Function::copyAttributesFrom(const GlobalValue *Src) {
       setGC(SrcF->getGC());
     else
       clearGC();
+    if (SrcF->hasPrefixData())
+      setPrefixData(SrcF->getPrefixData());
+    else
+      setPrefixData(0);
   }
 
 /// getIntrinsicID - This method returns the ID number of the specified
@@ -437,7 +453,9 @@ enum IIT_Info {
   IIT_STRUCT5 = 22,
   IIT_EXTEND_VEC_ARG = 23,
   IIT_TRUNC_VEC_ARG = 24,
-  IIT_ANYPTR = 25
+  IIT_ANYPTR = 25,
+  IIT_V1 = 26,
+  IIT_VARARG = 27
 };
 
@@ -451,6 +469,9 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
   case IIT_Done:
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Void, 0));
     return;
+  case IIT_VARARG:
+    OutputTable.push_back(IITDescriptor::get(IITDescriptor::VarArg, 0));
+    return;
   case IIT_MMX:
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::MMX, 0));
     return;
@@ -481,6 +502,10 @@ static void DecodeIITType(unsigned &NextElt, ArrayRef<unsigned char> Infos,
   case IIT_I64:
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Integer, 64));
     return;
+  case IIT_V1:
+    OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 1));
+    DecodeIITType(NextElt, Infos, OutputTable);
+    return;
   case IIT_V2:
     OutputTable.push_back(IITDescriptor::get(IITDescriptor::Vector, 2));
     DecodeIITType(NextElt, Infos, OutputTable);
     return;
@@ -592,6 +617,7 @@ static Type *DecodeFixedType(ArrayRef<Intrinsic::IITDescriptor> &Infos,
   switch (D.Kind) {
   case IITDescriptor::Void: return Type::getVoidTy(Context);
+  case IITDescriptor::VarArg: return Type::getVoidTy(Context);
   case IITDescriptor::MMX: return Type::getX86_MMXTy(Context);
   case IITDescriptor::Metadata: return Type::getMetadataTy(Context);
   case IITDescriptor::Half: return Type::getHalfTy(Context);
@@ -712,3 +738,31 @@ bool Function::callsFunctionThatReturnsTwice() const {
   return false;
 }
 
+Constant *Function::getPrefixData() const {
+  assert(hasPrefixData());
+  const LLVMContextImpl::PrefixDataMapTy &PDMap =
+      getContext().pImpl->PrefixDataMap;
+  assert(PDMap.find(this) != PDMap.end());
+  return cast<Constant>(PDMap.find(this)->second->getReturnValue());
+}
+
+void Function::setPrefixData(Constant *PrefixData) {
+  if (!PrefixData && !hasPrefixData())
+    return;
+
+  unsigned SCData = getSubclassDataFromValue();
+  LLVMContextImpl::PrefixDataMapTy &PDMap = getContext().pImpl->PrefixDataMap;
+  ReturnInst *&PDHolder = PDMap[this];
+  if (PrefixData) {
+    if (PDHolder)
+      PDHolder->setOperand(0, PrefixData);
+    else
+      PDHolder = ReturnInst::Create(getContext(), PrefixData);
+    SCData |= 2;
+  } else {
+    delete PDHolder;
+    PDMap.erase(this);
+    SCData &= ~2;
+  }
+  setValueSubclassData(SCData);
+}
diff --git a/contrib/llvm/lib/IR/GCOV.cpp b/contrib/llvm/lib/IR/GCOV.cpp
index ea2f0a6..f0f8c7d 100644
--- a/contrib/llvm/lib/IR/GCOV.cpp
+++ b/contrib/llvm/lib/IR/GCOV.cpp
@@ -7,14 +7,16 @@
 //
 //===----------------------------------------------------------------------===//
 //
-// GCOV implements the interface to read and write coverage files that use 
+// GCOV implements the interface to read and write coverage files that use
 // 'gcov' format.
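The prefix-data accessors in the Function.cpp hunks above keep the constant in a
side table (LLVMContextImpl::PrefixDataMap) keyed by the Function, anchored by a
dangling ReturnInst so it participates in value tracking. A usage sketch (F and
Ctx are hypothetical, an existing Function and its LLVMContext):

    Constant *Data = ConstantInt::get(Type::getInt32Ty(Ctx), 0xdeadbeef);
    F->setPrefixData(Data);            // stores in the side table, sets bit 2
    if (F->hasPrefixData())
      Constant *Same = F->getPrefixData();
    F->setPrefixData(0);               // deletes the holder, clears the bit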
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Support/Debug.h"
 #include "llvm/Support/GCOV.h"
 #include "llvm/ADT/OwningPtr.h"
 #include "llvm/ADT/STLExtras.h"
+#include "llvm/Support/Format.h"
 #include "llvm/Support/MemoryObject.h"
 #include "llvm/Support/system_error.h"
 using namespace llvm;
 
@@ -43,27 +45,47 @@ bool GCOVFile::read(GCOVBuffer &Buffer) {
   if (Format == GCOV::InvalidGCOV)
     return false;
 
-  unsigned i = 0;
-  while (1) {
-    GCOVFunction *GFun = NULL;
-    if (isGCDAFile(Format)) {
-      // Use existing function while reading .gcda file.
-      assert(i < Functions.size() && ".gcda data does not match .gcno data");
-      GFun = Functions[i];
-    } else if (isGCNOFile(Format)){
-      GFun = new GCOVFunction();
+  if (isGCNOFile(Format)) {
+    while (true) {
+      if (!Buffer.readFunctionTag()) break;
+      GCOVFunction *GFun = new GCOVFunction();
+      if (!GFun->read(Buffer, Format))
+        return false;
       Functions.push_back(GFun);
     }
-    if (!GFun || !GFun->read(Buffer, Format))
-      break;
-    ++i;
   }
+  else if (isGCDAFile(Format)) {
+    for (size_t i = 0, e = Functions.size(); i < e; ++i) {
+      if (!Buffer.readFunctionTag()) {
+        errs() << "Unexpected number of functions.\n";
+        return false;
+      }
+      if (!Functions[i]->read(Buffer, Format))
+        return false;
+    }
+    if (Buffer.readObjectTag()) {
+      uint32_t Length;
+      uint32_t Dummy;
+      if (!Buffer.readInt(Length)) return false;
+      if (!Buffer.readInt(Dummy)) return false; // checksum
+      if (!Buffer.readInt(Dummy)) return false; // num
+      if (!Buffer.readInt(RunCount)) return false;;
+      Buffer.advanceCursor(Length-3);
+    }
+    while (Buffer.readProgramTag()) {
+      uint32_t Length;
+      if (!Buffer.readInt(Length)) return false;
+      Buffer.advanceCursor(Length);
+      ++ProgramCount;
+    }
+  }
+
   return true;
 }
 
-/// dump - Dump GCOVFile content on standard out for debugging purposes.
+/// dump - Dump GCOVFile content to dbgs() for debugging purposes.
 void GCOVFile::dump() {
-  for (SmallVector<GCOVFunction *, 16>::iterator I = Functions.begin(),
+  for (SmallVectorImpl<GCOVFunction *>::iterator I = Functions.begin(),
          E = Functions.end(); I != E; ++I)
     (*I)->dump();
 }
@@ -71,10 +93,11 @@ void GCOVFile::dump() {
 /// collectLineCounts - Collect line counts. This must be used after
 /// reading .gcno and .gcda files.
 void GCOVFile::collectLineCounts(FileInfo &FI) {
-  for (SmallVector<GCOVFunction *, 16>::iterator I = Functions.begin(),
-         E = Functions.end(); I != E; ++I)
+  for (SmallVectorImpl<GCOVFunction *>::iterator I = Functions.begin(),
+         E = Functions.end(); I != E; ++I)
     (*I)->collectLineCounts(FI);
-  FI.print();
+  FI.setRunCount(RunCount);
+  FI.setProgramCount(ProgramCount);
 }
 
 //===----------------------------------------------------------------------===//
@@ -85,77 +108,122 @@ GCOVFunction::~GCOVFunction() {
   DeleteContainerPointers(Blocks);
 }
 
-/// read - Read a aunction from the buffer. Return false if buffer cursor
+/// read - Read a function from the buffer. Return false if buffer cursor
 /// does not point to a function tag.
 bool GCOVFunction::read(GCOVBuffer &Buff, GCOV::GCOVFormat Format) {
-  if (!Buff.readFunctionTag())
-    return false;
+  uint32_t Dummy;
+  if (!Buff.readInt(Dummy)) return false; // Function header length
+  if (!Buff.readInt(Ident)) return false;
+  if (!Buff.readInt(Dummy)) return false; // Checksum #1
+  if (Format != GCOV::GCNO_402 && Format != GCOV::GCDA_402)
+    if (!Buff.readInt(Dummy)) return false; // Checksum #2
 
-  Buff.readInt(); // Function header length
-  Ident = Buff.readInt();
-  Buff.readInt(); // Checksum #1
-  if (Format != GCOV::GCNO_402)
-    Buff.readInt(); // Checksum #2
+  if (!Buff.readString(Name)) return false;
 
-  Name = Buff.readString();
   if (Format == GCOV::GCNO_402 || Format == GCOV::GCNO_404)
-    Filename = Buff.readString();
+    if (!Buff.readString(Filename)) return false;
 
   if (Format == GCOV::GCDA_402 || Format == GCOV::GCDA_404) {
-    Buff.readArcTag();
-    uint32_t Count = Buff.readInt() / 2;
-    for (unsigned i = 0, e = Count; i != e; ++i) {
-      Blocks[i]->addCount(Buff.readInt64());
+    if (!Buff.readArcTag()) {
+      errs() << "Arc tag not found.\n";
+      return false;
+    }
+    uint32_t Count;
+    if (!Buff.readInt(Count)) return false;
+    Count /= 2;
+
+    // This for loop adds the counts for each block. A second nested loop is
+    // required to combine the edge counts that are contained in the GCDA file.
+    for (uint32_t Line = 0; Count > 0; ++Line) {
+      if (Line >= Blocks.size()) {
+        errs() << "Unexpected number of edges.\n";
+        return false;
+      }
+      GCOVBlock &Block = *Blocks[Line];
+      for (size_t Edge = 0, End = Block.getNumEdges(); Edge < End; ++Edge) {
+        if (Count == 0) {
+          errs() << "Unexpected number of edges.\n";
+          return false;
+        }
+        uint64_t ArcCount;
+        if (!Buff.readInt64(ArcCount)) return false;
+        Block.addCount(ArcCount);
+        --Count;
+      }
     }
     return true;
   }
 
-  LineNumber = Buff.readInt();
+  if (!Buff.readInt(LineNumber)) return false;
 
   // read blocks.
-  bool BlockTagFound = Buff.readBlockTag();
-  (void)BlockTagFound;
-  assert(BlockTagFound && "Block Tag not found!");
-  uint32_t BlockCount = Buff.readInt();
-  for (int i = 0, e = BlockCount; i != e; ++i) {
-    Buff.readInt(); // Block flags;
-    Blocks.push_back(new GCOVBlock(i));
+  if (!Buff.readBlockTag()) {
+    errs() << "Block tag not found.\n";
+    return false;
+  }
+  uint32_t BlockCount;
+  if (!Buff.readInt(BlockCount)) return false;
+  for (uint32_t i = 0, e = BlockCount; i != e; ++i) {
+    if (!Buff.readInt(Dummy)) return false; // Block flags;
+    Blocks.push_back(new GCOVBlock(*this, i));
   }
 
   // read edges.
   while (Buff.readEdgeTag()) {
-    uint32_t EdgeCount = (Buff.readInt() - 1) / 2;
-    uint32_t BlockNo = Buff.readInt();
-    assert(BlockNo < BlockCount && "Unexpected Block number!");
-    for (int i = 0, e = EdgeCount; i != e; ++i) {
-      Blocks[BlockNo]->addEdge(Buff.readInt());
-      Buff.readInt(); // Edge flag
+    uint32_t EdgeCount;
+    if (!Buff.readInt(EdgeCount)) return false;
+    EdgeCount = (EdgeCount - 1) / 2;
+    uint32_t BlockNo;
+    if (!Buff.readInt(BlockNo)) return false;
+    if (BlockNo >= BlockCount) {
+      errs() << "Unexpected block number.\n";
+      return false;
+    }
+    for (uint32_t i = 0, e = EdgeCount; i != e; ++i) {
+      uint32_t Dst;
+      if (!Buff.readInt(Dst)) return false;
+      Blocks[BlockNo]->addEdge(Dst);
+      if (!Buff.readInt(Dummy)) return false; // Edge flag
     }
   }
 
   // read line table.
   while (Buff.readLineTag()) {
-    uint32_t LineTableLength = Buff.readInt();
-    uint32_t Size = Buff.getCursor() + LineTableLength*4;
-    uint32_t BlockNo = Buff.readInt();
-    assert(BlockNo < BlockCount && "Unexpected Block number!");
+    uint32_t LineTableLength;
+    if (!Buff.readInt(LineTableLength)) return false;
+    uint32_t EndPos = Buff.getCursor() + LineTableLength*4;
+    uint32_t BlockNo;
+    if (!Buff.readInt(BlockNo)) return false;
+    if (BlockNo >= BlockCount) {
+      errs() << "Unexpected block number.\n";
+      return false;
+    }
     GCOVBlock *Block = Blocks[BlockNo];
-    Buff.readInt(); // flag
-    while (Buff.getCursor() != (Size - 4)) {
-      StringRef Filename = Buff.readString();
-      if (Buff.getCursor() == (Size - 4)) break;
-      while (uint32_t L = Buff.readInt())
-        Block->addLine(Filename, L);
+    if (!Buff.readInt(Dummy)) return false; // flag
+    while (Buff.getCursor() != (EndPos - 4)) {
+      StringRef F;
+      if (!Buff.readString(F)) return false;
+      if (F != Filename) {
+        errs() << "Multiple sources for a single basic block.\n";
+        return false;
+      }
+      if (Buff.getCursor() == (EndPos - 4)) break;
+      while (true) {
+        uint32_t Line;
+        if (!Buff.readInt(Line)) return false;
+        if (!Line) break;
+        Block->addLine(Line);
+      }
     }
-    Buff.readInt(); // flag
+    if (!Buff.readInt(Dummy)) return false; // flag
   }
   return true;
 }
 
-/// dump - Dump GCOVFunction content on standard out for debugging purposes.
+/// dump - Dump GCOVFunction content to dbgs() for debugging purposes.
 void GCOVFunction::dump() {
-  outs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n";
-  for (SmallVector<GCOVBlock *, 16>::iterator I = Blocks.begin(),
+  dbgs() << "===== " << Name << " @ " << Filename << ":" << LineNumber << "\n";
+  for (SmallVectorImpl<GCOVBlock *>::iterator I = Blocks.begin(),
         E = Blocks.end(); I != E; ++I)
    (*I)->dump();
 }
@@ -163,7 +231,7 @@ void GCOVFunction::dump() {
 /// collectLineCounts - Collect line counts. This must be used after
 /// reading .gcno and .gcda files.
 void GCOVFunction::collectLineCounts(FileInfo &FI) {
-  for (SmallVector<GCOVBlock *, 16>::iterator I = Blocks.begin(),
+  for (SmallVectorImpl<GCOVBlock *>::iterator I = Blocks.begin(),
         E = Blocks.end(); I != E; ++I)
    (*I)->collectLineCounts(FI);
 }
@@ -174,110 +242,73 @@ void GCOVFunction::collectLineCounts(FileInfo &FI) {
 /// ~GCOVBlock - Delete GCOVBlock and its content.
 GCOVBlock::~GCOVBlock() {
   Edges.clear();
-  DeleteContainerSeconds(Lines);
-}
-
-void GCOVBlock::addLine(StringRef Filename, uint32_t LineNo) {
-  GCOVLines *&LinesForFile = Lines[Filename];
-  if (!LinesForFile)
-    LinesForFile = new GCOVLines();
-  LinesForFile->add(LineNo);
+  Lines.clear();
 }
 
 /// collectLineCounts - Collect line counts. This must be used after
 /// reading .gcno and .gcda files.
 void GCOVBlock::collectLineCounts(FileInfo &FI) {
-  for (StringMap<GCOVLines *>::iterator I = Lines.begin(),
+  for (SmallVectorImpl<uint32_t>::iterator I = Lines.begin(),
         E = Lines.end(); I != E; ++I)
-    I->second->collectLineCounts(FI, I->first(), Counter);
+    FI.addLineCount(Parent.getFilename(), *I, Counter);
 }
 
-/// dump - Dump GCOVBlock content on standard out for debugging purposes.
+/// dump - Dump GCOVBlock content to dbgs() for debugging purposes.
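End to end, the reworked reader runs the way llvm-cov drives it: read the .gcno
graph, overlay the .gcda counts, then collect and print. A condensed sketch
(file names are placeholders; error paths reduced to early returns):

    OwningPtr<MemoryBuffer> GCNOBuff;
    if (error_code ec = MemoryBuffer::getFileOrSTDIN("test.gcno", GCNOBuff))
      return false;
    GCOVBuffer GCNO(GCNOBuff.take());
    GCOVFile GF;
    if (!GF.read(GCNO)) return false;   // functions, blocks, line tables

    OwningPtr<MemoryBuffer> GCDABuff;
    if (error_code ec = MemoryBuffer::getFileOrSTDIN("test.gcda", GCDABuff))
      return false;
    GCOVBuffer GCDA(GCDABuff.take());
    if (!GF.read(GCDA)) return false;   // arc counts, run/program counts

    FileInfo FI;
    GF.collectLineCounts(FI);           // forwards run/program counts too
    FI.print(outs(), "test.gcno", "test.gcda"); // gcov-style listing, see below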
 void GCOVBlock::dump() {
-  outs() << "Block : " << Number << " Counter : " << Counter << "\n";
+  dbgs() << "Block : " << Number << " Counter : " << Counter << "\n";
   if (!Edges.empty()) {
-    outs() << "\tEdges : ";
-    for (SmallVector<uint32_t, 16>::iterator I = Edges.begin(), E = Edges.end();
+    dbgs() << "\tEdges : ";
+    for (SmallVectorImpl<uint32_t>::iterator I = Edges.begin(), E = Edges.end();
          I != E; ++I)
-      outs() << (*I) << ",";
-    outs() << "\n";
+      dbgs() << (*I) << ",";
+    dbgs() << "\n";
   }
   if (!Lines.empty()) {
-    outs() << "\tLines : ";
-    for (StringMap<GCOVLines *>::iterator LI = Lines.begin(),
-           LE = Lines.end(); LI != LE; ++LI) {
-      outs() << LI->first() << " -> ";
-      LI->second->dump();
-      outs() << "\n";
-    }
+    dbgs() << "\tLines : ";
+    for (SmallVectorImpl<uint32_t>::iterator I = Lines.begin(),
+           E = Lines.end(); I != E; ++I)
+      dbgs() << (*I) << ",";
+    dbgs() << "\n";
   }
 }
 
 //===----------------------------------------------------------------------===//
-// GCOVLines implementation.
-
-/// collectLineCounts - Collect line counts. This must be used after
-/// reading .gcno and .gcda files.
-void GCOVLines::collectLineCounts(FileInfo &FI, StringRef Filename,
-                                  uint32_t Count) {
-  for (SmallVector<uint32_t, 16>::iterator I = Lines.begin(),
-         E = Lines.end(); I != E; ++I)
-    FI.addLineCount(Filename, *I, Count);
-}
-
-/// dump - Dump GCOVLines content on standard out for debugging purposes.
-void GCOVLines::dump() {
-  for (SmallVector<uint32_t, 16>::iterator I = Lines.begin(),
-         E = Lines.end(); I != E; ++I)
-    outs() << (*I) << ",";
-}
-
-//===----------------------------------------------------------------------===//
 // FileInfo implementation.
 
-/// addLineCount - Add line count for the given line number in a file.
-void FileInfo::addLineCount(StringRef Filename, uint32_t Line, uint32_t Count) {
-  if (LineInfo.find(Filename) == LineInfo.end()) {
-    OwningPtr<MemoryBuffer> Buff;
-    if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
-      errs() << Filename << ": " << ec.message() << "\n";
-      return;
-    }
-    StringRef AllLines = Buff.take()->getBuffer();
-    LineCounts L(AllLines.count('\n')+2);
-    L[Line-1] = Count;
-    LineInfo[Filename] = L;
-    return;
-  }
-  LineCounts &L = LineInfo[Filename];
-  L[Line-1] = Count;
-}
-
 /// print - Print source files with collected line count information.
-void FileInfo::print() {
+void FileInfo::print(raw_fd_ostream &OS, StringRef gcnoFile,
+                     StringRef gcdaFile) {
   for (StringMap<LineCounts>::iterator I = LineInfo.begin(),
          E = LineInfo.end(); I != E; ++I) {
     StringRef Filename = I->first();
-    outs() << Filename << "\n";
+    OS << "        -:    0:Source:" << Filename << "\n";
+    OS << "        -:    0:Graph:" << gcnoFile << "\n";
+    OS << "        -:    0:Data:" << gcdaFile << "\n";
+    OS << "        -:    0:Runs:" << RunCount << "\n";
+    OS << "        -:    0:Programs:" << ProgramCount << "\n";
     LineCounts &L = LineInfo[Filename];
     OwningPtr<MemoryBuffer> Buff;
     if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, Buff)) {
       errs() << Filename << ": " << ec.message() << "\n";
       return;
     }
-    StringRef AllLines = Buff.take()->getBuffer();
-    for (unsigned i = 0, e = L.size(); i != e; ++i) {
-      if (L[i])
-        outs() << L[i] << ":\t";
-      else
-        outs() << "    :\t";
+    StringRef AllLines = Buff->getBuffer();
+    uint32_t i = 0;
+    while (!AllLines.empty()) {
+      if (L.find(i) != L.end()) {
+        if (L[i] == 0)
+          OS << "    #####:";
+        else
+          OS << format("%9" PRIu64 ":", L[i]);
+      } else {
+        OS << "        -:";
+      }
       std::pair<StringRef, StringRef> P = AllLines.split('\n');
       if (AllLines != P.first)
-        outs() << P.first;
-      outs() << "\n";
+        OS << format("%5u:", i+1) << P.first;
+      OS << "\n";
       AllLines = P.second;
+      ++i;
     }
   }
 }
-
-
diff --git a/contrib/llvm/lib/IR/Globals.cpp b/contrib/llvm/lib/IR/Globals.cpp
index 6d547f3..da3b02a 100644
--- a/contrib/llvm/lib/IR/Globals.cpp
+++ b/contrib/llvm/lib/IR/Globals.cpp
@@ -229,14 +229,14 @@ void GlobalAlias::setAliasee(Constant *Aliasee) {
   setOperand(0, Aliasee);
 }
 
-const GlobalValue *GlobalAlias::getAliasedGlobal() const {
-  const Constant *C = getAliasee();
+GlobalValue *GlobalAlias::getAliasedGlobal() {
+  Constant *C = getAliasee();
   if (C == 0) return 0;
 
-  if (const GlobalValue *GV = dyn_cast<GlobalValue>(C))
+  if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
     return GV;
 
-  const ConstantExpr *CE = cast<ConstantExpr>(C);
+  ConstantExpr *CE = cast<ConstantExpr>(C);
   assert((CE->getOpcode() == Instruction::BitCast ||
           CE->getOpcode() == Instruction::GetElementPtr) &&
          "Unsupported aliasee");
@@ -244,18 +244,18 @@
   return cast<GlobalValue>(CE->getOperand(0));
 }
 
-const GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) const {
-  SmallPtrSet<const GlobalValue*, 3> Visited;
+GlobalValue *GlobalAlias::resolveAliasedGlobal(bool stopOnWeak) {
+  SmallPtrSet<GlobalValue*, 3> Visited;
 
   // Check if we need to stop early.
   if (stopOnWeak && mayBeOverridden())
     return this;
 
-  const GlobalValue *GV = getAliasedGlobal();
+  GlobalValue *GV = getAliasedGlobal();
   Visited.insert(GV);
 
   // Iterate over aliasing chain, stopping on weak alias if necessary.
-  while (const GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) {
+  while (GlobalAlias *GA = dyn_cast<GlobalAlias>(GV)) {
     if (stopOnWeak && GA->mayBeOverridden())
       break;
 
diff --git a/contrib/llvm/lib/IR/Instruction.cpp b/contrib/llvm/lib/IR/Instruction.cpp
index 2b5a0b3..a7773c4 100644
--- a/contrib/llvm/lib/IR/Instruction.cpp
+++ b/contrib/llvm/lib/IR/Instruction.cpp
@@ -223,18 +223,19 @@ const char *Instruction::getOpcodeName(unsigned OpCode) {
   case GetElementPtr: return "getelementptr";
 
   // Convert instructions...
-  case Trunc:     return "trunc";
-  case ZExt:      return "zext";
-  case SExt:      return "sext";
-  case FPTrunc:   return "fptrunc";
-  case FPExt:     return "fpext";
-  case FPToUI:    return "fptoui";
-  case FPToSI:    return "fptosi";
-  case UIToFP:    return "uitofp";
-  case SIToFP:    return "sitofp";
-  case IntToPtr:  return "inttoptr";
-  case PtrToInt:  return "ptrtoint";
-  case BitCast:   return "bitcast";
+  case Trunc:         return "trunc";
+  case ZExt:          return "zext";
+  case SExt:          return "sext";
+  case FPTrunc:       return "fptrunc";
+  case FPExt:         return "fpext";
+  case FPToUI:        return "fptoui";
+  case FPToSI:        return "fptosi";
+  case UIToFP:        return "uitofp";
+  case SIToFP:        return "sitofp";
+  case IntToPtr:      return "inttoptr";
+  case PtrToInt:      return "ptrtoint";
+  case BitCast:       return "bitcast";
+  case AddrSpaceCast: return "addrspacecast";
 
   // Other instructions...
   case ICmp: return "icmp";
 
diff --git a/contrib/llvm/lib/IR/Instructions.cpp b/contrib/llvm/lib/IR/Instructions.cpp
index d58877e..8a6b77b 100644
--- a/contrib/llvm/lib/IR/Instructions.cpp
+++ b/contrib/llvm/lib/IR/Instructions.cpp
@@ -346,7 +346,7 @@ void CallInst::removeAttribute(unsigned i, Attribute attr) {
   setAttributes(PAL);
 }
 
-bool CallInst::hasFnAttr(Attribute::AttrKind A) const {
+bool CallInst::hasFnAttrImpl(Attribute::AttrKind A) const {
   if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
     return true;
   if (const Function *F = getCalledFunction())
@@ -574,7 +574,7 @@ void InvokeInst::setSuccessorV(unsigned idx, BasicBlock *B) {
   return setSuccessor(idx, B);
 }
 
-bool InvokeInst::hasFnAttr(Attribute::AttrKind A) const {
+bool InvokeInst::hasFnAttrImpl(Attribute::AttrKind A) const {
   if (AttributeList.hasAttribute(AttributeSet::FunctionIndex, A))
     return true;
   if (const Function *F = getCalledFunction())
@@ -2095,7 +2095,9 @@ bool CastInst::isNoopCast(Instruction::CastOps Opcode,
     case Instruction::SIToFP:
     case Instruction::FPToUI:
     case Instruction::FPToSI:
-      return false; // These always modify bits
+    case Instruction::AddrSpaceCast:
+      // TODO: Target informations may give a more accurate answer here.
+      return false;
     case Instruction::BitCast:
       return true; // BitCast never modifies bits.
     case Instruction::PtrToInt:
@@ -2137,44 +2139,46 @@ unsigned CastInst::isEliminableCastPair(
   // ZEXT       <       Integral   Unsigned     Integer      Any
   // SEXT       <       Integral    Signed      Integer      Any
   // FPTOUI    n/a      FloatPt      n/a        Integral   Unsigned
-  // FPTOSI    n/a      FloatPt      n/a        Integral    Signed 
-  // UITOFP    n/a      Integral   Unsigned     FloatPt      n/a   
-  // SITOFP    n/a      Integral    Signed      FloatPt      n/a   
-  // FPTRUNC    >       FloatPt      n/a        FloatPt      n/a   
-  // FPEXT      <       FloatPt      n/a        FloatPt      n/a   
+  // FPTOSI    n/a      FloatPt      n/a        Integral    Signed
+  // UITOFP    n/a      Integral   Unsigned     FloatPt      n/a
+  // SITOFP    n/a      Integral    Signed      FloatPt      n/a
+  // FPTRUNC    >       FloatPt      n/a        FloatPt      n/a
+  // FPEXT      <       FloatPt      n/a        FloatPt      n/a
   // PTRTOINT  n/a      Pointer      n/a        Integral   Unsigned
   // INTTOPTR  n/a      Integral   Unsigned     Pointer      n/a
-  // BITCAST    =       FirstClass   n/a        FirstClass   n/a   
+  // BITCAST    =       FirstClass   n/a        FirstClass   n/a
+  // ADDRSPCST n/a      Pointer      n/a        Pointer      n/a
   //
   // NOTE: some transforms are safe, but we consider them to be non-profitable.
   // For example, we could merge "fptoui double to i32" + "zext i32 to i64",
   // into "fptoui double to i64", but this loses information about the range
-  // of the produced value (we no longer know the top-part is all zeros). 
+  // of the produced value (we no longer know the top-part is all zeros).
   // Further this conversion is often much more expensive for typical hardware,
-  // and causes issues when building libgcc.  We disallow fptosi+sext for the
-  // same reason.
+  // and causes issues when building libgcc. We disallow fptosi+sext for the
+  // same reason.
-  const unsigned numCastOps = 
+  const unsigned numCastOps =
       Instruction::CastOpsEnd - Instruction::CastOpsBegin;
   static const uint8_t CastResults[numCastOps][numCastOps] = {
-    // T        F  F  U  S  F  F  P  I  B   -+
-    // R  Z  S  P  P  I  I  T  P  2  N  T    |
-    // U  E  E  2  2  2  2  R  E  I  T  C    +- secondOp
-    // N  X  X  U  S  F  F  N  X  N  2  V    |
-    // C  T  T  I  I  P  P  C  T  T  P  T   -+
-    {  1, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // Trunc      -+
-    {  8, 1, 9,99,99, 2, 0,99,99,99, 2, 3 }, // ZExt        |
-    {  8, 0, 1,99,99, 0, 2,99,99,99, 0, 3 }, // SExt        |
-    {  0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToUI      |
-    {  0, 0, 0,99,99, 0, 0,99,99,99, 0, 3 }, // FPToSI      |
-    { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // UIToFP     +- firstOp
-    { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4 }, // SIToFP      |
-    { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4 }, // FPTrunc     |
-    { 99,99,99, 2, 2,99,99,10, 2,99,99, 4 }, // FPExt       |
-    {  1, 0, 0,99,99, 0, 0,99,99,99, 7, 3 }, // PtrToInt    |
-    { 99,99,99,99,99,99,99,99,99,13,99,12 }, // IntToPtr    |
-    {  5, 5, 5, 6, 6, 5, 5, 6, 6,11, 5, 1 }, // BitCast    -+
+    // T        F  F  U  S  F  F  P  I  B  A  -+
+    // R  Z  S  P  P  I  I  T  P  2  N  T  S   |
+    // U  E  E  2  2  2  2  R  E  I  T  C  C   +- secondOp
+    // N  X  X  U  S  F  F  N  X  N  2  V  V   |
+    // C  T  T  I  I  P  P  C  T  T  P  T  T  -+
+    {  1, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // Trunc         -+
+    {  8, 1, 9,99,99, 2, 0,99,99,99, 2, 3, 0}, // ZExt           |
+    {  8, 0, 1,99,99, 0, 2,99,99,99, 0, 3, 0}, // SExt           |
+    {  0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToUI         |
+    {  0, 0, 0,99,99, 0, 0,99,99,99, 0, 3, 0}, // FPToSI         |
+    { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // UIToFP        +- firstOp
+    { 99,99,99, 0, 0,99,99, 0, 0,99,99, 4, 0}, // SIToFP         |
+    { 99,99,99, 0, 0,99,99, 1, 0,99,99, 4, 0}, // FPTrunc        |
+    { 99,99,99, 2, 2,99,99,10, 2,99,99, 4, 0}, // FPExt          |
+    {  1, 0, 0,99,99, 0, 0,99,99,99, 7, 3, 0}, // PtrToInt       |
+    { 99,99,99,99,99,99,99,99,99,11,99,15, 0}, // IntToPtr       |
+    {  5, 5, 5, 6, 6, 5, 5, 6, 6,16, 5, 1,14}, // BitCast        |
+    {  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,13,12}, // AddrSpaceCast -+
   };
-  
+
   // If either of the casts are a bitcast from scalar to vector, disallow the
   // merging. However, bitcast of A->B->A are allowed.
   bool isFirstBitcast  = (firstOp == Instruction::BitCast);
@@ -2191,45 +2195,56 @@
                                             [secondOp-Instruction::CastOpsBegin];
   switch (ElimCase) {
     case 0:
-      // categorically disallowed
+      // Categorically disallowed.
      return 0;
    case 1:
-      // allowed, use first cast's opcode
+      // Allowed, use first cast's opcode.
      return firstOp;
    case 2:
-      // allowed, use second cast's opcode
+      // Allowed, use second cast's opcode.
      return secondOp;
    case 3:
-      // no-op cast in second op implies firstOp as long as the DestTy
+      // No-op cast in second op implies firstOp as long as the DestTy
      // is integer and we are not converting between a vector and a
      // non vector type.
      if (!SrcTy->isVectorTy() && DstTy->isIntegerTy())
        return firstOp;
      return 0;
    case 4:
-      // no-op cast in second op implies firstOp as long as the DestTy
+      // No-op cast in second op implies firstOp as long as the DestTy
      // is floating point.
      if (DstTy->isFloatingPointTy())
        return firstOp;
      return 0;
    case 5:
-      // no-op cast in first op implies secondOp as long as the SrcTy
+      // No-op cast in first op implies secondOp as long as the SrcTy
      // is an integer.
      if (SrcTy->isIntegerTy())
        return secondOp;
      return 0;
    case 6:
-      // no-op cast in first op implies secondOp as long as the SrcTy
+      // No-op cast in first op implies secondOp as long as the SrcTy
      // is a floating point.
if (SrcTy->isFloatingPointTy()) return secondOp; return 0; - case 7: { - // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size + case 7: { + // Cannot simplify if address spaces are different! + if (SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) + return 0; + + unsigned MidSize = MidTy->getScalarSizeInBits(); + // We can still fold this without knowing the actual sizes as long as we + // know that the intermediate pointer is the largest possible + // pointer size. + // FIXME: Is this always true? + if (MidSize == 64) + return Instruction::BitCast; + + // ptrtoint, inttoptr -> bitcast (ptr -> ptr) if int size is >= ptr size. if (!SrcIntPtrTy || DstIntPtrTy != SrcIntPtrTy) return 0; unsigned PtrSize = SrcIntPtrTy->getScalarSizeInBits(); - unsigned MidSize = MidTy->getScalarSizeInBits(); if (MidSize >= PtrSize) return Instruction::BitCast; return 0; @@ -2246,7 +2261,8 @@ unsigned CastInst::isEliminableCastPair( return firstOp; return secondOp; } - case 9: // zext, sext -> zext, because sext can't sign extend after zext + case 9: + // zext, sext -> zext, because sext can't sign extend after zext return Instruction::ZExt; case 10: // fpext followed by fptrunc is allowed if the bit size returned to is @@ -2254,18 +2270,7 @@ if (SrcTy == DstTy) return Instruction::BitCast; return 0; // If the types are not the same we can't eliminate it. - case 11: - // bitcast followed by ptrtoint is allowed as long as the bitcast - // is a pointer to pointer cast. - if (SrcTy->isPointerTy() && MidTy->isPointerTy()) - return secondOp; - return 0; - case 12: - // inttoptr, bitcast -> intptr if bitcast is a ptr to ptr cast - if (MidTy->isPointerTy() && DstTy->isPointerTy()) - return firstOp; - return 0; - case 13: { + case 11: { // inttoptr, ptrtoint -> bitcast if SrcSize<=PtrSize and SrcSize==DstSize if (!MidIntPtrTy) return 0; @@ -2276,8 +2281,65 @@ unsigned CastInst::isEliminableCastPair( return Instruction::BitCast; return 0; } + case 12: { + // addrspacecast, addrspacecast -> bitcast, if SrcAS == DstAS + // addrspacecast, addrspacecast -> addrspacecast, if SrcAS != DstAS + if (SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace()) + return Instruction::AddrSpaceCast; + return Instruction::BitCast; + } + case 13: + // FIXME: this state can be merged with (1), but the following assert + // is useful to check the correctness of the sequence due to the semantic + // change of bitcast. + assert( + SrcTy->isPtrOrPtrVectorTy() && + MidTy->isPtrOrPtrVectorTy() && + DstTy->isPtrOrPtrVectorTy() && + SrcTy->getPointerAddressSpace() != MidTy->getPointerAddressSpace() && + MidTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace() && + "Illegal addrspacecast, bitcast sequence!"); + // Allowed, use first cast's opcode + return firstOp; + case 14: + // FIXME: this state can be merged with (2), but the following assert + // is useful to check the correctness of the sequence due to the semantic + // change of bitcast.
+ assert( + SrcTy->isPtrOrPtrVectorTy() && + MidTy->isPtrOrPtrVectorTy() && + DstTy->isPtrOrPtrVectorTy() && + SrcTy->getPointerAddressSpace() == MidTy->getPointerAddressSpace() && + MidTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace() && + "Illegal bitcast, addrspacecast sequence!"); + // Allowed, use second cast's opcode + return secondOp; + case 15: + // FIXME: this state can be merged with (1), but the following assert + // is useful to check the correctness of the sequence due to the semantic + // change of bitcast. + assert( + SrcTy->isIntOrIntVectorTy() && + MidTy->isPtrOrPtrVectorTy() && + DstTy->isPtrOrPtrVectorTy() && + MidTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace() && + "Illegal inttoptr, bitcast sequence!"); + // Allowed, use first cast's opcode + return firstOp; + case 16: + // FIXME: this state can be merged with (2), but the following assert + // is useful to check the correctness of the sequence due to the semantic + // change of bitcast. + assert( + SrcTy->isPtrOrPtrVectorTy() && + MidTy->isPtrOrPtrVectorTy() && + DstTy->isIntOrIntVectorTy() && + SrcTy->getPointerAddressSpace() == MidTy->getPointerAddressSpace() && + "Illegal bitcast, ptrtoint sequence!"); + // Allowed, use second cast's opcode + return secondOp; case 99: - // cast combination can't happen (error in input). This is for all cases + // Cast combination can't happen (error in input). This is for all cases // where the MidTy is not the same for the two cast instructions. llvm_unreachable("Invalid Cast Combination"); default: @@ -2290,19 +2352,20 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty, assert(castIsValid(op, S, Ty) && "Invalid cast!"); // Construct and return the appropriate CastInst subclass switch (op) { - case Trunc: return new TruncInst (S, Ty, Name, InsertBefore); - case ZExt: return new ZExtInst (S, Ty, Name, InsertBefore); - case SExt: return new SExtInst (S, Ty, Name, InsertBefore); - case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertBefore); - case FPExt: return new FPExtInst (S, Ty, Name, InsertBefore); - case UIToFP: return new UIToFPInst (S, Ty, Name, InsertBefore); - case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore); - case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore); - case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore); - case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore); - case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore); - case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore); - default: llvm_unreachable("Invalid opcode provided"); + case Trunc: return new TruncInst (S, Ty, Name, InsertBefore); + case ZExt: return new ZExtInst (S, Ty, Name, InsertBefore); + case SExt: return new SExtInst (S, Ty, Name, InsertBefore); + case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertBefore); + case FPExt: return new FPExtInst (S, Ty, Name, InsertBefore); + case UIToFP: return new UIToFPInst (S, Ty, Name, InsertBefore); + case SIToFP: return new SIToFPInst (S, Ty, Name, InsertBefore); + case FPToUI: return new FPToUIInst (S, Ty, Name, InsertBefore); + case FPToSI: return new FPToSIInst (S, Ty, Name, InsertBefore); + case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertBefore); + case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertBefore); + case BitCast: return new BitCastInst (S, Ty, Name, InsertBefore); + case AddrSpaceCast: return new AddrSpaceCastInst (S, Ty, Name, InsertBefore); + default: llvm_unreachable("Invalid opcode provided"); }
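The new table states 12 through 16 above all concern the interaction of addrspacecast with bitcast; case 12 is the core one: a pair of addrspacecasts collapses to a single bitcast when the outer address spaces agree, and to a single addrspacecast when they do not. A minimal illustration of the lookup (not part of the patch; assumes an LLVMContext Ctx and the eight-parameter signature this definition uses, passing null for the three IntPtrTy parameters, which the pointer-only cases do not consult):

    Type *I8  = Type::getInt8Ty(Ctx);
    Type *AS0 = PointerType::get(I8, 0);  // i8*
    Type *AS1 = PointerType::get(I8, 1);  // i8 addrspace(1)*
    // addrspacecast AS0->AS1 followed by addrspacecast AS1->AS0: outer source
    // and destination address spaces match, so the pair folds to a bitcast
    // (table state 12).
    unsigned Opc = CastInst::isEliminableCastPair(
        Instruction::AddrSpaceCast, Instruction::AddrSpaceCast,
        AS0, AS1, AS0, 0, 0, 0);
    assert(Opc == Instruction::BitCast);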
} @@ -2311,19 +2374,20 @@ CastInst *CastInst::Create(Instruction::CastOps op, Value *S, Type *Ty, assert(castIsValid(op, S, Ty) && "Invalid cast!"); // Construct and return the appropriate CastInst subclass switch (op) { - case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd); - case ZExt: return new ZExtInst (S, Ty, Name, InsertAtEnd); - case SExt: return new SExtInst (S, Ty, Name, InsertAtEnd); - case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertAtEnd); - case FPExt: return new FPExtInst (S, Ty, Name, InsertAtEnd); - case UIToFP: return new UIToFPInst (S, Ty, Name, InsertAtEnd); - case SIToFP: return new SIToFPInst (S, Ty, Name, InsertAtEnd); - case FPToUI: return new FPToUIInst (S, Ty, Name, InsertAtEnd); - case FPToSI: return new FPToSIInst (S, Ty, Name, InsertAtEnd); - case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd); - case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd); - case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd); - default: llvm_unreachable("Invalid opcode provided"); + case Trunc: return new TruncInst (S, Ty, Name, InsertAtEnd); + case ZExt: return new ZExtInst (S, Ty, Name, InsertAtEnd); + case SExt: return new SExtInst (S, Ty, Name, InsertAtEnd); + case FPTrunc: return new FPTruncInst (S, Ty, Name, InsertAtEnd); + case FPExt: return new FPExtInst (S, Ty, Name, InsertAtEnd); + case UIToFP: return new UIToFPInst (S, Ty, Name, InsertAtEnd); + case SIToFP: return new SIToFPInst (S, Ty, Name, InsertAtEnd); + case FPToUI: return new FPToUIInst (S, Ty, Name, InsertAtEnd); + case FPToSI: return new FPToSIInst (S, Ty, Name, InsertAtEnd); + case PtrToInt: return new PtrToIntInst (S, Ty, Name, InsertAtEnd); + case IntToPtr: return new IntToPtrInst (S, Ty, Name, InsertAtEnd); + case BitCast: return new BitCastInst (S, Ty, Name, InsertAtEnd); + case AddrSpaceCast: return new AddrSpaceCastInst (S, Ty, Name, InsertAtEnd); + default: llvm_unreachable("Invalid opcode provided"); } } @@ -2378,25 +2442,43 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, Type *Ty, CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd) { - assert(S->getType()->isPointerTy() && "Invalid cast"); - assert((Ty->isIntegerTy() || Ty->isPointerTy()) && + assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast"); + assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) && + "Invalid cast"); + assert(Ty->isVectorTy() == S->getType()->isVectorTy() && "Invalid cast"); + assert((!Ty->isVectorTy() || + Ty->getVectorNumElements() == S->getType()->getVectorNumElements()) && "Invalid cast"); - if (Ty->isIntegerTy()) + if (Ty->isIntOrIntVectorTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertAtEnd); + + Type *STy = S->getType(); + if (STy->getPointerAddressSpace() != Ty->getPointerAddressSpace()) + return Create(Instruction::AddrSpaceCast, S, Ty, Name, InsertAtEnd); + return Create(Instruction::BitCast, S, Ty, Name, InsertAtEnd); } /// @brief Create a BitCast or a PtrToInt cast instruction -CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty, - const Twine &Name, +CastInst *CastInst::CreatePointerCast(Value *S, Type *Ty, + const Twine &Name, Instruction *InsertBefore) { assert(S->getType()->isPtrOrPtrVectorTy() && "Invalid cast"); assert((Ty->isIntOrIntVectorTy() || Ty->isPtrOrPtrVectorTy()) && "Invalid cast"); + assert(Ty->isVectorTy() == S->getType()->isVectorTy() && "Invalid cast"); + assert((!Ty->isVectorTy() || + Ty->getVectorNumElements() == S->getType()->getVectorNumElements()) && + 
"Invalid cast"); if (Ty->isIntOrIntVectorTy()) return Create(Instruction::PtrToInt, S, Ty, Name, InsertBefore); + + Type *STy = S->getType(); + if (STy->getPointerAddressSpace() != Ty->getPointerAddressSpace()) + return Create(Instruction::AddrSpaceCast, S, Ty, Name, InsertBefore); + return Create(Instruction::BitCast, S, Ty, Name, InsertBefore); } @@ -2517,8 +2599,48 @@ bool CastInst::isCastable(Type *SrcTy, Type *DestTy) { } } -// Provide a way to get a "cast" where the cast opcode is inferred from the -// types and size of the operand. This, basically, is a parallel of the +bool CastInst::isBitCastable(Type *SrcTy, Type *DestTy) { + if (!SrcTy->isFirstClassType() || !DestTy->isFirstClassType()) + return false; + + if (SrcTy == DestTy) + return true; + + if (VectorType *SrcVecTy = dyn_cast(SrcTy)) { + if (VectorType *DestVecTy = dyn_cast(DestTy)) { + if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) { + // An element by element cast. Valid if casting the elements is valid. + SrcTy = SrcVecTy->getElementType(); + DestTy = DestVecTy->getElementType(); + } + } + } + + if (PointerType *DestPtrTy = dyn_cast(DestTy)) { + if (PointerType *SrcPtrTy = dyn_cast(SrcTy)) { + return SrcPtrTy->getAddressSpace() == DestPtrTy->getAddressSpace(); + } + } + + unsigned SrcBits = SrcTy->getPrimitiveSizeInBits(); // 0 for ptr + unsigned DestBits = DestTy->getPrimitiveSizeInBits(); // 0 for ptr + + // Could still have vectors of pointers if the number of elements doesn't + // match + if (SrcBits == 0 || DestBits == 0) + return false; + + if (SrcBits != DestBits) + return false; + + if (DestTy->isX86_MMXTy() || SrcTy->isX86_MMXTy()) + return false; + + return true; +} + +// Provide a way to get a "cast" where the cast opcode is inferred from the +// types and size of the operand. This, basically, is a parallel of the // logic in the castIsValid function below. This axiom should hold: // castIsValid( getCastOpcode(Val, Ty), Val, Ty) // should not assert in castIsValid. In other words, this produces a "correct" @@ -2535,6 +2657,7 @@ CastInst::getCastOpcode( if (SrcTy == DestTy) return BitCast; + // FIXME: Check address space sizes here if (VectorType *SrcVecTy = dyn_cast(SrcTy)) if (VectorType *DestVecTy = dyn_cast(DestTy)) if (SrcVecTy->getNumElements() == DestVecTy->getNumElements()) { @@ -2601,6 +2724,8 @@ CastInst::getCastOpcode( return BitCast; } else if (DestTy->isPointerTy()) { if (SrcTy->isPointerTy()) { + if (DestTy->getPointerAddressSpace() != SrcTy->getPointerAddressSpace()) + return AddrSpaceCast; return BitCast; // ptr -> ptr } else if (SrcTy->isIntegerTy()) { return IntToPtr; // int -> ptr @@ -2695,13 +2820,27 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, Type *DstTy) { case Instruction::BitCast: // BitCast implies a no-op cast of type only. No bits change. // However, you can't cast pointers to anything but pointers. - if (SrcTy->isPointerTy() != DstTy->isPointerTy()) + if (SrcTy->isPtrOrPtrVectorTy() != DstTy->isPtrOrPtrVectorTy()) return false; - // Now we know we're not dealing with a pointer/non-pointer mismatch. In all - // these cases, the cast is okay if the source and destination bit widths - // are identical. - return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits(); + // For non pointer cases, the cast is okay if the source and destination bit + // widths are identical. 
+ // For non-pointer cases, the cast is okay if the source and destination bit + // widths are identical. + if (!SrcTy->isPtrOrPtrVectorTy()) + return SrcTy->getPrimitiveSizeInBits() == DstTy->getPrimitiveSizeInBits(); + + // If both are pointers then the address spaces must match and vector of + // pointers must have the same number of elements. + return SrcTy->getPointerAddressSpace() == DstTy->getPointerAddressSpace() && + SrcTy->isVectorTy() == DstTy->isVectorTy() && + (!SrcTy->isVectorTy() || + SrcTy->getVectorNumElements() == DstTy->getVectorNumElements()); + + case Instruction::AddrSpaceCast: + return SrcTy->isPtrOrPtrVectorTy() && DstTy->isPtrOrPtrVectorTy() && + SrcTy->getPointerAddressSpace() != DstTy->getPointerAddressSpace() && + SrcTy->isVectorTy() == DstTy->isVectorTy() && + (!SrcTy->isVectorTy() || + SrcTy->getVectorNumElements() == DstTy->getVectorNumElements()); } } @@ -2848,6 +2987,18 @@ BitCastInst::BitCastInst( assert(castIsValid(getOpcode(), S, Ty) && "Illegal BitCast"); } +AddrSpaceCastInst::AddrSpaceCastInst( + Value *S, Type *Ty, const Twine &Name, Instruction *InsertBefore +) : CastInst(Ty, AddrSpaceCast, S, Name, InsertBefore) { + assert(castIsValid(getOpcode(), S, Ty) && "Illegal AddrSpaceCast"); +} + +AddrSpaceCastInst::AddrSpaceCastInst( + Value *S, Type *Ty, const Twine &Name, BasicBlock *InsertAtEnd +) : CastInst(Ty, AddrSpaceCast, S, Name, InsertAtEnd) { + assert(castIsValid(getOpcode(), S, Ty) && "Illegal AddrSpaceCast"); +} + //===----------------------------------------------------------------------===// // CmpInst Classes //===----------------------------------------------------------------------===// @@ -3180,7 +3331,6 @@ SwitchInst::SwitchInst(const SwitchInst &SI) OL[i] = InOL[i]; OL[i+1] = InOL[i+1]; } - TheSubsets = SI.TheSubsets; SubclassOptionalData = SI.SubclassOptionalData; } @@ -3192,16 +3342,6 @@ SwitchInst::~SwitchInst() { /// addCase - Add an entry to the switch instruction... /// void SwitchInst::addCase(ConstantInt *OnVal, BasicBlock *Dest) { - IntegersSubsetToBB Mapping; - - // FIXME: Currently we work with ConstantInt based cases. - // So inititalize IntItem container directly from ConstantInt. - Mapping.add(IntItem::fromConstantInt(OnVal)); - IntegersSubset CaseRanges = Mapping.getCase(); - addCase(CaseRanges, Dest); -} - -void SwitchInst::addCase(IntegersSubset& OnVal, BasicBlock *Dest) { unsigned NewCaseIdx = getNumCases(); unsigned OpNo = NumOperands; if (OpNo+2 > ReservedSpace) @@ -3209,17 +3349,14 @@ void SwitchInst::addCase(IntegersSubset& OnVal, BasicBlock *Dest) { // Initialize some new operands. assert(OpNo+1 < ReservedSpace && "Growing didn't work!"); NumOperands = OpNo+2; - - SubsetsIt TheSubsetsIt = TheSubsets.insert(TheSubsets.end(), OnVal); - - CaseIt Case(this, NewCaseIdx, TheSubsetsIt); - Case.updateCaseValueOperand(OnVal); + CaseIt Case(this, NewCaseIdx); + Case.setValue(OnVal); Case.setSuccessor(Dest); } /// removeCase - This method removes the specified case and its successor /// from the switch instruction. -void SwitchInst::removeCase(CaseIt& i) { +void SwitchInst::removeCase(CaseIt i) { unsigned idx = i.getCaseIndex(); assert(2 + idx*2 < getNumOperands() && "Case index out of range!!!");
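The SwitchInst hunks above drop the experimental IntegersSubset case representation, so addCase once again takes a plain ConstantInt. A small sketch of the restored interface (hypothetical snippet; assumes an LLVMContext Ctx, an insertion block BB, a Value *Cond of type i32, and destination blocks Default and OnTwo):

    SwitchInst *SI = SwitchInst::Create(Cond, Default, /*NumCases=*/1, BB);
    SI->addCase(ConstantInt::get(Type::getInt32Ty(Ctx), 2), OnTwo); // case 2 -> OnTwo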
@@ -3236,16 +3373,6 @@ // Nuke the last value. OL[NumOps-2].set(0); OL[NumOps-2+1].set(0); - - // Do the same with TheCases collection: - if (i.SubsetIt != --TheSubsets.end()) { - *i.SubsetIt = TheSubsets.back(); - TheSubsets.pop_back(); - } else { - TheSubsets.pop_back(); - i.SubsetIt = TheSubsets.end(); - } - NumOperands = NumOps-2; } @@ -3490,6 +3617,10 @@ BitCastInst *BitCastInst::clone_impl() const { return new BitCastInst(getOperand(0), getType()); } +AddrSpaceCastInst *AddrSpaceCastInst::clone_impl() const { + return new AddrSpaceCastInst(getOperand(0), getType()); +} + CallInst *CallInst::clone_impl() const { return new(getNumOperands()) CallInst(*this); } diff --git a/contrib/llvm/lib/IR/LLVMContextImpl.h b/contrib/llvm/lib/IR/LLVMContextImpl.h index 0c659b8..407b985 100644 --- a/contrib/llvm/lib/IR/LLVMContextImpl.h +++ b/contrib/llvm/lib/IR/LLVMContextImpl.h @@ -355,6 +355,11 @@ public: typedef DenseMap<const Function*, unsigned> IntrinsicIDCacheTy; IntrinsicIDCacheTy IntrinsicIDCache; + /// \brief Mapping from a function to its prefix data, which is stored as the + /// operand of an unparented ReturnInst so that the prefix data has a Use. + typedef DenseMap<const Function *, ReturnInst *> PrefixDataMapTy; + PrefixDataMapTy PrefixDataMap; + int getOrAddScopeRecordIdxEntry(MDNode *N, int ExistingIdx); int getOrAddScopeInlinedAtIdxEntry(MDNode *Scope, MDNode *IA,int ExistingIdx); diff --git a/contrib/llvm/lib/IR/LegacyPassManager.cpp b/contrib/llvm/lib/IR/LegacyPassManager.cpp new file mode 100644 index 0000000..a431d82 --- /dev/null +++ b/contrib/llvm/lib/IR/LegacyPassManager.cpp @@ -0,0 +1,1920 @@ +//===- LegacyPassManager.cpp - LLVM Pass Infrastructure Implementation ----===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the legacy LLVM Pass Manager infrastructure. +// +//===----------------------------------------------------------------------===// + + +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/LegacyPassManager.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/LegacyPassManagers.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/PassNameParser.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include <algorithm> +#include <map> +using namespace llvm; +using namespace llvm::legacy; + +// See PassManagers.h for Pass Manager infrastructure overview. + +//===----------------------------------------------------------------------===// +// Pass debugging information. Often it is useful to find out what pass is +// running when a crash occurs in a utility. When this library is compiled with +// debugging on, a command line option (--debug-pass) is enabled that causes the +// pass name to be printed before it executes. +// + +namespace { +// Different debug levels that can be enabled...
+enum PassDebugLevel { + Disabled, Arguments, Structure, Executions, Details +}; +} + +static cl::opt<enum PassDebugLevel> +PassDebugging("debug-pass", cl::Hidden, + cl::desc("Print PassManager debugging information"), + cl::values( + clEnumVal(Disabled , "disable debug output"), + clEnumVal(Arguments , "print pass arguments to pass to 'opt'"), + clEnumVal(Structure , "print pass structure before run()"), + clEnumVal(Executions, "print pass name before it is executed"), + clEnumVal(Details , "print pass details when it is executed"), + clEnumValEnd)); + +namespace { +typedef llvm::cl::list<const llvm::PassInfo *, bool, PassNameParser> +PassOptionList; +} + +// Print IR out before/after specified passes. +static PassOptionList +PrintBefore("print-before", + llvm::cl::desc("Print IR before specified passes"), + cl::Hidden); + +static PassOptionList +PrintAfter("print-after", + llvm::cl::desc("Print IR after specified passes"), + cl::Hidden); + +static cl::opt<bool> +PrintBeforeAll("print-before-all", + llvm::cl::desc("Print IR before each pass"), + cl::init(false)); +static cl::opt<bool> +PrintAfterAll("print-after-all", + llvm::cl::desc("Print IR after each pass"), + cl::init(false)); + +/// This is a helper to determine whether to print IR before or +/// after a pass. + +static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI, + PassOptionList &PassesToPrint) { + for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) { + const llvm::PassInfo *PassInf = PassesToPrint[i]; + if (PassInf) + if (PassInf->getPassArgument() == PI->getPassArgument()) { + return true; + } + } + return false; +} + +/// This is a utility to check whether a pass should have IR dumped +/// before it. +static bool ShouldPrintBeforePass(const PassInfo *PI) { + return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore); +} + +/// This is a utility to check whether a pass should have IR dumped +/// after it. +static bool ShouldPrintAfterPass(const PassInfo *PI) { + return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter); +} + +/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions +/// or higher is specified. +bool PMDataManager::isPassDebuggingExecutionsOrMore() const { + return PassDebugging >= Executions; +} + + + + +void PassManagerPrettyStackEntry::print(raw_ostream &OS) const { + if (V == 0 && M == 0) + OS << "Releasing pass '"; + else + OS << "Running pass '"; + + OS << P->getPassName() << "'"; + + if (M) { + OS << " on module '" << M->getModuleIdentifier() << "'.\n"; + return; + } + if (V == 0) { + OS << '\n'; + return; + } + + OS << " on "; + if (isa<Function>(V)) + OS << "function"; + else if (isa<BasicBlock>(V)) + OS << "basic block"; + else + OS << "value"; + + OS << " '"; + WriteAsOperand(OS, V, /*PrintTy=*/false, M); + OS << "'\n"; +} + + +namespace { +//===----------------------------------------------------------------------===// +// BBPassManager +// +/// BBPassManager manages BasicBlockPass. It batches all the +/// passes together and sequences them to process one basic block before +/// processing the next basic block. +class BBPassManager : public PMDataManager, public FunctionPass { + +public: + static char ID; + explicit BBPassManager() + : PMDataManager(), FunctionPass(ID) {} + + /// Execute all of the passes scheduled for execution. Keep track of + /// whether any of the passes modifies the function, and if so, return true. + bool runOnFunction(Function &F); +
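The options above become ordinary command-line flags of any tool built on the legacy pass manager, opt being the usual host. For orientation (usage sketch; licm is just an arbitrary pass argument):

    opt -debug-pass=Structure -O2 in.ll -S -o out.ll    # print pass tree before run()
    opt -print-before=licm -print-after=licm in.ll -S   # dump IR around one pass
    opt -print-after-all -instcombine in.ll -S          # dump IR after every pass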
+ /// Pass Manager itself does not invalidate any analysis info. + void getAnalysisUsage(AnalysisUsage &Info) const { + Info.setPreservesAll(); + } + + bool doInitialization(Module &M); + bool doInitialization(Function &F); + bool doFinalization(Module &M); + bool doFinalization(Function &F); + + virtual PMDataManager *getAsPMDataManager() { return this; } + virtual Pass *getAsPass() { return this; } + + virtual const char *getPassName() const { + return "BasicBlock Pass Manager"; + } + + // Print passes managed by this manager + void dumpPassStructure(unsigned Offset) { + llvm::dbgs().indent(Offset*2) << "BasicBlockPass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + BasicBlockPass *BP = getContainedPass(Index); + BP->dumpPassStructure(Offset + 1); + dumpLastUses(BP, Offset+1); + } + } + + BasicBlockPass *getContainedPass(unsigned N) { + assert(N < PassVector.size() && "Pass number out of range!"); + BasicBlockPass *BP = static_cast<BasicBlockPass *>(PassVector[N]); + return BP; + } + + virtual PassManagerType getPassManagerType() const { + return PMT_BasicBlockPassManager; + } +}; + +char BBPassManager::ID = 0; +} // End anonymous namespace + +namespace llvm { +namespace legacy { +//===----------------------------------------------------------------------===// +// FunctionPassManagerImpl +// +/// FunctionPassManagerImpl manages FPPassManagers +class FunctionPassManagerImpl : public Pass, + public PMDataManager, + public PMTopLevelManager { + virtual void anchor(); +private: + bool wasRun; +public: + static char ID; + explicit FunctionPassManagerImpl() : + Pass(PT_PassManager, ID), PMDataManager(), + PMTopLevelManager(new FPPassManager()), wasRun(false) {} + + /// add - Add a pass to the queue of passes to run. This passes ownership of + /// the Pass to the PassManager. When the PassManager is destroyed, the pass + /// will be destroyed as well, so there is no need to delete the pass. This + /// implies that all passes MUST be allocated with 'new'. + void add(Pass *P) { + schedulePass(P); + } + + /// createPrinterPass - Get a function printer pass. + Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const { + return createPrintFunctionPass(Banner, &O); + } + + // Prepare for running an on the fly pass, freeing memory if needed + // from a previous run. + void releaseMemoryOnTheFly(); + + /// run - Execute all of the passes scheduled for execution. Keep track of + /// whether any of the passes modifies the module, and if so, return true. + bool run(Function &F); + + /// doInitialization - Run all of the initializers for the function passes. + /// + bool doInitialization(Module &M); + + /// doFinalization - Run all of the finalizers for the function passes. + /// + bool doFinalization(Module &M); + + + virtual PMDataManager *getAsPMDataManager() { return this; } + virtual Pass *getAsPass() { return this; } + virtual PassManagerType getTopLevelPassManagerType() { + return PMT_FunctionPassManager; + } + + /// Pass Manager itself does not invalidate any analysis info.
+ void getAnalysisUsage(AnalysisUsage &Info) const { + Info.setPreservesAll(); + } + + FPPassManager *getContainedManager(unsigned N) { + assert(N < PassManagers.size() && "Pass number out of range!"); + FPPassManager *FP = static_cast<FPPassManager *>(PassManagers[N]); + return FP; + } +}; + +void FunctionPassManagerImpl::anchor() {} + +char FunctionPassManagerImpl::ID = 0; +} // End of legacy namespace +} // End of llvm namespace + +namespace { +//===----------------------------------------------------------------------===// +// MPPassManager +// +/// MPPassManager manages ModulePasses and function pass managers. +/// It batches all Module passes and function pass managers together and +/// sequences them to process one module. +class MPPassManager : public Pass, public PMDataManager { +public: + static char ID; + explicit MPPassManager() : + Pass(PT_PassManager, ID), PMDataManager() { } + + // Delete on the fly managers. + virtual ~MPPassManager() { + for (std::map<Pass *, FunctionPassManagerImpl *>::iterator + I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end(); + I != E; ++I) { + FunctionPassManagerImpl *FPP = I->second; + delete FPP; + } + } + + /// createPrinterPass - Get a module printer pass. + Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const { + return createPrintModulePass(&O, false, Banner); + } + + /// run - Execute all of the passes scheduled for execution. Keep track of + /// whether any of the passes modifies the module, and if so, return true. + bool runOnModule(Module &M); + + using llvm::Pass::doInitialization; + using llvm::Pass::doFinalization; + + /// doInitialization - Run all of the initializers for the module passes. + /// + bool doInitialization(); + + /// doFinalization - Run all of the finalizers for the module passes. + /// + bool doFinalization(); + + /// Pass Manager itself does not invalidate any analysis info. + void getAnalysisUsage(AnalysisUsage &Info) const { + Info.setPreservesAll(); + } + + /// Add RequiredPass into list of lower level passes required by pass P. + /// RequiredPass is run on the fly by Pass Manager when P requests it + /// through getAnalysis interface. + virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass); + + /// Return function pass corresponding to PassInfo PI, that is + /// required by module pass MP. Instantiate analysis pass, by using + /// its runOnFunction() for function F. + virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F); + + virtual const char *getPassName() const { + return "Module Pass Manager"; + } + + virtual PMDataManager *getAsPMDataManager() { return this; } + virtual Pass *getAsPass() { return this; } + + // Print passes managed by this manager + void dumpPassStructure(unsigned Offset) { + llvm::dbgs().indent(Offset*2) << "ModulePass Manager\n"; + for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { + ModulePass *MP = getContainedPass(Index); + MP->dumpPassStructure(Offset + 1); + std::map<Pass *, FunctionPassManagerImpl *>::const_iterator I = + OnTheFlyManagers.find(MP); + if (I != OnTheFlyManagers.end()) + I->second->dumpPassStructure(Offset + 2); + dumpLastUses(MP, Offset+1); + } + } + + ModulePass *getContainedPass(unsigned N) { + assert(N < PassVector.size() && "Pass number out of range!"); + return static_cast<ModulePass *>(PassVector[N]); + } + + virtual PassManagerType getPassManagerType() const { + return PMT_ModulePassManager; + } +
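MPPassManager is what ultimately runs when client code drives the legacy module-level PassManager. A minimal driver sketch (not from this file; assumes a populated Module *M, with pass constructors from the standard LLVM 3.4 headers named in the includes):

    #include "llvm/Analysis/Verifier.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"
    using namespace llvm;

    void runModulePasses(Module *M) {  // hypothetical helper
      PassManager PM;                  // top level: PassManagerImpl + MPPassManager
      PM.add(createVerifierPass());    // module-level passes...
      PM.add(createGlobalDCEPass());
      bool Changed = PM.run(*M);       // schedule, then run over the module
      (void)Changed;
    }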
+ private: + /// Collection of on the fly FPPassManagers. These managers manage + /// function passes that are required by module passes. + std::map<Pass *, FunctionPassManagerImpl *> OnTheFlyManagers; +}; + +char MPPassManager::ID = 0; +} // End anonymous namespace + +namespace llvm { +namespace legacy { +//===----------------------------------------------------------------------===// +// PassManagerImpl +// + +/// PassManagerImpl manages MPPassManagers +class PassManagerImpl : public Pass, + public PMDataManager, + public PMTopLevelManager { + virtual void anchor(); + +public: + static char ID; + explicit PassManagerImpl() : + Pass(PT_PassManager, ID), PMDataManager(), + PMTopLevelManager(new MPPassManager()) {} + + /// add - Add a pass to the queue of passes to run. This passes ownership of + /// the Pass to the PassManager. When the PassManager is destroyed, the pass + /// will be destroyed as well, so there is no need to delete the pass. This + /// implies that all passes MUST be allocated with 'new'. + void add(Pass *P) { + schedulePass(P); + } + + /// createPrinterPass - Get a module printer pass. + Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const { + return createPrintModulePass(&O, false, Banner); + } + + /// run - Execute all of the passes scheduled for execution. Keep track of + /// whether any of the passes modifies the module, and if so, return true. + bool run(Module &M); + + using llvm::Pass::doInitialization; + using llvm::Pass::doFinalization; + + /// doInitialization - Run all of the initializers for the module passes. + /// + bool doInitialization(); + + /// doFinalization - Run all of the finalizers for the module passes. + /// + bool doFinalization(); + + /// Pass Manager itself does not invalidate any analysis info. + void getAnalysisUsage(AnalysisUsage &Info) const { + Info.setPreservesAll(); + } + + virtual PMDataManager *getAsPMDataManager() { return this; } + virtual Pass *getAsPass() { return this; } + virtual PassManagerType getTopLevelPassManagerType() { + return PMT_ModulePassManager; + } + + MPPassManager *getContainedManager(unsigned N) { + assert(N < PassManagers.size() && "Pass number out of range!"); + MPPassManager *MP = static_cast<MPPassManager *>(PassManagers[N]); + return MP; + } +}; + +void PassManagerImpl::anchor() {} + +char PassManagerImpl::ID = 0; +} // End of legacy namespace +} // End of llvm namespace + +namespace { + +//===----------------------------------------------------------------------===// +/// TimingInfo Class - This class is used to calculate information about the +/// amount of time each pass takes to execute. This only happens when +/// -time-passes is enabled on the command line. +/// + +static ManagedStatic<sys::SmartMutex<true> > TimingInfoMutex; + +class TimingInfo { + DenseMap<Pass*, Timer*> TimingData; + TimerGroup TG; +public: + // Use 'create' member to get this. + TimingInfo() : TG("... Pass execution timing report ...") {} + + // TimingDtor - Print out information about timing information + ~TimingInfo() { + // Delete all of the timers, which accumulate their info into the + // TimerGroup. + for (DenseMap<Pass*, Timer*>::iterator I = TimingData.begin(), + E = TimingData.end(); I != E; ++I) + delete I->second; + // TimerGroup is deleted next, printing the report. + } + + // createTheTimeInfo - This method either initializes the TheTimeInfo pointer + // to a non null value (if the -time-passes option is enabled) or it leaves it + // null. It may be called multiple times. + static void createTheTimeInfo(); + + /// getPassTimer - Return the timer for the specified pass if it exists.
+ Timer *getPassTimer(Pass *P) { + if (P->getAsPMDataManager()) + return 0; + + sys::SmartScopedLock<true> Lock(*TimingInfoMutex); + Timer *&T = TimingData[P]; + if (T == 0) + T = new Timer(P->getPassName(), TG); + return T; + } +}; + +} // End of anon namespace + +static TimingInfo *TheTimeInfo; + +//===----------------------------------------------------------------------===// +// PMTopLevelManager implementation + +/// Initialize top level manager. Create first pass manager. +PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) { + PMDM->setTopLevelManager(this); + addPassManager(PMDM); + activeStack.push(PMDM); +} + +/// Set pass P as the last user of the given analysis passes. +void +PMTopLevelManager::setLastUser(ArrayRef<Pass*> AnalysisPasses, Pass *P) { + unsigned PDepth = 0; + if (P->getResolver()) + PDepth = P->getResolver()->getPMDataManager().getDepth(); + + for (SmallVectorImpl<Pass *>::const_iterator I = AnalysisPasses.begin(), + E = AnalysisPasses.end(); I != E; ++I) { + Pass *AP = *I; + LastUser[AP] = P; + + if (P == AP) + continue; + + // Update the last users of passes that are required transitive by AP. + AnalysisUsage *AnUsage = findAnalysisUsage(AP); + const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet(); + SmallVector<Pass *, 12> LastUses; + SmallVector<Pass *, 12> LastPMUses; + for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(), + E = IDs.end(); I != E; ++I) { + Pass *AnalysisPass = findAnalysisPass(*I); + assert(AnalysisPass && "Expected analysis pass to exist."); + AnalysisResolver *AR = AnalysisPass->getResolver(); + assert(AR && "Expected analysis resolver to exist."); + unsigned APDepth = AR->getPMDataManager().getDepth(); + + if (PDepth == APDepth) + LastUses.push_back(AnalysisPass); + else if (PDepth > APDepth) + LastPMUses.push_back(AnalysisPass); + } + + setLastUser(LastUses, P); + + // If this pass has a corresponding pass manager, push higher level + // analysis to this pass manager. + if (P->getResolver()) + setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass()); + + + // If AP is the last user of other passes then make P last user of + // such passes. + for (DenseMap<Pass *, Pass *>::iterator LUI = LastUser.begin(), + LUE = LastUser.end(); LUI != LUE; ++LUI) { + if (LUI->second == AP) + // DenseMap iterator is not invalidated here because + // this is just updating existing entries. + LastUser[LUI->first] = P; + } + } +} + +/// Collect passes whose last user is P +void PMTopLevelManager::collectLastUses(SmallVectorImpl<Pass *> &LastUses, + Pass *P) { + DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator DMI = + InversedLastUser.find(P); + if (DMI == InversedLastUser.end()) + return; + + SmallPtrSet<Pass *, 8> &LU = DMI->second; + for (SmallPtrSet<Pass *, 8>::iterator I = LU.begin(), + E = LU.end(); I != E; ++I) { + LastUses.push_back(*I); + } + +} + +AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { + AnalysisUsage *AnUsage = NULL; + DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.find(P); + if (DMI != AnUsageMap.end()) + AnUsage = DMI->second; + else { + AnUsage = new AnalysisUsage(); + P->getAnalysisUsage(*AnUsage); + AnUsageMap[P] = AnUsage; + } + return AnUsage; +} + +/// Schedule pass P for execution. Make sure that passes required by +/// P are run before P is run. Update analysis info maintained by +/// the manager. Remove dead passes. This is a recursive function. +void PMTopLevelManager::schedulePass(Pass *P) { + + // TODO : Allocate function manager for this pass, otherwise required set + // may be inserted into previous function manager + + // Give pass a chance to prepare the stage.
+ P->preparePassManager(activeStack); + + // If P is an analysis pass and it is available then do not + // generate the analysis again. Stale analysis info should not be + // available at this point. + const PassInfo *PI = + PassRegistry::getPassRegistry()->getPassInfo(P->getPassID()); + if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) { + delete P; + return; + } + + AnalysisUsage *AnUsage = findAnalysisUsage(P); + + bool checkAnalysis = true; + while (checkAnalysis) { + checkAnalysis = false; + + const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet(); + for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(), + E = RequiredSet.end(); I != E; ++I) { + + Pass *AnalysisPass = findAnalysisPass(*I); + if (!AnalysisPass) { + const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I); + + if (PI == NULL) { + // Pass P is not in the global PassRegistry + dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n"; + dbgs() << "Verify if there is a pass dependency cycle." << "\n"; + dbgs() << "Required Passes:" << "\n"; + for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(), + E = RequiredSet.end(); I2 != E && I2 != I; ++I2) { + Pass *AnalysisPass2 = findAnalysisPass(*I2); + if (AnalysisPass2) { + dbgs() << "\t" << AnalysisPass2->getPassName() << "\n"; + } else { + dbgs() << "\t" << "Error: Required pass not found! Possible causes:" << "\n"; + dbgs() << "\t\t" << "- Pass misconfiguration (e.g.: missing macros)" << "\n"; + dbgs() << "\t\t" << "- Corruption of the global PassRegistry" << "\n"; + } + } + } + + assert(PI && "Expected required passes to be initialized"); + AnalysisPass = PI->createPass(); + if (P->getPotentialPassManagerType () == + AnalysisPass->getPotentialPassManagerType()) + // Schedule analysis pass that is managed by the same pass manager. + schedulePass(AnalysisPass); + else if (P->getPotentialPassManagerType () > + AnalysisPass->getPotentialPassManagerType()) { + // Schedule analysis pass that is managed by a new manager. + schedulePass(AnalysisPass); + // Recheck analysis passes to ensure that required analyses that + // are already checked are still available. + checkAnalysis = true; + } else + // Do not schedule this analysis. Lower level analysis + // passes are run on the fly. + delete AnalysisPass; + } + } + } + + // Now all required passes are available. + if (ImmutablePass *IP = P->getAsImmutablePass()) { + // P is an immutable pass and it will be managed by this + // top level manager. Set up analysis resolver to connect them. + PMDataManager *DM = getAsPMDataManager(); + AnalysisResolver *AR = new AnalysisResolver(*DM); + P->setResolver(AR); + DM->initializeAnalysisImpl(P); + addImmutablePass(IP); + DM->recordAvailableAnalysis(IP); + return; + } + + if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) { + Pass *PP = P->createPrinterPass( + dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***"); + PP->assignPassManager(activeStack, getTopLevelPassManagerType()); + } + + // Add the requested pass to the best available pass manager. + P->assignPassManager(activeStack, getTopLevelPassManagerType()); + + if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) { + Pass *PP = P->createPrinterPass( + dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***"); + PP->assignPassManager(activeStack, getTopLevelPassManagerType()); + } +} + +/// Find the pass that implements Analysis AID. Search immutable +/// passes and all pass managers.
If desired pass is not found +/// then return NULL. +Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { + + // Check pass managers + for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(), + E = PassManagers.end(); I != E; ++I) + if (Pass *P = (*I)->findAnalysisPass(AID, false)) + return P; + + // Check other pass managers + for (SmallVectorImpl<PMDataManager *>::iterator + I = IndirectPassManagers.begin(), + E = IndirectPassManagers.end(); I != E; ++I) + if (Pass *P = (*I)->findAnalysisPass(AID, false)) + return P; + + // Check the immutable passes. Iterate in reverse order so that we find + // the most recently registered passes first. + for (SmallVectorImpl<ImmutablePass *>::reverse_iterator I = + ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) { + AnalysisID PI = (*I)->getPassID(); + if (PI == AID) + return *I; + + // If Pass not found then check the interfaces implemented by Immutable Pass + const PassInfo *PassInf = + PassRegistry::getPassRegistry()->getPassInfo(PI); + assert(PassInf && "Expected all immutable passes to be initialized"); + const std::vector<const PassInfo*> &ImmPI = + PassInf->getInterfacesImplemented(); + for (std::vector<const PassInfo*>::const_iterator II = ImmPI.begin(), + EE = ImmPI.end(); II != EE; ++II) { + if ((*II)->getTypeInfo() == AID) + return *I; + } + } + + return 0; +} + +// Print passes managed by this top level manager. +void PMTopLevelManager::dumpPasses() const { + + if (PassDebugging < Structure) + return; + + // Print out the immutable passes + for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) { + ImmutablePasses[i]->dumpPassStructure(0); + } + + // Every class that derives from PMDataManager also derives from Pass + // (sometimes indirectly), but there's no inheritance relationship + // between PMDataManager and Pass, so we have to getAsPass to get + // from a PMDataManager* to a Pass*.
+ for (SmallVectorImpl<PMDataManager *>::const_iterator I = + PassManagers.begin(), E = PassManagers.end(); I != E; ++I) + (*I)->getAsPass()->dumpPassStructure(1); +} + +void PMTopLevelManager::dumpArguments() const { + + if (PassDebugging < Arguments) + return; + + dbgs() << "Pass Arguments: "; + for (SmallVectorImpl<ImmutablePass *>::const_iterator I = + ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) + if (const PassInfo *PI = + PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) { + assert(PI && "Expected all immutable passes to be initialized"); + if (!PI->isAnalysisGroup()) + dbgs() << " -" << PI->getPassArgument(); + } + for (SmallVectorImpl<PMDataManager *>::const_iterator I = + PassManagers.begin(), E = PassManagers.end(); I != E; ++I) + (*I)->dumpPassArguments(); + dbgs() << "\n"; +} + +void PMTopLevelManager::initializeAllAnalysisInfo() { + for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(), + E = PassManagers.end(); I != E; ++I) + (*I)->initializeAnalysisInfo(); + + // Initialize other pass managers + for (SmallVectorImpl<PMDataManager *>::iterator + I = IndirectPassManagers.begin(), E = IndirectPassManagers.end(); + I != E; ++I) + (*I)->initializeAnalysisInfo(); + + for (DenseMap<Pass *, Pass *>::iterator DMI = LastUser.begin(), + DME = LastUser.end(); DMI != DME; ++DMI) { + DenseMap<Pass *, SmallPtrSet<Pass *, 8> >::iterator InvDMI = + InversedLastUser.find(DMI->second); + if (InvDMI != InversedLastUser.end()) { + SmallPtrSet<Pass *, 8> &L = InvDMI->second; + L.insert(DMI->first); + } else { + SmallPtrSet<Pass *, 8> L; L.insert(DMI->first); + InversedLastUser[DMI->second] = L; + } + } +} + +/// Destructor +PMTopLevelManager::~PMTopLevelManager() { + for (SmallVectorImpl<PMDataManager *>::iterator I = PassManagers.begin(), + E = PassManagers.end(); I != E; ++I) + delete *I; + + for (SmallVectorImpl<ImmutablePass *>::iterator + I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) + delete *I; + + for (DenseMap<Pass *, AnalysisUsage *>::iterator DMI = AnUsageMap.begin(), + DME = AnUsageMap.end(); DMI != DME; ++DMI) + delete DMI->second; +} + +//===----------------------------------------------------------------------===// +// PMDataManager implementation + +/// Augment AvailableAnalysis by adding analysis made available by pass P. +void PMDataManager::recordAvailableAnalysis(Pass *P) { + AnalysisID PI = P->getPassID(); + + AvailableAnalysis[PI] = P; + + assert(!AvailableAnalysis.empty()); + + // This pass is the current implementation of all of the interfaces it + // implements as well. + const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI); + if (PInf == 0) return; + const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented(); + for (unsigned i = 0, e = II.size(); i != e; ++i) + AvailableAnalysis[II[i]->getTypeInfo()] = P; +} + +// Return true if P preserves high level analysis used by other +// passes managed by this manager +bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) { + AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); + if (AnUsage->getPreservesAll()) + return true; + + const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet(); + for (SmallVectorImpl<Pass *>::iterator I = HigherLevelAnalysis.begin(), + E = HigherLevelAnalysis.end(); I != E; ++I) { + Pass *P1 = *I; + if (P1->getAsImmutablePass() == 0 && + std::find(PreservedSet.begin(), PreservedSet.end(), + P1->getPassID()) == + PreservedSet.end()) + return false; + } + + return true; +} + +/// verifyPreservedAnalysis -- Verify analysis preserved by pass P. +void PMDataManager::verifyPreservedAnalysis(Pass *P) { + // Don't do this unless assertions are enabled.
+#ifdef NDEBUG + return; +#endif + AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); + const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet(); + + // Verify preserved analysis + for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(), + E = PreservedSet.end(); I != E; ++I) { + AnalysisID AID = *I; + if (Pass *AP = findAnalysisPass(AID, true)) { + TimeRegion PassTimer(getPassTimer(AP)); + AP->verifyAnalysis(); + } + } +} + +/// Remove Analysis not preserved by Pass P +void PMDataManager::removeNotPreservedAnalysis(Pass *P) { + AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); + if (AnUsage->getPreservesAll()) + return; + + const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet(); + for (DenseMap<AnalysisID, Pass*>::iterator I = AvailableAnalysis.begin(), + E = AvailableAnalysis.end(); I != E; ) { + DenseMap<AnalysisID, Pass*>::iterator Info = I++; + if (Info->second->getAsImmutablePass() == 0 && + std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == + PreservedSet.end()) { + // Remove this analysis + if (PassDebugging >= Details) { + Pass *S = Info->second; + dbgs() << " -- '" << P->getPassName() << "' is not preserving '"; + dbgs() << S->getPassName() << "'\n"; + } + AvailableAnalysis.erase(Info); + } + } + + // Check inherited analysis also. If P is not preserving analysis + // provided by parent manager then remove it here. + for (unsigned Index = 0; Index < PMT_Last; ++Index) { + + if (!InheritedAnalysis[Index]) + continue; + + for (DenseMap<AnalysisID, Pass*>::iterator + I = InheritedAnalysis[Index]->begin(), + E = InheritedAnalysis[Index]->end(); I != E; ) { + DenseMap<AnalysisID, Pass*>::iterator Info = I++; + if (Info->second->getAsImmutablePass() == 0 && + std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == + PreservedSet.end()) { + // Remove this analysis + if (PassDebugging >= Details) { + Pass *S = Info->second; + dbgs() << " -- '" << P->getPassName() << "' is not preserving '"; + dbgs() << S->getPassName() << "'\n"; + } + InheritedAnalysis[Index]->erase(Info); + } + } + } +} + +/// Remove analysis passes that are not used any longer +void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg, + enum PassDebuggingString DBG_STR) { + + SmallVector<Pass *, 12> DeadPasses; + + // If this is an on-the-fly manager then it does not have TPM. + if (!TPM) + return; + + TPM->collectLastUses(DeadPasses, P); + + if (PassDebugging >= Details && !DeadPasses.empty()) { + dbgs() << " -*- '" << P->getPassName(); + dbgs() << "' is the last user of following pass instances."; + dbgs() << " Free these instances\n"; + } + + for (SmallVectorImpl<Pass *>::iterator I = DeadPasses.begin(), + E = DeadPasses.end(); I != E; ++I) + freePass(*I, Msg, DBG_STR); +} + +void PMDataManager::freePass(Pass *P, StringRef Msg, + enum PassDebuggingString DBG_STR) { + dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg); + + { + // If the pass crashes releasing memory, remember this. + PassManagerPrettyStackEntry X(P); + TimeRegion PassTimer(getPassTimer(P)); + + P->releaseMemory(); + } + + AnalysisID PI = P->getPassID(); + if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) { + // Remove the pass itself (if it is not already removed). + AvailableAnalysis.erase(PI); + + // Remove all interfaces this pass implements, for which it is also + // listed as the available implementation.
+ const std::vector<const PassInfo*> &II = PInf->getInterfacesImplemented(); + for (unsigned i = 0, e = II.size(); i != e; ++i) { + DenseMap<AnalysisID, Pass*>::iterator Pos = + AvailableAnalysis.find(II[i]->getTypeInfo()); + if (Pos != AvailableAnalysis.end() && Pos->second == P) + AvailableAnalysis.erase(Pos); + } + } +} + +/// Add pass P into the PassVector. Update +/// AvailableAnalysis appropriately if ProcessAnalysis is true. +void PMDataManager::add(Pass *P, bool ProcessAnalysis) { + // This manager is going to manage pass P. Set up analysis resolver + // to connect them. + AnalysisResolver *AR = new AnalysisResolver(*this); + P->setResolver(AR); + + // If a FunctionPass F is the last user of ModulePass info M + // then the F's manager, not F, records itself as a last user of M. + SmallVector<Pass *, 12> TransferLastUses; + + if (!ProcessAnalysis) { + // Add pass + PassVector.push_back(P); + return; + } + + // At the moment, this pass is the last user of all required passes. + SmallVector<Pass *, 12> LastUses; + SmallVector<Pass *, 8> RequiredPasses; + SmallVector<AnalysisID, 8> ReqAnalysisNotAvailable; + + unsigned PDepth = this->getDepth(); + + collectRequiredAnalysis(RequiredPasses, + ReqAnalysisNotAvailable, P); + for (SmallVectorImpl<Pass *>::iterator I = RequiredPasses.begin(), + E = RequiredPasses.end(); I != E; ++I) { + Pass *PRequired = *I; + unsigned RDepth = 0; + + assert(PRequired->getResolver() && "Analysis Resolver is not set"); + PMDataManager &DM = PRequired->getResolver()->getPMDataManager(); + RDepth = DM.getDepth(); + + if (PDepth == RDepth) + LastUses.push_back(PRequired); + else if (PDepth > RDepth) { + // Let the parent claim responsibility of last use + TransferLastUses.push_back(PRequired); + // Keep track of higher level analysis used by this manager. + HigherLevelAnalysis.push_back(PRequired); + } else + llvm_unreachable("Unable to accommodate Required Pass"); + } + + // Set P as P's last user until someone starts using P. + // However, if P is a Pass Manager then it does not need + // to record its last user. + if (P->getAsPMDataManager() == 0) + LastUses.push_back(P); + TPM->setLastUser(LastUses, P); + + if (!TransferLastUses.empty()) { + Pass *My_PM = getAsPass(); + TPM->setLastUser(TransferLastUses, My_PM); + TransferLastUses.clear(); + } + + // Now, take care of required analyses that are not available. + for (SmallVectorImpl<AnalysisID>::iterator + I = ReqAnalysisNotAvailable.begin(), + E = ReqAnalysisNotAvailable.end() ;I != E; ++I) { + const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I); + Pass *AnalysisPass = PI->createPass(); + this->addLowerLevelRequiredPass(P, AnalysisPass); + } + + // Take a note of analysis required and made available by this pass. + // Remove the analysis not preserved by this pass + removeNotPreservedAnalysis(P); + recordAvailableAnalysis(P); + + // Add pass + PassVector.push_back(P); +} + + +/// Populate RP with analysis passes that are required by +/// pass P and are available. Populate RP_NotAvail with analysis +/// passes that are required by pass P but are not available.
+void PMDataManager::collectRequiredAnalysis(SmallVectorImpl<Pass *> &RP, + SmallVectorImpl<AnalysisID> &RP_NotAvail, + Pass *P) { + AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); + const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet(); + for (AnalysisUsage::VectorType::const_iterator + I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) { + if (Pass *AnalysisPass = findAnalysisPass(*I, true)) + RP.push_back(AnalysisPass); + else + RP_NotAvail.push_back(*I); + } + + const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet(); + for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(), + E = IDs.end(); I != E; ++I) { + if (Pass *AnalysisPass = findAnalysisPass(*I, true)) + RP.push_back(AnalysisPass); + else + RP_NotAvail.push_back(*I); + } +} + +// All Required analyses should be available to the pass as it runs! Here +// we fill in the AnalysisImpls member of the pass so that it can +// successfully use the getAnalysis() method to retrieve the +// implementations it needs. +// +void PMDataManager::initializeAnalysisImpl(Pass *P) { + AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); + + for (AnalysisUsage::VectorType::const_iterator + I = AnUsage->getRequiredSet().begin(), + E = AnUsage->getRequiredSet().end(); I != E; ++I) { + Pass *Impl = findAnalysisPass(*I, true); + if (Impl == 0) + // This may be analysis pass that is initialized on the fly. + // If that is not the case then it will raise an assert when it is used. + continue; + AnalysisResolver *AR = P->getResolver(); + assert(AR && "Analysis Resolver is not set"); + AR->addAnalysisImplsPair(*I, Impl); + } +} + +/// Find the pass that implements Analysis AID. If desired pass is not found +/// then return NULL. +Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) { + + // Check if the AvailableAnalysis map has an entry. + DenseMap<AnalysisID, Pass*>::const_iterator I = AvailableAnalysis.find(AID); + + if (I != AvailableAnalysis.end()) + return I->second; + + // Search Parents through TopLevelManager + if (SearchParent) + return TPM->findAnalysisPass(AID); + + return NULL; +} + +// Print list of passes that are last used by P. +void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{ + + SmallVector<Pass *, 12> LUses; + + // If this is an on-the-fly manager then it does not have TPM.
+ if (!TPM) + return; + + TPM->collectLastUses(LUses, P); + + for (SmallVectorImpl<Pass *>::iterator I = LUses.begin(), + E = LUses.end(); I != E; ++I) { + llvm::dbgs() << "--" << std::string(Offset*2, ' '); + (*I)->dumpPassStructure(0); + } +} + +void PMDataManager::dumpPassArguments() const { + for (SmallVectorImpl<Pass *>::const_iterator I = PassVector.begin(), + E = PassVector.end(); I != E; ++I) { + if (PMDataManager *PMD = (*I)->getAsPMDataManager()) + PMD->dumpPassArguments(); + else + if (const PassInfo *PI = + PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) + if (!PI->isAnalysisGroup()) + dbgs() << " -" << PI->getPassArgument(); + } +} + +void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1, + enum PassDebuggingString S2, + StringRef Msg) { + if (PassDebugging < Executions) + return; + dbgs() << (void*)this << std::string(getDepth()*2+1, ' '); + switch (S1) { + case EXECUTION_MSG: + dbgs() << "Executing Pass '" << P->getPassName(); + break; + case MODIFICATION_MSG: + dbgs() << "Made Modification '" << P->getPassName(); + break; + case FREEING_MSG: + dbgs() << " Freeing Pass '" << P->getPassName(); + break; + default: + break; + } + switch (S2) { + case ON_BASICBLOCK_MSG: + dbgs() << "' on BasicBlock '" << Msg << "'...\n"; + break; + case ON_FUNCTION_MSG: + dbgs() << "' on Function '" << Msg << "'...\n"; + break; + case ON_MODULE_MSG: + dbgs() << "' on Module '" << Msg << "'...\n"; + break; + case ON_REGION_MSG: + dbgs() << "' on Region '" << Msg << "'...\n"; + break; + case ON_LOOP_MSG: + dbgs() << "' on Loop '" << Msg << "'...\n"; + break; + case ON_CG_MSG: + dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n"; + break; + default: + break; + } +} + +void PMDataManager::dumpRequiredSet(const Pass *P) const { + if (PassDebugging < Details) + return; + + AnalysisUsage analysisUsage; + P->getAnalysisUsage(analysisUsage); + dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet()); +} + +void PMDataManager::dumpPreservedSet(const Pass *P) const { + if (PassDebugging < Details) + return; + + AnalysisUsage analysisUsage; + P->getAnalysisUsage(analysisUsage); + dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet()); +} + +void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P, + const AnalysisUsage::VectorType &Set) const { + assert(PassDebugging >= Details); + if (Set.empty()) + return; + dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; + for (unsigned i = 0; i != Set.size(); ++i) { + if (i) dbgs() << ','; + const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]); + if (!PInf) { + // Some preserved passes, such as AliasAnalysis, may not be initialized by + // all drivers. + dbgs() << " Uninitialized Pass"; + continue; + } + dbgs() << ' ' << PInf->getPassName(); + } + dbgs() << '\n'; +} + +/// Add RequiredPass into list of lower level passes required by pass P. +/// RequiredPass is run on the fly by Pass Manager when P requests it +/// through getAnalysis interface. +/// This should be handled by specific pass manager. +void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) { + if (TPM) { + TPM->dumpArguments(); + TPM->dumpPasses(); + }
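The comment below describes the one legitimate path into addLowerLevelRequiredPass: a module pass asking for a function-level analysis, which MPPassManager overrides this hook to satisfy via an on-the-fly FunctionPassManagerImpl. A hypothetical sketch of such a client pass (illustrative only; DomCounter is invented, DominatorTree is the 3.4-era function-level analysis from the header named below):

    #include "llvm/Analysis/Dominators.h"
    #include "llvm/IR/Module.h"
    #include "llvm/Pass.h"
    using namespace llvm;

    namespace {
    struct DomCounter : public ModulePass {  // hypothetical example pass
      static char ID;
      DomCounter() : ModulePass(ID) {}
      virtual void getAnalysisUsage(AnalysisUsage &AU) const {
        AU.addRequired<DominatorTree>();     // function-level info from a module pass
      }
      virtual bool runOnModule(Module &M) {
        for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
          if (!F->isDeclaration())
            (void)getAnalysis<DominatorTree>(*F); // computed on demand, per function
        return false;
      }
    };
    char DomCounter::ID = 0;
    }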
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+/// This should be handled by specific pass manager.
+void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+  if (TPM) {
+    TPM->dumpArguments();
+    TPM->dumpPasses();
+  }
+
+  // A module level pass may require function level analysis info
+  // (e.g. dominator info). Pass manager uses an on-the-fly function pass
+  // manager to provide this on demand. In that case, in pass manager
+  // terminology, a module level pass is requiring lower level analysis
+  // info managed by a lower level pass manager.
+
+  // When the pass manager is not able to order the required analysis info,
+  // it checks whether any lower level manager will be able to provide this
+  // analysis info on demand or not.
+#ifndef NDEBUG
+  dbgs() << "Unable to schedule '" << RequiredPass->getPassName();
+  dbgs() << "' required by '" << P->getPassName() << "'\n";
+#endif
+  llvm_unreachable("Unable to schedule pass");
+}
+
+Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) {
+  llvm_unreachable("Unable to find on the fly pass");
+}
+
+// Destructor
+PMDataManager::~PMDataManager() {
+  for (SmallVectorImpl<Pass *>::iterator I = PassVector.begin(),
+         E = PassVector.end(); I != E; ++I)
+    delete *I;
+}
+
+//===----------------------------------------------------------------------===//
+// NOTE: Is this the right place to define this method?
+// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist.
+Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const {
+  return PM.findAnalysisPass(ID, dir);
+}
+
+Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI,
+                                     Function &F) {
+  return PM.getOnTheFlyPass(P, AnalysisPI, F);
+}
+
+//===----------------------------------------------------------------------===//
+// BBPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnBasicBlock method.  Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool BBPassManager::runOnFunction(Function &F) {
+  if (F.isDeclaration())
+    return false;
+
+  bool Changed = doInitialization(F);
+
+  for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
+    for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+      BasicBlockPass *BP = getContainedPass(Index);
+      bool LocalChanged = false;
+
+      dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
+      dumpRequiredSet(BP);
+
+      initializeAnalysisImpl(BP);
+
+      {
+        // If the pass crashes, remember this.
+        PassManagerPrettyStackEntry X(BP, *I);
+        TimeRegion PassTimer(getPassTimer(BP));
+
+        LocalChanged |= BP->runOnBasicBlock(*I);
+      }
+
+      Changed |= LocalChanged;
+      if (LocalChanged)
+        dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
+                     I->getName());
+      dumpPreservedSet(BP);
+
+      verifyPreservedAnalysis(BP);
+      removeNotPreservedAnalysis(BP);
+      recordAvailableAnalysis(BP);
+      removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG);
+    }
+
+  return doFinalization(F) || Changed;
+}
+
+// Implement doInitialization and doFinalization
+bool BBPassManager::doInitialization(Module &M) {
+  bool Changed = false;
+
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+    Changed |= getContainedPass(Index)->doInitialization(M);
+
+  return Changed;
+}
+
+bool BBPassManager::doFinalization(Module &M) {
+  bool Changed = false;
+
+  for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+    Changed |= getContainedPass(Index)->doFinalization(M);
+
+  return Changed;
+}
+
+bool BBPassManager::doInitialization(Function &F) {
+  bool Changed = false;
+
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    BasicBlockPass *BP = getContainedPass(Index);
+    Changed |= BP->doInitialization(F);
+  }
+
+  return Changed;
+}
+
+bool BBPassManager::doFinalization(Function &F) {
+  bool Changed = false;
+
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    BasicBlockPass *BP = getContainedPass(Index);
+    Changed |= BP->doFinalization(F);
+  }
+
+  return Changed;
+}
+
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManager implementation
+
+/// Create new Function pass manager
+FunctionPassManager::FunctionPassManager(Module *m) : M(m) {
+  FPM = new FunctionPassManagerImpl();
+  // FPM is the top level manager.
+  FPM->setTopLevelManager(FPM);
+
+  AnalysisResolver *AR = new AnalysisResolver(*FPM);
+  FPM->setResolver(AR);
+}
+
+FunctionPassManager::~FunctionPassManager() {
+  delete FPM;
+}
+
+/// add - Add a pass to the queue of passes to run.  This passes
+/// ownership of the Pass to the PassManager.  When the
+/// PassManager_X is destroyed, the pass will be destroyed as well, so
+/// there is no need to delete the pass. (TODO delete passes.)
+/// This implies that all passes MUST be allocated with 'new'.
+void FunctionPassManager::add(Pass *P) {
+  FPM->add(P);
+}
+
+/// run - Execute all of the passes scheduled for execution.  Keep
+/// track of whether any of the passes modifies the function, and if
+/// so, return true.
+///
+bool FunctionPassManager::run(Function &F) {
+  if (F.isMaterializable()) {
+    std::string errstr;
+    if (F.Materialize(&errstr))
+      report_fatal_error("Error reading bitcode file: " + Twine(errstr));
+  }
+  return FPM->run(F);
+}
+
+
+/// doInitialization - Run all of the initializers for the function passes.
+///
+bool FunctionPassManager::doInitialization() {
+  return FPM->doInitialization(*M);
+}
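As an aside, the typical client sequence for the FunctionPassManager defined here is doInitialization(), run() per function, then doFinalization(); a hedged sketch against the 3.4 headers (the pass chosen is arbitrary):

#include "llvm/PassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Scalar.h"

// Sketch: run a per-function pipeline the way a JIT client would.
bool optimizeFunctions(llvm::Module &M) {
  llvm::FunctionPassManager FPM(&M);
  FPM.add(llvm::createInstructionCombiningPass());

  bool Changed = FPM.doInitialization();
  for (llvm::Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
    Changed |= FPM.run(*F); // materializes the function body first if needed
  return FPM.doFinalization() || Changed;
}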
+
+/// doFinalization - Run all of the finalizers for the function passes.
+///
+bool FunctionPassManager::doFinalization() {
+  return FPM->doFinalization(*M);
+}
+
+//===----------------------------------------------------------------------===//
+// FunctionPassManagerImpl implementation
+//
+bool FunctionPassManagerImpl::doInitialization(Module &M) {
+  bool Changed = false;
+
+  dumpArguments();
+  dumpPasses();
+
+  SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+  for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+       E = IPV.end(); I != E; ++I) {
+    Changed |= (*I)->doInitialization(M);
+  }
+
+  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+    Changed |= getContainedManager(Index)->doInitialization(M);
+
+  return Changed;
+}
+
+bool FunctionPassManagerImpl::doFinalization(Module &M) {
+  bool Changed = false;
+
+  for (int Index = getNumContainedManagers() - 1; Index >= 0; --Index)
+    Changed |= getContainedManager(Index)->doFinalization(M);
+
+  SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+  for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+       E = IPV.end(); I != E; ++I) {
+    Changed |= (*I)->doFinalization(M);
+  }
+
+  return Changed;
+}
+
+/// cleanup - After running all passes, clean up pass manager cache.
+void FPPassManager::cleanup() {
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    FunctionPass *FP = getContainedPass(Index);
+    AnalysisResolver *AR = FP->getResolver();
+    assert(AR && "Analysis Resolver is not set");
+    AR->clearAnalysisImpls();
+  }
+}
+
+void FunctionPassManagerImpl::releaseMemoryOnTheFly() {
+  if (!wasRun)
+    return;
+  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) {
+    FPPassManager *FPPM = getContainedManager(Index);
+    for (unsigned Index = 0; Index < FPPM->getNumContainedPasses(); ++Index) {
+      FPPM->getContainedPass(Index)->releaseMemory();
+    }
+  }
+  wasRun = false;
+}
+
+// Execute all the passes managed by this top level manager.
+// Return true if any function is modified by a pass.
+bool FunctionPassManagerImpl::run(Function &F) {
+  bool Changed = false;
+  TimingInfo::createTheTimeInfo();
+
+  initializeAllAnalysisInfo();
+  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+    Changed |= getContainedManager(Index)->runOnFunction(F);
+
+  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+    getContainedManager(Index)->cleanup();
+
+  wasRun = true;
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// FPPassManager implementation
+
+char FPPassManager::ID = 0;
+/// Print passes managed by this manager
+void FPPassManager::dumpPassStructure(unsigned Offset) {
+  dbgs().indent(Offset*2) << "FunctionPass Manager\n";
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    FunctionPass *FP = getContainedPass(Index);
+    FP->dumpPassStructure(Offset + 1);
+    dumpLastUses(FP, Offset+1);
+  }
+}
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnFunction method.  Keep track of whether any of the passes modifies
+/// the function, and if so, return true.
+bool FPPassManager::runOnFunction(Function &F) {
+  if (F.isDeclaration())
+    return false;
+
+  bool Changed = false;
+
+  // Collect inherited analysis from Module level pass manager.
+  populateInheritedAnalysis(TPM->activeStack);
+
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    FunctionPass *FP = getContainedPass(Index);
+    bool LocalChanged = false;
+
+    dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
+    dumpRequiredSet(FP);
+
+    initializeAnalysisImpl(FP);
+
+    {
+      PassManagerPrettyStackEntry X(FP, F);
+      TimeRegion PassTimer(getPassTimer(FP));
+
+      LocalChanged |= FP->runOnFunction(F);
+    }
+
+    Changed |= LocalChanged;
+    if (LocalChanged)
+      dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
+    dumpPreservedSet(FP);
+
+    verifyPreservedAnalysis(FP);
+    removeNotPreservedAnalysis(FP);
+    recordAvailableAnalysis(FP);
+    removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG);
+  }
+  return Changed;
+}
+
+bool FPPassManager::runOnModule(Module &M) {
+  bool Changed = false;
+
+  for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
+    Changed |= runOnFunction(*I);
+
+  return Changed;
+}
+
+bool FPPassManager::doInitialization(Module &M) {
+  bool Changed = false;
+
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+    Changed |= getContainedPass(Index)->doInitialization(M);
+
+  return Changed;
+}
+
+bool FPPassManager::doFinalization(Module &M) {
+  bool Changed = false;
+
+  for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+    Changed |= getContainedPass(Index)->doFinalization(M);
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// MPPassManager implementation
+
+/// Execute all of the passes scheduled for execution by invoking
+/// runOnModule method.  Keep track of whether any of the passes modifies
+/// the module, and if so, return true.
+bool
+MPPassManager::runOnModule(Module &M) {
+  bool Changed = false;
+
+  // Initialize on-the-fly passes
+  for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+       I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+       I != E; ++I) {
+    FunctionPassManagerImpl *FPP = I->second;
+    Changed |= FPP->doInitialization(M);
+  }
+
+  // Initialize module passes
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index)
+    Changed |= getContainedPass(Index)->doInitialization(M);
+
+  for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
+    ModulePass *MP = getContainedPass(Index);
+    bool LocalChanged = false;
+
+    dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
+    dumpRequiredSet(MP);
+
+    initializeAnalysisImpl(MP);
+
+    {
+      PassManagerPrettyStackEntry X(MP, M);
+      TimeRegion PassTimer(getPassTimer(MP));
+
+      LocalChanged |= MP->runOnModule(M);
+    }
+
+    Changed |= LocalChanged;
+    if (LocalChanged)
+      dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
+                   M.getModuleIdentifier());
+    dumpPreservedSet(MP);
+
+    verifyPreservedAnalysis(MP);
+    removeNotPreservedAnalysis(MP);
+    recordAvailableAnalysis(MP);
+    removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
+  }
+
+  // Finalize module passes
+  for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index)
+    Changed |= getContainedPass(Index)->doFinalization(M);
+
+  // Finalize on-the-fly passes
+  for (std::map<Pass *, FunctionPassManagerImpl *>::iterator
+       I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end();
+       I != E; ++I) {
+    FunctionPassManagerImpl *FPP = I->second;
+    // We don't know when an on-the-fly pass is run for the last time,
+    // so we need to releaseMemory / finalize here.
+    FPP->releaseMemoryOnTheFly();
+    Changed |= FPP->doFinalization(M);
+  }
+
+  return Changed;
+}
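What drives the addLowerLevelRequiredPass()/getOnTheFlyPass() pair below is a module pass asking for a function-level analysis. A sketch of that request shape (invented pass name, 3.4 headers); the getAnalysis<T>(F) form routes through AnalysisResolver::findImplPass() into MPPassManager::getOnTheFlyPass():

#include "llvm/Pass.h"
#include "llvm/IR/Module.h"
#include "llvm/Analysis/Dominators.h"

namespace {
struct ModuleSketch : public llvm::ModulePass {
  static char ID;
  ModuleSketch() : llvm::ModulePass(ID) {}

  virtual void getAnalysisUsage(llvm::AnalysisUsage &AU) const {
    AU.addRequired<llvm::DominatorTree>(); // a function-level analysis
  }

  virtual bool runOnModule(llvm::Module &M) {
    for (llvm::Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
      if (!F->isDeclaration()) {
        // Runs the analysis on demand for this specific function.
        llvm::DominatorTree &DT = getAnalysis<llvm::DominatorTree>(*F);
        (void)DT;
      }
    return false;
  }
};
}
char ModuleSketch::ID = 0;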
+
+/// Add RequiredPass into list of lower level passes required by pass P.
+/// RequiredPass is run on the fly by Pass Manager when P requests it
+/// through getAnalysis interface.
+void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) {
+  assert(P->getPotentialPassManagerType() == PMT_ModulePassManager &&
+         "Unable to handle Pass that requires lower level Analysis pass");
+  assert((P->getPotentialPassManagerType() <
+          RequiredPass->getPotentialPassManagerType()) &&
+         "Unable to handle Pass that requires lower level Analysis pass");
+
+  FunctionPassManagerImpl *FPP = OnTheFlyManagers[P];
+  if (!FPP) {
+    FPP = new FunctionPassManagerImpl();
+    // FPP is the top level manager.
+    FPP->setTopLevelManager(FPP);
+
+    OnTheFlyManagers[P] = FPP;
+  }
+  FPP->add(RequiredPass);
+
+  // Register P as the last user of RequiredPass.
+  if (RequiredPass) {
+    SmallVector<Pass *, 1> LU;
+    LU.push_back(RequiredPass);
+    FPP->setLastUser(LU, P);
+  }
+}
+
+/// Return function pass corresponding to PassInfo PI, that is
+/// required by module pass MP. Instantiate analysis pass, by using
+/// its runOnFunction() for function F.
+Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F) {
+  FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP];
+  assert(FPP && "Unable to find on the fly pass");
+
+  FPP->releaseMemoryOnTheFly();
+  FPP->run(F);
+  return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI);
+}
+
+
+//===----------------------------------------------------------------------===//
+// PassManagerImpl implementation
+
+//
+/// run - Execute all of the passes scheduled for execution.  Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManagerImpl::run(Module &M) {
+  bool Changed = false;
+  TimingInfo::createTheTimeInfo();
+
+  dumpArguments();
+  dumpPasses();
+
+  SmallVectorImpl<ImmutablePass *>& IPV = getImmutablePasses();
+  for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+       E = IPV.end(); I != E; ++I) {
+    Changed |= (*I)->doInitialization(M);
+  }
+
+  initializeAllAnalysisInfo();
+  for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index)
+    Changed |= getContainedManager(Index)->runOnModule(M);
+
+  for (SmallVectorImpl<ImmutablePass *>::const_iterator I = IPV.begin(),
+       E = IPV.end(); I != E; ++I) {
+    Changed |= (*I)->doFinalization(M);
+  }
+
+  return Changed;
+}
+
+//===----------------------------------------------------------------------===//
+// PassManager implementation
+
+/// Create new pass manager
+PassManager::PassManager() {
+  PM = new PassManagerImpl();
+  // PM is the top level manager
+  PM->setTopLevelManager(PM);
+}
+
+PassManager::~PassManager() {
+  delete PM;
+}
+
+/// add - Add a pass to the queue of passes to run.  This passes ownership of
+/// the Pass to the PassManager.  When the PassManager is destroyed, the pass
+/// will be destroyed as well, so there is no need to delete the pass.  This
+/// implies that all passes MUST be allocated with 'new'.
+void PassManager::add(Pass *P) {
+  PM->add(P);
+}
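For completeness, the classic driver sequence for the PassManager implemented above, sketched against the 3.4 headers (the pass selection is arbitrary; pass timing is controlled by the -time-passes machinery that follows):

#include "llvm/PassManager.h"
#include "llvm/IR/Module.h"
#include "llvm/Transforms/Scalar.h"

// Sketch: schedule two scalar transforms and run them over a module.
bool optimizeModule(llvm::Module &M) {
  llvm::PassManager PM;                              // owns added passes
  PM.add(llvm::createPromoteMemoryToRegisterPass()); // -mem2reg
  PM.add(llvm::createCFGSimplificationPass());       // -simplifycfg
  return PM.run(M);                                  // true if M changed
}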
+
+/// run - Execute all of the passes scheduled for execution.  Keep track of
+/// whether any of the passes modifies the module, and if so, return true.
+bool PassManager::run(Module &M) {
+  return PM->run(M);
+}
+
+//===----------------------------------------------------------------------===//
+// TimingInfo implementation
+
+bool llvm::TimePassesIsEnabled = false;
+static cl::opt<bool,true>
+EnableTiming("time-passes", cl::location(TimePassesIsEnabled),
+             cl::desc("Time each pass, printing elapsed time for each on exit"));
+
+// createTheTimeInfo - This method either initializes the TheTimeInfo pointer
+// to a non null value (if the -time-passes option is enabled) or it leaves it
+// null.  It may be called multiple times.
+void TimingInfo::createTheTimeInfo() {
+  if (!TimePassesIsEnabled || TheTimeInfo) return;
+
+  // Constructed the first time this is called, iff -time-passes is enabled.
+  // This guarantees that the object will be constructed before static globals,
+  // thus it will be destroyed before them.
+  static ManagedStatic<TimingInfo> TTI;
+  TheTimeInfo = &*TTI;
+}
+
+/// If TimingInfo is enabled then start pass timer.
+Timer *llvm::getPassTimer(Pass *P) {
+  if (TheTimeInfo)
+    return TheTimeInfo->getPassTimer(P);
+  return 0;
+}
+
+//===----------------------------------------------------------------------===//
+// PMStack implementation
+//
+
+// Pop Pass Manager from the stack and clear its analysis info.
+void PMStack::pop() {
+
+  PMDataManager *Top = this->top();
+  Top->initializeAnalysisInfo();
+
+  S.pop_back();
+}
+
+// Push PM on the stack and set its top level manager.
+void PMStack::push(PMDataManager *PM) {
+  assert(PM && "Unable to push. Pass Manager expected");
+  assert(PM->getDepth()==0 && "Pass Manager depth set too early");
+
+  if (!this->empty()) {
+    assert(PM->getPassManagerType() > this->top()->getPassManagerType()
+           && "pushing bad pass manager to PMStack");
+    PMTopLevelManager *TPM = this->top()->getTopLevelManager();
+
+    assert(TPM && "Unable to find top level manager");
+    TPM->addIndirectPassManager(PM);
+    PM->setTopLevelManager(TPM);
+    PM->setDepth(this->top()->getDepth()+1);
+  } else {
+    assert((PM->getPassManagerType() == PMT_ModulePassManager
+           || PM->getPassManagerType() == PMT_FunctionPassManager)
+           && "pushing bad pass manager to PMStack");
+    PM->setDepth(1);
+  }
+
+  S.push_back(PM);
+}
+
+// Dump content of the pass manager stack.
+void PMStack::dump() const {
+  for (std::vector<PMDataManager *>::const_iterator I = S.begin(),
+         E = S.end(); I != E; ++I)
+    dbgs() << (*I)->getAsPass()->getPassName() << ' ';
+
+  if (!S.empty())
+    dbgs() << '\n';
+}
+
+/// Find appropriate Module Pass Manager in the PM Stack and
+/// add self into that manager.
+void ModulePass::assignPassManager(PMStack &PMS,
+                                   PassManagerType PreferredType) {
+  // Find Module Pass Manager
+  while (!PMS.empty()) {
+    PassManagerType TopPMType = PMS.top()->getPassManagerType();
+    if (TopPMType == PreferredType)
+      break; // We found desired pass manager
+    else if (TopPMType > PMT_ModulePassManager)
+      PMS.pop();    // Pop children pass managers
+    else
+      break;
+  }
+  assert(!PMS.empty() && "Unable to find appropriate Pass Manager");
+  PMS.top()->add(this);
+}
+
+/// Find appropriate Function Pass Manager or Call Graph Pass Manager
+/// in the PM Stack and add self into that manager.
+void FunctionPass::assignPassManager(PMStack &PMS,
+                                     PassManagerType PreferredType) {
+
+  // Find Function Pass Manager
+  while (!PMS.empty()) {
+    if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager)
+      PMS.pop();
+    else
+      break;
+  }
+
+  // Create new Function Pass Manager if needed.
+  FPPassManager *FPP;
+  if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) {
+    FPP = (FPPassManager *)PMS.top();
+  } else {
+    assert(!PMS.empty() && "Unable to create Function Pass Manager");
+    PMDataManager *PMD = PMS.top();
+
+    // [1] Create new Function Pass Manager
+    FPP = new FPPassManager();
+    FPP->populateInheritedAnalysis(PMS);
+
+    // [2] Set up new manager's top level manager
+    PMTopLevelManager *TPM = PMD->getTopLevelManager();
+    TPM->addIndirectPassManager(FPP);
+
+    // [3] Assign manager to manage this new manager. This may create
+    // and push new managers into PMS
+    FPP->assignPassManager(PMS, PMD->getPassManagerType());
+
+    // [4] Push new manager into PMS
+    PMS.push(FPP);
+  }
+
+  // Assign FPP as the manager of this pass.
+  FPP->add(this);
+}
+
+/// Find appropriate Basic Block Pass Manager or Call Graph Pass Manager
+/// in the PM Stack and add self into that manager.
+void BasicBlockPass::assignPassManager(PMStack &PMS,
+                                       PassManagerType PreferredType) {
+  BBPassManager *BBP;
+
+  // Basic Block Pass Manager is a leaf pass manager. It does not handle
+  // any other pass manager.
+  if (!PMS.empty() &&
+      PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) {
+    BBP = (BBPassManager *)PMS.top();
+  } else {
+    // If the leaf manager is not a Basic Block Pass Manager then create a
+    // new Basic Block Pass Manager.
+    assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager");
+    PMDataManager *PMD = PMS.top();
+
+    // [1] Create new Basic Block Manager
+    BBP = new BBPassManager();
+
+    // [2] Set up new manager's top level manager
+    // Basic Block Pass Manager does not live by itself
+    PMTopLevelManager *TPM = PMD->getTopLevelManager();
+    TPM->addIndirectPassManager(BBP);
+
+    // [3] Assign manager to manage this new manager. This may create
+    // and push new managers into PMS
+    BBP->assignPassManager(PMS, PreferredType);
+
+    // [4] Push new manager into PMS
+    PMS.push(BBP);
+  }
+
+  // Assign BBP as the manager of this pass.
+  BBP->add(this);
+}
+
+PassManagerBase::~PassManagerBase() {}
diff --git a/contrib/llvm/lib/IR/Metadata.cpp b/contrib/llvm/lib/IR/Metadata.cpp
index 6a6b7af..a32d25c 100644
--- a/contrib/llvm/lib/IR/Metadata.cpp
+++ b/contrib/llvm/lib/IR/Metadata.cpp
@@ -65,7 +65,7 @@ class MDNodeOperand : public CallbackVH {
 public:
   MDNodeOperand(Value *V) : CallbackVH(V) {}
-  ~MDNodeOperand() {}
+  virtual ~MDNodeOperand();
 
   void set(Value *V) {
     unsigned IsFirst = this->getValPtrInt();
@@ -82,6 +82,8 @@ public:
 };
 } // end namespace llvm.
 
+// Provide out-of-line definition to prevent weak vtable.
+MDNodeOperand::~MDNodeOperand() {}
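The Metadata.cpp hunk above is the standard anchoring fix for -Wweak-vtables: a class whose virtual members are all defined inline gets its vtable emitted weakly in every translation unit that includes the header. A self-contained illustration of the idiom (not from the patch):

// Header: declare at least one virtual member without an inline body.
struct Handler {
  virtual void handle();
  virtual ~Handler(); // declared only; defined out of line below
};

// Handler.cpp: these out-of-line definitions "anchor" the vtable so the
// compiler emits it in exactly this object file instead of a weak copy
// in every translation unit.
void Handler::handle() {}
Handler::~Handler() {}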
 
 void MDNodeOperand::deleted() {
   getParent()->replaceOperand(this, 0);
@@ -422,7 +424,7 @@ static bool canBeMerged(const ConstantRange &A, const ConstantRange &B) {
   return !A.intersectWith(B).isEmptySet() || isContiguous(A, B);
 }
 
-static bool tryMergeRange(SmallVector<ConstantInt *, 2> &EndPoints, ConstantInt *Low,
+static bool tryMergeRange(SmallVectorImpl<ConstantInt *> &EndPoints, ConstantInt *Low,
                           ConstantInt *High) {
   ConstantRange NewRange(Low->getValue(), High->getValue());
   unsigned Size = EndPoints.size();
@@ -439,7 +441,7 @@ static bool tryMergeRange(SmallVector<ConstantInt *, 2> &EndPoints, ConstantInt *Low,
   return false;
 }
 
-static void addRange(SmallVector<ConstantInt *, 2> &EndPoints, ConstantInt *Low,
+static void addRange(SmallVectorImpl<ConstantInt *> &EndPoints, ConstantInt *Low,
                      ConstantInt *High) {
   if (!EndPoints.empty())
     if (tryMergeRange(EndPoints, Low, High))
diff --git a/contrib/llvm/lib/IR/Module.cpp b/contrib/llvm/lib/IR/Module.cpp
index 8affcc9..4f240c7 100644
--- a/contrib/llvm/lib/IR/Module.cpp
+++ b/contrib/llvm/lib/IR/Module.cpp
@@ -168,23 +168,6 @@ Constant *Module::getOrInsertFunction(StringRef Name,
   return F;
 }
 
-Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
-                                             FunctionType *Ty,
-                                             AttributeSet AttributeList) {
-  // See if we have a definition for the specified function already.
-  GlobalValue *F = getNamedValue(Name);
-  if (F == 0) {
-    // Nope, add it
-    Function *New = Function::Create(Ty, GlobalVariable::ExternalLinkage, Name);
-    New->setAttributes(AttributeList);
-    FunctionList.push_back(New);
-    return New; // Return the new prototype.
-  }
-
-  // Otherwise, we just found the existing function or a prototype.
-  return F;
-}
-
 Constant *Module::getOrInsertFunction(StringRef Name,
                                       FunctionType *Ty) {
   return getOrInsertFunction(Name, Ty, AttributeSet());
@@ -250,8 +233,7 @@ Function *Module::getFunction(StringRef Name) const {
 /// If AllowLocal is set to true, this function will return types that
 /// have an local. By default, these types are not returned.
 ///
-GlobalVariable *Module::getGlobalVariable(StringRef Name,
-                                          bool AllowLocal) const {
+GlobalVariable *Module::getGlobalVariable(StringRef Name, bool AllowLocal) {
   if (GlobalVariable *Result =
         dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
     if (AllowLocal || !Result->hasLocalLinkage())
       return Result;
@@ -263,7 +245,7 @@ GlobalVariable *Module::getGlobalVariable(StringRef Name,
 /// 1. If it does not exist, add a declaration of the global and return it.
 /// 2. Else, the global exists but has the wrong type: return the function
 ///    with a constantexpr cast to the right type.
-/// 3. Finally, if the existing global is the correct delclaration, return the
+/// 3. Finally, if the existing global is the correct declaration, return the
 ///    existing global.
 Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
   // See if we have a definition for the specified global already.
@@ -278,8 +260,10 @@ Constant *Module::getOrInsertGlobal(StringRef Name, Type *Ty) {
 
   // If the variable exists but has the wrong type, return a bitcast to the
   // right type.
-  if (GV->getType() != PointerType::getUnqual(Ty))
-    return ConstantExpr::getBitCast(GV, PointerType::getUnqual(Ty));
+  Type *GVTy = GV->getType();
+  PointerType *PTy = PointerType::get(Ty, GVTy->getPointerAddressSpace());
+  if (GVTy != PTy)
+    return ConstantExpr::getBitCast(GV, PTy);
 
   // Otherwise, we just found the existing function or a prototype.
   return GV;
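The getOrInsertGlobal() hunk just above stops assuming address space 0: PointerType::getUnqual(Ty) always yields an addrspace(0) pointer, which is the wrong type (and would request an invalid bitcast) for a global living in another address space. A small sketch of the corrected computation (hypothetical helper name):

#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"

// Compute the pointer type to cast to while preserving the address space
// the global actually lives in; e.g. @g = addrspace(3) global i32 0 keeps
// addrspace(3) here rather than being forced into addrspace(0).
llvm::PointerType *typeForCast(llvm::GlobalVariable *GV, llvm::Type *Ty) {
  unsigned AS = GV->getType()->getPointerAddressSpace();
  return llvm::PointerType::get(Ty, AS);
}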
@@ -334,12 +318,30 @@ getModuleFlagsMetadata(SmallVectorImpl<ModuleFlagEntry> &Flags) const {
 
   for (unsigned i = 0, e = ModFlags->getNumOperands(); i != e; ++i) {
     MDNode *Flag = ModFlags->getOperand(i);
-    ConstantInt *Behavior = cast<ConstantInt>(Flag->getOperand(0));
-    MDString *Key = cast<MDString>(Flag->getOperand(1));
-    Value *Val = Flag->getOperand(2);
-    Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()),
-                                    Key, Val));
+    if (Flag->getNumOperands() >= 3 && isa<ConstantInt>(Flag->getOperand(0)) &&
+        isa<MDString>(Flag->getOperand(1))) {
+      // Check the operands of the MDNode before accessing the operands.
+      // The verifier will actually catch these failures.
+      ConstantInt *Behavior = cast<ConstantInt>(Flag->getOperand(0));
+      MDString *Key = cast<MDString>(Flag->getOperand(1));
+      Value *Val = Flag->getOperand(2);
+      Flags.push_back(ModuleFlagEntry(ModFlagBehavior(Behavior->getZExtValue()),
+                                      Key, Val));
+    }
+  }
+}
+
+/// Return the corresponding value if Key appears in module flags, otherwise
+/// return null.
+Value *Module::getModuleFlag(StringRef Key) const {
+  SmallVector<Module::ModuleFlagEntry, 8> ModuleFlags;
+  getModuleFlagsMetadata(ModuleFlags);
+  for (unsigned I = 0, E = ModuleFlags.size(); I < E; ++I) {
+    const ModuleFlagEntry &MFE = ModuleFlags[I];
+    if (Key == MFE.Key->getString())
+      return MFE.Val;
   }
+  return 0;
 }
 
 /// getModuleFlagsMetadata - Returns the NamedMDNode in the module that
@@ -404,9 +406,15 @@ bool Module::isDematerializable(const GlobalValue *GV) const {
 }
 
 bool Module::Materialize(GlobalValue *GV, std::string *ErrInfo) {
-  if (Materializer)
-    return Materializer->Materialize(GV, ErrInfo);
-  return false;
+  if (!Materializer)
+    return false;
+
+  error_code EC = Materializer->Materialize(GV);
+  if (!EC)
+    return false;
+  if (ErrInfo)
+    *ErrInfo = EC.message();
+  return true;
 }
 
 void Module::Dematerialize(GlobalValue *GV) {
@@ -417,7 +425,12 @@ void Module::Dematerialize(GlobalValue *GV) {
 bool Module::MaterializeAll(std::string *ErrInfo) {
   if (!Materializer)
     return false;
-  return Materializer->MaterializeModule(this, ErrInfo);
+  error_code EC = Materializer->MaterializeModule(this);
+  if (!EC)
+    return false;
+  if (ErrInfo)
+    *ErrInfo = EC.message();
+  return true;
 }
 
 bool Module::MaterializeAllPermanently(std::string *ErrInfo) {
diff --git a/contrib/llvm/lib/IR/PassManager.cpp b/contrib/llvm/lib/IR/PassManager.cpp
index 387094a..966af7d 100644
--- a/contrib/llvm/lib/IR/PassManager.cpp
+++ b/contrib/llvm/lib/IR/PassManager.cpp
@@ -1,4 +1,4 @@
-//===- PassManager.cpp - LLVM Pass Infrastructure Implementation ----------===//
+//===- PassManager.h - Infrastructure for managing & running IR passes ----===//
 //
 //                     The LLVM Compiler Infrastructure
 //
@@ -6,1907 +6,152 @@
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
-//
-// This file implements the LLVM Pass Manager infrastructure.
-// -//===----------------------------------------------------------------------===// +#include "llvm/IR/PassManager.h" +#include "llvm/ADT/STLExtras.h" -#include "llvm/PassManagers.h" -#include "llvm/Assembly/PrintModulePass.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/IR/Module.h" -#include "llvm/PassManager.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/PassNameParser.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include -#include using namespace llvm; -// See PassManagers.h for Pass Manager infrastructure overview. - -namespace llvm { - -//===----------------------------------------------------------------------===// -// Pass debugging information. Often it is useful to find out what pass is -// running when a crash occurs in a utility. When this library is compiled with -// debugging on, a command line option (--debug-pass) is enabled that causes the -// pass name to be printed before it executes. -// - -// Different debug levels that can be enabled... -enum PassDebugLevel { - Disabled, Arguments, Structure, Executions, Details -}; - -static cl::opt -PassDebugging("debug-pass", cl::Hidden, - cl::desc("Print PassManager debugging information"), - cl::values( - clEnumVal(Disabled , "disable debug output"), - clEnumVal(Arguments , "print pass arguments to pass to 'opt'"), - clEnumVal(Structure , "print pass structure before run()"), - clEnumVal(Executions, "print pass name before it is executed"), - clEnumVal(Details , "print pass details when it is executed"), - clEnumValEnd)); - -typedef llvm::cl::list -PassOptionList; - -// Print IR out before/after specified passes. -static PassOptionList -PrintBefore("print-before", - llvm::cl::desc("Print IR before specified passes"), - cl::Hidden); - -static PassOptionList -PrintAfter("print-after", - llvm::cl::desc("Print IR after specified passes"), - cl::Hidden); - -static cl::opt -PrintBeforeAll("print-before-all", - llvm::cl::desc("Print IR before each pass"), - cl::init(false)); -static cl::opt -PrintAfterAll("print-after-all", - llvm::cl::desc("Print IR after each pass"), - cl::init(false)); - -/// This is a helper to determine whether to print IR before or -/// after a pass. - -static bool ShouldPrintBeforeOrAfterPass(const PassInfo *PI, - PassOptionList &PassesToPrint) { - for (unsigned i = 0, ie = PassesToPrint.size(); i < ie; ++i) { - const llvm::PassInfo *PassInf = PassesToPrint[i]; - if (PassInf) - if (PassInf->getPassArgument() == PI->getPassArgument()) { - return true; - } - } - return false; -} - -/// This is a utility to check whether a pass should have IR dumped -/// before it. -static bool ShouldPrintBeforePass(const PassInfo *PI) { - return PrintBeforeAll || ShouldPrintBeforeOrAfterPass(PI, PrintBefore); -} - -/// This is a utility to check whether a pass should have IR dumped -/// after it. -static bool ShouldPrintAfterPass(const PassInfo *PI) { - return PrintAfterAll || ShouldPrintBeforeOrAfterPass(PI, PrintAfter); -} - -} // End of llvm namespace - -/// isPassDebuggingExecutionsOrMore - Return true if -debug-pass=Executions -/// or higher is specified. 
-bool PMDataManager::isPassDebuggingExecutionsOrMore() const { - return PassDebugging >= Executions; -} - - - - -void PassManagerPrettyStackEntry::print(raw_ostream &OS) const { - if (V == 0 && M == 0) - OS << "Releasing pass '"; - else - OS << "Running pass '"; - - OS << P->getPassName() << "'"; - - if (M) { - OS << " on module '" << M->getModuleIdentifier() << "'.\n"; - return; - } - if (V == 0) { - OS << '\n'; - return; - } - - OS << " on "; - if (isa(V)) - OS << "function"; - else if (isa(V)) - OS << "basic block"; - else - OS << "value"; - - OS << " '"; - WriteAsOperand(OS, V, /*PrintTy=*/false, M); - OS << "'\n"; -} - - -namespace { - -//===----------------------------------------------------------------------===// -// BBPassManager -// -/// BBPassManager manages BasicBlockPass. It batches all the -/// pass together and sequence them to process one basic block before -/// processing next basic block. -class BBPassManager : public PMDataManager, public FunctionPass { - -public: - static char ID; - explicit BBPassManager() - : PMDataManager(), FunctionPass(ID) {} - - /// Execute all of the passes scheduled for execution. Keep track of - /// whether any of the passes modifies the function, and if so, return true. - bool runOnFunction(Function &F); - - /// Pass Manager itself does not invalidate any analysis info. - void getAnalysisUsage(AnalysisUsage &Info) const { - Info.setPreservesAll(); - } - - bool doInitialization(Module &M); - bool doInitialization(Function &F); - bool doFinalization(Module &M); - bool doFinalization(Function &F); - - virtual PMDataManager *getAsPMDataManager() { return this; } - virtual Pass *getAsPass() { return this; } - - virtual const char *getPassName() const { - return "BasicBlock Pass Manager"; - } - - // Print passes managed by this manager - void dumpPassStructure(unsigned Offset) { - llvm::dbgs().indent(Offset*2) << "BasicBlockPass Manager\n"; - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - BasicBlockPass *BP = getContainedPass(Index); - BP->dumpPassStructure(Offset + 1); - dumpLastUses(BP, Offset+1); - } - } - - BasicBlockPass *getContainedPass(unsigned N) { - assert(N < PassVector.size() && "Pass number out of range!"); - BasicBlockPass *BP = static_cast(PassVector[N]); - return BP; - } - - virtual PassManagerType getPassManagerType() const { - return PMT_BasicBlockPassManager; - } -}; - -char BBPassManager::ID = 0; -} - -namespace llvm { - -//===----------------------------------------------------------------------===// -// FunctionPassManagerImpl -// -/// FunctionPassManagerImpl manages FPPassManagers -class FunctionPassManagerImpl : public Pass, - public PMDataManager, - public PMTopLevelManager { - virtual void anchor(); -private: - bool wasRun; -public: - static char ID; - explicit FunctionPassManagerImpl() : - Pass(PT_PassManager, ID), PMDataManager(), - PMTopLevelManager(new FPPassManager()), wasRun(false) {} - - /// add - Add a pass to the queue of passes to run. This passes ownership of - /// the Pass to the PassManager. When the PassManager is destroyed, the pass - /// will be destroyed as well, so there is no need to delete the pass. This - /// implies that all passes MUST be allocated with 'new'. - void add(Pass *P) { - schedulePass(P); - } - - /// createPrinterPass - Get a function printer pass. 
- Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const { - return createPrintFunctionPass(Banner, &O); - } - - // Prepare for running an on the fly pass, freeing memory if needed - // from a previous run. - void releaseMemoryOnTheFly(); - - /// run - Execute all of the passes scheduled for execution. Keep track of - /// whether any of the passes modifies the module, and if so, return true. - bool run(Function &F); - - /// doInitialization - Run all of the initializers for the function passes. - /// - bool doInitialization(Module &M); - - /// doFinalization - Run all of the finalizers for the function passes. - /// - bool doFinalization(Module &M); - - - virtual PMDataManager *getAsPMDataManager() { return this; } - virtual Pass *getAsPass() { return this; } - virtual PassManagerType getTopLevelPassManagerType() { - return PMT_FunctionPassManager; - } - - /// Pass Manager itself does not invalidate any analysis info. - void getAnalysisUsage(AnalysisUsage &Info) const { - Info.setPreservesAll(); - } - - FPPassManager *getContainedManager(unsigned N) { - assert(N < PassManagers.size() && "Pass number out of range!"); - FPPassManager *FP = static_cast(PassManagers[N]); - return FP; - } -}; - -void FunctionPassManagerImpl::anchor() {} - -char FunctionPassManagerImpl::ID = 0; - -//===----------------------------------------------------------------------===// -// MPPassManager -// -/// MPPassManager manages ModulePasses and function pass managers. -/// It batches all Module passes and function pass managers together and -/// sequences them to process one module. -class MPPassManager : public Pass, public PMDataManager { -public: - static char ID; - explicit MPPassManager() : - Pass(PT_PassManager, ID), PMDataManager() { } - - // Delete on the fly managers. - virtual ~MPPassManager() { - for (std::map::iterator - I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end(); - I != E; ++I) { - FunctionPassManagerImpl *FPP = I->second; - delete FPP; - } - } - - /// createPrinterPass - Get a module printer pass. - Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const { - return createPrintModulePass(&O, false, Banner); - } - - /// run - Execute all of the passes scheduled for execution. Keep track of - /// whether any of the passes modifies the module, and if so, return true. - bool runOnModule(Module &M); - - using llvm::Pass::doInitialization; - using llvm::Pass::doFinalization; - - /// doInitialization - Run all of the initializers for the module passes. - /// - bool doInitialization(); - - /// doFinalization - Run all of the finalizers for the module passes. - /// - bool doFinalization(); - - /// Pass Manager itself does not invalidate any analysis info. - void getAnalysisUsage(AnalysisUsage &Info) const { - Info.setPreservesAll(); - } - - /// Add RequiredPass into list of lower level passes required by pass P. - /// RequiredPass is run on the fly by Pass Manager when P requests it - /// through getAnalysis interface. - virtual void addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass); - - /// Return function pass corresponding to PassInfo PI, that is - /// required by module pass MP. Instantiate analysis pass, by using - /// its runOnFunction() for function F. 
- virtual Pass* getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F); - - virtual const char *getPassName() const { - return "Module Pass Manager"; - } - - virtual PMDataManager *getAsPMDataManager() { return this; } - virtual Pass *getAsPass() { return this; } - - // Print passes managed by this manager - void dumpPassStructure(unsigned Offset) { - llvm::dbgs().indent(Offset*2) << "ModulePass Manager\n"; - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - ModulePass *MP = getContainedPass(Index); - MP->dumpPassStructure(Offset + 1); - std::map::const_iterator I = - OnTheFlyManagers.find(MP); - if (I != OnTheFlyManagers.end()) - I->second->dumpPassStructure(Offset + 2); - dumpLastUses(MP, Offset+1); - } - } - - ModulePass *getContainedPass(unsigned N) { - assert(N < PassVector.size() && "Pass number out of range!"); - return static_cast(PassVector[N]); - } - - virtual PassManagerType getPassManagerType() const { - return PMT_ModulePassManager; - } - - private: - /// Collection of on the fly FPPassManagers. These managers manage - /// function passes that are required by module passes. - std::map OnTheFlyManagers; -}; - -char MPPassManager::ID = 0; -//===----------------------------------------------------------------------===// -// PassManagerImpl -// - -/// PassManagerImpl manages MPPassManagers -class PassManagerImpl : public Pass, - public PMDataManager, - public PMTopLevelManager { - virtual void anchor(); - -public: - static char ID; - explicit PassManagerImpl() : - Pass(PT_PassManager, ID), PMDataManager(), - PMTopLevelManager(new MPPassManager()) {} - - /// add - Add a pass to the queue of passes to run. This passes ownership of - /// the Pass to the PassManager. When the PassManager is destroyed, the pass - /// will be destroyed as well, so there is no need to delete the pass. This - /// implies that all passes MUST be allocated with 'new'. - void add(Pass *P) { - schedulePass(P); - } - - /// createPrinterPass - Get a module printer pass. - Pass *createPrinterPass(raw_ostream &O, const std::string &Banner) const { - return createPrintModulePass(&O, false, Banner); - } - - /// run - Execute all of the passes scheduled for execution. Keep track of - /// whether any of the passes modifies the module, and if so, return true. - bool run(Module &M); - - using llvm::Pass::doInitialization; - using llvm::Pass::doFinalization; - - /// doInitialization - Run all of the initializers for the module passes. - /// - bool doInitialization(); - - /// doFinalization - Run all of the finalizers for the module passes. - /// - bool doFinalization(); - - /// Pass Manager itself does not invalidate any analysis info. - void getAnalysisUsage(AnalysisUsage &Info) const { - Info.setPreservesAll(); - } - - virtual PMDataManager *getAsPMDataManager() { return this; } - virtual Pass *getAsPass() { return this; } - virtual PassManagerType getTopLevelPassManagerType() { - return PMT_ModulePassManager; - } - - MPPassManager *getContainedManager(unsigned N) { - assert(N < PassManagers.size() && "Pass number out of range!"); - MPPassManager *MP = static_cast(PassManagers[N]); - return MP; - } -}; - -void PassManagerImpl::anchor() {} - -char PassManagerImpl::ID = 0; -} // End of llvm namespace - -namespace { - -//===----------------------------------------------------------------------===// -/// TimingInfo Class - This class is used to calculate information about the -/// amount of time each pass takes to execute. This only happens when -/// -time-passes is enabled on the command line. 
-/// - -static ManagedStatic > TimingInfoMutex; - -class TimingInfo { - DenseMap TimingData; - TimerGroup TG; -public: - // Use 'create' member to get this. - TimingInfo() : TG("... Pass execution timing report ...") {} - - // TimingDtor - Print out information about timing information - ~TimingInfo() { - // Delete all of the timers, which accumulate their info into the - // TimerGroup. - for (DenseMap::iterator I = TimingData.begin(), - E = TimingData.end(); I != E; ++I) - delete I->second; - // TimerGroup is deleted next, printing the report. - } - - // createTheTimeInfo - This method either initializes the TheTimeInfo pointer - // to a non null value (if the -time-passes option is enabled) or it leaves it - // null. It may be called multiple times. - static void createTheTimeInfo(); - - /// getPassTimer - Return the timer for the specified pass if it exists. - Timer *getPassTimer(Pass *P) { - if (P->getAsPMDataManager()) - return 0; - - sys::SmartScopedLock Lock(*TimingInfoMutex); - Timer *&T = TimingData[P]; - if (T == 0) - T = new Timer(P->getPassName(), TG); - return T; - } -}; - -} // End of anon namespace - -static TimingInfo *TheTimeInfo; - -//===----------------------------------------------------------------------===// -// PMTopLevelManager implementation - -/// Initialize top level manager. Create first pass manager. -PMTopLevelManager::PMTopLevelManager(PMDataManager *PMDM) { - PMDM->setTopLevelManager(this); - addPassManager(PMDM); - activeStack.push(PMDM); -} - -/// Set pass P as the last user of the given analysis passes. -void -PMTopLevelManager::setLastUser(ArrayRef AnalysisPasses, Pass *P) { - unsigned PDepth = 0; - if (P->getResolver()) - PDepth = P->getResolver()->getPMDataManager().getDepth(); - - for (SmallVectorImpl::const_iterator I = AnalysisPasses.begin(), - E = AnalysisPasses.end(); I != E; ++I) { - Pass *AP = *I; - LastUser[AP] = P; - - if (P == AP) - continue; - - // Update the last users of passes that are required transitive by AP. - AnalysisUsage *AnUsage = findAnalysisUsage(AP); - const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet(); - SmallVector LastUses; - SmallVector LastPMUses; - for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(), - E = IDs.end(); I != E; ++I) { - Pass *AnalysisPass = findAnalysisPass(*I); - assert(AnalysisPass && "Expected analysis pass to exist."); - AnalysisResolver *AR = AnalysisPass->getResolver(); - assert(AR && "Expected analysis resolver to exist."); - unsigned APDepth = AR->getPMDataManager().getDepth(); - - if (PDepth == APDepth) - LastUses.push_back(AnalysisPass); - else if (PDepth > APDepth) - LastPMUses.push_back(AnalysisPass); - } - - setLastUser(LastUses, P); - - // If this pass has a corresponding pass manager, push higher level - // analysis to this pass manager. - if (P->getResolver()) - setLastUser(LastPMUses, P->getResolver()->getPMDataManager().getAsPass()); - - - // If AP is the last user of other passes then make P last user of - // such passes. - for (DenseMap::iterator LUI = LastUser.begin(), - LUE = LastUser.end(); LUI != LUE; ++LUI) { - if (LUI->second == AP) - // DenseMap iterator is not invalidated here because - // this is just updating existing entries. 
- LastUser[LUI->first] = P; - } - } -} - -/// Collect passes whose last user is P -void PMTopLevelManager::collectLastUses(SmallVectorImpl &LastUses, - Pass *P) { - DenseMap >::iterator DMI = - InversedLastUser.find(P); - if (DMI == InversedLastUser.end()) - return; - - SmallPtrSet &LU = DMI->second; - for (SmallPtrSet::iterator I = LU.begin(), - E = LU.end(); I != E; ++I) { - LastUses.push_back(*I); - } - +void ModulePassManager::run() { + for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) + if (Passes[Idx]->run(M)) + if (AM) AM->invalidateAll(M); } -AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { - AnalysisUsage *AnUsage = NULL; - DenseMap::iterator DMI = AnUsageMap.find(P); - if (DMI != AnUsageMap.end()) - AnUsage = DMI->second; - else { - AnUsage = new AnalysisUsage(); - P->getAnalysisUsage(*AnUsage); - AnUsageMap[P] = AnUsage; - } - return AnUsage; -} - -/// Schedule pass P for execution. Make sure that passes required by -/// P are run before P is run. Update analysis info maintained by -/// the manager. Remove dead passes. This is a recursive function. -void PMTopLevelManager::schedulePass(Pass *P) { - - // TODO : Allocate function manager for this pass, other wise required set - // may be inserted into previous function manager - - // Give pass a chance to prepare the stage. - P->preparePassManager(activeStack); - - // If P is an analysis pass and it is available then do not - // generate the analysis again. Stale analysis info should not be - // available at this point. - const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo(P->getPassID()); - if (PI && PI->isAnalysis() && findAnalysisPass(P->getPassID())) { - delete P; - return; - } - - AnalysisUsage *AnUsage = findAnalysisUsage(P); - - bool checkAnalysis = true; - while (checkAnalysis) { - checkAnalysis = false; - - const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet(); - for (AnalysisUsage::VectorType::const_iterator I = RequiredSet.begin(), - E = RequiredSet.end(); I != E; ++I) { - - Pass *AnalysisPass = findAnalysisPass(*I); - if (!AnalysisPass) { - const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I); - - if (PI == NULL) { - // Pass P is not in the global PassRegistry - dbgs() << "Pass '" << P->getPassName() << "' is not initialized." << "\n"; - dbgs() << "Verify if there is a pass dependency cycle." << "\n"; - dbgs() << "Required Passes:" << "\n"; - for (AnalysisUsage::VectorType::const_iterator I2 = RequiredSet.begin(), - E = RequiredSet.end(); I2 != E && I2 != I; ++I2) { - Pass *AnalysisPass2 = findAnalysisPass(*I2); - if (AnalysisPass2) { - dbgs() << "\t" << AnalysisPass2->getPassName() << "\n"; - } else { - dbgs() << "\t" << "Error: Required pass not found! Possible causes:" << "\n"; - dbgs() << "\t\t" << "- Pass misconfiguration (e.g.: missing macros)" << "\n"; - dbgs() << "\t\t" << "- Corruption of the global PassRegistry" << "\n"; - } - } - } - - assert(PI && "Expected required passes to be initialized"); - AnalysisPass = PI->createPass(); - if (P->getPotentialPassManagerType () == - AnalysisPass->getPotentialPassManagerType()) - // Schedule analysis pass that is managed by the same pass manager. - schedulePass(AnalysisPass); - else if (P->getPotentialPassManagerType () > - AnalysisPass->getPotentialPassManagerType()) { - // Schedule analysis pass that is managed by a new manager. - schedulePass(AnalysisPass); - // Recheck analysis passes to ensure that required analyses that - // are already checked are still available. 
- checkAnalysis = true; - } else - // Do not schedule this analysis. Lower level analsyis - // passes are run on the fly. - delete AnalysisPass; +bool FunctionPassManager::run(Module *M) { + bool Changed = false; + for (Module::iterator I = M->begin(), E = M->end(); I != E; ++I) + for (unsigned Idx = 0, Size = Passes.size(); Idx != Size; ++Idx) + if (Passes[Idx]->run(I)) { + Changed = true; + if (AM) AM->invalidateAll(I); } - } - } - - // Now all required passes are available. - if (ImmutablePass *IP = P->getAsImmutablePass()) { - // P is a immutable pass and it will be managed by this - // top level manager. Set up analysis resolver to connect them. - PMDataManager *DM = getAsPMDataManager(); - AnalysisResolver *AR = new AnalysisResolver(*DM); - P->setResolver(AR); - DM->initializeAnalysisImpl(P); - addImmutablePass(IP); - DM->recordAvailableAnalysis(IP); - return; - } - - if (PI && !PI->isAnalysis() && ShouldPrintBeforePass(PI)) { - Pass *PP = P->createPrinterPass( - dbgs(), std::string("*** IR Dump Before ") + P->getPassName() + " ***"); - PP->assignPassManager(activeStack, getTopLevelPassManagerType()); - } - - // Add the requested pass to the best available pass manager. - P->assignPassManager(activeStack, getTopLevelPassManagerType()); - - if (PI && !PI->isAnalysis() && ShouldPrintAfterPass(PI)) { - Pass *PP = P->createPrinterPass( - dbgs(), std::string("*** IR Dump After ") + P->getPassName() + " ***"); - PP->assignPassManager(activeStack, getTopLevelPassManagerType()); - } -} - -/// Find the pass that implements Analysis AID. Search immutable -/// passes and all pass managers. If desired pass is not found -/// then return NULL. -Pass *PMTopLevelManager::findAnalysisPass(AnalysisID AID) { - - // Check pass managers - for (SmallVectorImpl::iterator I = PassManagers.begin(), - E = PassManagers.end(); I != E; ++I) - if (Pass *P = (*I)->findAnalysisPass(AID, false)) - return P; - - // Check other pass managers - for (SmallVectorImpl::iterator - I = IndirectPassManagers.begin(), - E = IndirectPassManagers.end(); I != E; ++I) - if (Pass *P = (*I)->findAnalysisPass(AID, false)) - return P; - - // Check the immutable passes. Iterate in reverse order so that we find - // the most recently registered passes first. - for (SmallVector::reverse_iterator I = - ImmutablePasses.rbegin(), E = ImmutablePasses.rend(); I != E; ++I) { - AnalysisID PI = (*I)->getPassID(); - if (PI == AID) - return *I; - - // If Pass not found then check the interfaces implemented by Immutable Pass - const PassInfo *PassInf = - PassRegistry::getPassRegistry()->getPassInfo(PI); - assert(PassInf && "Expected all immutable passes to be initialized"); - const std::vector &ImmPI = - PassInf->getInterfacesImplemented(); - for (std::vector::const_iterator II = ImmPI.begin(), - EE = ImmPI.end(); II != EE; ++II) { - if ((*II)->getTypeInfo() == AID) - return *I; - } - } - - return 0; -} - -// Print passes managed by this top level manager. -void PMTopLevelManager::dumpPasses() const { - - if (PassDebugging < Structure) - return; - - // Print out the immutable passes - for (unsigned i = 0, e = ImmutablePasses.size(); i != e; ++i) { - ImmutablePasses[i]->dumpPassStructure(0); - } - - // Every class that derives from PMDataManager also derives from Pass - // (sometimes indirectly), but there's no inheritance relationship - // between PMDataManager and Pass, so we have to getAsPass to get - // from a PMDataManager* to a Pass*. 
- for (SmallVector::const_iterator I = PassManagers.begin(), - E = PassManagers.end(); I != E; ++I) - (*I)->getAsPass()->dumpPassStructure(1); -} - -void PMTopLevelManager::dumpArguments() const { - - if (PassDebugging < Arguments) - return; - - dbgs() << "Pass Arguments: "; - for (SmallVector::const_iterator I = - ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) - if (const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) { - assert(PI && "Expected all immutable passes to be initialized"); - if (!PI->isAnalysisGroup()) - dbgs() << " -" << PI->getPassArgument(); - } - for (SmallVector::const_iterator I = PassManagers.begin(), - E = PassManagers.end(); I != E; ++I) - (*I)->dumpPassArguments(); - dbgs() << "\n"; + return Changed; } -void PMTopLevelManager::initializeAllAnalysisInfo() { - for (SmallVectorImpl::iterator I = PassManagers.begin(), - E = PassManagers.end(); I != E; ++I) - (*I)->initializeAnalysisInfo(); +void AnalysisManager::invalidateAll(Function *F) { + assert(F->getParent() == M && "Invalidating a function from another module!"); - // Initailize other pass managers - for (SmallVectorImpl::iterator - I = IndirectPassManagers.begin(), E = IndirectPassManagers.end(); + // First invalidate any module results we still have laying about. + // FIXME: This is a total hack based on the fact that erasure doesn't + // invalidate iteration for DenseMap. + for (ModuleAnalysisResultMapT::iterator I = ModuleAnalysisResults.begin(), + E = ModuleAnalysisResults.end(); I != E; ++I) - (*I)->initializeAnalysisInfo(); - - for (DenseMap::iterator DMI = LastUser.begin(), - DME = LastUser.end(); DMI != DME; ++DMI) { - DenseMap >::iterator InvDMI = - InversedLastUser.find(DMI->second); - if (InvDMI != InversedLastUser.end()) { - SmallPtrSet &L = InvDMI->second; - L.insert(DMI->first); + if (I->second->invalidate(M)) + ModuleAnalysisResults.erase(I); + + // Now clear all the invalidated results associated specifically with this + // function. + SmallVector InvalidatedPassIDs; + FunctionAnalysisResultListT &ResultsList = FunctionAnalysisResultLists[F]; + for (FunctionAnalysisResultListT::iterator I = ResultsList.begin(), + E = ResultsList.end(); + I != E;) + if (I->second->invalidate(F)) { + InvalidatedPassIDs.push_back(I->first); + I = ResultsList.erase(I); } else { - SmallPtrSet L; L.insert(DMI->first); - InversedLastUser[DMI->second] = L; - } - } -} - -/// Destructor -PMTopLevelManager::~PMTopLevelManager() { - for (SmallVectorImpl::iterator I = PassManagers.begin(), - E = PassManagers.end(); I != E; ++I) - delete *I; - - for (SmallVectorImpl::iterator - I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) - delete *I; - - for (DenseMap::iterator DMI = AnUsageMap.begin(), - DME = AnUsageMap.end(); DMI != DME; ++DMI) - delete DMI->second; -} - -//===----------------------------------------------------------------------===// -// PMDataManager implementation - -/// Augement AvailableAnalysis by adding analysis made available by pass P. -void PMDataManager::recordAvailableAnalysis(Pass *P) { - AnalysisID PI = P->getPassID(); - - AvailableAnalysis[PI] = P; - - assert(!AvailableAnalysis.empty()); - - // This pass is the current implementation of all of the interfaces it - // implements as well. 
- const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI); - if (PInf == 0) return; - const std::vector &II = PInf->getInterfacesImplemented(); - for (unsigned i = 0, e = II.size(); i != e; ++i) - AvailableAnalysis[II[i]->getTypeInfo()] = P; -} - -// Return true if P preserves high level analysis used by other -// passes managed by this manager -bool PMDataManager::preserveHigherLevelAnalysis(Pass *P) { - AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); - if (AnUsage->getPreservesAll()) - return true; - - const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet(); - for (SmallVectorImpl::iterator I = HigherLevelAnalysis.begin(), - E = HigherLevelAnalysis.end(); I != E; ++I) { - Pass *P1 = *I; - if (P1->getAsImmutablePass() == 0 && - std::find(PreservedSet.begin(), PreservedSet.end(), - P1->getPassID()) == - PreservedSet.end()) - return false; - } - - return true; -} - -/// verifyPreservedAnalysis -- Verify analysis preserved by pass P. -void PMDataManager::verifyPreservedAnalysis(Pass *P) { - // Don't do this unless assertions are enabled. -#ifdef NDEBUG - return; -#endif - AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); - const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet(); - - // Verify preserved analysis - for (AnalysisUsage::VectorType::const_iterator I = PreservedSet.begin(), - E = PreservedSet.end(); I != E; ++I) { - AnalysisID AID = *I; - if (Pass *AP = findAnalysisPass(AID, true)) { - TimeRegion PassTimer(getPassTimer(AP)); - AP->verifyAnalysis(); + ++I; } - } + while (!InvalidatedPassIDs.empty()) + FunctionAnalysisResults.erase( + std::make_pair(InvalidatedPassIDs.pop_back_val(), F)); } -/// Remove Analysis not preserved by Pass P -void PMDataManager::removeNotPreservedAnalysis(Pass *P) { - AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); - if (AnUsage->getPreservesAll()) - return; - - const AnalysisUsage::VectorType &PreservedSet = AnUsage->getPreservedSet(); - for (DenseMap::iterator I = AvailableAnalysis.begin(), - E = AvailableAnalysis.end(); I != E; ) { - DenseMap::iterator Info = I++; - if (Info->second->getAsImmutablePass() == 0 && - std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == - PreservedSet.end()) { - // Remove this analysis - if (PassDebugging >= Details) { - Pass *S = Info->second; - dbgs() << " -- '" << P->getPassName() << "' is not preserving '"; - dbgs() << S->getPassName() << "'\n"; - } - AvailableAnalysis.erase(Info); - } - } - - // Check inherited analysis also. If P is not preserving analysis - // provided by parent manager then remove it here. - for (unsigned Index = 0; Index < PMT_Last; ++Index) { - - if (!InheritedAnalysis[Index]) - continue; - - for (DenseMap::iterator - I = InheritedAnalysis[Index]->begin(), - E = InheritedAnalysis[Index]->end(); I != E; ) { - DenseMap::iterator Info = I++; - if (Info->second->getAsImmutablePass() == 0 && - std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) == - PreservedSet.end()) { - // Remove this analysis - if (PassDebugging >= Details) { - Pass *S = Info->second; - dbgs() << " -- '" << P->getPassName() << "' is not preserving '"; - dbgs() << S->getPassName() << "'\n"; - } - InheritedAnalysis[Index]->erase(Info); +void AnalysisManager::invalidateAll(Module *M) { + // First invalidate any module results we still have laying about. + // FIXME: This is a total hack based on the fact that erasure doesn't + // invalidate iteration for DenseMap. 
+ for (ModuleAnalysisResultMapT::iterator I = ModuleAnalysisResults.begin(), + E = ModuleAnalysisResults.end(); + I != E; ++I) + if (I->second->invalidate(M)) + ModuleAnalysisResults.erase(I); + + // Now walk all of the functions for which there are cached results, and + // attempt to invalidate each of those as the entire module may have changed. + // FIXME: How do we handle functions which have been deleted or RAUWed? + SmallVector InvalidatedPassIDs; + for (FunctionAnalysisResultListMapT::iterator + FI = FunctionAnalysisResultLists.begin(), + FE = FunctionAnalysisResultLists.end(); + FI != FE; ++FI) { + Function *F = FI->first; + FunctionAnalysisResultListT &ResultsList = FI->second; + for (FunctionAnalysisResultListT::iterator I = ResultsList.begin(), + E = ResultsList.end(); + I != E;) + if (I->second->invalidate(F)) { + InvalidatedPassIDs.push_back(I->first); + I = ResultsList.erase(I); + } else { + ++I; } - } + while (!InvalidatedPassIDs.empty()) + FunctionAnalysisResults.erase( + std::make_pair(InvalidatedPassIDs.pop_back_val(), F)); } } -/// Remove analysis passes that are not used any longer -void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg, - enum PassDebuggingString DBG_STR) { - - SmallVector DeadPasses; - - // If this is a on the fly manager then it does not have TPM. - if (!TPM) - return; - - TPM->collectLastUses(DeadPasses, P); +const AnalysisManager::AnalysisResultConcept & +AnalysisManager::getResultImpl(void *PassID, Module *M) { + assert(M == this->M && "Wrong module used when querying the AnalysisManager"); + ModuleAnalysisResultMapT::iterator RI; + bool Inserted; + llvm::tie(RI, Inserted) = ModuleAnalysisResults.insert(std::make_pair( + PassID, polymorphic_ptr >())); - if (PassDebugging >= Details && !DeadPasses.empty()) { - dbgs() << " -*- '" << P->getPassName(); - dbgs() << "' is the last user of following pass instances."; - dbgs() << " Free these instances\n"; + if (Inserted) { + // We don't have a cached result for this result. Look up the pass and run + // it to produce a result, which we then add to the cache. + ModuleAnalysisPassMapT::const_iterator PI = + ModuleAnalysisPasses.find(PassID); + assert(PI != ModuleAnalysisPasses.end() && + "Analysis passes must be registered prior to being queried!"); + RI->second = PI->second->run(M); } - for (SmallVectorImpl::iterator I = DeadPasses.begin(), - E = DeadPasses.end(); I != E; ++I) - freePass(*I, Msg, DBG_STR); + return *RI->second; } -void PMDataManager::freePass(Pass *P, StringRef Msg, - enum PassDebuggingString DBG_STR) { - dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg); +const AnalysisManager::AnalysisResultConcept & +AnalysisManager::getResultImpl(void *PassID, Function *F) { + assert(F->getParent() == M && "Analyzing a function from another module!"); - { - // If the pass crashes releasing memory, remember this. - PassManagerPrettyStackEntry X(P); - TimeRegion PassTimer(getPassTimer(P)); + FunctionAnalysisResultMapT::iterator RI; + bool Inserted; + llvm::tie(RI, Inserted) = FunctionAnalysisResults.insert(std::make_pair( + std::make_pair(PassID, F), FunctionAnalysisResultListT::iterator())); - P->releaseMemory(); + if (Inserted) { + // We don't have a cached result for this result. Look up the pass and run + // it to produce a result, which we then add to the cache. 
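[Editor's note] The getResultImpl() overloads in this diff use an insert-then-fill memoization idiom: a single map insertion both reserves the slot and reports, via the returned bool, whether a cached result already existed, so the analysis pass only runs on a cache miss. The idiom in miniature, with std::map in place of the DenseMap and a hypothetical compute function:

    #include <map>
    #include <utility>

    int runAnalysis(int Key) { return Key * Key; }   // stand-in for Pass->run()

    int &getResult(std::map<int, int> &Cache, int Key) {
      // insert() returns (iterator, inserted): one lookup reserves the slot
      // and tells us whether a result was already cached.
      std::pair<std::map<int, int>::iterator, bool> R =
          Cache.insert(std::make_pair(Key, 0));
      if (R.second)                 // miss: compute and fill the reserved slot
        R.first->second = runAnalysis(Key);
      return R.first->second;
    }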
+ FunctionAnalysisPassMapT::const_iterator PI = + FunctionAnalysisPasses.find(PassID); + assert(PI != FunctionAnalysisPasses.end() && + "Analysis passes must be registered prior to being queried!"); + FunctionAnalysisResultListT &ResultList = FunctionAnalysisResultLists[F]; + ResultList.push_back(std::make_pair(PassID, PI->second->run(F))); + RI->second = llvm::prior(ResultList.end()); } - AnalysisID PI = P->getPassID(); - if (const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(PI)) { - // Remove the pass itself (if it is not already removed). - AvailableAnalysis.erase(PI); - - // Remove all interfaces this pass implements, for which it is also - // listed as the available implementation. - const std::vector &II = PInf->getInterfacesImplemented(); - for (unsigned i = 0, e = II.size(); i != e; ++i) { - DenseMap::iterator Pos = - AvailableAnalysis.find(II[i]->getTypeInfo()); - if (Pos != AvailableAnalysis.end() && Pos->second == P) - AvailableAnalysis.erase(Pos); - } - } + return *RI->second->second; } -/// Add pass P into the PassVector. Update -/// AvailableAnalysis appropriately if ProcessAnalysis is true. -void PMDataManager::add(Pass *P, bool ProcessAnalysis) { - // This manager is going to manage pass P. Set up analysis resolver - // to connect them. - AnalysisResolver *AR = new AnalysisResolver(*this); - P->setResolver(AR); +void AnalysisManager::invalidateImpl(void *PassID, Module *M) { + assert(M == this->M && "Invalidating a pass over a different module!"); + ModuleAnalysisResults.erase(PassID); +} - // If a FunctionPass F is the last user of ModulePass info M - // then the F's manager, not F, records itself as a last user of M. - SmallVector TransferLastUses; +void AnalysisManager::invalidateImpl(void *PassID, Function *F) { + assert(F->getParent() == M && + "Invalidating a pass over a function from another module!"); - if (!ProcessAnalysis) { - // Add pass - PassVector.push_back(P); + FunctionAnalysisResultMapT::iterator RI = + FunctionAnalysisResults.find(std::make_pair(PassID, F)); + if (RI == FunctionAnalysisResults.end()) return; - } - - // At the moment, this pass is the last user of all required passes. - SmallVector LastUses; - SmallVector RequiredPasses; - SmallVector ReqAnalysisNotAvailable; - - unsigned PDepth = this->getDepth(); - collectRequiredAnalysis(RequiredPasses, - ReqAnalysisNotAvailable, P); - for (SmallVectorImpl::iterator I = RequiredPasses.begin(), - E = RequiredPasses.end(); I != E; ++I) { - Pass *PRequired = *I; - unsigned RDepth = 0; - - assert(PRequired->getResolver() && "Analysis Resolver is not set"); - PMDataManager &DM = PRequired->getResolver()->getPMDataManager(); - RDepth = DM.getDepth(); - - if (PDepth == RDepth) - LastUses.push_back(PRequired); - else if (PDepth > RDepth) { - // Let the parent claim responsibility of last use - TransferLastUses.push_back(PRequired); - // Keep track of higher level analysis used by this manager. - HigherLevelAnalysis.push_back(PRequired); - } else - llvm_unreachable("Unable to accommodate Required Pass"); - } - - // Set P as P's last user until someone starts using P. - // However, if P is a Pass Manager then it does not need - // to record its last user. - if (P->getAsPMDataManager() == 0) - LastUses.push_back(P); - TPM->setLastUser(LastUses, P); - - if (!TransferLastUses.empty()) { - Pass *My_PM = getAsPass(); - TPM->setLastUser(TransferLastUses, My_PM); - TransferLastUses.clear(); - } - - // Now, take care of required analyses that are not available. 
- for (SmallVectorImpl::iterator - I = ReqAnalysisNotAvailable.begin(), - E = ReqAnalysisNotAvailable.end() ;I != E; ++I) { - const PassInfo *PI = PassRegistry::getPassRegistry()->getPassInfo(*I); - Pass *AnalysisPass = PI->createPass(); - this->addLowerLevelRequiredPass(P, AnalysisPass); - } - - // Take a note of analysis required and made available by this pass. - // Remove the analysis not preserved by this pass - removeNotPreservedAnalysis(P); - recordAvailableAnalysis(P); - - // Add pass - PassVector.push_back(P); -} - - -/// Populate RP with analysis pass that are required by -/// pass P and are available. Populate RP_NotAvail with analysis -/// pass that are required by pass P but are not available. -void PMDataManager::collectRequiredAnalysis(SmallVectorImpl &RP, - SmallVectorImpl &RP_NotAvail, - Pass *P) { - AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); - const AnalysisUsage::VectorType &RequiredSet = AnUsage->getRequiredSet(); - for (AnalysisUsage::VectorType::const_iterator - I = RequiredSet.begin(), E = RequiredSet.end(); I != E; ++I) { - if (Pass *AnalysisPass = findAnalysisPass(*I, true)) - RP.push_back(AnalysisPass); - else - RP_NotAvail.push_back(*I); - } - - const AnalysisUsage::VectorType &IDs = AnUsage->getRequiredTransitiveSet(); - for (AnalysisUsage::VectorType::const_iterator I = IDs.begin(), - E = IDs.end(); I != E; ++I) { - if (Pass *AnalysisPass = findAnalysisPass(*I, true)) - RP.push_back(AnalysisPass); - else - RP_NotAvail.push_back(*I); - } -} - -// All Required analyses should be available to the pass as it runs! Here -// we fill in the AnalysisImpls member of the pass so that it can -// successfully use the getAnalysis() method to retrieve the -// implementations it needs. -// -void PMDataManager::initializeAnalysisImpl(Pass *P) { - AnalysisUsage *AnUsage = TPM->findAnalysisUsage(P); - - for (AnalysisUsage::VectorType::const_iterator - I = AnUsage->getRequiredSet().begin(), - E = AnUsage->getRequiredSet().end(); I != E; ++I) { - Pass *Impl = findAnalysisPass(*I, true); - if (Impl == 0) - // This may be analysis pass that is initialized on the fly. - // If that is not the case then it will raise an assert when it is used. - continue; - AnalysisResolver *AR = P->getResolver(); - assert(AR && "Analysis Resolver is not set"); - AR->addAnalysisImplsPair(*I, Impl); - } -} - -/// Find the pass that implements Analysis AID. If desired pass is not found -/// then return NULL. -Pass *PMDataManager::findAnalysisPass(AnalysisID AID, bool SearchParent) { - - // Check if AvailableAnalysis map has one entry. - DenseMap::const_iterator I = AvailableAnalysis.find(AID); - - if (I != AvailableAnalysis.end()) - return I->second; - - // Search Parents through TopLevelManager - if (SearchParent) - return TPM->findAnalysisPass(AID); - - return NULL; -} - -// Print list of passes that are last used by P. -void PMDataManager::dumpLastUses(Pass *P, unsigned Offset) const{ - - SmallVector LUses; - - // If this is a on the fly manager then it does not have TPM. 
- if (!TPM) - return; - - TPM->collectLastUses(LUses, P); - - for (SmallVectorImpl::iterator I = LUses.begin(), - E = LUses.end(); I != E; ++I) { - llvm::dbgs() << "--" << std::string(Offset*2, ' '); - (*I)->dumpPassStructure(0); - } -} - -void PMDataManager::dumpPassArguments() const { - for (SmallVectorImpl::const_iterator I = PassVector.begin(), - E = PassVector.end(); I != E; ++I) { - if (PMDataManager *PMD = (*I)->getAsPMDataManager()) - PMD->dumpPassArguments(); - else - if (const PassInfo *PI = - PassRegistry::getPassRegistry()->getPassInfo((*I)->getPassID())) - if (!PI->isAnalysisGroup()) - dbgs() << " -" << PI->getPassArgument(); - } -} - -void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1, - enum PassDebuggingString S2, - StringRef Msg) { - if (PassDebugging < Executions) - return; - dbgs() << (void*)this << std::string(getDepth()*2+1, ' '); - switch (S1) { - case EXECUTION_MSG: - dbgs() << "Executing Pass '" << P->getPassName(); - break; - case MODIFICATION_MSG: - dbgs() << "Made Modification '" << P->getPassName(); - break; - case FREEING_MSG: - dbgs() << " Freeing Pass '" << P->getPassName(); - break; - default: - break; - } - switch (S2) { - case ON_BASICBLOCK_MSG: - dbgs() << "' on BasicBlock '" << Msg << "'...\n"; - break; - case ON_FUNCTION_MSG: - dbgs() << "' on Function '" << Msg << "'...\n"; - break; - case ON_MODULE_MSG: - dbgs() << "' on Module '" << Msg << "'...\n"; - break; - case ON_REGION_MSG: - dbgs() << "' on Region '" << Msg << "'...\n"; - break; - case ON_LOOP_MSG: - dbgs() << "' on Loop '" << Msg << "'...\n"; - break; - case ON_CG_MSG: - dbgs() << "' on Call Graph Nodes '" << Msg << "'...\n"; - break; - default: - break; - } -} - -void PMDataManager::dumpRequiredSet(const Pass *P) const { - if (PassDebugging < Details) - return; - - AnalysisUsage analysisUsage; - P->getAnalysisUsage(analysisUsage); - dumpAnalysisUsage("Required", P, analysisUsage.getRequiredSet()); -} - -void PMDataManager::dumpPreservedSet(const Pass *P) const { - if (PassDebugging < Details) - return; - - AnalysisUsage analysisUsage; - P->getAnalysisUsage(analysisUsage); - dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet()); -} - -void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P, - const AnalysisUsage::VectorType &Set) const { - assert(PassDebugging >= Details); - if (Set.empty()) - return; - dbgs() << (const void*)P << std::string(getDepth()*2+3, ' ') << Msg << " Analyses:"; - for (unsigned i = 0; i != Set.size(); ++i) { - if (i) dbgs() << ','; - const PassInfo *PInf = PassRegistry::getPassRegistry()->getPassInfo(Set[i]); - if (!PInf) { - // Some preserved passes, such as AliasAnalysis, may not be initialized by - // all drivers. - dbgs() << " Uninitialized Pass"; - continue; - } - dbgs() << ' ' << PInf->getPassName(); - } - dbgs() << '\n'; -} - -/// Add RequiredPass into list of lower level passes required by pass P. -/// RequiredPass is run on the fly by Pass Manager when P requests it -/// through getAnalysis interface. -/// This should be handled by specific pass manager. -void PMDataManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) { - if (TPM) { - TPM->dumpArguments(); - TPM->dumpPasses(); - } - - // Module Level pass may required Function Level analysis info - // (e.g. dominator info). Pass manager uses on the fly function pass manager - // to provide this on demand. In that case, in Pass manager terminology, - // module level pass is requiring lower level analysis info managed by - // lower level pass manager. 
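[Editor's note] All of the dump helpers in this region are gated on the same verbosity ladder (Arguments < Structure < Executions < Details, selected by opt's -debug-pass option): each helper returns early unless the requested level reaches its own. A sketch of that guard pattern, with hypothetical names standing in for the real globals:

    #include <cstdio>

    enum PassDebugLevel { None, Arguments, Structure, Executions, Details };
    static PassDebugLevel PassDebugging = None;   // set from -debug-pass=...

    // Mirrors dumpPassInfo(): silent unless the Executions level is reached.
    void dumpExecutionMessage(const char *PassName, const char *UnitName) {
      if (PassDebugging < Executions)
        return;
      std::printf("Executing Pass '%s' on Function '%s'...\n",
                  PassName, UnitName);
    }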
- - // When Pass manager is not able to order required analysis info, Pass manager - // checks whether any lower level manager will be able to provide this - // analysis info on demand or not. -#ifndef NDEBUG - dbgs() << "Unable to schedule '" << RequiredPass->getPassName(); - dbgs() << "' required by '" << P->getPassName() << "'\n"; -#endif - llvm_unreachable("Unable to schedule pass"); -} - -Pass *PMDataManager::getOnTheFlyPass(Pass *P, AnalysisID PI, Function &F) { - llvm_unreachable("Unable to find on the fly pass"); -} - -// Destructor -PMDataManager::~PMDataManager() { - for (SmallVectorImpl::iterator I = PassVector.begin(), - E = PassVector.end(); I != E; ++I) - delete *I; -} - -//===----------------------------------------------------------------------===// -// NOTE: Is this the right place to define this method ? -// getAnalysisIfAvailable - Return analysis result or null if it doesn't exist. -Pass *AnalysisResolver::getAnalysisIfAvailable(AnalysisID ID, bool dir) const { - return PM.findAnalysisPass(ID, dir); -} - -Pass *AnalysisResolver::findImplPass(Pass *P, AnalysisID AnalysisPI, - Function &F) { - return PM.getOnTheFlyPass(P, AnalysisPI, F); -} - -//===----------------------------------------------------------------------===// -// BBPassManager implementation - -/// Execute all of the passes scheduled for execution by invoking -/// runOnBasicBlock method. Keep track of whether any of the passes modifies -/// the function, and if so, return true. -bool BBPassManager::runOnFunction(Function &F) { - if (F.isDeclaration()) - return false; - - bool Changed = doInitialization(F); - - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - BasicBlockPass *BP = getContainedPass(Index); - bool LocalChanged = false; - - dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName()); - dumpRequiredSet(BP); - - initializeAnalysisImpl(BP); - - { - // If the pass crashes, remember this. 
- PassManagerPrettyStackEntry X(BP, *I); - TimeRegion PassTimer(getPassTimer(BP)); - - LocalChanged |= BP->runOnBasicBlock(*I); - } - - Changed |= LocalChanged; - if (LocalChanged) - dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG, - I->getName()); - dumpPreservedSet(BP); - - verifyPreservedAnalysis(BP); - removeNotPreservedAnalysis(BP); - recordAvailableAnalysis(BP); - removeDeadPasses(BP, I->getName(), ON_BASICBLOCK_MSG); - } - - return doFinalization(F) || Changed; -} - -// Implement doInitialization and doFinalization -bool BBPassManager::doInitialization(Module &M) { - bool Changed = false; - - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) - Changed |= getContainedPass(Index)->doInitialization(M); - - return Changed; -} - -bool BBPassManager::doFinalization(Module &M) { - bool Changed = false; - - for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index) - Changed |= getContainedPass(Index)->doFinalization(M); - - return Changed; -} - -bool BBPassManager::doInitialization(Function &F) { - bool Changed = false; - - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - BasicBlockPass *BP = getContainedPass(Index); - Changed |= BP->doInitialization(F); - } - - return Changed; -} - -bool BBPassManager::doFinalization(Function &F) { - bool Changed = false; - - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - BasicBlockPass *BP = getContainedPass(Index); - Changed |= BP->doFinalization(F); - } - - return Changed; -} - - -//===----------------------------------------------------------------------===// -// FunctionPassManager implementation - -/// Create new Function pass manager -FunctionPassManager::FunctionPassManager(Module *m) : M(m) { - FPM = new FunctionPassManagerImpl(); - // FPM is the top level manager. - FPM->setTopLevelManager(FPM); - - AnalysisResolver *AR = new AnalysisResolver(*FPM); - FPM->setResolver(AR); -} - -FunctionPassManager::~FunctionPassManager() { - delete FPM; -} - -/// add - Add a pass to the queue of passes to run. This passes -/// ownership of the Pass to the PassManager. When the -/// PassManager_X is destroyed, the pass will be destroyed as well, so -/// there is no need to delete the pass. (TODO delete passes.) -/// This implies that all passes MUST be allocated with 'new'. -void FunctionPassManager::add(Pass *P) { - FPM->add(P); -} - -/// run - Execute all of the passes scheduled for execution. Keep -/// track of whether any of the passes modifies the function, and if -/// so, return true. -/// -bool FunctionPassManager::run(Function &F) { - if (F.isMaterializable()) { - std::string errstr; - if (F.Materialize(&errstr)) - report_fatal_error("Error reading bitcode file: " + Twine(errstr)); - } - return FPM->run(F); -} - - -/// doInitialization - Run all of the initializers for the function passes. -/// -bool FunctionPassManager::doInitialization() { - return FPM->doInitialization(*M); -} - -/// doFinalization - Run all of the finalizers for the function passes. 
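[Editor's note] The FunctionPassManager members defined in this region (construct with a Module, add(), doInitialization(), run() per function, doFinalization()) form the usual legacy driver loop. A hedged sketch of a caller, assuming LLVM 3.4's llvm/PassManager.h and an already-loaded Module; a real caller would add() transform passes before running:

    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h"

    void optimizeAllFunctions(llvm::Module &M) {
      llvm::FunctionPassManager FPM(&M);
      // FPM.add(...);            // transform/analysis passes would go here
      FPM.doInitialization();     // run per-module initialization hooks once
      for (llvm::Module::iterator F = M.begin(), E = M.end(); F != E; ++F)
        FPM.run(*F);              // run() also materializes lazy bodies
      FPM.doFinalization();       // matching finalization hooks
    }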
-/// -bool FunctionPassManager::doFinalization() { - return FPM->doFinalization(*M); -} - -//===----------------------------------------------------------------------===// -// FunctionPassManagerImpl implementation -// -bool FunctionPassManagerImpl::doInitialization(Module &M) { - bool Changed = false; - - dumpArguments(); - dumpPasses(); - - SmallVectorImpl& IPV = getImmutablePasses(); - for (SmallVectorImpl::const_iterator I = IPV.begin(), - E = IPV.end(); I != E; ++I) { - Changed |= (*I)->doInitialization(M); - } - - for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) - Changed |= getContainedManager(Index)->doInitialization(M); - - return Changed; + FunctionAnalysisResultLists[F].erase(RI->second); } - -bool FunctionPassManagerImpl::doFinalization(Module &M) { - bool Changed = false; - - for (int Index = getNumContainedManagers() - 1; Index >= 0; --Index) - Changed |= getContainedManager(Index)->doFinalization(M); - - SmallVectorImpl& IPV = getImmutablePasses(); - for (SmallVectorImpl::const_iterator I = IPV.begin(), - E = IPV.end(); I != E; ++I) { - Changed |= (*I)->doFinalization(M); - } - - return Changed; -} - -/// cleanup - After running all passes, clean up pass manager cache. -void FPPassManager::cleanup() { - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - FunctionPass *FP = getContainedPass(Index); - AnalysisResolver *AR = FP->getResolver(); - assert(AR && "Analysis Resolver is not set"); - AR->clearAnalysisImpls(); - } -} - -void FunctionPassManagerImpl::releaseMemoryOnTheFly() { - if (!wasRun) - return; - for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) { - FPPassManager *FPPM = getContainedManager(Index); - for (unsigned Index = 0; Index < FPPM->getNumContainedPasses(); ++Index) { - FPPM->getContainedPass(Index)->releaseMemory(); - } - } - wasRun = false; -} - -// Execute all the passes managed by this top level manager. -// Return true if any function is modified by a pass. -bool FunctionPassManagerImpl::run(Function &F) { - bool Changed = false; - TimingInfo::createTheTimeInfo(); - - initializeAllAnalysisInfo(); - for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) - Changed |= getContainedManager(Index)->runOnFunction(F); - - for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) - getContainedManager(Index)->cleanup(); - - wasRun = true; - return Changed; -} - -//===----------------------------------------------------------------------===// -// FPPassManager implementation - -char FPPassManager::ID = 0; -/// Print passes managed by this manager -void FPPassManager::dumpPassStructure(unsigned Offset) { - dbgs().indent(Offset*2) << "FunctionPass Manager\n"; - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - FunctionPass *FP = getContainedPass(Index); - FP->dumpPassStructure(Offset + 1); - dumpLastUses(FP, Offset+1); - } -} - - -/// Execute all of the passes scheduled for execution by invoking -/// runOnFunction method. Keep track of whether any of the passes modifies -/// the function, and if so, return true. -bool FPPassManager::runOnFunction(Function &F) { - if (F.isDeclaration()) - return false; - - bool Changed = false; - - // Collect inherited analysis from Module level pass manager. 
- populateInheritedAnalysis(TPM->activeStack); - - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - FunctionPass *FP = getContainedPass(Index); - bool LocalChanged = false; - - dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName()); - dumpRequiredSet(FP); - - initializeAnalysisImpl(FP); - - { - PassManagerPrettyStackEntry X(FP, F); - TimeRegion PassTimer(getPassTimer(FP)); - - LocalChanged |= FP->runOnFunction(F); - } - - Changed |= LocalChanged; - if (LocalChanged) - dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName()); - dumpPreservedSet(FP); - - verifyPreservedAnalysis(FP); - removeNotPreservedAnalysis(FP); - recordAvailableAnalysis(FP); - removeDeadPasses(FP, F.getName(), ON_FUNCTION_MSG); - } - return Changed; -} - -bool FPPassManager::runOnModule(Module &M) { - bool Changed = false; - - for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) - Changed |= runOnFunction(*I); - - return Changed; -} - -bool FPPassManager::doInitialization(Module &M) { - bool Changed = false; - - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) - Changed |= getContainedPass(Index)->doInitialization(M); - - return Changed; -} - -bool FPPassManager::doFinalization(Module &M) { - bool Changed = false; - - for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index) - Changed |= getContainedPass(Index)->doFinalization(M); - - return Changed; -} - -//===----------------------------------------------------------------------===// -// MPPassManager implementation - -/// Execute all of the passes scheduled for execution by invoking -/// runOnModule method. Keep track of whether any of the passes modifies -/// the module, and if so, return true. -bool -MPPassManager::runOnModule(Module &M) { - bool Changed = false; - - // Initialize on-the-fly passes - for (std::map::iterator - I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end(); - I != E; ++I) { - FunctionPassManagerImpl *FPP = I->second; - Changed |= FPP->doInitialization(M); - } - - // Initialize module passes - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) - Changed |= getContainedPass(Index)->doInitialization(M); - - for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) { - ModulePass *MP = getContainedPass(Index); - bool LocalChanged = false; - - dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier()); - dumpRequiredSet(MP); - - initializeAnalysisImpl(MP); - - { - PassManagerPrettyStackEntry X(MP, M); - TimeRegion PassTimer(getPassTimer(MP)); - - LocalChanged |= MP->runOnModule(M); - } - - Changed |= LocalChanged; - if (LocalChanged) - dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG, - M.getModuleIdentifier()); - dumpPreservedSet(MP); - - verifyPreservedAnalysis(MP); - removeNotPreservedAnalysis(MP); - recordAvailableAnalysis(MP); - removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG); - } - - // Finalize module passes - for (int Index = getNumContainedPasses() - 1; Index >= 0; --Index) - Changed |= getContainedPass(Index)->doFinalization(M); - - // Finalize on-the-fly passes - for (std::map::iterator - I = OnTheFlyManagers.begin(), E = OnTheFlyManagers.end(); - I != E; ++I) { - FunctionPassManagerImpl *FPP = I->second; - // We don't know when is the last time an on-the-fly pass is run, - // so we need to releaseMemory / finalize here - FPP->releaseMemoryOnTheFly(); - Changed |= FPP->doFinalization(M); - } - - return Changed; -} - -/// Add RequiredPass into list of lower level passes required by pass P. 
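[Editor's note] MPPassManager::runOnModule() in this hunk initializes passes front-to-back and finalizes them back-to-front, OR-ing every "changed" flag into the result. The ordering skeleton, reduced to stubs:

    #include <vector>

    struct PassStub {
      bool doInit() { return false; }    // stand-ins for doInitialization /
      bool doFinal() { return false; }   // doFinalization returning "changed"
    };

    bool runAll(std::vector<PassStub> &Passes) {
      bool Changed = false;
      for (unsigned i = 0, e = Passes.size(); i != e; ++i)
        Changed |= Passes[i].doInit();
      // ... each pass would run here, in order ...
      for (unsigned i = Passes.size(); i-- != 0;)   // reverse order
        Changed |= Passes[i].doFinal();
      return Changed;
    }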
-/// RequiredPass is run on the fly by Pass Manager when P requests it -/// through getAnalysis interface. -void MPPassManager::addLowerLevelRequiredPass(Pass *P, Pass *RequiredPass) { - assert(P->getPotentialPassManagerType() == PMT_ModulePassManager && - "Unable to handle Pass that requires lower level Analysis pass"); - assert((P->getPotentialPassManagerType() < - RequiredPass->getPotentialPassManagerType()) && - "Unable to handle Pass that requires lower level Analysis pass"); - - FunctionPassManagerImpl *FPP = OnTheFlyManagers[P]; - if (!FPP) { - FPP = new FunctionPassManagerImpl(); - // FPP is the top level manager. - FPP->setTopLevelManager(FPP); - - OnTheFlyManagers[P] = FPP; - } - FPP->add(RequiredPass); - - // Register P as the last user of RequiredPass. - if (RequiredPass) { - SmallVector LU; - LU.push_back(RequiredPass); - FPP->setLastUser(LU, P); - } -} - -/// Return function pass corresponding to PassInfo PI, that is -/// required by module pass MP. Instantiate analysis pass, by using -/// its runOnFunction() for function F. -Pass* MPPassManager::getOnTheFlyPass(Pass *MP, AnalysisID PI, Function &F){ - FunctionPassManagerImpl *FPP = OnTheFlyManagers[MP]; - assert(FPP && "Unable to find on the fly pass"); - - FPP->releaseMemoryOnTheFly(); - FPP->run(F); - return ((PMTopLevelManager*)FPP)->findAnalysisPass(PI); -} - - -//===----------------------------------------------------------------------===// -// PassManagerImpl implementation - -// -/// run - Execute all of the passes scheduled for execution. Keep track of -/// whether any of the passes modifies the module, and if so, return true. -bool PassManagerImpl::run(Module &M) { - bool Changed = false; - TimingInfo::createTheTimeInfo(); - - dumpArguments(); - dumpPasses(); - - SmallVectorImpl& IPV = getImmutablePasses(); - for (SmallVectorImpl::const_iterator I = IPV.begin(), - E = IPV.end(); I != E; ++I) { - Changed |= (*I)->doInitialization(M); - } - - initializeAllAnalysisInfo(); - for (unsigned Index = 0; Index < getNumContainedManagers(); ++Index) - Changed |= getContainedManager(Index)->runOnModule(M); - - for (SmallVectorImpl::const_iterator I = IPV.begin(), - E = IPV.end(); I != E; ++I) { - Changed |= (*I)->doFinalization(M); - } - - return Changed; -} - -//===----------------------------------------------------------------------===// -// PassManager implementation - -/// Create new pass manager -PassManager::PassManager() { - PM = new PassManagerImpl(); - // PM is the top level manager - PM->setTopLevelManager(PM); -} - -PassManager::~PassManager() { - delete PM; -} - -/// add - Add a pass to the queue of passes to run. This passes ownership of -/// the Pass to the PassManager. When the PassManager is destroyed, the pass -/// will be destroyed as well, so there is no need to delete the pass. This -/// implies that all passes MUST be allocated with 'new'. -void PassManager::add(Pass *P) { - PM->add(P); -} - -/// run - Execute all of the passes scheduled for execution. Keep track of -/// whether any of the passes modifies the module, and if so, return true. 
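[Editor's note] addLowerLevelRequiredPass() above creates the on-the-fly FunctionPassManagerImpl lazily through the OnTheFlyManagers map: operator[] inserts a null slot on first use, which is then filled in exactly once. The lazy-creation idiom on a stub type:

    #include <map>

    struct ManagerStub {};   // stand-in for FunctionPassManagerImpl

    ManagerStub *getOrCreateManager(std::map<void *, ManagerStub *> &Managers,
                                    void *P) {
      ManagerStub *&Slot = Managers[P];   // operator[] inserts null on first use
      if (!Slot)
        Slot = new ManagerStub();         // fill the slot exactly once
      return Slot;
    }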
-bool PassManager::run(Module &M) { - return PM->run(M); -} - -//===----------------------------------------------------------------------===// -// TimingInfo implementation - -bool llvm::TimePassesIsEnabled = false; -static cl::opt -EnableTiming("time-passes", cl::location(TimePassesIsEnabled), - cl::desc("Time each pass, printing elapsed time for each on exit")); - -// createTheTimeInfo - This method either initializes the TheTimeInfo pointer to -// a non null value (if the -time-passes option is enabled) or it leaves it -// null. It may be called multiple times. -void TimingInfo::createTheTimeInfo() { - if (!TimePassesIsEnabled || TheTimeInfo) return; - - // Constructed the first time this is called, iff -time-passes is enabled. - // This guarantees that the object will be constructed before static globals, - // thus it will be destroyed before them. - static ManagedStatic TTI; - TheTimeInfo = &*TTI; -} - -/// If TimingInfo is enabled then start pass timer. -Timer *llvm::getPassTimer(Pass *P) { - if (TheTimeInfo) - return TheTimeInfo->getPassTimer(P); - return 0; -} - -//===----------------------------------------------------------------------===// -// PMStack implementation -// - -// Pop Pass Manager from the stack and clear its analysis info. -void PMStack::pop() { - - PMDataManager *Top = this->top(); - Top->initializeAnalysisInfo(); - - S.pop_back(); -} - -// Push PM on the stack and set its top level manager. -void PMStack::push(PMDataManager *PM) { - assert(PM && "Unable to push. Pass Manager expected"); - assert(PM->getDepth()==0 && "Pass Manager depth set too early"); - - if (!this->empty()) { - assert(PM->getPassManagerType() > this->top()->getPassManagerType() - && "pushing bad pass manager to PMStack"); - PMTopLevelManager *TPM = this->top()->getTopLevelManager(); - - assert(TPM && "Unable to find top level manager"); - TPM->addIndirectPassManager(PM); - PM->setTopLevelManager(TPM); - PM->setDepth(this->top()->getDepth()+1); - } else { - assert((PM->getPassManagerType() == PMT_ModulePassManager - || PM->getPassManagerType() == PMT_FunctionPassManager) - && "pushing bad pass manager to PMStack"); - PM->setDepth(1); - } - - S.push_back(PM); -} - -// Dump content of the pass manager stack. -void PMStack::dump() const { - for (std::vector::const_iterator I = S.begin(), - E = S.end(); I != E; ++I) - dbgs() << (*I)->getAsPass()->getPassName() << ' '; - - if (!S.empty()) - dbgs() << '\n'; -} - -/// Find appropriate Module Pass Manager in the PM Stack and -/// add self into that manager. -void ModulePass::assignPassManager(PMStack &PMS, - PassManagerType PreferredType) { - // Find Module Pass Manager - while (!PMS.empty()) { - PassManagerType TopPMType = PMS.top()->getPassManagerType(); - if (TopPMType == PreferredType) - break; // We found desired pass manager - else if (TopPMType > PMT_ModulePassManager) - PMS.pop(); // Pop children pass managers - else - break; - } - assert(!PMS.empty() && "Unable to find appropriate Pass Manager"); - PMS.top()->add(this); -} - -/// Find appropriate Function Pass Manager or Call Graph Pass Manager -/// in the PM Stack and add self into that manager. -void FunctionPass::assignPassManager(PMStack &PMS, - PassManagerType PreferredType) { - - // Find Function Pass Manager - while (!PMS.empty()) { - if (PMS.top()->getPassManagerType() > PMT_FunctionPassManager) - PMS.pop(); - else - break; - } - - // Create new Function Pass Manager if needed. 
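[Editor's note] EnableTiming above shows the cl::location pattern: the -time-passes option writes through to an externally visible bool (TimePassesIsEnabled) rather than owning its own storage, so other translation units can test the flag directly. The same pattern with a hypothetical option name:

    #include "llvm/Support/CommandLine.h"

    bool MyFeatureEnabled = false;   // analogous to llvm::TimePassesIsEnabled

    static llvm::cl::opt<bool, true>   // 'true' selects external storage
    EnableMyFeature("my-feature", llvm::cl::location(MyFeatureEnabled),
                    llvm::cl::desc("Enable the hypothetical feature"));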
- FPPassManager *FPP; - if (PMS.top()->getPassManagerType() == PMT_FunctionPassManager) { - FPP = (FPPassManager *)PMS.top(); - } else { - assert(!PMS.empty() && "Unable to create Function Pass Manager"); - PMDataManager *PMD = PMS.top(); - - // [1] Create new Function Pass Manager - FPP = new FPPassManager(); - FPP->populateInheritedAnalysis(PMS); - - // [2] Set up new manager's top level manager - PMTopLevelManager *TPM = PMD->getTopLevelManager(); - TPM->addIndirectPassManager(FPP); - - // [3] Assign manager to manage this new manager. This may create - // and push new managers into PMS - FPP->assignPassManager(PMS, PMD->getPassManagerType()); - - // [4] Push new manager into PMS - PMS.push(FPP); - } - - // Assign FPP as the manager of this pass. - FPP->add(this); -} - -/// Find appropriate Basic Pass Manager or Call Graph Pass Manager -/// in the PM Stack and add self into that manager. -void BasicBlockPass::assignPassManager(PMStack &PMS, - PassManagerType PreferredType) { - BBPassManager *BBP; - - // Basic Pass Manager is a leaf pass manager. It does not handle - // any other pass manager. - if (!PMS.empty() && - PMS.top()->getPassManagerType() == PMT_BasicBlockPassManager) { - BBP = (BBPassManager *)PMS.top(); - } else { - // If leaf manager is not Basic Block Pass manager then create new - // basic Block Pass manager. - assert(!PMS.empty() && "Unable to create BasicBlock Pass Manager"); - PMDataManager *PMD = PMS.top(); - - // [1] Create new Basic Block Manager - BBP = new BBPassManager(); - - // [2] Set up new manager's top level manager - // Basic Block Pass Manager does not live by itself - PMTopLevelManager *TPM = PMD->getTopLevelManager(); - TPM->addIndirectPassManager(BBP); - - // [3] Assign manager to manage this new manager. This may create - // and push new managers into PMS - BBP->assignPassManager(PMS, PreferredType); - - // [4] Push new manager into PMS - PMS.push(BBP); - } - - // Assign BBP as the manager of this pass. - BBP->add(this); -} - -PassManagerBase::~PassManagerBase() {} diff --git a/contrib/llvm/lib/IR/PassRegistry.cpp b/contrib/llvm/lib/IR/PassRegistry.cpp index a0b64ed..d3b2f1f 100644 --- a/contrib/llvm/lib/IR/PassRegistry.cpp +++ b/contrib/llvm/lib/IR/PassRegistry.cpp @@ -21,6 +21,7 @@ #include "llvm/Support/Compiler.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/RWMutex.h" #include using namespace llvm; @@ -35,7 +36,7 @@ PassRegistry *PassRegistry::getPassRegistry() { return &*PassRegistryObj; } -static ManagedStatic > Lock; +static ManagedStatic > Lock; //===----------------------------------------------------------------------===// // PassRegistryImpl @@ -72,7 +73,7 @@ void *PassRegistry::getImpl() const { // PassRegistry::~PassRegistry() { - sys::SmartScopedLock Guard(*Lock); + sys::SmartScopedWriter Guard(*Lock); PassRegistryImpl *Impl = static_cast(pImpl); for (std::vector::iterator I = Impl->ToFree.begin(), @@ -84,14 +85,14 @@ PassRegistry::~PassRegistry() { } const PassInfo *PassRegistry::getPassInfo(const void *TI) const { - sys::SmartScopedLock Guard(*Lock); + sys::SmartScopedReader Guard(*Lock); PassRegistryImpl *Impl = static_cast(getImpl()); PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.find(TI); return I != Impl->PassInfoMap.end() ? 
     I->second : 0;
 }
 
 const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
-  sys::SmartScopedLock<true> Guard(*Lock);
+  sys::SmartScopedReader<true> Guard(*Lock);
   PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
   PassRegistryImpl::StringMapType::const_iterator
     I = Impl->PassInfoStringMap.find(Arg);
@@ -103,7 +104,7 @@ const PassInfo *PassRegistry::getPassInfo(StringRef Arg) const {
 //
 
 void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
-  sys::SmartScopedLock<true> Guard(*Lock);
+  sys::SmartScopedWriter<true> Guard(*Lock);
   PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
   bool Inserted =
     Impl->PassInfoMap.insert(std::make_pair(PI.getTypeInfo(),&PI)).second;
@@ -120,7 +121,7 @@ void PassRegistry::registerPass(const PassInfo &PI, bool ShouldFree) {
 }
 
 void PassRegistry::unregisterPass(const PassInfo &PI) {
-  sys::SmartScopedLock<true> Guard(*Lock);
+  sys::SmartScopedWriter<true> Guard(*Lock);
   PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
   PassRegistryImpl::MapType::iterator I =
     Impl->PassInfoMap.find(PI.getTypeInfo());
@@ -132,7 +133,7 @@ void PassRegistry::unregisterPass(const PassInfo &PI) {
 }
 
 void PassRegistry::enumerateWith(PassRegistrationListener *L) {
-  sys::SmartScopedLock<true> Guard(*Lock);
+  sys::SmartScopedReader<true> Guard(*Lock);
   PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
   for (PassRegistryImpl::MapType::const_iterator I = Impl->PassInfoMap.begin(),
        E = Impl->PassInfoMap.end(); I != E; ++I)
@@ -160,7 +161,7 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
   assert(ImplementationInfo &&
          "Must register pass before adding to AnalysisGroup!");
 
-  sys::SmartScopedLock<true> Guard(*Lock);
+  sys::SmartScopedWriter<true> Guard(*Lock);
 
   // Make sure we keep track of the fact that the implementation implements
   // the interface.
@@ -186,13 +187,13 @@ void PassRegistry::registerAnalysisGroup(const void *InterfaceID,
 }
 
 void PassRegistry::addRegistrationListener(PassRegistrationListener *L) {
-  sys::SmartScopedLock<true> Guard(*Lock);
+  sys::SmartScopedWriter<true> Guard(*Lock);
   PassRegistryImpl *Impl = static_cast<PassRegistryImpl*>(getImpl());
   Impl->Listeners.push_back(L);
 }
 
 void PassRegistry::removeRegistrationListener(PassRegistrationListener *L) {
-  sys::SmartScopedLock<true> Guard(*Lock);
+  sys::SmartScopedWriter<true> Guard(*Lock);
 
   // NOTE: This is necessary, because removeRegistrationListener() can be called
   // as part of the llvm_shutdown sequence. Since we have no control over the
diff --git a/contrib/llvm/lib/IR/Type.cpp b/contrib/llvm/lib/IR/Type.cpp
index 46c61fc..432cbc9 100644
--- a/contrib/llvm/lib/IR/Type.cpp
+++ b/contrib/llvm/lib/IR/Type.cpp
@@ -616,11 +616,7 @@ bool StructType::isLayoutIdentical(StructType *Other) const {
 /// getTypeByName - Return the type with the specified name, or null if there
 /// is none by that name.
 StructType *Module::getTypeByName(StringRef Name) const {
-  StringMap<StructType*>::iterator I =
-    getContext().pImpl->NamedStructTypes.find(Name);
-  if (I != getContext().pImpl->NamedStructTypes.end())
-    return I->second;
-  return 0;
+  return getContext().pImpl->NamedStructTypes.lookup(Name);
 }
 
diff --git a/contrib/llvm/lib/IR/TypeFinder.cpp b/contrib/llvm/lib/IR/TypeFinder.cpp
index d5e6203..689b903 100644
--- a/contrib/llvm/lib/IR/TypeFinder.cpp
+++ b/contrib/llvm/lib/IR/TypeFinder.cpp
@@ -44,6 +44,9 @@ void TypeFinder::run(const Module &M, bool onlyNamed) {
   for (Module::const_iterator FI = M.begin(), E = M.end(); FI != E; ++FI) {
     incorporateType(FI->getType());
 
+    if (FI->hasPrefixData())
+      incorporateValue(FI->getPrefixData());
+
     // First incorporate the arguments.
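[Editor's note] The Type.cpp hunk above replaces the find()/end()-check/dereference dance with StringMap::lookup(), which returns the mapped value, or a value-initialized one (a null pointer here) when the key is absent. The same simplification on a stub type:

    #include "llvm/ADT/StringMap.h"

    struct StructTypeStub {};   // stand-in for llvm::StructType

    StructTypeStub *getByName(const llvm::StringMap<StructTypeStub *> &Types,
                              llvm::StringRef Name) {
      return Types.lookup(Name);   // a miss yields a null StructTypeStub*
    }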
for (Function::const_arg_iterator AI = FI->arg_begin(), AE = FI->arg_end(); AI != AE; ++AI) @@ -91,19 +94,27 @@ void TypeFinder::clear() { /// incorporateType - This method adds the type to the list of used structures /// if it's not in there already. void TypeFinder::incorporateType(Type *Ty) { - // Check to see if we're already visited this type. + // Check to see if we've already visited this type. if (!VisitedTypes.insert(Ty).second) return; - // If this is a structure or opaque type, add a name for the type. - if (StructType *STy = dyn_cast(Ty)) - if (!OnlyNamed || STy->hasName()) - StructTypes.push_back(STy); - - // Recursively walk all contained types. - for (Type::subtype_iterator I = Ty->subtype_begin(), - E = Ty->subtype_end(); I != E; ++I) - incorporateType(*I); + SmallVector TypeWorklist; + TypeWorklist.push_back(Ty); + do { + Ty = TypeWorklist.pop_back_val(); + + // If this is a structure or opaque type, add a name for the type. + if (StructType *STy = dyn_cast(Ty)) + if (!OnlyNamed || STy->hasName()) + StructTypes.push_back(STy); + + // Add all unvisited subtypes to worklist for processing + for (Type::subtype_reverse_iterator I = Ty->subtype_rbegin(), + E = Ty->subtype_rend(); + I != E; ++I) + if (VisitedTypes.insert(*I).second) + TypeWorklist.push_back(*I); + } while (!TypeWorklist.empty()); } /// incorporateValue - This method is used to walk operand lists finding types diff --git a/contrib/llvm/lib/IR/Value.cpp b/contrib/llvm/lib/IR/Value.cpp index 89a3c05..62a3b31 100644 --- a/contrib/llvm/lib/IR/Value.cpp +++ b/contrib/llvm/lib/IR/Value.cpp @@ -112,21 +112,20 @@ bool Value::hasNUsesOrMore(unsigned N) const { /// isUsedInBasicBlock - Return true if this value is used in the specified /// basic block. bool Value::isUsedInBasicBlock(const BasicBlock *BB) const { - // Start by scanning over the instructions looking for a use before we start - // the expensive use iteration. - unsigned MaxBlockSize = 3; - for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { - if (std::find(I->op_begin(), I->op_end(), this) != I->op_end()) + // This can be computed either by scanning the instructions in BB, or by + // scanning the use list of this Value. Both lists can be very long, but + // usually one is quite short. + // + // Scan both lists simultaneously until one is exhausted. This limits the + // search to the shorter list. + BasicBlock::const_iterator BI = BB->begin(), BE = BB->end(); + const_use_iterator UI = use_begin(), UE = use_end(); + for (; BI != BE && UI != UE; ++BI, ++UI) { + // Scan basic block: Check if this Value is used by the instruction at BI. + if (std::find(BI->op_begin(), BI->op_end(), this) != BI->op_end()) return true; - if (--MaxBlockSize == 0) // If the block is larger fall back to use_iterator - break; - } - - if (MaxBlockSize != 0) // We scanned the entire block and found no use. - return false; - - for (const_use_iterator I = use_begin(), E = use_end(); I != E; ++I) { - const Instruction *User = dyn_cast(*I); + // Scan use list: Check if the use at UI is in BB. 
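[Editor's note] The TypeFinder::incorporateType() rewrite nearby trades recursion over subtypes for an explicit worklist, testing-and-inserting into the visited set before each push so every node is queued at most once and deep or cyclic type graphs cannot overflow the stack. The generic shape of that rewrite, with std containers standing in for the LLVM ones:

    #include <set>
    #include <vector>

    struct Node { std::vector<Node *> Children; };

    void visitAll(Node *Root, std::set<Node *> &Visited) {
      if (!Visited.insert(Root).second)
        return;                              // already seen
      std::vector<Node *> Worklist(1, Root);
      do {
        Node *N = Worklist.back();
        Worklist.pop_back();
        // ... process N, as the real code records named struct types ...
        for (std::vector<Node *>::reverse_iterator I = N->Children.rbegin(),
                                                   E = N->Children.rend();
             I != E; ++I)
          if (Visited.insert(*I).second)     // mark at push time
            Worklist.push_back(*I);
      } while (!Worklist.empty());
    }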
+ const Instruction *User = dyn_cast(*UI); if (User && User->getParent() == BB) return true; } @@ -366,7 +365,8 @@ static Value *stripPointerCastsAndOffsets(Value *V) { break; } V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { + } else if (Operator::getOpcode(V) == Instruction::BitCast || + Operator::getOpcode(V) == Instruction::AddrSpaceCast) { V = cast(V)->getOperand(0); } else if (GlobalAlias *GA = dyn_cast(V)) { if (StripKind == PSK_ZeroIndices || GA->mayBeOverridden()) @@ -394,6 +394,42 @@ Value *Value::stripInBoundsConstantOffsets() { return stripPointerCastsAndOffsets(this); } +Value *Value::stripAndAccumulateInBoundsConstantOffsets(const DataLayout &DL, + APInt &Offset) { + if (!getType()->isPointerTy()) + return this; + + assert(Offset.getBitWidth() == DL.getPointerSizeInBits(cast( + getType())->getAddressSpace()) && + "The offset must have exactly as many bits as our pointer."); + + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet Visited; + Visited.insert(this); + Value *V = this; + do { + if (GEPOperator *GEP = dyn_cast(V)) { + if (!GEP->isInBounds()) + return V; + APInt GEPOffset(Offset); + if (!GEP->accumulateConstantOffset(DL, GEPOffset)) + return V; + Offset = GEPOffset; + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast(V)) { + V = GA->getAliasee(); + } else { + return V; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } while (Visited.insert(V)); + + return V; +} + Value *Value::stripInBoundsOffsets() { return stripPointerCastsAndOffsets(this); } @@ -699,9 +735,5 @@ void ValueHandleBase::ValueIsRAUWd(Value *Old, Value *New) { #endif } -// Default implementation for CallbackVH. -void CallbackVH::allUsesReplacedWith(Value *) {} - -void CallbackVH::deleted() { - setValPtr(NULL); -} +// Pin the vtable to this file. 
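[Editor's note] The new Value::isUsedInBasicBlock() above walks the block's instruction list and the value's use list in lockstep, so the cost is bounded by the shorter of the two lists; a hit on either probe ends the search, and exhausting either list proves there is no use in the block. The lockstep trick in isolation:

    #include <list>

    bool eitherProbeHits(const std::list<int> &A, const std::list<int> &B,
                         bool (*HitA)(int), bool (*HitB)(int)) {
      std::list<int>::const_iterator AI = A.begin(), AE = A.end();
      std::list<int>::const_iterator BI = B.begin(), BE = B.end();
      // Advance both iterators together: work is bounded by the shorter list.
      for (; AI != AE && BI != BE; ++AI, ++BI)
        if (HitA(*AI) || HitB(*BI))
          return true;
      return false;   // one list ran out, so no hit exists
    }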
+void CallbackVH::anchor() {} diff --git a/contrib/llvm/lib/IR/ValueTypes.cpp b/contrib/llvm/lib/IR/ValueTypes.cpp index ba04d60..2d4da95 100644 --- a/contrib/llvm/lib/IR/ValueTypes.cpp +++ b/contrib/llvm/lib/IR/ValueTypes.cpp @@ -134,6 +134,7 @@ std::string EVT::getEVTString() const { case MVT::v16i1: return "v16i1"; case MVT::v32i1: return "v32i1"; case MVT::v64i1: return "v64i1"; + case MVT::v1i8: return "v1i8"; case MVT::v2i8: return "v2i8"; case MVT::v4i8: return "v4i8"; case MVT::v8i8: return "v8i8"; @@ -156,11 +157,15 @@ std::string EVT::getEVTString() const { case MVT::v4i64: return "v4i64"; case MVT::v8i64: return "v8i64"; case MVT::v16i64: return "v16i64"; + case MVT::v1f32: return "v1f32"; case MVT::v2f32: return "v2f32"; case MVT::v2f16: return "v2f16"; + case MVT::v4f16: return "v4f16"; + case MVT::v8f16: return "v8f16"; case MVT::v4f32: return "v4f32"; case MVT::v8f32: return "v8f32"; case MVT::v16f32: return "v16f32"; + case MVT::v1f64: return "v1f64"; case MVT::v2f64: return "v2f64"; case MVT::v4f64: return "v4f64"; case MVT::v8f64: return "v8f64"; @@ -197,6 +202,7 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v16i1: return VectorType::get(Type::getInt1Ty(Context), 16); case MVT::v32i1: return VectorType::get(Type::getInt1Ty(Context), 32); case MVT::v64i1: return VectorType::get(Type::getInt1Ty(Context), 64); + case MVT::v1i8: return VectorType::get(Type::getInt8Ty(Context), 1); case MVT::v2i8: return VectorType::get(Type::getInt8Ty(Context), 2); case MVT::v4i8: return VectorType::get(Type::getInt8Ty(Context), 4); case MVT::v8i8: return VectorType::get(Type::getInt8Ty(Context), 8); @@ -220,10 +226,14 @@ Type *EVT::getTypeForEVT(LLVMContext &Context) const { case MVT::v8i64: return VectorType::get(Type::getInt64Ty(Context), 8); case MVT::v16i64: return VectorType::get(Type::getInt64Ty(Context), 16); case MVT::v2f16: return VectorType::get(Type::getHalfTy(Context), 2); + case MVT::v4f16: return VectorType::get(Type::getHalfTy(Context), 4); + case MVT::v8f16: return VectorType::get(Type::getHalfTy(Context), 8); + case MVT::v1f32: return VectorType::get(Type::getFloatTy(Context), 1); case MVT::v2f32: return VectorType::get(Type::getFloatTy(Context), 2); case MVT::v4f32: return VectorType::get(Type::getFloatTy(Context), 4); case MVT::v8f32: return VectorType::get(Type::getFloatTy(Context), 8); case MVT::v16f32: return VectorType::get(Type::getFloatTy(Context), 16); + case MVT::v1f64: return VectorType::get(Type::getDoubleTy(Context), 1); case MVT::v2f64: return VectorType::get(Type::getDoubleTy(Context), 2); case MVT::v4f64: return VectorType::get(Type::getDoubleTy(Context), 4); case MVT::v8f64: return VectorType::get(Type::getDoubleTy(Context), 8); diff --git a/contrib/llvm/lib/IR/Verifier.cpp b/contrib/llvm/lib/IR/Verifier.cpp index d106173..da6b573 100644 --- a/contrib/llvm/lib/IR/Verifier.cpp +++ b/contrib/llvm/lib/IR/Verifier.cpp @@ -53,8 +53,10 @@ #include "llvm/ADT/StringExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Assembly/Writer.h" +#include "llvm/DebugInfo.h" #include "llvm/IR/CallingConv.h" #include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/InlineAsm.h" #include "llvm/IR/IntrinsicInst.h" @@ -66,6 +68,7 @@ #include "llvm/PassManager.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -74,6 +77,9 @@ 
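[Editor's note] CallbackVH::anchor() just above is LLVM's vtable-anchoring idiom: once a class's virtuals may all be defined inline, one deliberately out-of-line virtual is kept so the compiler emits the vtable (and RTTI) in exactly one object file rather than in every user. In miniature:

    // Header side: inline virtuals plus one intentionally out-of-line one.
    struct Handle {
      virtual void onDeleted() {}   // an inline default is now acceptable
      virtual void anchor();        // declared here, defined in one .cpp only
      virtual ~Handle() {}
    };

    // Implementation file side: the lone out-of-line definition pins the
    // vtable to this translation unit.
    void Handle::anchor() {}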
#include using namespace llvm; +static cl::opt DisableDebugInfoVerifier("disable-debug-info-verifier", + cl::init(true)); + namespace { // Anonymous namespace for class struct PreVerifier : public FunctionPass { static char ID; // Pass ID, replacement for typeid @@ -93,7 +99,7 @@ namespace { // Anonymous namespace for class for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { if (I->empty() || !I->back().isTerminator()) { - dbgs() << "Basic Block in function '" << F.getName() + dbgs() << "Basic Block in function '" << F.getName() << "' does not have terminator!\n"; WriteAsOperand(dbgs(), I, true); dbgs() << "\n"; @@ -110,7 +116,7 @@ namespace { // Anonymous namespace for class } char PreVerifier::ID = 0; -INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification", +INITIALIZE_PASS(PreVerifier, "preverify", "Preliminary module verification", false, false) static char &PreVerifyID = PreVerifier::ID; @@ -123,6 +129,7 @@ namespace { Module *Mod; // Module we are verifying right now LLVMContext *Context; // Context within which we are verifying DominatorTree *DT; // Dominator Tree, caution can be null! + const DataLayout *DL; std::string Messages; raw_string_ostream MessagesStr; @@ -142,15 +149,18 @@ namespace { /// the same personality function. const Value *PersonalityFn; + /// Finder keeps track of all debug info MDNodes in a Module. + DebugInfoFinder Finder; + Verifier() : FunctionPass(ID), Broken(false), - action(AbortProcessAction), Mod(0), Context(0), DT(0), + action(AbortProcessAction), Mod(0), Context(0), DT(0), DL(0), MessagesStr(Messages), PersonalityFn(0) { initializeVerifierPass(*PassRegistry::getPassRegistry()); } explicit Verifier(VerifierFailureAction ctn) : FunctionPass(ID), Broken(false), action(ctn), Mod(0), - Context(0), DT(0), MessagesStr(Messages), PersonalityFn(0) { + Context(0), DT(0), DL(0), MessagesStr(Messages), PersonalityFn(0) { initializeVerifierPass(*PassRegistry::getPassRegistry()); } @@ -158,6 +168,8 @@ namespace { Mod = &M; Context = &M.getContext(); + DL = getAnalysisIfAvailable(); + // We must abort before returning back to the pass manager, or else the // pass manager may try to run other passes on the broken module. return abortIfBroken(); @@ -170,10 +182,15 @@ namespace { Mod = F.getParent(); if (!Context) Context = &F.getContext(); + Finder.reset(); visit(F); InstsInThisBlock.clear(); PersonalityFn = 0; + if (!DisableDebugInfoVerifier) + // Verify Debug Info. + verifyDebugInfo(); + // We must abort before returning back to the pass manager, or else the // pass manager may try to run other passes on the broken module. return abortIfBroken(); @@ -188,11 +205,11 @@ namespace { if (I->isDeclaration()) visitFunction(*I); } - for (Module::global_iterator I = M.global_begin(), E = M.global_end(); + for (Module::global_iterator I = M.global_begin(), E = M.global_end(); I != E; ++I) visitGlobalVariable(*I); - for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); + for (Module::alias_iterator I = M.alias_begin(), E = M.alias_end(); I != E; ++I) visitGlobalAlias(*I); @@ -201,6 +218,14 @@ namespace { visitNamedMDNode(*I); visitModuleFlags(M); + visitModuleIdents(M); + + if (!DisableDebugInfoVerifier) { + Finder.reset(); + Finder.processModule(M); + // Verify Debug Info. + verifyDebugInfo(); + } // If the module is broken, abort at this time. 
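[Editor's note] DisableDebugInfoVerifier above is a plain self-owned cl::opt<bool> defaulting to true, which makes the new (and potentially expensive) debug-info verification opt-in for now. The flag shape with a hypothetical name:

    #include "llvm/Support/CommandLine.h"

    static llvm::cl::opt<bool>
    DisableExtraChecks("disable-extra-checks", llvm::cl::init(true),
                       llvm::cl::desc("Skip the hypothetical extra checks"));

    bool shouldRunExtraChecks() { return !DisableExtraChecks; }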
return abortIfBroken(); @@ -242,6 +267,7 @@ namespace { void visitGlobalAlias(GlobalAlias &GA); void visitNamedMDNode(NamedMDNode &NMD); void visitMDNode(MDNode &MD, Function *F); + void visitModuleIdents(Module &M); void visitModuleFlags(Module &M); void visitModuleFlag(MDNode *Op, DenseMap &SeenIDs, SmallVectorImpl &Requirements); @@ -263,6 +289,7 @@ namespace { void visitIntToPtrInst(IntToPtrInst &I); void visitPtrToIntInst(PtrToIntInst &I); void visitBitCastInst(BitCastInst &I); + void visitAddrSpaceCastInst(AddrSpaceCastInst &I); void visitPHINode(PHINode &PN); void visitBinaryOperator(BinaryOperator &B); void visitICmpInst(ICmpInst &IC); @@ -301,6 +328,8 @@ namespace { bool VerifyIntrinsicType(Type *Ty, ArrayRef &Infos, SmallVectorImpl &ArgTys); + bool VerifyIntrinsicIsVarArg(bool isVarArg, + ArrayRef &Infos); bool VerifyAttributeCount(AttributeSet Attrs, unsigned Params); void VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, bool isFunction, const Value *V); @@ -309,6 +338,11 @@ namespace { void VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, const Value *V); + void VerifyBitcastType(const Value *V, Type *DestTy, Type *SrcTy); + void VerifyConstantExprBitcastType(const ConstantExpr *CE); + + void verifyDebugInfo(); + void WriteValue(const Value *V) { if (!V) return; if (isa(V)) { @@ -406,10 +440,6 @@ void Verifier::visitGlobalValue(GlobalValue &GV) { Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(), "Only global arrays can have appending linkage!", GVar); } - - Assert1(!GV.hasLinkOnceODRAutoHideLinkage() || GV.hasDefaultVisibility(), - "linkonce_odr_auto_hide can only have default visibility!", - &GV); } void Verifier::visitGlobalVariable(GlobalVariable &GV) { @@ -450,7 +480,7 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) { } if (GV.hasName() && (GV.getName() == "llvm.used" || - GV.getName() == "llvm.compiler_used")) { + GV.getName() == "llvm.compiler.used")) { Assert1(!GV.hasInitializer() || GV.hasAppendingLinkage(), "invalid linkage for intrinsic global variable", &GV); Type *GVType = GV.getType()->getElementType(); @@ -463,24 +493,50 @@ void Verifier::visitGlobalVariable(GlobalVariable &GV) { Assert1(InitArray, "wrong initalizer for intrinsic global variable", Init); for (unsigned i = 0, e = InitArray->getNumOperands(); i != e; ++i) { - Value *V = Init->getOperand(i)->stripPointerCasts(); - // stripPointerCasts strips aliases, so we only need to check for - // variables and functions. 
- Assert1(isa(V) || isa(V), - "invalid llvm.used member", V); + Value *V = Init->getOperand(i)->stripPointerCastsNoFollowAliases(); + Assert1( + isa(V) || isa(V) || isa(V), + "invalid llvm.used member", V); + Assert1(V->hasName(), "members of llvm.used must be named", V); } } } } + if (!GV.hasInitializer()) { + visitGlobalValue(GV); + return; + } + + // Walk any aggregate initializers looking for bitcasts between address spaces + SmallPtrSet Visited; + SmallVector WorkStack; + WorkStack.push_back(cast(GV.getInitializer())); + + while (!WorkStack.empty()) { + const Value *V = WorkStack.pop_back_val(); + if (!Visited.insert(V)) + continue; + + if (const User *U = dyn_cast(V)) { + for (unsigned I = 0, N = U->getNumOperands(); I != N; ++I) + WorkStack.push_back(U->getOperand(I)); + } + + if (const ConstantExpr *CE = dyn_cast(V)) { + VerifyConstantExprBitcastType(CE); + if (Broken) + return; + } + } + visitGlobalValue(GV); } void Verifier::visitGlobalAlias(GlobalAlias &GA) { Assert1(!GA.getName().empty(), "Alias name cannot be empty!", &GA); - Assert1(GA.hasExternalLinkage() || GA.hasLocalLinkage() || - GA.hasWeakLinkage(), + Assert1(GlobalAlias::isValidLinkage(GA.getLinkage()), "Alias should have external or external weak linkage!", &GA); Assert1(GA.getAliasee(), "Aliasee cannot be NULL!", &GA); @@ -488,18 +544,29 @@ void Verifier::visitGlobalAlias(GlobalAlias &GA) { "Alias and aliasee types should match!", &GA); Assert1(!GA.hasUnnamedAddr(), "Alias cannot have unnamed_addr!", &GA); - if (!isa(GA.getAliasee())) { - const ConstantExpr *CE = dyn_cast(GA.getAliasee()); - Assert1(CE && + Constant *Aliasee = GA.getAliasee(); + + if (!isa(Aliasee)) { + ConstantExpr *CE = dyn_cast(Aliasee); + Assert1(CE && (CE->getOpcode() == Instruction::BitCast || CE->getOpcode() == Instruction::GetElementPtr) && isa(CE->getOperand(0)), "Aliasee should be either GlobalValue or bitcast of GlobalValue", &GA); + + if (CE->getOpcode() == Instruction::BitCast) { + unsigned SrcAS = CE->getOperand(0)->getType()->getPointerAddressSpace(); + unsigned DstAS = CE->getType()->getPointerAddressSpace(); + + Assert1(SrcAS == DstAS, + "Alias bitcasts cannot be between different address spaces", + &GA); + } } - const GlobalValue* Aliasee = GA.resolveAliasedGlobal(/*stopOnWeak*/ false); - Assert1(Aliasee, + const GlobalValue* Resolved = GA.resolveAliasedGlobal(/*stopOnWeak*/ false); + Assert1(Resolved, "Aliasing chain should end with function or global variable", &GA); visitGlobalValue(GA); @@ -553,6 +620,24 @@ void Verifier::visitMDNode(MDNode &MD, Function *F) { } } +void Verifier::visitModuleIdents(Module &M) { + const NamedMDNode *Idents = M.getNamedMetadata("llvm.ident"); + if (!Idents) + return; + + // llvm.ident takes a list of metadata entry. Each entry has only one string. + // Scan each llvm.ident entry and make sure that this requirement is met. 
+ for (unsigned i = 0, e = Idents->getNumOperands(); i != e; ++i) { + const MDNode *N = Idents->getOperand(i); + Assert1(N->getNumOperands() == 1, + "incorrect number of operands in llvm.ident metadata", N); + Assert1(isa(N->getOperand(0)), + ("invalid value for llvm.ident metadata entry operand" + "(the operand should be a string)"), + N->getOperand(0)); + } +} + void Verifier::visitModuleFlags(Module &M) { const NamedMDNode *Flags = M.getModuleFlagsMetadata(); if (!Flags) return; @@ -654,7 +739,7 @@ void Verifier::visitModuleFlag(MDNode *Op, DenseMap&SeenIDs, } void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, - bool isFunction, const Value* V) { + bool isFunction, const Value *V) { unsigned Slot = ~0U; for (unsigned I = 0, E = Attrs.getNumSlots(); I != E; ++I) if (Attrs.getSlotIndex(I) == Idx) { @@ -671,8 +756,6 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, if (I->getKindAsEnum() == Attribute::NoReturn || I->getKindAsEnum() == Attribute::NoUnwind || - I->getKindAsEnum() == Attribute::ReadNone || - I->getKindAsEnum() == Attribute::ReadOnly || I->getKindAsEnum() == Attribute::NoInline || I->getKindAsEnum() == Attribute::AlwaysInline || I->getKindAsEnum() == Attribute::OptimizeForSize || @@ -692,15 +775,26 @@ void Verifier::VerifyAttributeTypes(AttributeSet Attrs, unsigned Idx, I->getKindAsEnum() == Attribute::SanitizeMemory || I->getKindAsEnum() == Attribute::MinSize || I->getKindAsEnum() == Attribute::NoDuplicate || - I->getKindAsEnum() == Attribute::NoBuiltin) { - if (!isFunction) - CheckFailed("Attribute '" + I->getKindAsString() + - "' only applies to functions!", V); - return; - } else if (isFunction) { - CheckFailed("Attribute '" + I->getKindAsString() + - "' does not apply to functions!", V); + I->getKindAsEnum() == Attribute::Builtin || + I->getKindAsEnum() == Attribute::NoBuiltin || + I->getKindAsEnum() == Attribute::Cold || + I->getKindAsEnum() == Attribute::OptimizeNone) { + if (!isFunction) { + CheckFailed("Attribute '" + I->getAsString() + + "' only applies to functions!", V); return; + } + } else if (I->getKindAsEnum() == Attribute::ReadOnly || + I->getKindAsEnum() == Attribute::ReadNone) { + if (Idx == 0) { + CheckFailed("Attribute '" + I->getAsString() + + "' does not apply to function returns"); + return; + } + } else if (isFunction) { + CheckFailed("Attribute '" + I->getAsString() + + "' does not apply to functions!", V); + return; } } } @@ -830,6 +924,65 @@ void Verifier::VerifyFunctionAttrs(FunctionType *FT, AttributeSet Attrs, Attrs.hasAttribute(AttributeSet::FunctionIndex, Attribute::AlwaysInline)), "Attributes 'noinline and alwaysinline' are incompatible!", V); + + if (Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeNone)) { + Assert1(Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoInline), + "Attribute 'optnone' requires 'noinline'!", V); + + Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize), + "Attributes 'optsize and optnone' are incompatible!", V); + + Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::MinSize), + "Attributes 'minsize and optnone' are incompatible!", V); + } +} + +void Verifier::VerifyBitcastType(const Value *V, Type *DestTy, Type *SrcTy) { + // Get the size of the types in bits, we'll need this later + unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits(); + unsigned DestBitSize = DestTy->getPrimitiveSizeInBits(); + + // BitCast implies a no-op cast of type only. No bits change. 
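[Editor's note] visitModuleIdents() above accepts only nodes with exactly one operand, and that operand must be a string; this matches the shape clang emits for its version stamp. For reference, a module passing these checks would carry something like the following (LLVM 3.4 textual IR; the version string is illustrative):

    !llvm.ident = !{!0}
    !0 = metadata !{metadata !"clang version 3.4"}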
+ // However, you can't cast pointers to anything but pointers. + Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(), + "Bitcast requires both operands to be pointer or neither", V); + Assert1(SrcBitSize == DestBitSize, + "Bitcast requires types of same width", V); + + // Disallow aggregates. + Assert1(!SrcTy->isAggregateType(), + "Bitcast operand must not be aggregate", V); + Assert1(!DestTy->isAggregateType(), + "Bitcast type must not be aggregate", V); + + // Without datalayout, assume all address spaces are the same size. + // Don't check if both types are not pointers. + // Skip casts between scalars and vectors. + if (!DL || + !SrcTy->isPtrOrPtrVectorTy() || + !DestTy->isPtrOrPtrVectorTy() || + SrcTy->isVectorTy() != DestTy->isVectorTy()) { + return; + } + + unsigned SrcAS = SrcTy->getPointerAddressSpace(); + unsigned DstAS = DestTy->getPointerAddressSpace(); + + Assert1(SrcAS == DstAS, + "Bitcasts between pointers of different address spaces is not legal." + "Use AddrSpaceCast instead.", V); +} + +void Verifier::VerifyConstantExprBitcastType(const ConstantExpr *CE) { + if (CE->getOpcode() == Instruction::BitCast) { + Type *SrcTy = CE->getOperand(0)->getType(); + Type *DstTy = CE->getType(); + VerifyBitcastType(CE, DstTy, SrcTy); + } } bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) { @@ -842,7 +995,7 @@ bool Verifier::VerifyAttributeCount(AttributeSet Attrs, unsigned Params) { || (LastIndex == AttributeSet::FunctionIndex && (LastSlot == 0 || Attrs.getSlotIndex(LastSlot - 1) <= Params))) return true; - + return false; } @@ -861,7 +1014,7 @@ void Verifier::visitFunction(Function &F) { "# formal arguments must match # of arguments for function type!", &F, FT); Assert1(F.getReturnType()->isFirstClassType() || - F.getReturnType()->isVoidTy() || + F.getReturnType()->isVoidTy() || F.getReturnType()->isStructTy(), "Functions cannot return aggregate values!", &F); @@ -876,6 +1029,13 @@ void Verifier::visitFunction(Function &F) { // Check function attributes. VerifyFunctionAttrs(FT, Attrs, &F); + // On function declarations/definitions, we do not support the builtin + // attribute. We do not check this in VerifyFunctionAttrs since that is + // checking for Attributes that can/can not ever be on functions. + Assert1(!Attrs.hasAttribute(AttributeSet::FunctionIndex, + Attribute::Builtin), + "Attribute 'builtin' can only be applied to a callsite.", &F); + // Check that this function meets the restrictions on this calling convention. switch (F.getCallingConv()) { default: @@ -921,25 +1081,25 @@ void Verifier::visitFunction(Function &F) { // Verify that this function (which has a body) is not named "llvm.*". It // is not legal to define intrinsics. Assert1(!isLLVMdotName, "llvm intrinsics cannot be defined!", &F); - + // Check the entry node BasicBlock *Entry = &F.getEntryBlock(); Assert1(pred_begin(Entry) == pred_end(Entry), "Entry block to function must not have predecessors!", Entry); - + // The address of the entry block cannot be taken, unless it is dead. if (Entry->hasAddressTaken()) { Assert1(!BlockAddress::get(Entry)->isConstantUsed(), "blockaddress may not be used with the entry block!", Entry); } } - + // If this function is actually an intrinsic, verify that it is only used in // direct call/invokes, never having its "address taken". 
if (F.getIntrinsicID()) { const User *U; if (F.hasAddressTaken(&U)) - Assert1(0, "Invalid user of intrinsic instruction!", U); + Assert1(0, "Invalid user of intrinsic instruction!", U); } } @@ -1014,7 +1174,7 @@ void Verifier::visitBranchInst(BranchInst &BI) { void Verifier::visitReturnInst(ReturnInst &RI) { Function *F = RI.getParent()->getParent(); unsigned N = RI.getNumOperands(); - if (F->getReturnType()->isVoidTy()) + if (F->getReturnType()->isVoidTy()) Assert2(N == 0, "Found return instr that returns non-void in Function of void " "return type!", &RI, F->getReturnType()); @@ -1032,29 +1192,14 @@ void Verifier::visitSwitchInst(SwitchInst &SI) { // Check to make sure that all of the constants in the switch instruction // have the same type as the switched-on value. Type *SwitchTy = SI.getCondition()->getType(); - IntegerType *IntTy = cast(SwitchTy); - IntegersSubsetToBB Mapping; - std::map RangeSetMap; + SmallPtrSet Constants; for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { - IntegersSubset CaseRanges = i.getCaseValueEx(); - for (unsigned ri = 0, rie = CaseRanges.getNumItems(); ri < rie; ++ri) { - IntegersSubset::Range r = CaseRanges.getItem(ri); - Assert1(((const APInt&)r.getLow()).getBitWidth() == IntTy->getBitWidth(), - "Switch constants must all be same type as switch value!", &SI); - Assert1(((const APInt&)r.getHigh()).getBitWidth() == IntTy->getBitWidth(), - "Switch constants must all be same type as switch value!", &SI); - Mapping.add(r); - RangeSetMap[r] = i.getCaseIndex(); - } - } - - IntegersSubsetToBB::RangeIterator errItem; - if (!Mapping.verify(errItem)) { - unsigned CaseIndex = RangeSetMap[errItem->first]; - SwitchInst::CaseIt i(&SI, CaseIndex); - Assert2(false, "Duplicate integer as switch case", &SI, i.getCaseValueEx()); + Assert1(i.getCaseValue()->getType() == SwitchTy, + "Switch constants must all be same type as switch value!", &SI); + Assert2(Constants.insert(i.getCaseValue()), + "Duplicate integer as switch case", &SI, i.getCaseValue()); } - + visitTerminatorInst(SI); } @@ -1309,26 +1454,25 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) { } void Verifier::visitBitCastInst(BitCastInst &I) { - // Get the source and destination types Type *SrcTy = I.getOperand(0)->getType(); Type *DestTy = I.getType(); + VerifyBitcastType(&I, DestTy, SrcTy); + visitInstruction(I); +} - // Get the size of the types in bits, we'll need this later - unsigned SrcBitSize = SrcTy->getPrimitiveSizeInBits(); - unsigned DestBitSize = DestTy->getPrimitiveSizeInBits(); - - // BitCast implies a no-op cast of type only. No bits change. - // However, you can't cast pointers to anything but pointers. - Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(), - "Bitcast requires both operands to be pointer or neither", &I); - Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I); - - // Disallow aggregates. 
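The rewritten duplicate-case check in visitSwitchInst above works because ConstantInt values are uniqued per LLVMContext, so pointer identity doubles as value equality. A standalone illustration (sketch; in the 3.4-era ADTs, SmallPtrSet::insert() returns bool):

    #include "llvm/ADT/ArrayRef.h"
    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/IR/Constants.h"
    using namespace llvm;

    static bool switchCasesUnique(ArrayRef<ConstantInt *> Cases) {
      SmallPtrSet<ConstantInt *, 32> Seen;
      for (unsigned i = 0, e = Cases.size(); i != e; ++i)
        if (!Seen.insert(Cases[i]))  // false => already present
          return false;              // uniquing makes this a value collision
      return true;
    }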
- Assert1(!SrcTy->isAggregateType(), - "Bitcast operand must not be aggregate", &I); - Assert1(!DestTy->isAggregateType(), - "Bitcast type must not be aggregate", &I); +void Verifier::visitAddrSpaceCastInst(AddrSpaceCastInst &I) { + Type *SrcTy = I.getOperand(0)->getType(); + Type *DestTy = I.getType(); + Assert1(SrcTy->isPtrOrPtrVectorTy(), + "AddrSpaceCast source must be a pointer", &I); + Assert1(DestTy->isPtrOrPtrVectorTy(), + "AddrSpaceCast result must be a pointer", &I); + Assert1(SrcTy->getPointerAddressSpace() != DestTy->getPointerAddressSpace(), + "AddrSpaceCast must be between different address spaces", &I); + if (SrcTy->isVectorTy()) + Assert1(SrcTy->getVectorNumElements() == DestTy->getVectorNumElements(), + "AddrSpaceCast vector pointer number of elements mismatch", &I); visitInstruction(I); } @@ -1339,7 +1483,7 @@ void Verifier::visitPHINode(PHINode &PN) { // This can be tested by checking whether the instruction before this is // either nonexistent (because this is begin()) or is a PHI node. If not, // then there is some other instruction before a PHI. - Assert2(&PN == &PN.getParent()->front() || + Assert2(&PN == &PN.getParent()->front() || isa(--BasicBlock::iterator(&PN)), "PHI nodes not grouped at top of basic block!", &PN, PN.getParent()); @@ -1403,9 +1547,9 @@ void Verifier::VerifyCallSite(CallSite CS) { // Check attributes on the varargs part. for (unsigned Idx = 1 + FTy->getNumParams(); Idx <= CS.arg_size(); ++Idx) { - Type *Ty = CS.getArgument(Idx-1)->getType(); + Type *Ty = CS.getArgument(Idx-1)->getType(); VerifyParameterAttrs(Attrs, Idx, Ty, false, I); - + if (Attrs.hasAttribute(Idx, Attribute::Nest)) { Assert1(!SawNest, "More than one parameter has attribute nest!", I); SawNest = true; @@ -1718,7 +1862,7 @@ void Verifier::visitStoreInst(StoreInst &SI) { void Verifier::visitAllocaInst(AllocaInst &AI) { PointerType *PTy = AI.getType(); - Assert1(PTy->getAddressSpace() == 0, + Assert1(PTy->getAddressSpace() == 0, "Allocation instruction pointer not in the generic address space!", &AI); Assert1(PTy->getElementType()->isSized(), "Cannot allocate unsized type", @@ -1790,7 +1934,7 @@ void Verifier::visitExtractValueInst(ExtractValueInst &EVI) { EVI.getIndices()) == EVI.getType(), "Invalid ExtractValueInst operands!", &EVI); - + visitInstruction(EVI); } @@ -1799,7 +1943,7 @@ void Verifier::visitInsertValueInst(InsertValueInst &IVI) { IVI.getIndices()) == IVI.getOperand(1)->getType(), "Invalid InsertValueInst operands!", &IVI); - + visitInstruction(IVI); } @@ -1886,7 +2030,7 @@ void Verifier::visitInstruction(Instruction &I) { // Check that the return value of the instruction is either void or a legal // value type. 
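Given visitAddrSpaceCastInst() above, IR producers must now emit addrspacecast rather than bitcast when the address space changes. Roughly (a sketch assuming the 3.4-era IRBuilder gained CreateAddrSpaceCast together with the instruction):

    #include "llvm/IR/IRBuilder.h"
    using namespace llvm;

    // Hypothetical helper: reinterpret an AS0 pointer as i8* in AS1.
    static Value *castToAS1(IRBuilder<> &B, Value *PtrInAS0) {
      Type *I8PtrAS1 = B.getInt8Ty()->getPointerTo(/*AddrSpace=*/1);
      return B.CreateAddrSpaceCast(PtrInAS0, I8PtrAS1); // CreateBitCast would now fail
    }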
- Assert1(I.getType()->isVoidTy() || + Assert1(I.getType()->isVoidTy() || I.getType()->isFirstClassType(), "Instruction returns a non-scalar type!", &I); @@ -1944,6 +2088,27 @@ void Verifier::visitInstruction(Instruction &I) { Assert1((i + 1 == e && isa(I)) || (i + 3 == e && isa(I)), "Cannot take the address of an inline asm!", &I); + } else if (ConstantExpr *CE = dyn_cast(I.getOperand(i))) { + if (CE->getType()->isPtrOrPtrVectorTy()) { + // If we have a ConstantExpr pointer, we need to see if it came from an + // illegal bitcast (inttoptr ) + SmallVector Stack; + SmallPtrSet Visited; + Stack.push_back(CE); + + while (!Stack.empty()) { + const ConstantExpr *V = Stack.pop_back_val(); + if (!Visited.insert(V)) + continue; + + VerifyConstantExprBitcastType(V); + + for (unsigned I = 0, N = V->getNumOperands(); I != N; ++I) { + if (ConstantExpr *Op = dyn_cast(V->getOperand(I))) + Stack.push_back(Op); + } + } + } } } @@ -1954,7 +2119,7 @@ void Verifier::visitInstruction(Instruction &I) { Value *Op0 = MD->getOperand(0); if (ConstantFP *CFP0 = dyn_cast_or_null(Op0)) { APFloat Accuracy = CFP0->getValueAPF(); - Assert1(Accuracy.isNormal() && !Accuracy.isNegative(), + Assert1(Accuracy.isFiniteNonZero() && !Accuracy.isNegative(), "fpmath accuracy not a positive number!", &I); } else { Assert1(false, "invalid fpmath accuracy!", &I); @@ -1964,6 +2129,11 @@ void Verifier::visitInstruction(Instruction &I) { MDNode *MD = I.getMetadata(LLVMContext::MD_range); Assert1(!MD || isa(I), "Ranges are only for loads!", &I); + if (!DisableDebugInfoVerifier) { + MD = I.getMetadata(LLVMContext::MD_dbg); + Finder.processLocation(*Mod, DILocation(MD)); + } + InstsInThisBlock.insert(&I); } @@ -1978,12 +2148,13 @@ bool Verifier::VerifyIntrinsicType(Type *Ty, using namespace Intrinsic; // If we ran out of descriptors, there are too many arguments. - if (Infos.empty()) return true; + if (Infos.empty()) return true; IITDescriptor D = Infos.front(); Infos = Infos.slice(1); - + switch (D.Kind) { case IITDescriptor::Void: return !Ty->isVoidTy(); + case IITDescriptor::VarArg: return true; case IITDescriptor::MMX: return !Ty->isX86_MMXTy(); case IITDescriptor::Metadata: return !Ty->isMetadataTy(); case IITDescriptor::Half: return !Ty->isHalfTy(); @@ -2000,29 +2171,29 @@ bool Verifier::VerifyIntrinsicType(Type *Ty, return PT == 0 || PT->getAddressSpace() != D.Pointer_AddressSpace || VerifyIntrinsicType(PT->getElementType(), Infos, ArgTys); } - + case IITDescriptor::Struct: { StructType *ST = dyn_cast(Ty); if (ST == 0 || ST->getNumElements() != D.Struct_NumElements) return true; - + for (unsigned i = 0, e = D.Struct_NumElements; i != e; ++i) if (VerifyIntrinsicType(ST->getElementType(i), Infos, ArgTys)) return true; return false; } - + case IITDescriptor::Argument: // Two cases here - If this is the second occurrence of an argument, verify - // that the later instance matches the previous instance. + // that the later instance matches the previous instance. if (D.getArgumentNumber() < ArgTys.size()) - return Ty != ArgTys[D.getArgumentNumber()]; - + return Ty != ArgTys[D.getArgumentNumber()]; + // Otherwise, if this is the first instance of an argument, record it and // verify the "Any" kind. 
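The constant-expression operand scan in visitInstruction above lost its template parameters to text extraction; the underlying worklist pattern, restored and generalized (sketch, 3.4-era ADTs; walkConstantExprs() is a hypothetical helper):

    #include "llvm/ADT/SmallPtrSet.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/IR/Constants.h"
    using namespace llvm;

    // Depth-first walk over a constant-expression tree, visiting each node
    // once (mirrors the scan for illegal bitcasts shown above).
    static void walkConstantExprs(const ConstantExpr *Root,
                                  void (*Visit)(const ConstantExpr *)) {
      SmallVector<const ConstantExpr *, 16> Stack;
      SmallPtrSet<const ConstantExpr *, 16> Visited;
      Stack.push_back(Root);
      while (!Stack.empty()) {
        const ConstantExpr *CE = Stack.pop_back_val();
        if (!Visited.insert(CE))  // 3.4-era insert() returns bool
          continue;
        Visit(CE);
        for (unsigned I = 0, N = CE->getNumOperands(); I != N; ++I)
          if (const ConstantExpr *Op = dyn_cast<ConstantExpr>(CE->getOperand(I)))
            Stack.push_back(Op);
      }
    }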
assert(D.getArgumentNumber() == ArgTys.size() && "Table consistency error"); ArgTys.push_back(Ty); - + switch (D.getArgumentKind()) { case IITDescriptor::AK_AnyInteger: return !Ty->isIntOrIntVectorTy(); case IITDescriptor::AK_AnyFloat: return !Ty->isFPOrFPVectorTy(); @@ -2030,7 +2201,7 @@ bool Verifier::VerifyIntrinsicType(Type *Ty, case IITDescriptor::AK_AnyPointer: return !isa(Ty); } llvm_unreachable("all argument kinds not covered"); - + case IITDescriptor::ExtendVecArgument: // This may only be used when referring to a previous vector argument. return D.getArgumentNumber() >= ArgTys.size() || @@ -2048,6 +2219,33 @@ bool Verifier::VerifyIntrinsicType(Type *Ty, llvm_unreachable("unhandled"); } +/// \brief Verify if the intrinsic has variable arguments. +/// This method is intended to be called after all the fixed arguments have been +/// verified first. +/// +/// This method returns true on error and does not print an error message. +bool +Verifier::VerifyIntrinsicIsVarArg(bool isVarArg, + ArrayRef &Infos) { + using namespace Intrinsic; + + // If there are no descriptors left, then it can't be a vararg. + if (Infos.empty()) + return isVarArg ? true : false; + + // There should be only one descriptor remaining at this point. + if (Infos.size() != 1) + return true; + + // Check and verify the descriptor. + IITDescriptor D = Infos.front(); + Infos = Infos.slice(1); + if (D.Kind == IITDescriptor::VarArg) + return isVarArg ? false : true; + + return true; +} + /// visitIntrinsicFunction - Allow intrinsics to be verified in different ways. /// void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { @@ -2058,8 +2256,8 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { // Verify that the intrinsic prototype lines up with what the .td files // describe. FunctionType *IFTy = IF->getFunctionType(); - Assert1(!IFTy->isVarArg(), "Intrinsic prototypes are not varargs", IF); - + bool IsVarArg = IFTy->isVarArg(); + SmallVector Table; getIntrinsicInfoTableEntries(ID, Table); ArrayRef TableRef = Table; @@ -2070,6 +2268,16 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { for (unsigned i = 0, e = IFTy->getNumParams(); i != e; ++i) Assert1(!VerifyIntrinsicType(IFTy->getParamType(i), TableRef, ArgTys), "Intrinsic has incorrect argument type!", IF); + + // Verify if the intrinsic call matches the vararg property. + if (IsVarArg) + Assert1(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef), + "Intrinsic was not defined with variable arguments!", IF); + else + Assert1(!VerifyIntrinsicIsVarArg(IsVarArg, TableRef), + "Callsite was not defined with variable arguments!", IF); + + // All descriptors should be absorbed by now. Assert1(TableRef.empty(), "Intrinsic has too few arguments!", IF); // Now that we have the intrinsic ID and the actual argument types (and we @@ -2078,7 +2286,7 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { // the name. Assert1(Intrinsic::getName(ID, ArgTys) == IF->getName(), "Intrinsic name not mangled correctly for type arguments!", IF); - + // If the intrinsic takes MDNode arguments, verify that they are either global // or are local to *this* function. 
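For reference, the name that the mangling check above compares against is derived from the overloaded parameter types; for example (sketch, 3.4-era Intrinsic API):

    #include "llvm/IR/Intrinsics.h"
    #include "llvm/IR/Module.h"
    #include "llvm/IR/Type.h"
    using namespace llvm;

    static Function *declareMemcpy32(Module &M) {
      LLVMContext &C = M.getContext();
      Type *Tys[] = { Type::getInt8PtrTy(C), Type::getInt8PtrTy(C),
                      Type::getInt32Ty(C) };
      // Overloaded types are encoded in the name: "llvm.memcpy.p0i8.p0i8.i32".
      return Intrinsic::getDeclaration(&M, Intrinsic::memcpy, Tys);
    }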
for (unsigned i = 0, e = CI.getNumArgOperands(); i != e; ++i) @@ -2100,7 +2308,17 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { MDNode *MD = cast(CI.getArgOperand(0)); Assert1(MD->getNumOperands() == 1, "invalid llvm.dbg.declare intrinsic call 2", &CI); + if (!DisableDebugInfoVerifier) + Finder.processDeclare(*Mod, cast(&CI)); } break; + case Intrinsic::dbg_value: { //llvm.dbg.value + if (!DisableDebugInfoVerifier) { + Assert1(CI.getArgOperand(0) && isa(CI.getArgOperand(0)), + "invalid llvm.dbg.value intrinsic call 1", &CI); + Finder.processValue(*Mod, cast(&CI)); + } + break; + } case Intrinsic::memcpy: case Intrinsic::memmove: case Intrinsic::memset: @@ -2162,6 +2380,28 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) { } } +void Verifier::verifyDebugInfo() { + // Verify Debug Info. + if (!DisableDebugInfoVerifier) { + for (DebugInfoFinder::iterator I = Finder.compile_unit_begin(), + E = Finder.compile_unit_end(); I != E; ++I) + Assert1(DICompileUnit(*I).Verify(), "DICompileUnit does not Verify!", *I); + for (DebugInfoFinder::iterator I = Finder.subprogram_begin(), + E = Finder.subprogram_end(); I != E; ++I) + Assert1(DISubprogram(*I).Verify(), "DISubprogram does not Verify!", *I); + for (DebugInfoFinder::iterator I = Finder.global_variable_begin(), + E = Finder.global_variable_end(); I != E; ++I) + Assert1(DIGlobalVariable(*I).Verify(), + "DIGlobalVariable does not Verify!", *I); + for (DebugInfoFinder::iterator I = Finder.type_begin(), + E = Finder.type_end(); I != E; ++I) + Assert1(DIType(*I).Verify(), "DIType does not Verify!", *I); + for (DebugInfoFinder::iterator I = Finder.scope_begin(), + E = Finder.scope_end(); I != E; ++I) + Assert1(DIScope(*I).Verify(), "DIScope does not Verify!", *I); + } +} + //===----------------------------------------------------------------------===// // Implement the public interfaces to this file... 
//===----------------------------------------------------------------------===// @@ -2181,6 +2421,7 @@ bool llvm::verifyFunction(const Function &f, VerifierFailureAction action) { FunctionPassManager FPM(F.getParent()); Verifier *V = new Verifier(action); FPM.add(V); + FPM.doInitialization(); FPM.run(F); return V->Broken; } diff --git a/contrib/llvm/lib/IRReader/IRReader.cpp b/contrib/llvm/lib/IRReader/IRReader.cpp index eeec14e..935e81d 100644 --- a/contrib/llvm/lib/IRReader/IRReader.cpp +++ b/contrib/llvm/lib/IRReader/IRReader.cpp @@ -11,10 +11,15 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/Assembly/Parser.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/system_error.h" #include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm-c/Core.h" +#include "llvm-c/IRReader.h" using namespace llvm; @@ -22,8 +27,8 @@ namespace llvm { extern bool TimePassesIsEnabled; } -static const char *TimeIRParsingGroupName = "LLVM IR Parsing"; -static const char *TimeIRParsingName = "Parse IR"; +static const char *const TimeIRParsingGroupName = "LLVM IR Parsing"; +static const char *const TimeIRParsingName = "Parse IR"; Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, @@ -48,7 +53,7 @@ Module *llvm::getLazyIRModule(MemoryBuffer *Buffer, SMDiagnostic &Err, Module *llvm::getLazyIRFileModule(const std::string &Filename, SMDiagnostic &Err, LLVMContext &Context) { OwningPtr File; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + ec.message()); return 0; @@ -79,7 +84,7 @@ Module *llvm::ParseIR(MemoryBuffer *Buffer, SMDiagnostic &Err, Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err, LLVMContext &Context) { OwningPtr File; - if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename.c_str(), File)) { + if (error_code ec = MemoryBuffer::getFileOrSTDIN(Filename, File)) { Err = SMDiagnostic(Filename, SourceMgr::DK_Error, "Could not open input file: " + ec.message()); return 0; @@ -87,3 +92,30 @@ Module *llvm::ParseIRFile(const std::string &Filename, SMDiagnostic &Err, return ParseIR(File.take(), Err, Context); } + +//===----------------------------------------------------------------------===// +// C API. +//===----------------------------------------------------------------------===// + +LLVMBool LLVMParseIRInContext(LLVMContextRef ContextRef, + LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, + char **OutMessage) { + SMDiagnostic Diag; + + *OutM = wrap(ParseIR(unwrap(MemBuf), Diag, *unwrap(ContextRef))); + + if(!*OutM) { + if (OutMessage) { + std::string buf; + raw_string_ostream os(buf); + + Diag.print(NULL, os, false); + os.flush(); + + *OutMessage = strdup(buf.c_str()); + } + return 1; + } + + return 0; +} diff --git a/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp new file mode 100644 index 0000000..2b3648e --- /dev/null +++ b/contrib/llvm/lib/LTO/LTOCodeGenerator.cpp @@ -0,0 +1,521 @@ +//===-LTOCodeGenerator.cpp - LLVM Link Time Optimizer ---------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
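A caller of the new LLVMParseIRInContext() entry point added above would look roughly like this (a sketch; the error message is allocated with strdup() in the implementation, and per ParseIR()'s contract the buffer is owned by the callee after the call, on success or failure):

    #include "llvm-c/Core.h"
    #include "llvm-c/IRReader.h"
    #include <stdio.h>
    #include <stdlib.h>

    static int parseIRFile(const char *Path) {
      LLVMMemoryBufferRef Buf;
      LLVMModuleRef M;
      char *Msg = NULL;
      if (LLVMCreateMemoryBufferWithContentsOfFile(Path, &Buf, &Msg)) {
        fprintf(stderr, "open failed: %s\n", Msg);
        LLVMDisposeMessage(Msg);
        return 1;
      }
      /* Buf is consumed here whether or not parsing succeeds. */
      if (LLVMParseIRInContext(LLVMGetGlobalContext(), Buf, &M, &Msg)) {
        fprintf(stderr, "parse failed: %s\n", Msg);
        free(Msg); /* came from strdup() in the implementation above */
        return 1;
      }
      LLVMDisposeModule(M);
      return 0;
    }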
+// +//===----------------------------------------------------------------------===// +// +// This file implements the Link Time Optimization library. This library is +// intended to be used by linker to optimize code at link time. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/LTOCodeGenerator.h" +#include "llvm/LTO/LTOModule.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Verifier.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/CodeGen/RuntimeLibcalls.h" +#include "llvm/Config/config.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/InitializePasses.h" +#include "llvm/Linker.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/PassManager.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Signals.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/system_error.h" +#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Transforms/IPO.h" +#include "llvm/Transforms/IPO/PassManagerBuilder.h" +#include "llvm/Transforms/ObjCARC.h" +using namespace llvm; + +const char* LTOCodeGenerator::getVersionString() { +#ifdef LLVM_VERSION_INFO + return PACKAGE_NAME " version " PACKAGE_VERSION ", " LLVM_VERSION_INFO; +#else + return PACKAGE_NAME " version " PACKAGE_VERSION; +#endif +} + +LTOCodeGenerator::LTOCodeGenerator() + : Context(getGlobalContext()), Linker(new Module("ld-temp.o", Context)), + TargetMach(NULL), EmitDwarfDebugInfo(false), ScopeRestrictionsDone(false), + CodeModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC), NativeObjectFile(NULL) { + initializeLTOPasses(); +} + +LTOCodeGenerator::~LTOCodeGenerator() { + delete TargetMach; + delete NativeObjectFile; + TargetMach = NULL; + NativeObjectFile = NULL; + + Linker.deleteModule(); + + for (std::vector::iterator I = CodegenOptions.begin(), + E = CodegenOptions.end(); + I != E; ++I) + free(*I); +} + +// Initialize LTO passes. Please keep this funciton in sync with +// PassManagerBuilder::populateLTOPassManager(), and make sure all LTO +// passes are initialized. 
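Consumers of LTOCodeGenerator are expected to register targets before driving it; a typical prologue (sketch; the llvm-c lto shim performs a similar initialization on its own):

    #include "llvm/Support/TargetSelect.h"

    static void initForLTO() {
      llvm::InitializeAllTargets();     // register target machines
      llvm::InitializeAllTargetMCs();   // MC-layer backends
      llvm::InitializeAllAsmPrinters(); // needed to emit object code
      llvm::InitializeAllAsmParsers();  // needed for module-level asm
    }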
+// +void LTOCodeGenerator::initializeLTOPasses() { + PassRegistry &R = *PassRegistry::getPassRegistry(); + + initializeInternalizePassPass(R); + initializeIPSCCPPass(R); + initializeGlobalOptPass(R); + initializeConstantMergePass(R); + initializeDAHPass(R); + initializeInstCombinerPass(R); + initializeSimpleInlinerPass(R); + initializePruneEHPass(R); + initializeGlobalDCEPass(R); + initializeArgPromotionPass(R); + initializeJumpThreadingPass(R); + initializeSROAPass(R); + initializeSROA_DTPass(R); + initializeSROA_SSAUpPass(R); + initializeFunctionAttrsPass(R); + initializeGlobalsModRefPass(R); + initializeLICMPass(R); + initializeGVNPass(R); + initializeMemCpyOptPass(R); + initializeDCEPass(R); + initializeCFGSimplifyPassPass(R); +} + +bool LTOCodeGenerator::addModule(LTOModule* mod, std::string& errMsg) { + bool ret = Linker.linkInModule(mod->getLLVVMModule(), &errMsg); + + const std::vector &undefs = mod->getAsmUndefinedRefs(); + for (int i = 0, e = undefs.size(); i != e; ++i) + AsmUndefinedRefs[undefs[i]] = 1; + + return !ret; +} + +void LTOCodeGenerator::setTargetOptions(TargetOptions options) { + Options.LessPreciseFPMADOption = options.LessPreciseFPMADOption; + Options.NoFramePointerElim = options.NoFramePointerElim; + Options.AllowFPOpFusion = options.AllowFPOpFusion; + Options.UnsafeFPMath = options.UnsafeFPMath; + Options.NoInfsFPMath = options.NoInfsFPMath; + Options.NoNaNsFPMath = options.NoNaNsFPMath; + Options.HonorSignDependentRoundingFPMathOption = + options.HonorSignDependentRoundingFPMathOption; + Options.UseSoftFloat = options.UseSoftFloat; + Options.FloatABIType = options.FloatABIType; + Options.NoZerosInBSS = options.NoZerosInBSS; + Options.GuaranteedTailCallOpt = options.GuaranteedTailCallOpt; + Options.DisableTailCalls = options.DisableTailCalls; + Options.StackAlignmentOverride = options.StackAlignmentOverride; + Options.TrapFuncName = options.TrapFuncName; + Options.PositionIndependentExecutable = options.PositionIndependentExecutable; + Options.EnableSegmentedStacks = options.EnableSegmentedStacks; + Options.UseInitArray = options.UseInitArray; +} + +void LTOCodeGenerator::setDebugInfo(lto_debug_model debug) { + switch (debug) { + case LTO_DEBUG_MODEL_NONE: + EmitDwarfDebugInfo = false; + return; + + case LTO_DEBUG_MODEL_DWARF: + EmitDwarfDebugInfo = true; + return; + } + llvm_unreachable("Unknown debug format!"); +} + +void LTOCodeGenerator::setCodePICModel(lto_codegen_model model) { + switch (model) { + case LTO_CODEGEN_PIC_MODEL_STATIC: + case LTO_CODEGEN_PIC_MODEL_DYNAMIC: + case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC: + CodeModel = model; + return; + } + llvm_unreachable("Unknown PIC model!"); +} + +bool LTOCodeGenerator::writeMergedModules(const char *path, + std::string &errMsg) { + if (!determineTarget(errMsg)) + return false; + + // mark which symbols can not be internalized + applyScopeRestrictions(); + + // create output file + std::string ErrInfo; + tool_output_file Out(path, ErrInfo, sys::fs::F_Binary); + if (!ErrInfo.empty()) { + errMsg = "could not open bitcode file for writing: "; + errMsg += path; + return false; + } + + // write bitcode to it + WriteBitcodeToFile(Linker.getModule(), Out.os()); + Out.os().close(); + + if (Out.os().has_error()) { + errMsg = "could not write bitcode file: "; + errMsg += path; + Out.os().clear_error(); + return false; + } + + Out.keep(); + return true; +} + +bool LTOCodeGenerator::compile_to_file(const char** name, + bool disableOpt, + bool disableInline, + bool disableGVNLoadPRE, + std::string& errMsg) { + // make 
unique temp .o file to put generated object file + SmallString<128> Filename; + int FD; + error_code EC = sys::fs::createTemporaryFile("lto-llvm", "o", FD, Filename); + if (EC) { + errMsg = EC.message(); + return false; + } + + // generate object file + tool_output_file objFile(Filename.c_str(), FD); + + bool genResult = generateObjectFile(objFile.os(), disableOpt, disableInline, + disableGVNLoadPRE, errMsg); + objFile.os().close(); + if (objFile.os().has_error()) { + objFile.os().clear_error(); + sys::fs::remove(Twine(Filename)); + return false; + } + + objFile.keep(); + if (!genResult) { + sys::fs::remove(Twine(Filename)); + return false; + } + + NativeObjectPath = Filename.c_str(); + *name = NativeObjectPath.c_str(); + return true; +} + +const void* LTOCodeGenerator::compile(size_t* length, + bool disableOpt, + bool disableInline, + bool disableGVNLoadPRE, + std::string& errMsg) { + const char *name; + if (!compile_to_file(&name, disableOpt, disableInline, disableGVNLoadPRE, + errMsg)) + return NULL; + + // remove old buffer if compile() called twice + delete NativeObjectFile; + + // read .o file into memory buffer + OwningPtr BuffPtr; + if (error_code ec = MemoryBuffer::getFile(name, BuffPtr, -1, false)) { + errMsg = ec.message(); + sys::fs::remove(NativeObjectPath); + return NULL; + } + NativeObjectFile = BuffPtr.take(); + + // remove temp files + sys::fs::remove(NativeObjectPath); + + // return buffer, unless error + if (NativeObjectFile == NULL) + return NULL; + *length = NativeObjectFile->getBufferSize(); + return NativeObjectFile->getBufferStart(); +} + +bool LTOCodeGenerator::determineTarget(std::string &errMsg) { + if (TargetMach != NULL) + return true; + + std::string TripleStr = Linker.getModule()->getTargetTriple(); + if (TripleStr.empty()) + TripleStr = sys::getDefaultTargetTriple(); + llvm::Triple Triple(TripleStr); + + // create target machine from info for merged modules + const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); + if (march == NULL) + return false; + + // The relocation model is actually a static member of TargetMachine and + // needs to be set before the TargetMachine is instantiated. + Reloc::Model RelocModel = Reloc::Default; + switch (CodeModel) { + case LTO_CODEGEN_PIC_MODEL_STATIC: + RelocModel = Reloc::Static; + break; + case LTO_CODEGEN_PIC_MODEL_DYNAMIC: + RelocModel = Reloc::PIC_; + break; + case LTO_CODEGEN_PIC_MODEL_DYNAMIC_NO_PIC: + RelocModel = Reloc::DynamicNoPIC; + break; + } + + // construct LTOModule, hand over ownership of module and target + SubtargetFeatures Features; + Features.getDefaultSubtargetFeatures(Triple); + std::string FeatureStr = Features.getString(); + // Set a default CPU for Darwin triples. 
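Putting compile_to_file() together with LTOModule, a hypothetical minimal driver (a sketch against the interfaces shown in this patch; module ownership and most error handling are elided):

    #include "llvm-c/lto.h"
    #include "llvm/LTO/LTOCodeGenerator.h"
    #include "llvm/LTO/LTOModule.h"
    #include "llvm/Target/TargetOptions.h"
    #include <string>

    // Link one bitcode file and emit a native object; NB: error paths
    // leak the LTOModule in this sketch.
    static bool ltoCompileOne(const char *Bitcode, std::string &ObjPath,
                              std::string &Err) {
      llvm::TargetOptions Opts;
      llvm::LTOModule *M = llvm::LTOModule::makeLTOModule(Bitcode, Opts, Err);
      if (!M)
        return false;
      llvm::LTOCodeGenerator CG;
      CG.setCodePICModel(LTO_CODEGEN_PIC_MODEL_DYNAMIC);
      if (!CG.addModule(M, Err)) // true on success, per addModule() above
        return false;
      const char *Tmp = NULL;
      if (!CG.compile_to_file(&Tmp, false, false, false, Err))
        return false;
      ObjPath = Tmp; // copy out: the string is owned by the generator
      return true;
    }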
+ if (MCpu.empty() && Triple.isOSDarwin()) { + if (Triple.getArch() == llvm::Triple::x86_64) + MCpu = "core2"; + else if (Triple.getArch() == llvm::Triple::x86) + MCpu = "yonah"; + } + + TargetMach = march->createTargetMachine(TripleStr, MCpu, FeatureStr, Options, + RelocModel, CodeModel::Default, + CodeGenOpt::Aggressive); + return true; +} + +void LTOCodeGenerator:: +applyRestriction(GlobalValue &GV, + const ArrayRef &Libcalls, + std::vector &MustPreserveList, + SmallPtrSet &AsmUsed, + Mangler &Mangler) { + SmallString<64> Buffer; + Mangler.getNameWithPrefix(Buffer, &GV, false); + + if (GV.isDeclaration()) + return; + if (MustPreserveSymbols.count(Buffer)) + MustPreserveList.push_back(GV.getName().data()); + if (AsmUndefinedRefs.count(Buffer)) + AsmUsed.insert(&GV); + + // Conservatively append user-supplied runtime library functions to + // llvm.compiler.used. These could be internalized and deleted by + // optimizations like -globalopt, causing problems when later optimizations + // add new library calls (e.g., llvm.memset => memset and printf => puts). + // Leave it to the linker to remove any dead code (e.g. with -dead_strip). + if (isa(GV) && + std::binary_search(Libcalls.begin(), Libcalls.end(), GV.getName())) + AsmUsed.insert(&GV); +} + +static void findUsedValues(GlobalVariable *LLVMUsed, + SmallPtrSet &UsedValues) { + if (LLVMUsed == 0) return; + + ConstantArray *Inits = cast(LLVMUsed->getInitializer()); + for (unsigned i = 0, e = Inits->getNumOperands(); i != e; ++i) + if (GlobalValue *GV = + dyn_cast(Inits->getOperand(i)->stripPointerCasts())) + UsedValues.insert(GV); +} + +static void accumulateAndSortLibcalls(std::vector &Libcalls, + const TargetLibraryInfo& TLI, + const TargetLowering *Lowering) +{ + // TargetLibraryInfo has info on C runtime library calls on the current + // target. + for (unsigned I = 0, E = static_cast(LibFunc::NumLibFuncs); + I != E; ++I) { + LibFunc::Func F = static_cast(I); + if (TLI.has(F)) + Libcalls.push_back(TLI.getName(F)); + } + + // TargetLowering has info on library calls that CodeGen expects to be + // available, both from the C runtime and compiler-rt. + if (Lowering) + for (unsigned I = 0, E = static_cast(RTLIB::UNKNOWN_LIBCALL); + I != E; ++I) + if (const char *Name + = Lowering->getLibcallName(static_cast(I))) + Libcalls.push_back(Name); + + array_pod_sort(Libcalls.begin(), Libcalls.end()); + Libcalls.erase(std::unique(Libcalls.begin(), Libcalls.end()), + Libcalls.end()); +} + +void LTOCodeGenerator::applyScopeRestrictions() { + if (ScopeRestrictionsDone) + return; + Module *mergedModule = Linker.getModule(); + + // Start off with a verification pass. 
+ PassManager passes; + passes.add(createVerifierPass()); + + // mark which symbols can not be internalized + Mangler Mangler(TargetMach); + std::vector MustPreserveList; + SmallPtrSet AsmUsed; + std::vector Libcalls; + TargetLibraryInfo TLI(Triple(TargetMach->getTargetTriple())); + accumulateAndSortLibcalls(Libcalls, TLI, TargetMach->getTargetLowering()); + + for (Module::iterator f = mergedModule->begin(), + e = mergedModule->end(); f != e; ++f) + applyRestriction(*f, Libcalls, MustPreserveList, AsmUsed, Mangler); + for (Module::global_iterator v = mergedModule->global_begin(), + e = mergedModule->global_end(); v != e; ++v) + applyRestriction(*v, Libcalls, MustPreserveList, AsmUsed, Mangler); + for (Module::alias_iterator a = mergedModule->alias_begin(), + e = mergedModule->alias_end(); a != e; ++a) + applyRestriction(*a, Libcalls, MustPreserveList, AsmUsed, Mangler); + + GlobalVariable *LLVMCompilerUsed = + mergedModule->getGlobalVariable("llvm.compiler.used"); + findUsedValues(LLVMCompilerUsed, AsmUsed); + if (LLVMCompilerUsed) + LLVMCompilerUsed->eraseFromParent(); + + if (!AsmUsed.empty()) { + llvm::Type *i8PTy = llvm::Type::getInt8PtrTy(Context); + std::vector asmUsed2; + for (SmallPtrSet::const_iterator i = AsmUsed.begin(), + e = AsmUsed.end(); i !=e; ++i) { + GlobalValue *GV = *i; + Constant *c = ConstantExpr::getBitCast(GV, i8PTy); + asmUsed2.push_back(c); + } + + llvm::ArrayType *ATy = llvm::ArrayType::get(i8PTy, asmUsed2.size()); + LLVMCompilerUsed = + new llvm::GlobalVariable(*mergedModule, ATy, false, + llvm::GlobalValue::AppendingLinkage, + llvm::ConstantArray::get(ATy, asmUsed2), + "llvm.compiler.used"); + + LLVMCompilerUsed->setSection("llvm.metadata"); + } + + passes.add(createInternalizePass(MustPreserveList)); + + // apply scope restrictions + passes.run(*mergedModule); + + ScopeRestrictionsDone = true; +} + +/// Optimize merged modules using various IPO passes +bool LTOCodeGenerator::generateObjectFile(raw_ostream &out, + bool DisableOpt, + bool DisableInline, + bool DisableGVNLoadPRE, + std::string &errMsg) { + if (!this->determineTarget(errMsg)) + return false; + + Module *mergedModule = Linker.getModule(); + + // Mark which symbols can not be internalized + this->applyScopeRestrictions(); + + // Instantiate the pass manager to organize the passes. + PassManager passes; + + // Start off with a verification pass. + passes.add(createVerifierPass()); + + // Add an appropriate DataLayout instance for this module... + passes.add(new DataLayout(*TargetMach->getDataLayout())); + TargetMach->addAnalysisPasses(passes); + + // Enabling internalize here would use its AllButMain variant. It + // keeps only main if it exists and does nothing for libraries. Instead + // we create the pass ourselves with the symbol list provided by the linker. + if (!DisableOpt) + PassManagerBuilder().populateLTOPassManager(passes, + /*Internalize=*/false, + !DisableInline, + DisableGVNLoadPRE); + + // Make sure everything is still good. + passes.add(createVerifierPass()); + + PassManager codeGenPasses; + + codeGenPasses.add(new DataLayout(*TargetMach->getDataLayout())); + TargetMach->addAnalysisPasses(codeGenPasses); + + formatted_raw_ostream Out(out); + + // If the bitcode files contain ARC code and were compiled with optimization, + // the ObjCARCContractPass must be run, so do it unconditionally here. 
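The preserve-list flavour of internalize used in applyScopeRestrictions() above can also be driven standalone (sketch, 3.4-era pass API; internalizeExcept() is a hypothetical helper):

    #include "llvm/IR/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/IPO.h"
    #include <vector>

    // Hide everything except the named symbols.
    static void internalizeExcept(llvm::Module &M,
                                  const std::vector<const char *> &Keep) {
      llvm::PassManager PM;
      PM.add(llvm::createInternalizePass(Keep)); // Keep stays externally visible
      PM.run(M);
    }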
+ codeGenPasses.add(createObjCARCContractPass()); + + if (TargetMach->addPassesToEmitFile(codeGenPasses, Out, + TargetMachine::CGFT_ObjectFile)) { + errMsg = "target file type not supported"; + return false; + } + + // Run our queue of passes all at once now, efficiently. + passes.run(*mergedModule); + + // Run the code generator, and write assembly file + codeGenPasses.run(*mergedModule); + + return true; +} + +/// setCodeGenDebugOptions - Set codegen debugging options to aid in debugging +/// LTO problems. +void LTOCodeGenerator::setCodeGenDebugOptions(const char *options) { + for (std::pair o = getToken(options); + !o.first.empty(); o = getToken(o.second)) { + // ParseCommandLineOptions() expects argv[0] to be program name. Lazily add + // that. + if (CodegenOptions.empty()) + CodegenOptions.push_back(strdup("libLLVMLTO")); + CodegenOptions.push_back(strdup(o.first.str().c_str())); + } +} + +void LTOCodeGenerator::parseCodeGenDebugOptions() { + // if options were requested, set them + if (!CodegenOptions.empty()) + cl::ParseCommandLineOptions(CodegenOptions.size(), + const_cast(&CodegenOptions[0])); +} diff --git a/contrib/llvm/lib/LTO/LTOModule.cpp b/contrib/llvm/lib/LTO/LTOModule.cpp new file mode 100644 index 0000000..65416be --- /dev/null +++ b/contrib/llvm/lib/LTO/LTOModule.cpp @@ -0,0 +1,794 @@ +//===-- LTOModule.cpp - LLVM Link Time Optimizer --------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Link Time Optimization library. This library is +// intended to be used by linker to optimize code at link time. +// +//===----------------------------------------------------------------------===// + +#include "llvm/LTO/LTOModule.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCParser/MCAsmParser.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCTargetAsmParser.h" +#include "llvm/MC/SubtargetFeature.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/Path.h" +#include "llvm/Support/SourceMgr.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm/Support/TargetSelect.h" +#include "llvm/Support/system_error.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Transforms/Utils/GlobalStatus.h" +using namespace llvm; + +LTOModule::LTOModule(llvm::Module *m, llvm::TargetMachine *t) + : _module(m), _target(t), + _context(_target->getMCAsmInfo(), _target->getRegisterInfo(), NULL), + _mangler(t) {} + +/// isBitcodeFile - Returns 'true' if the file (or memory contents) is LLVM +/// bitcode. 
+bool LTOModule::isBitcodeFile(const void *mem, size_t length) { + return sys::fs::identify_magic(StringRef((const char *)mem, length)) == + sys::fs::file_magic::bitcode; +} + +bool LTOModule::isBitcodeFile(const char *path) { + sys::fs::file_magic type; + if (sys::fs::identify_magic(path, type)) + return false; + return type == sys::fs::file_magic::bitcode; +} + +/// isBitcodeFileForTarget - Returns 'true' if the file (or memory contents) is +/// LLVM bitcode for the specified triple. +bool LTOModule::isBitcodeFileForTarget(const void *mem, size_t length, + const char *triplePrefix) { + MemoryBuffer *buffer = makeBuffer(mem, length); + if (!buffer) + return false; + return isTargetMatch(buffer, triplePrefix); +} + +bool LTOModule::isBitcodeFileForTarget(const char *path, + const char *triplePrefix) { + OwningPtr buffer; + if (MemoryBuffer::getFile(path, buffer)) + return false; + return isTargetMatch(buffer.take(), triplePrefix); +} + +/// isTargetMatch - Returns 'true' if the memory buffer is for the specified +/// target triple. +bool LTOModule::isTargetMatch(MemoryBuffer *buffer, const char *triplePrefix) { + std::string Triple = getBitcodeTargetTriple(buffer, getGlobalContext()); + delete buffer; + return strncmp(Triple.c_str(), triplePrefix, strlen(triplePrefix)) == 0; +} + +/// makeLTOModule - Create an LTOModule. N.B. These methods take ownership of +/// the buffer. +LTOModule *LTOModule::makeLTOModule(const char *path, TargetOptions options, + std::string &errMsg) { + OwningPtr buffer; + if (error_code ec = MemoryBuffer::getFile(path, buffer)) { + errMsg = ec.message(); + return NULL; + } + return makeLTOModule(buffer.take(), options, errMsg); +} + +LTOModule *LTOModule::makeLTOModule(int fd, const char *path, + size_t size, TargetOptions options, + std::string &errMsg) { + return makeLTOModule(fd, path, size, 0, options, errMsg); +} + +LTOModule *LTOModule::makeLTOModule(int fd, const char *path, + size_t map_size, + off_t offset, + TargetOptions options, + std::string &errMsg) { + OwningPtr buffer; + if (error_code ec = + MemoryBuffer::getOpenFileSlice(fd, path, buffer, map_size, offset)) { + errMsg = ec.message(); + return NULL; + } + return makeLTOModule(buffer.take(), options, errMsg); +} + +LTOModule *LTOModule::makeLTOModule(const void *mem, size_t length, + TargetOptions options, + std::string &errMsg) { + OwningPtr buffer(makeBuffer(mem, length)); + if (!buffer) + return NULL; + return makeLTOModule(buffer.take(), options, errMsg); +} + +LTOModule *LTOModule::makeLTOModule(MemoryBuffer *buffer, + TargetOptions options, + std::string &errMsg) { + // parse bitcode buffer + OwningPtr m(getLazyBitcodeModule(buffer, getGlobalContext(), + &errMsg)); + if (!m) { + delete buffer; + return NULL; + } + + std::string TripleStr = m->getTargetTriple(); + if (TripleStr.empty()) + TripleStr = sys::getDefaultTargetTriple(); + llvm::Triple Triple(TripleStr); + + // find machine architecture for this module + const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); + if (!march) + return NULL; + + // construct LTOModule, hand over ownership of module and target + SubtargetFeatures Features; + Features.getDefaultSubtargetFeatures(Triple); + std::string FeatureStr = Features.getString(); + // Set a default CPU for Darwin triples. 
+ std::string CPU; + if (Triple.isOSDarwin()) { + if (Triple.getArch() == llvm::Triple::x86_64) + CPU = "core2"; + else if (Triple.getArch() == llvm::Triple::x86) + CPU = "yonah"; + } + + TargetMachine *target = march->createTargetMachine(TripleStr, CPU, FeatureStr, + options); + m->MaterializeAllPermanently(); + + LTOModule *Ret = new LTOModule(m.take(), target); + if (Ret->parseSymbols(errMsg)) { + delete Ret; + return NULL; + } + + return Ret; +} + +/// makeBuffer - Create a MemoryBuffer from a memory range. +MemoryBuffer *LTOModule::makeBuffer(const void *mem, size_t length) { + const char *startPtr = (const char*)mem; + return MemoryBuffer::getMemBuffer(StringRef(startPtr, length), "", false); +} + +/// objcClassNameFromExpression - Get string that the data pointer points to. +bool +LTOModule::objcClassNameFromExpression(const Constant *c, std::string &name) { + if (const ConstantExpr *ce = dyn_cast(c)) { + Constant *op = ce->getOperand(0); + if (GlobalVariable *gvn = dyn_cast(op)) { + Constant *cn = gvn->getInitializer(); + if (ConstantDataArray *ca = dyn_cast(cn)) { + if (ca->isCString()) { + name = ".objc_class_name_" + ca->getAsCString().str(); + return true; + } + } + } + } + return false; +} + +/// addObjCClass - Parse i386/ppc ObjC class data structure. +void LTOModule::addObjCClass(const GlobalVariable *clgv) { + const ConstantStruct *c = dyn_cast(clgv->getInitializer()); + if (!c) return; + + // second slot in __OBJC,__class is pointer to superclass name + std::string superclassName; + if (objcClassNameFromExpression(c->getOperand(1), superclassName)) { + NameAndAttributes info; + StringMap::value_type &entry = + _undefines.GetOrCreateValue(superclassName); + if (!entry.getValue().name) { + const char *symbolName = entry.getKey().data(); + info.name = symbolName; + info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; + info.isFunction = false; + info.symbol = clgv; + entry.setValue(info); + } + } + + // third slot in __OBJC,__class is pointer to class name + std::string className; + if (objcClassNameFromExpression(c->getOperand(2), className)) { + StringSet::value_type &entry = _defines.GetOrCreateValue(className); + entry.setValue(1); + + NameAndAttributes info; + info.name = entry.getKey().data(); + info.attributes = LTO_SYMBOL_PERMISSIONS_DATA | + LTO_SYMBOL_DEFINITION_REGULAR | LTO_SYMBOL_SCOPE_DEFAULT; + info.isFunction = false; + info.symbol = clgv; + _symbols.push_back(info); + } +} + +/// addObjCCategory - Parse i386/ppc ObjC category data structure. +void LTOModule::addObjCCategory(const GlobalVariable *clgv) { + const ConstantStruct *c = dyn_cast(clgv->getInitializer()); + if (!c) return; + + // second slot in __OBJC,__category is pointer to target class name + std::string targetclassName; + if (!objcClassNameFromExpression(c->getOperand(1), targetclassName)) + return; + + NameAndAttributes info; + StringMap::value_type &entry = + _undefines.GetOrCreateValue(targetclassName); + + if (entry.getValue().name) + return; + + const char *symbolName = entry.getKey().data(); + info.name = symbolName; + info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; + info.isFunction = false; + info.symbol = clgv; + entry.setValue(info); +} + +/// addObjCClassRef - Parse i386/ppc ObjC class list data structure. 
+void LTOModule::addObjCClassRef(const GlobalVariable *clgv) { + std::string targetclassName; + if (!objcClassNameFromExpression(clgv->getInitializer(), targetclassName)) + return; + + NameAndAttributes info; + StringMap::value_type &entry = + _undefines.GetOrCreateValue(targetclassName); + if (entry.getValue().name) + return; + + const char *symbolName = entry.getKey().data(); + info.name = symbolName; + info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; + info.isFunction = false; + info.symbol = clgv; + entry.setValue(info); +} + +/// addDefinedDataSymbol - Add a data symbol as defined to the list. +void LTOModule::addDefinedDataSymbol(const GlobalValue *v) { + // Add to list of defined symbols. + addDefinedSymbol(v, false); + + if (!v->hasSection() /* || !isTargetDarwin */) + return; + + // Special case i386/ppc ObjC data structures in magic sections: + // The issue is that the old ObjC object format did some strange + // contortions to avoid real linker symbols. For instance, the + // ObjC class data structure is allocated statically in the executable + // that defines that class. That data structures contains a pointer to + // its superclass. But instead of just initializing that part of the + // struct to the address of its superclass, and letting the static and + // dynamic linkers do the rest, the runtime works by having that field + // instead point to a C-string that is the name of the superclass. + // At runtime the objc initialization updates that pointer and sets + // it to point to the actual super class. As far as the linker + // knows it is just a pointer to a string. But then someone wanted the + // linker to issue errors at build time if the superclass was not found. + // So they figured out a way in mach-o object format to use an absolute + // symbols (.objc_class_name_Foo = 0) and a floating reference + // (.reference .objc_class_name_Bar) to cause the linker into erroring when + // a class was missing. + // The following synthesizes the implicit .objc_* symbols for the linker + // from the ObjC data structures generated by the front end. + + // special case if this data blob is an ObjC class definition + if (v->getSection().compare(0, 15, "__OBJC,__class,") == 0) { + if (const GlobalVariable *gv = dyn_cast(v)) { + addObjCClass(gv); + } + } + + // special case if this data blob is an ObjC category definition + else if (v->getSection().compare(0, 18, "__OBJC,__category,") == 0) { + if (const GlobalVariable *gv = dyn_cast(v)) { + addObjCCategory(gv); + } + } + + // special case if this data blob is the list of referenced classes + else if (v->getSection().compare(0, 18, "__OBJC,__cls_refs,") == 0) { + if (const GlobalVariable *gv = dyn_cast(v)) { + addObjCClassRef(gv); + } + } +} + +/// addDefinedFunctionSymbol - Add a function symbol as defined to the list. +void LTOModule::addDefinedFunctionSymbol(const Function *f) { + // add to list of defined symbols + addDefinedSymbol(f, true); +} + +static bool canBeHidden(const GlobalValue *GV) { + GlobalValue::LinkageTypes L = GV->getLinkage(); + + if (L != GlobalValue::LinkOnceODRLinkage) + return false; + + if (GV->hasUnnamedAddr()) + return true; + + GlobalStatus GS; + if (GlobalStatus::analyzeGlobal(GV, GS)) + return false; + + return !GS.IsCompared; +} + +/// addDefinedSymbol - Add a defined symbol to the list. 
+void LTOModule::addDefinedSymbol(const GlobalValue *def, bool isFunction) { + // ignore all llvm.* symbols + if (def->getName().startswith("llvm.")) + return; + + // string is owned by _defines + SmallString<64> Buffer; + _mangler.getNameWithPrefix(Buffer, def, false); + + // set alignment part log2() can have rounding errors + uint32_t align = def->getAlignment(); + uint32_t attr = align ? countTrailingZeros(def->getAlignment()) : 0; + + // set permissions part + if (isFunction) { + attr |= LTO_SYMBOL_PERMISSIONS_CODE; + } else { + const GlobalVariable *gv = dyn_cast(def); + if (gv && gv->isConstant()) + attr |= LTO_SYMBOL_PERMISSIONS_RODATA; + else + attr |= LTO_SYMBOL_PERMISSIONS_DATA; + } + + // set definition part + if (def->hasWeakLinkage() || def->hasLinkOnceLinkage() || + def->hasLinkerPrivateWeakLinkage()) + attr |= LTO_SYMBOL_DEFINITION_WEAK; + else if (def->hasCommonLinkage()) + attr |= LTO_SYMBOL_DEFINITION_TENTATIVE; + else + attr |= LTO_SYMBOL_DEFINITION_REGULAR; + + // set scope part + if (def->hasHiddenVisibility()) + attr |= LTO_SYMBOL_SCOPE_HIDDEN; + else if (def->hasProtectedVisibility()) + attr |= LTO_SYMBOL_SCOPE_PROTECTED; + else if (canBeHidden(def)) + attr |= LTO_SYMBOL_SCOPE_DEFAULT_CAN_BE_HIDDEN; + else if (def->hasExternalLinkage() || def->hasWeakLinkage() || + def->hasLinkOnceLinkage() || def->hasCommonLinkage() || + def->hasLinkerPrivateWeakLinkage()) + attr |= LTO_SYMBOL_SCOPE_DEFAULT; + else + attr |= LTO_SYMBOL_SCOPE_INTERNAL; + + StringSet::value_type &entry = _defines.GetOrCreateValue(Buffer); + entry.setValue(1); + + // fill information structure + NameAndAttributes info; + StringRef Name = entry.getKey(); + info.name = Name.data(); + assert(info.name[Name.size()] == '\0'); + info.attributes = attr; + info.isFunction = isFunction; + info.symbol = def; + + // add to table of symbols + _symbols.push_back(info); +} + +/// addAsmGlobalSymbol - Add a global symbol from module-level ASM to the +/// defined list. +void LTOModule::addAsmGlobalSymbol(const char *name, + lto_symbol_attributes scope) { + StringSet::value_type &entry = _defines.GetOrCreateValue(name); + + // only add new define if not already defined + if (entry.getValue()) + return; + + entry.setValue(1); + + NameAndAttributes &info = _undefines[entry.getKey().data()]; + + if (info.symbol == 0) { + // FIXME: This is trying to take care of module ASM like this: + // + // module asm ".zerofill __FOO, __foo, _bar_baz_qux, 0" + // + // but is gross and its mother dresses it funny. Have the ASM parser give us + // more details for this type of situation so that we're not guessing so + // much. + + // fill information structure + info.name = entry.getKey().data(); + info.attributes = + LTO_SYMBOL_PERMISSIONS_DATA | LTO_SYMBOL_DEFINITION_REGULAR | scope; + info.isFunction = false; + info.symbol = 0; + + // add to table of symbols + _symbols.push_back(info); + return; + } + + if (info.isFunction) + addDefinedFunctionSymbol(cast(info.symbol)); + else + addDefinedDataSymbol(info.symbol); + + _symbols.back().attributes &= ~LTO_SYMBOL_SCOPE_MASK; + _symbols.back().attributes |= scope; +} + +/// addAsmGlobalSymbolUndef - Add a global symbol from module-level ASM to the +/// undefined list. 
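Note that addDefinedSymbol() above packs log2(alignment) into the low bits of the attribute word via countTrailingZeros(); in isolation (sketch):

    #include "llvm/Support/MathExtras.h"
    #include <stdint.h>

    // 16-byte alignment is stored as 4; zero means no alignment recorded.
    // Align must be a power of two, as LLVM alignments always are.
    static uint32_t alignmentAttrBits(uint32_t Align) {
      return Align ? llvm::countTrailingZeros(Align) : 0;
    }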
+void LTOModule::addAsmGlobalSymbolUndef(const char *name) { + StringMap::value_type &entry = + _undefines.GetOrCreateValue(name); + + _asm_undefines.push_back(entry.getKey().data()); + + // we already have the symbol + if (entry.getValue().name) + return; + + uint32_t attr = LTO_SYMBOL_DEFINITION_UNDEFINED;; + attr |= LTO_SYMBOL_SCOPE_DEFAULT; + NameAndAttributes info; + info.name = entry.getKey().data(); + info.attributes = attr; + info.isFunction = false; + info.symbol = 0; + + entry.setValue(info); +} + +/// addPotentialUndefinedSymbol - Add a symbol which isn't defined just yet to a +/// list to be resolved later. +void +LTOModule::addPotentialUndefinedSymbol(const GlobalValue *decl, bool isFunc) { + // ignore all llvm.* symbols + if (decl->getName().startswith("llvm.")) + return; + + // ignore all aliases + if (isa(decl)) + return; + + SmallString<64> name; + _mangler.getNameWithPrefix(name, decl, false); + + StringMap::value_type &entry = + _undefines.GetOrCreateValue(name); + + // we already have the symbol + if (entry.getValue().name) + return; + + NameAndAttributes info; + + info.name = entry.getKey().data(); + + if (decl->hasExternalWeakLinkage()) + info.attributes = LTO_SYMBOL_DEFINITION_WEAKUNDEF; + else + info.attributes = LTO_SYMBOL_DEFINITION_UNDEFINED; + + info.isFunction = isFunc; + info.symbol = decl; + + entry.setValue(info); +} + +namespace { + class RecordStreamer : public MCStreamer { + public: + enum State { NeverSeen, Global, Defined, DefinedGlobal, Used }; + + private: + StringMap Symbols; + + void markDefined(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Global: + S = DefinedGlobal; + break; + case NeverSeen: + case Defined: + case Used: + S = Defined; + break; + } + } + void markGlobal(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + S = DefinedGlobal; + break; + + case NeverSeen: + case Global: + case Used: + S = Global; + break; + } + } + void markUsed(const MCSymbol &Symbol) { + State &S = Symbols[Symbol.getName()]; + switch (S) { + case DefinedGlobal: + case Defined: + case Global: + break; + + case NeverSeen: + case Used: + S = Used; + break; + } + } + + // FIXME: mostly copied for the obj streamer. + void AddValueSymbols(const MCExpr *Value) { + switch (Value->getKind()) { + case MCExpr::Target: + // FIXME: What should we do in here? + break; + + case MCExpr::Constant: + break; + + case MCExpr::Binary: { + const MCBinaryExpr *BE = cast(Value); + AddValueSymbols(BE->getLHS()); + AddValueSymbols(BE->getRHS()); + break; + } + + case MCExpr::SymbolRef: + markUsed(cast(Value)->getSymbol()); + break; + + case MCExpr::Unary: + AddValueSymbols(cast(Value)->getSubExpr()); + break; + } + } + + public: + typedef StringMap::const_iterator const_iterator; + + const_iterator begin() { + return Symbols.begin(); + } + + const_iterator end() { + return Symbols.end(); + } + + RecordStreamer(MCContext &Context) : MCStreamer(Context, 0) {} + + virtual void EmitInstruction(const MCInst &Inst) { + // Scan for values. 
+ for (unsigned i = Inst.getNumOperands(); i--; ) + if (Inst.getOperand(i).isExpr()) + AddValueSymbols(Inst.getOperand(i).getExpr()); + } + virtual void EmitLabel(MCSymbol *Symbol) { + Symbol->setSection(*getCurrentSection().first); + markDefined(*Symbol); + } + virtual void EmitDebugLabel(MCSymbol *Symbol) { + EmitLabel(Symbol); + } + virtual void EmitAssignment(MCSymbol *Symbol, const MCExpr *Value) { + // FIXME: should we handle aliases? + markDefined(*Symbol); + } + virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { + if (Attribute == MCSA_Global) + markGlobal(*Symbol); + return true; + } + virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size , unsigned ByteAlignment) { + markDefined(*Symbol); + } + virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) { + markDefined(*Symbol); + } + + virtual void EmitBundleAlignMode(unsigned AlignPow2) {} + virtual void EmitBundleLock(bool AlignToEnd) {} + virtual void EmitBundleUnlock() {} + + // Noop calls. + virtual void ChangeSection(const MCSection *Section, + const MCExpr *Subsection) {} + virtual void InitToTextSection() {} + virtual void InitSections() {} + virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) {} + virtual void EmitThumbFunc(MCSymbol *Func) {} + virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} + virtual void EmitWeakReference(MCSymbol *Alias, const MCSymbol *Symbol) {} + virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol) {} + virtual void EmitCOFFSymbolStorageClass(int StorageClass) {} + virtual void EmitCOFFSymbolType(int Type) {} + virtual void EndCOFFSymbolDef() {} + virtual void EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {} + virtual void EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, + unsigned ByteAlignment) {} + virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, + uint64_t Size, unsigned ByteAlignment) {} + virtual void EmitBytes(StringRef Data) {} + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) {} + virtual void EmitULEB128Value(const MCExpr *Value) {} + virtual void EmitSLEB128Value(const MCExpr *Value) {} + virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value, + unsigned ValueSize, + unsigned MaxBytesToEmit) {} + virtual void EmitCodeAlignment(unsigned ByteAlignment, + unsigned MaxBytesToEmit) {} + virtual bool EmitValueToOffset(const MCExpr *Offset, + unsigned char Value ) { return false; } + virtual void EmitFileDirective(StringRef Filename) {} + virtual void EmitDwarfAdvanceLineAddr(int64_t LineDelta, + const MCSymbol *LastLabel, + const MCSymbol *Label, + unsigned PointerSize) {} + virtual void FinishImpl() {} + virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { + RecordProcEnd(Frame); + } + }; +} // end anonymous namespace + +/// addAsmGlobalSymbols - Add global symbols from module-level ASM to the +/// defined or undefined lists. 
+/// addAsmGlobalSymbols - Add global symbols from module-level ASM to the
+/// defined or undefined lists.
+bool LTOModule::addAsmGlobalSymbols(std::string &errMsg) {
+  const std::string &inlineAsm = _module->getModuleInlineAsm();
+  if (inlineAsm.empty())
+    return false;
+
+  OwningPtr<RecordStreamer> Streamer(new RecordStreamer(_context));
+  MemoryBuffer *Buffer = MemoryBuffer::getMemBuffer(inlineAsm);
+  SourceMgr SrcMgr;
+  SrcMgr.AddNewSourceBuffer(Buffer, SMLoc());
+  OwningPtr<MCAsmParser> Parser(createMCAsmParser(SrcMgr,
+                                                  _context, *Streamer,
+                                                  *_target->getMCAsmInfo()));
+  const Target &T = _target->getTarget();
+  OwningPtr<MCInstrInfo> MCII(T.createMCInstrInfo());
+  OwningPtr<MCSubtargetInfo>
+    STI(T.createMCSubtargetInfo(_target->getTargetTriple(),
+                                _target->getTargetCPU(),
+                                _target->getTargetFeatureString()));
+  OwningPtr<MCTargetAsmParser> TAP(T.createMCAsmParser(*STI, *Parser.get(), *MCII));
+  if (!TAP) {
+    errMsg = "target " + std::string(T.getName()) +
+      " does not define AsmParser.";
+    return true;
+  }
+
+  Parser->setTargetParser(*TAP);
+  if (Parser->Run(false))
+    return true;
+
+  for (RecordStreamer::const_iterator i = Streamer->begin(),
+         e = Streamer->end(); i != e; ++i) {
+    StringRef Key = i->first();
+    RecordStreamer::State Value = i->second;
+    if (Value == RecordStreamer::DefinedGlobal)
+      addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_DEFAULT);
+    else if (Value == RecordStreamer::Defined)
+      addAsmGlobalSymbol(Key.data(), LTO_SYMBOL_SCOPE_INTERNAL);
+    else if (Value == RecordStreamer::Global ||
+             Value == RecordStreamer::Used)
+      addAsmGlobalSymbolUndef(Key.data());
+  }
+
+  return false;
+}
+
+/// isDeclaration - Return 'true' if the global value is a declaration.
+static bool isDeclaration(const GlobalValue &V) {
+  if (V.hasAvailableExternallyLinkage())
+    return true;
+
+  if (V.isMaterializable())
+    return false;
+
+  return V.isDeclaration();
+}
+
+/// parseSymbols - Parse the symbols from the module and module-level ASM and
+/// add them to either the defined or undefined lists.
+bool LTOModule::parseSymbols(std::string &errMsg) {
+  // add functions
+  for (Module::iterator f = _module->begin(), e = _module->end(); f != e; ++f) {
+    if (isDeclaration(*f))
+      addPotentialUndefinedSymbol(f, true);
+    else
+      addDefinedFunctionSymbol(f);
+  }
+
+  // add data
+  for (Module::global_iterator v = _module->global_begin(),
+         e = _module->global_end(); v != e; ++v) {
+    if (isDeclaration(*v))
+      addPotentialUndefinedSymbol(v, false);
+    else
+      addDefinedDataSymbol(v);
+  }
+
+  // add asm globals
+  if (addAsmGlobalSymbols(errMsg))
+    return true;
+
+  // add aliases
+  for (Module::alias_iterator a = _module->alias_begin(),
+         e = _module->alias_end(); a != e; ++a) {
+    if (isDeclaration(*a->getAliasedGlobal()))
+      // Is an alias to a declaration.
+      addPotentialUndefinedSymbol(a, false);
+    else
+      addDefinedDataSymbol(a);
+  }
+
+  // make symbols for all undefines
+  for (StringMap<NameAndAttributes>::iterator u = _undefines.begin(),
+         e = _undefines.end(); u != e; ++u) {
+    // If this symbol also has a definition, then don't make an undefine because
+    // it is a tentative definition.
+    if (_defines.count(u->getKey())) continue;
+    NameAndAttributes info = u->getValue();
+    _symbols.push_back(info);
+  }
+
+  return false;
+}
diff --git a/contrib/llvm/lib/Linker/LinkModules.cpp b/contrib/llvm/lib/Linker/LinkModules.cpp
index d2e13c9..8f2200e 100644
--- a/contrib/llvm/lib/Linker/LinkModules.cpp
+++ b/contrib/llvm/lib/Linker/LinkModules.cpp
@@ -22,6 +22,7 @@
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
 #include "llvm/Transforms/Utils/Cloning.h"
+#include <cctype>
 using namespace llvm;
 
 //===----------------------------------------------------------------------===//
@@ -353,12 +354,32 @@ Type *TypeMapTy::getImpl(Type *Ty) {
 //===----------------------------------------------------------------------===//
 
 namespace {
+  class ModuleLinker;
+
+  /// ValueMaterializerTy - Creates prototypes for functions that are lazily
+  /// linked on the fly. This speeds up linking for modules with many
+  /// lazily linked functions of which few get used.
+  class ValueMaterializerTy : public ValueMaterializer {
+    TypeMapTy &TypeMap;
+    Module *DstM;
+    std::vector<Function*> &LazilyLinkFunctions;
+  public:
+    ValueMaterializerTy(TypeMapTy &TypeMap, Module *DstM,
+                        std::vector<Function*> &LazilyLinkFunctions) :
+      ValueMaterializer(), TypeMap(TypeMap), DstM(DstM),
+      LazilyLinkFunctions(LazilyLinkFunctions) {
+    }
+
+    virtual Value *materializeValueFor(Value *V);
+  };
+
   /// ModuleLinker - This is an implementation class for the LinkModules
   /// function, which is the entrypoint for this file.
   class ModuleLinker {
     Module *DstM, *SrcM;
 
     TypeMapTy TypeMap;
+    ValueMaterializerTy ValMaterializer;
 
     /// ValueMap - Mapping of values from what they used to be in Src, to what
     /// they are now in DstM.  ValueToValueMapTy is a ValueMap, which involves
@@ -386,7 +407,9 @@ namespace {
     std::string ErrorMsg;
 
     ModuleLinker(Module *dstM, TypeSet &Set, Module *srcM, unsigned mode)
-      : DstM(dstM), SrcM(srcM), TypeMap(Set), Mode(mode) { }
+      : DstM(dstM), SrcM(srcM), TypeMap(Set),
+        ValMaterializer(TypeMap, DstM, LazilyLinkFunctions),
+        Mode(mode) { }
 
     bool run();
 
@@ -487,6 +510,20 @@ static bool isLessConstraining(GlobalValue::VisibilityTypes a,
   return false;
 }
 
+Value *ValueMaterializerTy::materializeValueFor(Value *V) {
+  Function *SF = dyn_cast<Function>(V);
+  if (!SF)
+    return NULL;
+
+  Function *DF = Function::Create(TypeMap.get(SF->getFunctionType()),
+                                  SF->getLinkage(), SF->getName(), DstM);
+  copyGVAttributes(DF, SF);
+
+  LazilyLinkFunctions.push_back(SF);
+  return DF;
+}
+
+
 /// getLinkageResult - This analyzes the two global values and determines what
 /// the result will look like in the destination module. In particular, it
 /// computes the resultant linkage type and visibility, computes whether the
@@ -668,7 +705,11 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
   if (DstGV->getVisibility() != SrcGV->getVisibility())
     return emitError(
         "Appending variables with different visibility need to be linked!");
-
+
+  if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr())
+    return emitError(
+        "Appending variables with different unnamed_addr need to be linked!");
+
   if (DstGV->getSection() != SrcGV->getSection())
     return emitError(
         "Appending variables with different section name need to be linked!");
@@ -710,6 +751,7 @@ bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV,
 
 bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
   GlobalValue *DGV = getLinkedToGlobal(SGV);
   llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+  bool HasUnnamedAddr = SGV->hasUnnamedAddr();
 
   if (DGV) {
     // Concatenation of appending linkage variables is magic and handled later.
@@ -724,6 +766,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
     if (getLinkageResult(DGV, SGV, NewLinkage, NV, LinkFromSrc))
       return true;
     NewVisibility = NV;
+    HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
 
     // If we're not linking from the source, then keep the definition that we
     // have.
@@ -732,10 +775,11 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
       if (GlobalVariable *DGVar = dyn_cast<GlobalVariable>(DGV))
         if (DGVar->isDeclaration() && SGV->isConstant() && !DGVar->isConstant())
           DGVar->setConstant(true);
-
-      // Set calculated linkage and visibility.
+
+      // Set calculated linkage, visibility and unnamed_addr.
       DGV->setLinkage(NewLinkage);
       DGV->setVisibility(*NewVisibility);
+      DGV->setUnnamedAddr(HasUnnamedAddr);
 
       // Make sure to remember this mapping.
       ValueMap[SGV] = ConstantExpr::getBitCast(DGV,TypeMap.get(SGV->getType()));
@@ -761,6 +805,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
   copyGVAttributes(NewDGV, SGV);
   if (NewVisibility)
     NewDGV->setVisibility(*NewVisibility);
+  NewDGV->setUnnamedAddr(HasUnnamedAddr);
 
   if (DGV) {
     DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDGV, DGV->getType()));
@@ -777,6 +822,7 @@ bool ModuleLinker::linkGlobalProto(GlobalVariable *SGV) {
 bool ModuleLinker::linkFunctionProto(Function *SF) {
   GlobalValue *DGV = getLinkedToGlobal(SF);
   llvm::Optional<GlobalValue::VisibilityTypes> NewVisibility;
+  bool HasUnnamedAddr = SF->hasUnnamedAddr();
 
   if (DGV) {
     GlobalValue::LinkageTypes NewLinkage = GlobalValue::InternalLinkage;
@@ -785,11 +831,13 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
     if (getLinkageResult(DGV, SF, NewLinkage, NV, LinkFromSrc))
       return true;
     NewVisibility = NV;
+    HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr();
 
     if (!LinkFromSrc) {
       // Set calculated linkage
       DGV->setLinkage(NewLinkage);
       DGV->setVisibility(*NewVisibility);
+      DGV->setUnnamedAddr(HasUnnamedAddr);
 
       // Make sure to remember this mapping.
       ValueMap[SF] = ConstantExpr::getBitCast(DGV, TypeMap.get(SF->getType()));
@@ -802,6 +850,14 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
     }
   }
 
+  // If the function is to be lazily linked, don't create it just yet.
+  // The ValueMaterializerTy will deal with creating it if it's used.
+  if (!DGV && (SF->hasLocalLinkage() || SF->hasLinkOnceLinkage() ||
+               SF->hasAvailableExternallyLinkage())) {
+    DoNotLinkFromSource.insert(SF);
+    return false;
+  }
+
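[Editorial sketch] With the hunk above, functions with local, linkonce, or available_externally linkage are no longer copied eagerly; ValueMaterializerTy creates a destination prototype only when a use of the function is actually remapped. A minimal sketch of that materialize-on-first-use idiom (simplified types; not the LLVM API itself):

    #include <map>
    #include <string>

    // Look up Key in the map; if absent, call Materialize to create the
    // destination value lazily and cache it for later uses.
    template <typename V, typename MaterializeFn>
    V *mapValue(std::map<std::string, V *> &ValueMap, const std::string &Key,
                MaterializeFn Materialize) {
      typename std::map<std::string, V *>::iterator I = ValueMap.find(Key);
      if (I != ValueMap.end())
        return I->second;            // already linked or mapped
      V *NewV = Materialize(Key);    // create the prototype on first use
      if (NewV)
        ValueMap[Key] = NewV;
      return NewV;
    }

This is why the patch can speed up linking of modules with many lazily linked functions of which only a few are referenced.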
   // If there is no linkage to be performed or we are linking from the source,
   // bring SF over.
   Function *NewDF = Function::Create(TypeMap.get(SF->getFunctionType()),
@@ -809,18 +865,12 @@ bool ModuleLinker::linkFunctionProto(Function *SF) {
   copyGVAttributes(NewDF, SF);
   if (NewVisibility)
     NewDF->setVisibility(*NewVisibility);
+  NewDF->setUnnamedAddr(HasUnnamedAddr);
 
   if (DGV) {
     // Any uses of DF need to change to NewDF, with cast.
     DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewDF, DGV->getType()));
     DGV->eraseFromParent();
-  } else {
-    // Internal, LO_ODR, or LO linkage - stick in set to ignore and lazily link.
-    if (SF->hasLocalLinkage() || SF->hasLinkOnceLinkage() ||
-        SF->hasAvailableExternallyLinkage()) {
-      DoNotLinkFromSource.insert(SF);
-      LazilyLinkFunctions.push_back(SF);
-    }
   }
 
   ValueMap[SF] = NewDF;
@@ -887,7 +937,7 @@ void ModuleLinker::linkAppendingVarInit(const AppendingVarInfo &AVI) {
   SmallVector<Constant*, 16> Elements;
   getArrayElements(AVI.DstInit, Elements);
 
-  Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap);
+  Constant *SrcInit = MapValue(AVI.SrcInit, ValueMap, RF_None, &TypeMap, &ValMaterializer);
   getArrayElements(SrcInit, Elements);
 
   ArrayType *NewType = cast<ArrayType>(AVI.NewGV->getType()->getElementType());
@@ -908,7 +958,7 @@ void ModuleLinker::linkGlobalInits() {
     GlobalVariable *DGV = cast<GlobalVariable>(ValueMap[I]);
     // Figure out what the initializer looks like in the dest module.
     DGV->setInitializer(MapValue(I->getInitializer(), ValueMap,
-                                 RF_None, &TypeMap));
+                                 RF_None, &TypeMap, &ValMaterializer));
   }
 }
@@ -938,12 +988,14 @@ void ModuleLinker::linkFunctionBody(Function *Dst, Function *Src) {
     // functions and patch them up to point to the local versions.
     for (Function::iterator BB = Dst->begin(), BE = Dst->end(); BB != BE; ++BB)
       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I)
-        RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries, &TypeMap);
+        RemapInstruction(I, ValueMap, RF_IgnoreMissingEntries,
+                         &TypeMap, &ValMaterializer);
 
   } else {
     // Clone the body of the function into the dest function.
     SmallVector<ReturnInst*, 8> Returns; // Ignore returns.
-    CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", NULL, &TypeMap);
+    CloneFunctionInto(Dst, Src, ValueMap, false, Returns, "", NULL,
+                      &TypeMap, &ValMaterializer);
   }
 
   // There is no need to map the arguments anymore.
@@ -961,7 +1013,8 @@ void ModuleLinker::linkAliasBodies() {
       continue;
     if (Constant *Aliasee = I->getAliasee()) {
       GlobalAlias *DA = cast<GlobalAlias>(ValueMap[I]);
-      DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None, &TypeMap));
+      DA->setAliasee(MapValue(Aliasee, ValueMap, RF_None,
+                              &TypeMap, &ValMaterializer));
     }
   }
 }
@@ -978,7 +1031,7 @@ void ModuleLinker::linkNamedMDNodes() {
     // Add Src elements into Dest node.
     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
       DestNMD->addOperand(MapValue(I->getOperand(i), ValueMap,
-                                   RF_None, &TypeMap));
+                                   RF_None, &TypeMap, &ValMaterializer));
   }
 }
@@ -1208,6 +1261,13 @@ bool ModuleLinker::run() {
     // Skip if not linking from source.
     if (DoNotLinkFromSource.count(SF)) continue;
 
+    Function *DF = cast<Function>(ValueMap[SF]);
+    if (SF->hasPrefixData()) {
+      // Link in the prefix data.
+      DF->setPrefixData(MapValue(
+          SF->getPrefixData(), ValueMap, RF_None, &TypeMap, &ValMaterializer));
+    }
+
     // Skip if no body (function is external) or materialize.
     if (SF->isDeclaration()) {
       if (!SF->isMaterializable())
         continue;
       if (SF->Materialize(&ErrorMsg))
         return true;
     }
 
-    linkFunctionBody(cast<Function>(ValueMap[SF]), SF);
+    linkFunctionBody(DF, SF);
     SF->Dematerialize();
   }
 
@@ -1238,49 +1298,44 @@ bool ModuleLinker::run() {
     LinkedInAnyFunctions = false;
 
     for(std::vector<Function*>::iterator I = LazilyLinkFunctions.begin(),
-        E = LazilyLinkFunctions.end(); I != E; ++I) {
-      if (!*I)
-        continue;
-
+          E = LazilyLinkFunctions.end(); I != E; ++I) {
       Function *SF = *I;
+      if (!SF)
+        continue;
+
       Function *DF = cast<Function>(ValueMap[SF]);
-
-      if (!DF->use_empty()) {
-
-        // Materialize if necessary.
-        if (SF->isDeclaration()) {
-          if (!SF->isMaterializable())
-            continue;
-          if (SF->Materialize(&ErrorMsg))
-            return true;
-        }
-
-        // Link in function body.
-        linkFunctionBody(DF, SF);
-        SF->Dematerialize();
+      if (SF->hasPrefixData()) {
+        // Link in the prefix data.
+        DF->setPrefixData(MapValue(SF->getPrefixData(),
+                                   ValueMap,
+                                   RF_None,
+                                   &TypeMap,
+                                   &ValMaterializer));
+      }
 
-        // "Remove" from vector by setting the element to 0.
-        *I = 0;
-
-        // Set flag to indicate we may have more functions to lazily link in
-        // since we linked in a function.
-        LinkedInAnyFunctions = true;
+      // Materialize if necessary.
+      if (SF->isDeclaration()) {
+        if (!SF->isMaterializable())
+          continue;
+        if (SF->Materialize(&ErrorMsg))
+          return true;
       }
+
+      // Erase from vector *before* the function body is linked -
+      // linkFunctionBody could invalidate I.
+      LazilyLinkFunctions.erase(I);
+
+      // Link in function body.
+      linkFunctionBody(DF, SF);
+      SF->Dematerialize();
+
+      // Set flag to indicate we may have more functions to lazily link in
+      // since we linked in a function.
+      LinkedInAnyFunctions = true;
+      break;
     }
   } while (LinkedInAnyFunctions);
 
-  // Remove any prototypes of functions that were not actually linked in.
-  for(std::vector<Function*>::iterator I = LazilyLinkFunctions.begin(),
-      E = LazilyLinkFunctions.end(); I != E; ++I) {
-    if (!*I)
-      continue;
-
-    Function *SF = *I;
-    Function *DF = cast<Function>(ValueMap[SF]);
-    if (DF->use_empty())
-      DF->eraseFromParent();
-  }
-
   // Now that all of the types from the source are used, resolve any structs
   // copied over to the dest that didn't exist there.
   TypeMap.linkDefinedTypeBodies();
@@ -1297,6 +1352,11 @@ Linker::Linker(Module *M) : Composite(M) {
 Linker::~Linker() {
 }
 
+void Linker::deleteModule() {
+  delete Composite;
+  Composite = NULL;
+}
+
 bool Linker::linkInModule(Module *Src, unsigned Mode, std::string *ErrorMsg) {
   ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src, Mode);
   if (TheLinker.run()) {
diff --git a/contrib/llvm/lib/MC/ELFObjectWriter.cpp b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
index 3d99548..9899bb2 100644
--- a/contrib/llvm/lib/MC/ELFObjectWriter.cpp
+++ b/contrib/llvm/lib/MC/ELFObjectWriter.cpp
@@ -73,10 +73,6 @@ class ELFObjectWriter : public MCObjectWriter {
 
     // Support lexicographic sorting.
     bool operator<(const ELFSymbolData &RHS) const {
-      if (MCELF::GetType(*SymbolData) == ELF::STT_FILE)
-        return true;
-      if (MCELF::GetType(*RHS.SymbolData) == ELF::STT_FILE)
-        return false;
       return SymbolData->getSymbol().getName() <
              RHS.SymbolData->getSymbol().getName();
     }
@@ -98,6 +94,7 @@ class ELFObjectWriter : public MCObjectWriter {
     /// @{
 
     SmallString<256> StringTable;
+    std::vector<uint64_t> FileSymbolData;
     std::vector<ELFSymbolData> LocalSymbolData;
     std::vector<ELFSymbolData> ExternalSymbolData;
     std::vector<ELFSymbolData> UndefinedSymbolData;
@@ -551,7 +548,7 @@ void ELFObjectWriter::WriteSymbol(MCDataFragment *SymtabF,
   uint8_t Type = MCELF::GetType(Data);
   uint8_t Info = (Binding << ELF_STB_Shift) | (Type << ELF_STT_Shift);
 
-  // Other and Visibility share the same byte with Visability using the lower
+  // Other and Visibility share the same byte with Visibility using the lower
   // 2 bits
   uint8_t Visibility = MCELF::GetVisibility(OrigData);
   uint8_t Other = MCELF::getOther(OrigData) <<
@@ -590,8 +587,15 @@ void ELFObjectWriter::WriteSymbolTable(MCDataFragment *SymtabF,
   // The first entry is the undefined symbol entry.
   WriteSymbolEntry(SymtabF, ShndxF, 0, 0, 0, 0, 0, 0, false);
 
+  for (unsigned i = 0, e = FileSymbolData.size(); i != e; ++i) {
+    WriteSymbolEntry(SymtabF, ShndxF, FileSymbolData[i],
+                     ELF::STT_FILE | ELF::STB_LOCAL, 0, 0,
+                     ELF::STV_DEFAULT, ELF::SHN_ABS, true);
+  }
+
   // Write the symbol table entries.
-  LastLocalSymbolIndex = LocalSymbolData.size() + 1;
+  LastLocalSymbolIndex = FileSymbolData.size() + LocalSymbolData.size() + 1;
+
   for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i) {
     ELFSymbolData &MSD = LocalSymbolData[i];
     WriteSymbol(SymtabF, ShndxF, MSD, Layout);
@@ -759,9 +763,6 @@ void ELFObjectWriter::RecordRelocation(const MCAssembler &Asm,
   uint64_t RelocOffset = Layout.getFragmentOffset(Fragment) +
     Fixup.getOffset();
 
-  // FIXME: no tests cover this. Is adjustFixupOffset dead code?
-  TargetObjectWriter->adjustFixupOffset(Fixup, RelocOffset);
-
   if (!hasRelocationAddend())
     Addend = 0;
@@ -883,6 +884,20 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
   // FIXME: We could optimize suffixes in strtab in the same way we
   // optimize them in shstrtab.
 
+  for (MCAssembler::const_file_name_iterator it = Asm.file_names_begin(),
+                                             ie = Asm.file_names_end();
+       it != ie;
+       ++it) {
+    StringRef Name = *it;
+    uint64_t &Entry = StringIndexMap[Name];
+    if (!Entry) {
+      Entry = StringTable.size();
+      StringTable += Name;
+      StringTable += '\x00';
+    }
+    FileSymbolData.push_back(Entry);
+  }
+
   // Add the data for the symbols.
   for (MCAssembler::symbol_iterator it = Asm.symbol_begin(),
          ie = Asm.symbol_end(); it != ie; ++it) {
@@ -967,7 +982,7 @@ void ELFObjectWriter::ComputeSymbolTable(MCAssembler &Asm,
 
   // Set the symbol indices. Local symbols must come before all other
   // symbols with non-local bindings.
-  unsigned Index = 1;
+  unsigned Index = FileSymbolData.size() + 1;
 
   for (unsigned i = 0, e = LocalSymbolData.size(); i != e; ++i)
     LocalSymbolData[i].SymbolData->setIndex(Index++);
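[Editorial sketch] With STT_FILE entries now written immediately after the mandatory null symbol, every later symbol index is shifted by FileSymbolData.size(), which is the bookkeeping the WriteSymbolTable and ComputeSymbolTable hunks above maintain. Summarized as plain arithmetic (illustrative only; names invented):

    #include <cstddef>

    struct SymtabLayout {
      size_t NumFileSyms, NumLocals, NumExternals;
      // Index 0 is the mandatory null symbol; file symbols follow it.
      size_t firstLocal() const { return 1 + NumFileSyms; }
      size_t firstExternal() const { return firstLocal() + NumLocals; }
      // One past the last local symbol, i.e. what goes into
      // LastLocalSymbolIndex (and ultimately sh_info of .symtab).
      size_t lastLocalPlusOne() const { return firstExternal(); }
    };

Relocation entries that reference non-local symbols are shifted by the same amount, which is why WriteRelocationsFragment (next hunk) adds FileSymbolData.size() as well.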
@@ -1005,11 +1020,18 @@ void ELFObjectWriter::CreateRelocationSections(MCAssembler &Asm,
     else
       EntrySize = is64Bit() ? sizeof(ELF::Elf64_Rel) : sizeof(ELF::Elf32_Rel);
 
+    unsigned Flags = 0;
+    StringRef Group = "";
+    if (Section.getFlags() & ELF::SHF_GROUP) {
+      Flags = ELF::SHF_GROUP;
+      Group = Section.getGroup()->getName();
+    }
+
     const MCSectionELF *RelaSection =
       Ctx.getELFSection(RelaSectionName, hasRelocationAddend() ?
-                        ELF::SHT_RELA : ELF::SHT_REL, 0,
+                        ELF::SHT_RELA : ELF::SHT_REL, Flags,
                         SectionKind::getReadOnly(),
-                        EntrySize, "");
+                        EntrySize, Group);
     RelMap[&Section] = RelaSection;
     Asm.getOrCreateSectionData(*RelaSection);
   }
@@ -1069,7 +1091,7 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
     else if (entry.Index < 0)
       entry.Index = getSymbolIndexInSymbolTable(Asm, entry.Symbol);
     else
-      entry.Index += LocalSymbolData.size();
+      entry.Index += FileSymbolData.size() + LocalSymbolData.size();
     if (is64Bit()) {
       String64(*F, entry.r_offset);
       if (TargetObjectWriter->isN64()) {
@@ -1100,11 +1122,10 @@ void ELFObjectWriter::WriteRelocationsFragment(const MCAssembler &Asm,
   }
 }
 
-static int compareBySuffix(const void *a, const void *b) {
-  const MCSectionELF *secA = *static_cast<const MCSectionELF* const *>(a);
-  const MCSectionELF *secB = *static_cast<const MCSectionELF* const *>(b);
-  const StringRef &NameA = secA->getSectionName();
-  const StringRef &NameB = secB->getSectionName();
+static int compareBySuffix(const MCSectionELF *const *a,
+                           const MCSectionELF *const *b) {
+  const StringRef &NameA = (*a)->getSectionName();
+  const StringRef &NameB = (*b)->getSectionName();
   const unsigned sizeA = NameA.size();
   const unsigned sizeB = NameB.size();
   const unsigned len = std::min(sizeA, sizeB);
@@ -1295,10 +1316,12 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
     // Remove ".rel" and ".rela" prefixes.
     unsigned SecNameLen = (Section.getType() == ELF::SHT_REL) ? 4 : 5;
     StringRef SectionName = Section.getSectionName().substr(SecNameLen);
+    StringRef GroupName =
+        Section.getGroup() ? Section.getGroup()->getName() : "";
 
-    InfoSection = Asm.getContext().getELFSection(SectionName,
-                                                 ELF::SHT_PROGBITS, 0,
-                                                 SectionKind::getReadOnly());
+    InfoSection = Asm.getContext().getELFSection(SectionName, ELF::SHT_PROGBITS,
+                                                 0, SectionKind::getReadOnly(),
+                                                 0, GroupName);
     sh_info = SectionIndexMap.lookup(InfoSection);
     break;
   }
@@ -1348,11 +1371,12 @@ void ELFObjectWriter::WriteSection(MCAssembler &Asm,
                              ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
                              SectionKind::getText()));
   } else if (SecName.startswith(".ARM.exidx")) {
-    sh_link = SectionIndexMap.lookup(
-      Asm.getContext().getELFSection(SecName.substr(sizeof(".ARM.exidx") - 1),
-                                     ELF::SHT_PROGBITS,
-                                     ELF::SHF_EXECINSTR | ELF::SHF_ALLOC,
-                                     SectionKind::getText()));
+    StringRef GroupName =
+        Section.getGroup() ? Section.getGroup()->getName() : "";
+    sh_link = SectionIndexMap.lookup(Asm.getContext().getELFSection(
+        SecName.substr(sizeof(".ARM.exidx") - 1), ELF::SHT_PROGBITS,
+        ELF::SHF_EXECINSTR | ELF::SHF_ALLOC, SectionKind::getText(), 0,
+        GroupName));
   }
 }
diff --git a/contrib/llvm/lib/MC/MCAsmBackend.cpp b/contrib/llvm/lib/MC/MCAsmBackend.cpp
index 53960e7..c4c98cc 100644
--- a/contrib/llvm/lib/MC/MCAsmBackend.cpp
+++ b/contrib/llvm/lib/MC/MCAsmBackend.cpp
@@ -8,6 +8,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/MC/MCAsmBackend.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/MC/MCFixupKindInfo.h"
 using namespace llvm;
@@ -37,7 +38,6 @@ MCAsmBackend::getFixupKindInfo(MCFixupKind Kind) const {
       { "FK_SecRel_8", 0, 64, 0 } };
 
-  assert((size_t)Kind <= sizeof(Builtins) / sizeof(Builtins[0]) &&
-         "Unknown fixup kind");
+  assert((size_t)Kind <= array_lengthof(Builtins) && "Unknown fixup kind");
   return Builtins[Kind];
 }
diff --git a/contrib/llvm/lib/MC/MCAsmInfo.cpp b/contrib/llvm/lib/MC/MCAsmInfo.cpp
index 9e60884..28f1c95 100644
--- a/contrib/llvm/lib/MC/MCAsmInfo.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfo.cpp
@@ -34,7 +34,8 @@ MCAsmInfo::MCAsmInfo() {
   HasStaticCtorDtorReferenceInStaticMode = false;
   LinkerRequiresNonEmptyDwarfLines = false;
   MaxInstLength = 4;
-  PCSymbol = "$";
+  MinInstAlignment = 1;
+  DollarIsPC = false;
   SeparatorString = ";";
   CommentColumn = 40;
   CommentString = "#";
@@ -49,10 +50,7 @@ MCAsmInfo::MCAsmInfo() {
   Code32Directive = ".code32";
   Code64Directive = ".code64";
   AssemblerDialect = 0;
-  AllowQuotesInName = false;
-  AllowNameToStartWithDigit = false;
-  AllowPeriodsInName = true;
-  AllowUTF8 = true;
+  AllowAtInName = false;
   UseDataRegionDirectives = false;
   ZeroDirective = "\t.zero\t";
   AsciiDirective = "\t.ascii\t";
@@ -75,8 +73,8 @@ MCAsmInfo::MCAsmInfo() {
   LCOMMDirectiveAlignmentType = LCOMM::NoAlignment;
   HasDotTypeDotSizeDirective = true;
   HasSingleParameterDotFile = true;
+  HasIdentDirective = false;
   HasNoDeadStrip = false;
-  HasSymbolResolver = false;
   WeakRefDirective = 0;
   WeakDefDirective = 0;
   LinkOnceDirective = 0;
@@ -86,7 +84,6 @@ MCAsmInfo::MCAsmInfo() {
   HasLEB128 = false;
   SupportsDebugInformation = false;
   ExceptionsType = ExceptionHandling::None;
-  DwarfUsesInlineInfoSection = false;
   DwarfUsesRelocationsAcrossSections = true;
   DwarfRegNumForCFI = false;
   HasMicrosoftFastStdCallMangling = false;
@@ -97,7 +94,7 @@ MCAsmInfo::~MCAsmInfo() {
 }
 
-unsigned MCAsmInfo::getULEB128Size(unsigned Value) {
+unsigned MCAsmInfo::getULEB128Size(uint64_t Value) {
   unsigned Size = 0;
   do {
     Value >>= 7;
@@ -106,7 +103,7 @@ unsigned MCAsmInfo::getULEB128Size(unsigned Value) {
   return Size;
 }
 
-unsigned MCAsmInfo::getSLEB128Size(int Value) {
+unsigned MCAsmInfo::getSLEB128Size(int64_t Value) {
   unsigned Size = 0;
   int Sign = Value >> (8 * sizeof(Value) - 1);
   bool IsMore;
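[Editorial sketch] The two hunks above merely widen the LEB128 size helpers from int/unsigned to 64-bit types; the algorithm is unchanged: the encoded length is the number of 7-bit groups required. A self-contained sketch (illustrative, not the library routine):

    #include <cstdint>

    // Number of bytes needed to encode Value as an unsigned LEB128.
    static unsigned uleb128Size(uint64_t Value) {
      unsigned Size = 0;
      do {
        Value >>= 7;  // one output byte consumes seven payload bits
        ++Size;
      } while (Value != 0);
      return Size;
    }
    // e.g. uleb128Size(127) == 1, uleb128Size(128) == 2.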
diff --git a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
index 33350d9..9d9f98e 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoCOFF.cpp
@@ -43,7 +43,6 @@ MCAsmInfoCOFF::MCAsmInfoCOFF() {
 void MCAsmInfoMicrosoft::anchor() { }
 
 MCAsmInfoMicrosoft::MCAsmInfoMicrosoft() {
-  AllowQuotesInName = true;
 }
 
 void MCAsmInfoGNUCOFF::anchor() { }
diff --git a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
index a0e3eba..704c816 100644
--- a/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
+++ b/contrib/llvm/lib/MC/MCAsmInfoDarwin.cpp
@@ -26,7 +26,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
   GlobalPrefix = "_";
   PrivateGlobalPrefix = "L";
   LinkerPrivateGlobalPrefix = "l";
-  AllowQuotesInName = true;
   HasSingleParameterDotFile = false;
   HasSubsectionsViaSymbols = true;
@@ -58,7 +57,6 @@ MCAsmInfoDarwin::MCAsmInfoDarwin() {
 
   HasDotTypeDotSizeDirective = false;
   HasNoDeadStrip = true;
-  HasSymbolResolver = true;
 
   DwarfUsesRelocationsAcrossSections = false;
 }
diff --git a/contrib/llvm/lib/MC/MCAsmInfoELF.cpp b/contrib/llvm/lib/MC/MCAsmInfoELF.cpp
new file mode 100644
index 0000000..8cf4e4f
--- /dev/null
+++ b/contrib/llvm/lib/MC/MCAsmInfoELF.cpp
@@ -0,0 +1,23 @@
+//===-- MCAsmInfoELF.cpp - ELF asm properties -------------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines target asm properties related to what form asm statements
+// should take in general on ELF-based targets
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/MC/MCAsmInfoELF.h"
+using namespace llvm;
+
+void MCAsmInfoELF::anchor() { }
+
+MCAsmInfoELF::MCAsmInfoELF() {
+  HasIdentDirective = true;
+  WeakRefDirective = "\t.weak\t";
+}
diff --git a/contrib/llvm/lib/MC/MCAsmStreamer.cpp b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
index 9e86785..ca49f8f 100644
--- a/contrib/llvm/lib/MC/MCAsmStreamer.cpp
+++ b/contrib/llvm/lib/MC/MCAsmStreamer.cpp
@@ -25,11 +25,12 @@
 #include "llvm/MC/MCSectionCOFF.h"
 #include "llvm/MC/MCSectionMachO.h"
 #include "llvm/MC/MCSymbol.h"
+#include "llvm/Support/CommandLine.h"
 #include "llvm/Support/ErrorHandling.h"
 #include "llvm/Support/Format.h"
 #include "llvm/Support/FormattedStream.h"
 #include "llvm/Support/MathExtras.h"
-#include "llvm/Support/PathV2.h"
+#include "llvm/Support/Path.h"
 #include <cctype>
 using namespace llvm;
@@ -38,7 +39,7 @@ namespace {
 
 class MCAsmStreamer : public MCStreamer {
 protected:
   formatted_raw_ostream &OS;
-  const MCAsmInfo &MAI;
+  const MCAsmInfo *MAI;
 private:
   OwningPtr<MCInstPrinter> InstPrinter;
   OwningPtr<MCCodeEmitter> Emitter;
@@ -65,17 +66,15 @@ private:
   virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame);
 
 public:
-  MCAsmStreamer(MCContext &Context, formatted_raw_ostream &os,
-                bool isVerboseAsm, bool useLoc, bool useCFI,
-                bool useDwarfDirectory,
-                MCInstPrinter *printer, MCCodeEmitter *emitter,
-                MCAsmBackend *asmbackend,
-                bool showInst)
-    : MCStreamer(SK_AsmStreamer, Context), OS(os), MAI(Context.getAsmInfo()),
-      InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
-      CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
-      ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI),
-      UseDwarfDirectory(useDwarfDirectory) {
+  MCAsmStreamer(MCContext &Context, MCTargetStreamer *TargetStreamer,
+                formatted_raw_ostream &os, bool isVerboseAsm, bool useLoc,
+                bool useCFI, bool useDwarfDirectory, MCInstPrinter *printer,
+                MCCodeEmitter *emitter, MCAsmBackend *asmbackend, bool showInst)
+      : MCStreamer(Context, TargetStreamer), OS(os), MAI(Context.getAsmInfo()),
+        InstPrinter(printer), Emitter(emitter), AsmBackend(asmbackend),
+        CommentStream(CommentToEmit), IsVerboseAsm(isVerboseAsm),
+        ShowInst(showInst), UseLoc(useLoc), UseCFI(useCFI),
+        UseDwarfDirectory(useDwarfDirectory) {
     if (InstPrinter && IsVerboseAsm)
       InstPrinter->setCommentStream(CommentStream);
   }
@@ -154,7 +153,7 @@ public:
   virtual void EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
                                          const MCSymbol *Label);
-  virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
+  virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute);
 
   virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue);
   virtual void BeginCOFFSymbolDef(const MCSymbol *Symbol);
@@ -180,12 +179,10 @@ public:
   virtual void EmitTBSSSymbol (const MCSection *Section, MCSymbol *Symbol,
                                uint64_t Size, unsigned ByteAlignment = 0);
-  virtual void EmitBytes(StringRef Data, unsigned AddrSpace);
+  virtual void EmitBytes(StringRef Data);
 
-  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size,
-                             unsigned AddrSpace);
-  virtual void EmitIntValue(uint64_t Value, unsigned Size,
-                            unsigned AddrSpace = 0);
+  virtual void EmitValueImpl(const MCExpr *Value, unsigned Size);
+  virtual void EmitIntValue(uint64_t Value, unsigned Size);
 
   virtual void EmitULEB128Value(const MCExpr *Value);
@@ -196,8 +193,7 @@ public:
 
   virtual void EmitGPRel32Value(const MCExpr *Value);
 
-  virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue,
-                        unsigned AddrSpace);
+  virtual void EmitFill(uint64_t NumBytes, uint8_t FillValue);
 
   virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
                                     unsigned ValueSize = 1,
@@ -217,6 +213,7 @@ public:
                                      unsigned Isa, unsigned Discriminator,
                                      StringRef FileName);
 
+  virtual void EmitIdent(StringRef IdentString);
   virtual void EmitCFISections(bool EH, bool Debug);
   virtual void EmitCFIDefCfa(int64_t Register, int64_t Offset);
   virtual void EmitCFIDefCfaOffset(int64_t Offset);
@@ -232,6 +229,7 @@ public:
   virtual void EmitCFISignalFrame();
   virtual void EmitCFIUndefined(int64_t Register);
   virtual void EmitCFIRegister(int64_t Register1, int64_t Register2);
+  virtual void EmitCFIWindowSave();
 
   virtual void EmitWin64EHStartProc(const MCSymbol *Symbol);
   virtual void EmitWin64EHEndProc();
@@ -248,17 +246,6 @@ public:
   virtual void EmitWin64EHPushFrame(bool Code);
   virtual void EmitWin64EHEndProlog();
 
-  virtual void EmitFnStart();
-  virtual void EmitFnEnd();
-  virtual void EmitCantUnwind();
-  virtual void EmitPersonality(const MCSymbol *Personality);
-  virtual void EmitHandlerData();
-  virtual void EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset = 0);
-  virtual void EmitPad(int64_t Offset);
-  virtual void EmitRegSave(const SmallVectorImpl<unsigned> &RegList, bool);
-
-  virtual void EmitTCEntry(const MCSymbol &S);
-
   virtual void EmitInstruction(const MCInst &Inst);
 
   virtual void EmitBundleAlignMode(unsigned AlignPow2);
@@ -268,15 +255,9 @@ public:
   /// EmitRawText - If this file is backed by an assembly streamer, this dumps
   /// the specified string in the output .s file. This capability is
   /// indicated by the hasRawTextSupport() predicate.
-  virtual void EmitRawText(StringRef String);
+  virtual void EmitRawTextImpl(StringRef String);
 
   virtual void FinishImpl();
-
-  /// @}
-
-  static bool classof(const MCStreamer *S) {
-    return S->getKind() == SK_AsmStreamer;
-  }
 };
 
 } // end anonymous namespace.
@@ -312,9 +293,9 @@ void MCAsmStreamer::EmitCommentsAndEOL() {
          "Comment array not newline terminated");
   do {
     // Emit a line of comments.
-    OS.PadToColumn(MAI.getCommentColumn());
+    OS.PadToColumn(MAI->getCommentColumn());
     size_t Position = Comments.find('\n');
-    OS << MAI.getCommentString() << ' ' << Comments.substr(0, Position) << '\n';
+    OS << MAI->getCommentString() << ' ' << Comments.substr(0, Position) <<'\n';
 
     Comments = Comments.substr(Position+1);
   } while (!Comments.empty());
@@ -332,7 +313,7 @@ static inline int64_t truncateToSize(int64_t Value, unsigned Bytes) {
 void MCAsmStreamer::ChangeSection(const MCSection *Section,
                                   const MCExpr *Subsection) {
   assert(Section && "Cannot switch to a null section!");
-  Section->PrintSwitchToSection(MAI, OS, Subsection);
+  Section->PrintSwitchToSection(*MAI, OS, Subsection);
 }
 
 void MCAsmStreamer::EmitEHSymAttributes(const MCSymbol *Symbol,
@@ -354,7 +335,7 @@ void MCAsmStreamer::EmitLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
   MCStreamer::EmitLabel(Symbol);
 
-  OS << *Symbol << MAI.getLabelSuffix();
+  OS << *Symbol << MAI->getLabelSuffix();
   EmitEOL();
 }
 
@@ -362,7 +343,7 @@ void MCAsmStreamer::EmitDebugLabel(MCSymbol *Symbol) {
   assert(Symbol->isUndefined() && "Cannot define a symbol twice!");
   MCStreamer::EmitDebugLabel(Symbol);
 
-  OS << *Symbol << MAI.getDebugLabelSuffix();
+  OS << *Symbol << MAI->getDebugLabelSuffix();
   EmitEOL();
 }
 
@@ -370,9 +351,9 @@ void MCAsmStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) {
   switch (Flag) {
   case MCAF_SyntaxUnified:         OS << "\t.syntax unified"; break;
   case MCAF_SubsectionsViaSymbols: OS << ".subsections_via_symbols"; break;
-  case MCAF_Code16:                OS << '\t'<< MAI.getCode16Directive(); break;
-  case MCAF_Code32:                OS << '\t'<< MAI.getCode32Directive(); break;
-  case MCAF_Code64:                OS << '\t'<< MAI.getCode64Directive(); break;
+  case MCAF_Code16:                OS << '\t'<< MAI->getCode16Directive();break;
+  case MCAF_Code32:                OS << '\t'<< MAI->getCode32Directive();break;
+  case MCAF_Code64:                OS << '\t'<< MAI->getCode64Directive();break;
   }
   EmitEOL();
 }
@@ -388,9 +369,7 @@ void MCAsmStreamer::EmitLinkerOptions(ArrayRef<std::string> Options) {
 }
 
 void MCAsmStreamer::EmitDataRegion(MCDataRegionType Kind) {
-  MCContext &Ctx = getContext();
-  const MCAsmInfo &MAI = Ctx.getAsmInfo();
-  if (!MAI.doesSupportDataRegionDirectives())
+  if (!MAI->doesSupportDataRegionDirectives())
     return;
   switch (Kind) {
   case MCDR_DataRegion:            OS << "\t.data_region"; break;
@@ -407,7 +386,7 @@ void MCAsmStreamer::EmitThumbFunc(MCSymbol *Func) {
   // MCSymbols when they have spaces in them.
   OS << "\t.thumb_func";
   // Only Mach-O hasSubsectionsViaSymbols()
-  if (MAI.hasSubsectionsViaSymbols())
+  if (MAI->hasSubsectionsViaSymbols())
     OS << '\t' << *Func;
   EmitEOL();
 }
@@ -441,7 +420,7 @@ void MCAsmStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel,
 }
 
-void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
+bool MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
                                         MCSymbolAttr Attribute) {
   switch (Attribute) {
   case MCSA_Invalid: llvm_unreachable("Invalid symbol attribute");
@@ -452,11 +431,12 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
   case MCSA_ELF_TypeCommon:      /// .type _foo, STT_COMMON  # aka @common
   case MCSA_ELF_TypeNoType:      /// .type _foo, STT_NOTYPE  # aka @notype
   case MCSA_ELF_TypeGnuUniqueObject: /// .type _foo, @gnu_unique_object
-    assert(MAI.hasDotTypeDotSizeDirective() && "Symbol Attr not supported");
+    if (!MAI->hasDotTypeDotSizeDirective())
+      return false; // Symbol attribute not supported
     OS << "\t.type\t" << *Symbol << ','
-       << ((MAI.getCommentString()[0] != '@') ? '@' : '%');
+       << ((MAI->getCommentString()[0] != '@') ? '@' : '%');
     switch (Attribute) {
-    default: llvm_unreachable("Unknown ELF .type");
+    default: return false;
     case MCSA_ELF_TypeFunction:    OS << "function"; break;
     case MCSA_ELF_TypeIndFunction: OS << "gnu_indirect_function"; break;
     case MCSA_ELF_TypeObject:      OS << "object"; break;
@@ -466,9 +446,9 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     case MCSA_ELF_TypeGnuUniqueObject: OS << "gnu_unique_object"; break;
     }
     EmitEOL();
-    return;
+    return true;
   case MCSA_Global: // .globl/.global
-    OS << MAI.getGlobalDirective();
+    OS << MAI->getGlobalDirective();
     FlagMap[Symbol] |= EHGlobal;
     break;
   case MCSA_Hidden:       OS << "\t.hidden\t";          break;
@@ -490,12 +470,14 @@ void MCAsmStreamer::EmitSymbolAttribute(MCSymbol *Symbol,
     FlagMap[Symbol] |= EHWeakDefinition;
     break;
       // .weak_reference
-  case MCSA_WeakReference: OS << MAI.getWeakRefDirective(); break;
+  case MCSA_WeakReference: OS << MAI->getWeakRefDirective(); break;
   case MCSA_WeakDefAutoPrivate: OS << "\t.weak_def_can_be_hidden\t"; break;
   }
 
   OS << *Symbol;
   EmitEOL();
+
+  return true;
 }
 
 void MCAsmStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {
@@ -529,15 +511,18 @@ void MCAsmStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) {
 }
 
 void MCAsmStreamer::EmitELFSize(MCSymbol *Symbol, const MCExpr *Value) {
-  assert(MAI.hasDotTypeDotSizeDirective());
+  assert(MAI->hasDotTypeDotSizeDirective());
   OS << "\t.size\t" << *Symbol << ", " << *Value << '\n';
 }
 
 void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                      unsigned ByteAlignment) {
+  // Common symbols do not belong to any actual section.
+  AssignSection(Symbol, NULL);
+
   OS << "\t.comm\t" << *Symbol << ',' << Size;
   if (ByteAlignment != 0) {
-    if (MAI.getCOMMDirectiveAlignmentIsInBytes())
+    if (MAI->getCOMMDirectiveAlignmentIsInBytes())
       OS << ',' << ByteAlignment;
     else
       OS << ',' << Log2_32(ByteAlignment);
@@ -551,9 +536,12 @@ void MCAsmStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size,
 /// @param Size - The size of the common symbol.
 void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
                                           unsigned ByteAlign) {
+  // Common symbols do not belong to any actual section.
+  AssignSection(Symbol, NULL);
+
   OS << "\t.lcomm\t" << *Symbol << ',' << Size;
   if (ByteAlign > 1) {
-    switch (MAI.getLCOMMDirectiveAlignmentType()) {
+    switch (MAI->getLCOMMDirectiveAlignmentType()) {
     case LCOMM::NoAlignment:
       llvm_unreachable("alignment not supported on .lcomm!");
     case LCOMM::ByteAlignment:
@@ -570,6 +558,9 @@ void MCAsmStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size,
 
 void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
                                  uint64_t Size, unsigned ByteAlignment) {
+  if (Symbol)
+    AssignSection(Symbol, Section);
+
   // Note: a .zerofill directive does not switch sections.
   OS << ".zerofill ";
@@ -590,6 +581,8 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
 // e.g. _a.
 void MCAsmStreamer::EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol,
                                    uint64_t Size, unsigned ByteAlignment) {
+  AssignSection(Symbol, Section);
+
   assert(Symbol != NULL && "Symbol shouldn't be NULL!");
   // Instead of using the Section we'll just use the shortcut.
   // This is a mach-o specific directive and section.
@@ -638,13 +631,13 @@ static void PrintQuotedString(StringRef Data, raw_ostream &OS) {
 }
 
-void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
+void MCAsmStreamer::EmitBytes(StringRef Data) {
   assert(getCurrentSection().first &&
          "Cannot emit contents before setting section!");
   if (Data.empty()) return;
 
   if (Data.size() == 1) {
-    OS << MAI.getData8bitsDirective(AddrSpace);
+    OS << MAI->getData8bitsDirective();
     OS << (unsigned)(unsigned char)Data[0];
     EmitEOL();
     return;
@@ -652,46 +645,43 @@ void MCAsmStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
 
   // If the data ends with 0 and the target supports .asciz, use it, otherwise
   // use .ascii
-  if (MAI.getAscizDirective() && Data.back() == 0) {
-    OS << MAI.getAscizDirective();
+  if (MAI->getAscizDirective() && Data.back() == 0) {
+    OS << MAI->getAscizDirective();
     Data = Data.substr(0, Data.size()-1);
   } else {
-    OS << MAI.getAsciiDirective();
+    OS << MAI->getAsciiDirective();
   }
 
-  OS << ' ';
   PrintQuotedString(Data, OS);
   EmitEOL();
 }
 
-void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size,
-                                 unsigned AddrSpace) {
-  EmitValue(MCConstantExpr::Create(Value, getContext()), Size, AddrSpace);
+void MCAsmStreamer::EmitIntValue(uint64_t Value, unsigned Size) {
+  EmitValue(MCConstantExpr::Create(Value, getContext()), Size);
 }
 
-void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size,
-                                  unsigned AddrSpace) {
+void MCAsmStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) {
   assert(getCurrentSection().first &&
          "Cannot emit contents before setting section!");
   const char *Directive = 0;
   switch (Size) {
   default: break;
-  case 1: Directive = MAI.getData8bitsDirective(AddrSpace);  break;
-  case 2: Directive = MAI.getData16bitsDirective(AddrSpace); break;
-  case 4: Directive = MAI.getData32bitsDirective(AddrSpace); break;
+  case 1: Directive = MAI->getData8bitsDirective();  break;
+  case 2: Directive = MAI->getData16bitsDirective(); break;
+  case 4: Directive = MAI->getData32bitsDirective(); break;
   case 8:
-    Directive = MAI.getData64bitsDirective(AddrSpace);
+    Directive = MAI->getData64bitsDirective();
    // If the target doesn't support 64-bit data, emit as two 32-bit halves.
     if (Directive) break;
     int64_t IntValue;
     if (!Value->EvaluateAsAbsolute(IntValue))
       report_fatal_error("Don't know how to emit this value.");
-    if (getContext().getAsmInfo().isLittleEndian()) {
-      EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
-      EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
+    if (MAI->isLittleEndian()) {
+      EmitIntValue((uint32_t)(IntValue >> 0 ), 4);
+      EmitIntValue((uint32_t)(IntValue >> 32), 4);
     } else {
-      EmitIntValue((uint32_t)(IntValue >> 32), 4, AddrSpace);
-      EmitIntValue((uint32_t)(IntValue >> 0 ), 4, AddrSpace);
+      EmitIntValue((uint32_t)(IntValue >> 32), 4);
+      EmitIntValue((uint32_t)(IntValue >> 0 ), 4);
     }
     return;
   }
@@ -707,7 +697,7 @@ void MCAsmStreamer::EmitULEB128Value(const MCExpr *Value) {
     EmitULEB128IntValue(IntValue);
     return;
  }
-  assert(MAI.hasLEB128() && "Cannot print a .uleb");
+  assert(MAI->hasLEB128() && "Cannot print a .uleb");
   OS << ".uleb128 " << *Value;
   EmitEOL();
 }
@@ -718,41 +708,39 @@ void MCAsmStreamer::EmitSLEB128Value(const MCExpr *Value) {
     EmitSLEB128IntValue(IntValue);
     return;
   }
-  assert(MAI.hasLEB128() && "Cannot print a .sleb");
+  assert(MAI->hasLEB128() && "Cannot print a .sleb");
   OS << ".sleb128 " << *Value;
   EmitEOL();
 }
 
 void MCAsmStreamer::EmitGPRel64Value(const MCExpr *Value) {
-  assert(MAI.getGPRel64Directive() != 0);
-  OS << MAI.getGPRel64Directive() << *Value;
+  assert(MAI->getGPRel64Directive() != 0);
+  OS << MAI->getGPRel64Directive() << *Value;
   EmitEOL();
 }
 
 void MCAsmStreamer::EmitGPRel32Value(const MCExpr *Value) {
-  assert(MAI.getGPRel32Directive() != 0);
-  OS << MAI.getGPRel32Directive() << *Value;
+  assert(MAI->getGPRel32Directive() != 0);
+  OS << MAI->getGPRel32Directive() << *Value;
   EmitEOL();
 }
 
 /// EmitFill - Emit NumBytes bytes worth of the value specified by
 /// FillValue.  This implements directives such as '.space'.
-void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue,
-                             unsigned AddrSpace) {
+void MCAsmStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) {
   if (NumBytes == 0) return;
 
-  if (AddrSpace == 0)
-    if (const char *ZeroDirective = MAI.getZeroDirective()) {
-      OS << ZeroDirective << NumBytes;
-      if (FillValue != 0)
-        OS << ',' << (int)FillValue;
-      EmitEOL();
-      return;
-    }
+  if (const char *ZeroDirective = MAI->getZeroDirective()) {
+    OS << ZeroDirective << NumBytes;
+    if (FillValue != 0)
+      OS << ',' << (int)FillValue;
+    EmitEOL();
+    return;
+  }
 
   // Emit a byte at a time.
-  MCStreamer::EmitFill(NumBytes, FillValue, AddrSpace);
+  MCStreamer::EmitFill(NumBytes, FillValue);
 }
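[Editorial sketch] The EmitValueImpl hunk above falls back to two 32-bit emissions when a target lacks a 64-bit data directive, emitting the low half first on little-endian targets and the high half first on big-endian ones. A standalone sketch of that ordering decision (names invented):

    #include <cstdint>

    // Split a 64-bit value into the two 32-bit words to emit, in order.
    static void split64(uint64_t V, bool LittleEndian, uint32_t Halves[2]) {
      uint32_t Lo = (uint32_t)(V >> 0);
      uint32_t Hi = (uint32_t)(V >> 32);
      Halves[0] = LittleEndian ? Lo : Hi;  // emitted first
      Halves[1] = LittleEndian ? Hi : Lo;  // emitted second
    }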
 void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
@@ -763,14 +751,14 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
   if (isPowerOf2_32(ByteAlignment)) {
     switch (ValueSize) {
     default: llvm_unreachable("Invalid size for machine code value!");
-    case 1: OS << MAI.getAlignDirective(); break;
+    case 1: OS << MAI->getAlignDirective(); break;
     // FIXME: use MAI for this!
     case 2: OS << ".p2alignw "; break;
     case 4: OS << ".p2alignl "; break;
     case 8: llvm_unreachable("Unsupported alignment size!");
     }
 
-    if (MAI.getAlignmentIsInBytes())
+    if (MAI->getAlignmentIsInBytes())
       OS << ByteAlignment;
     else
       OS << Log2_32(ByteAlignment);
@@ -806,7 +794,7 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
 void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
                                       unsigned MaxBytesToEmit) {
   // Emit with a text fill value.
-  EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(),
+  EmitValueToAlignment(ByteAlignment, MAI->getTextAlignFillValue(),
                        1, MaxBytesToEmit);
 }
 
@@ -820,7 +808,7 @@ bool MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
 
 void MCAsmStreamer::EmitFileDirective(StringRef Filename) {
-  assert(MAI.hasSingleParameterDotFile());
+  assert(MAI->hasSingleParameterDotFile());
   OS << "\t.file\t";
   PrintQuotedString(Filename, OS);
   EmitEOL();
@@ -886,13 +874,20 @@ void MCAsmStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line,
     OS << "discriminator " << Discriminator;
 
   if (IsVerboseAsm) {
-    OS.PadToColumn(MAI.getCommentColumn());
-    OS << MAI.getCommentString() << ' ' << FileName << ':'
+    OS.PadToColumn(MAI->getCommentColumn());
+    OS << MAI->getCommentString() << ' ' << FileName << ':'
        << Line << ':' << Column;
   }
   EmitEOL();
 }
 
+void MCAsmStreamer::EmitIdent(StringRef IdentString) {
+  assert(MAI->hasIdentDirective() && ".ident directive not supported");
+  OS << "\t.ident\t";
+  PrintQuotedString(IdentString, OS);
+  EmitEOL();
+}
+
 void MCAsmStreamer::EmitCFISections(bool EH, bool Debug) {
   MCStreamer::EmitCFISections(EH, Debug);
@@ -936,9 +931,9 @@ void MCAsmStreamer::EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) {
 }
 
 void MCAsmStreamer::EmitRegisterName(int64_t Register) {
-  if (InstPrinter && !MAI.useDwarfRegNumForCFI()) {
-    const MCRegisterInfo &MRI = getContext().getRegisterInfo();
-    unsigned LLVMRegister = MRI.getLLVMRegNum(Register, true);
+  if (InstPrinter && !MAI->useDwarfRegNumForCFI()) {
+    const MCRegisterInfo *MRI = getContext().getRegisterInfo();
+    unsigned LLVMRegister = MRI->getLLVMRegNum(Register, true);
     InstPrinter->printRegName(OS, LLVMRegister);
   } else {
     OS << Register;
@@ -1094,6 +1089,16 @@ void MCAsmStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) {
   EmitEOL();
 }
 
+void MCAsmStreamer::EmitCFIWindowSave() {
+  MCStreamer::EmitCFIWindowSave();
+
+  if (!UseCFI)
+    return;
+
+  OS << "\t.cfi_window_save";
+  EmitEOL();
+}
+
 void MCAsmStreamer::EmitWin64EHStartProc(const MCSymbol *Symbol) {
   MCStreamer::EmitWin64EHStartProc(Symbol);
@@ -1276,7 +1281,7 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
       unsigned Bit = (Code[i] >> j) & 1;
 
       unsigned FixupBit;
-      if (getContext().getAsmInfo().isLittleEndian())
+      if (MAI->isLittleEndian())
         FixupBit = i * 8 + j;
       else
         FixupBit = i * 8 + (7-j);
@@ -1299,73 +1304,6 @@ void MCAsmStreamer::AddEncodingComment(const MCInst &Inst) {
   }
 }
 
-void MCAsmStreamer::EmitFnStart() {
-  OS << "\t.fnstart";
-  EmitEOL();
-}
-
-void MCAsmStreamer::EmitFnEnd() {
-  OS << "\t.fnend";
-  EmitEOL();
-}
-
-void MCAsmStreamer::EmitCantUnwind() {
-  OS << "\t.cantunwind";
-  EmitEOL();
-}
-
-void MCAsmStreamer::EmitHandlerData() {
-  OS << "\t.handlerdata";
-  EmitEOL();
-}
-
-void MCAsmStreamer::EmitPersonality(const MCSymbol *Personality) {
-  OS << "\t.personality " << Personality->getName();
-  EmitEOL();
-}
-
-void MCAsmStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) {
-  OS << "\t.setfp\t";
-  InstPrinter->printRegName(OS, FpReg);
-  OS << ", ";
-  InstPrinter->printRegName(OS, SpReg);
-  if (Offset)
-    OS << ", #" << Offset;
-  EmitEOL();
-}
-
-void MCAsmStreamer::EmitPad(int64_t Offset) {
-  OS << "\t.pad\t#" << Offset;
-  EmitEOL();
-}
-
-void MCAsmStreamer::EmitRegSave(const SmallVectorImpl<unsigned> &RegList,
-                                bool isVector) {
-  assert(RegList.size() && "RegList should not be empty");
-  if (isVector)
-    OS << "\t.vsave\t{";
-  else
-    OS << "\t.save\t{";
-
-  InstPrinter->printRegName(OS, RegList[0]);
-
-  for (unsigned i = 1, e = RegList.size(); i != e; ++i) {
-    OS << ", ";
-    InstPrinter->printRegName(OS, RegList[i]);
-  }
-
-  OS << "}";
-  EmitEOL();
-}
-
-void MCAsmStreamer::EmitTCEntry(const MCSymbol &S) {
-  OS << "\t.tc ";
-  OS << S.getName();
-  OS << "[TC],";
-  OS << S.getName();
-  EmitEOL();
-}
-
 void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
   assert(getCurrentSection().first &&
          "Cannot emit contents before setting section!");
@@ -1376,7 +1314,7 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
 
   // Show the MCInst if enabled.
   if (ShowInst) {
-    Inst.dump_pretty(GetCommentOS(), &MAI, InstPrinter.get(), "\n ");
+    Inst.dump_pretty(GetCommentOS(), MAI, InstPrinter.get(), "\n ");
     GetCommentOS() << "\n";
   }
 
@@ -1384,7 +1322,7 @@ void MCAsmStreamer::EmitInstruction(const MCInst &Inst) {
   if (InstPrinter)
     InstPrinter->printInst(&Inst, OS, "");
   else
-    Inst.print(OS, &MAI);
+    Inst.print(OS, MAI);
   EmitEOL();
 }
 
@@ -1408,7 +1346,7 @@ void MCAsmStreamer::EmitBundleUnlock() {
 /// EmitRawText - If this file is backed by an assembly streamer, this dumps
 /// the specified string in the output .s file.  This capability is
 /// indicated by the hasRawTextSupport() predicate.
-void MCAsmStreamer::EmitRawText(StringRef String) {
+void MCAsmStreamer::EmitRawTextImpl(StringRef String) {
   if (!String.empty() && String.back() == '\n')
     String = String.substr(0, String.size()-1);
   OS << String;
@@ -1427,14 +1365,16 @@ void MCAsmStreamer::FinishImpl() {
     MCGenDwarfInfo::Emit(this, LineSectionSymbol);
 
   if (!UseCFI)
-    EmitFrames(false);
+    EmitFrames(AsmBackend.get(), false);
 }
+
 MCStreamer *llvm::createAsmStreamer(MCContext &Context,
+                                    MCTargetStreamer *TargetStreamer,
                                     formatted_raw_ostream &OS,
-                                    bool isVerboseAsm, bool useLoc,
-                                    bool useCFI, bool useDwarfDirectory,
-                                    MCInstPrinter *IP, MCCodeEmitter *CE,
-                                    MCAsmBackend *MAB, bool ShowInst) {
-  return new MCAsmStreamer(Context, OS, isVerboseAsm, useLoc, useCFI,
-                           useDwarfDirectory, IP, CE, MAB, ShowInst);
+                                    bool isVerboseAsm, bool useLoc, bool useCFI,
+                                    bool useDwarfDirectory, MCInstPrinter *IP,
+                                    MCCodeEmitter *CE, MCAsmBackend *MAB,
+                                    bool ShowInst) {
+  return new MCAsmStreamer(Context, TargetStreamer, OS, isVerboseAsm, useLoc,
+                           useCFI, useDwarfDirectory, IP, CE, MAB, ShowInst);
 }
diff --git a/contrib/llvm/lib/MC/MCAssembler.cpp b/contrib/llvm/lib/MC/MCAssembler.cpp
index fb5ab28..68111f1 100644
--- a/contrib/llvm/lib/MC/MCAssembler.cpp
+++ b/contrib/llvm/lib/MC/MCAssembler.cpp
@@ -580,10 +580,10 @@ static void writeFragment(const MCAssembler &Asm, const MCAsmLayout &Layout,
   case MCFragment::FT_Align: {
     ++stats::EmittedAlignFragments;
     const MCAlignFragment &AF = cast<MCAlignFragment>(F);
-    uint64_t Count = FragmentSize / AF.getValueSize();
-
     assert(AF.getValueSize() && "Invalid virtual align in concrete fragment!");
 
+    uint64_t Count = FragmentSize / AF.getValueSize();
+
     // FIXME: This error shouldn't actually occur (the front end should emit
     // multiple .align directives to enforce the semantics it wants), but is
     // severe enough that we want to report it. How to handle this?
@@ -708,12 +708,13 @@ void MCAssembler::writeSectionData(const MCSectionData *SD,
       case MCFragment::FT_Align:
        // Check that we aren't trying to write a non-zero value into a virtual
         // section.
-        assert((!cast<MCAlignFragment>(it)->getValueSize() ||
-                !cast<MCAlignFragment>(it)->getValue()) &&
+        assert((cast<MCAlignFragment>(it)->getValueSize() == 0 ||
+                cast<MCAlignFragment>(it)->getValue() == 0) &&
               "Invalid align in virtual section!");
         break;
       case MCFragment::FT_Fill:
-        assert(!cast<MCFillFragment>(it)->getValueSize() &&
+        assert((cast<MCFillFragment>(it)->getValueSize() == 0 ||
+                cast<MCFillFragment>(it)->getValue() == 0) &&
               "Invalid fill in virtual section!");
         break;
       }
@@ -904,6 +905,7 @@ bool MCAssembler::relaxLEB(MCAsmLayout &Layout, MCLEBFragment &LF) {
 
 bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
                                      MCDwarfLineAddrFragment &DF) {
+  MCContext &Context = Layout.getAssembler().getContext();
   int64_t AddrDelta = 0;
   uint64_t OldSize = DF.getContents().size();
   bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
@@ -914,13 +916,14 @@ bool MCAssembler::relaxDwarfLineAddr(MCAsmLayout &Layout,
   SmallString<8> &Data = DF.getContents();
   Data.clear();
   raw_svector_ostream OSE(Data);
-  MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OSE);
+  MCDwarfLineAddr::Encode(Context, LineDelta, AddrDelta, OSE);
   OSE.flush();
   return OldSize != Data.size();
 }
 
 bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
                                               MCDwarfCallFrameFragment &DF) {
+  MCContext &Context = Layout.getAssembler().getContext();
   int64_t AddrDelta = 0;
   uint64_t OldSize = DF.getContents().size();
   bool IsAbs = DF.getAddrDelta().EvaluateAsAbsolute(AddrDelta, Layout);
@@ -929,7 +932,7 @@ bool MCAssembler::relaxDwarfCallFrameFragment(MCAsmLayout &Layout,
   SmallString<8> &Data = DF.getContents();
   Data.clear();
   raw_svector_ostream OSE(Data);
-  MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OSE);
+  MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OSE);
   OSE.flush();
   return OldSize != Data.size();
 }
diff --git a/contrib/llvm/lib/MC/MCAtom.cpp b/contrib/llvm/lib/MC/MCAtom.cpp
index d714443..bc353cd 100644
--- a/contrib/llvm/lib/MC/MCAtom.cpp
+++ b/contrib/llvm/lib/MC/MCAtom.cpp
@@ -10,88 +10,105 @@
 #include "llvm/MC/MCAtom.h"
 #include "llvm/MC/MCModule.h"
 #include "llvm/Support/ErrorHandling.h"
+#include <iterator>
 
 using namespace llvm;
 
-void MCAtom::addInst(const MCInst &I, uint64_t Address, unsigned Size) {
-  assert(Type == TextAtom && "Trying to add MCInst to a non-text atom!");
+// Pin the vtable to this file.
+void MCAtom::anchor() {}
 
-  assert(Address < End+Size &&
-         "Instruction not contiguous with end of atom!");
-  if (Address > End)
-    Parent->remap(this, Begin, End+Size);
-
-  Text.push_back(std::make_pair(Address, I));
+void MCAtom::remap(uint64_t NewBegin, uint64_t NewEnd) {
+  Parent->remap(this, NewBegin, NewEnd);
 }
 
-void MCAtom::addData(const MCData &D) {
-  assert(Type == DataAtom && "Trying to add MCData to a non-data atom!");
-  Parent->remap(this, Begin, End+1);
-
-  Data.push_back(D);
+void MCAtom::remapForTruncate(uint64_t TruncPt) {
+  assert((TruncPt >= Begin && TruncPt < End) &&
+         "Truncation point not contained in atom!");
+  remap(Begin, TruncPt);
 }
 
-MCAtom *MCAtom::split(uint64_t SplitPt) {
+void MCAtom::remapForSplit(uint64_t SplitPt,
+                           uint64_t &LBegin, uint64_t &LEnd,
+                           uint64_t &RBegin, uint64_t &REnd) {
   assert((SplitPt > Begin && SplitPt <= End) &&
          "Splitting at point not contained in atom!");
 
   // Compute the new begin/end points.
-  uint64_t LeftBegin = Begin;
-  uint64_t LeftEnd = SplitPt - 1;
-  uint64_t RightBegin = SplitPt;
-  uint64_t RightEnd = End;
+  LBegin = Begin;
+  LEnd = SplitPt - 1;
+  RBegin = SplitPt;
+  REnd = End;
 
   // Remap this atom to become the lower of the two new ones.
-  Parent->remap(this, LeftBegin, LeftEnd);
+  remap(LBegin, LEnd);
+}
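[Editorial sketch] remapForSplit, defined above, factors out the range arithmetic that both MCDataAtom::split and MCTextAtom::split (next hunks) now share: both halves keep inclusive [Begin, End] bounds, and the left half ends one unit before the split point. A self-contained sketch (names invented):

    #include <cassert>
    #include <cstdint>

    static void splitRange(uint64_t Begin, uint64_t End, uint64_t SplitPt,
                           uint64_t &LBegin, uint64_t &LEnd,
                           uint64_t &RBegin, uint64_t &REnd) {
      assert(SplitPt > Begin && SplitPt <= End && "split point outside atom");
      LBegin = Begin;
      LEnd = SplitPt - 1;  // left half ends just before the split point
      RBegin = SplitPt;
      REnd = End;
    }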
- MCAtom *RightAtom = Parent->createAtom(Type, RightBegin, RightEnd); +// MCDataAtom - // Split the contents of the original atom between it and the new one. The - // precise method depends on whether this is a data or a text atom. - if (isDataAtom()) { - std::vector::iterator I = Data.begin() + (RightBegin - LeftBegin); +void MCDataAtom::addData(const MCData &D) { + Data.push_back(D); + if (Data.size() > End + 1 - Begin) + remap(Begin, End + 1); +} - assert(I != Data.end() && "Split point not found in range!"); +void MCDataAtom::truncate(uint64_t TruncPt) { + remapForTruncate(TruncPt); - std::copy(I, Data.end(), RightAtom->Data.end()); - Data.erase(I, Data.end()); - } else if (isTextAtom()) { - std::vector >::iterator I = Text.begin(); + Data.resize(TruncPt - Begin + 1); +} - while (I != Text.end() && I->first < SplitPt) ++I; +MCDataAtom *MCDataAtom::split(uint64_t SplitPt) { + uint64_t LBegin, LEnd, RBegin, REnd; + remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd); - assert(I != Text.end() && "Split point not found in disassembly!"); - assert(I->first == SplitPt && - "Split point does not fall on instruction boundary!"); + MCDataAtom *RightAtom = Parent->createDataAtom(RBegin, REnd); + RightAtom->setName(getName()); - std::copy(I, Text.end(), RightAtom->Text.end()); - Text.erase(I, Text.end()); - } else - llvm_unreachable("Unknown atom type!"); + std::vector::iterator I = Data.begin() + (RBegin - LBegin); + assert(I != Data.end() && "Split point not found in range!"); + std::copy(I, Data.end(), std::back_inserter(RightAtom->Data)); + Data.erase(I, Data.end()); return RightAtom; } -void MCAtom::truncate(uint64_t TruncPt) { - assert((TruncPt >= Begin && TruncPt < End) && - "Truncation point not contained in atom!"); +// MCTextAtom - Parent->remap(this, Begin, TruncPt); +void MCTextAtom::addInst(const MCInst &I, uint64_t Size) { + if (NextInstAddress + Size - 1 > End) + remap(Begin, NextInstAddress + Size - 1); + Insts.push_back(MCDecodedInst(I, NextInstAddress, Size)); + NextInstAddress += Size; +} - if (isDataAtom()) { - Data.resize(TruncPt - Begin + 1); - } else if (isTextAtom()) { - std::vector >::iterator I = Text.begin(); +void MCTextAtom::truncate(uint64_t TruncPt) { + remapForTruncate(TruncPt); - while (I != Text.end() && I->first <= TruncPt) ++I; + InstListTy::iterator I = Insts.begin(); + while (I != Insts.end() && I->Address <= TruncPt) ++I; - assert(I != Text.end() && "Truncation point not found in disassembly!"); - assert(I->first == TruncPt+1 && - "Truncation point does not fall on instruction boundary"); + assert(I != Insts.end() && "Truncation point not found in disassembly!"); + assert(I->Address == TruncPt + 1 && + "Truncation point does not fall on instruction boundary"); - Text.erase(I, Text.end()); - } else - llvm_unreachable("Unknown atom type!"); + Insts.erase(I, Insts.end()); } +MCTextAtom *MCTextAtom::split(uint64_t SplitPt) { + uint64_t LBegin, LEnd, RBegin, REnd; + remapForSplit(SplitPt, LBegin, LEnd, RBegin, REnd); + + MCTextAtom *RightAtom = Parent->createTextAtom(RBegin, REnd); + RightAtom->setName(getName()); + + InstListTy::iterator I = Insts.begin(); + while (I != Insts.end() && I->Address < SplitPt) ++I; + assert(I != Insts.end() && "Split point not found in disassembly!"); + assert(I->Address == SplitPt && + "Split point does not fall on instruction boundary!"); + + std::copy(I, Insts.end(), std::back_inserter(RightAtom->Insts)); + Insts.erase(I, Insts.end()); + Parent->splitBasicBlocksForAtom(this, RightAtom); + return RightAtom; +} diff --git 
diff --git a/contrib/llvm/lib/MC/MCContext.cpp b/contrib/llvm/lib/MC/MCContext.cpp
index 9adcc02..a0acda5 100644
--- a/contrib/llvm/lib/MC/MCContext.cpp
+++ b/contrib/llvm/lib/MC/MCContext.cpp
@@ -21,27 +21,35 @@
 #include "llvm/MC/MCSymbol.h"
 #include "llvm/Support/ELF.h"
 #include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/FileSystem.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Signals.h"
 #include "llvm/Support/SourceMgr.h"
+
+#include <map>
+
 using namespace llvm;

-typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
-typedef StringMap<const MCSectionELF*> ELFUniqueMapTy;
-typedef StringMap<const MCSectionCOFF*> COFFUniqueMapTy;
+typedef std::pair<std::string, std::string> SectionGroupPair;
+typedef StringMap<const MCSectionMachO*> MachOUniqueMapTy;
+typedef std::map<SectionGroupPair, const MCSectionELF *> ELFUniqueMapTy;
+typedef std::map<SectionGroupPair, const MCSectionCOFF *> COFFUniqueMapTy;

-MCContext::MCContext(const MCAsmInfo &mai, const MCRegisterInfo &mri,
+MCContext::MCContext(const MCAsmInfo *mai, const MCRegisterInfo *mri,
                      const MCObjectFileInfo *mofi, const SourceMgr *mgr,
                      bool DoAutoReset) :
   SrcMgr(mgr), MAI(mai), MRI(mri), MOFI(mofi),
   Allocator(), Symbols(Allocator), UsedNames(Allocator),
   NextUniqueID(0),
-  CompilationDir(llvm::sys::Path::GetCurrentDirectory().str()),
   CurrentDwarfLoc(0,0,0,DWARF2_FLAG_IS_STMT,0,0),
   DwarfLocSeen(false), GenDwarfForAssembly(false), GenDwarfFileNumber(0),
   AllowTemporaryLabels(true), DwarfCompileUnitID(0), AutoReset(DoAutoReset) {
+  error_code EC = llvm::sys::fs::current_path(CompilationDir);
+  if (EC)
+    CompilationDir.clear();
+
   MachOUniquingMap = 0;
   ELFUniquingMap = 0;
   COFFUniquingMap = 0;
@@ -126,7 +134,7 @@ MCSymbol *MCContext::CreateSymbol(StringRef Name) {
   // Determine whether this is an assembler temporary or normal label, if used.
   bool isTemporary = false;
   if (AllowTemporaryLabels)
-    isTemporary = Name.startswith(MAI.getPrivateGlobalPrefix());
+    isTemporary = Name.startswith(MAI->getPrivateGlobalPrefix());

   StringMapEntry<bool> *NameEntry = &UsedNames.GetOrCreateValue(Name);
   if (NameEntry->getValue()) {
@@ -156,7 +164,7 @@ MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
 MCSymbol *MCContext::CreateTempSymbol() {
   SmallString<128> NameSV;
   raw_svector_ostream(NameSV)
-    << MAI.getPrivateGlobalPrefix() << "tmp" << NextUniqueID++;
+    << MAI->getPrivateGlobalPrefix() << "tmp" << NextUniqueID++;
   return CreateSymbol(NameSV);
 }

@@ -175,14 +183,14 @@ unsigned MCContext::GetInstance(int64_t LocalLabelVal) {
 }

 MCSymbol *MCContext::CreateDirectionalLocalSymbol(int64_t LocalLabelVal) {
-  return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+  return GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
                            Twine(LocalLabelVal) +
                            "\2" +
                            Twine(NextInstance(LocalLabelVal)));
 }
 MCSymbol *MCContext::GetDirectionalLocalSymbol(int64_t LocalLabelVal,
                                                int bORf) {
-  return GetOrCreateSymbol(Twine(MAI.getPrivateGlobalPrefix()) +
+  return GetOrCreateSymbol(Twine(MAI->getPrivateGlobalPrefix()) +
                            Twine(LocalLabelVal) +
                            "\2" +
                            Twine(GetInstance(LocalLabelVal) + bORf));
@@ -245,8 +253,9 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags,
   ELFUniqueMapTy &Map = *(ELFUniqueMapTy*)ELFUniquingMap;

   // Do the lookup, if we have a hit, return it.
-  StringMapEntry<const MCSectionELF*> &Entry = Map.GetOrCreateValue(Section);
-  if (Entry.getValue()) return Entry.getValue();
+  std::pair<ELFUniqueMapTy::iterator, bool> Entry = Map.insert(
+      std::make_pair(SectionGroupPair(Section, Group), (MCSectionELF *)0));
+  if (!Entry.second) return Entry.first->second;

   // Possibly refine the entry size first.
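An aside on the uniquing change in this hunk: ELF and COFF sections are now keyed by a (name, group) pair rather than the name alone, so two COMDAT sections with the same name but different groups no longer collapse into one entry. A minimal standalone sketch of the insert-once pattern, with a stand-in Section type in place of the real MC section classes:

    #include <cassert>
    #include <map>
    #include <string>
    #include <utility>

    struct Section { std::string Name, Group; };

    typedef std::pair<std::string, std::string> SectionGroupPair;
    typedef std::map<SectionGroupPair, Section *> SectionMapTy;

    // One map lookup serves both the hit and the miss case, mirroring the
    // Map.insert(...) pattern in getELFSection/getCOFFSection above.
    Section *getOrCreateSection(SectionMapTy &Map, const std::string &Name,
                                const std::string &Group) {
      std::pair<SectionMapTy::iterator, bool> Entry = Map.insert(
          std::make_pair(SectionGroupPair(Name, Group), (Section *)0));
      if (!Entry.second)
        return Entry.first->second; // Hit: the section already exists.
      Section *S = new Section();   // Miss: create and cache it (leaked
      S->Name = Name;               // deliberately here, like the
      S->Group = Group;             // arena-allocated MC sections).
      Entry.first->second = S;
      return S;
    }

    int main() {
      SectionMapTy Map;
      Section *A = getOrCreateSection(Map, ".text.foo", "grp1");
      Section *B = getOrCreateSection(Map, ".text.foo", "grp2");
      assert(A != B && "same name, different COMDAT group: distinct sections");
      assert(A == getOrCreateSection(Map, ".text.foo", "grp1"));
      return 0;
    }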
if (!EntrySize) { @@ -257,9 +266,9 @@ getELFSection(StringRef Section, unsigned Type, unsigned Flags, if (!Group.empty()) GroupSym = GetOrCreateSymbol(Group); - MCSectionELF *Result = new (*this) MCSectionELF(Entry.getKey(), Type, Flags, - Kind, EntrySize, GroupSym); - Entry.setValue(Result); + MCSectionELF *Result = new (*this) MCSectionELF( + Entry.first->first.first, Type, Flags, Kind, EntrySize, GroupSym); + Entry.first->second = Result; return Result; } @@ -270,26 +279,53 @@ const MCSectionELF *MCContext::CreateELFGroupSection() { return Result; } -const MCSection *MCContext::getCOFFSection(StringRef Section, - unsigned Characteristics, - int Selection, - SectionKind Kind) { +const MCSectionCOFF * +MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, + SectionKind Kind, StringRef COMDATSymName, + int Selection, const MCSectionCOFF *Assoc) { if (COFFUniquingMap == 0) COFFUniquingMap = new COFFUniqueMapTy(); COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap; // Do the lookup, if we have a hit, return it. - StringMapEntry &Entry = Map.GetOrCreateValue(Section); - if (Entry.getValue()) return Entry.getValue(); - MCSectionCOFF *Result = new (*this) MCSectionCOFF(Entry.getKey(), - Characteristics, - Selection, Kind); + SectionGroupPair P(Section, COMDATSymName); + std::pair Entry = + Map.insert(std::make_pair(P, (MCSectionCOFF *)0)); + COFFUniqueMapTy::iterator Iter = Entry.first; + if (!Entry.second) + return Iter->second; + + const MCSymbol *COMDATSymbol = NULL; + if (!COMDATSymName.empty()) + COMDATSymbol = GetOrCreateSymbol(COMDATSymName); - Entry.setValue(Result); + MCSectionCOFF *Result = + new (*this) MCSectionCOFF(Iter->first.first, Characteristics, + COMDATSymbol, Selection, Assoc, Kind); + + Iter->second = Result; return Result; } +const MCSectionCOFF * +MCContext::getCOFFSection(StringRef Section, unsigned Characteristics, + SectionKind Kind) { + return getCOFFSection(Section, Characteristics, Kind, "", 0); +} + +const MCSectionCOFF *MCContext::getCOFFSection(StringRef Section) { + if (COFFUniquingMap == 0) + COFFUniquingMap = new COFFUniqueMapTy(); + COFFUniqueMapTy &Map = *(COFFUniqueMapTy*)COFFUniquingMap; + + SectionGroupPair P(Section, ""); + COFFUniqueMapTy::iterator Iter = Map.find(P); + if (Iter == Map.end()) + return 0; + return Iter->second; +} + //===----------------------------------------------------------------------===// // Dwarf Management //===----------------------------------------------------------------------===// diff --git a/contrib/llvm/lib/MC/MCDisassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler.cpp index 0809690..bfd51ab 100644 --- a/contrib/llvm/lib/MC/MCDisassembler.cpp +++ b/contrib/llvm/lib/MC/MCDisassembler.cpp @@ -8,7 +8,49 @@ //===----------------------------------------------------------------------===// #include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCExternalSymbolizer.h" +#include "llvm/Support/raw_ostream.h" + using namespace llvm; MCDisassembler::~MCDisassembler() { } + +void +MCDisassembler::setupForSymbolicDisassembly( + LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp, + void *DisInfo, + MCContext *Ctx, + OwningPtr &RelInfo) { + this->GetOpInfo = GetOpInfo; + this->SymbolLookUp = SymbolLookUp; + this->DisInfo = DisInfo; + this->Ctx = Ctx; + assert(Ctx != 0 && "No MCContext given for symbolic disassembly"); + if (!Symbolizer) + Symbolizer.reset(new MCExternalSymbolizer(*Ctx, RelInfo, GetOpInfo, + SymbolLookUp, DisInfo)); +} + +bool MCDisassembler::tryAddingSymbolicOperand(MCInst 
&Inst, int64_t Value, + uint64_t Address, bool IsBranch, + uint64_t Offset, + uint64_t InstSize) const { + raw_ostream &cStream = CommentStream ? *CommentStream : nulls(); + if (Symbolizer) + return Symbolizer->tryAddingSymbolicOperand(Inst, cStream, Value, Address, + IsBranch, Offset, InstSize); + return false; +} + +void MCDisassembler::tryAddingPcLoadReferenceComment(int64_t Value, + uint64_t Address) const { + raw_ostream &cStream = CommentStream ? *CommentStream : nulls(); + if (Symbolizer) + Symbolizer->tryAddingPcLoadReferenceComment(cStream, Value, Address); +} + +void MCDisassembler::setSymbolizer(OwningPtr &Symzer) { + Symbolizer.reset(Symzer.take()); +} diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp index 4766b37..a0066c8 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp +++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.cpp @@ -16,8 +16,11 @@ #include "llvm/MC/MCInstPrinter.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCRegisterInfo.h" +#include "llvm/MC/MCRelocationInfo.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/MC/MCSymbolizer.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/MemoryObject.h" #include "llvm/Support/TargetRegistry.h" @@ -40,10 +43,15 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, // Get the target. std::string Error; const Target *TheTarget = TargetRegistry::lookupTarget(Triple, Error); - assert(TheTarget && "Unable to create target!"); + if (!TheTarget) + return 0; + + const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); + if (!MRI) + return 0; // Get the assembler info needed to setup the MCContext. - const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(Triple); + const MCAsmInfo *MAI = TheTarget->createMCAsmInfo(*MRI, Triple); if (!MAI) return 0; @@ -51,10 +59,6 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, if (!MII) return 0; - const MCRegisterInfo *MRI = TheTarget->createMCRegInfo(Triple); - if (!MRI) - return 0; - // Package up features to be passed to target/subtarget std::string FeaturesStr; @@ -64,7 +68,7 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, return 0; // Set up the MCContext for creating symbols and MCExpr's. - MCContext *Ctx = new MCContext(*MAI, *MRI, 0); + MCContext *Ctx = new MCContext(MAI, MRI, 0); if (!Ctx) return 0; @@ -72,8 +76,18 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, MCDisassembler *DisAsm = TheTarget->createMCDisassembler(*STI); if (!DisAsm) return 0; - DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, Ctx); + OwningPtr RelInfo( + TheTarget->createMCRelocationInfo(Triple, *Ctx)); + if (!RelInfo) + return 0; + + OwningPtr Symbolizer( + TheTarget->createMCSymbolizer(Triple, GetOpInfo, SymbolLookUp, DisInfo, + Ctx, RelInfo.take())); + DisAsm->setSymbolizer(Symbolizer); + DisAsm->setupForSymbolicDisassembly(GetOpInfo, SymbolLookUp, DisInfo, + Ctx, RelInfo); // Set up the instruction printer. int AsmPrinterVariant = MAI->getAssemblerDialect(); MCInstPrinter *IP = TheTarget->createMCInstPrinter(AsmPrinterVariant, @@ -88,6 +102,7 @@ LLVMDisasmContextRef LLVMCreateDisasmCPU(const char *Triple, const char *CPU, if (!DC) return 0; + DC->setCPU(CPU); return DC; } @@ -130,6 +145,112 @@ public: }; } // end anonymous namespace +/// \brief Emits the comments that are stored in \p DC comment stream. 
+/// Each comment in the comment stream must end with a newline. +static void emitComments(LLVMDisasmContext *DC, + formatted_raw_ostream &FormattedOS) { + // Flush the stream before taking its content. + DC->CommentStream.flush(); + StringRef Comments = DC->CommentsToEmit.str(); + // Get the default information for printing a comment. + const MCAsmInfo *MAI = DC->getAsmInfo(); + const char *CommentBegin = MAI->getCommentString(); + unsigned CommentColumn = MAI->getCommentColumn(); + bool IsFirst = true; + while (!Comments.empty()) { + if (!IsFirst) + FormattedOS << '\n'; + // Emit a line of comments. + FormattedOS.PadToColumn(CommentColumn); + size_t Position = Comments.find('\n'); + FormattedOS << CommentBegin << ' ' << Comments.substr(0, Position); + // Move after the newline character. + Comments = Comments.substr(Position+1); + IsFirst = false; + } + FormattedOS.flush(); + + // Tell the comment stream that the vector changed underneath it. + DC->CommentsToEmit.clear(); + DC->CommentStream.resync(); +} + +/// \brief Gets latency information for \p Inst form the itinerary +/// scheduling model, based on \p DC information. +/// \return The maximum expected latency over all the operands or -1 +/// if no information are available. +static int getItineraryLatency(LLVMDisasmContext *DC, const MCInst &Inst) { + const int NoInformationAvailable = -1; + + // Check if we have a CPU to get the itinerary information. + if (DC->getCPU().empty()) + return NoInformationAvailable; + + // Get itinerary information. + const MCSubtargetInfo *STI = DC->getSubtargetInfo(); + InstrItineraryData IID = STI->getInstrItineraryForCPU(DC->getCPU()); + // Get the scheduling class of the requested instruction. + const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode()); + unsigned SCClass = Desc.getSchedClass(); + + int Latency = 0; + for (unsigned OpIdx = 0, OpIdxEnd = Inst.getNumOperands(); OpIdx != OpIdxEnd; + ++OpIdx) + Latency = std::max(Latency, IID.getOperandCycle(SCClass, OpIdx)); + + return Latency; +} + +/// \brief Gets latency information for \p Inst, based on \p DC information. +/// \return The maximum expected latency over all the definitions or -1 +/// if no information are available. +static int getLatency(LLVMDisasmContext *DC, const MCInst &Inst) { + // Try to compute scheduling information. + const MCSubtargetInfo *STI = DC->getSubtargetInfo(); + const MCSchedModel *SCModel = STI->getSchedModel(); + const int NoInformationAvailable = -1; + + // Check if we have a scheduling model for instructions. + if (!SCModel || !SCModel->hasInstrSchedModel()) + // Try to fall back to the itinerary model if we do not have a + // scheduling model. + return getItineraryLatency(DC, Inst); + + // Get the scheduling class of the requested instruction. + const MCInstrDesc& Desc = DC->getInstrInfo()->get(Inst.getOpcode()); + unsigned SCClass = Desc.getSchedClass(); + const MCSchedClassDesc *SCDesc = SCModel->getSchedClassDesc(SCClass); + // Resolving the variant SchedClass requires an MI to pass to + // SubTargetInfo::resolveSchedClass. + if (!SCDesc || !SCDesc->isValid() || SCDesc->isVariant()) + return NoInformationAvailable; + + // Compute output latency. + int Latency = 0; + for (unsigned DefIdx = 0, DefEnd = SCDesc->NumWriteLatencyEntries; + DefIdx != DefEnd; ++DefIdx) { + // Lookup the definition's write latency in SubtargetInfo. 
+ const MCWriteLatencyEntry *WLEntry = STI->getWriteLatencyEntry(SCDesc, + DefIdx); + Latency = std::max(Latency, WLEntry->Cycles); + } + + return Latency; +} + + +/// \brief Emits latency information in DC->CommentStream for \p Inst, based +/// on the information available in \p DC. +static void emitLatency(LLVMDisasmContext *DC, const MCInst &Inst) { + int Latency = getLatency(DC, Inst); + + // Report only interesting latency. + if (Latency < 2) + return; + + DC->CommentStream << "Latency: " << Latency << '\n'; +} + // // LLVMDisasmInstruction() disassembles a single instruction using the // disassembler context specified in the parameter DC. The bytes of the @@ -154,8 +275,10 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, const MCDisassembler *DisAsm = DC->getDisAsm(); MCInstPrinter *IP = DC->getIP(); MCDisassembler::DecodeStatus S; + SmallVector InsnStr; + raw_svector_ostream Annotations(InsnStr); S = DisAsm->getInstruction(Inst, Size, MemoryObject, PC, - /*REMOVE*/ nulls(), DC->CommentStream); + /*REMOVE*/ nulls(), Annotations); switch (S) { case MCDisassembler::Fail: case MCDisassembler::SoftFail: @@ -163,17 +286,18 @@ size_t LLVMDisasmInstruction(LLVMDisasmContextRef DCR, uint8_t *Bytes, return 0; case MCDisassembler::Success: { - DC->CommentStream.flush(); - StringRef Comments = DC->CommentsToEmit.str(); + Annotations.flush(); + StringRef AnnotationsStr = Annotations.str(); SmallVector InsnStr; raw_svector_ostream OS(InsnStr); - IP->printInst(&Inst, OS, Comments); - OS.flush(); + formatted_raw_ostream FormattedOS(OS); + IP->printInst(&Inst, FormattedOS, AnnotationsStr); + + if (DC->getOptions() & LLVMDisassembler_Option_PrintLatency) + emitLatency(DC, Inst); - // Tell the comment stream that the vector changed underneath it. 
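Taking stock of the latency plumbing added above: getLatency() prefers the subtarget's scheduling model, falls back to the itinerary model, and returns -1 when neither has data; emitLatency() then only reports values of 2 or more. A standalone sketch of that fallback-and-threshold shape, with toy table lookups standing in for the real MCSubtargetInfo queries:

    #include <iostream>

    // Stand-ins for the two information sources; -1 plays the role of
    // NoInformationAvailable in the patch.
    int schedModelLatency(unsigned Opcode) { return Opcode == 1 ? 4 : -1; }
    int itineraryLatency(unsigned Opcode)  { return Opcode == 2 ? 1 : -1; }

    int getLatency(unsigned Opcode) {
      int Latency = schedModelLatency(Opcode);
      if (Latency >= 0)
        return Latency;
      // No scheduling model data: fall back to the itinerary model.
      return itineraryLatency(Opcode);
    }

    void emitLatency(unsigned Opcode) {
      int Latency = getLatency(Opcode);
      if (Latency < 2)   // Report only interesting latency.
        return;
      std::cout << "Latency: " << Latency << '\n';
    }

    int main() {
      emitLatency(1);    // prints "Latency: 4"
      emitLatency(2);    // latency 1: suppressed as uninteresting
      emitLatency(3);    // no data in either model: suppressed
      return 0;
    }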
- DC->CommentsToEmit.clear(); - DC->CommentStream.resync(); + emitComments(DC, FormattedOS); assert(OutStringSize != 0 && "Output buffer cannot be zero size"); size_t OutputSize = std::min(OutStringSize-1, InsnStr.size()); @@ -195,12 +319,14 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR; MCInstPrinter *IP = DC->getIP(); IP->setUseMarkup(1); + DC->addOptions(LLVMDisassembler_Option_UseMarkup); Options &= ~LLVMDisassembler_Option_UseMarkup; } if (Options & LLVMDisassembler_Option_PrintImmHex){ LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR; MCInstPrinter *IP = DC->getIP(); IP->setPrintImmHex(1); + DC->addOptions(LLVMDisassembler_Option_PrintImmHex); Options &= ~LLVMDisassembler_Option_PrintImmHex; } if (Options & LLVMDisassembler_Option_AsmPrinterVariant){ @@ -216,8 +342,21 @@ int LLVMSetDisasmOptions(LLVMDisasmContextRef DCR, uint64_t Options){ AsmPrinterVariant, *MAI, *MII, *MRI, *STI); if (IP) { DC->setIP(IP); + DC->addOptions(LLVMDisassembler_Option_AsmPrinterVariant); Options &= ~LLVMDisassembler_Option_AsmPrinterVariant; } } + if (Options & LLVMDisassembler_Option_SetInstrComments) { + LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR; + MCInstPrinter *IP = DC->getIP(); + IP->setCommentStream(DC->CommentStream); + DC->addOptions(LLVMDisassembler_Option_SetInstrComments); + Options &= ~LLVMDisassembler_Option_SetInstrComments; + } + if (Options & LLVMDisassembler_Option_PrintLatency) { + LLVMDisasmContext *DC = (LLVMDisasmContext *)DCR; + DC->addOptions(LLVMDisassembler_Option_PrintLatency); + Options &= ~LLVMDisassembler_Option_PrintLatency; + } return (Options == 0); } diff --git a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h index 6eb59d0..4855af2 100644 --- a/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h +++ b/contrib/llvm/lib/MC/MCDisassembler/Disassembler.h @@ -73,6 +73,10 @@ private: llvm::OwningPtr DisAsm; // The instruction printer for the target architecture. llvm::OwningPtr IP; + // The options used to set up the disassembler. + uint64_t Options; + // The CPU string. + std::string CPU; public: // Comment stream and backing vector. @@ -90,6 +94,7 @@ public: MCInstPrinter *iP) : TripleName(tripleName), DisInfo(disInfo), TagType(tagType), GetOpInfo(getOpInfo), SymbolLookUp(symbolLookUp), TheTarget(theTarget), + Options(0), CommentStream(CommentsToEmit) { MAI.reset(mAI); MRI.reset(mRI); @@ -114,6 +119,10 @@ public: const MCSubtargetInfo *getSubtargetInfo() const { return MSI.get(); } MCInstPrinter *getIP() { return IP.get(); } void setIP(MCInstPrinter *NewIP) { IP.reset(NewIP); } + uint64_t getOptions() const { return Options; } + void addOptions(uint64_t Options) { this->Options |= Options; } + StringRef getCPU() const { return CPU; } + void setCPU(const char *CPU) { this->CPU = CPU; } }; } // namespace llvm diff --git a/contrib/llvm/lib/MC/MCDwarf.cpp b/contrib/llvm/lib/MC/MCDwarf.cpp index 18982e9..479f445 100644 --- a/contrib/llvm/lib/MC/MCDwarf.cpp +++ b/contrib/llvm/lib/MC/MCDwarf.cpp @@ -16,7 +16,6 @@ #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectFileInfo.h" -#include "llvm/MC/MCObjectWriter.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" @@ -47,20 +46,15 @@ using namespace llvm; // Range of line offsets in a special line info. opcode. #define DWARF2_LINE_RANGE 14 -// Define the architecture-dependent minimum instruction length (in bytes). 
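Stepping back to the Disassembler.cpp changes a few hunks up: LLVMCreateDisasmCPU now fails by returning NULL instead of asserting, and LLVMSetDisasmOptions records which options took effect. A usage sketch of the C API as it stands after this patch; the triple, CPU string and byte sequence are illustrative only:

    #include <llvm-c/Disassembler.h>
    #include <llvm-c/Target.h>
    #include <stdint.h>
    #include <stdio.h>

    int main(void) {
      LLVMInitializeAllTargetInfos();
      LLVMInitializeAllTargetMCs();
      LLVMInitializeAllDisassemblers();

      /* NULL callbacks: no symbolic disassembly. */
      LLVMDisasmContextRef DC = LLVMCreateDisasmCPU(
          "x86_64-unknown-freebsd10.0", "core2", NULL, 0, NULL, NULL);
      if (!DC)  /* Unknown triple now yields NULL instead of asserting. */
        return 1;

      LLVMSetDisasmOptions(DC, LLVMDisassembler_Option_PrintLatency);

      uint8_t Bytes[] = { 0x48, 0x89, 0xd8, 0xc3 }; /* mov %rbx,%rax; ret */
      uint64_t PC = 0, Left = sizeof(Bytes);
      uint8_t *P = Bytes;
      char Insn[128];
      while (Left) {
        size_t N = LLVMDisasmInstruction(DC, P, Left, PC, Insn, sizeof(Insn));
        if (!N)   /* Undecodable byte: stop. */
          break;
        printf("0x%02llx:%s\n", (unsigned long long)PC, Insn);
        P += N; Left -= N; PC += N;
      }
      LLVMDisasmDestroy(DC);
      return 0;
    }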
-// This value should be rather too small than too big. -#define DWARF2_LINE_MIN_INSN_LENGTH 1 - -// Note: when DWARF2_LINE_MIN_INSN_LENGTH == 1 which is the current setting, -// this routine is a nop and will be optimized away. -static inline uint64_t ScaleAddrDelta(uint64_t AddrDelta) { - if (DWARF2_LINE_MIN_INSN_LENGTH == 1) +static inline uint64_t ScaleAddrDelta(MCContext &Context, uint64_t AddrDelta) { + unsigned MinInsnLength = Context.getAsmInfo()->getMinInstAlignment(); + if (MinInsnLength == 1) return AddrDelta; - if (AddrDelta % DWARF2_LINE_MIN_INSN_LENGTH != 0) { + if (AddrDelta % MinInsnLength != 0) { // TODO: report this error, but really only once. ; } - return AddrDelta / DWARF2_LINE_MIN_INSN_LENGTH; + return AddrDelta / MinInsnLength; } // @@ -182,9 +176,9 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS, // At this point we want to emit/create the sequence to encode the delta in // line numbers and the increment of the address from the previous Label // and the current Label. - const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo(); + const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo(); MCOS->EmitDwarfAdvanceLineAddr(LineDelta, LastLabel, Label, - asmInfo.getPointerSize()); + asmInfo->getPointerSize()); LastLine = it->getLine(); LastLabel = Label; @@ -210,9 +204,9 @@ static inline void EmitDwarfLineTable(MCStreamer *MCOS, // Switch back the dwarf line section. MCOS->SwitchSection(context.getObjectFileInfo()->getDwarfLineSection()); - const MCAsmInfo &asmInfo = MCOS->getContext().getAsmInfo(); + const MCAsmInfo *asmInfo = MCOS->getContext().getAsmInfo(); MCOS->EmitDwarfAdvanceLineAddr(INT64_MAX, LastLabel, SectionEnd, - asmInfo.getPointerSize()); + asmInfo->getPointerSize()); } // @@ -274,10 +268,10 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { // total length, the 2 bytes for the version, and these 4 bytes for the // length of the prologue. MCOS->EmitAbsValue(MakeStartMinusEndExpr(*MCOS, *LineStartSym, *ProEndSym, - (4 + 2 + 4)), 4, 0); + (4 + 2 + 4)), 4); // Parameters of the state machine, are next. - MCOS->EmitIntValue(DWARF2_LINE_MIN_INSN_LENGTH, 1); + MCOS->EmitIntValue(context.getAsmInfo()->getMinInstAlignment(), 1); MCOS->EmitIntValue(DWARF2_LINE_DEFAULT_IS_STMT, 1); MCOS->EmitIntValue(DWARF2_LINE_BASE, 1); MCOS->EmitIntValue(DWARF2_LINE_RANGE, 1); @@ -338,7 +332,7 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { EmitDwarfLineTable(MCOS, Sec, Line, CUID); } - if (MCOS->getContext().getAsmInfo().getLinkerRequiresNonEmptyDwarfLines() + if (MCOS->getContext().getAsmInfo()->getLinkerRequiresNonEmptyDwarfLines() && MCLineSectionOrder.begin() == MCLineSectionOrder.end()) { // The darwin9 linker has a bug (see PR8715). For for 32-bit architectures // it requires: @@ -357,32 +351,24 @@ const MCSymbol *MCDwarfFileTable::EmitCU(MCStreamer *MCOS, unsigned CUID) { return LineStartSym; } -/// Utility function to write the encoding to an object writer. -void MCDwarfLineAddr::Write(MCObjectWriter *OW, int64_t LineDelta, - uint64_t AddrDelta) { - SmallString<256> Tmp; - raw_svector_ostream OS(Tmp); - MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS); - OW->WriteBytes(OS.str()); -} - /// Utility function to emit the encoding to a streamer. 
void MCDwarfLineAddr::Emit(MCStreamer *MCOS, int64_t LineDelta, uint64_t AddrDelta) { + MCContext &Context = MCOS->getContext(); SmallString<256> Tmp; raw_svector_ostream OS(Tmp); - MCDwarfLineAddr::Encode(LineDelta, AddrDelta, OS); + MCDwarfLineAddr::Encode(Context, LineDelta, AddrDelta, OS); MCOS->EmitBytes(OS.str()); } /// Utility function to encode a Dwarf pair of LineDelta and AddrDeltas. -void MCDwarfLineAddr::Encode(int64_t LineDelta, uint64_t AddrDelta, - raw_ostream &OS) { +void MCDwarfLineAddr::Encode(MCContext &Context, int64_t LineDelta, + uint64_t AddrDelta, raw_ostream &OS) { uint64_t Temp, Opcode; bool NeedCopy = false; // Scale the address delta by the minimum instruction length. - AddrDelta = ScaleAddrDelta(AddrDelta); + AddrDelta = ScaleAddrDelta(Context, AddrDelta); // A LineDelta of INT64_MAX is a signal that this is actually a // DW_LNE_end_sequence. We cannot use special opcodes here, since we want the @@ -481,7 +467,8 @@ static void EmitGenDwarfAbbrev(MCStreamer *MCOS) { EmitAbbrev(MCOS, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr); EmitAbbrev(MCOS, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr); EmitAbbrev(MCOS, dwarf::DW_AT_name, dwarf::DW_FORM_string); - EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string); + if (!context.getCompilationDir().empty()) + EmitAbbrev(MCOS, dwarf::DW_AT_comp_dir, dwarf::DW_FORM_string); StringRef DwarfDebugFlags = context.getDwarfDebugFlags(); if (!DwarfDebugFlags.empty()) EmitAbbrev(MCOS, dwarf::DW_AT_APPLE_flags, dwarf::DW_FORM_string); @@ -534,8 +521,8 @@ static void EmitGenDwarfAranges(MCStreamer *MCOS, // Figure the padding after the header before the table of address and size // pairs who's values are PointerSize'ed. - const MCAsmInfo &asmInfo = context.getAsmInfo(); - int AddrSize = asmInfo.getPointerSize(); + const MCAsmInfo *asmInfo = context.getAsmInfo(); + int AddrSize = asmInfo->getPointerSize(); int Pad = 2 * AddrSize - (Length & (2 * AddrSize - 1)); if (Pad == 2 * AddrSize) Pad = 0; @@ -615,8 +602,8 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitIntValue(0, 4); } - const MCAsmInfo &asmInfo = context.getAsmInfo(); - int AddrSize = asmInfo.getPointerSize(); + const MCAsmInfo *asmInfo = context.getAsmInfo(); + int AddrSize = asmInfo->getPointerSize(); // The 1 byte size of an address. MCOS->EmitIntValue(AddrSize, 1); @@ -657,8 +644,10 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. // AT_comp_dir, the working directory the assembly was done in. - MCOS->EmitBytes(context.getCompilationDir()); - MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. + if (!context.getCompilationDir().empty()) { + MCOS->EmitBytes(context.getCompilationDir()); + MCOS->EmitIntValue(0, 1); // NULL byte to terminate the string. + } // AT_APPLE_flags, the command line arguments of the assembler tool. StringRef DwarfDebugFlags = context.getDwarfDebugFlags(); @@ -743,9 +732,9 @@ static void EmitGenDwarfInfo(MCStreamer *MCOS, void MCGenDwarfInfo::Emit(MCStreamer *MCOS, const MCSymbol *LineSectionSymbol) { // Create the dwarf sections in this order (.debug_line already created). 
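The MCDwarfLineAddr::Encode change above threads MCContext through so the address delta can be scaled by MCAsmInfo::getMinInstAlignment() before an opcode is chosen. A standalone sketch of the scaling plus the core special-opcode computation, assuming the stock DWARF2 parameters used in this file (opcode base 13, line base -5, line range 14):

    #include <cassert>
    #include <cstdint>
    #include <iostream>

    const int OPCODE_BASE = 13;  // DWARF2_LINE_OPCODE_BASE
    const int LINE_BASE   = -5;  // DWARF2_LINE_BASE
    const int LINE_RANGE  = 14;  // DWARF2_LINE_RANGE

    // Scale the raw byte delta down by the minimum instruction length,
    // as ScaleAddrDelta now does via the MCAsmInfo in the context.
    uint64_t scaleAddrDelta(uint64_t AddrDelta, unsigned MinInsnLength) {
      if (MinInsnLength == 1)
        return AddrDelta;
      assert(AddrDelta % MinInsnLength == 0 && "unaligned address delta");
      return AddrDelta / MinInsnLength;
    }

    // Try to fold (LineDelta, AddrDelta) into a single special opcode;
    // -1 means "out of range, fall back to standard opcodes".
    int specialOpcode(int64_t LineDelta, uint64_t AddrDelta) {
      if (LineDelta < LINE_BASE || LineDelta >= LINE_BASE + LINE_RANGE)
        return -1;
      uint64_t Opcode = (LineDelta - LINE_BASE) + (LINE_RANGE * AddrDelta) +
                        OPCODE_BASE;
      return Opcode <= 255 ? (int)Opcode : -1;
    }

    int main() {
      // 4-byte instructions (e.g. PPC): a 16-byte advance scales to 4.
      uint64_t Delta = scaleAddrDelta(16, 4);
      std::cout << specialOpcode(1, Delta) << '\n'; // (1+5) + 14*4 + 13 = 75
      return 0;
    }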
MCContext &context = MCOS->getContext(); - const MCAsmInfo &AsmInfo = context.getAsmInfo(); + const MCAsmInfo *AsmInfo = context.getAsmInfo(); bool CreateDwarfSectionSymbols = - AsmInfo.doesDwarfUseRelocationsAcrossSections(); + AsmInfo->doesDwarfUseRelocationsAcrossSections(); if (!CreateDwarfSectionSymbols) LineSectionSymbol = NULL; MCSymbol *AbbrevSectionSymbol = NULL; @@ -821,9 +810,9 @@ void MCGenDwarfLabelEntry::Make(MCSymbol *Symbol, MCStreamer *MCOS, static int getDataAlignmentFactor(MCStreamer &streamer) { MCContext &context = streamer.getContext(); - const MCAsmInfo &asmInfo = context.getAsmInfo(); - int size = asmInfo.getCalleeSaveStackSlotSize(); - if (asmInfo.isStackGrowthDirectionUp()) + const MCAsmInfo *asmInfo = context.getAsmInfo(); + int size = asmInfo->getCalleeSaveStackSlotSize(); + if (asmInfo->isStackGrowthDirectionUp()) return size; else return -size; @@ -837,7 +826,7 @@ static unsigned getSizeForEncoding(MCStreamer &streamer, default: llvm_unreachable("Unknown Encoding"); case dwarf::DW_EH_PE_absptr: case dwarf::DW_EH_PE_signed: - return context.getAsmInfo().getPointerSize(); + return context.getAsmInfo()->getPointerSize(); case dwarf::DW_EH_PE_udata2: case dwarf::DW_EH_PE_sdata2: return 2; @@ -853,10 +842,10 @@ static unsigned getSizeForEncoding(MCStreamer &streamer, static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol, unsigned symbolEncoding, const char *comment = 0) { MCContext &context = streamer.getContext(); - const MCAsmInfo &asmInfo = context.getAsmInfo(); - const MCExpr *v = asmInfo.getExprForFDESymbol(&symbol, - symbolEncoding, - streamer); + const MCAsmInfo *asmInfo = context.getAsmInfo(); + const MCExpr *v = asmInfo->getExprForFDESymbol(&symbol, + symbolEncoding, + streamer); unsigned size = getSizeForEncoding(streamer, symbolEncoding); if (streamer.isVerboseAsm() && comment) streamer.AddComment(comment); streamer.EmitAbsValue(v, size); @@ -865,25 +854,14 @@ static void EmitSymbol(MCStreamer &streamer, const MCSymbol &symbol, static void EmitPersonality(MCStreamer &streamer, const MCSymbol &symbol, unsigned symbolEncoding) { MCContext &context = streamer.getContext(); - const MCAsmInfo &asmInfo = context.getAsmInfo(); - const MCExpr *v = asmInfo.getExprForPersonalitySymbol(&symbol, - symbolEncoding, - streamer); + const MCAsmInfo *asmInfo = context.getAsmInfo(); + const MCExpr *v = asmInfo->getExprForPersonalitySymbol(&symbol, + symbolEncoding, + streamer); unsigned size = getSizeForEncoding(streamer, symbolEncoding); streamer.EmitValue(v, size); } -static const MachineLocation TranslateMachineLocation( - const MCRegisterInfo &MRI, - const MachineLocation &Loc) { - unsigned Reg = Loc.getReg() == MachineLocation::VirtualFP ? - MachineLocation::VirtualFP : - unsigned(MRI.getDwarfRegNum(Loc.getReg(), true)); - const MachineLocation &NewLoc = Loc.isReg() ? - MachineLocation(Reg) : MachineLocation(Reg, Loc.getOffset()); - return NewLoc; -} - namespace { class FrameEmitterImpl { int CFAOffset; @@ -898,9 +876,7 @@ namespace { void setSectionStart(const MCSymbol *Label) { SectionStart = Label; } - /// EmitCompactUnwind - Emit the unwind information in a compact way. If - /// we're successful, return 'true'. Otherwise, return 'false' and it will - /// emit the normal CIE and FDE. + /// EmitCompactUnwind - Emit the unwind information in a compact way. 
void EmitCompactUnwind(MCStreamer &streamer, const MCDwarfFrameInfo &frame); @@ -914,7 +890,7 @@ namespace { const MCSymbol &cieStart, const MCDwarfFrameInfo &frame); void EmitCFIInstructions(MCStreamer &streamer, - const std::vector &Instrs, + ArrayRef Instrs, MCSymbol *BaseLabel); void EmitCFIInstruction(MCStreamer &Streamer, const MCCFIInstruction &Instr); @@ -986,6 +962,10 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer, Streamer.EmitULEB128IntValue(Reg2); return; } + case MCCFIInstruction::OpWindowSave: { + Streamer.EmitIntValue(dwarf::DW_CFA_GNU_window_save, 1); + return; + } case MCCFIInstruction::OpUndefined: { unsigned Reg = Instr.getRegister(); if (VerboseAsm) { @@ -1116,7 +1096,7 @@ void FrameEmitterImpl::EmitCFIInstruction(MCStreamer &Streamer, /// EmitFrameMoves - Emit frame instructions to describe the layout of the /// frame. void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, - const std::vector &Instrs, + ArrayRef Instrs, MCSymbol *BaseLabel) { for (unsigned i = 0, N = Instrs.size(); i < N; ++i) { const MCCFIInstruction &Instr = Instrs[i]; @@ -1138,9 +1118,7 @@ void FrameEmitterImpl::EmitCFIInstructions(MCStreamer &streamer, } } -/// EmitCompactUnwind - Emit the unwind information in a compact way. If we're -/// successful, return 'true'. Otherwise, return 'false' and it will emit the -/// normal CIE and FDE. +/// EmitCompactUnwind - Emit the unwind information in a compact way. void FrameEmitterImpl::EmitCompactUnwind(MCStreamer &Streamer, const MCDwarfFrameInfo &Frame) { MCContext &Context = Streamer.getContext(); @@ -1219,7 +1197,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, bool IsSignalFrame, unsigned lsdaEncoding) { MCContext &context = streamer.getContext(); - const MCRegisterInfo &MRI = context.getRegisterInfo(); + const MCRegisterInfo *MRI = context.getRegisterInfo(); const MCObjectFileInfo *MOFI = context.getObjectFileInfo(); bool verboseAsm = streamer.isVerboseAsm(); @@ -1267,7 +1245,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, // Code Alignment Factor if (verboseAsm) streamer.AddComment("CIE Code Alignment Factor"); - streamer.EmitULEB128IntValue(1); + streamer.EmitULEB128IntValue(context.getAsmInfo()->getMinInstAlignment()); // Data Alignment Factor if (verboseAsm) streamer.AddComment("CIE Data Alignment Factor"); @@ -1275,7 +1253,7 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, // Return Address Register if (verboseAsm) streamer.AddComment("CIE Return Address Column"); - streamer.EmitULEB128IntValue(MRI.getDwarfRegNum(MRI.getRARegister(), true)); + streamer.EmitULEB128IntValue(MRI->getDwarfRegNum(MRI->getRARegister(), true)); // Augmentation Data Length (optional) @@ -1315,38 +1293,13 @@ const MCSymbol &FrameEmitterImpl::EmitCIE(MCStreamer &streamer, // Initial Instructions - const MCAsmInfo &MAI = context.getAsmInfo(); - const std::vector &Moves = MAI.getInitialFrameState(); - std::vector Instructions; - - for (int i = 0, n = Moves.size(); i != n; ++i) { - MCSymbol *Label = Moves[i].getLabel(); - const MachineLocation &Dst = - TranslateMachineLocation(MRI, Moves[i].getDestination()); - const MachineLocation &Src = - TranslateMachineLocation(MRI, Moves[i].getSource()); - - if (Dst.isReg()) { - assert(Dst.getReg() == MachineLocation::VirtualFP); - assert(!Src.isReg()); - MCCFIInstruction Inst = - MCCFIInstruction::createDefCfa(Label, Src.getReg(), -Src.getOffset()); - Instructions.push_back(Inst); - } else { - assert(Src.isReg()); - unsigned Reg = Src.getReg(); - 
int Offset = Dst.getOffset(); - MCCFIInstruction Inst = - MCCFIInstruction::createOffset(Label, Reg, Offset); - Instructions.push_back(Inst); - } - } - + const MCAsmInfo *MAI = context.getAsmInfo(); + const std::vector &Instructions = + MAI->getInitialFrameState(); EmitCFIInstructions(streamer, Instructions, NULL); // Padding - streamer.EmitValueToAlignment(IsEH - ? 4 : context.getAsmInfo().getPointerSize()); + streamer.EmitValueToAlignment(IsEH ? 4 : MAI->getPointerSize()); streamer.EmitLabel(sectionEnd); return *sectionStart; @@ -1376,13 +1329,13 @@ MCSymbol *FrameEmitterImpl::EmitFDE(MCStreamer &streamer, streamer.EmitLabel(fdeStart); // CIE Pointer - const MCAsmInfo &asmInfo = context.getAsmInfo(); + const MCAsmInfo *asmInfo = context.getAsmInfo(); if (IsEH) { const MCExpr *offset = MakeStartMinusEndExpr(streamer, cieStart, *fdeStart, 0); if (verboseAsm) streamer.AddComment("FDE CIE Offset"); streamer.EmitAbsValue(offset, 4); - } else if (!asmInfo.doesDwarfUseRelocationsAcrossSections()) { + } else if (!asmInfo->doesDwarfUseRelocationsAcrossSections()) { const MCExpr *offset = MakeStartMinusEndExpr(streamer, *SectionStart, cieStart, 0); streamer.EmitAbsValue(offset, 4); @@ -1469,36 +1422,33 @@ namespace llvm { }; } -void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, - bool UsingCFI, - bool IsEH) { +void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, MCAsmBackend *MAB, + bool UsingCFI, bool IsEH) { + Streamer.generateCompactUnwindEncodings(MAB); + MCContext &Context = Streamer.getContext(); - MCObjectFileInfo *MOFI = - const_cast(Context.getObjectFileInfo()); + const MCObjectFileInfo *MOFI = Context.getObjectFileInfo(); FrameEmitterImpl Emitter(UsingCFI, IsEH); ArrayRef FrameArray = Streamer.getFrameInfos(); // Emit the compact unwind info if available. if (IsEH && MOFI->getCompactUnwindSection()) { - unsigned NumFrameInfos = Streamer.getNumFrameInfos(); bool SectionEmitted = false; - - if (NumFrameInfos) { - for (unsigned i = 0; i < NumFrameInfos; ++i) { - const MCDwarfFrameInfo &Frame = Streamer.getFrameInfo(i); - if (Frame.CompactUnwindEncoding == 0) continue; - if (!SectionEmitted) { - Streamer.SwitchSection(MOFI->getCompactUnwindSection()); - Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize()); - SectionEmitted = true; - } - Emitter.EmitCompactUnwind(Streamer, Frame); + for (unsigned i = 0, n = FrameArray.size(); i < n; ++i) { + const MCDwarfFrameInfo &Frame = FrameArray[i]; + if (Frame.CompactUnwindEncoding == 0) continue; + if (!SectionEmitted) { + Streamer.SwitchSection(MOFI->getCompactUnwindSection()); + Streamer.EmitValueToAlignment(Context.getAsmInfo()->getPointerSize()); + SectionEmitted = true; } + Emitter.EmitCompactUnwind(Streamer, Frame); } } - const MCSection &Section = IsEH ? *MOFI->getEHFrameSection() : - *MOFI->getDwarfFrameSection(); + const MCSection &Section = + IsEH ? 
*const_cast(MOFI)->getEHFrameSection() : + *MOFI->getDwarfFrameSection(); Streamer.SwitchSection(&Section); MCSymbol *SectionStart = Context.CreateTempSymbol(); Streamer.EmitLabel(SectionStart); @@ -1525,22 +1475,26 @@ void MCDwarfFrameEmitter::Emit(MCStreamer &Streamer, Streamer.EmitLabel(FDEEnd); } - Streamer.EmitValueToAlignment(Context.getAsmInfo().getPointerSize()); + Streamer.EmitValueToAlignment(Context.getAsmInfo()->getPointerSize()); if (FDEEnd) Streamer.EmitLabel(FDEEnd); } void MCDwarfFrameEmitter::EmitAdvanceLoc(MCStreamer &Streamer, uint64_t AddrDelta) { + MCContext &Context = Streamer.getContext(); SmallString<256> Tmp; raw_svector_ostream OS(Tmp); - MCDwarfFrameEmitter::EncodeAdvanceLoc(AddrDelta, OS); + MCDwarfFrameEmitter::EncodeAdvanceLoc(Context, AddrDelta, OS); Streamer.EmitBytes(OS.str()); } -void MCDwarfFrameEmitter::EncodeAdvanceLoc(uint64_t AddrDelta, +void MCDwarfFrameEmitter::EncodeAdvanceLoc(MCContext &Context, + uint64_t AddrDelta, raw_ostream &OS) { - // FIXME: Assumes the code alignment factor is 1. + // Scale the address delta by the minimum instruction length. + AddrDelta = ScaleAddrDelta(Context, AddrDelta); + if (AddrDelta == 0) { } else if (isUIntN(6, AddrDelta)) { uint8_t Opcode = dwarf::DW_CFA_advance_loc | AddrDelta; diff --git a/contrib/llvm/lib/MC/MCELF.cpp b/contrib/llvm/lib/MC/MCELF.cpp index 560cdbc..ebb189e 100644 --- a/contrib/llvm/lib/MC/MCELF.cpp +++ b/contrib/llvm/lib/MC/MCELF.cpp @@ -36,8 +36,8 @@ unsigned MCELF::GetBinding(const MCSymbolData &SD) { void MCELF::SetType(MCSymbolData &SD, unsigned Type) { assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || - Type == ELF::STT_FILE || Type == ELF::STT_COMMON || - Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC); + Type == ELF::STT_COMMON || Type == ELF::STT_TLS || + Type == ELF::STT_GNU_IFUNC); uint32_t OtherFlags = SD.getFlags() & ~(0xf << ELF_STT_Shift); SD.setFlags(OtherFlags | (Type << ELF_STT_Shift)); @@ -47,8 +47,7 @@ unsigned MCELF::GetType(const MCSymbolData &SD) { uint32_t Type = (SD.getFlags() & (0xf << ELF_STT_Shift)) >> ELF_STT_Shift; assert(Type == ELF::STT_NOTYPE || Type == ELF::STT_OBJECT || Type == ELF::STT_FUNC || Type == ELF::STT_SECTION || - Type == ELF::STT_FILE || Type == ELF::STT_COMMON || - Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC); + Type == ELF::STT_COMMON || Type == ELF::STT_TLS || Type == ELF::STT_GNU_IFUNC); return Type; } diff --git a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp index 4cac84d..0c39e4a 100644 --- a/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp +++ b/contrib/llvm/lib/MC/MCELFObjectTargetWriter.cpp @@ -39,13 +39,23 @@ const MCSymbol *MCELFObjectTargetWriter::undefinedExplicitRelSym(const MCValue & return &Symbol.AliasedSymbol(); } -void MCELFObjectTargetWriter::adjustFixupOffset(const MCFixup &Fixup, - uint64_t &RelocOffset) { +// ELF doesn't require relocations to be in any order. We sort by the r_offset, +// just to match gnu as for easier comparison. The use type and index is an +// arbitrary way of making the sort deterministic. 
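EncodeAdvanceLoc above now applies the same minimum-instruction-length scaling before choosing an opcode width. A standalone sketch of the width selection using the standard DW_CFA opcode values; multi-byte operands are written little-endian here for simplicity, whereas the real emitter honors target endianness:

    #include <cstdint>
    #include <vector>

    const uint8_t DW_CFA_advance_loc  = 0x40; // delta packed in the low 6 bits
    const uint8_t DW_CFA_advance_loc1 = 0x02;
    const uint8_t DW_CFA_advance_loc2 = 0x03;
    const uint8_t DW_CFA_advance_loc4 = 0x04;

    // Append the smallest DW_CFA_advance_loc* form that holds AddrDelta,
    // which is assumed to be already scaled by the code alignment factor.
    void encodeAdvanceLoc(uint64_t AddrDelta, std::vector<uint8_t> &Out) {
      if (AddrDelta == 0)
        return;                              // nothing to advance
      if (AddrDelta < (1ULL << 6)) {         // isUIntN(6, AddrDelta)
        Out.push_back(DW_CFA_advance_loc | (uint8_t)AddrDelta);
      } else if (AddrDelta < (1ULL << 8)) {  // isUIntN(8, AddrDelta)
        Out.push_back(DW_CFA_advance_loc1);
        Out.push_back((uint8_t)AddrDelta);
      } else if (AddrDelta < (1ULL << 16)) { // isUIntN(16, AddrDelta)
        Out.push_back(DW_CFA_advance_loc2);
        Out.push_back((uint8_t)AddrDelta);
        Out.push_back((uint8_t)(AddrDelta >> 8));
      } else {
        Out.push_back(DW_CFA_advance_loc4);
        for (int i = 0; i < 4; ++i)
          Out.push_back((uint8_t)(AddrDelta >> (8 * i)));
      }
    }

    int main() {
      std::vector<uint8_t> Out;
      encodeAdvanceLoc(5, Out);   // one byte: 0x45
      encodeAdvanceLoc(300, Out); // three bytes: 0x03 0x2c 0x01
      return Out.size() == 4 ? 0 : 1;
    }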
+static int cmpRel(const ELFRelocationEntry *AP, const ELFRelocationEntry *BP) { + const ELFRelocationEntry &A = *AP; + const ELFRelocationEntry &B = *BP; + if (A.r_offset != B.r_offset) + return B.r_offset - A.r_offset; + if (B.Type != A.Type) + return A.Type - B.Type; + if (B.Index != A.Index) + return B.Index - A.Index; + llvm_unreachable("ELFRelocs might be unstable!"); } void MCELFObjectTargetWriter::sortRelocs(const MCAssembler &Asm, std::vector &Relocs) { - // Sort by the r_offset, just like gnu as does. - array_pod_sort(Relocs.begin(), Relocs.end()); + array_pod_sort(Relocs.begin(), Relocs.end(), cmpRel); } diff --git a/contrib/llvm/lib/MC/MCELFStreamer.cpp b/contrib/llvm/lib/MC/MCELFStreamer.cpp index 116f86f..e806cb9 100644 --- a/contrib/llvm/lib/MC/MCELFStreamer.cpp +++ b/contrib/llvm/lib/MC/MCELFStreamer.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" #include "llvm/MC/MCAssembler.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCCodeEmitter.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCELF.h" @@ -96,6 +97,9 @@ void MCELFStreamer::EmitDebugLabel(MCSymbol *Symbol) { } void MCELFStreamer::EmitAssemblerFlag(MCAssemblerFlag Flag) { + // Let the target do whatever target specific stuff it needs to do. + getAssembler().getBackend().handleAssemblerFlag(Flag); + // Do any generic stuff we need to do. switch (Flag) { case MCAF_SyntaxUnified: return; // no-op here. case MCAF_Code16: return; // Change parsing mode; no-op here. @@ -148,8 +152,8 @@ static unsigned CombineSymbolTypes(unsigned T1, unsigned T2) { return T2; } -void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, - MCSymbolAttr Attribute) { +bool MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, + MCSymbolAttr Attribute) { // Indirect symbols are handled differently, to match how 'as' handles // them. This makes writing matching .o files easier. 
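On the relocation sorting in the previous file: cmpRel orders by r_offset first, with type and index used only as tie-breakers so the output is deterministic and byte-comparable with gas. The same three-key comparison expressed as a std::sort predicate over a stand-in record type (key directions follow my reading of the comparator above):

    #include <algorithm>
    #include <cstdint>
    #include <vector>

    struct Reloc { uint64_t Offset; unsigned Type, Index; };

    // Offset descending (as in the patch), then type and index purely
    // to make the order reproducible run to run.
    bool relocLess(const Reloc &A, const Reloc &B) {
      if (A.Offset != B.Offset) return A.Offset > B.Offset;
      if (A.Type != B.Type)     return A.Type < B.Type;
      return A.Index > B.Index;
    }

    int main() {
      std::vector<Reloc> Relocs;
      Relocs.push_back(Reloc{8, 2, 0});
      Relocs.push_back(Reloc{8, 1, 0});
      Relocs.push_back(Reloc{4, 3, 1});
      std::sort(Relocs.begin(), Relocs.end(), relocLess);
      // Order is now (8,1,0), (8,2,0), (4,3,1): fully deterministic.
      return 0;
    }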
if (Attribute == MCSA_IndirectSymbol) { @@ -159,7 +163,7 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, ISD.Symbol = Symbol; ISD.SectionData = getCurrentSectionData(); getAssembler().getIndirectSymbols().push_back(ISD); - return; + return true; } // Adding a symbol attribute always introduces the symbol, note that an @@ -182,7 +186,7 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_WeakDefAutoPrivate: case MCSA_Invalid: case MCSA_IndirectSymbol: - llvm_unreachable("Invalid symbol attribute for ELF!"); + return false; case MCSA_NoDeadStrip: case MCSA_ELF_TypeGnuUniqueObject: @@ -251,6 +255,8 @@ void MCELFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCELF::SetVisibility(SD, ELF::STV_INTERNAL); break; } + + return true; } void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, @@ -270,7 +276,8 @@ void MCELFStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, ELF::SHF_WRITE | ELF::SHF_ALLOC, SectionKind::getBSS()); - Symbol->setSection(*Section); + + AssignSection(Symbol, Section); struct LocalCommon L = {&SD, Size, ByteAlignment}; LocalCommons.push_back(L); @@ -296,12 +303,11 @@ void MCELFStreamer::EmitLocalCommonSymbol(MCSymbol *Symbol, uint64_t Size, EmitCommonSymbol(Symbol, Size, ByteAlignment); } -void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { +void MCELFStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) { if (getCurrentSectionData()->isBundleLocked()) report_fatal_error("Emitting values inside a locked bundle is forbidden"); fixSymbolsInTLSFixups(Value); - MCObjectStreamer::EmitValueImpl(Value, Size, AddrSpace); + MCObjectStreamer::EmitValueImpl(Value, Size); } void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment, @@ -314,20 +320,29 @@ void MCELFStreamer::EmitValueToAlignment(unsigned ByteAlignment, ValueSize, MaxBytesToEmit); } - -// Add a symbol for the file name of this module. This is the second -// entry in the module's symbol table (the first being the null symbol). +// Add a symbol for the file name of this module. They start after the +// null symbol and don't count as normal symbol, i.e. a non-STT_FILE symbol +// with the same name may appear. 
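For the .comment handling introduced in the EmitIdent hunk just below: the section holds one leading NUL followed by each ident string, NUL-terminated, and is created with SHF_MERGE|SHF_STRINGS and entry size 1 so the linker can deduplicate the strings. A sketch of that byte layout, assuming only what the hunk shows:

    #include <cstdio>
    #include <string>
    #include <vector>

    // Models the .comment contents EmitIdent produces: a single leading
    // NUL emitted the first time, then each ident string NUL-terminated.
    struct CommentSection {
      std::vector<char> Bytes;
      bool SeenIdent;
      CommentSection() : SeenIdent(false) {}

      void emitIdent(const std::string &S) {
        if (!SeenIdent) {
          Bytes.push_back('\0');
          SeenIdent = true;
        }
        Bytes.insert(Bytes.end(), S.begin(), S.end());
        Bytes.push_back('\0');
      }
    };

    int main() {
      CommentSection C;
      C.emitIdent("clang version 3.4");
      // Bytes: '\0' 'c' 'l' ... '4' '\0' (19 bytes total)
      std::printf("%zu bytes\n", C.Bytes.size());
      return 0;
    }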
void MCELFStreamer::EmitFileDirective(StringRef Filename) { - MCSymbol *Symbol = getAssembler().getContext().GetOrCreateSymbol(Filename); - Symbol->setSection(*getCurrentSection().first); - Symbol->setAbsolute(); - - MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); - - SD.setFlags(ELF_STT_File | ELF_STB_Local | ELF_STV_Default); + getAssembler().addFileName(Filename); +} + +void MCELFStreamer::EmitIdent(StringRef IdentString) { + const MCSection *Comment = getAssembler().getContext().getELFSection( + ".comment", ELF::SHT_PROGBITS, ELF::SHF_MERGE | ELF::SHF_STRINGS, + SectionKind::getReadOnly(), 1, ""); + PushSection(); + SwitchSection(Comment); + if (!SeenIdent) { + EmitIntValue(0, 1); + SeenIdent = true; + } + EmitBytes(IdentString); + EmitIntValue(0, 1); + PopSection(); } -void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { +void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { switch (expr->getKind()) { case MCExpr::Target: cast(expr)->fixELFSymbolsInTLSFixups(getAssembler()); @@ -363,18 +378,41 @@ void MCELFStreamer::fixSymbolsInTLSFixups(const MCExpr *expr) { case MCSymbolRefExpr::VK_Mips_GOTTPREL: case MCSymbolRefExpr::VK_Mips_TPREL_HI: case MCSymbolRefExpr::VK_Mips_TPREL_LO: - case MCSymbolRefExpr::VK_PPC_TPREL16_HA: - case MCSymbolRefExpr::VK_PPC_TPREL16_LO: - case MCSymbolRefExpr::VK_PPC_DTPREL16_HA: - case MCSymbolRefExpr::VK_PPC_DTPREL16_LO: - case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_HA: - case MCSymbolRefExpr::VK_PPC_GOT_TPREL16_LO: + case MCSymbolRefExpr::VK_PPC_DTPMOD: + case MCSymbolRefExpr::VK_PPC_TPREL: + case MCSymbolRefExpr::VK_PPC_TPREL_LO: + case MCSymbolRefExpr::VK_PPC_TPREL_HI: + case MCSymbolRefExpr::VK_PPC_TPREL_HA: + case MCSymbolRefExpr::VK_PPC_TPREL_HIGHER: + case MCSymbolRefExpr::VK_PPC_TPREL_HIGHERA: + case MCSymbolRefExpr::VK_PPC_TPREL_HIGHEST: + case MCSymbolRefExpr::VK_PPC_TPREL_HIGHESTA: + case MCSymbolRefExpr::VK_PPC_DTPREL: + case MCSymbolRefExpr::VK_PPC_DTPREL_LO: + case MCSymbolRefExpr::VK_PPC_DTPREL_HI: + case MCSymbolRefExpr::VK_PPC_DTPREL_HA: + case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHER: + case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHERA: + case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHEST: + case MCSymbolRefExpr::VK_PPC_DTPREL_HIGHESTA: + case MCSymbolRefExpr::VK_PPC_GOT_TPREL: + case MCSymbolRefExpr::VK_PPC_GOT_TPREL_LO: + case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HI: + case MCSymbolRefExpr::VK_PPC_GOT_TPREL_HA: + case MCSymbolRefExpr::VK_PPC_GOT_DTPREL: + case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_LO: + case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HI: + case MCSymbolRefExpr::VK_PPC_GOT_DTPREL_HA: case MCSymbolRefExpr::VK_PPC_TLS: - case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_HA: - case MCSymbolRefExpr::VK_PPC_GOT_TLSGD16_LO: + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD: + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_LO: + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HI: + case MCSymbolRefExpr::VK_PPC_GOT_TLSGD_HA: case MCSymbolRefExpr::VK_PPC_TLSGD: - case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_HA: - case MCSymbolRefExpr::VK_PPC_GOT_TLSLD16_LO: + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD: + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_LO: + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HI: + case MCSymbolRefExpr::VK_PPC_GOT_TLSLD_HA: case MCSymbolRefExpr::VK_PPC_TLSLD: break; } @@ -503,9 +541,7 @@ void MCELFStreamer::EmitBundleUnlock() { SD->setBundleLockState(MCSectionData::NotBundleLocked); } -void MCELFStreamer::FinishImpl() { - EmitFrames(true); - +void MCELFStreamer::Flush() { for (std::vector::const_iterator i = LocalCommons.begin(), e = LocalCommons.end(); i 
!= e; ++i) { @@ -526,17 +562,23 @@ void MCELFStreamer::FinishImpl() { SectData.setAlignment(ByteAlignment); } - this->MCObjectStreamer::FinishImpl(); + LocalCommons.clear(); } -void MCELFStreamer::EmitTCEntry(const MCSymbol &S) { - // Creates a R_PPC64_TOC relocation - MCObjectStreamer::EmitSymbolValue(&S, 8); + +void MCELFStreamer::FinishImpl() { + EmitFrames(NULL, true); + + Flush(); + + this->MCObjectStreamer::FinishImpl(); } -MCStreamer *llvm::createELFStreamer(MCContext &Context, MCAsmBackend &MAB, - raw_ostream &OS, MCCodeEmitter *CE, - bool RelaxAll, bool NoExecStack) { - MCELFStreamer *S = new MCELFStreamer(Context, MAB, OS, CE); +MCStreamer *llvm::createELFStreamer(MCContext &Context, + MCTargetStreamer *Streamer, + MCAsmBackend &MAB, raw_ostream &OS, + MCCodeEmitter *CE, bool RelaxAll, + bool NoExecStack) { + MCELFStreamer *S = new MCELFStreamer(Context, Streamer, MAB, OS, CE); if (RelaxAll) S->getAssembler().setRelaxAll(true); if (NoExecStack) diff --git a/contrib/llvm/lib/MC/MCExpr.cpp b/contrib/llvm/lib/MC/MCExpr.cpp index 06bc72f..c777e64 100644 --- a/contrib/llvm/lib/MC/MCExpr.cpp +++ b/contrib/llvm/lib/MC/MCExpr.cpp @@ -42,13 +42,6 @@ void MCExpr::print(raw_ostream &OS) const { // Parenthesize names that start with $ so that they don't look like // absolute names. bool UseParens = Sym.getName()[0] == '$'; - - if (SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_HA16 || - SRE.getKind() == MCSymbolRefExpr::VK_PPC_DARWIN_LO16) { - OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); - UseParens = true; - } - if (UseParens) OS << '(' << Sym << ')'; else @@ -65,9 +58,7 @@ void MCExpr::print(raw_ostream &OS) const { SRE.getKind() == MCSymbolRefExpr::VK_ARM_TARGET2 || SRE.getKind() == MCSymbolRefExpr::VK_ARM_PREL31) OS << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); - else if (SRE.getKind() != MCSymbolRefExpr::VK_None && - SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_HA16 && - SRE.getKind() != MCSymbolRefExpr::VK_PPC_DARWIN_LO16) + else if (SRE.getKind() != MCSymbolRefExpr::VK_None) OS << '@' << MCSymbolRefExpr::getVariantKindName(SRE.getKind()); return; @@ -205,26 +196,56 @@ StringRef MCSymbolRefExpr::getVariantKindName(VariantKind Kind) { case VK_ARM_TARGET1: return "(target1)"; case VK_ARM_TARGET2: return "(target2)"; case VK_ARM_PREL31: return "(prel31)"; - case VK_PPC_TOC: return "tocbase"; - case VK_PPC_TOC_ENTRY: return "toc"; - case VK_PPC_DARWIN_HA16: return "ha16"; - case VK_PPC_DARWIN_LO16: return "lo16"; - case VK_PPC_GAS_HA16: return "ha"; - case VK_PPC_GAS_LO16: return "l"; - case VK_PPC_TPREL16_HA: return "tprel@ha"; - case VK_PPC_TPREL16_LO: return "tprel@l"; - case VK_PPC_DTPREL16_HA: return "dtprel@ha"; - case VK_PPC_DTPREL16_LO: return "dtprel@l"; - case VK_PPC_TOC16_HA: return "toc@ha"; - case VK_PPC_TOC16_LO: return "toc@l"; - case VK_PPC_GOT_TPREL16_HA: return "got@tprel@ha"; - case VK_PPC_GOT_TPREL16_LO: return "got@tprel@l"; + case VK_PPC_LO: return "l"; + case VK_PPC_HI: return "h"; + case VK_PPC_HA: return "ha"; + case VK_PPC_HIGHER: return "higher"; + case VK_PPC_HIGHERA: return "highera"; + case VK_PPC_HIGHEST: return "highest"; + case VK_PPC_HIGHESTA: return "highesta"; + case VK_PPC_GOT_LO: return "got@l"; + case VK_PPC_GOT_HI: return "got@h"; + case VK_PPC_GOT_HA: return "got@ha"; + case VK_PPC_TOCBASE: return "tocbase"; + case VK_PPC_TOC: return "toc"; + case VK_PPC_TOC_LO: return "toc@l"; + case VK_PPC_TOC_HI: return "toc@h"; + case VK_PPC_TOC_HA: return "toc@ha"; + case VK_PPC_DTPMOD: return "dtpmod"; + case VK_PPC_TPREL: return 
"tprel"; + case VK_PPC_TPREL_LO: return "tprel@l"; + case VK_PPC_TPREL_HI: return "tprel@h"; + case VK_PPC_TPREL_HA: return "tprel@ha"; + case VK_PPC_TPREL_HIGHER: return "tprel@higher"; + case VK_PPC_TPREL_HIGHERA: return "tprel@highera"; + case VK_PPC_TPREL_HIGHEST: return "tprel@highest"; + case VK_PPC_TPREL_HIGHESTA: return "tprel@highesta"; + case VK_PPC_DTPREL: return "dtprel"; + case VK_PPC_DTPREL_LO: return "dtprel@l"; + case VK_PPC_DTPREL_HI: return "dtprel@h"; + case VK_PPC_DTPREL_HA: return "dtprel@ha"; + case VK_PPC_DTPREL_HIGHER: return "dtprel@higher"; + case VK_PPC_DTPREL_HIGHERA: return "dtprel@highera"; + case VK_PPC_DTPREL_HIGHEST: return "dtprel@highest"; + case VK_PPC_DTPREL_HIGHESTA: return "dtprel@highesta"; + case VK_PPC_GOT_TPREL: return "got@tprel"; + case VK_PPC_GOT_TPREL_LO: return "got@tprel@l"; + case VK_PPC_GOT_TPREL_HI: return "got@tprel@h"; + case VK_PPC_GOT_TPREL_HA: return "got@tprel@ha"; + case VK_PPC_GOT_DTPREL: return "got@dtprel"; + case VK_PPC_GOT_DTPREL_LO: return "got@dtprel@l"; + case VK_PPC_GOT_DTPREL_HI: return "got@dtprel@h"; + case VK_PPC_GOT_DTPREL_HA: return "got@dtprel@ha"; case VK_PPC_TLS: return "tls"; - case VK_PPC_GOT_TLSGD16_HA: return "got@tlsgd@ha"; - case VK_PPC_GOT_TLSGD16_LO: return "got@tlsgd@l"; - case VK_PPC_GOT_TLSLD16_HA: return "got@tlsld@ha"; - case VK_PPC_GOT_TLSLD16_LO: return "got@tlsld@l"; + case VK_PPC_GOT_TLSGD: return "got@tlsgd"; + case VK_PPC_GOT_TLSGD_LO: return "got@tlsgd@l"; + case VK_PPC_GOT_TLSGD_HI: return "got@tlsgd@h"; + case VK_PPC_GOT_TLSGD_HA: return "got@tlsgd@ha"; case VK_PPC_TLSGD: return "tlsgd"; + case VK_PPC_GOT_TLSLD: return "got@tlsld"; + case VK_PPC_GOT_TLSLD_LO: return "got@tlsld@l"; + case VK_PPC_GOT_TLSLD_HI: return "got@tlsld@h"; + case VK_PPC_GOT_TLSLD_HA: return "got@tlsld@ha"; case VK_PPC_TLSLD: return "tlsld"; case VK_Mips_GPREL: return "GPREL"; case VK_Mips_GOT_CALL: return "GOT_CALL"; @@ -290,40 +311,104 @@ MCSymbolRefExpr::getVariantKindForName(StringRef Name) { .Case("imgrel", VK_COFF_IMGREL32) .Case("SECREL32", VK_SECREL) .Case("secrel32", VK_SECREL) - .Case("HA", VK_PPC_GAS_HA16) - .Case("ha", VK_PPC_GAS_HA16) - .Case("L", VK_PPC_GAS_LO16) - .Case("l", VK_PPC_GAS_LO16) - .Case("TOCBASE", VK_PPC_TOC) - .Case("tocbase", VK_PPC_TOC) - .Case("TOC", VK_PPC_TOC_ENTRY) - .Case("toc", VK_PPC_TOC_ENTRY) - .Case("TOC@HA", VK_PPC_TOC16_HA) - .Case("toc@ha", VK_PPC_TOC16_HA) - .Case("TOC@L", VK_PPC_TOC16_LO) - .Case("toc@l", VK_PPC_TOC16_LO) + .Case("L", VK_PPC_LO) + .Case("l", VK_PPC_LO) + .Case("H", VK_PPC_HI) + .Case("h", VK_PPC_HI) + .Case("HA", VK_PPC_HA) + .Case("ha", VK_PPC_HA) + .Case("HIGHER", VK_PPC_HIGHER) + .Case("higher", VK_PPC_HIGHER) + .Case("HIGHERA", VK_PPC_HIGHERA) + .Case("highera", VK_PPC_HIGHERA) + .Case("HIGHEST", VK_PPC_HIGHEST) + .Case("highest", VK_PPC_HIGHEST) + .Case("HIGHESTA", VK_PPC_HIGHESTA) + .Case("highesta", VK_PPC_HIGHESTA) + .Case("GOT@L", VK_PPC_GOT_LO) + .Case("got@l", VK_PPC_GOT_LO) + .Case("GOT@H", VK_PPC_GOT_HI) + .Case("got@h", VK_PPC_GOT_HI) + .Case("GOT@HA", VK_PPC_GOT_HA) + .Case("got@ha", VK_PPC_GOT_HA) + .Case("TOCBASE", VK_PPC_TOCBASE) + .Case("tocbase", VK_PPC_TOCBASE) + .Case("TOC", VK_PPC_TOC) + .Case("toc", VK_PPC_TOC) + .Case("TOC@L", VK_PPC_TOC_LO) + .Case("toc@l", VK_PPC_TOC_LO) + .Case("TOC@H", VK_PPC_TOC_HI) + .Case("toc@h", VK_PPC_TOC_HI) + .Case("TOC@HA", VK_PPC_TOC_HA) + .Case("toc@ha", VK_PPC_TOC_HA) .Case("TLS", VK_PPC_TLS) .Case("tls", VK_PPC_TLS) - .Case("TPREL@HA", VK_PPC_TPREL16_HA) - .Case("tprel@ha", VK_PPC_TPREL16_HA) - 
.Case("TPREL@L", VK_PPC_TPREL16_LO) - .Case("tprel@l", VK_PPC_TPREL16_LO) - .Case("DTPREL@HA", VK_PPC_DTPREL16_HA) - .Case("dtprel@ha", VK_PPC_DTPREL16_HA) - .Case("DTPREL@L", VK_PPC_DTPREL16_LO) - .Case("dtprel@l", VK_PPC_DTPREL16_LO) - .Case("GOT@TPREL@HA", VK_PPC_GOT_TPREL16_HA) - .Case("got@tprel@ha", VK_PPC_GOT_TPREL16_HA) - .Case("GOT@TPREL@L", VK_PPC_GOT_TPREL16_LO) - .Case("got@tprel@l", VK_PPC_GOT_TPREL16_LO) - .Case("GOT@TLSGD@HA", VK_PPC_GOT_TLSGD16_HA) - .Case("got@tlsgd@ha", VK_PPC_GOT_TLSGD16_HA) - .Case("GOT@TLSGD@L", VK_PPC_GOT_TLSGD16_LO) - .Case("got@tlsgd@l", VK_PPC_GOT_TLSGD16_LO) - .Case("GOT@TLSLD@HA", VK_PPC_GOT_TLSLD16_HA) - .Case("got@tlsld@ha", VK_PPC_GOT_TLSLD16_HA) - .Case("GOT@TLSLD@L", VK_PPC_GOT_TLSLD16_LO) - .Case("got@tlsld@l", VK_PPC_GOT_TLSLD16_LO) + .Case("DTPMOD", VK_PPC_DTPMOD) + .Case("dtpmod", VK_PPC_DTPMOD) + .Case("TPREL", VK_PPC_TPREL) + .Case("tprel", VK_PPC_TPREL) + .Case("TPREL@L", VK_PPC_TPREL_LO) + .Case("tprel@l", VK_PPC_TPREL_LO) + .Case("TPREL@H", VK_PPC_TPREL_HI) + .Case("tprel@h", VK_PPC_TPREL_HI) + .Case("TPREL@HA", VK_PPC_TPREL_HA) + .Case("tprel@ha", VK_PPC_TPREL_HA) + .Case("TPREL@HIGHER", VK_PPC_TPREL_HIGHER) + .Case("tprel@higher", VK_PPC_TPREL_HIGHER) + .Case("TPREL@HIGHERA", VK_PPC_TPREL_HIGHERA) + .Case("tprel@highera", VK_PPC_TPREL_HIGHERA) + .Case("TPREL@HIGHEST", VK_PPC_TPREL_HIGHEST) + .Case("tprel@highest", VK_PPC_TPREL_HIGHEST) + .Case("TPREL@HIGHESTA", VK_PPC_TPREL_HIGHESTA) + .Case("tprel@highesta", VK_PPC_TPREL_HIGHESTA) + .Case("DTPREL", VK_PPC_DTPREL) + .Case("dtprel", VK_PPC_DTPREL) + .Case("DTPREL@L", VK_PPC_DTPREL_LO) + .Case("dtprel@l", VK_PPC_DTPREL_LO) + .Case("DTPREL@H", VK_PPC_DTPREL_HI) + .Case("dtprel@h", VK_PPC_DTPREL_HI) + .Case("DTPREL@HA", VK_PPC_DTPREL_HA) + .Case("dtprel@ha", VK_PPC_DTPREL_HA) + .Case("DTPREL@HIGHER", VK_PPC_DTPREL_HIGHER) + .Case("dtprel@higher", VK_PPC_DTPREL_HIGHER) + .Case("DTPREL@HIGHERA", VK_PPC_DTPREL_HIGHERA) + .Case("dtprel@highera", VK_PPC_DTPREL_HIGHERA) + .Case("DTPREL@HIGHEST", VK_PPC_DTPREL_HIGHEST) + .Case("dtprel@highest", VK_PPC_DTPREL_HIGHEST) + .Case("DTPREL@HIGHESTA", VK_PPC_DTPREL_HIGHESTA) + .Case("dtprel@highesta", VK_PPC_DTPREL_HIGHESTA) + .Case("GOT@TPREL", VK_PPC_GOT_TPREL) + .Case("got@tprel", VK_PPC_GOT_TPREL) + .Case("GOT@TPREL@L", VK_PPC_GOT_TPREL_LO) + .Case("got@tprel@l", VK_PPC_GOT_TPREL_LO) + .Case("GOT@TPREL@H", VK_PPC_GOT_TPREL_HI) + .Case("got@tprel@h", VK_PPC_GOT_TPREL_HI) + .Case("GOT@TPREL@HA", VK_PPC_GOT_TPREL_HA) + .Case("got@tprel@ha", VK_PPC_GOT_TPREL_HA) + .Case("GOT@DTPREL", VK_PPC_GOT_DTPREL) + .Case("got@dtprel", VK_PPC_GOT_DTPREL) + .Case("GOT@DTPREL@L", VK_PPC_GOT_DTPREL_LO) + .Case("got@dtprel@l", VK_PPC_GOT_DTPREL_LO) + .Case("GOT@DTPREL@H", VK_PPC_GOT_DTPREL_HI) + .Case("got@dtprel@h", VK_PPC_GOT_DTPREL_HI) + .Case("GOT@DTPREL@HA", VK_PPC_GOT_DTPREL_HA) + .Case("got@dtprel@ha", VK_PPC_GOT_DTPREL_HA) + .Case("GOT@TLSGD", VK_PPC_GOT_TLSGD) + .Case("got@tlsgd", VK_PPC_GOT_TLSGD) + .Case("GOT@TLSGD@L", VK_PPC_GOT_TLSGD_LO) + .Case("got@tlsgd@l", VK_PPC_GOT_TLSGD_LO) + .Case("GOT@TLSGD@H", VK_PPC_GOT_TLSGD_HI) + .Case("got@tlsgd@h", VK_PPC_GOT_TLSGD_HI) + .Case("GOT@TLSGD@HA", VK_PPC_GOT_TLSGD_HA) + .Case("got@tlsgd@ha", VK_PPC_GOT_TLSGD_HA) + .Case("GOT@TLSLD", VK_PPC_GOT_TLSLD) + .Case("got@tlsld", VK_PPC_GOT_TLSLD) + .Case("GOT@TLSLD@L", VK_PPC_GOT_TLSLD_LO) + .Case("got@tlsld@l", VK_PPC_GOT_TLSLD_LO) + .Case("GOT@TLSLD@H", VK_PPC_GOT_TLSLD_HI) + .Case("got@tlsld@h", VK_PPC_GOT_TLSLD_HI) + .Case("GOT@TLSLD@HA", VK_PPC_GOT_TLSLD_HA) + 
.Case("got@tlsld@ha", VK_PPC_GOT_TLSLD_HA) .Default(VK_Invalid); } diff --git a/contrib/llvm/lib/MC/MCExternalSymbolizer.cpp b/contrib/llvm/lib/MC/MCExternalSymbolizer.cpp new file mode 100644 index 0000000..ca368b2 --- /dev/null +++ b/contrib/llvm/lib/MC/MCExternalSymbolizer.cpp @@ -0,0 +1,181 @@ +//===-- lib/MC/MCExternalSymbolizer.cpp - External symbolizer ---*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCExternalSymbolizer.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; + +// This function tries to add a symbolic operand in place of the immediate +// Value in the MCInst. The immediate Value has had any PC adjustment made by +// the caller. If the instruction is a branch instruction then IsBranch is true, +// else false. If the getOpInfo() function was set as part of the +// setupForSymbolicDisassembly() call then that function is called to get any +// symbolic information at the Address for this instruction. If that returns +// non-zero then the symbolic information it returns is used to create an MCExpr +// and that is added as an operand to the MCInst. If getOpInfo() returns zero +// and IsBranch is true then a symbol look up for Value is done and if a symbol +// is found an MCExpr is created with that, else an MCExpr with Value is +// created. This function returns true if it adds an operand to the MCInst and +// false otherwise. +bool MCExternalSymbolizer::tryAddingSymbolicOperand(MCInst &MI, + raw_ostream &cStream, + int64_t Value, + uint64_t Address, + bool IsBranch, + uint64_t Offset, + uint64_t InstSize) { + struct LLVMOpInfo1 SymbolicOp; + std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + SymbolicOp.Value = Value; + + if (!GetOpInfo || + !GetOpInfo(DisInfo, Address, Offset, InstSize, 1, &SymbolicOp)) { + // Clear SymbolicOp.Value from above and also all other fields. + std::memset(&SymbolicOp, '\0', sizeof(struct LLVMOpInfo1)); + if (!SymbolLookUp) + return false; + uint64_t ReferenceType; + if (IsBranch) + ReferenceType = LLVMDisassembler_ReferenceType_In_Branch; + else + ReferenceType = LLVMDisassembler_ReferenceType_InOut_None; + const char *ReferenceName; + const char *Name = SymbolLookUp(DisInfo, Value, &ReferenceType, Address, + &ReferenceName); + if (Name) { + SymbolicOp.AddSymbol.Name = Name; + SymbolicOp.AddSymbol.Present = true; + } + // For branches always create an MCExpr so it gets printed as hex address. 
+ else if (IsBranch) { + SymbolicOp.Value = Value; + } + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_SymbolStub) + cStream << "symbol stub for: " << ReferenceName; + else if(ReferenceType == LLVMDisassembler_ReferenceType_Out_Objc_Message) + cStream << "Objc message: " << ReferenceName; + if (!Name && !IsBranch) + return false; + } + + const MCExpr *Add = NULL; + if (SymbolicOp.AddSymbol.Present) { + if (SymbolicOp.AddSymbol.Name) { + StringRef Name(SymbolicOp.AddSymbol.Name); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); + Add = MCSymbolRefExpr::Create(Sym, Ctx); + } else { + Add = MCConstantExpr::Create((int)SymbolicOp.AddSymbol.Value, Ctx); + } + } + + const MCExpr *Sub = NULL; + if (SymbolicOp.SubtractSymbol.Present) { + if (SymbolicOp.SubtractSymbol.Name) { + StringRef Name(SymbolicOp.SubtractSymbol.Name); + MCSymbol *Sym = Ctx.GetOrCreateSymbol(Name); + Sub = MCSymbolRefExpr::Create(Sym, Ctx); + } else { + Sub = MCConstantExpr::Create((int)SymbolicOp.SubtractSymbol.Value, Ctx); + } + } + + const MCExpr *Off = NULL; + if (SymbolicOp.Value != 0) + Off = MCConstantExpr::Create(SymbolicOp.Value, Ctx); + + const MCExpr *Expr; + if (Sub) { + const MCExpr *LHS; + if (Add) + LHS = MCBinaryExpr::CreateSub(Add, Sub, Ctx); + else + LHS = MCUnaryExpr::CreateMinus(Sub, Ctx); + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(LHS, Off, Ctx); + else + Expr = LHS; + } else if (Add) { + if (Off != 0) + Expr = MCBinaryExpr::CreateAdd(Add, Off, Ctx); + else + Expr = Add; + } else { + if (Off != 0) + Expr = Off; + else + Expr = MCConstantExpr::Create(0, Ctx); + } + + Expr = RelInfo->createExprForCAPIVariantKind(Expr, SymbolicOp.VariantKind); + if (!Expr) + return false; + + MI.addOperand(MCOperand::CreateExpr(Expr)); + return true; +} + +// This function tries to add a comment as to what is being referenced by a load +// instruction with the base register that is the Pc. These can often be values +// in a literal pool near the Address of the instruction. The Address of the +// instruction and its immediate Value are used as a possible literal pool entry. +// The SymbolLookUp call back will return the name of a symbol referenced by the +// literal pool's entry if the referenced address is that of a symbol. Or it +// will return a pointer to a literal 'C' string if the referenced address of +// the literal pool's entry is an address into a section with C string literals. +// Or if the reference is to an Objective-C data structure it will return a +// specific reference type for it and a string. 
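Both symbolizer hooks in this new file are driven by the client-provided GetOpInfo and SymbolLookUp callbacks from the llvm-c disassembler interface. A minimal client-side SymbolLookUp might look like the following sketch; the std::map symbol table and the lookup policy are illustrative assumptions, not part of this commit:

    #include "llvm-c/Disassembler.h"
    #include <map>
    #include <string>

    // Hypothetical client symbol table, mapping addresses to names.
    static std::map<uint64_t, std::string> ClientSymTable;

    static const char *clientSymbolLookUp(void *DisInfo, uint64_t Value,
                                          uint64_t *ReferenceType,
                                          uint64_t ReferencePC,
                                          const char **ReferenceName) {
      // Report no referenced-symbol information beyond the name itself.
      *ReferenceType = LLVMDisassembler_ReferenceType_InOut_None;
      *ReferenceName = 0;
      std::map<uint64_t, std::string>::const_iterator I =
          ClientSymTable.find(Value);
      // A non-null return makes tryAddingSymbolicOperand() build a
      // symbol-based MCExpr; returning 0 falls back to the raw value.
      return I == ClientSymTable.end() ? 0 : I->second.c_str();
    }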
+void MCExternalSymbolizer::tryAddingPcLoadReferenceComment(raw_ostream &cStream, + int64_t Value, + uint64_t Address) { + if (SymbolLookUp) { + uint64_t ReferenceType = LLVMDisassembler_ReferenceType_In_PCrel_Load; + const char *ReferenceName; + (void)SymbolLookUp(DisInfo, Value, &ReferenceType, Address, &ReferenceName); + if(ReferenceType == LLVMDisassembler_ReferenceType_Out_LitPool_SymAddr) + cStream << "literal pool symbol address: " << ReferenceName; + else if(ReferenceType == + LLVMDisassembler_ReferenceType_Out_LitPool_CstrAddr) + cStream << "literal pool for: \"" << ReferenceName << "\""; + else if(ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_CFString_Ref) + cStream << "Objc cfstring ref: @\"" << ReferenceName << "\""; + else if(ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Message) + cStream << "Objc message: " << ReferenceName; + else if(ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Message_Ref) + cStream << "Objc message ref: " << ReferenceName; + else if(ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Selector_Ref) + cStream << "Objc selector ref: " << ReferenceName; + else if(ReferenceType == + LLVMDisassembler_ReferenceType_Out_Objc_Class_Ref) + cStream << "Objc class ref: " << ReferenceName; + } +} + +namespace llvm { +MCSymbolizer *createMCSymbolizer(StringRef TT, LLVMOpInfoCallback GetOpInfo, + LLVMSymbolLookupCallback SymbolLookUp, + void *DisInfo, + MCContext *Ctx, + MCRelocationInfo *RelInfo) { + assert(Ctx != 0 && "No MCContext given for symbolic disassembly"); + + OwningPtr<MCRelocationInfo> RelInfoOwingPtr(RelInfo); + return new MCExternalSymbolizer(*Ctx, RelInfoOwingPtr, GetOpInfo, + SymbolLookUp, DisInfo); +} +} diff --git a/contrib/llvm/lib/MC/MCFunction.cpp b/contrib/llvm/lib/MC/MCFunction.cpp new file mode 100644 index 0000000..767e1e0 --- /dev/null +++ b/contrib/llvm/lib/MC/MCFunction.cpp @@ -0,0 +1,81 @@ +//===-- lib/MC/MCFunction.cpp -----------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCFunction.h" +#include "llvm/MC/MCAtom.h" +#include "llvm/MC/MCModule.h" +#include <algorithm> + +using namespace llvm; + +// MCFunction + +MCFunction::MCFunction(StringRef Name, MCModule *Parent) + : Name(Name), ParentModule(Parent) +{} + +MCFunction::~MCFunction() { + for (iterator I = begin(), E = end(); I != E; ++I) + delete *I; +} + +MCBasicBlock &MCFunction::createBlock(const MCTextAtom &TA) { + MCBasicBlock *MCBB = new MCBasicBlock(TA, this); + Blocks.push_back(MCBB); + return *MCBB; +} + +MCBasicBlock *MCFunction::find(uint64_t StartAddr) { + for (const_iterator I = begin(), E = end(); I != E; ++I) + if ((*I)->getInsts()->getBeginAddr() == StartAddr) + return *I; + return 0; +} + +const MCBasicBlock *MCFunction::find(uint64_t StartAddr) const { + return const_cast<MCFunction*>(this)->find(StartAddr); +} + +// MCBasicBlock + +MCBasicBlock::MCBasicBlock(const MCTextAtom &Insts, MCFunction *Parent) + : Insts(&Insts), Parent(Parent) { + getParent()->getParent()->trackBBForAtom(&Insts, this); +} + +void MCBasicBlock::addSuccessor(const MCBasicBlock *MCBB) { + if (!isSuccessor(MCBB)) + Successors.push_back(MCBB); +} + +bool MCBasicBlock::isSuccessor(const MCBasicBlock *MCBB) const { + return std::find(Successors.begin(), Successors.end(), + MCBB) != Successors.end(); +} + +void MCBasicBlock::addPredecessor(const MCBasicBlock *MCBB) { + if (!isPredecessor(MCBB)) + Predecessors.push_back(MCBB); +} + +bool MCBasicBlock::isPredecessor(const MCBasicBlock *MCBB) const { + return std::find(Predecessors.begin(), Predecessors.end(), + MCBB) != Predecessors.end(); +} + +void MCBasicBlock::splitBasicBlock(MCBasicBlock *SplitBB) { + assert(Insts->getEndAddr() + 1 == SplitBB->Insts->getBeginAddr() && + "Splitting unrelated basic blocks!"); + SplitBB->addPredecessor(this); + assert(SplitBB->Successors.empty() && + "Split basic block shouldn't already have successors!"); + SplitBB->Successors = Successors; + Successors.clear(); + addSuccessor(SplitBB); +} diff --git a/contrib/llvm/lib/MC/MCInstPrinter.cpp b/contrib/llvm/lib/MC/MCInstPrinter.cpp index c729d49..ba71245 100644 --- a/contrib/llvm/lib/MC/MCInstPrinter.cpp +++ b/contrib/llvm/lib/MC/MCInstPrinter.cpp @@ -31,9 +31,13 @@ void MCInstPrinter::printRegName(raw_ostream &OS, unsigned RegNo) const { void MCInstPrinter::printAnnotation(raw_ostream &OS, StringRef Annot) { if (!Annot.empty()) { - if (CommentStream) + if (CommentStream) { (*CommentStream) << Annot; - else + // By definition (see MCInstPrinter.h), CommentStream must end with + // a newline after each comment.
+ if (Annot.back() != '\n') + (*CommentStream) << '\n'; + } else OS << " " << MAI.getCommentString() << " " << Annot; } } @@ -89,6 +93,7 @@ format_object1<int64_t> MCInstPrinter::formatHex(const int64_t Value) const { return format("%" PRIx64 "h", Value); } } + llvm_unreachable("unsupported print style"); } format_object1<uint64_t> MCInstPrinter::formatHex(const uint64_t Value) const { @@ -101,4 +106,5 @@ format_object1<uint64_t> MCInstPrinter::formatHex(const uint64_t Value) const { else return format("%" PRIx64 "h", Value); } + llvm_unreachable("unsupported print style"); } diff --git a/contrib/llvm/lib/MC/MCInstrAnalysis.cpp b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp index 7736702..2d8336d 100644 --- a/contrib/llvm/lib/MC/MCInstrAnalysis.cpp +++ b/contrib/llvm/lib/MC/MCInstrAnalysis.cpp @@ -10,12 +10,13 @@ #include "llvm/MC/MCInstrAnalysis.h" using namespace llvm; -uint64_t MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, - uint64_t Size) const { +bool MCInstrAnalysis::evaluateBranch(const MCInst &Inst, uint64_t Addr, + uint64_t Size, uint64_t &Target) const { if (Inst.getNumOperands() == 0 || Info->get(Inst.getOpcode()).OpInfo[0].OperandType != MCOI::OPERAND_PCREL) - return -1ULL; + return false; int64_t Imm = Inst.getOperand(0).getImm(); - return Addr+Size+Imm; + Target = Addr+Size+Imm; + return true; } diff --git a/contrib/llvm/lib/MC/MCMachOStreamer.cpp b/contrib/llvm/lib/MC/MCMachOStreamer.cpp index e08b01b..2924dcd 100644 --- a/contrib/llvm/lib/MC/MCMachOStreamer.cpp +++ b/contrib/llvm/lib/MC/MCMachOStreamer.cpp @@ -1,3 +1,4 @@ +//===-- MCMachOStreamer.cpp - MachO Streamer ------------------------------===// // // The LLVM Compiler Infrastructure // @@ -36,7 +37,7 @@ private: public: MCMachOStreamer(MCContext &Context, MCAsmBackend &MAB, raw_ostream &OS, MCCodeEmitter *Emitter) - : MCObjectStreamer(SK_MachOStreamer, Context, MAB, OS, Emitter) {} + : MCObjectStreamer(Context, 0, MAB, OS, Emitter) {} /// @name MCStreamer Interface /// @{ @@ -51,7 +52,7 @@ public: virtual void EmitLinkerOptions(ArrayRef<std::string> Options); virtual void EmitDataRegion(MCDataRegionType Kind); virtual void EmitThumbFunc(MCSymbol *Func); - virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); virtual void EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); @@ -81,16 +82,14 @@ public: // FIXME: Just ignore the .file; it isn't important enough to fail the // entire assembly. - //report_fatal_error("unsupported directive: '.file'"); + // report_fatal_error("unsupported directive: '.file'"); } - virtual void FinishImpl(); - - /// @} - - static bool classof(const MCStreamer *S) { - return S->getKind() == SK_MachOStreamer; + virtual void EmitIdent(StringRef IdentString) { + llvm_unreachable("macho doesn't support this directive"); } + + virtual void FinishImpl(); }; } // end anonymous namespace. @@ -122,7 +121,7 @@ void MCMachOStreamer::EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); // isSymbolLinkerVisible uses the section. - Symbol->setSection(*getCurrentSection().first); + AssignSection(Symbol, getCurrentSection().first); // We have to create a new fragment if this is an atom defining symbol, // fragments cannot span atoms.
if (getAssembler().isSymbolLinkerVisible(*Symbol)) @@ -217,7 +216,7 @@ void MCMachOStreamer::EmitThumbFunc(MCSymbol *Symbol) { SD.setFlags(SD.getFlags() | SF_ThumbFunc); } -void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, +bool MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { // Indirect symbols are handled differently, to match how 'as' handles // them. This makes writing matching .o files easier. @@ -228,7 +227,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, ISD.Symbol = Symbol; ISD.SectionData = getCurrentSectionData(); getAssembler().getIndirectSymbols().push_back(ISD); - return; + return true; } // Adding a symbol attribute always introduces the symbol, note that an @@ -257,7 +256,7 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, case MCSA_Protected: case MCSA_Weak: case MCSA_Local: - llvm_unreachable("Invalid symbol attribute for Mach-O!"); + return false; case MCSA_Global: SD.setExternal(true); @@ -309,6 +308,8 @@ void MCMachOStreamer::EmitSymbolAttribute(MCSymbol *Symbol, SD.setFlags(SD.getFlags() | SF_WeakDefinition | SF_WeakReference); break; } + + return true; } void MCMachOStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { @@ -324,6 +325,8 @@ void MCMachOStreamer::EmitCommonSymbol(MCSymbol *Symbol, uint64_t Size, // FIXME: Darwin 'as' does appear to allow redef of a .comm by itself. assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); + AssignSection(Symbol, NULL); + MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); SD.setExternal(true); SD.setCommon(Size, ByteAlignment); @@ -346,7 +349,8 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, if (!Symbol) return; - // FIXME: Assert that this section has the zerofill type. + // On darwin all virtual sections have zerofill type. + assert(Section->isVirtualSection() && "Section does not have zerofill type!"); assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); @@ -359,7 +363,7 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, MCFragment *F = new MCFillFragment(0, 0, Size, &SectData); SD.setFragment(F); - Symbol->setSection(*Section); + AssignSection(Symbol, Section); // Update the maximum alignment on the zero fill section if necessary. if (ByteAlignment > SectData.getAlignment()) @@ -392,7 +396,7 @@ void MCMachOStreamer::EmitInstToData(const MCInst &Inst) { } void MCMachOStreamer::FinishImpl() { - EmitFrames(true); + EmitFrames(&getAssembler().getBackend(), true); // We have to set the fragment atom associations so we can relax properly for // Mach-O. 
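The EmitSymbolAttribute change above is part of a broader MCStreamer API change in this import: the method now returns bool so that a target streamer can report an attribute it cannot encode (the Mach-O streamer now returns false for MCSA_Protected, MCSA_Weak and MCSA_Local instead of hitting llvm_unreachable). A caller-side sketch of the new contract, with an assumed wrapper name and diagnostic text for illustration only:

    #include "llvm/MC/MCParser/MCAsmParser.h"
    #include "llvm/MC/MCStreamer.h"
    using namespace llvm;

    // Sketch: let an assembly parser turn an unsupported symbol attribute
    // into a source-located diagnostic instead of a streamer crash.
    static bool emitSymbolAttrOrDiagnose(MCStreamer &Out, MCAsmParser &Parser,
                                         MCSymbol *Sym, MCSymbolAttr Attr,
                                         SMLoc Loc) {
      if (!Out.EmitSymbolAttribute(Sym, Attr))
        return Parser.Error(Loc, "symbol attribute not supported by target");
      return false; // success, following the parser's true-on-error convention
    }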
diff --git a/contrib/llvm/lib/MC/MCModule.cpp b/contrib/llvm/lib/MC/MCModule.cpp index f563160..7e9e18a 100644 --- a/contrib/llvm/lib/MC/MCModule.cpp +++ b/contrib/llvm/lib/MC/MCModule.cpp @@ -7,39 +7,136 @@ // //===----------------------------------------------------------------------===// -#include "llvm/MC/MCAtom.h" #include "llvm/MC/MCModule.h" +#include "llvm/MC/MCAtom.h" +#include "llvm/MC/MCFunction.h" +#include <algorithm> using namespace llvm; -MCAtom *MCModule::createAtom(MCAtom::AtomType Type, - uint64_t Begin, uint64_t End) { - assert(Begin < End && "Creating MCAtom with endpoints reversed?"); +static bool AtomComp(const MCAtom *L, uint64_t Addr) { + return L->getEndAddr() < Addr; +} + +static bool AtomCompInv(uint64_t Addr, const MCAtom *R) { + return Addr < R->getEndAddr(); +} + +void MCModule::map(MCAtom *NewAtom) { + uint64_t Begin = NewAtom->Begin; + + assert(Begin <= NewAtom->End && "Creating MCAtom with endpoints reversed?"); // Check for atoms already covering this range. - IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Begin); - assert((!I.valid() || I.start() < End) && "Offset range already occupied!"); + AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), + Begin, AtomComp); + assert((I == atom_end() || (*I)->getBeginAddr() > NewAtom->End) + && "Offset range already occupied!"); - // Create the new atom and add it to our maps. - MCAtom *NewAtom = new MCAtom(Type, this, Begin, End); - AtomAllocationTracker.insert(NewAtom); - OffsetMap.insert(Begin, End, NewAtom); + // Insert the new atom to the list. + Atoms.insert(I, NewAtom); +} + +MCTextAtom *MCModule::createTextAtom(uint64_t Begin, uint64_t End) { + MCTextAtom *NewAtom = new MCTextAtom(this, Begin, End); + map(NewAtom); + return NewAtom; +} + +MCDataAtom *MCModule::createDataAtom(uint64_t Begin, uint64_t End) { + MCDataAtom *NewAtom = new MCDataAtom(this, Begin, End); + map(NewAtom); return NewAtom; } // remap - Update the interval mapping for an atom. void MCModule::remap(MCAtom *Atom, uint64_t NewBegin, uint64_t NewEnd) { // Find and erase the old mapping. - IntervalMap<uint64_t, MCAtom*>::iterator I = OffsetMap.find(Atom->Begin); - assert(I.valid() && "Atom offset not found in module!"); + AtomListTy::iterator I = std::lower_bound(atom_begin(), atom_end(), + Atom->Begin, AtomComp); + assert(I != atom_end() && "Atom offset not found in module!"); assert(*I == Atom && "Previous atom mapping was invalid!"); - I.erase(); + Atoms.erase(I); + + // FIXME: special case NewBegin == Atom->Begin // Insert the new mapping. - OffsetMap.insert(NewBegin, NewEnd, Atom); + AtomListTy::iterator NewI = std::lower_bound(atom_begin(), atom_end(), + NewBegin, AtomComp); + assert((NewI == atom_end() || (*NewI)->getBeginAddr() > Atom->End) + && "Offset range already occupied!"); + Atoms.insert(NewI, Atom); // Update the atom internal bounds.
Atom->Begin = NewBegin; Atom->End = NewEnd; } +const MCAtom *MCModule::findAtomContaining(uint64_t Addr) const { + AtomListTy::const_iterator I = std::lower_bound(atom_begin(), atom_end(), + Addr, AtomComp); + if (I != atom_end() && (*I)->getBeginAddr() <= Addr) + return *I; + return 0; +} + +MCAtom *MCModule::findAtomContaining(uint64_t Addr) { + return const_cast<MCAtom*>( + const_cast<const MCModule *>(this)->findAtomContaining(Addr)); +} + +const MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) const { + AtomListTy::const_iterator I = std::upper_bound(atom_begin(), atom_end(), + Addr, AtomCompInv); + if (I != atom_end()) + return *I; + return 0; +} + +MCAtom *MCModule::findFirstAtomAfter(uint64_t Addr) { + return const_cast<MCAtom*>( + const_cast<const MCModule *>(this)->findFirstAtomAfter(Addr)); +} + +MCFunction *MCModule::createFunction(StringRef Name) { + Functions.push_back(new MCFunction(Name, this)); + return Functions.back(); +} + +static bool CompBBToAtom(MCBasicBlock *BB, const MCTextAtom *Atom) { + return BB->getInsts() < Atom; +} + +void MCModule::splitBasicBlocksForAtom(const MCTextAtom *TA, + const MCTextAtom *NewTA) { + BBsByAtomTy::iterator + I = std::lower_bound(BBsByAtom.begin(), BBsByAtom.end(), + TA, CompBBToAtom); + for (; I != BBsByAtom.end() && (*I)->getInsts() == TA; ++I) { + MCBasicBlock *BB = *I; + MCBasicBlock *NewBB = &BB->getParent()->createBlock(*NewTA); + BB->splitBasicBlock(NewBB); + } +} + +void MCModule::trackBBForAtom(const MCTextAtom *Atom, MCBasicBlock *BB) { + assert(Atom == BB->getInsts() && "Text atom doesn't back the basic block!"); + BBsByAtomTy::iterator I = std::lower_bound(BBsByAtom.begin(), + BBsByAtom.end(), + Atom, CompBBToAtom); + for (; I != BBsByAtom.end() && (*I)->getInsts() == Atom; ++I) + if (*I == BB) + return; + BBsByAtom.insert(I, BB); +} + +MCModule::~MCModule() { + for (AtomListTy::iterator AI = atom_begin(), + AE = atom_end(); + AI != AE; ++AI) + delete *AI; + for (FunctionListTy::iterator FI = func_begin(), + FE = func_end(); + FI != FE; ++FI) + delete *FI; +} diff --git a/contrib/llvm/lib/MC/MCModuleYAML.cpp b/contrib/llvm/lib/MC/MCModuleYAML.cpp new file mode 100644 index 0000000..e2de578 --- /dev/null +++ b/contrib/llvm/lib/MC/MCModuleYAML.cpp @@ -0,0 +1,461 @@ +//===- MCModuleYAML.cpp - MCModule YAMLIO implementation ------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines classes for handling the YAML representation of MCModule. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCModuleYAML.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/MC/MCAtom.h" +#include "llvm/MC/MCFunction.h" +#include "llvm/MC/MCInstrInfo.h" +#include "llvm/MC/MCRegisterInfo.h" +#include "llvm/Object/YAML.h" +#include "llvm/Support/Allocator.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/YAMLTraits.h" +#include <vector> + +namespace llvm { + +namespace { + +// This class is used to map opcode and register names to enum values. +// +// There are at least 3 obvious ways to do this: +// 1- Generate an MII/MRI method using a tablegen StringMatcher +// 2- Write an MII/MRI method using std::lower_bound and the assumption that +// the enums are sorted (starting at a fixed value). +// 3- Do the matching manually as is done here. +// +// Why 3?
+// 1- A StringMatcher function for thousands of entries would incur +// a non-negligible binary size overhead. +// 2- The lower_bound comparators would be somewhat involved and aren't +// obviously reusable (see LessRecordRegister in llvm/TableGen/Record.h) +// 3- This isn't actually something useful outside tests (but the same argument +// can be made against having {MII,MRI}::getName). +// +// If this becomes useful outside this specific situation, feel free to do +// the Right Thing (tm) and move the functionality to MII/MRI. +// +class InstrRegInfoHolder { + typedef StringMap<unsigned> EnumValByNameTy; + EnumValByNameTy InstEnumValueByName; + EnumValByNameTy RegEnumValueByName; + +public: + const MCInstrInfo &MII; + const MCRegisterInfo &MRI; + InstrRegInfoHolder(const MCInstrInfo &MII, const MCRegisterInfo &MRI) + : InstEnumValueByName(NextPowerOf2(MII.getNumOpcodes())), + RegEnumValueByName(NextPowerOf2(MRI.getNumRegs())), MII(MII), MRI(MRI) { + for (int i = 0, e = MII.getNumOpcodes(); i != e; ++i) + InstEnumValueByName[MII.getName(i)] = i; + for (int i = 0, e = MRI.getNumRegs(); i != e; ++i) + RegEnumValueByName[MRI.getName(i)] = i; + } + + bool matchRegister(StringRef Name, unsigned &Reg) { + EnumValByNameTy::const_iterator It = RegEnumValueByName.find(Name); + if (It == RegEnumValueByName.end()) + return false; + Reg = It->getValue(); + return true; + } + bool matchOpcode(StringRef Name, unsigned &Opc) { + EnumValByNameTy::const_iterator It = InstEnumValueByName.find(Name); + if (It == InstEnumValueByName.end()) + return false; + Opc = It->getValue(); + return true; + } +}; + +} // end unnamed namespace + +namespace MCModuleYAML { + +LLVM_YAML_STRONG_TYPEDEF(unsigned, OpcodeEnum) + +struct Operand { + MCOperand MCOp; +}; + +struct Inst { + OpcodeEnum Opcode; + std::vector<Operand> Operands; + uint64_t Size; +}; + +struct Atom { + MCAtom::AtomKind Type; + yaml::Hex64 StartAddress; + uint64_t Size; + + std::vector<Inst> Insts; + object::yaml::BinaryRef Data; +}; + +struct BasicBlock { + yaml::Hex64 Address; + std::vector<yaml::Hex64> Preds; + std::vector<yaml::Hex64> Succs; +}; + +struct Function { + StringRef Name; + std::vector<BasicBlock> BasicBlocks; +}; + +struct Module { + std::vector<Atom> Atoms; + std::vector<Function> Functions; +}; + +} // end namespace MCModuleYAML +} // end namespace llvm + +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::yaml::Hex64) +LLVM_YAML_IS_FLOW_SEQUENCE_VECTOR(llvm::MCModuleYAML::Operand) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Inst) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Atom) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::BasicBlock) +LLVM_YAML_IS_SEQUENCE_VECTOR(llvm::MCModuleYAML::Function) + +namespace llvm { + +namespace yaml { + +template <> struct ScalarEnumerationTraits<MCAtom::AtomKind> { + static void enumeration(IO &IO, MCAtom::AtomKind &Kind); +}; + +template <> struct MappingTraits<MCModuleYAML::Atom> { + static void mapping(IO &IO, MCModuleYAML::Atom &A); +}; + +template <> struct MappingTraits<MCModuleYAML::Inst> { + static void mapping(IO &IO, MCModuleYAML::Inst &I); +}; + +template <> struct MappingTraits<MCModuleYAML::BasicBlock> { + static void mapping(IO &IO, MCModuleYAML::BasicBlock &BB); +}; + +template <> struct MappingTraits<MCModuleYAML::Function> { + static void mapping(IO &IO, MCModuleYAML::Function &Fn); +}; + +template <> struct MappingTraits<MCModuleYAML::Module> { + static void mapping(IO &IO, MCModuleYAML::Module &M); +}; + +template <> struct ScalarTraits<MCModuleYAML::Operand> { + static void output(const MCModuleYAML::Operand &, void *, + llvm::raw_ostream &); + static StringRef input(StringRef, void *, MCModuleYAML::Operand &); +}; + +template <> struct ScalarTraits<MCModuleYAML::OpcodeEnum> { + static void output(const MCModuleYAML::OpcodeEnum
&, void *, + llvm::raw_ostream &); + static StringRef input(StringRef, void *, MCModuleYAML::OpcodeEnum &); +}; + +void ScalarEnumerationTraits<MCAtom::AtomKind>::enumeration( + IO &IO, MCAtom::AtomKind &Value) { + IO.enumCase(Value, "Text", MCAtom::TextAtom); + IO.enumCase(Value, "Data", MCAtom::DataAtom); +} + +void MappingTraits<MCModuleYAML::Atom>::mapping(IO &IO, MCModuleYAML::Atom &A) { + IO.mapRequired("StartAddress", A.StartAddress); + IO.mapRequired("Size", A.Size); + IO.mapRequired("Type", A.Type); + if (A.Type == MCAtom::TextAtom) + IO.mapRequired("Content", A.Insts); + else if (A.Type == MCAtom::DataAtom) + IO.mapRequired("Content", A.Data); +} + +void MappingTraits<MCModuleYAML::Inst>::mapping(IO &IO, MCModuleYAML::Inst &I) { + IO.mapRequired("Inst", I.Opcode); + IO.mapRequired("Size", I.Size); + IO.mapRequired("Ops", I.Operands); +} + +void +MappingTraits<MCModuleYAML::BasicBlock>::mapping(IO &IO, + MCModuleYAML::BasicBlock &BB) { + IO.mapRequired("Address", BB.Address); + IO.mapRequired("Preds", BB.Preds); + IO.mapRequired("Succs", BB.Succs); +} + +void MappingTraits<MCModuleYAML::Function>::mapping(IO &IO, + MCModuleYAML::Function &F) { + IO.mapRequired("Name", F.Name); + IO.mapRequired("BasicBlocks", F.BasicBlocks); +} + +void MappingTraits<MCModuleYAML::Module>::mapping(IO &IO, + MCModuleYAML::Module &M) { + IO.mapRequired("Atoms", M.Atoms); + IO.mapOptional("Functions", M.Functions); +} + +void +ScalarTraits<MCModuleYAML::Operand>::output(const MCModuleYAML::Operand &Val, + void *Ctx, raw_ostream &Out) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + + // FIXME: Doesn't support FPImm and expr/inst, but do these make sense? + if (Val.MCOp.isImm()) + Out << "I" << Val.MCOp.getImm(); + else if (Val.MCOp.isReg()) + Out << "R" << IRI->MRI.getName(Val.MCOp.getReg()); + else + llvm_unreachable("Trying to output invalid MCOperand!"); +} + +StringRef +ScalarTraits<MCModuleYAML::Operand>::input(StringRef Scalar, void *Ctx, + MCModuleYAML::Operand &Val) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + char Type = 0; + if (Scalar.size() >= 1) + Type = Scalar.front(); + if (Type != 'R' && Type != 'I') + return "Operand must start with 'R' (register) or 'I' (immediate)."; + if (Type == 'R') { + unsigned Reg; + if (!IRI->matchRegister(Scalar.substr(1), Reg)) + return "Invalid register name."; + Val.MCOp = MCOperand::CreateReg(Reg); + } else if (Type == 'I') { + int64_t RIVal; + if (Scalar.substr(1).getAsInteger(10, RIVal)) + return "Invalid immediate value."; + Val.MCOp = MCOperand::CreateImm(RIVal); + } else { + Val.MCOp = MCOperand(); + } + return StringRef(); +} + +void ScalarTraits<MCModuleYAML::OpcodeEnum>::output( + const MCModuleYAML::OpcodeEnum &Val, void *Ctx, raw_ostream &Out) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + Out << IRI->MII.getName(Val); +} + +StringRef +ScalarTraits<MCModuleYAML::OpcodeEnum>::input(StringRef Scalar, void *Ctx, + MCModuleYAML::OpcodeEnum &Val) { + InstrRegInfoHolder *IRI = (InstrRegInfoHolder *)Ctx; + unsigned Opc; + if (!IRI->matchOpcode(Scalar, Opc)) + return "Invalid instruction opcode."; + Val = Opc; + return ""; +} + +} // end namespace yaml + +namespace { + +class MCModule2YAML { + const MCModule &MCM; + MCModuleYAML::Module YAMLModule; + void dumpAtom(const MCAtom *MCA); + void dumpFunction(const MCFunction *MCF); + void dumpBasicBlock(const MCBasicBlock *MCBB); + +public: + MCModule2YAML(const MCModule &MCM); + MCModuleYAML::Module &getYAMLModule(); +}; + +class YAML2MCModule { + MCModule &MCM; + +public: + YAML2MCModule(MCModule &MCM); + StringRef parse(const MCModuleYAML::Module &YAMLModule); +}; + +} // end unnamed namespace + +MCModule2YAML::MCModule2YAML(const MCModule &MCM) : MCM(MCM), YAMLModule() { + for
(MCModule::const_atom_iterator AI = MCM.atom_begin(), AE = MCM.atom_end(); + AI != AE; ++AI) + dumpAtom(*AI); + for (MCModule::const_func_iterator FI = MCM.func_begin(), FE = MCM.func_end(); + FI != FE; ++FI) + dumpFunction(*FI); +} + +void MCModule2YAML::dumpAtom(const MCAtom *MCA) { + YAMLModule.Atoms.resize(YAMLModule.Atoms.size() + 1); + MCModuleYAML::Atom &A = YAMLModule.Atoms.back(); + A.Type = MCA->getKind(); + A.StartAddress = MCA->getBeginAddr(); + A.Size = MCA->getEndAddr() - MCA->getBeginAddr() + 1; + if (const MCTextAtom *TA = dyn_cast<MCTextAtom>(MCA)) { + const size_t InstCount = TA->size(); + A.Insts.resize(InstCount); + for (size_t i = 0; i != InstCount; ++i) { + const MCDecodedInst &MCDI = TA->at(i); + A.Insts[i].Opcode = MCDI.Inst.getOpcode(); + A.Insts[i].Size = MCDI.Size; + const unsigned OpCount = MCDI.Inst.getNumOperands(); + A.Insts[i].Operands.resize(OpCount); + for (unsigned oi = 0; oi != OpCount; ++oi) + A.Insts[i].Operands[oi].MCOp = MCDI.Inst.getOperand(oi); + } + } else if (const MCDataAtom *DA = dyn_cast<MCDataAtom>(MCA)) { + A.Data = DA->getData(); + } else { + llvm_unreachable("Unknown atom type."); + } +} + +void MCModule2YAML::dumpFunction(const MCFunction *MCF) { + YAMLModule.Functions.resize(YAMLModule.Functions.size() + 1); + MCModuleYAML::Function &F = YAMLModule.Functions.back(); + F.Name = MCF->getName(); + for (MCFunction::const_iterator BBI = MCF->begin(), BBE = MCF->end(); + BBI != BBE; ++BBI) { + const MCBasicBlock *MCBB = *BBI; + F.BasicBlocks.resize(F.BasicBlocks.size() + 1); + MCModuleYAML::BasicBlock &BB = F.BasicBlocks.back(); + BB.Address = MCBB->getInsts()->getBeginAddr(); + for (MCBasicBlock::pred_const_iterator PI = MCBB->pred_begin(), + PE = MCBB->pred_end(); + PI != PE; ++PI) + BB.Preds.push_back((*PI)->getInsts()->getBeginAddr()); + for (MCBasicBlock::succ_const_iterator SI = MCBB->succ_begin(), + SE = MCBB->succ_end(); + SI != SE; ++SI) + BB.Succs.push_back((*SI)->getInsts()->getBeginAddr()); + } +} + +MCModuleYAML::Module &MCModule2YAML::getYAMLModule() { return YAMLModule; } + +YAML2MCModule::YAML2MCModule(MCModule &MCM) : MCM(MCM) {} + +StringRef YAML2MCModule::parse(const MCModuleYAML::Module &YAMLModule) { + typedef std::vector<MCModuleYAML::Atom>::const_iterator AtomIt; + typedef std::vector<MCModuleYAML::Inst>::const_iterator InstIt; + typedef std::vector<MCModuleYAML::Operand>::const_iterator OpIt; + + typedef DenseMap<uint64_t, MCTextAtom*> AddrToTextAtomTy; + AddrToTextAtomTy TAByAddr; + + for (AtomIt AI = YAMLModule.Atoms.begin(), AE = YAMLModule.Atoms.end(); + AI != AE; ++AI) { + uint64_t StartAddress = AI->StartAddress; + if (AI->Size == 0) + return "Atoms can't be empty!"; + uint64_t EndAddress = StartAddress + AI->Size - 1; + switch (AI->Type) { + case MCAtom::TextAtom: { + MCTextAtom *TA = MCM.createTextAtom(StartAddress, EndAddress); + TAByAddr[StartAddress] = TA; + for (InstIt II = AI->Insts.begin(), IE = AI->Insts.end(); II != IE; + ++II) { + MCInst MI; + MI.setOpcode(II->Opcode); + for (OpIt OI = II->Operands.begin(), OE = II->Operands.end(); OI != OE; + ++OI) + MI.addOperand(OI->MCOp); + TA->addInst(MI, II->Size); + } + break; + } + case MCAtom::DataAtom: { + MCDataAtom *DA = MCM.createDataAtom(StartAddress, EndAddress); + SmallVector<char, 64> Data; + raw_svector_ostream OS(Data); + AI->Data.writeAsBinary(OS); + OS.flush(); + for (size_t i = 0, e = Data.size(); i != e; ++i) + DA->addData((uint8_t)Data[i]); + break; + } + } + } + + typedef std::vector<MCModuleYAML::Function>::const_iterator FuncIt; + typedef std::vector<MCModuleYAML::BasicBlock>::const_iterator BBIt; + typedef std::vector<yaml::Hex64>::const_iterator AddrIt; + for (FuncIt FI = YAMLModule.Functions.begin(), + FE =
YAMLModule.Functions.end(); + FI != FE; ++FI) { + MCFunction *MCFN = MCM.createFunction(FI->Name); + for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end(); + BBI != BBE; ++BBI) { + AddrToTextAtomTy::const_iterator It = TAByAddr.find(BBI->Address); + if (It == TAByAddr.end()) + return "Basic block start address doesn't match any text atom!"; + MCFN->createBlock(*It->second); + } + for (BBIt BBI = FI->BasicBlocks.begin(), BBE = FI->BasicBlocks.end(); + BBI != BBE; ++BBI) { + MCBasicBlock *MCBB = MCFN->find(BBI->Address); + if (!MCBB) + return "Couldn't find matching basic block in function."; + for (AddrIt PI = BBI->Preds.begin(), PE = BBI->Preds.end(); PI != PE; + ++PI) { + MCBasicBlock *Pred = MCFN->find(*PI); + if (!Pred) + return "Couldn't find predecessor basic block."; + MCBB->addPredecessor(Pred); + } + for (AddrIt SI = BBI->Succs.begin(), SE = BBI->Succs.end(); SI != SE; + ++SI) { + MCBasicBlock *Succ = MCFN->find(*SI); + if (!Succ) + return "Couldn't find successor basic block."; + MCBB->addSuccessor(Succ); + } + } + } + return ""; +} + +StringRef mcmodule2yaml(raw_ostream &OS, const MCModule &MCM, + const MCInstrInfo &MII, const MCRegisterInfo &MRI) { + MCModule2YAML Dumper(MCM); + InstrRegInfoHolder IRI(MII, MRI); + yaml::Output YOut(OS, (void *)&IRI); + YOut << Dumper.getYAMLModule(); + return ""; +} + +StringRef yaml2mcmodule(OwningPtr<MCModule> &MCM, StringRef YamlContent, + const MCInstrInfo &MII, const MCRegisterInfo &MRI) { + MCM.reset(new MCModule); + YAML2MCModule Parser(*MCM); + MCModuleYAML::Module YAMLModule; + InstrRegInfoHolder IRI(MII, MRI); + yaml::Input YIn(YamlContent, (void *)&IRI); + YIn >> YAMLModule; + if (error_code ec = YIn.error()) + return ec.message(); + StringRef err = Parser.parse(YAMLModule); + if (!err.empty()) + return err; + return ""; +} + +} // end namespace llvm diff --git a/contrib/llvm/lib/MC/MCNullStreamer.cpp b/contrib/llvm/lib/MC/MCNullStreamer.cpp index 659706a..9b9c4aa 100644 --- a/contrib/llvm/lib/MC/MCNullStreamer.cpp +++ b/contrib/llvm/lib/MC/MCNullStreamer.cpp @@ -19,7 +19,7 @@ namespace { class MCNullStreamer : public MCStreamer { public: - MCNullStreamer(MCContext &Context) : MCStreamer(SK_NullStreamer, Context) {} + MCNullStreamer(MCContext &Context) : MCStreamer(Context, 0) {} /// @name MCStreamer Interface /// @{ @@ -37,7 +37,7 @@ namespace { virtual void EmitLabel(MCSymbol *Symbol) { assert(Symbol->isUndefined() && "Cannot define a symbol twice!"); assert(getCurrentSection().first &&"Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection().first); + AssignSection(Symbol, getCurrentSection().first); } virtual void EmitDebugLabel(MCSymbol *Symbol) { EmitLabel(Symbol); @@ -52,7 +52,9 @@ namespace { const MCSymbol *Label, unsigned PointerSize) {} - virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){} + virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute){ + return true; + } virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) {} @@ -71,10 +73,9 @@ namespace { uint64_t Size = 0, unsigned ByteAlignment = 0) {} virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment) {} - virtual void EmitBytes(StringRef Data, unsigned AddrSpace) {} + virtual void EmitBytes(StringRef Data) {} - virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) {} + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) {} virtual void EmitULEB128Value(const MCExpr *Value) {} virtual
void EmitSLEB128Value(const MCExpr *Value) {} virtual void EmitGPRel32Value(const MCExpr *Value) {} @@ -108,13 +109,6 @@ namespace { virtual void EmitCFIEndProcImpl(MCDwarfFrameInfo &Frame) { RecordProcEnd(Frame); } - - /// @} - - static bool classof(const MCStreamer *S) { - return S->getKind() == SK_NullStreamer; - } - }; } diff --git a/contrib/llvm/lib/MC/MCObjectDisassembler.cpp b/contrib/llvm/lib/MC/MCObjectDisassembler.cpp new file mode 100644 index 0000000..16a110f0 --- /dev/null +++ b/contrib/llvm/lib/MC/MCObjectDisassembler.cpp @@ -0,0 +1,584 @@ +//===- lib/MC/MCObjectDisassembler.cpp ------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectDisassembler.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAtom.h" +#include "llvm/MC/MCDisassembler.h" +#include "llvm/MC/MCFunction.h" +#include "llvm/MC/MCInstrAnalysis.h" +#include "llvm/MC/MCModule.h" +#include "llvm/MC/MCObjectSymbolizer.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/MachO.h" +#include "llvm/Support/MemoryObject.h" +#include "llvm/Support/StringRefMemoryObject.h" +#include "llvm/Support/raw_ostream.h" +#include <map> + +using namespace llvm; +using namespace object; + +MCObjectDisassembler::MCObjectDisassembler(const ObjectFile &Obj, + const MCDisassembler &Dis, + const MCInstrAnalysis &MIA) + : Obj(Obj), Dis(Dis), MIA(MIA), MOS(0) {} + +uint64_t MCObjectDisassembler::getEntrypoint() { + error_code ec; + for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols(); + SI != SE; SI.increment(ec)) { + if (ec) + break; + StringRef Name; + SI->getName(Name); + if (Name == "main" || Name == "_main") { + uint64_t Entrypoint; + SI->getAddress(Entrypoint); + return getEffectiveLoadAddr(Entrypoint); + } + } + return 0; +} + +ArrayRef<uint64_t> MCObjectDisassembler::getStaticInitFunctions() { + return ArrayRef<uint64_t>(); +} + +ArrayRef<uint64_t> MCObjectDisassembler::getStaticExitFunctions() { + return ArrayRef<uint64_t>(); +} + +MemoryObject *MCObjectDisassembler::getRegionFor(uint64_t Addr) { + // FIXME: Keep track of object sections.
+ return FallbackRegion.get(); +} + +uint64_t MCObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) { + return Addr; +} + +uint64_t MCObjectDisassembler::getOriginalLoadAddr(uint64_t Addr) { + return Addr; +} + +MCModule *MCObjectDisassembler::buildEmptyModule() { + MCModule *Module = new MCModule; + Module->Entrypoint = getEntrypoint(); + return Module; +} + +MCModule *MCObjectDisassembler::buildModule(bool withCFG) { + MCModule *Module = buildEmptyModule(); + + buildSectionAtoms(Module); + if (withCFG) + buildCFG(Module); + return Module; +} + +void MCObjectDisassembler::buildSectionAtoms(MCModule *Module) { + error_code ec; + for (section_iterator SI = Obj.begin_sections(), + SE = Obj.end_sections(); + SI != SE; + SI.increment(ec)) { + if (ec) break; + + bool isText; SI->isText(isText); + bool isData; SI->isData(isData); + if (!isData && !isText) + continue; + + uint64_t StartAddr; SI->getAddress(StartAddr); + uint64_t SecSize; SI->getSize(SecSize); + if (StartAddr == UnknownAddressOrSize || SecSize == UnknownAddressOrSize) + continue; + StartAddr = getEffectiveLoadAddr(StartAddr); + + StringRef Contents; SI->getContents(Contents); + StringRefMemoryObject memoryObject(Contents, StartAddr); + + // We don't care about things like non-file-backed sections yet. + if (Contents.size() != SecSize || !SecSize) + continue; + uint64_t EndAddr = StartAddr + SecSize - 1; + + StringRef SecName; SI->getName(SecName); + + if (isText) { + MCTextAtom *Text = 0; + MCDataAtom *InvalidData = 0; + + uint64_t InstSize; + for (uint64_t Index = 0; Index < SecSize; Index += InstSize) { + const uint64_t CurAddr = StartAddr + Index; + MCInst Inst; + if (Dis.getInstruction(Inst, InstSize, memoryObject, CurAddr, nulls(), + nulls())) { + if (!Text) { + Text = Module->createTextAtom(CurAddr, CurAddr); + Text->setName(SecName); + } + Text->addInst(Inst, InstSize); + InvalidData = 0; + } else { + assert(InstSize && "getInstruction() consumed no bytes"); + if (!InvalidData) { + Text = 0; + InvalidData = Module->createDataAtom(CurAddr, CurAddr+InstSize - 1); + } + for (uint64_t I = 0; I < InstSize; ++I) + InvalidData->addData(Contents[Index+I]); + } + } + } else { + MCDataAtom *Data = Module->createDataAtom(StartAddr, EndAddr); + Data->setName(SecName); + for (uint64_t Index = 0; Index < SecSize; ++Index) + Data->addData(Contents[Index]); + } + } +} + +namespace { + struct BBInfo; + typedef SmallPtrSet<BBInfo*, 2> BBInfoSetTy; + + struct BBInfo { + MCTextAtom *Atom; + MCBasicBlock *BB; + BBInfoSetTy Succs; + BBInfoSetTy Preds; + MCObjectDisassembler::AddressSetTy SuccAddrs; + + BBInfo() : Atom(0), BB(0) {} + + void addSucc(BBInfo &Succ) { + Succs.insert(&Succ); + Succ.Preds.insert(this); + } + }; +} + +static void RemoveDupsFromAddressVector(MCObjectDisassembler::AddressSetTy &V) { + std::sort(V.begin(), V.end()); + V.erase(std::unique(V.begin(), V.end()), V.end()); +} + +void MCObjectDisassembler::buildCFG(MCModule *Module) { + typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy; + BBInfoByAddrTy BBInfos; + AddressSetTy Splits; + AddressSetTy Calls; + + error_code ec; + for (symbol_iterator SI = Obj.begin_symbols(), SE = Obj.end_symbols(); + SI != SE; SI.increment(ec)) { + if (ec) + break; + SymbolRef::Type SymType; + SI->getType(SymType); + if (SymType == SymbolRef::ST_Function) { + uint64_t SymAddr; + SI->getAddress(SymAddr); + SymAddr = getEffectiveLoadAddr(SymAddr); + Calls.push_back(SymAddr); + Splits.push_back(SymAddr); + } + } + + assert(Module->func_begin() == Module->func_end() + && "Module already has a CFG!"); + + // First, determine
the basic block boundaries and call targets. + for (MCModule::atom_iterator AI = Module->atom_begin(), + AE = Module->atom_end(); + AI != AE; ++AI) { + MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); + if (!TA) continue; + Calls.push_back(TA->getBeginAddr()); + BBInfos[TA->getBeginAddr()].Atom = TA; + for (MCTextAtom::const_iterator II = TA->begin(), IE = TA->end(); + II != IE; ++II) { + if (MIA.isTerminator(II->Inst)) + Splits.push_back(II->Address + II->Size); + uint64_t Target; + if (MIA.evaluateBranch(II->Inst, II->Address, II->Size, Target)) { + if (MIA.isCall(II->Inst)) + Calls.push_back(Target); + Splits.push_back(Target); + } + } + } + + RemoveDupsFromAddressVector(Splits); + RemoveDupsFromAddressVector(Calls); + + // Split text atoms into basic block atoms. + for (AddressSetTy::const_iterator SI = Splits.begin(), SE = Splits.end(); + SI != SE; ++SI) { + MCAtom *A = Module->findAtomContaining(*SI); + if (!A) continue; + MCTextAtom *TA = cast<MCTextAtom>(A); + if (TA->getBeginAddr() == *SI) + continue; + MCTextAtom *NewAtom = TA->split(*SI); + BBInfos[NewAtom->getBeginAddr()].Atom = NewAtom; + StringRef BBName = TA->getName(); + BBName = BBName.substr(0, BBName.find_last_of(':')); + NewAtom->setName((BBName + ":" + utohexstr(*SI)).str()); + } + + // Compute succs/preds. + for (MCModule::atom_iterator AI = Module->atom_begin(), + AE = Module->atom_end(); + AI != AE; ++AI) { + MCTextAtom *TA = dyn_cast<MCTextAtom>(*AI); + if (!TA) continue; + BBInfo &CurBB = BBInfos[TA->getBeginAddr()]; + const MCDecodedInst &LI = TA->back(); + if (MIA.isBranch(LI.Inst)) { + uint64_t Target; + if (MIA.evaluateBranch(LI.Inst, LI.Address, LI.Size, Target)) + CurBB.addSucc(BBInfos[Target]); + if (MIA.isConditionalBranch(LI.Inst)) + CurBB.addSucc(BBInfos[LI.Address + LI.Size]); + } else if (!MIA.isTerminator(LI.Inst)) + CurBB.addSucc(BBInfos[LI.Address + LI.Size]); + } + + + // Create functions and basic blocks. + for (AddressSetTy::const_iterator CI = Calls.begin(), CE = Calls.end(); + CI != CE; ++CI) { + BBInfo &BBI = BBInfos[*CI]; + if (!BBI.Atom) continue; + + MCFunction &MCFN = *Module->createFunction(BBI.Atom->getName()); + + // Create MCBBs. + SmallSetVector<BBInfo*, 16> Worklist; + Worklist.insert(&BBI); + for (size_t wi = 0; wi < Worklist.size(); ++wi) { + BBInfo *BBI = Worklist[wi]; + if (!BBI->Atom) + continue; + BBI->BB = &MCFN.createBlock(*BBI->Atom); + // Add all predecessors and successors to the worklist. + for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); + SI != SE; ++SI) + Worklist.insert(*SI); + for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); + PI != PE; ++PI) + Worklist.insert(*PI); + } + + // Set preds/succs. + for (size_t wi = 0; wi < Worklist.size(); ++wi) { + BBInfo *BBI = Worklist[wi]; + MCBasicBlock *MCBB = BBI->BB; + if (!MCBB) + continue; + for (BBInfoSetTy::iterator SI = BBI->Succs.begin(), SE = BBI->Succs.end(); + SI != SE; ++SI) + if ((*SI)->BB) + MCBB->addSuccessor((*SI)->BB); + for (BBInfoSetTy::iterator PI = BBI->Preds.begin(), PE = BBI->Preds.end(); + PI != PE; ++PI) + if ((*PI)->BB) + MCBB->addPredecessor((*PI)->BB); + } + } +} + +// Basic idea of the disassembly + discovery: +// +// start with the wanted address, insert it in the worklist +// while worklist not empty, take next address in the worklist: +// - check if atom exists there +// - if middle of atom: +// - split basic blocks referencing the atom +// - look for an already encountered BBInfo (using a map) +// - if there is, split it (new one, fallthrough, move succs, etc..)
+// - if start of atom: nothing else to do +// - if no atom: create new atom and new bbinfo +// - look at the last instruction in the atom, add succs to worklist +// for all elements in the worklist: +// - create basic block, update preds/succs, etc.. +// +MCBasicBlock *MCObjectDisassembler::getBBAt(MCModule *Module, MCFunction *MCFN, + uint64_t BBBeginAddr, + AddressSetTy &CallTargets, + AddressSetTy &TailCallTargets) { + typedef std::map<uint64_t, BBInfo> BBInfoByAddrTy; + typedef SmallSetVector<uint64_t, 16> AddrWorklistTy; + BBInfoByAddrTy BBInfos; + AddrWorklistTy Worklist; + + Worklist.insert(BBBeginAddr); + for (size_t wi = 0; wi < Worklist.size(); ++wi) { + const uint64_t BeginAddr = Worklist[wi]; + BBInfo *BBI = &BBInfos[BeginAddr]; + + MCTextAtom *&TA = BBI->Atom; + assert(!TA && "Discovered basic block already has an associated atom!"); + + // Look for an atom at BeginAddr. + if (MCAtom *A = Module->findAtomContaining(BeginAddr)) { + // FIXME: We don't care about mixed atoms, see above. + TA = cast<MCTextAtom>(A); + + // The found atom doesn't begin at BeginAddr, we have to split it. + if (TA->getBeginAddr() != BeginAddr) { + // FIXME: Handle overlapping atoms: middle-starting instructions, etc.. + MCTextAtom *NewTA = TA->split(BeginAddr); + + // Look for an already encountered basic block that needs splitting + BBInfoByAddrTy::iterator It = BBInfos.find(TA->getBeginAddr()); + if (It != BBInfos.end() && It->second.Atom) { + BBI->SuccAddrs = It->second.SuccAddrs; + It->second.SuccAddrs.clear(); + It->second.SuccAddrs.push_back(BeginAddr); + } + TA = NewTA; + } + BBI->Atom = TA; + } else { + // If we didn't find an atom, then we have to disassemble to create one! + + MemoryObject *Region = getRegionFor(BeginAddr); + if (!Region) + llvm_unreachable(("Couldn't find suitable region for disassembly at " + + utostr(BeginAddr)).c_str()); + + uint64_t InstSize; + uint64_t EndAddr = Region->getBase() + Region->getExtent(); + + // We want to stop before the next atom and have a fallthrough to it. + if (MCTextAtom *NextAtom = + cast_or_null<MCTextAtom>(Module->findFirstAtomAfter(BeginAddr))) + EndAddr = std::min(EndAddr, NextAtom->getBeginAddr()); + + for (uint64_t Addr = BeginAddr; Addr < EndAddr; Addr += InstSize) { + MCInst Inst; + if (Dis.getInstruction(Inst, InstSize, *Region, Addr, nulls(), + nulls())) { + if (!TA) + TA = Module->createTextAtom(Addr, Addr); + TA->addInst(Inst, InstSize); + } else { + // We don't care about splitting mixed atoms either. + llvm_unreachable("Couldn't disassemble instruction in atom."); + } + + uint64_t BranchTarget; + if (MIA.evaluateBranch(Inst, Addr, InstSize, BranchTarget)) { + if (MIA.isCall(Inst)) + CallTargets.push_back(BranchTarget); + } + + if (MIA.isTerminator(Inst)) + break; + } + BBI->Atom = TA; + } + + assert(TA && "Couldn't disassemble atom, none was created!"); + assert(TA->begin() != TA->end() && "Empty atom!"); + + MemoryObject *Region = getRegionFor(TA->getBeginAddr()); + assert(Region && "Couldn't find region for already disassembled code!"); + uint64_t EndRegion = Region->getBase() + Region->getExtent(); + + // Now we have a basic block atom, add successors. + // Add the fallthrough block. + if ((MIA.isConditionalBranch(TA->back().Inst) || + !MIA.isTerminator(TA->back().Inst)) && + (TA->getEndAddr() + 1 < EndRegion)) { + BBI->SuccAddrs.push_back(TA->getEndAddr() + 1); + Worklist.insert(TA->getEndAddr() + 1); + } + + // If the terminator is a branch, add the target block.
+ if (MIA.isBranch(TA->back().Inst)) { + uint64_t BranchTarget; + if (MIA.evaluateBranch(TA->back().Inst, TA->back().Address, + TA->back().Size, BranchTarget)) { + StringRef ExtFnName; + if (MOS) + ExtFnName = + MOS->findExternalFunctionAt(getOriginalLoadAddr(BranchTarget)); + if (!ExtFnName.empty()) { + TailCallTargets.push_back(BranchTarget); + CallTargets.push_back(BranchTarget); + } else { + BBI->SuccAddrs.push_back(BranchTarget); + Worklist.insert(BranchTarget); + } + } + } + } + + for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { + const uint64_t BeginAddr = Worklist[wi]; + BBInfo *BBI = &BBInfos[BeginAddr]; + + assert(BBI->Atom && "Found a basic block without an associated atom!"); + + // Look for a basic block at BeginAddr. + BBI->BB = MCFN->find(BeginAddr); + if (BBI->BB) { + // FIXME: check that the succs/preds are the same + continue; + } + // If there was none, we have to create one from the atom. + BBI->BB = &MCFN->createBlock(*BBI->Atom); + } + + for (size_t wi = 0, we = Worklist.size(); wi != we; ++wi) { + const uint64_t BeginAddr = Worklist[wi]; + BBInfo *BBI = &BBInfos[BeginAddr]; + MCBasicBlock *BB = BBI->BB; + + RemoveDupsFromAddressVector(BBI->SuccAddrs); + for (AddressSetTy::const_iterator SI = BBI->SuccAddrs.begin(), + SE = BBI->SuccAddrs.end(); + SI != SE; ++SI) { + MCBasicBlock *Succ = BBInfos[*SI].BB; + BB->addSuccessor(Succ); + Succ->addPredecessor(BB); + } + } + + assert(BBInfos[Worklist[0]].BB && + "No basic block created at requested address?"); + + return BBInfos[Worklist[0]].BB; +} + +MCFunction * +MCObjectDisassembler::createFunction(MCModule *Module, uint64_t BeginAddr, + AddressSetTy &CallTargets, + AddressSetTy &TailCallTargets) { + // First, check if this is an external function. + StringRef ExtFnName; + if (MOS) + ExtFnName = MOS->findExternalFunctionAt(getOriginalLoadAddr(BeginAddr)); + if (!ExtFnName.empty()) + return Module->createFunction(ExtFnName); + + // If it's not, look for an existing function. + for (MCModule::func_iterator FI = Module->func_begin(), + FE = Module->func_end(); + FI != FE; ++FI) { + if ((*FI)->empty()) + continue; + // FIXME: MCModule should provide a findFunctionByAddr() + if ((*FI)->getEntryBlock()->getInsts()->getBeginAddr() == BeginAddr) + return *FI; + } + + // Finally, just create a new one. + MCFunction *MCFN = Module->createFunction(""); + getBBAt(Module, MCFN, BeginAddr, CallTargets, TailCallTargets); + return MCFN; +} + +// MachO MCObjectDisassembler implementation. + +MCMachOObjectDisassembler::MCMachOObjectDisassembler( + const MachOObjectFile &MOOF, const MCDisassembler &Dis, + const MCInstrAnalysis &MIA, uint64_t VMAddrSlide, + uint64_t HeaderLoadAddress) + : MCObjectDisassembler(MOOF, Dis, MIA), MOOF(MOOF), + VMAddrSlide(VMAddrSlide), HeaderLoadAddress(HeaderLoadAddress) { + + error_code ec; + for (section_iterator SI = MOOF.begin_sections(), SE = MOOF.end_sections(); + SI != SE; SI.increment(ec)) { + if (ec) + break; + StringRef Name; + SI->getName(Name); + // FIXME: We should use the S_ section type instead of the name. + if (Name == "__mod_init_func") { + DEBUG(dbgs() << "Found __mod_init_func section!\n"); + SI->getContents(ModInitContents); + } else if (Name == "__mod_exit_func") { + DEBUG(dbgs() << "Found __mod_exit_func section!\n"); + SI->getContents(ModExitContents); + } + } +} + +// FIXME: Only do the translations for addresses actually inside the object.
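// For illustration, assuming a dyld slide (VMAddrSlide) of 0x1000, the two
// translations defined below are exact inverses of each other:
//   getEffectiveLoadAddr(0x100000f40) == 0x100000f40 + 0x1000 == 0x100001f40
//   getOriginalLoadAddr(0x100001f40)  == 0x100001f40 - 0x1000 == 0x100000f40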
+uint64_t MCMachOObjectDisassembler::getEffectiveLoadAddr(uint64_t Addr) { + return Addr + VMAddrSlide; +} + +uint64_t +MCMachOObjectDisassembler::getOriginalLoadAddr(uint64_t EffectiveAddr) { + return EffectiveAddr - VMAddrSlide; +} + +uint64_t MCMachOObjectDisassembler::getEntrypoint() { + uint64_t EntryFileOffset = 0; + + // Look for LC_MAIN. + { + uint32_t LoadCommandCount = MOOF.getHeader().ncmds; + MachOObjectFile::LoadCommandInfo Load = MOOF.getFirstLoadCommandInfo(); + for (unsigned I = 0;; ++I) { + if (Load.C.cmd == MachO::LC_MAIN) { + EntryFileOffset = + ((const MachO::entry_point_command *)Load.Ptr)->entryoff; + break; + } + + if (I == LoadCommandCount - 1) + break; + else + Load = MOOF.getNextLoadCommandInfo(Load); + } + } + + // If we didn't find anything, default to the common implementation. + // FIXME: Maybe we could also look at LC_UNIXTHREAD and friends? + if (!EntryFileOffset) + return MCObjectDisassembler::getEntrypoint(); + + return EntryFileOffset + HeaderLoadAddress; +} + +ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticInitFunctions() { + // FIXME: We only handle 64bit mach-o + assert(MOOF.is64Bit()); + + size_t EntrySize = 8; + size_t EntryCount = ModInitContents.size() / EntrySize; + return ArrayRef<uint64_t>( + reinterpret_cast<const uint64_t *>(ModInitContents.data()), EntryCount); +} + +ArrayRef<uint64_t> MCMachOObjectDisassembler::getStaticExitFunctions() { + // FIXME: We only handle 64bit mach-o + assert(MOOF.is64Bit()); + + size_t EntrySize = 8; + size_t EntryCount = ModExitContents.size() / EntrySize; + return ArrayRef<uint64_t>( + reinterpret_cast<const uint64_t *>(ModExitContents.data()), EntryCount); +} diff --git a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp index 96b62f1..8ef4a0a 100644 --- a/contrib/llvm/lib/MC/MCObjectFileInfo.cpp +++ b/contrib/llvm/lib/MC/MCObjectFileInfo.cpp @@ -39,6 +39,9 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { = Ctx->getMachOSection("__DATA", "__data", 0, SectionKind::getDataRel()); + // BSSSection might not be expected initialized on msvc. + BSSSection = 0; + TLSDataSection // .tdata = Ctx->getMachOSection("__DATA", "__thread_data", MCSectionMachO::S_THREAD_LOCAL_REGULAR, @@ -79,7 +82,8 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { // to using it in -static mode.
SixteenByteConstantSection = 0; if (RelocM != Reloc::Static && - T.getArch() != Triple::x86_64 && T.getArch() != Triple::ppc64) + T.getArch() != Triple::x86_64 && T.getArch() != Triple::ppc64 && + T.getArch() != Triple::ppc64le) SixteenByteConstantSection = // .literal16 Ctx->getMachOSection("__TEXT", "__literal16", MCSectionMachO::S_16BYTE_LITERALS, @@ -198,6 +202,14 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { Ctx->getMachOSection("__DWARF", "__debug_pubtypes", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getMetadata()); + DwarfGnuPubNamesSection = + Ctx->getMachOSection("__DWARF", "__debug_gnu_pubn", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); + DwarfGnuPubTypesSection = + Ctx->getMachOSection("__DWARF", "__debug_gnu_pubt", + MCSectionMachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); DwarfStrSection = Ctx->getMachOSection("__DWARF", "__debug_str", MCSectionMachO::S_ATTR_DEBUG, @@ -222,6 +234,9 @@ void MCObjectFileInfo::InitMachOMCObjectFileInfo(Triple T) { Ctx->getMachOSection("__DWARF", "__debug_inlined", MCSectionMachO::S_ATTR_DEBUG, SectionKind::getMetadata()); + StackMapSection = + Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps", 0, + SectionKind::getMetadata()); TLSExtraDataSection = TLSTLVSection; } @@ -288,7 +303,7 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { FDEEncoding = dwarf::DW_EH_PE_udata4; TTypeEncoding = dwarf::DW_EH_PE_absptr; } - } else if (T.getArch() == Triple::ppc64) { + } else if (T.getArch() == Triple::ppc64 || T.getArch() == Triple::ppc64le) { PersonalityEncoding = dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; LSDAEncoding = dwarf::DW_EH_PE_pcrel | dwarf::DW_EH_PE_udata8; @@ -434,6 +449,12 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { DwarfPubTypesSection = Ctx->getELFSection(".debug_pubtypes", ELF::SHT_PROGBITS, 0, SectionKind::getMetadata()); + DwarfGnuPubNamesSection = + Ctx->getELFSection(".debug_gnu_pubnames", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); + DwarfGnuPubTypesSection = + Ctx->getELFSection(".debug_gnu_pubtypes", ELF::SHT_PROGBITS, 0, + SectionKind::getMetadata()); DwarfStrSection = Ctx->getELFSection(".debug_str", ELF::SHT_PROGBITS, ELF::SHF_MERGE | ELF::SHF_STRINGS, @@ -495,6 +516,12 @@ void MCObjectFileInfo::InitELFMCObjectFileInfo(Triple T) { void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { // COFF + BSSSection = + Ctx->getCOFFSection(".bss", + COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE, + SectionKind::getBSS()); TextSection = Ctx->getCOFFSection(".text", COFF::IMAGE_SCN_CNT_CODE | @@ -584,6 +611,16 @@ void MCObjectFileInfo::InitCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); + DwarfGnuPubNamesSection = + Ctx->getCOFFSection(".debug_gnu_pubnames", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); + DwarfGnuPubTypesSection = + Ctx->getCOFFSection(".debug_gnu_pubtypes", + COFF::IMAGE_SCN_MEM_DISCARDABLE | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfStrSection = Ctx->getCOFFSection(".debug_str", COFF::IMAGE_SCN_MEM_DISCARDABLE | diff --git a/contrib/llvm/lib/MC/MCObjectStreamer.cpp b/contrib/llvm/lib/MC/MCObjectStreamer.cpp index d21ce8d..bc14c2a 100644 --- a/contrib/llvm/lib/MC/MCObjectStreamer.cpp +++ b/contrib/llvm/lib/MC/MCObjectStreamer.cpp @@ -18,22 +18,26 @@ #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCObjectWriter.h" 
#include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MCSection.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; -MCObjectStreamer::MCObjectStreamer(StreamerKind Kind, MCContext &Context, +MCObjectStreamer::MCObjectStreamer(MCContext &Context, + MCTargetStreamer *TargetStreamer, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter_) - : MCStreamer(Kind, Context), + : MCStreamer(Context, TargetStreamer), Assembler(new MCAssembler(Context, TAB, *Emitter_, *TAB.createObjectWriter(OS), OS)), CurSectionData(0) {} -MCObjectStreamer::MCObjectStreamer(StreamerKind Kind, MCContext &Context, +MCObjectStreamer::MCObjectStreamer(MCContext &Context, + MCTargetStreamer *TargetStreamer, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter_, MCAssembler *_Assembler) - : MCStreamer(Kind, Context), Assembler(_Assembler), CurSectionData(0) {} + : MCStreamer(Context, TargetStreamer), Assembler(_Assembler), + CurSectionData(0) {} MCObjectStreamer::~MCObjectStreamer() { delete &Assembler->getBackend(); @@ -98,15 +102,15 @@ const MCExpr *MCObjectStreamer::AddValueSymbols(const MCExpr *Value) { return Value; } -void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { - assert(AddrSpace == 0 && "Address space must be 0!"); +void MCObjectStreamer::EmitValueImpl(const MCExpr *Value, unsigned Size) { MCDataFragment *DF = getOrCreateDataFragment(); + MCLineEntry::Make(this, getCurrentSection().first); + // Avoid fixups when possible. int64_t AbsValue; if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue, getAssembler())) { - EmitIntValue(AbsValue, Size, AddrSpace); + EmitIntValue(AbsValue, Size); return; } DF->getFixups().push_back( @@ -241,7 +245,7 @@ void MCObjectStreamer::EmitInstToFragment(const MCInst &Inst) { } #ifndef NDEBUG -static const char *BundlingNotImplementedMsg = +static const char *const BundlingNotImplementedMsg = "Aligned bundling is not implemented for this object format"; #endif @@ -257,6 +261,19 @@ void MCObjectStreamer::EmitBundleUnlock() { llvm_unreachable(BundlingNotImplementedMsg); } +void MCObjectStreamer::EmitDwarfLocDirective(unsigned FileNo, unsigned Line, + unsigned Column, unsigned Flags, + unsigned Isa, + unsigned Discriminator, + StringRef FileName) { + // In case we see two .loc directives in a row, make sure the + // first one gets a line entry. 
+ MCLineEntry::Make(this, getCurrentSection().first); + + this->MCStreamer::EmitDwarfLocDirective(FileNo, Line, Column, Flags, + Isa, Discriminator, FileName); +} + void MCObjectStreamer::EmitDwarfAdvanceLineAddr(int64_t LineDelta, const MCSymbol *LastLabel, const MCSymbol *Label, @@ -287,8 +304,8 @@ void MCObjectStreamer::EmitDwarfAdvanceFrameAddr(const MCSymbol *LastLabel, insert(new MCDwarfCallFrameFragment(*AddrDelta)); } -void MCObjectStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { - assert(AddrSpace == 0 && "Address space must be 0!"); +void MCObjectStreamer::EmitBytes(StringRef Data) { + MCLineEntry::Make(this, getCurrentSection().first); getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); } @@ -351,14 +368,17 @@ void MCObjectStreamer::EmitGPRel64Value(const MCExpr *Value) { DF->getContents().resize(DF->getContents().size() + 8, 0); } -void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, - unsigned AddrSpace) { - assert(AddrSpace == 0 && "Address space must be 0!"); +void MCObjectStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) { // FIXME: A MCFillFragment would be more memory efficient but MCExpr has // problems evaluating expressions across multiple fragments. getOrCreateDataFragment()->getContents().append(NumBytes, FillValue); } +void MCObjectStreamer::EmitZeros(uint64_t NumBytes) { + unsigned ItemSize = getCurrentSection().first->isVirtualSection() ? 0 : 1; + insert(new MCFillFragment(0, ItemSize, NumBytes)); +} + void MCObjectStreamer::FinishImpl() { // Dump out the dwarf file & directory tables and line tables. const MCSymbol *LineSectionSymbol = NULL; diff --git a/contrib/llvm/lib/MC/MCObjectSymbolizer.cpp b/contrib/llvm/lib/MC/MCObjectSymbolizer.cpp new file mode 100644 index 0000000..b9131d1 --- /dev/null +++ b/contrib/llvm/lib/MC/MCObjectSymbolizer.cpp @@ -0,0 +1,310 @@ +//===-- lib/MC/MCObjectSymbolizer.cpp -------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCObjectSymbolizer.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/MC/MCInst.h" +#include "llvm/MC/MCRelocationInfo.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Object/MachO.h" +#include "llvm/Object/ELFObjectFile.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; +using namespace object; + +//===- MCMachObjectSymbolizer ---------------------------------------------===// + +namespace { +class MCMachObjectSymbolizer : public MCObjectSymbolizer { + const MachOObjectFile *MOOF; + // __TEXT;__stubs support. 
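// (A sketch of the lookup these fields support, with assumed example
//  numbers: a branch target Addr inside __stubs maps to stub index
//  (Addr - StubsStart) / StubSize, so with StubsStart == 0x1000,
//  StubSize == 6 and StubsCount == 4, Addr 0x100c is stub #2 and
//  anything at or past 0x1018 is out of range. The index is then
//  resolved to a symbol name via the indirect symbol table in
//  findExternalFunctionAt() below.)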
+  uint64_t StubsStart;
+  uint64_t StubsCount;
+  uint64_t StubSize;
+  uint64_t StubsIndSymIndex;
+
+public:
+  MCMachObjectSymbolizer(MCContext &Ctx, OwningPtr<MCRelocationInfo> &RelInfo,
+                         const MachOObjectFile *MOOF);
+
+  StringRef findExternalFunctionAt(uint64_t Addr) LLVM_OVERRIDE;
+
+  void tryAddingPcLoadReferenceComment(raw_ostream &cStream,
+                                       int64_t Value,
+                                       uint64_t Address) LLVM_OVERRIDE;
+};
+} // End unnamed namespace
+
+
+MCMachObjectSymbolizer::
+MCMachObjectSymbolizer(MCContext &Ctx, OwningPtr<MCRelocationInfo> &RelInfo,
+                       const MachOObjectFile *MOOF)
+  : MCObjectSymbolizer(Ctx, RelInfo, MOOF), MOOF(MOOF),
+    StubsStart(0), StubsCount(0), StubSize(0), StubsIndSymIndex(0) {
+
+  error_code ec;
+  for (section_iterator SI = MOOF->begin_sections(), SE = MOOF->end_sections();
+       SI != SE; SI.increment(ec)) {
+    if (ec) break;
+    StringRef Name; SI->getName(Name);
+    if (Name == "__stubs") {
+      SectionRef StubsSec = *SI;
+      if (MOOF->is64Bit()) {
+        MachO::section_64 S = MOOF->getSection64(StubsSec.getRawDataRefImpl());
+        StubsIndSymIndex = S.reserved1;
+        StubSize = S.reserved2;
+      } else {
+        MachO::section S = MOOF->getSection(StubsSec.getRawDataRefImpl());
+        StubsIndSymIndex = S.reserved1;
+        StubSize = S.reserved2;
+      }
+      assert(StubSize && "Mach-O stub entry size can't be zero!");
+      StubsSec.getAddress(StubsStart);
+      StubsSec.getSize(StubsCount);
+      StubsCount /= StubSize;
+    }
+  }
+}
+
+StringRef MCMachObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
+  // FIXME: also, this can all be done at the very beginning, by iterating over
+  // all stubs and creating the calls to outside functions. Is it worth it
+  // though?
+  if (!StubSize)
+    return StringRef();
+  uint64_t StubIdx = (Addr - StubsStart) / StubSize;
+  if (StubIdx >= StubsCount)
+    return StringRef();
+
+  uint32_t SymtabIdx =
+    MOOF->getIndirectSymbolTableEntry(MOOF->getDysymtabLoadCommand(), StubIdx);
+
+  StringRef SymName;
+  symbol_iterator SI = MOOF->begin_symbols();
+  error_code ec;
+  for (uint32_t i = 0; i != SymtabIdx; ++i) {
+    SI.increment(ec);
+  }
+  SI->getName(SymName);
+  assert(SI != MOOF->end_symbols() && "Stub wasn't found in the symbol table!");
+  assert(SymName.front() == '_' && "Mach-O symbol doesn't start with '_'!");
+  return SymName.substr(1);
+}
+
+void MCMachObjectSymbolizer::
+tryAddingPcLoadReferenceComment(raw_ostream &cStream, int64_t Value,
+                                uint64_t Address) {
+  if (const RelocationRef *R = findRelocationAt(Address)) {
+    const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R);
+    if (!RelExpr || RelExpr->EvaluateAsAbsolute(Value) == false)
+      return;
+  }
+  uint64_t Addr = Value;
+  if (const SectionRef *S = findSectionContaining(Addr)) {
+    StringRef Name; S->getName(Name);
+    uint64_t SAddr; S->getAddress(SAddr);
+    if (Name == "__cstring") {
+      StringRef Contents;
+      S->getContents(Contents);
+      Contents = Contents.substr(Addr - SAddr);
+      cStream << " ## literal pool for: "
+              << Contents.substr(0, Contents.find_first_of(0));
+    }
+  }
+}
+
+//===- MCObjectSymbolizer -------------------------------------------------===//
+
+MCObjectSymbolizer::MCObjectSymbolizer(MCContext &Ctx,
+                                       OwningPtr<MCRelocationInfo> &RelInfo,
+                                       const ObjectFile *Obj)
+  : MCSymbolizer(Ctx, RelInfo), Obj(Obj), SortedSections(), AddrToReloc() {
+}
+
+bool MCObjectSymbolizer::
+tryAddingSymbolicOperand(MCInst &MI, raw_ostream &cStream,
+                         int64_t Value, uint64_t Address, bool IsBranch,
+                         uint64_t Offset, uint64_t InstSize) {
+  if (IsBranch) {
+    StringRef ExtFnName = findExternalFunctionAt((uint64_t)Value);
+    if (!ExtFnName.empty()) {
+      MCSymbol *Sym = Ctx.GetOrCreateSymbol(ExtFnName);
+      const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+      MI.addOperand(MCOperand::CreateExpr(Expr));
+      return true;
+    }
+  }
+
+  if (const RelocationRef *R = findRelocationAt(Address + Offset)) {
+    if (const MCExpr *RelExpr = RelInfo->createExprForRelocation(*R)) {
+      MI.addOperand(MCOperand::CreateExpr(RelExpr));
+      return true;
+    }
+    // Only try to create a symbol+offset expression if there is no relocation.
+    return false;
+  }
+
+  // Interpret Value as a branch target.
+  if (IsBranch == false)
+    return false;
+  uint64_t UValue = Value;
+  // FIXME: map instead of looping each time?
+  error_code ec;
+  for (symbol_iterator SI = Obj->begin_symbols(), SE = Obj->end_symbols();
+       SI != SE; SI.increment(ec)) {
+    if (ec) break;
+    uint64_t SymAddr; SI->getAddress(SymAddr);
+    uint64_t SymSize; SI->getSize(SymSize);
+    StringRef SymName; SI->getName(SymName);
+    SymbolRef::Type SymType; SI->getType(SymType);
+    if (SymAddr == UnknownAddressOrSize || SymSize == UnknownAddressOrSize
+        || SymName.empty() || SymType != SymbolRef::ST_Function)
+      continue;
+
+    if ( SymAddr == UValue ||
+        (SymAddr <= UValue && SymAddr + SymSize > UValue)) {
+      MCSymbol *Sym = Ctx.GetOrCreateSymbol(SymName);
+      const MCExpr *Expr = MCSymbolRefExpr::Create(Sym, Ctx);
+      if (SymAddr != UValue) {
+        const MCExpr *Off = MCConstantExpr::Create(UValue - SymAddr, Ctx);
+        Expr = MCBinaryExpr::CreateAdd(Expr, Off, Ctx);
+      }
+      MI.addOperand(MCOperand::CreateExpr(Expr));
+      return true;
+    }
+  }
+  return false;
+}
+
+void MCObjectSymbolizer::
+tryAddingPcLoadReferenceComment(raw_ostream &cStream,
+                                int64_t Value, uint64_t Address) {
+}
+
+StringRef MCObjectSymbolizer::findExternalFunctionAt(uint64_t Addr) {
+  return StringRef();
+}
+
+MCObjectSymbolizer *
+MCObjectSymbolizer::createObjectSymbolizer(MCContext &Ctx,
+                                           OwningPtr<MCRelocationInfo> &RelInfo,
+                                           const ObjectFile *Obj) {
+  if (const MachOObjectFile *MOOF = dyn_cast<MachOObjectFile>(Obj))
+    return new MCMachObjectSymbolizer(Ctx, RelInfo, MOOF);
+  return new MCObjectSymbolizer(Ctx, RelInfo, Obj);
+}
+
+// SortedSections implementation.
+
+static bool SectionStartsBefore(const SectionRef &S, uint64_t Addr) {
+  uint64_t SAddr; S.getAddress(SAddr);
+  return SAddr < Addr;
+}
+
+const SectionRef *MCObjectSymbolizer::findSectionContaining(uint64_t Addr) {
+  if (SortedSections.empty())
+    buildSectionList();
+
+  SortedSectionList::iterator
+    EndIt = SortedSections.end(),
+    It = std::lower_bound(SortedSections.begin(), EndIt,
+                          Addr, SectionStartsBefore);
+  if (It == EndIt)
+    return 0;
+  uint64_t SAddr; It->getAddress(SAddr);
+  uint64_t SSize; It->getSize(SSize);
+  if (Addr >= SAddr + SSize)
+    return 0;
+  return &*It;
+}
+
+const RelocationRef *MCObjectSymbolizer::findRelocationAt(uint64_t Addr) {
+  if (AddrToReloc.empty())
+    buildRelocationByAddrMap();
+
+  AddrToRelocMap::const_iterator RI = AddrToReloc.find(Addr);
+  if (RI == AddrToReloc.end())
+    return 0;
+  return &RI->second;
+}
+
+void MCObjectSymbolizer::buildSectionList() {
+  error_code ec;
+  for (section_iterator SI = Obj->begin_sections(), SE = Obj->end_sections();
+       SI != SE; SI.increment(ec)) {
+    if (ec) break;
+
+    bool RequiredForExec; SI->isRequiredForExecution(RequiredForExec);
+    if (RequiredForExec == false)
+      continue;
+    uint64_t SAddr; SI->getAddress(SAddr);
+    uint64_t SSize; SI->getSize(SSize);
+    SortedSectionList::iterator It = std::lower_bound(SortedSections.begin(),
+                                                      SortedSections.end(),
+                                                      SAddr,
+                                                      SectionStartsBefore);
+    if (It != SortedSections.end()) {
+      uint64_t FoundSAddr; It->getAddress(FoundSAddr);
+      if (FoundSAddr < SAddr + SSize)
+        llvm_unreachable("Inserting overlapping sections");
+    }
+    SortedSections.insert(It, *SI);
+  }
+}
+
+void MCObjectSymbolizer::buildRelocationByAddrMap() {
+  error_code ec;
+  for (section_iterator SI = Obj->begin_sections(), SE = Obj->end_sections();
+       SI != SE; SI.increment(ec)) {
+    if (ec) break;
+
+    section_iterator RelSecI = SI->getRelocatedSection();
+    if (RelSecI == Obj->end_sections())
+      continue;
+
+    uint64_t StartAddr; RelSecI->getAddress(StartAddr);
+    uint64_t Size; RelSecI->getSize(Size);
+    bool RequiredForExec; RelSecI->isRequiredForExecution(RequiredForExec);
+    if (RequiredForExec == false || Size == 0)
+      continue;
+    for (relocation_iterator RI = SI->begin_relocations(),
+                             RE = SI->end_relocations();
+         RI != RE;
+         RI.increment(ec)) {
+      if (ec) break;
+      // FIXME: libObject is inconsistent regarding error handling. The
+      // overwhelming majority of methods always return object_error::success,
+      // and assert for simple errors.. Here, ELFObjectFile::getRelocationOffset
+      // asserts when the file type isn't ET_REL.
+      // This workaround handles x86-64 elf, the only one that has a relocinfo.
+      uint64_t Offset;
+      if (Obj->isELF()) {
+        const ELF64LEObjectFile *ELFObj = dyn_cast<ELF64LEObjectFile>(Obj);
+        if (ELFObj == 0)
+          break;
+        if (ELFObj->getELFFile()->getHeader()->e_type == ELF::ET_REL) {
+          RI->getOffset(Offset);
+          Offset += StartAddr;
+        } else {
+          RI->getAddress(Offset);
+        }
+      } else {
+        RI->getOffset(Offset);
+        Offset += StartAddr;
+      }
+      // At a specific address, only keep the first relocation.
+ if (AddrToReloc.find(Offset) == AddrToReloc.end()) + AddrToReloc[Offset] = *RI; + } + } +} diff --git a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp index c1c594a..b49dd01 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmLexer.cpp @@ -91,9 +91,56 @@ AsmToken AsmLexer::LexFloatLiteral() { StringRef(TokStart, CurPtr - TokStart)); } -/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@]* +/// LexHexFloatLiteral matches essentially (.[0-9a-fA-F]*)?[pP][+-]?[0-9a-fA-F]+ +/// while making sure there are enough actual digits around for the constant to +/// be valid. +/// +/// The leading "0x[0-9a-fA-F]*" (i.e. integer part) has already been consumed +/// before we get here. +AsmToken AsmLexer::LexHexFloatLiteral(bool NoIntDigits) { + assert((*CurPtr == 'p' || *CurPtr == 'P' || *CurPtr == '.') && + "unexpected parse state in floating hex"); + bool NoFracDigits = true; + + // Skip the fractional part if there is one + if (*CurPtr == '.') { + ++CurPtr; + + const char *FracStart = CurPtr; + while (isxdigit(*CurPtr)) + ++CurPtr; + + NoFracDigits = CurPtr == FracStart; + } + + if (NoIntDigits && NoFracDigits) + return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " + "expected at least one significand digit"); + + // Make sure we do have some kind of proper exponent part + if (*CurPtr != 'p' && *CurPtr != 'P') + return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " + "expected exponent part 'p'"); + ++CurPtr; + + if (*CurPtr == '+' || *CurPtr == '-') + ++CurPtr; + + // N.b. exponent digits are *not* hex + const char *ExpStart = CurPtr; + while (isdigit(*CurPtr)) + ++CurPtr; + + if (CurPtr == ExpStart) + return ReturnError(TokStart, "invalid hexadecimal floating-point constant: " + "expected at least one exponent digit"); + + return AsmToken(AsmToken::Real, StringRef(TokStart, CurPtr - TokStart)); +} + +/// LexIdentifier: [a-zA-Z_.][a-zA-Z0-9_$.@?]* static bool IsIdentifierChar(char c) { - return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@'; + return isalnum(c) || c == '_' || c == '$' || c == '.' || c == '@' || c == '?'; } AsmToken AsmLexer::LexIdentifier() { // Check for floating point literals. @@ -265,7 +312,12 @@ AsmToken AsmLexer::LexDigit() { while (isxdigit(CurPtr[0])) ++CurPtr; - // Requires at least one hex digit. + // "0x.0p0" is valid, and "0x0p0" (but not "0xp0" for example, which will be + // diagnosed by LexHexFloatLiteral). + if (CurPtr[0] == '.' || CurPtr[0] == 'p' || CurPtr[0] == 'P') + return LexHexFloatLiteral(NumStart == CurPtr); + + // Otherwise requires at least one hex digit. if (CurPtr == NumStart) return ReturnError(CurPtr-2, "invalid hexadecimal number"); diff --git a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp index edefdb4..a91bd93 100644 --- a/contrib/llvm/lib/MC/MCParser/AsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/AsmParser.cpp @@ -94,13 +94,13 @@ public: }; struct ParseStatementInfo { - /// ParsedOperands - The parsed operands from the last parsed statement. + /// \brief The parsed operands from the last parsed statement. SmallVector ParsedOperands; - /// Opcode - The opcode from the last parsed instruction. + /// \brief The opcode from the last parsed instruction. unsigned Opcode; - /// Error - Was there an error parsing the inline assembly? + /// \brief Was there an error parsing the inline assembly? 
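The LexHexFloatLiteral change above admits C99-style hexadecimal floats. A
standalone recognizer for the same grammar, as a sketch only (this is not the
lexer's code, and isHexFloat is a made-up helper for illustration):

// Grammar accepted above: 0x[hex]*(.[hex]*)?[pP][+-]?[dec]+, with at
// least one significand digit. Sketch only, not LLVM code.
#include <cassert>
#include <cctype>
#include <string>

static bool isHexFloat(const std::string &S) {
  size_t i = 0, n = S.size();
  if (n < 2 || S[0] != '0' || (S[1] != 'x' && S[1] != 'X'))
    return false;
  i = 2;
  size_t Digits = 0;
  while (i < n && isxdigit((unsigned char)S[i])) { ++i; ++Digits; } // int part
  if (i < n && S[i] == '.') {                                       // fraction
    ++i;
    while (i < n && isxdigit((unsigned char)S[i])) { ++i; ++Digits; }
  }
  if (Digits == 0)                     // "0xp0": no significand digit at all
    return false;
  if (i >= n || (S[i] != 'p' && S[i] != 'P'))
    return false;                      // the exponent part is mandatory
  ++i;
  if (i < n && (S[i] == '+' || S[i] == '-'))
    ++i;
  size_t ExpDigits = 0;
  while (i < n && isdigit((unsigned char)S[i])) { ++i; ++ExpDigits; } // decimal
  return ExpDigits != 0 && i == n;
}

int main() {
  assert(isHexFloat("0x.0p0") && isHexFloat("0x1.8p-1"));  // accepted above
  assert(!isHexFloat("0xp0") && !isHexFloat("0x1.8"));     // rejected above
  return 0;
}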
bool ParseError; SmallVectorImpl *AsmRewrites; @@ -138,17 +138,20 @@ private: AsmCond TheCondState; std::vector TheCondStack; - /// ExtensionDirectiveMap - maps directive names to handler methods in parser + /// \brief maps directive names to handler methods in parser /// extensions. Extensions register themselves in this map by calling /// addDirectiveHandler. StringMap ExtensionDirectiveMap; - /// MacroMap - Map of currently defined macros. + /// \brief Map of currently defined macros. StringMap MacroMap; - /// ActiveMacros - Stack of active macro instantiations. + /// \brief Stack of active macro instantiations. std::vector ActiveMacros; + /// \brief List of bodies of anonymous macros. + std::deque MacroLikeBodies; + /// Boolean tracking whether macro substitution is enabled. unsigned MacrosEnabledFlag : 1; @@ -160,14 +163,21 @@ private: int64_t CppHashLineNumber; SMLoc CppHashLoc; int CppHashBuf; + /// When generating dwarf for assembly source files we need to calculate the + /// logical line number based on the last parsed cpp hash file line comment + /// and current line. Since this is slow and messes up the SourceMgr's + /// cache we save the last info we queried with SrcMgr.FindLineNumber(). + SMLoc LastQueryIDLoc; + int LastQueryBuffer; + unsigned LastQueryLine; /// AssemblerDialect. ~OU means unset value and use value provided by MAI. unsigned AssemblerDialect; - /// IsDarwin - is Darwin compatibility enabled? + /// \brief is Darwin compatibility enabled? bool IsDarwin; - /// ParsingInlineAsm - Are we parsing ms-style inline assembly? + /// \brief Are we parsing ms-style inline assembly? bool ParsingInlineAsm; public: @@ -225,7 +235,7 @@ public: virtual bool parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc); virtual bool parseAbsoluteExpression(int64_t &Res); - /// parseIdentifier - Parse an identifier or string (as a quoted identifier) + /// \brief Parse an identifier or string (as a quoted identifier) /// and set \p Res to the identifier contents. virtual bool parseIdentifier(StringRef &Res); virtual void eatToEndOfStatement(); @@ -235,11 +245,11 @@ public: private: - bool ParseStatement(ParseStatementInfo &Info); - void EatToEndOfLine(); - bool ParseCppHashLineFilenameComment(const SMLoc &L); + bool parseStatement(ParseStatementInfo &Info); + void eatToEndOfLine(); + bool parseCppHashLineFilenameComment(const SMLoc &L); - void CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body, + void checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body, MCAsmMacroParameters Parameters); bool expandMacro(raw_svector_ostream &OS, StringRef Body, const MCAsmMacroParameters &Parameters, @@ -247,55 +257,56 @@ private: const SMLoc &L); /// \brief Are macros enabled in the parser? - bool MacrosEnabled() {return MacrosEnabledFlag;} + bool areMacrosEnabled() {return MacrosEnabledFlag;} /// \brief Control a flag in the parser that enables or disables macros. - void SetMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;} + void setMacrosEnabled(bool Flag) {MacrosEnabledFlag = Flag;} /// \brief Lookup a previously defined macro. /// \param Name Macro name. /// \returns Pointer to macro. NULL if no such macro was defined. - const MCAsmMacro* LookupMacro(StringRef Name); + const MCAsmMacro* lookupMacro(StringRef Name); /// \brief Define a new macro with the given name and information. - void DefineMacro(StringRef Name, const MCAsmMacro& Macro); + void defineMacro(StringRef Name, const MCAsmMacro& Macro); /// \brief Undefine a macro. 
If no such macro was defined, it's a no-op. - void UndefineMacro(StringRef Name); + void undefineMacro(StringRef Name); /// \brief Are we inside a macro instantiation? - bool InsideMacroInstantiation() {return !ActiveMacros.empty();} + bool isInsideMacroInstantiation() {return !ActiveMacros.empty();} - /// \brief Handle entry to macro instantiation. + /// \brief Handle entry to macro instantiation. /// /// \param M The macro. /// \param NameLoc Instantiation location. - bool HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc); + bool handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc); /// \brief Handle exit from macro instantiation. - void HandleMacroExit(); + void handleMacroExit(); /// \brief Extract AsmTokens for a macro argument. If the argument delimiter /// is initially unknown, set it to AsmToken::Eof. It will be set to the /// correct delimiter by the method. - bool ParseMacroArgument(MCAsmMacroArgument &MA, + bool parseMacroArgument(MCAsmMacroArgument &MA, AsmToken::TokenKind &ArgumentDelimiter); /// \brief Parse all macro arguments for a given macro. - bool ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A); + bool parseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A); - void PrintMacroInstantiations(); - void PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, + void printMacroInstantiations(); + void printMessage(SMLoc Loc, SourceMgr::DiagKind Kind, const Twine &Msg, ArrayRef Ranges = None) const { SrcMgr.PrintMessage(Loc, Kind, Msg, Ranges); } static void DiagHandler(const SMDiagnostic &Diag, void *Context); - /// EnterIncludeFile - Enter the specified file. This returns true on failure. - bool EnterIncludeFile(const std::string &Filename); - /// ProcessIncbinFile - Process the specified file for the .incbin directive. + /// \brief Enter the specified file. This returns true on failure. + bool enterIncludeFile(const std::string &Filename); + + /// \brief Process the specified file for the .incbin directive. /// This returns true on failure. - bool ProcessIncbinFile(const std::string &Filename); + bool processIncbinFile(const std::string &Filename); /// \brief Reset the current lexer position to that given by \p Loc. The /// current token is not set; clients should ensure Lex() is called @@ -303,7 +314,7 @@ private: /// /// \param InBuffer If not -1, should be the known buffer id that contains the /// location. - void JumpToLoc(SMLoc Loc, int InBuffer=-1); + void jumpToLoc(SMLoc Loc, int InBuffer=-1); /// \brief Parse up to the end of statement and a return the contents from the /// current token until the end of the statement; the current token on exit @@ -312,17 +323,16 @@ private: /// \brief Parse until the end of a statement or a comma is encountered, /// return the contents from the current token up to the end or comma. 
- StringRef ParseStringToComma(); + StringRef parseStringToComma(); - bool ParseAssignment(StringRef Name, bool allow_redef, + bool parseAssignment(StringRef Name, bool allow_redef, bool NoDeadStrip = false); - bool ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc); - bool ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc); - bool ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc); - bool ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc); + bool parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc); + bool parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc); + bool parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc); - bool ParseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc); + bool parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc); // Generic (target and platform independent) directive parsing. enum DirectiveKind { @@ -332,7 +342,7 @@ private: DK_FLOAT, DK_DOUBLE, DK_ALIGN, DK_ALIGN32, DK_BALIGN, DK_BALIGNW, DK_BALIGNL, DK_P2ALIGN, DK_P2ALIGNW, DK_P2ALIGNL, DK_ORG, DK_FILL, DK_ENDR, DK_BUNDLE_ALIGN_MODE, DK_BUNDLE_LOCK, DK_BUNDLE_UNLOCK, - DK_ZERO, DK_EXTERN, DK_GLOBL, DK_GLOBAL, DK_INDIRECT_SYMBOL, + DK_ZERO, DK_EXTERN, DK_GLOBL, DK_GLOBAL, DK_LAZY_REFERENCE, DK_NO_DEAD_STRIP, DK_SYMBOL_RESOLVER, DK_PRIVATE_EXTERN, DK_REFERENCE, DK_WEAK_DEFINITION, DK_WEAK_REFERENCE, DK_WEAK_DEF_CAN_BE_HIDDEN, DK_COMM, DK_COMMON, DK_LCOMM, DK_ABORT, @@ -345,112 +355,113 @@ private: DK_CFI_OFFSET, DK_CFI_REL_OFFSET, DK_CFI_PERSONALITY, DK_CFI_LSDA, DK_CFI_REMEMBER_STATE, DK_CFI_RESTORE_STATE, DK_CFI_SAME_VALUE, DK_CFI_RESTORE, DK_CFI_ESCAPE, DK_CFI_SIGNAL_FRAME, DK_CFI_UNDEFINED, - DK_CFI_REGISTER, + DK_CFI_REGISTER, DK_CFI_WINDOW_SAVE, DK_MACROS_ON, DK_MACROS_OFF, DK_MACRO, DK_ENDM, DK_ENDMACRO, DK_PURGEM, DK_SLEB128, DK_ULEB128 }; - /// DirectiveKindMap - Maps directive name --> DirectiveKind enum, for + /// \brief Maps directive name --> DirectiveKind enum, for /// directives parsed by this class. StringMap DirectiveKindMap; // ".ascii", ".asciz", ".string" - bool ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); - bool ParseDirectiveValue(unsigned Size); // ".byte", ".long", ... - bool ParseDirectiveRealValue(const fltSemantics &); // ".single", ... - bool ParseDirectiveFill(); // ".fill" - bool ParseDirectiveZero(); // ".zero" + bool parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated); + bool parseDirectiveValue(unsigned Size); // ".byte", ".long", ... + bool parseDirectiveRealValue(const fltSemantics &); // ".single", ... 
+ bool parseDirectiveFill(); // ".fill" + bool parseDirectiveZero(); // ".zero" // ".set", ".equ", ".equiv" - bool ParseDirectiveSet(StringRef IDVal, bool allow_redef); - bool ParseDirectiveOrg(); // ".org" + bool parseDirectiveSet(StringRef IDVal, bool allow_redef); + bool parseDirectiveOrg(); // ".org" // ".align{,32}", ".p2align{,w,l}" - bool ParseDirectiveAlign(bool IsPow2, unsigned ValueSize); + bool parseDirectiveAlign(bool IsPow2, unsigned ValueSize); // ".file", ".line", ".loc", ".stabs" - bool ParseDirectiveFile(SMLoc DirectiveLoc); - bool ParseDirectiveLine(); - bool ParseDirectiveLoc(); - bool ParseDirectiveStabs(); + bool parseDirectiveFile(SMLoc DirectiveLoc); + bool parseDirectiveLine(); + bool parseDirectiveLoc(); + bool parseDirectiveStabs(); // .cfi directives - bool ParseDirectiveCFIRegister(SMLoc DirectiveLoc); - bool ParseDirectiveCFISections(); - bool ParseDirectiveCFIStartProc(); - bool ParseDirectiveCFIEndProc(); - bool ParseDirectiveCFIDefCfaOffset(); - bool ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc); - bool ParseDirectiveCFIAdjustCfaOffset(); - bool ParseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc); - bool ParseDirectiveCFIOffset(SMLoc DirectiveLoc); - bool ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc); - bool ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality); - bool ParseDirectiveCFIRememberState(); - bool ParseDirectiveCFIRestoreState(); - bool ParseDirectiveCFISameValue(SMLoc DirectiveLoc); - bool ParseDirectiveCFIRestore(SMLoc DirectiveLoc); - bool ParseDirectiveCFIEscape(); - bool ParseDirectiveCFISignalFrame(); - bool ParseDirectiveCFIUndefined(SMLoc DirectiveLoc); + bool parseDirectiveCFIRegister(SMLoc DirectiveLoc); + bool parseDirectiveCFIWindowSave(); + bool parseDirectiveCFISections(); + bool parseDirectiveCFIStartProc(); + bool parseDirectiveCFIEndProc(); + bool parseDirectiveCFIDefCfaOffset(); + bool parseDirectiveCFIDefCfa(SMLoc DirectiveLoc); + bool parseDirectiveCFIAdjustCfaOffset(); + bool parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc); + bool parseDirectiveCFIOffset(SMLoc DirectiveLoc); + bool parseDirectiveCFIRelOffset(SMLoc DirectiveLoc); + bool parseDirectiveCFIPersonalityOrLsda(bool IsPersonality); + bool parseDirectiveCFIRememberState(); + bool parseDirectiveCFIRestoreState(); + bool parseDirectiveCFISameValue(SMLoc DirectiveLoc); + bool parseDirectiveCFIRestore(SMLoc DirectiveLoc); + bool parseDirectiveCFIEscape(); + bool parseDirectiveCFISignalFrame(); + bool parseDirectiveCFIUndefined(SMLoc DirectiveLoc); // macro directives - bool ParseDirectivePurgeMacro(SMLoc DirectiveLoc); - bool ParseDirectiveEndMacro(StringRef Directive); - bool ParseDirectiveMacro(SMLoc DirectiveLoc); - bool ParseDirectiveMacrosOnOff(StringRef Directive); + bool parseDirectivePurgeMacro(SMLoc DirectiveLoc); + bool parseDirectiveEndMacro(StringRef Directive); + bool parseDirectiveMacro(SMLoc DirectiveLoc); + bool parseDirectiveMacrosOnOff(StringRef Directive); // ".bundle_align_mode" - bool ParseDirectiveBundleAlignMode(); + bool parseDirectiveBundleAlignMode(); // ".bundle_lock" - bool ParseDirectiveBundleLock(); + bool parseDirectiveBundleLock(); // ".bundle_unlock" - bool ParseDirectiveBundleUnlock(); + bool parseDirectiveBundleUnlock(); // ".space", ".skip" - bool ParseDirectiveSpace(StringRef IDVal); + bool parseDirectiveSpace(StringRef IDVal); // .sleb128 (Signed=true) and .uleb128 (Signed=false) - bool ParseDirectiveLEB128(bool Signed); + bool parseDirectiveLEB128(bool Signed); - /// ParseDirectiveSymbolAttribute - Parse a directive like ".globl" 
which + /// \brief Parse a directive like ".globl" which /// accepts a single symbol (which should be a label or an external). - bool ParseDirectiveSymbolAttribute(MCSymbolAttr Attr); + bool parseDirectiveSymbolAttribute(MCSymbolAttr Attr); - bool ParseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm" + bool parseDirectiveComm(bool IsLocal); // ".comm" and ".lcomm" - bool ParseDirectiveAbort(); // ".abort" - bool ParseDirectiveInclude(); // ".include" - bool ParseDirectiveIncbin(); // ".incbin" + bool parseDirectiveAbort(); // ".abort" + bool parseDirectiveInclude(); // ".include" + bool parseDirectiveIncbin(); // ".incbin" - bool ParseDirectiveIf(SMLoc DirectiveLoc); // ".if" + bool parseDirectiveIf(SMLoc DirectiveLoc); // ".if" // ".ifb" or ".ifnb", depending on ExpectBlank. - bool ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank); + bool parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank); // ".ifc" or ".ifnc", depending on ExpectEqual. - bool ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual); + bool parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual); // ".ifdef" or ".ifndef", depending on expect_defined - bool ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined); - bool ParseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" - bool ParseDirectiveElse(SMLoc DirectiveLoc); // ".else" - bool ParseDirectiveEndIf(SMLoc DirectiveLoc); // .endif + bool parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined); + bool parseDirectiveElseIf(SMLoc DirectiveLoc); // ".elseif" + bool parseDirectiveElse(SMLoc DirectiveLoc); // ".else" + bool parseDirectiveEndIf(SMLoc DirectiveLoc); // .endif virtual bool parseEscapedString(std::string &Data); - const MCExpr *ApplyModifierToExpr(const MCExpr *E, + const MCExpr *applyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind Variant); // Macro-like directives - MCAsmMacro *ParseMacroLikeBody(SMLoc DirectiveLoc); - void InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, + MCAsmMacro *parseMacroLikeBody(SMLoc DirectiveLoc); + void instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, raw_svector_ostream &OS); - bool ParseDirectiveRept(SMLoc DirectiveLoc); // ".rept" - bool ParseDirectiveIrp(SMLoc DirectiveLoc); // ".irp" - bool ParseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc" - bool ParseDirectiveEndr(SMLoc DirectiveLoc); // ".endr" + bool parseDirectiveRept(SMLoc DirectiveLoc); // ".rept" + bool parseDirectiveIrp(SMLoc DirectiveLoc); // ".irp" + bool parseDirectiveIrpc(SMLoc DirectiveLoc); // ".irpc" + bool parseDirectiveEndr(SMLoc DirectiveLoc); // ".endr" // "_emit" or "__emit" - bool ParseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info, + bool parseDirectiveMSEmit(SMLoc DirectiveLoc, ParseStatementInfo &Info, size_t Len); // "align" - bool ParseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info); + bool parseDirectiveMSAlign(SMLoc DirectiveLoc, ParseStatementInfo &Info); void initializeDirectiveKindMap(); }; @@ -466,12 +477,12 @@ extern MCAsmParserExtension *createCOFFAsmParser(); enum { DEFAULT_ADDRSPACE = 0 }; -AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, - MCStreamer &_Out, const MCAsmInfo &_MAI) - : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), SrcMgr(_SM), - PlatformParser(0), - CurBuffer(0), MacrosEnabledFlag(true), CppHashLineNumber(0), - AssemblerDialect(~0U), IsDarwin(false), ParsingInlineAsm(false) { +AsmParser::AsmParser(SourceMgr &_SM, MCContext &_Ctx, MCStreamer &_Out, + const MCAsmInfo &_MAI) + : Lexer(_MAI), Ctx(_Ctx), Out(_Out), MAI(_MAI), 
SrcMgr(_SM), + PlatformParser(0), CurBuffer(0), MacrosEnabledFlag(true), + CppHashLineNumber(0), AssemblerDialect(~0U), IsDarwin(false), + ParsingInlineAsm(false) { // Save the old handler. SavedDiagHandler = SrcMgr.getDiagHandler(); SavedDiagContext = SrcMgr.getDiagContext(); @@ -502,37 +513,40 @@ AsmParser::~AsmParser() { assert(ActiveMacros.empty() && "Unexpected active macro instantiation!"); // Destroy any macros. - for (StringMap::iterator it = MacroMap.begin(), - ie = MacroMap.end(); it != ie; ++it) + for (StringMap::iterator it = MacroMap.begin(), + ie = MacroMap.end(); + it != ie; ++it) delete it->getValue(); delete PlatformParser; } -void AsmParser::PrintMacroInstantiations() { +void AsmParser::printMacroInstantiations() { // Print the active macro instantiation stack. - for (std::vector::const_reverse_iterator - it = ActiveMacros.rbegin(), ie = ActiveMacros.rend(); it != ie; ++it) - PrintMessage((*it)->InstantiationLoc, SourceMgr::DK_Note, + for (std::vector::const_reverse_iterator + it = ActiveMacros.rbegin(), + ie = ActiveMacros.rend(); + it != ie; ++it) + printMessage((*it)->InstantiationLoc, SourceMgr::DK_Note, "while in macro instantiation"); } bool AsmParser::Warning(SMLoc L, const Twine &Msg, ArrayRef Ranges) { if (FatalAssemblerWarnings) return Error(L, Msg, Ranges); - PrintMessage(L, SourceMgr::DK_Warning, Msg, Ranges); - PrintMacroInstantiations(); + printMessage(L, SourceMgr::DK_Warning, Msg, Ranges); + printMacroInstantiations(); return false; } bool AsmParser::Error(SMLoc L, const Twine &Msg, ArrayRef Ranges) { HadError = true; - PrintMessage(L, SourceMgr::DK_Error, Msg, Ranges); - PrintMacroInstantiations(); + printMessage(L, SourceMgr::DK_Error, Msg, Ranges); + printMacroInstantiations(); return true; } -bool AsmParser::EnterIncludeFile(const std::string &Filename) { +bool AsmParser::enterIncludeFile(const std::string &Filename) { std::string IncludedFile; int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile); if (NewBuf == -1) @@ -545,22 +559,21 @@ bool AsmParser::EnterIncludeFile(const std::string &Filename) { return false; } -/// Process the specified .incbin file by seaching for it in the include paths +/// Process the specified .incbin file by searching for it in the include paths /// then just emitting the byte contents of the file to the streamer. This /// returns true on failure. -bool AsmParser::ProcessIncbinFile(const std::string &Filename) { +bool AsmParser::processIncbinFile(const std::string &Filename) { std::string IncludedFile; int NewBuf = SrcMgr.AddIncludeFile(Filename, Lexer.getLoc(), IncludedFile); if (NewBuf == -1) return true; // Pick up the bytes from the file and emit them. - getStreamer().EmitBytes(SrcMgr.getMemoryBuffer(NewBuf)->getBuffer(), - DEFAULT_ADDRSPACE); + getStreamer().EmitBytes(SrcMgr.getMemoryBuffer(NewBuf)->getBuffer()); return false; } -void AsmParser::JumpToLoc(SMLoc Loc, int InBuffer) { +void AsmParser::jumpToLoc(SMLoc Loc, int InBuffer) { if (InBuffer != -1) { CurBuffer = InBuffer; } else { @@ -577,7 +590,7 @@ const AsmToken &AsmParser::Lex() { // include stack. SMLoc ParentIncludeLoc = SrcMgr.getParentIncludeLoc(CurBuffer); if (ParentIncludeLoc != SMLoc()) { - JumpToLoc(ParentIncludeLoc); + jumpToLoc(ParentIncludeLoc); tok = &Lexer.Lex(); } } @@ -614,7 +627,8 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // While we have input, parse each statement. 
while (Lexer.isNot(AsmToken::Eof)) { ParseStatementInfo Info; - if (!ParseStatement(Info)) continue; + if (!parseStatement(Info)) + continue; // We had an error, validate that one was emitted and recover by skipping to // the next line. @@ -628,7 +642,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // Check to see there are no empty DwarfFile slots. const SmallVectorImpl &MCDwarfFiles = - getContext().getMCDwarfFiles(); + getContext().getMCDwarfFiles(); for (unsigned i = 1; i < MCDwarfFiles.size(); i++) { if (!MCDwarfFiles[i]) TokError("unassigned file number: " + Twine(i) + " for .file directives"); @@ -641,7 +655,7 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { if (!NoFinalize && MAI.hasSubsectionsViaSymbols()) { const MCContext::SymbolTable &Symbols = getContext().getSymbols(); for (MCContext::SymbolTable::const_iterator i = Symbols.begin(), - e = Symbols.end(); + e = Symbols.end(); i != e; ++i) { MCSymbol *Sym = i->getValue(); // Variable symbols may not be marked as defined, so check those @@ -651,13 +665,12 @@ bool AsmParser::Run(bool NoInitialTextSection, bool NoFinalize) { // FIXME: We would really like to refer back to where the symbol was // first referenced for a source location. We need to add something // to track that. Currently, we just point to the end of the file. - PrintMessage(getLexer().getLoc(), SourceMgr::DK_Error, - "assembler local symbol '" + Sym->getName() + - "' not defined"); + printMessage( + getLexer().getLoc(), SourceMgr::DK_Error, + "assembler local symbol '" + Sym->getName() + "' not defined"); } } - // Finalize the output stream if there are no errors and if the client wants // us to. if (!HadError && !NoFinalize) @@ -673,10 +686,9 @@ void AsmParser::checkForValidSection() { } } -/// eatToEndOfStatement - Throw away the rest of the line for testing purposes. +/// \brief Throw away the rest of the line for testing purposes. void AsmParser::eatToEndOfStatement() { - while (Lexer.isNot(AsmToken::EndOfStatement) && - Lexer.isNot(AsmToken::Eof)) + while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof)) Lex(); // Eat EOL. @@ -687,33 +699,32 @@ void AsmParser::eatToEndOfStatement() { StringRef AsmParser::parseStringToEndOfStatement() { const char *Start = getTok().getLoc().getPointer(); - while (Lexer.isNot(AsmToken::EndOfStatement) && - Lexer.isNot(AsmToken::Eof)) + while (Lexer.isNot(AsmToken::EndOfStatement) && Lexer.isNot(AsmToken::Eof)) Lex(); const char *End = getTok().getLoc().getPointer(); return StringRef(Start, End - Start); } -StringRef AsmParser::ParseStringToComma() { +StringRef AsmParser::parseStringToComma() { const char *Start = getTok().getLoc().getPointer(); while (Lexer.isNot(AsmToken::EndOfStatement) && - Lexer.isNot(AsmToken::Comma) && - Lexer.isNot(AsmToken::Eof)) + Lexer.isNot(AsmToken::Comma) && Lexer.isNot(AsmToken::Eof)) Lex(); const char *End = getTok().getLoc().getPointer(); return StringRef(Start, End - Start); } -/// ParseParenExpr - Parse a paren expression and return it. +/// \brief Parse a paren expression and return it. /// NOTE: This assumes the leading '(' has already been consumed. 
/// /// parenexpr ::= expr) /// -bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { - if (parseExpression(Res)) return true; +bool AsmParser::parseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { + if (parseExpression(Res)) + return true; if (Lexer.isNot(AsmToken::RParen)) return TokError("expected ')' in parentheses expression"); EndLoc = Lexer.getTok().getEndLoc(); @@ -721,13 +732,14 @@ bool AsmParser::ParseParenExpr(const MCExpr *&Res, SMLoc &EndLoc) { return false; } -/// ParseBracketExpr - Parse a bracket expression and return it. +/// \brief Parse a bracket expression and return it. /// NOTE: This assumes the leading '[' has already been consumed. /// /// bracketexpr ::= expr] /// -bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { - if (parseExpression(Res)) return true; +bool AsmParser::parseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { + if (parseExpression(Res)) + return true; if (Lexer.isNot(AsmToken::RBrac)) return TokError("expected ']' in brackets expression"); EndLoc = Lexer.getTok().getEndLoc(); @@ -735,13 +747,13 @@ bool AsmParser::ParseBracketExpr(const MCExpr *&Res, SMLoc &EndLoc) { return false; } -/// ParsePrimaryExpr - Parse a primary expression and return it. +/// \brief Parse a primary expression and return it. /// primaryexpr ::= (parenexpr /// primaryexpr ::= symbol /// primaryexpr ::= number /// primaryexpr ::= '.' /// primaryexpr ::= ~,+,- primaryexpr -bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { +bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { SMLoc FirstTokenLoc = getLexer().getLoc(); AsmToken::TokenKind FirstTokenKind = Lexer.getKind(); switch (FirstTokenKind) { @@ -752,36 +764,54 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { return true; case AsmToken::Exclaim: Lex(); // Eat the operator. - if (ParsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc)) return true; Res = MCUnaryExpr::CreateLNot(Res, getContext()); return false; case AsmToken::Dollar: + case AsmToken::At: case AsmToken::String: case AsmToken::Identifier: { StringRef Identifier; if (parseIdentifier(Identifier)) { - if (FirstTokenKind == AsmToken::Dollar) - return Error(FirstTokenLoc, "invalid token in expression"); - return true; + if (FirstTokenKind == AsmToken::Dollar) { + if (Lexer.getMAI().getDollarIsPC()) { + // This is a '$' reference, which references the current PC. Emit a + // temporary label to the streamer and refer to it. + MCSymbol *Sym = Ctx.CreateTempSymbol(); + Out.EmitLabel(Sym); + Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, + getContext()); + EndLoc = FirstTokenLoc; + return false; + } else + return Error(FirstTokenLoc, "invalid token in expression"); + return true; + } } EndLoc = SMLoc::getFromPointer(Identifier.end()); // This is a symbol reference. + StringRef SymbolName = Identifier; + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; std::pair Split = Identifier.split('@'); - MCSymbol *Sym = getContext().GetOrCreateSymbol(Split.first); // Lookup the symbol variant if used. 
- MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; if (Split.first.size() != Identifier.size()) { Variant = MCSymbolRefExpr::getVariantKindForName(Split.second); - if (Variant == MCSymbolRefExpr::VK_Invalid) { + if (Variant != MCSymbolRefExpr::VK_Invalid) { + SymbolName = Split.first; + } else if (MAI.doesAllowAtInName()) { + Variant = MCSymbolRefExpr::VK_None; + } else { Variant = MCSymbolRefExpr::VK_None; return TokError("invalid variant '" + Split.second + "'"); } } + MCSymbol *Sym = getContext().GetOrCreateSymbol(SymbolName); + // If this is an absolute variable reference, substitute it now to preserve // semantics in the face of reassignment. if (Sym->isVariable() && isa(Sym->getVariableValue())) { @@ -805,11 +835,21 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { // Look for 'b' or 'f' following an Integer as a directional label if (Lexer.getKind() == AsmToken::Identifier) { StringRef IDVal = getTok().getString(); - if (IDVal == "f" || IDVal == "b"){ - MCSymbol *Sym = Ctx.GetDirectionalLocalSymbol(IntVal, - IDVal == "f" ? 1 : 0); - Res = MCSymbolRefExpr::Create(Sym, MCSymbolRefExpr::VK_None, - getContext()); + // Lookup the symbol variant if used. + std::pair Split = IDVal.split('@'); + MCSymbolRefExpr::VariantKind Variant = MCSymbolRefExpr::VK_None; + if (Split.first.size() != IDVal.size()) { + Variant = MCSymbolRefExpr::getVariantKindForName(Split.second); + if (Variant == MCSymbolRefExpr::VK_Invalid) { + Variant = MCSymbolRefExpr::VK_None; + return TokError("invalid variant '" + Split.second + "'"); + } + IDVal = Split.first; + } + if (IDVal == "f" || IDVal == "b") { + MCSymbol *Sym = + Ctx.GetDirectionalLocalSymbol(IntVal, IDVal == "f" ? 1 : 0); + Res = MCSymbolRefExpr::Create(Sym, Variant, getContext()); if (IDVal == "b" && Sym->isUndefined()) return Error(Loc, "invalid reference to undefined symbol"); EndLoc = Lexer.getTok().getEndLoc(); @@ -838,27 +878,27 @@ bool AsmParser::ParsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { } case AsmToken::LParen: Lex(); // Eat the '('. - return ParseParenExpr(Res, EndLoc); + return parseParenExpr(Res, EndLoc); case AsmToken::LBrac: if (!PlatformParser->HasBracketExpressions()) return TokError("brackets expression not supported on this target"); Lex(); // Eat the '['. - return ParseBracketExpr(Res, EndLoc); + return parseBracketExpr(Res, EndLoc); case AsmToken::Minus: Lex(); // Eat the operator. - if (ParsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc)) return true; Res = MCUnaryExpr::CreateMinus(Res, getContext()); return false; case AsmToken::Plus: Lex(); // Eat the operator. - if (ParsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc)) return true; Res = MCUnaryExpr::CreatePlus(Res, getContext()); return false; case AsmToken::Tilde: Lex(); // Eat the operator. - if (ParsePrimaryExpr(Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc)) return true; Res = MCUnaryExpr::CreateNot(Res, getContext()); return false; @@ -870,13 +910,13 @@ bool AsmParser::parseExpression(const MCExpr *&Res) { return parseExpression(Res, EndLoc); } -bool AsmParser::parsePrimaryExpr(const MCExpr *&Res, SMLoc &EndLoc) { - return ParsePrimaryExpr(Res, EndLoc); -} - const MCExpr * -AsmParser::ApplyModifierToExpr(const MCExpr *E, +AsmParser::applyModifierToExpr(const MCExpr *E, MCSymbolRefExpr::VariantKind Variant) { + // Ask the target implementation about this expression first. 
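// (That is: a target AsmParser may claim sym@modifier forms it
//  understands and return a rewritten expression; a null result falls
//  through to the generic single-symbol rewrite below.)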
+ const MCExpr *NewE = getTargetParser().applyModifierToExpr(E, Variant, Ctx); + if (NewE) + return NewE; // Recurse over the given expression, rebuilding it to apply the given variant // if there is exactly one symbol. switch (E->getKind()) { @@ -888,8 +928,8 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E, const MCSymbolRefExpr *SRE = cast(E); if (SRE->getKind() != MCSymbolRefExpr::VK_None) { - TokError("invalid variant on expression '" + - getTok().getIdentifier() + "' (already modified)"); + TokError("invalid variant on expression '" + getTok().getIdentifier() + + "' (already modified)"); return E; } @@ -898,7 +938,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E, case MCExpr::Unary: { const MCUnaryExpr *UE = cast(E); - const MCExpr *Sub = ApplyModifierToExpr(UE->getSubExpr(), Variant); + const MCExpr *Sub = applyModifierToExpr(UE->getSubExpr(), Variant); if (!Sub) return 0; return MCUnaryExpr::Create(UE->getOpcode(), Sub, getContext()); @@ -906,14 +946,16 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E, case MCExpr::Binary: { const MCBinaryExpr *BE = cast(E); - const MCExpr *LHS = ApplyModifierToExpr(BE->getLHS(), Variant); - const MCExpr *RHS = ApplyModifierToExpr(BE->getRHS(), Variant); + const MCExpr *LHS = applyModifierToExpr(BE->getLHS(), Variant); + const MCExpr *RHS = applyModifierToExpr(BE->getRHS(), Variant); if (!LHS && !RHS) return 0; - if (!LHS) LHS = BE->getLHS(); - if (!RHS) RHS = BE->getRHS(); + if (!LHS) + LHS = BE->getLHS(); + if (!RHS) + RHS = BE->getRHS(); return MCBinaryExpr::Create(BE->getOpcode(), LHS, RHS, getContext()); } @@ -922,7 +964,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E, llvm_unreachable("Invalid expression kind!"); } -/// parseExpression - Parse an expression and return it. +/// \brief Parse an expression and return it. /// /// expr ::= expr &&,|| expr -> lowest. /// expr ::= expr |,^,&,! expr @@ -935,7 +977,7 @@ AsmParser::ApplyModifierToExpr(const MCExpr *E, bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) { // Parse the expression. Res = 0; - if (ParsePrimaryExpr(Res, EndLoc) || ParseBinOpRHS(1, Res, EndLoc)) + if (parsePrimaryExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc)) return true; // As a special case, we support 'a op b @ modifier' by rewriting the @@ -948,11 +990,11 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) { return TokError("unexpected symbol modifier following '@'"); MCSymbolRefExpr::VariantKind Variant = - MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier()); + MCSymbolRefExpr::getVariantKindForName(getTok().getIdentifier()); if (Variant == MCSymbolRefExpr::VK_Invalid) return TokError("invalid variant '" + getTok().getIdentifier() + "'"); - const MCExpr *ModifiedRes = ApplyModifierToExpr(Res, Variant); + const MCExpr *ModifiedRes = applyModifierToExpr(Res, Variant); if (!ModifiedRes) { return TokError("invalid modifier '" + getTok().getIdentifier() + "' (no symbols present)"); @@ -972,8 +1014,7 @@ bool AsmParser::parseExpression(const MCExpr *&Res, SMLoc &EndLoc) { bool AsmParser::parseParenExpression(const MCExpr *&Res, SMLoc &EndLoc) { Res = 0; - return ParseParenExpr(Res, EndLoc) || - ParseBinOpRHS(1, Res, EndLoc); + return parseParenExpr(Res, EndLoc) || parseBinOpRHS(1, Res, EndLoc); } bool AsmParser::parseAbsoluteExpression(int64_t &Res) { @@ -993,9 +1034,9 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K, MCBinaryExpr::Opcode &Kind) { switch (K) { default: - return 0; // not a binop. + return 0; // not a binop. 
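// (Worked example: with these levels, "2 + 3 * 4 << 1" parses as
//  "(2 + (3 * 4)) << 1", since '*' at level 6 binds tighter than '+'
//  at level 5, which binds tighter than '<<' at level 4; parseBinOpRHS
//  recurses whenever the operator after the RHS binds tighter than the
//  current one.)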
- // Lowest Precedence: &&, || + // Lowest Precedence: &&, || case AsmToken::AmpAmp: Kind = MCBinaryExpr::LAnd; return 1; @@ -1003,10 +1044,9 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K, Kind = MCBinaryExpr::LOr; return 1; - - // Low Precedence: |, &, ^ - // - // FIXME: gas seems to support '!' as an infix operator? + // Low Precedence: |, &, ^ + // + // FIXME: gas seems to support '!' as an infix operator? case AsmToken::Pipe: Kind = MCBinaryExpr::Or; return 2; @@ -1017,7 +1057,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K, Kind = MCBinaryExpr::And; return 2; - // Low Intermediate Precedence: ==, !=, <>, <, <=, >, >= + // Low Intermediate Precedence: ==, !=, <>, <, <=, >, >= case AsmToken::EqualEqual: Kind = MCBinaryExpr::EQ; return 3; @@ -1038,7 +1078,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K, Kind = MCBinaryExpr::GTE; return 3; - // Intermediate Precedence: <<, >> + // Intermediate Precedence: <<, >> case AsmToken::LessLess: Kind = MCBinaryExpr::Shl; return 4; @@ -1046,7 +1086,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K, Kind = MCBinaryExpr::Shr; return 4; - // High Intermediate Precedence: +, - + // High Intermediate Precedence: +, - case AsmToken::Plus: Kind = MCBinaryExpr::Add; return 5; @@ -1054,7 +1094,7 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K, Kind = MCBinaryExpr::Sub; return 5; - // Highest Precedence: *, /, % + // Highest Precedence: *, /, % case AsmToken::Star: Kind = MCBinaryExpr::Mul; return 6; @@ -1067,10 +1107,9 @@ static unsigned getBinOpPrecedence(AsmToken::TokenKind K, } } - -/// ParseBinOpRHS - Parse all binary operators with precedence >= 'Precedence'. +/// \brief Parse all binary operators with precedence >= 'Precedence'. /// Res contains the LHS of the expression on input. -bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, +bool AsmParser::parseBinOpRHS(unsigned Precedence, const MCExpr *&Res, SMLoc &EndLoc) { while (1) { MCBinaryExpr::Opcode Kind = MCBinaryExpr::Add; @@ -1085,15 +1124,15 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, // Eat the next primary expression. const MCExpr *RHS; - if (ParsePrimaryExpr(RHS, EndLoc)) return true; + if (parsePrimaryExpr(RHS, EndLoc)) + return true; // If BinOp binds less tightly with RHS than the operator after RHS, let // the pending operator take RHS as its LHS. MCBinaryExpr::Opcode Dummy; unsigned NextTokPrec = getBinOpPrecedence(Lexer.getKind(), Dummy); - if (TokPrec < NextTokPrec) { - if (ParseBinOpRHS(TokPrec+1, RHS, EndLoc)) return true; - } + if (TokPrec < NextTokPrec && parseBinOpRHS(TokPrec + 1, RHS, EndLoc)) + return true; // Merge LHS and RHS according to operator. Res = MCBinaryExpr::Create(Kind, Res, RHS, getContext()); @@ -1104,7 +1143,7 @@ bool AsmParser::ParseBinOpRHS(unsigned Precedence, const MCExpr *&Res, /// ::= EndOfStatement /// ::= Label* Directive ...Operands... EndOfStatement /// ::= Label* Identifier OperandList* EndOfStatement -bool AsmParser::ParseStatement(ParseStatementInfo &Info) { +bool AsmParser::parseStatement(ParseStatementInfo &Info) { if (Lexer.is(AsmToken::EndOfStatement)) { Out.AddBlankLine(); Lex(); @@ -1118,7 +1157,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { int64_t LocalLabelVal = -1; // A full line comment is a '#' as the first token. if (Lexer.is(AsmToken::Hash)) - return ParseCppHashLineFilenameComment(IDLoc); + return parseCppHashLineFilenameComment(IDLoc); // Allow an integer followed by a ':' as a directional local label. 
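// (e.g. "1:" defines a local label that later code references as "1b",
//  the most recent definition looking backwards, or "1f", the next
//  definition looking forwards; a "1b" with no prior "1:" is the
//  "invalid reference to undefined symbol" case above.)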
if (Lexer.is(AsmToken::Integer)) { @@ -1149,34 +1188,34 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // have to do this so that .endif isn't skipped in a ".if 0" block for // example. StringMap::const_iterator DirKindIt = - DirectiveKindMap.find(IDVal); - DirectiveKind DirKind = - (DirKindIt == DirectiveKindMap.end()) ? DK_NO_DIRECTIVE : - DirKindIt->getValue(); + DirectiveKindMap.find(IDVal); + DirectiveKind DirKind = (DirKindIt == DirectiveKindMap.end()) + ? DK_NO_DIRECTIVE + : DirKindIt->getValue(); switch (DirKind) { - default: - break; - case DK_IF: - return ParseDirectiveIf(IDLoc); - case DK_IFB: - return ParseDirectiveIfb(IDLoc, true); - case DK_IFNB: - return ParseDirectiveIfb(IDLoc, false); - case DK_IFC: - return ParseDirectiveIfc(IDLoc, true); - case DK_IFNC: - return ParseDirectiveIfc(IDLoc, false); - case DK_IFDEF: - return ParseDirectiveIfdef(IDLoc, true); - case DK_IFNDEF: - case DK_IFNOTDEF: - return ParseDirectiveIfdef(IDLoc, false); - case DK_ELSEIF: - return ParseDirectiveElseIf(IDLoc); - case DK_ELSE: - return ParseDirectiveElse(IDLoc); - case DK_ENDIF: - return ParseDirectiveEndIf(IDLoc); + default: + break; + case DK_IF: + return parseDirectiveIf(IDLoc); + case DK_IFB: + return parseDirectiveIfb(IDLoc, true); + case DK_IFNB: + return parseDirectiveIfb(IDLoc, false); + case DK_IFC: + return parseDirectiveIfc(IDLoc, true); + case DK_IFNC: + return parseDirectiveIfc(IDLoc, false); + case DK_IFDEF: + return parseDirectiveIfdef(IDLoc, true); + case DK_IFNDEF: + case DK_IFNOTDEF: + return parseDirectiveIfdef(IDLoc, false); + case DK_ELSEIF: + return parseDirectiveElseIf(IDLoc); + case DK_ELSE: + return parseDirectiveElse(IDLoc); + case DK_ENDIF: + return parseDirectiveEndIf(IDLoc); } // Ignore the statement if in the middle of inactive conditional @@ -1223,6 +1262,8 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { MCGenDwarfLabelEntry::Make(Sym, &getStreamer(), getSourceManager(), IDLoc); + getTargetParser().onLabelParsed(Sym); + // Consume any end of statement token, if present, to avoid spurious // AddBlankLine calls(). if (Lexer.is(AsmToken::EndOfStatement)) { @@ -1238,24 +1279,24 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // identifier '=' ... -> assignment statement Lex(); - return ParseAssignment(IDVal, true); + return parseAssignment(IDVal, true); default: // Normal instruction or directive. break; } // If macros are enabled, check to see if this is a macro instantiation. - if (MacrosEnabled()) - if (const MCAsmMacro *M = LookupMacro(IDVal)) { - return HandleMacroEntry(M, IDLoc); + if (areMacrosEnabled()) + if (const MCAsmMacro *M = lookupMacro(IDVal)) { + return handleMacroEntry(M, IDLoc); } // Otherwise, we have a normal instruction or directive. - + // Directives start with "." if (IDVal[0] == '.' && IDVal != ".") { // There are several entities interested in parsing directives: - // + // // 1. The target-specific assembly parser. Some directives are target // specific or may potentially behave differently on certain targets. // 2. Asm parser extensions. For example, platform-specific parsers @@ -1272,185 +1313,185 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // Next, check the extention directive map to see if any extension has // registered itself to parse this directive. 
- std::pair Handler = - ExtensionDirectiveMap.lookup(IDVal); + std::pair Handler = + ExtensionDirectiveMap.lookup(IDVal); if (Handler.first) return (*Handler.second)(Handler.first, IDVal, IDLoc); // Finally, if no one else is interested in this directive, it must be // generic and familiar to this class. switch (DirKind) { - default: - break; - case DK_SET: - case DK_EQU: - return ParseDirectiveSet(IDVal, true); - case DK_EQUIV: - return ParseDirectiveSet(IDVal, false); - case DK_ASCII: - return ParseDirectiveAscii(IDVal, false); - case DK_ASCIZ: - case DK_STRING: - return ParseDirectiveAscii(IDVal, true); - case DK_BYTE: - return ParseDirectiveValue(1); - case DK_SHORT: - case DK_VALUE: - case DK_2BYTE: - return ParseDirectiveValue(2); - case DK_LONG: - case DK_INT: - case DK_4BYTE: - return ParseDirectiveValue(4); - case DK_QUAD: - case DK_8BYTE: - return ParseDirectiveValue(8); - case DK_SINGLE: - case DK_FLOAT: - return ParseDirectiveRealValue(APFloat::IEEEsingle); - case DK_DOUBLE: - return ParseDirectiveRealValue(APFloat::IEEEdouble); - case DK_ALIGN: { - bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); - return ParseDirectiveAlign(IsPow2, /*ExprSize=*/1); - } - case DK_ALIGN32: { - bool IsPow2 = !getContext().getAsmInfo().getAlignmentIsInBytes(); - return ParseDirectiveAlign(IsPow2, /*ExprSize=*/4); - } - case DK_BALIGN: - return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); - case DK_BALIGNW: - return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); - case DK_BALIGNL: - return ParseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); - case DK_P2ALIGN: - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); - case DK_P2ALIGNW: - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); - case DK_P2ALIGNL: - return ParseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); - case DK_ORG: - return ParseDirectiveOrg(); - case DK_FILL: - return ParseDirectiveFill(); - case DK_ZERO: - return ParseDirectiveZero(); - case DK_EXTERN: - eatToEndOfStatement(); // .extern is the default, ignore it. 
- return false; - case DK_GLOBL: - case DK_GLOBAL: - return ParseDirectiveSymbolAttribute(MCSA_Global); - case DK_INDIRECT_SYMBOL: - return ParseDirectiveSymbolAttribute(MCSA_IndirectSymbol); - case DK_LAZY_REFERENCE: - return ParseDirectiveSymbolAttribute(MCSA_LazyReference); - case DK_NO_DEAD_STRIP: - return ParseDirectiveSymbolAttribute(MCSA_NoDeadStrip); - case DK_SYMBOL_RESOLVER: - return ParseDirectiveSymbolAttribute(MCSA_SymbolResolver); - case DK_PRIVATE_EXTERN: - return ParseDirectiveSymbolAttribute(MCSA_PrivateExtern); - case DK_REFERENCE: - return ParseDirectiveSymbolAttribute(MCSA_Reference); - case DK_WEAK_DEFINITION: - return ParseDirectiveSymbolAttribute(MCSA_WeakDefinition); - case DK_WEAK_REFERENCE: - return ParseDirectiveSymbolAttribute(MCSA_WeakReference); - case DK_WEAK_DEF_CAN_BE_HIDDEN: - return ParseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate); - case DK_COMM: - case DK_COMMON: - return ParseDirectiveComm(/*IsLocal=*/false); - case DK_LCOMM: - return ParseDirectiveComm(/*IsLocal=*/true); - case DK_ABORT: - return ParseDirectiveAbort(); - case DK_INCLUDE: - return ParseDirectiveInclude(); - case DK_INCBIN: - return ParseDirectiveIncbin(); - case DK_CODE16: - case DK_CODE16GCC: - return TokError(Twine(IDVal) + " not supported yet"); - case DK_REPT: - return ParseDirectiveRept(IDLoc); - case DK_IRP: - return ParseDirectiveIrp(IDLoc); - case DK_IRPC: - return ParseDirectiveIrpc(IDLoc); - case DK_ENDR: - return ParseDirectiveEndr(IDLoc); - case DK_BUNDLE_ALIGN_MODE: - return ParseDirectiveBundleAlignMode(); - case DK_BUNDLE_LOCK: - return ParseDirectiveBundleLock(); - case DK_BUNDLE_UNLOCK: - return ParseDirectiveBundleUnlock(); - case DK_SLEB128: - return ParseDirectiveLEB128(true); - case DK_ULEB128: - return ParseDirectiveLEB128(false); - case DK_SPACE: - case DK_SKIP: - return ParseDirectiveSpace(IDVal); - case DK_FILE: - return ParseDirectiveFile(IDLoc); - case DK_LINE: - return ParseDirectiveLine(); - case DK_LOC: - return ParseDirectiveLoc(); - case DK_STABS: - return ParseDirectiveStabs(); - case DK_CFI_SECTIONS: - return ParseDirectiveCFISections(); - case DK_CFI_STARTPROC: - return ParseDirectiveCFIStartProc(); - case DK_CFI_ENDPROC: - return ParseDirectiveCFIEndProc(); - case DK_CFI_DEF_CFA: - return ParseDirectiveCFIDefCfa(IDLoc); - case DK_CFI_DEF_CFA_OFFSET: - return ParseDirectiveCFIDefCfaOffset(); - case DK_CFI_ADJUST_CFA_OFFSET: - return ParseDirectiveCFIAdjustCfaOffset(); - case DK_CFI_DEF_CFA_REGISTER: - return ParseDirectiveCFIDefCfaRegister(IDLoc); - case DK_CFI_OFFSET: - return ParseDirectiveCFIOffset(IDLoc); - case DK_CFI_REL_OFFSET: - return ParseDirectiveCFIRelOffset(IDLoc); - case DK_CFI_PERSONALITY: - return ParseDirectiveCFIPersonalityOrLsda(true); - case DK_CFI_LSDA: - return ParseDirectiveCFIPersonalityOrLsda(false); - case DK_CFI_REMEMBER_STATE: - return ParseDirectiveCFIRememberState(); - case DK_CFI_RESTORE_STATE: - return ParseDirectiveCFIRestoreState(); - case DK_CFI_SAME_VALUE: - return ParseDirectiveCFISameValue(IDLoc); - case DK_CFI_RESTORE: - return ParseDirectiveCFIRestore(IDLoc); - case DK_CFI_ESCAPE: - return ParseDirectiveCFIEscape(); - case DK_CFI_SIGNAL_FRAME: - return ParseDirectiveCFISignalFrame(); - case DK_CFI_UNDEFINED: - return ParseDirectiveCFIUndefined(IDLoc); - case DK_CFI_REGISTER: - return ParseDirectiveCFIRegister(IDLoc); - case DK_MACROS_ON: - case DK_MACROS_OFF: - return ParseDirectiveMacrosOnOff(IDVal); - case DK_MACRO: - return ParseDirectiveMacro(IDLoc); - case DK_ENDM: - case DK_ENDMACRO: - return 
ParseDirectiveEndMacro(IDVal); - case DK_PURGEM: - return ParseDirectivePurgeMacro(IDLoc); + default: + break; + case DK_SET: + case DK_EQU: + return parseDirectiveSet(IDVal, true); + case DK_EQUIV: + return parseDirectiveSet(IDVal, false); + case DK_ASCII: + return parseDirectiveAscii(IDVal, false); + case DK_ASCIZ: + case DK_STRING: + return parseDirectiveAscii(IDVal, true); + case DK_BYTE: + return parseDirectiveValue(1); + case DK_SHORT: + case DK_VALUE: + case DK_2BYTE: + return parseDirectiveValue(2); + case DK_LONG: + case DK_INT: + case DK_4BYTE: + return parseDirectiveValue(4); + case DK_QUAD: + case DK_8BYTE: + return parseDirectiveValue(8); + case DK_SINGLE: + case DK_FLOAT: + return parseDirectiveRealValue(APFloat::IEEEsingle); + case DK_DOUBLE: + return parseDirectiveRealValue(APFloat::IEEEdouble); + case DK_ALIGN: { + bool IsPow2 = !getContext().getAsmInfo()->getAlignmentIsInBytes(); + return parseDirectiveAlign(IsPow2, /*ExprSize=*/1); + } + case DK_ALIGN32: { + bool IsPow2 = !getContext().getAsmInfo()->getAlignmentIsInBytes(); + return parseDirectiveAlign(IsPow2, /*ExprSize=*/4); + } + case DK_BALIGN: + return parseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/1); + case DK_BALIGNW: + return parseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/2); + case DK_BALIGNL: + return parseDirectiveAlign(/*IsPow2=*/false, /*ExprSize=*/4); + case DK_P2ALIGN: + return parseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/1); + case DK_P2ALIGNW: + return parseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/2); + case DK_P2ALIGNL: + return parseDirectiveAlign(/*IsPow2=*/true, /*ExprSize=*/4); + case DK_ORG: + return parseDirectiveOrg(); + case DK_FILL: + return parseDirectiveFill(); + case DK_ZERO: + return parseDirectiveZero(); + case DK_EXTERN: + eatToEndOfStatement(); // .extern is the default, ignore it. 
+ return false; + case DK_GLOBL: + case DK_GLOBAL: + return parseDirectiveSymbolAttribute(MCSA_Global); + case DK_LAZY_REFERENCE: + return parseDirectiveSymbolAttribute(MCSA_LazyReference); + case DK_NO_DEAD_STRIP: + return parseDirectiveSymbolAttribute(MCSA_NoDeadStrip); + case DK_SYMBOL_RESOLVER: + return parseDirectiveSymbolAttribute(MCSA_SymbolResolver); + case DK_PRIVATE_EXTERN: + return parseDirectiveSymbolAttribute(MCSA_PrivateExtern); + case DK_REFERENCE: + return parseDirectiveSymbolAttribute(MCSA_Reference); + case DK_WEAK_DEFINITION: + return parseDirectiveSymbolAttribute(MCSA_WeakDefinition); + case DK_WEAK_REFERENCE: + return parseDirectiveSymbolAttribute(MCSA_WeakReference); + case DK_WEAK_DEF_CAN_BE_HIDDEN: + return parseDirectiveSymbolAttribute(MCSA_WeakDefAutoPrivate); + case DK_COMM: + case DK_COMMON: + return parseDirectiveComm(/*IsLocal=*/false); + case DK_LCOMM: + return parseDirectiveComm(/*IsLocal=*/true); + case DK_ABORT: + return parseDirectiveAbort(); + case DK_INCLUDE: + return parseDirectiveInclude(); + case DK_INCBIN: + return parseDirectiveIncbin(); + case DK_CODE16: + case DK_CODE16GCC: + return TokError(Twine(IDVal) + " not supported yet"); + case DK_REPT: + return parseDirectiveRept(IDLoc); + case DK_IRP: + return parseDirectiveIrp(IDLoc); + case DK_IRPC: + return parseDirectiveIrpc(IDLoc); + case DK_ENDR: + return parseDirectiveEndr(IDLoc); + case DK_BUNDLE_ALIGN_MODE: + return parseDirectiveBundleAlignMode(); + case DK_BUNDLE_LOCK: + return parseDirectiveBundleLock(); + case DK_BUNDLE_UNLOCK: + return parseDirectiveBundleUnlock(); + case DK_SLEB128: + return parseDirectiveLEB128(true); + case DK_ULEB128: + return parseDirectiveLEB128(false); + case DK_SPACE: + case DK_SKIP: + return parseDirectiveSpace(IDVal); + case DK_FILE: + return parseDirectiveFile(IDLoc); + case DK_LINE: + return parseDirectiveLine(); + case DK_LOC: + return parseDirectiveLoc(); + case DK_STABS: + return parseDirectiveStabs(); + case DK_CFI_SECTIONS: + return parseDirectiveCFISections(); + case DK_CFI_STARTPROC: + return parseDirectiveCFIStartProc(); + case DK_CFI_ENDPROC: + return parseDirectiveCFIEndProc(); + case DK_CFI_DEF_CFA: + return parseDirectiveCFIDefCfa(IDLoc); + case DK_CFI_DEF_CFA_OFFSET: + return parseDirectiveCFIDefCfaOffset(); + case DK_CFI_ADJUST_CFA_OFFSET: + return parseDirectiveCFIAdjustCfaOffset(); + case DK_CFI_DEF_CFA_REGISTER: + return parseDirectiveCFIDefCfaRegister(IDLoc); + case DK_CFI_OFFSET: + return parseDirectiveCFIOffset(IDLoc); + case DK_CFI_REL_OFFSET: + return parseDirectiveCFIRelOffset(IDLoc); + case DK_CFI_PERSONALITY: + return parseDirectiveCFIPersonalityOrLsda(true); + case DK_CFI_LSDA: + return parseDirectiveCFIPersonalityOrLsda(false); + case DK_CFI_REMEMBER_STATE: + return parseDirectiveCFIRememberState(); + case DK_CFI_RESTORE_STATE: + return parseDirectiveCFIRestoreState(); + case DK_CFI_SAME_VALUE: + return parseDirectiveCFISameValue(IDLoc); + case DK_CFI_RESTORE: + return parseDirectiveCFIRestore(IDLoc); + case DK_CFI_ESCAPE: + return parseDirectiveCFIEscape(); + case DK_CFI_SIGNAL_FRAME: + return parseDirectiveCFISignalFrame(); + case DK_CFI_UNDEFINED: + return parseDirectiveCFIUndefined(IDLoc); + case DK_CFI_REGISTER: + return parseDirectiveCFIRegister(IDLoc); + case DK_CFI_WINDOW_SAVE: + return parseDirectiveCFIWindowSave(); + case DK_MACROS_ON: + case DK_MACROS_OFF: + return parseDirectiveMacrosOnOff(IDVal); + case DK_MACRO: + return parseDirectiveMacro(IDLoc); + case DK_ENDM: + case DK_ENDMACRO: + return 
parseDirectiveEndMacro(IDVal); + case DK_PURGEM: + return parseDirectivePurgeMacro(IDLoc); } return Error(IDLoc, "unknown directive"); @@ -1459,19 +1500,19 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // __asm _emit or __asm __emit if (ParsingInlineAsm && (IDVal == "_emit" || IDVal == "__emit" || IDVal == "_EMIT" || IDVal == "__EMIT")) - return ParseDirectiveMSEmit(IDLoc, Info, IDVal.size()); + return parseDirectiveMSEmit(IDLoc, Info, IDVal.size()); // __asm align if (ParsingInlineAsm && (IDVal == "align" || IDVal == "ALIGN")) - return ParseDirectiveMSAlign(IDLoc, Info); + return parseDirectiveMSAlign(IDLoc, Info); checkForValidSection(); // Canonicalize the opcode to lower case. std::string OpcodeStr = IDVal.lower(); ParseInstructionInfo IInfo(Info.AsmRewrites); - bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, - IDLoc, Info.ParsedOperands); + bool HadError = getTargetParser().ParseInstruction(IInfo, OpcodeStr, IDLoc, + Info.ParsedOperands); Info.ParseError = HadError; // Dump the parsed representation, if requested. @@ -1486,7 +1527,7 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { } OS << "]"; - PrintMessage(IDLoc, SourceMgr::DK_Note, OS.str()); + printMessage(IDLoc, SourceMgr::DK_Note, OS.str()); } // If we are generating dwarf for assembly source files and the current @@ -1494,38 +1535,49 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { // the instruction. if (!HadError && getContext().getGenDwarfForAssembly() && getContext().getGenDwarfSection() == - getStreamer().getCurrentSection().first) { + getStreamer().getCurrentSection().first) { unsigned Line = SrcMgr.FindLineNumber(IDLoc, CurBuffer); // If we previously parsed a cpp hash file line comment then make sure the // current Dwarf File is for the CppHashFilename if not then emit the // Dwarf File table for it and adjust the line number for the .loc. - const SmallVectorImpl &MCDwarfFiles = - getContext().getMCDwarfFiles(); + const SmallVectorImpl &MCDwarfFiles = + getContext().getMCDwarfFiles(); if (CppHashFilename.size() != 0) { if (MCDwarfFiles[getContext().getGenDwarfFileNumber()]->getName() != CppHashFilename) getStreamer().EmitDwarfFileDirective( - getContext().nextGenDwarfFileNumber(), StringRef(), CppHashFilename); - - unsigned CppHashLocLineNo = SrcMgr.FindLineNumber(CppHashLoc,CppHashBuf); - Line = CppHashLineNumber - 1 + (Line - CppHashLocLineNo); + getContext().nextGenDwarfFileNumber(), StringRef(), + CppHashFilename); + + // Since SrcMgr.FindLineNumber() is slow and messes up the SourceMgr's + // cache with the different Loc from the call above we save the last + // info we queried here with SrcMgr.FindLineNumber(). + unsigned CppHashLocLineNo; + if (LastQueryIDLoc == CppHashLoc && LastQueryBuffer == CppHashBuf) + CppHashLocLineNo = LastQueryLine; + else { + CppHashLocLineNo = SrcMgr.FindLineNumber(CppHashLoc, CppHashBuf); + LastQueryLine = CppHashLocLineNo; + LastQueryIDLoc = CppHashLoc; + LastQueryBuffer = CppHashBuf; + } + Line = CppHashLineNumber - 1 + (Line - CppHashLocLineNo); } - getStreamer().EmitDwarfLocDirective(getContext().getGenDwarfFileNumber(), - Line, 0, DWARF2_LINE_DEFAULT_IS_STMT ? - DWARF2_FLAG_IS_STMT : 0, 0, 0, - StringRef()); + getStreamer().EmitDwarfLocDirective( + getContext().getGenDwarfFileNumber(), Line, 0, + DWARF2_LINE_DEFAULT_IS_STMT ? DWARF2_FLAG_IS_STMT : 0, 0, 0, + StringRef()); } // If parsing succeeded, match the instruction. 
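Both the .loc emission above and the new FindLineNumber cache rest on one piece of arithmetic: the effective source line is the line carried by the last cpp hash comment, plus the buffer-line distance from that comment, minus one. A toy check of that formula with made-up values:

#include <cassert>

int main() {
  int CppHashLineNumber = 42; // from a prior: # 42 "foo.c"
  int CppHashLocLineNo = 10;  // buffer line holding that comment
  int Line = 13;              // buffer line of the instruction
  // Three buffer lines past the marker land on source lines 42, 43, 44.
  int Effective = CppHashLineNumber - 1 + (Line - CppHashLocLineNo);
  assert(Effective == 44);
  return 0;
}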
if (!HadError) { unsigned ErrorInfo; - HadError = getTargetParser().MatchAndEmitInstruction(IDLoc, Info.Opcode, - Info.ParsedOperands, - Out, ErrorInfo, - ParsingInlineAsm); + HadError = getTargetParser().MatchAndEmitInstruction( + IDLoc, Info.Opcode, Info.ParsedOperands, Out, ErrorInfo, + ParsingInlineAsm); } // Don't skip the rest of the line, the instruction parser is responsible for @@ -1533,25 +1585,25 @@ bool AsmParser::ParseStatement(ParseStatementInfo &Info) { return false; } -/// EatToEndOfLine uses the Lexer to eat the characters to the end of the line +/// eatToEndOfLine uses the Lexer to eat the characters to the end of the line /// since they may not be able to be tokenized to get to the end of line token. -void AsmParser::EatToEndOfLine() { +void AsmParser::eatToEndOfLine() { if (!Lexer.is(AsmToken::EndOfStatement)) Lexer.LexUntilEndOfLine(); - // Eat EOL. - Lex(); + // Eat EOL. + Lex(); } -/// ParseCppHashLineFilenameComment as this: +/// parseCppHashLineFilenameComment as this: /// ::= # number "filename" /// or just as a full line comment if it doesn't have a number and a string. -bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) { +bool AsmParser::parseCppHashLineFilenameComment(const SMLoc &L) { Lex(); // Eat the hash token. if (getLexer().isNot(AsmToken::Integer)) { // Consume the line in case it is not a well-formed line directive, // as if it were simply a full line comment. - EatToEndOfLine(); + eatToEndOfLine(); return false; } @@ -1559,13 +1611,13 @@ bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) { Lex(); if (getLexer().isNot(AsmToken::String)) { - EatToEndOfLine(); + eatToEndOfLine(); return false; } StringRef Filename = getTok().getString(); // Get rid of the enclosing quotes. - Filename = Filename.substr(1, Filename.size()-2); + Filename = Filename.substr(1, Filename.size() - 2); // Save the SMLoc, Filename and LineNumber for later use by diagnostics. CppHashLoc = L; @@ -1574,14 +1626,14 @@ bool AsmParser::ParseCppHashLineFilenameComment(const SMLoc &L) { CppHashBuf = CurBuffer; // Ignore any trailing characters, they're just a comment. - EatToEndOfLine(); + eatToEndOfLine(); return false; } -/// DiagHandler - will use the last parsed cpp hash line filename comment +/// \brief will use the last parsed cpp hash line filename comment /// for the Filename and LineNo if any in the diagnostic. void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { - const AsmParser *Parser = static_cast<const AsmParser*>(Context); + const AsmParser *Parser = static_cast<const AsmParser *>(Context); raw_ostream &OS = errs(); const SourceMgr &DiagSrcMgr = *Diag.getSourceMgr(); @@ -1589,19 +1641,18 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { int DiagBuf = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); int CppHashBuf = Parser->SrcMgr.FindBufferContainingLoc(Parser->CppHashLoc); - // Like SourceMgr::PrintMessage() we need to print the include stack if any + // Like SourceMgr::printMessage() we need to print the include stack if any // before printing the message.
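The cpp hash comments consumed above have the shape '# <number> "<filename>"'. A compressed stand-in using sscanf, purely illustrative since the real routine works on lexer tokens rather than raw strings:

#include <cassert>
#include <cstdio>
#include <cstring>

int main() {
  int Line = 0;
  char File[64] = {0};
  // "# 42 \"foo.c\"" -> Line = 42, File = "foo.c"
  int Matched =
      std::sscanf("# 42 \"foo.c\"", "# %d \"%63[^\"]\"", &Line, File);
  assert(Matched == 2 && Line == 42 && std::strcmp(File, "foo.c") == 0);
  return 0;
}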
int DiagCurBuffer = DiagSrcMgr.FindBufferContainingLoc(DiagLoc); if (!Parser->SavedDiagHandler && DiagCurBuffer > 0) { - SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer); - DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS); + SMLoc ParentIncludeLoc = DiagSrcMgr.getParentIncludeLoc(DiagCurBuffer); + DiagSrcMgr.PrintIncludeStack(ParentIncludeLoc, OS); } // If we have not parsed a cpp hash line filename comment or the source // manager changed or buffer changed (like in a nested include) then just // print the normal diagnostic using its Filename and LineNo. - if (!Parser->CppHashLineNumber || - &DiagSrcMgr != &Parser->SrcMgr || + if (!Parser->CppHashLineNumber || &DiagSrcMgr != &Parser->SrcMgr || DiagBuf != CppHashBuf) { if (Parser->SavedDiagHandler) Parser->SavedDiagHandler(Diag, Parser->SavedDiagContext); @@ -1613,17 +1664,16 @@ void AsmParser::DiagHandler(const SMDiagnostic &Diag, void *Context) { // Use the CppHashFilename and calculate a line number based on the // CppHashLoc and CppHashLineNumber relative to this Diag's SMLoc for // the diagnostic. - const std::string Filename = Parser->CppHashFilename; + const std::string &Filename = Parser->CppHashFilename; int DiagLocLineNo = DiagSrcMgr.FindLineNumber(DiagLoc, DiagBuf); int CppHashLocLineNo = Parser->SrcMgr.FindLineNumber(Parser->CppHashLoc, CppHashBuf); - int LineNo = Parser->CppHashLineNumber - 1 + - (DiagLocLineNo - CppHashLocLineNo); + int LineNo = + Parser->CppHashLineNumber - 1 + (DiagLocLineNo - CppHashLocLineNo); - SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), - Filename, LineNo, Diag.getColumnNo(), - Diag.getKind(), Diag.getMessage(), + SMDiagnostic NewDiag(*Diag.getSourceMgr(), Diag.getLoc(), Filename, LineNo, + Diag.getColumnNo(), Diag.getKind(), Diag.getMessage(), Diag.getLineContents(), Diag.getRanges()); if (Parser->SavedDiagHandler) @@ -1643,8 +1693,7 @@ static bool isIdentifierChar(char c) { bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, const MCAsmMacroParameters &Parameters, - const MCAsmMacroArguments &A, - const SMLoc &L) { + const MCAsmMacroArguments &A, const SMLoc &L) { unsigned NParameters = Parameters.size(); if (NParameters != 0 && NParameters != A.size()) return Error(L, "Wrong number of arguments"); @@ -1680,27 +1729,28 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, break; if (!NParameters) { - switch (Body[Pos+1]) { - // $$ => $ + switch (Body[Pos + 1]) { + // $$ => $ case '$': OS << '$'; break; - // $n => number of arguments + // $n => number of arguments case 'n': OS << A.size(); break; - // $[0-9] => argument + // $[0-9] => argument default: { // Missing arguments are ignored. - unsigned Index = Body[Pos+1] - '0'; + unsigned Index = Body[Pos + 1] - '0'; if (Index >= A.size()) break; // Otherwise substitute with the token values, with spaces eliminated. 
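The positional branch above implements the classic gas substitutions: "$$" emits a literal dollar, "$n" the argument count, and "$0".."$9" the argument text, with out-of-range references silently dropped. A self-contained sketch of the same rules on plain strings (names are hypothetical):

#include <iostream>
#include <string>
#include <vector>

// Gas-style positional expansion: "$$" -> "$", "$n" -> argc,
// "$0".."$9" -> argument text; out-of-range references vanish.
std::string expandPositional(const std::string &Body,
                             const std::vector<std::string> &Args) {
  std::string Out;
  for (size_t i = 0; i < Body.size(); ++i) {
    if (Body[i] != '$' || i + 1 == Body.size()) {
      Out += Body[i];
      continue;
    }
    char C = Body[++i];
    if (C == '$')
      Out += '$';
    else if (C == 'n')
      Out += std::to_string(Args.size());
    else if (C >= '0' && C <= '9') {
      size_t Idx = C - '0';
      if (Idx < Args.size())
        Out += Args[Idx]; // missing arguments are ignored
    } else {
      Out += '$';
      Out += C;
    }
  }
  return Out;
}

int main() {
  // Prints: add r0, r1 ; 2 args
  std::cout << expandPositional("add $0, $1 ; $n args", {"r0", "r1"}) << "\n";
  return 0;
}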
for (MCAsmMacroArgument::const_iterator it = A[Index].begin(), - ie = A[Index].end(); it != ie; ++it) + ie = A[Index].end(); + it != ie; ++it) OS << it->getString(); break; } @@ -1711,23 +1761,24 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, while (isIdentifierChar(Body[I]) && I + 1 != End) ++I; - const char *Begin = Body.data() + Pos +1; - StringRef Argument(Begin, I - (Pos +1)); + const char *Begin = Body.data() + Pos + 1; + StringRef Argument(Begin, I - (Pos + 1)); unsigned Index = 0; for (; Index < NParameters; ++Index) if (Parameters[Index].first == Argument) break; if (Index == NParameters) { - if (Body[Pos+1] == '(' && Body[Pos+2] == ')') - Pos += 3; - else { - OS << '\\' << Argument; - Pos = I; - } + if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')') + Pos += 3; + else { + OS << '\\' << Argument; + Pos = I; + } } else { for (MCAsmMacroArgument::const_iterator it = A[Index].begin(), - ie = A[Index].end(); it != ie; ++it) + ie = A[Index].end(); + it != ie; ++it) if (it->getKind() == AsmToken::String) OS << it->getStringContents(); else @@ -1743,48 +1794,43 @@ bool AsmParser::expandMacro(raw_svector_ostream &OS, StringRef Body, return false; } -MacroInstantiation::MacroInstantiation(const MCAsmMacro *M, SMLoc IL, - int EB, SMLoc EL, - MemoryBuffer *I) - : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitBuffer(EB), - ExitLoc(EL) -{ -} +MacroInstantiation::MacroInstantiation(const MCAsmMacro *M, SMLoc IL, int EB, + SMLoc EL, MemoryBuffer *I) + : TheMacro(M), Instantiation(I), InstantiationLoc(IL), ExitBuffer(EB), + ExitLoc(EL) {} -static bool IsOperator(AsmToken::TokenKind kind) -{ - switch (kind) - { - default: - return false; - case AsmToken::Plus: - case AsmToken::Minus: - case AsmToken::Tilde: - case AsmToken::Slash: - case AsmToken::Star: - case AsmToken::Dot: - case AsmToken::Equal: - case AsmToken::EqualEqual: - case AsmToken::Pipe: - case AsmToken::PipePipe: - case AsmToken::Caret: - case AsmToken::Amp: - case AsmToken::AmpAmp: - case AsmToken::Exclaim: - case AsmToken::ExclaimEqual: - case AsmToken::Percent: - case AsmToken::Less: - case AsmToken::LessEqual: - case AsmToken::LessLess: - case AsmToken::LessGreater: - case AsmToken::Greater: - case AsmToken::GreaterEqual: - case AsmToken::GreaterGreater: - return true; +static bool isOperator(AsmToken::TokenKind kind) { + switch (kind) { + default: + return false; + case AsmToken::Plus: + case AsmToken::Minus: + case AsmToken::Tilde: + case AsmToken::Slash: + case AsmToken::Star: + case AsmToken::Dot: + case AsmToken::Equal: + case AsmToken::EqualEqual: + case AsmToken::Pipe: + case AsmToken::PipePipe: + case AsmToken::Caret: + case AsmToken::Amp: + case AsmToken::AmpAmp: + case AsmToken::Exclaim: + case AsmToken::ExclaimEqual: + case AsmToken::Percent: + case AsmToken::Less: + case AsmToken::LessEqual: + case AsmToken::LessLess: + case AsmToken::LessGreater: + case AsmToken::Greater: + case AsmToken::GreaterEqual: + case AsmToken::GreaterGreater: + return true; } } -bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA, +bool AsmParser::parseMacroArgument(MCAsmMacroArgument &MA, AsmToken::TokenKind &ArgumentDelimiter) { unsigned ParenLevel = 0; unsigned AddTokens = 0; @@ -1818,7 +1864,7 @@ bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA, // one into this argument if (ArgumentDelimiter == AsmToken::Space || ArgumentDelimiter == AsmToken::Eof) { - if (IsOperator(Lexer.getKind())) { + if (isOperator(Lexer.getKind())) { // Check to see whether the token is used as an operator, // or 
part of an identifier const char *NextChar = getTok().getEndLoc().getPointer(); @@ -1828,14 +1874,14 @@ bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA, if (!AddTokens && ParenLevel == 0) { if (ArgumentDelimiter == AsmToken::Eof && - !IsOperator(Lexer.getKind())) + !isOperator(Lexer.getKind())) ArgumentDelimiter = AsmToken::Space; break; } } } - // HandleMacroEntry relies on not advancing the lexer here + // handleMacroEntry relies on not advancing the lexer here // to be able to fill in the remaining default parameter values if (Lexer.is(AsmToken::EndOfStatement)) break; @@ -1860,10 +1906,11 @@ bool AsmParser::ParseMacroArgument(MCAsmMacroArgument &MA, } // Parse the macro instantiation arguments. -bool AsmParser::ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A) { +bool AsmParser::parseMacroArguments(const MCAsmMacro *M, + MCAsmMacroArguments &A) { const unsigned NParameters = M ? M->Parameters.size() : 0; // Argument delimiter is initially unknown. It will be set by - // ParseMacroArgument() + // parseMacroArgument() AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof; // Parse two kinds of macro invocations: @@ -1873,7 +1920,7 @@ bool AsmParser::ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A) ++Parameter) { MCAsmMacroArgument MA; - if (ParseMacroArgument(MA, ArgumentDelimiter)) + if (parseMacroArgument(MA, ArgumentDelimiter)) return true; if (!MA.empty() || !NParameters) @@ -1904,31 +1951,31 @@ bool AsmParser::ParseMacroArguments(const MCAsmMacro *M, MCAsmMacroArguments &A) return TokError("Too many arguments"); } -const MCAsmMacro* AsmParser::LookupMacro(StringRef Name) { - StringMap<MCAsmMacro*>::iterator I = MacroMap.find(Name); +const MCAsmMacro *AsmParser::lookupMacro(StringRef Name) { + StringMap<MCAsmMacro *>::iterator I = MacroMap.find(Name); return (I == MacroMap.end()) ? NULL : I->getValue(); } -void AsmParser::DefineMacro(StringRef Name, const MCAsmMacro& Macro) { +void AsmParser::defineMacro(StringRef Name, const MCAsmMacro &Macro) { MacroMap[Name] = new MCAsmMacro(Macro); } -void AsmParser::UndefineMacro(StringRef Name) { - StringMap<MCAsmMacro*>::iterator I = MacroMap.find(Name); +void AsmParser::undefineMacro(StringRef Name) { - StringMap<MCAsmMacro *>::iterator I = MacroMap.find(Name); if (I != MacroMap.end()) { delete I->getValue(); MacroMap.erase(I); } } -bool AsmParser::HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) { +bool AsmParser::handleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) { // Arbitrarily limit macro nesting depth, to match 'as'. We can eliminate // this, although we should protect against infinite loops. if (ActiveMacros.size() == 20) return TokError("macros cannot be nested more than 20 levels deep"); MCAsmMacroArguments A; - if (ParseMacroArguments(M, A)) + if (parseMacroArguments(M, A)) return true; // Remove any trailing empty arguments. Do this after-the-fact as we have @@ -1951,14 +1998,12 @@ bool AsmParser::HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) { OS << ".endmacro\n"; MemoryBuffer *Instantiation = - MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>"); + MemoryBuffer::getMemBufferCopy(OS.str(), "<instantiation>"); // Create the macro instantiation object and add to the current macro // instantiation stack. - MacroInstantiation *MI = new MacroInstantiation(M, NameLoc, - CurBuffer, - getTok().getLoc(), - Instantiation); + MacroInstantiation *MI = new MacroInstantiation( + M, NameLoc, CurBuffer, getTok().getLoc(), Instantiation); ActiveMacros.push_back(MI); // Jump to the macro instantiation and prime the lexer.
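handleMacroEntry above caps nesting at 20 frames, matching 'as', and re-lexes the expanded body from a synthesized buffer whose trailing ".endmacro" acts as the sentinel that later pops the instantiation. A toy sketch of that strategy with simplified, hypothetical types (not the MC API):

#include <stack>
#include <string>

struct Frame { std::string Buffer; size_t Pos; };

// Returns true on error, following the parser's bool convention.
bool enterMacro(std::stack<Frame> &Active, const std::string &ExpandedBody) {
  if (Active.size() == 20)
    return true; // "macros cannot be nested more than 20 levels deep"
  // Lexing resumes from this fresh buffer; the appended ".endmacro" is the
  // sentinel that later pops this instantiation frame.
  Active.push({ExpandedBody + ".endmacro\n", 0});
  return false;
}

int main() {
  std::stack<Frame> Active;
  return enterMacro(Active, "nop\n") ? 1 : 0;
}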
@@ -1969,9 +2014,9 @@ bool AsmParser::HandleMacroEntry(const MCAsmMacro *M, SMLoc NameLoc) { return false; } -void AsmParser::HandleMacroExit() { +void AsmParser::handleMacroExit() { // Jump to the EndOfStatement we should return to, and consume it. - JumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer); + jumpToLoc(ActiveMacros.back()->ExitLoc, ActiveMacros.back()->ExitBuffer); Lex(); // Pop the instantiation entry. @@ -1979,29 +2024,30 @@ void AsmParser::HandleMacroExit() { ActiveMacros.pop_back(); } -static bool IsUsedIn(const MCSymbol *Sym, const MCExpr *Value) { +static bool isUsedIn(const MCSymbol *Sym, const MCExpr *Value) { switch (Value->getKind()) { case MCExpr::Binary: { - const MCBinaryExpr *BE = static_cast<const MCBinaryExpr*>(Value); - return IsUsedIn(Sym, BE->getLHS()) || IsUsedIn(Sym, BE->getRHS()); + const MCBinaryExpr *BE = static_cast<const MCBinaryExpr *>(Value); + return isUsedIn(Sym, BE->getLHS()) || isUsedIn(Sym, BE->getRHS()); } case MCExpr::Target: case MCExpr::Constant: return false; case MCExpr::SymbolRef: { - const MCSymbol &S = static_cast<const MCSymbolRefExpr*>(Value)->getSymbol(); + const MCSymbol &S = + static_cast<const MCSymbolRefExpr *>(Value)->getSymbol(); if (S.isVariable()) - return IsUsedIn(Sym, S.getVariableValue()); + return isUsedIn(Sym, S.getVariableValue()); return &S == Sym; } case MCExpr::Unary: - return IsUsedIn(Sym, static_cast<const MCUnaryExpr*>(Value)->getSubExpr()); + return isUsedIn(Sym, static_cast<const MCUnaryExpr *>(Value)->getSubExpr()); } llvm_unreachable("Unknown expr kind!"); } -bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef, +bool AsmParser::parseAssignment(StringRef Name, bool allow_redef, bool NoDeadStrip) { // FIXME: Use better location, we should use proper tokens. SMLoc EqualLoc = Lexer.getLoc(); @@ -2034,7 +2080,7 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef, // // FIXME: Diagnostics. Note the location of the definition as a label. // FIXME: Diagnose assignment to protected identifier (e.g., register name). - if (IsUsedIn(Sym, Value)) + if (isUsedIn(Sym, Value)) return Error(EqualLoc, "Recursive use of '" + Name + "'"); else if (Sym->isUndefined() && !Sym->isUsed() && !Sym->isVariable()) ; // Allow redefinitions of undefined symbols only used in directives. @@ -2046,7 +2092,7 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef, return Error(EqualLoc, "invalid assignment to '" + Name + "'"); else if (!isa<MCConstantExpr>(Sym->getVariableValue())) return Error(EqualLoc, "invalid reassignment of non-absolute variable '" + - Name + "'"); + Name + "'"); // Don't count these checks as uses. Sym->setUsed(false); @@ -2060,7 +2106,6 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef, if (NoDeadStrip) Out.EmitSymbolAttribute(Sym, MCSA_NoDeadStrip); - return false; } @@ -2069,31 +2114,30 @@ bool AsmParser::ParseAssignment(StringRef Name, bool allow_redef, /// ::= string bool AsmParser::parseIdentifier(StringRef &Res) { // The assembler has relaxed rules for accepting identifiers, in particular we - // allow things like '.globl $foo', which would normally be separate - // tokens. At this level, we have already lexed so we cannot (currently) + // allow things like '.globl $foo' and '.def @feat.00', which would normally be + // separate tokens. At this level, we have already lexed so we cannot (currently) // handle this as a context dependent token, instead we detect adjacent tokens // and return the combined identifier.
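The recursive-use walk above is a plain structural recursion over the expression tree; the same shape on a toy Expr type shows why 'a = a + 1' is diagnosed as "Recursive use of 'a'" (a sketch, not MCExpr):

#include <cassert>
#include <string>

struct Expr {
  enum KindTy { Sym, Const, Binary } Kind;
  std::string Name;                          // for Sym
  const Expr *LHS = nullptr, *RHS = nullptr; // for Binary
};

bool isUsedIn(const std::string &Sym, const Expr *E) {
  switch (E->Kind) {
  case Expr::Const:  return false;
  case Expr::Sym:    return E->Name == Sym;
  case Expr::Binary: return isUsedIn(Sym, E->LHS) || isUsedIn(Sym, E->RHS);
  }
  return false;
}

int main() {
  Expr A{Expr::Sym, "a"}, One{Expr::Const, ""};
  Expr Sum{Expr::Binary, "", &A, &One};
  assert(isUsedIn("a", &Sum));  // 'a = a + 1' -> "Recursive use of 'a'"
  assert(!isUsedIn("b", &Sum)); // 'b = a + 1' is fine
  return 0;
}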
- if (Lexer.is(AsmToken::Dollar)) { - SMLoc DollarLoc = getLexer().getLoc(); + if (Lexer.is(AsmToken::Dollar) || Lexer.is(AsmToken::At)) { + SMLoc PrefixLoc = getLexer().getLoc(); - // Consume the dollar sign, and check for a following identifier. + // Consume the prefix character, and check for a following identifier. Lex(); if (Lexer.isNot(AsmToken::Identifier)) return true; - // We have a '$' followed by an identifier, make sure they are adjacent. - if (DollarLoc.getPointer() + 1 != getTok().getLoc().getPointer()) + // We have a '$' or '@' followed by an identifier, make sure they are adjacent. + if (PrefixLoc.getPointer() + 1 != getTok().getLoc().getPointer()) return true; // Construct the joined identifier and consume the token. - Res = StringRef(DollarLoc.getPointer(), - getTok().getIdentifier().size() + 1); + Res = + StringRef(PrefixLoc.getPointer(), getTok().getIdentifier().size() + 1); Lex(); return false; } - if (Lexer.isNot(AsmToken::Identifier) && - Lexer.isNot(AsmToken::String)) + if (Lexer.isNot(AsmToken::Identifier) && Lexer.isNot(AsmToken::String)) return true; Res = getTok().getIdentifier(); @@ -2103,11 +2147,11 @@ bool AsmParser::parseIdentifier(StringRef &Res) { return false; } -/// ParseDirectiveSet: +/// parseDirectiveSet: /// ::= .equ identifier ',' expression /// ::= .equiv identifier ',' expression /// ::= .set identifier ',' expression -bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) { +bool AsmParser::parseDirectiveSet(StringRef IDVal, bool allow_redef) { StringRef Name; if (parseIdentifier(Name)) @@ -2117,7 +2161,7 @@ bool AsmParser::ParseDirectiveSet(StringRef IDVal, bool allow_redef) { return TokError("unexpected token in '" + Twine(IDVal) + "'"); Lex(); - return ParseAssignment(Name, allow_redef, true); + return parseAssignment(Name, allow_redef, true); } bool AsmParser::parseEscapedString(std::string &Data) { @@ -2138,15 +2182,15 @@ bool AsmParser::parseEscapedString(std::string &Data) { return TokError("unexpected backslash at end of string"); // Recognize octal sequences. - if ((unsigned) (Str[i] - '0') <= 7) { + if ((unsigned)(Str[i] - '0') <= 7) { // Consume up to three octal characters. 
unsigned Value = Str[i] - '0'; - if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { + if (i + 1 != e && ((unsigned)(Str[i + 1] - '0')) <= 7) { ++i; Value = Value * 8 + (Str[i] - '0'); - if (i + 1 != e && ((unsigned) (Str[i + 1] - '0')) <= 7) { + if (i + 1 != e && ((unsigned)(Str[i + 1] - '0')) <= 7) { ++i; Value = Value * 8 + (Str[i] - '0'); } @@ -2155,7 +2199,7 @@ bool AsmParser::parseEscapedString(std::string &Data) { if (Value > 255) return TokError("invalid octal escape sequence (out of range)"); - Data += (unsigned char) Value; + Data += (unsigned char)Value; continue; } @@ -2178,9 +2222,9 @@ bool AsmParser::parseEscapedString(std::string &Data) { return false; } -/// ParseDirectiveAscii: +/// parseDirectiveAscii: /// ::= ( .ascii | .asciz | .string ) [ "string" ( , "string" )* ] -bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { +bool AsmParser::parseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { if (getLexer().isNot(AsmToken::EndOfStatement)) { checkForValidSection(); @@ -2192,9 +2236,9 @@ bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { if (parseEscapedString(Data)) return true; - getStreamer().EmitBytes(Data, DEFAULT_ADDRSPACE); + getStreamer().EmitBytes(Data); if (ZeroTerminated) - getStreamer().EmitBytes(StringRef("\0", 1), DEFAULT_ADDRSPACE); + getStreamer().EmitBytes(StringRef("\0", 1)); Lex(); @@ -2211,9 +2255,9 @@ bool AsmParser::ParseDirectiveAscii(StringRef IDVal, bool ZeroTerminated) { return false; } -/// ParseDirectiveValue +/// parseDirectiveValue /// ::= (.byte | .short | ... ) [ expression (, expression)* ] -bool AsmParser::ParseDirectiveValue(unsigned Size) { +bool AsmParser::parseDirectiveValue(unsigned Size) { if (getLexer().isNot(AsmToken::EndOfStatement)) { checkForValidSection(); @@ -2229,9 +2273,9 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) { uint64_t IntValue = MCE->getValue(); if (!isUIntN(8 * Size, IntValue) && !isIntN(8 * Size, IntValue)) return Error(ExprLoc, "literal value out of range for directive"); - getStreamer().EmitIntValue(IntValue, Size, DEFAULT_ADDRSPACE); + getStreamer().EmitIntValue(IntValue, Size); } else - getStreamer().EmitValue(Value, Size, DEFAULT_ADDRSPACE); + getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -2247,9 +2291,9 @@ bool AsmParser::ParseDirectiveValue(unsigned Size) { return false; } -/// ParseDirectiveRealValue +/// parseDirectiveRealValue /// ::= (.single | .double) [ expression (, expression)* ] -bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) { +bool AsmParser::parseDirectiveRealValue(const fltSemantics &Semantics) { if (getLexer().isNot(AsmToken::EndOfStatement)) { checkForValidSection(); @@ -2279,7 +2323,7 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) { else return TokError("invalid floating point literal"); } else if (Value.convertFromString(IDVal, APFloat::rmNearestTiesToEven) == - APFloat::opInvalidOp) + APFloat::opInvalidOp) return TokError("invalid floating point literal"); if (IsNeg) Value.changeSign(); @@ -2290,7 +2334,7 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) { // Emit the value as an integer. 
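The octal-escape handling above consumes at most three digits and rejects anything past 255. The same decoding rule, lifted into a standalone helper for clarity (hypothetical name; assumes the leading digit was already validated, as in the caller):

#include <cassert>
#include <string>

// Decodes up to three octal digits starting at S[I]; the caller has already
// checked that S[I] is octal. Returns false for values above 255.
bool decodeOctal(const std::string &S, size_t &I, unsigned &Value) {
  Value = S[I] - '0';
  for (int N = 0; N < 2; ++N) {
    if (I + 1 == S.size() || (unsigned)(S[I + 1] - '0') > 7)
      break;
    ++I;
    Value = Value * 8 + (S[I] - '0');
  }
  return Value <= 255; // "\400" and beyond are rejected as out of range
}

int main() {
  std::string S = "101";
  size_t I = 0;
  unsigned V = 0;
  assert(decodeOctal(S, I, V) && V == 65 && I == 2); // "\101" == 'A'
  return 0;
}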
APInt AsInt = Value.bitcastToAPInt(); getStreamer().EmitIntValue(AsInt.getLimitedValue(), - AsInt.getBitWidth() / 8, DEFAULT_ADDRSPACE); + AsInt.getBitWidth() / 8); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -2305,9 +2349,9 @@ bool AsmParser::ParseDirectiveRealValue(const fltSemantics &Semantics) { return false; } -/// ParseDirectiveZero +/// parseDirectiveZero /// ::= .zero expression -bool AsmParser::ParseDirectiveZero() { +bool AsmParser::parseDirectiveZero() { checkForValidSection(); int64_t NumBytes; @@ -2326,53 +2370,58 @@ bool AsmParser::ParseDirectiveZero() { Lex(); - getStreamer().EmitFill(NumBytes, Val, DEFAULT_ADDRSPACE); + getStreamer().EmitFill(NumBytes, Val); return false; } -/// ParseDirectiveFill -/// ::= .fill expression , expression , expression -bool AsmParser::ParseDirectiveFill() { +/// parseDirectiveFill +/// ::= .fill expression [ , expression [ , expression ] ] +bool AsmParser::parseDirectiveFill() { checkForValidSection(); int64_t NumValues; if (parseAbsoluteExpression(NumValues)) return true; - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in '.fill' directive"); - Lex(); + int64_t FillSize = 1; + int64_t FillExpr = 0; - int64_t FillSize; - if (parseAbsoluteExpression(FillSize)) - return true; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.fill' directive"); + Lex(); - if (getLexer().isNot(AsmToken::Comma)) - return TokError("unexpected token in '.fill' directive"); - Lex(); + if (parseAbsoluteExpression(FillSize)) + return true; - int64_t FillExpr; - if (parseAbsoluteExpression(FillExpr)) - return true; + if (getLexer().isNot(AsmToken::EndOfStatement)) { + if (getLexer().isNot(AsmToken::Comma)) + return TokError("unexpected token in '.fill' directive"); + Lex(); - if (getLexer().isNot(AsmToken::EndOfStatement)) - return TokError("unexpected token in '.fill' directive"); + if (parseAbsoluteExpression(FillExpr)) + return true; - Lex(); + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.fill' directive"); + + Lex(); + } + } if (FillSize != 1 && FillSize != 2 && FillSize != 4 && FillSize != 8) return TokError("invalid '.fill' size, expected 1, 2, 4, or 8"); for (uint64_t i = 0, e = NumValues; i != e; ++i) - getStreamer().EmitIntValue(FillExpr, FillSize, DEFAULT_ADDRSPACE); + getStreamer().EmitIntValue(FillExpr, FillSize); return false; } -/// ParseDirectiveOrg +/// parseDirectiveOrg /// ::= .org expression [ , expression ] -bool AsmParser::ParseDirectiveOrg() { +bool AsmParser::parseDirectiveOrg() { checkForValidSection(); const MCExpr *Offset; @@ -2405,9 +2454,9 @@ bool AsmParser::ParseDirectiveOrg() { return false; } -/// ParseDirectiveAlign +/// parseDirectiveAlign /// ::= {.align, ...} expression [ , expression [ , expression ]] -bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { +bool AsmParser::parseDirectiveAlign(bool IsPow2, unsigned ValueSize) { checkForValidSection(); SMLoc AlignmentLoc = getLexer().getLoc(); @@ -2471,13 +2520,13 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { if (MaxBytesLoc.isValid()) { if (MaxBytesToFill < 1) { Error(MaxBytesLoc, "alignment directive can never be satisfied in this " - "many bytes, ignoring maximum bytes expression"); + "many bytes, ignoring maximum bytes expression"); MaxBytesToFill = 0; } if (MaxBytesToFill >= Alignment) { Warning(MaxBytesLoc, "maximum bytes expression exceeds alignment and " - "has no 
effect"); + "has no effect"); MaxBytesToFill = 0; } } @@ -2497,10 +2546,10 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) { return false; } -/// ParseDirectiveFile +/// parseDirectiveFile /// ::= .file [number] filename /// ::= .file number directory filename -bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) { // FIXME: I'm not sure what this is. int64_t FileNumber = -1; SMLoc FileNumberLoc = getLexer().getLoc(); @@ -2516,17 +2565,21 @@ bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) { return TokError("unexpected token in '.file' directive"); // Usually the directory and filename together, otherwise just the directory. - StringRef Path = getTok().getString(); - Path = Path.substr(1, Path.size()-2); + // Allow the strings to have escaped octal character sequence. + std::string Path = getTok().getString(); + if (parseEscapedString(Path)) + return true; Lex(); StringRef Directory; StringRef Filename; + std::string FilenameData; if (getLexer().is(AsmToken::String)) { if (FileNumber == -1) return TokError("explicit path specified, but no file number"); - Filename = getTok().getString(); - Filename = Filename.substr(1, Filename.size()-2); + if (parseEscapedString(FilenameData)) + return true; + Filename = FilenameData; Directory = Path; Lex(); } else { @@ -2540,8 +2593,9 @@ bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) { getStreamer().EmitFileDirective(Filename); else { if (getContext().getGenDwarfForAssembly() == true) - Error(DirectiveLoc, "input can't have .file dwarf directives when -g is " - "used to generate dwarf debug info for assembly code"); + Error(DirectiveLoc, + "input can't have .file dwarf directives when -g is " + "used to generate dwarf debug info for assembly code"); if (getStreamer().EmitDwarfFileDirective(FileNumber, Directory, Filename)) Error(FileNumberLoc, "file number already allocated"); @@ -2550,15 +2604,15 @@ bool AsmParser::ParseDirectiveFile(SMLoc DirectiveLoc) { return false; } -/// ParseDirectiveLine +/// parseDirectiveLine /// ::= .line [number] -bool AsmParser::ParseDirectiveLine() { +bool AsmParser::parseDirectiveLine() { if (getLexer().isNot(AsmToken::EndOfStatement)) { if (getLexer().isNot(AsmToken::Integer)) return TokError("unexpected token in '.line' directive"); int64_t LineNumber = getTok().getIntVal(); - (void) LineNumber; + (void)LineNumber; Lex(); // FIXME: Do something with the .line. @@ -2570,14 +2624,14 @@ bool AsmParser::ParseDirectiveLine() { return false; } -/// ParseDirectiveLoc +/// parseDirectiveLoc /// ::= .loc FileNumber [LineNumber] [ColumnPos] [basic_block] [prologue_end] /// [epilogue_begin] [is_stmt VALUE] [isa VALUE] /// The first number is a file number, must have been previously assigned with /// a .file directive, the second number is the line number and optionally the /// third number is a column position (zero if not specified). The remaining /// optional items are .loc sub-directives. 
-bool AsmParser::ParseDirectiveLoc() { +bool AsmParser::parseDirectiveLoc() { if (getLexer().isNot(AsmToken::Integer)) return TokError("unexpected token in '.loc' directive"); int64_t FileNumber = getTok().getIntVal(); @@ -2590,8 +2644,8 @@ bool AsmParser::ParseDirectiveLoc() { int64_t LineNumber = 0; if (getLexer().is(AsmToken::Integer)) { LineNumber = getTok().getIntVal(); - if (LineNumber < 1) - return TokError("line number less than one in '.loc' directive"); + if (LineNumber < 0) + return TokError("line number less than zero in '.loc' directive"); Lex(); } @@ -2671,15 +2725,15 @@ bool AsmParser::ParseDirectiveLoc() { return false; } -/// ParseDirectiveStabs +/// parseDirectiveStabs /// ::= .stabs string, number, number, number -bool AsmParser::ParseDirectiveStabs() { +bool AsmParser::parseDirectiveStabs() { return TokError("unsupported directive '.stabs'"); } -/// ParseDirectiveCFISections +/// parseDirectiveCFISections /// ::= .cfi_sections section [, section] -bool AsmParser::ParseDirectiveCFISections() { +bool AsmParser::parseDirectiveCFISections() { StringRef Name; bool EH = false; bool Debug = false; @@ -2708,40 +2762,40 @@ bool AsmParser::ParseDirectiveCFISections() { return false; } -/// ParseDirectiveCFIStartProc +/// parseDirectiveCFIStartProc /// ::= .cfi_startproc -bool AsmParser::ParseDirectiveCFIStartProc() { +bool AsmParser::parseDirectiveCFIStartProc() { getStreamer().EmitCFIStartProc(); return false; } -/// ParseDirectiveCFIEndProc +/// parseDirectiveCFIEndProc /// ::= .cfi_endproc -bool AsmParser::ParseDirectiveCFIEndProc() { +bool AsmParser::parseDirectiveCFIEndProc() { getStreamer().EmitCFIEndProc(); return false; } -/// ParseRegisterOrRegisterNumber - parse register name or number. -bool AsmParser::ParseRegisterOrRegisterNumber(int64_t &Register, +/// \brief parse register name or number. 
+bool AsmParser::parseRegisterOrRegisterNumber(int64_t &Register, SMLoc DirectiveLoc) { unsigned RegNo; if (getLexer().isNot(AsmToken::Integer)) { if (getTargetParser().ParseRegister(RegNo, DirectiveLoc, DirectiveLoc)) return true; - Register = getContext().getRegisterInfo().getDwarfRegNum(RegNo, true); + Register = getContext().getRegisterInfo()->getDwarfRegNum(RegNo, true); } else return parseAbsoluteExpression(Register); return false; } -/// ParseDirectiveCFIDefCfa +/// parseDirectiveCFIDefCfa /// ::= .cfi_def_cfa register, offset -bool AsmParser::ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveCFIDefCfa(SMLoc DirectiveLoc) { int64_t Register = 0; - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; if (getLexer().isNot(AsmToken::Comma)) @@ -2756,9 +2810,9 @@ bool AsmParser::ParseDirectiveCFIDefCfa(SMLoc DirectiveLoc) { return false; } -/// ParseDirectiveCFIDefCfaOffset +/// parseDirectiveCFIDefCfaOffset /// ::= .cfi_def_cfa_offset offset -bool AsmParser::ParseDirectiveCFIDefCfaOffset() { +bool AsmParser::parseDirectiveCFIDefCfaOffset() { int64_t Offset = 0; if (parseAbsoluteExpression(Offset)) return true; @@ -2767,11 +2821,11 @@ bool AsmParser::ParseDirectiveCFIDefCfaOffset() { return false; } -/// ParseDirectiveCFIRegister +/// parseDirectiveCFIRegister /// ::= .cfi_register register, register -bool AsmParser::ParseDirectiveCFIRegister(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveCFIRegister(SMLoc DirectiveLoc) { int64_t Register1 = 0; - if (ParseRegisterOrRegisterNumber(Register1, DirectiveLoc)) + if (parseRegisterOrRegisterNumber(Register1, DirectiveLoc)) return true; if (getLexer().isNot(AsmToken::Comma)) @@ -2779,16 +2833,23 @@ bool AsmParser::ParseDirectiveCFIRegister(SMLoc DirectiveLoc) { Lex(); int64_t Register2 = 0; - if (ParseRegisterOrRegisterNumber(Register2, DirectiveLoc)) + if (parseRegisterOrRegisterNumber(Register2, DirectiveLoc)) return true; getStreamer().EmitCFIRegister(Register1, Register2); return false; } -/// ParseDirectiveCFIAdjustCfaOffset +/// parseDirectiveCFIWindowSave +/// ::= .cfi_window_save +bool AsmParser::parseDirectiveCFIWindowSave() { + getStreamer().EmitCFIWindowSave(); + return false; +} + +/// parseDirectiveCFIAdjustCfaOffset /// ::= .cfi_adjust_cfa_offset adjustment -bool AsmParser::ParseDirectiveCFIAdjustCfaOffset() { +bool AsmParser::parseDirectiveCFIAdjustCfaOffset() { int64_t Adjustment = 0; if (parseAbsoluteExpression(Adjustment)) return true; @@ -2797,24 +2858,24 @@ bool AsmParser::ParseDirectiveCFIAdjustCfaOffset() { return false; } -/// ParseDirectiveCFIDefCfaRegister +/// parseDirectiveCFIDefCfaRegister /// ::= .cfi_def_cfa_register register -bool AsmParser::ParseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveCFIDefCfaRegister(SMLoc DirectiveLoc) { int64_t Register = 0; - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; getStreamer().EmitCFIDefCfaRegister(Register); return false; } -/// ParseDirectiveCFIOffset +/// parseDirectiveCFIOffset /// ::= .cfi_offset register, offset -bool AsmParser::ParseDirectiveCFIOffset(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveCFIOffset(SMLoc DirectiveLoc) { int64_t Register = 0; int64_t Offset = 0; - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; if 
(getLexer().isNot(AsmToken::Comma)) @@ -2828,12 +2889,12 @@ bool AsmParser::ParseDirectiveCFIOffset(SMLoc DirectiveLoc) { return false; } -/// ParseDirectiveCFIRelOffset +/// parseDirectiveCFIRelOffset /// ::= .cfi_rel_offset register, offset -bool AsmParser::ParseDirectiveCFIRelOffset(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveCFIRelOffset(SMLoc DirectiveLoc) { int64_t Register = 0; - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; if (getLexer().isNot(AsmToken::Comma)) @@ -2870,11 +2931,11 @@ static bool isValidEncoding(int64_t Encoding) { return true; } -/// ParseDirectiveCFIPersonalityOrLsda +/// parseDirectiveCFIPersonalityOrLsda /// IsPersonality true for cfi_personality, false for cfi_lsda /// ::= .cfi_personality encoding, [symbol_name] /// ::= .cfi_lsda encoding, [symbol_name] -bool AsmParser::ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality) { +bool AsmParser::parseDirectiveCFIPersonalityOrLsda(bool IsPersonality) { int64_t Encoding = 0; if (parseAbsoluteExpression(Encoding)) return true; @@ -2901,46 +2962,46 @@ bool AsmParser::ParseDirectiveCFIPersonalityOrLsda(bool IsPersonality) { return false; } -/// ParseDirectiveCFIRememberState +/// parseDirectiveCFIRememberState /// ::= .cfi_remember_state -bool AsmParser::ParseDirectiveCFIRememberState() { +bool AsmParser::parseDirectiveCFIRememberState() { getStreamer().EmitCFIRememberState(); return false; } -/// ParseDirectiveCFIRestoreState +/// parseDirectiveCFIRestoreState /// ::= .cfi_remember_state -bool AsmParser::ParseDirectiveCFIRestoreState() { +bool AsmParser::parseDirectiveCFIRestoreState() { getStreamer().EmitCFIRestoreState(); return false; } -/// ParseDirectiveCFISameValue +/// parseDirectiveCFISameValue /// ::= .cfi_same_value register -bool AsmParser::ParseDirectiveCFISameValue(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveCFISameValue(SMLoc DirectiveLoc) { int64_t Register = 0; - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; getStreamer().EmitCFISameValue(Register); return false; } -/// ParseDirectiveCFIRestore +/// parseDirectiveCFIRestore /// ::= .cfi_restore register -bool AsmParser::ParseDirectiveCFIRestore(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveCFIRestore(SMLoc DirectiveLoc) { int64_t Register = 0; - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; getStreamer().EmitCFIRestore(Register); return false; } -/// ParseDirectiveCFIEscape +/// parseDirectiveCFIEscape /// ::= .cfi_escape expression[,...] 
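As the '.cfi_escape' body just below shows, each comma-separated absolute expression contributes exactly one raw byte to the escape blob. A sketch of that accumulation (hypothetical helper name):

#include <cassert>
#include <cstdint>
#include <string>
#include <vector>

// Each absolute expression in ".cfi_escape e1, e2, ..." becomes one byte.
std::string cfiEscapeBytes(const std::vector<int64_t> &Exprs) {
  std::string Values;
  for (int64_t V : Exprs)
    Values += static_cast<char>(V);
  return Values;
}

int main() {
  std::string Bytes = cfiEscapeBytes({0x16, 0x08, 0x07});
  assert(Bytes.size() == 3 && static_cast<uint8_t>(Bytes[0]) == 0x16);
  return 0;
}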
-bool AsmParser::ParseDirectiveCFIEscape() { +bool AsmParser::parseDirectiveCFIEscape() { std::string Values; int64_t CurrValue; if (parseAbsoluteExpression(CurrValue)) @@ -2961,9 +3022,9 @@ bool AsmParser::ParseDirectiveCFIEscape() { return false; } -/// ParseDirectiveCFISignalFrame +/// parseDirectiveCFISignalFrame /// ::= .cfi_signal_frame -bool AsmParser::ParseDirectiveCFISignalFrame() { +bool AsmParser::parseDirectiveCFISignalFrame() { if (getLexer().isNot(AsmToken::EndOfStatement)) return Error(getLexer().getLoc(), "unexpected token in '.cfi_signal_frame'"); @@ -2972,40 +3033,40 @@ bool AsmParser::ParseDirectiveCFISignalFrame() { return false; } -/// ParseDirectiveCFIUndefined +/// parseDirectiveCFIUndefined /// ::= .cfi_undefined register -bool AsmParser::ParseDirectiveCFIUndefined(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveCFIUndefined(SMLoc DirectiveLoc) { int64_t Register = 0; - if (ParseRegisterOrRegisterNumber(Register, DirectiveLoc)) + if (parseRegisterOrRegisterNumber(Register, DirectiveLoc)) return true; getStreamer().EmitCFIUndefined(Register); return false; } -/// ParseDirectiveMacrosOnOff +/// parseDirectiveMacrosOnOff /// ::= .macros_on /// ::= .macros_off -bool AsmParser::ParseDirectiveMacrosOnOff(StringRef Directive) { +bool AsmParser::parseDirectiveMacrosOnOff(StringRef Directive) { if (getLexer().isNot(AsmToken::EndOfStatement)) return Error(getLexer().getLoc(), "unexpected token in '" + Directive + "' directive"); - SetMacrosEnabled(Directive == ".macros_on"); + setMacrosEnabled(Directive == ".macros_on"); return false; } -/// ParseDirectiveMacro +/// parseDirectiveMacro /// ::= .macro name [parameters] -bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveMacro(SMLoc DirectiveLoc) { StringRef Name; if (parseIdentifier(Name)) return TokError("expected identifier in '.macro' directive"); MCAsmMacroParameters Parameters; // Argument delimiter is initially unknown. It will be set by - // ParseMacroArgument() + // parseMacroArgument() AsmToken::TokenKind ArgumentDelimiter = AsmToken::Eof; if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { @@ -3015,7 +3076,7 @@ bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) { if (getLexer().is(AsmToken::Equal)) { Lex(); - if (ParseMacroArgument(Parameter.second, ArgumentDelimiter)) + if (parseMacroArgument(Parameter.second, ArgumentDelimiter)) return true; } @@ -3055,19 +3116,19 @@ bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) { eatToEndOfStatement(); } - if (LookupMacro(Name)) { + if (lookupMacro(Name)) { return Error(DirectiveLoc, "macro '" + Name + "' is already defined"); } const char *BodyStart = StartToken.getLoc().getPointer(); const char *BodyEnd = EndToken.getLoc().getPointer(); StringRef Body = StringRef(BodyStart, BodyEnd - BodyStart); - CheckForBadMacro(DirectiveLoc, Name, Body, Parameters); - DefineMacro(Name, MCAsmMacro(Name, Body, Parameters)); + checkForBadMacro(DirectiveLoc, Name, Body, Parameters); + defineMacro(Name, MCAsmMacro(Name, Body, Parameters)); return false; } -/// CheckForBadMacro +/// checkForBadMacro /// /// With the support added for named parameters there may be code out there that /// is transitioning from positional parameters. In versions of gas that did @@ -3081,7 +3142,7 @@ bool AsmParser::ParseDirectiveMacro(SMLoc DirectiveLoc) { /// intended or change the macro to use the named parameters. 
It is possible /// this warning will trigger when none of the named parameters are used and strings like $1 are in fact simply to be passed through unchanged. -void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, +void AsmParser::checkForBadMacro(SMLoc DirectiveLoc, StringRef Name, StringRef Body, MCAsmMacroParameters Parameters) { // If this macro is not defined with named parameters the warning we are @@ -3119,21 +3180,21 @@ void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, break; if (Body[Pos] == '$') { - switch (Body[Pos+1]) { - // $$ => $ + switch (Body[Pos + 1]) { + // $$ => $ case '$': break; - // $n => number of arguments + // $n => number of arguments case 'n': PositionalParametersFound = true; break; - // $[0-9] => argument + // $[0-9] => argument default: { PositionalParametersFound = true; break; - } + } } Pos += 2; } else { @@ -3141,19 +3202,19 @@ void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, while (isIdentifierChar(Body[I]) && I + 1 != End) ++I; - const char *Begin = Body.data() + Pos +1; - StringRef Argument(Begin, I - (Pos +1)); + const char *Begin = Body.data() + Pos + 1; + StringRef Argument(Begin, I - (Pos + 1)); unsigned Index = 0; for (; Index < NParameters; ++Index) if (Parameters[Index].first == Argument) break; if (Index == NParameters) { - if (Body[Pos+1] == '(' && Body[Pos+2] == ')') - Pos += 3; - else { - Pos = I; - } + if (Body[Pos + 1] == '(' && Body[Pos + 2] == ')') + Pos += 3; + else { + Pos = I; + } } else { NamedParametersFound = true; Pos += 1 + Argument.size(); @@ -3169,29 +3230,29 @@ void AsmParser::CheckForBadMacro(SMLoc DirectiveLoc, StringRef Name, "found in body which will have no effect"); } -/// ParseDirectiveEndMacro +/// parseDirectiveEndMacro /// ::= .endm /// ::= .endmacro -bool AsmParser::ParseDirectiveEndMacro(StringRef Directive) { +bool AsmParser::parseDirectiveEndMacro(StringRef Directive) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '" + Directive + "' directive"); // If we are inside a macro instantiation, terminate the current // instantiation. - if (InsideMacroInstantiation()) { - HandleMacroExit(); + if (isInsideMacroInstantiation()) { + handleMacroExit(); return false; } // Otherwise, this .endmacro is a stray entry in the file; well-formed // .endmacro directives are handled during the macro definition parsing.
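Roughly, the check above warns when a macro declared with named parameters references positional '$' arguments and never uses any declared name. A simplified detector for the positional half of that condition, ignoring the "$$" escape for brevity:

#include <cassert>
#include <string>

// True when a body references gas-style positional arguments ($n, $0..$9).
bool usesPositionalRefs(const std::string &Body) {
  for (size_t i = 0; i + 1 < Body.size(); ++i)
    if (Body[i] == '$' &&
        (Body[i + 1] == 'n' || (Body[i + 1] >= '0' && Body[i + 1] <= '9')))
      return true;
  return false;
}

int main() {
  assert(usesPositionalRefs("mov $0, $1"));        // would draw the warning
  assert(!usesPositionalRefs("mov \\dst, \\src")); // named style: no warning
  return 0;
}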
return TokError("unexpected '" + Directive + "' in file, " - "no current macro definition"); + "no current macro definition"); } -/// ParseDirectivePurgeMacro +/// parseDirectivePurgeMacro /// ::= .purgem -bool AsmParser::ParseDirectivePurgeMacro(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectivePurgeMacro(SMLoc DirectiveLoc) { StringRef Name; if (parseIdentifier(Name)) return TokError("expected identifier in '.purgem' directive"); @@ -3199,16 +3260,16 @@ bool AsmParser::ParseDirectivePurgeMacro(SMLoc DirectiveLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.purgem' directive"); - if (!LookupMacro(Name)) + if (!lookupMacro(Name)) return Error(DirectiveLoc, "macro '" + Name + "' is not defined"); - UndefineMacro(Name); + undefineMacro(Name); return false; } -/// ParseDirectiveBundleAlignMode +/// parseDirectiveBundleAlignMode /// ::= {.bundle_align_mode} expression -bool AsmParser::ParseDirectiveBundleAlignMode() { +bool AsmParser::parseDirectiveBundleAlignMode() { checkForValidSection(); // Expect a single argument: an expression that evaluates to a constant @@ -3232,9 +3293,9 @@ bool AsmParser::ParseDirectiveBundleAlignMode() { return false; } -/// ParseDirectiveBundleLock +/// parseDirectiveBundleLock /// ::= {.bundle_lock} [align_to_end] -bool AsmParser::ParseDirectiveBundleLock() { +bool AsmParser::parseDirectiveBundleLock() { checkForValidSection(); bool AlignToEnd = false; @@ -3242,7 +3303,7 @@ bool AsmParser::ParseDirectiveBundleLock() { StringRef Option; SMLoc Loc = getTok().getLoc(); const char *kInvalidOptionError = - "invalid option for '.bundle_lock' directive"; + "invalid option for '.bundle_lock' directive"; if (parseIdentifier(Option)) return Error(Loc, kInvalidOptionError); @@ -3261,9 +3322,9 @@ bool AsmParser::ParseDirectiveBundleLock() { return false; } -/// ParseDirectiveBundleLock +/// parseDirectiveBundleLock /// ::= {.bundle_lock} -bool AsmParser::ParseDirectiveBundleUnlock() { +bool AsmParser::parseDirectiveBundleUnlock() { checkForValidSection(); if (getLexer().isNot(AsmToken::EndOfStatement)) @@ -3274,9 +3335,9 @@ bool AsmParser::ParseDirectiveBundleUnlock() { return false; } -/// ParseDirectiveSpace +/// parseDirectiveSpace /// ::= (.skip | .space) expression [ , expression ] -bool AsmParser::ParseDirectiveSpace(StringRef IDVal) { +bool AsmParser::parseDirectiveSpace(StringRef IDVal) { checkForValidSection(); int64_t NumBytes; @@ -3299,18 +3360,18 @@ bool AsmParser::ParseDirectiveSpace(StringRef IDVal) { Lex(); if (NumBytes <= 0) - return TokError("invalid number of bytes in '" + - Twine(IDVal) + "' directive"); + return TokError("invalid number of bytes in '" + Twine(IDVal) + + "' directive"); // FIXME: Sometimes the fill expr is 'nop' if it isn't supplied, instead of 0. - getStreamer().EmitFill(NumBytes, FillExpr, DEFAULT_ADDRSPACE); + getStreamer().EmitFill(NumBytes, FillExpr); return false; } -/// ParseDirectiveLEB128 +/// parseDirectiveLEB128 /// ::= (.sleb128 | .uleb128) expression -bool AsmParser::ParseDirectiveLEB128(bool Signed) { +bool AsmParser::parseDirectiveLEB128(bool Signed) { checkForValidSection(); const MCExpr *Value; @@ -3328,9 +3389,9 @@ bool AsmParser::ParseDirectiveLEB128(bool Signed) { return false; } -/// ParseDirectiveSymbolAttribute +/// parseDirectiveSymbolAttribute /// ::= { ".globl", ".weak", ... 
} [ identifier ( , identifier )* ] -bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { +bool AsmParser::parseDirectiveSymbolAttribute(MCSymbolAttr Attr) { if (getLexer().isNot(AsmToken::EndOfStatement)) { for (;;) { StringRef Name; @@ -3345,7 +3406,8 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { if (Sym->isTemporary()) return Error(Loc, "non-local symbol required in directive"); - getStreamer().EmitSymbolAttribute(Sym, Attr); + if (!getStreamer().EmitSymbolAttribute(Sym, Attr)) + return Error(Loc, "unable to emit symbol attribute"); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -3360,9 +3422,9 @@ bool AsmParser::ParseDirectiveSymbolAttribute(MCSymbolAttr Attr) { return false; } -/// ParseDirectiveComm +/// parseDirectiveComm /// ::= ( .comm | .lcomm ) identifier , size_expression [ , align_expression ] -bool AsmParser::ParseDirectiveComm(bool IsLocal) { +bool AsmParser::parseDirectiveComm(bool IsLocal) { checkForValidSection(); SMLoc IDLoc = getLexer().getLoc(); @@ -3412,14 +3474,14 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { // but a size of .lcomm creates a bss symbol of size zero. if (Size < 0) return Error(SizeLoc, "invalid '.comm' or '.lcomm' directive size, can't " - "be less than zero"); + "be less than zero"); // NOTE: The alignment in the directive is a power of 2 value, the assembler // may internally end up wanting an alignment in bytes. // FIXME: Diagnose overflow. if (Pow2Alignment < 0) return Error(Pow2AlignmentLoc, "invalid '.comm' or '.lcomm' directive " - "alignment, can't be less than zero"); + "alignment, can't be less than zero"); if (!Sym->isUndefined()) return Error(IDLoc, "invalid symbol redefinition"); @@ -3434,9 +3496,9 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) { return false; } -/// ParseDirectiveAbort +/// parseDirectiveAbort /// ::= .abort [... message ...] -bool AsmParser::ParseDirectiveAbort() { +bool AsmParser::parseDirectiveAbort() { // FIXME: Use loc from directive. SMLoc Loc = getLexer().getLoc(); @@ -3455,25 +3517,25 @@ bool AsmParser::ParseDirectiveAbort() { return false; } -/// ParseDirectiveInclude +/// parseDirectiveInclude /// ::= .include "filename" -bool AsmParser::ParseDirectiveInclude() { +bool AsmParser::parseDirectiveInclude() { if (getLexer().isNot(AsmToken::String)) return TokError("expected string in '.include' directive"); - std::string Filename = getTok().getString(); + // Allow the strings to have escaped octal character sequence. + std::string Filename; + if (parseEscapedString(Filename)) + return true; SMLoc IncludeLoc = getLexer().getLoc(); Lex(); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.include' directive"); - // Strip the quotes. - Filename = Filename.substr(1, Filename.size()-2); - // Attempt to switch the lexer to the included file before consuming the end // of statement to avoid losing it when we switch. - if (EnterIncludeFile(Filename)) { + if (enterIncludeFile(Filename)) { Error(IncludeLoc, "Could not find include file '" + Filename + "'"); return true; } @@ -3481,24 +3543,24 @@ bool AsmParser::ParseDirectiveInclude() { return false; } -/// ParseDirectiveIncbin +/// parseDirectiveIncbin /// ::= .incbin "filename" -bool AsmParser::ParseDirectiveIncbin() { +bool AsmParser::parseDirectiveIncbin() { if (getLexer().isNot(AsmToken::String)) return TokError("expected string in '.incbin' directive"); - std::string Filename = getTok().getString(); + // Allow the strings to have escaped octal character sequence. 
+ std::string Filename; + if (parseEscapedString(Filename)) + return true; SMLoc IncbinLoc = getLexer().getLoc(); Lex(); if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.incbin' directive"); - // Strip the quotes. - Filename = Filename.substr(1, Filename.size()-2); - // Attempt to process the included file. - if (ProcessIncbinFile(Filename)) { + if (processIncbinFile(Filename)) { Error(IncbinLoc, "Could not find incbin file '" + Filename + "'"); return true; } @@ -3506,9 +3568,9 @@ bool AsmParser::ParseDirectiveIncbin() { return false; } -/// ParseDirectiveIf +/// parseDirectiveIf /// ::= .if expression -bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveIf(SMLoc DirectiveLoc) { TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; if (TheCondState.Ignore) { @@ -3530,9 +3592,9 @@ bool AsmParser::ParseDirectiveIf(SMLoc DirectiveLoc) { return false; } -/// ParseDirectiveIfb +/// parseDirectiveIfb /// ::= .ifb string -bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) { +bool AsmParser::parseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) { TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; @@ -3553,16 +3615,16 @@ bool AsmParser::ParseDirectiveIfb(SMLoc DirectiveLoc, bool ExpectBlank) { return false; } -/// ParseDirectiveIfc +/// parseDirectiveIfc /// ::= .ifc string1, string2 -bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) { +bool AsmParser::parseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) { TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; if (TheCondState.Ignore) { eatToEndOfStatement(); } else { - StringRef Str1 = ParseStringToComma(); + StringRef Str1 = parseStringToComma(); if (getLexer().isNot(AsmToken::Comma)) return TokError("unexpected token in '.ifc' directive"); @@ -3583,9 +3645,9 @@ bool AsmParser::ParseDirectiveIfc(SMLoc DirectiveLoc, bool ExpectEqual) { return false; } -/// ParseDirectiveIfdef +/// parseDirectiveIfdef /// ::= .ifdef symbol -bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { +bool AsmParser::parseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { StringRef Name; TheCondStack.push_back(TheCondState); TheCondState.TheCond = AsmCond::IfCond; @@ -3610,9 +3672,9 @@ bool AsmParser::ParseDirectiveIfdef(SMLoc DirectiveLoc, bool expect_defined) { return false; } -/// ParseDirectiveElseIf +/// parseDirectiveElseIf /// ::= .elseif expression -bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveElseIf(SMLoc DirectiveLoc) { if (TheCondState.TheCond != AsmCond::IfCond && TheCondState.TheCond != AsmCond::ElseIfCond) Error(DirectiveLoc, "Encountered a .elseif that doesn't follow a .if or " @@ -3641,9 +3703,9 @@ bool AsmParser::ParseDirectiveElseIf(SMLoc DirectiveLoc) { return false; } -/// ParseDirectiveElse +/// parseDirectiveElse /// ::= .else -bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveElse(SMLoc DirectiveLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in '.else' directive"); @@ -3665,16 +3727,15 @@ bool AsmParser::ParseDirectiveElse(SMLoc DirectiveLoc) { return false; } -/// ParseDirectiveEndIf +/// parseDirectiveEndIf /// ::= .endif -bool AsmParser::ParseDirectiveEndIf(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveEndIf(SMLoc DirectiveLoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return 
TokError("unexpected token in '.endif' directive"); Lex(); - if ((TheCondState.TheCond == AsmCond::NoCond) || - TheCondStack.empty()) + if ((TheCondState.TheCond == AsmCond::NoCond) || TheCondStack.empty()) Error(DirectiveLoc, "Encountered a .endif that doesn't follow a .if or " ".else"); if (!TheCondStack.empty()) { @@ -3718,7 +3779,6 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".extern"] = DK_EXTERN; DirectiveKindMap[".globl"] = DK_GLOBL; DirectiveKindMap[".global"] = DK_GLOBAL; - DirectiveKindMap[".indirect_symbol"] = DK_INDIRECT_SYMBOL; DirectiveKindMap[".lazy_reference"] = DK_LAZY_REFERENCE; DirectiveKindMap[".no_dead_strip"] = DK_NO_DEAD_STRIP; DirectiveKindMap[".symbol_resolver"] = DK_SYMBOL_RESOLVER; @@ -3780,6 +3840,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".cfi_signal_frame"] = DK_CFI_SIGNAL_FRAME; DirectiveKindMap[".cfi_undefined"] = DK_CFI_UNDEFINED; DirectiveKindMap[".cfi_register"] = DK_CFI_REGISTER; + DirectiveKindMap[".cfi_window_save"] = DK_CFI_WINDOW_SAVE; DirectiveKindMap[".macros_on"] = DK_MACROS_ON; DirectiveKindMap[".macros_off"] = DK_MACROS_OFF; DirectiveKindMap[".macro"] = DK_MACRO; @@ -3788,8 +3849,7 @@ void AsmParser::initializeDirectiveKindMap() { DirectiveKindMap[".purgem"] = DK_PURGEM; } - -MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { +MCAsmMacro *AsmParser::parseMacroLikeBody(SMLoc DirectiveLoc) { AsmToken EndToken, StartToken = getTok(); unsigned NestLevel = 0; @@ -3806,8 +3866,7 @@ MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { } // Otherwise, check whether we have reached the .endr. - if (Lexer.is(AsmToken::Identifier) && - getTok().getIdentifier() == ".endr") { + if (Lexer.is(AsmToken::Identifier) && getTok().getIdentifier() == ".endr") { if (NestLevel == 0) { EndToken = getTok(); Lex(); @@ -3831,22 +3890,21 @@ MCAsmMacro *AsmParser::ParseMacroLikeBody(SMLoc DirectiveLoc) { // We Are Anonymous. StringRef Name; MCAsmMacroParameters Parameters; - return new MCAsmMacro(Name, Body, Parameters); + MacroLikeBodies.push_back(MCAsmMacro(Name, Body, Parameters)); + return &MacroLikeBodies.back(); } -void AsmParser::InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, +void AsmParser::instantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, raw_svector_ostream &OS) { OS << ".endr\n"; MemoryBuffer *Instantiation = - MemoryBuffer::getMemBufferCopy(OS.str(), ""); + MemoryBuffer::getMemBufferCopy(OS.str(), ""); // Create the macro instantiation object and add to the current macro // instantiation stack. - MacroInstantiation *MI = new MacroInstantiation(M, DirectiveLoc, - CurBuffer, - getTok().getLoc(), - Instantiation); + MacroInstantiation *MI = new MacroInstantiation( + M, DirectiveLoc, CurBuffer, getTok().getLoc(), Instantiation); ActiveMacros.push_back(MI); // Jump to the macro instantiation and prime the lexer. @@ -3855,7 +3913,7 @@ void AsmParser::InstantiateMacroLikeBody(MCAsmMacro *M, SMLoc DirectiveLoc, Lex(); } -bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveRept(SMLoc DirectiveLoc) { int64_t Count; if (parseAbsoluteExpression(Count)) return TokError("unexpected token in '.rept' directive"); @@ -3870,7 +3928,7 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { Lex(); // Lex the rept definition. 
- MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc); + MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); if (!M) return true; @@ -3884,14 +3942,14 @@ bool AsmParser::ParseDirectiveRept(SMLoc DirectiveLoc) { if (expandMacro(OS, M->Body, Parameters, A, getTok().getLoc())) return true; } - InstantiateMacroLikeBody(M, DirectiveLoc, OS); + instantiateMacroLikeBody(M, DirectiveLoc, OS); return false; } -/// ParseDirectiveIrp +/// parseDirectiveIrp /// ::= .irp symbol,values -bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveIrp(SMLoc DirectiveLoc) { MCAsmMacroParameters Parameters; MCAsmMacroParameter Parameter; @@ -3906,14 +3964,14 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { Lex(); MCAsmMacroArguments A; - if (ParseMacroArguments(0, A)) + if (parseMacroArguments(0, A)) return true; // Eat the end of statement. Lex(); // Lex the irp definition. - MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc); + MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); if (!M) return true; @@ -3930,14 +3988,14 @@ bool AsmParser::ParseDirectiveIrp(SMLoc DirectiveLoc) { return true; } - InstantiateMacroLikeBody(M, DirectiveLoc, OS); + instantiateMacroLikeBody(M, DirectiveLoc, OS); return false; } -/// ParseDirectiveIrpc +/// parseDirectiveIrpc /// ::= .irpc symbol,values -bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveIrpc(SMLoc DirectiveLoc) { MCAsmMacroParameters Parameters; MCAsmMacroParameter Parameter; @@ -3952,7 +4010,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { Lex(); MCAsmMacroArguments A; - if (ParseMacroArguments(0, A)) + if (parseMacroArguments(0, A)) return true; if (A.size() != 1 || A.front().size() != 1) @@ -3962,7 +4020,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { Lex(); // Lex the irpc definition. - MCAsmMacro *M = ParseMacroLikeBody(DirectiveLoc); + MCAsmMacro *M = parseMacroLikeBody(DirectiveLoc); if (!M) return true; @@ -3975,7 +4033,7 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { std::size_t I, End = Values.size(); for (I = 0; I < End; ++I) { MCAsmMacroArgument Arg; - Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I+1))); + Arg.push_back(AsmToken(AsmToken::Identifier, Values.slice(I, I + 1))); MCAsmMacroArguments Args; Args.push_back(Arg); @@ -3984,24 +4042,24 @@ bool AsmParser::ParseDirectiveIrpc(SMLoc DirectiveLoc) { return true; } - InstantiateMacroLikeBody(M, DirectiveLoc, OS); + instantiateMacroLikeBody(M, DirectiveLoc, OS); return false; } -bool AsmParser::ParseDirectiveEndr(SMLoc DirectiveLoc) { +bool AsmParser::parseDirectiveEndr(SMLoc DirectiveLoc) { if (ActiveMacros.empty()) return TokError("unmatched '.endr' directive"); // The only .repl that should get here are the ones created by - // InstantiateMacroLikeBody. + // instantiateMacroLikeBody. 
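[Editor's aside: parseDirectiveRept/Irp/Irpc above all follow the same scheme: lex the body once with parseMacroLikeBody, expand it once per count/value into a buffer, then replay the buffer via instantiateMacroLikeBody. A toy model of the expansion step, with simple textual substitution standing in for expandMacro (invented names, not the LLVM code):]

    #include <cstddef>
    #include <string>
    #include <vector>

    // Expand an ".irp"-style body once per value by substituting the formal
    // parameter (spelled \param in GAS syntax), then append ".endr" so the
    // buffer can be replayed through the lexer, mirroring the
    // OS << ".endr\n" step in instantiateMacroLikeBody above.
    static std::string expandIrp(const std::string &Body,
                                 const std::string &Param,
                                 const std::vector<std::string> &Values) {
      const std::string Needle = "\\" + Param;
      std::string Out;
      for (std::size_t V = 0; V != Values.size(); ++V) {
        std::string Inst = Body;
        for (std::size_t P = Inst.find(Needle); P != std::string::npos;
             P = Inst.find(Needle, P + Values[V].size()))
          Inst.replace(P, Needle.size(), Values[V]);
        Out += Inst;
      }
      return Out + ".endr\n";
    }
    // expandIrp("pushq \\reg\n", "reg", {"%rax", "%rbx"})
    //   -> "pushq %rax\npushq %rbx\n.endr\n"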
assert(getLexer().is(AsmToken::EndOfStatement)); - HandleMacroExit(); + handleMacroExit(); return false; } -bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, +bool AsmParser::parseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, size_t Len) { const MCExpr *Value; SMLoc ExprLoc = getLexer().getLoc(); @@ -4018,7 +4076,7 @@ bool AsmParser::ParseDirectiveMSEmit(SMLoc IDLoc, ParseStatementInfo &Info, return false; } -bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { +bool AsmParser::parseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { const MCExpr *Value; SMLoc ExprLoc = getLexer().getLoc(); if (parseExpression(Value)) @@ -4030,16 +4088,15 @@ bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { if (!isPowerOf2_64(IntValue)) return Error(ExprLoc, "literal value not a power of two greater than zero"); - Info.AsmRewrites->push_back(AsmRewrite(AOK_Align, IDLoc, 5, - Log2_64(IntValue))); + Info.AsmRewrites->push_back( + AsmRewrite(AOK_Align, IDLoc, 5, Log2_64(IntValue))); return false; } // We are comparing pointers, but the pointers are relative to a single string. // Thus, this should always be deterministic. -static int RewritesSort(const void *A, const void *B) { - const AsmRewrite *AsmRewriteA = static_cast<const AsmRewrite *>(A); - const AsmRewrite *AsmRewriteB = static_cast<const AsmRewrite *>(B); +static int rewritesSort(const AsmRewrite *AsmRewriteA, + const AsmRewrite *AsmRewriteB) { if (AsmRewriteA->Loc.getPointer() < AsmRewriteB->Loc.getPointer()) return -1; if (AsmRewriteB->Loc.getPointer() < AsmRewriteA->Loc.getPointer()) @@ -4049,25 +4106,22 @@ bool AsmParser::ParseDirectiveMSAlign(SMLoc IDLoc, ParseStatementInfo &Info) { // rewrite to the same location. Make sure the SizeDirective rewrite is // performed first, then the Imm/ImmPrefix and finally the Input/Output. This // ensures the sort algorithm is stable.
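[Editor's aside: the comparator never returns 0, so even an unstable sort like array_pod_sort (a qsort wrapper) cannot reorder "equal" elements unpredictably; two rewrites at the same location with the same precedence would be a bug, hence the llvm_unreachable below. The same total-order trick in a reduced standalone form (illustrative types, not the LLVM code):]

    #include <cassert>

    struct Rewrite { const char *Loc; int Precedence; };

    // qsort-style comparator with a total order: primary key is position in
    // the (single) source buffer, secondary key is precedence, and exact
    // ties are asserted away, so no two elements ever compare equal.
    static int compareRewrites(const void *A, const void *B) {
      const Rewrite *RA = static_cast<const Rewrite *>(A);
      const Rewrite *RB = static_cast<const Rewrite *>(B);
      if (RA->Loc != RB->Loc)
        return RA->Loc < RB->Loc ? -1 : 1;
      assert(RA->Precedence != RB->Precedence && "unstable rewrite sort");
      return RA->Precedence > RB->Precedence ? -1 : 1;
    }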
- if (AsmRewritePrecedence [AsmRewriteA->Kind] > - AsmRewritePrecedence [AsmRewriteB->Kind]) + if (AsmRewritePrecedence[AsmRewriteA->Kind] > + AsmRewritePrecedence[AsmRewriteB->Kind]) return -1; - if (AsmRewritePrecedence [AsmRewriteA->Kind] < - AsmRewritePrecedence [AsmRewriteB->Kind]) + if (AsmRewritePrecedence[AsmRewriteA->Kind] < + AsmRewritePrecedence[AsmRewriteB->Kind]) return 1; - llvm_unreachable ("Unstable rewrite sort."); + llvm_unreachable("Unstable rewrite sort."); } -bool -AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, - unsigned &NumOutputs, unsigned &NumInputs, - SmallVectorImpl<std::pair<void *, bool> > &OpDecls, - SmallVectorImpl<std::string> &Constraints, - SmallVectorImpl<std::string> &Clobbers, - const MCInstrInfo *MII, - const MCInstPrinter *IP, - MCAsmParserSemaCallback &SI) { +bool AsmParser::parseMSInlineAsm( + void *AsmLoc, std::string &AsmString, unsigned &NumOutputs, + unsigned &NumInputs, SmallVectorImpl<std::pair<void *, bool> > &OpDecls, + SmallVectorImpl<std::string> &Constraints, + SmallVectorImpl<std::string> &Clobbers, const MCInstrInfo *MII, + const MCInstPrinter *IP, MCAsmParserSemaCallback &SI) { SmallVector<void *, 4> InputDecls; SmallVector<void *, 4> OutputDecls; SmallVector<bool, 4> InputDeclsAddressOf; @@ -4086,7 +4140,7 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned OutputIdx = 0; while (getLexer().isNot(AsmToken::Eof)) { ParseStatementInfo Info(&AsmStrRewrites); - if (ParseStatement(Info)) + if (parseStatement(Info)) return true; if (Info.ParseError) @@ -4174,7 +4228,7 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, raw_string_ostream OS(AsmStringIR); const char *AsmStart = SrcMgr.getMemoryBuffer(0)->getBufferStart(); const char *AsmEnd = SrcMgr.getMemoryBuffer(0)->getBufferEnd(); - array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), RewritesSort); + array_pod_sort(AsmStrRewrites.begin(), AsmStrRewrites.end(), rewritesSort); for (SmallVectorImpl<AsmRewrite>::iterator I = AsmStrRewrites.begin(), E = AsmStrRewrites.end(); I != E; ++I) { @@ -4199,7 +4253,8 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, unsigned AdditionalSkip = 0; // Rewrite expressions in $N notation. switch (Kind) { - default: break; + default: + break; case AOK_Imm: OS << "$$" << (*I).Val; break; @@ -4254,8 +4309,7 @@ AsmParser::parseMSInlineAsm(void *AsmLoc, std::string &AsmString, } /// \brief Create an MCAsmParser instance. -MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, - MCContext &C, MCStreamer &Out, - const MCAsmInfo &MAI) { +MCAsmParser *llvm::createMCAsmParser(SourceMgr &SM, MCContext &C, + MCStreamer &Out, const MCAsmInfo &MAI) { return new AsmParser(SM, C, Out, MAI); } diff --git a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp index a50eab2..d8343a3 100644 --- a/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/COFFAsmParser.cpp @@ -35,6 +35,13 @@ class COFFAsmParser : public MCAsmParserExtension { unsigned Characteristics, SectionKind Kind); + bool ParseSectionSwitch(StringRef Section, unsigned Characteristics, + SectionKind Kind, StringRef COMDATSymName, + COFF::COMDATType Type, const MCSectionCOFF *Assoc); + + bool ParseSectionName(StringRef &SectionName); + bool ParseSectionFlags(StringRef FlagsString, unsigned* Flags); + virtual void Initialize(MCAsmParser &Parser) { // Call the base implementation.
MCAsmParserExtension::Initialize(Parser); @@ -42,11 +49,13 @@ class COFFAsmParser : public MCAsmParserExtension { addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveText>(".text"); addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveData>(".data"); addDirectiveHandler<&COFFAsmParser::ParseSectionDirectiveBSS>(".bss"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveSection>(".section"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveDef>(".def"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveScl>(".scl"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveType>(".type"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveEndef>(".endef"); addDirectiveHandler<&COFFAsmParser::ParseDirectiveSecRel32>(".secrel32"); + addDirectiveHandler<&COFFAsmParser::ParseDirectiveLinkOnce>(".linkonce"); // Win64 EH directives. addDirectiveHandler<&COFFAsmParser::ParseSEHDirectiveStartProc>( @@ -100,11 +109,15 @@ class COFFAsmParser : public MCAsmParserExtension { SectionKind::getBSS()); } + bool ParseDirectiveSection(StringRef, SMLoc); bool ParseDirectiveDef(StringRef, SMLoc); bool ParseDirectiveScl(StringRef, SMLoc); bool ParseDirectiveType(StringRef, SMLoc); bool ParseDirectiveEndef(StringRef, SMLoc); bool ParseDirectiveSecRel32(StringRef, SMLoc); + bool parseCOMDATTypeAndAssoc(COFF::COMDATType &Type, + const MCSectionCOFF *&Assoc); + bool ParseDirectiveLinkOnce(StringRef, SMLoc); // Win64 EH directives. bool ParseSEHDirectiveStartProc(StringRef, SMLoc); @@ -130,6 +143,119 @@ public: } // end anonymous namespace. +static SectionKind computeSectionKind(unsigned Flags) { + if (Flags & COFF::IMAGE_SCN_MEM_EXECUTE) + return SectionKind::getText(); + if (Flags & COFF::IMAGE_SCN_MEM_READ && + (Flags & COFF::IMAGE_SCN_MEM_WRITE) == 0) + return SectionKind::getReadOnly(); + return SectionKind::getDataRel(); +} + +bool COFFAsmParser::ParseSectionFlags(StringRef FlagsString, unsigned* Flags) { + enum { + None = 0, + Alloc = 1 << 0, + Code = 1 << 1, + Load = 1 << 2, + InitData = 1 << 3, + Shared = 1 << 4, + NoLoad = 1 << 5, + NoRead = 1 << 6, + NoWrite = 1 << 7 + }; + + bool ReadOnlyRemoved = false; + unsigned SecFlags = None; + + for (unsigned i = 0; i < FlagsString.size(); ++i) { + switch (FlagsString[i]) { + case 'a': + // Ignored.
+ break; + + case 'b': // bss section + SecFlags |= Alloc; + if (SecFlags & InitData) + return TokError("conflicting section flags 'b' and 'd'."); + SecFlags &= ~Load; + break; + + case 'd': // data section + SecFlags |= InitData; + if (SecFlags & Alloc) + return TokError("conflicting section flags 'b' and 'd'."); + SecFlags &= ~NoWrite; + if ((SecFlags & NoLoad) == 0) + SecFlags |= Load; + break; + + case 'n': // section is not loaded + SecFlags |= NoLoad; + SecFlags &= ~Load; + break; + + case 'r': // read-only + ReadOnlyRemoved = false; + SecFlags |= NoWrite; + if ((SecFlags & Code) == 0) + SecFlags |= InitData; + if ((SecFlags & NoLoad) == 0) + SecFlags |= Load; + break; + + case 's': // shared section + SecFlags |= Shared | InitData; + SecFlags &= ~NoWrite; + if ((SecFlags & NoLoad) == 0) + SecFlags |= Load; + break; + + case 'w': // writable + SecFlags &= ~NoWrite; + ReadOnlyRemoved = true; + break; + + case 'x': // executable section + SecFlags |= Code; + if ((SecFlags & NoLoad) == 0) + SecFlags |= Load; + if (!ReadOnlyRemoved) + SecFlags |= NoWrite; + break; + + case 'y': // not readable + SecFlags |= NoRead | NoWrite; + break; + + default: + return TokError("unknown flag"); + } + } + + *Flags = 0; + + if (SecFlags == None) + SecFlags = InitData; + + if (SecFlags & Code) + *Flags |= COFF::IMAGE_SCN_CNT_CODE | COFF::IMAGE_SCN_MEM_EXECUTE; + if (SecFlags & InitData) + *Flags |= COFF::IMAGE_SCN_CNT_INITIALIZED_DATA; + if ((SecFlags & Alloc) && (SecFlags & Load) == 0) + *Flags |= COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA; + if (SecFlags & NoLoad) + *Flags |= COFF::IMAGE_SCN_LNK_REMOVE; + if ((SecFlags & NoRead) == 0) + *Flags |= COFF::IMAGE_SCN_MEM_READ; + if ((SecFlags & NoWrite) == 0) + *Flags |= COFF::IMAGE_SCN_MEM_WRITE; + if (SecFlags & Shared) + *Flags |= COFF::IMAGE_SCN_MEM_SHARED; + + return false; +} + /// ParseDirectiveSymbolAttribute /// ::= { ".weak", ... } [ identifier ( , identifier )* ] bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { @@ -164,13 +290,96 @@ bool COFFAsmParser::ParseDirectiveSymbolAttribute(StringRef Directive, SMLoc) { bool COFFAsmParser::ParseSectionSwitch(StringRef Section, unsigned Characteristics, SectionKind Kind) { + return ParseSectionSwitch(Section, Characteristics, Kind, "", + COFF::IMAGE_COMDAT_SELECT_ANY, 0); +} + +bool COFFAsmParser::ParseSectionSwitch(StringRef Section, + unsigned Characteristics, + SectionKind Kind, + StringRef COMDATSymName, + COFF::COMDATType Type, + const MCSectionCOFF *Assoc) { if (getLexer().isNot(AsmToken::EndOfStatement)) return TokError("unexpected token in section switching directive"); Lex(); getStreamer().SwitchSection(getContext().getCOFFSection( - Section, Characteristics, Kind)); + Section, Characteristics, Kind, COMDATSymName, Type, Assoc)); + + return false; +} + +bool COFFAsmParser::ParseSectionName(StringRef &SectionName) { + if (!getLexer().is(AsmToken::Identifier)) + return true; + + SectionName = getTok().getIdentifier(); + Lex(); + return false; +} + +// .section name [, "flags"] [, identifier [ identifier ], identifier] +// +// Supported flags: +// a: Ignored. +// b: BSS section (uninitialized data) +// d: data section (initialized data) +// n: Discardable section +// r: Readable section +// s: Shared section +// w: Writable section +// x: Executable section +// y: Not-readable section (clears 'r') +// +// Subsections are not supported. 
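[Editor's aside: the flag letters documented above translate into COFF section characteristics in two stages, letters to an internal bitset, then the bitset to IMAGE_SCN_* bits. A heavily reduced standalone sketch of the second mapping for just 'd', 'w' and 'r' (the constants below are the real PE/COFF values, but the function is illustrative, not the parser's ParseSectionFlags):]

    #include <cassert>

    // Real PE/COFF characteristic values for the three bits used here.
    enum : unsigned {
      CNT_INITIALIZED_DATA = 0x00000040,
      MEM_READ             = 0x40000000,
      MEM_WRITE            = 0x80000000
    };

    static unsigned sectionFlagsFor(const char *Letters) {
      bool Writable = false, Data = false;
      for (const char *P = Letters; *P; ++P) {
        if (*P == 'd') Data = true;      // initialized data
        if (*P == 'w') Writable = true;  // writable
        if (*P == 'r') Writable = false; // read-only
      }
      unsigned F = MEM_READ;             // readable unless 'y' (omitted here)
      if (Data) F |= CNT_INITIALIZED_DATA;
      if (Writable) F |= MEM_WRITE;
      return F;
    }

    int main() {
      // ".section .mydata, \"dw\"" -> initialized, readable, writable data
      assert(sectionFlagsFor("dw") ==
             (CNT_INITIALIZED_DATA | MEM_READ | MEM_WRITE));
    }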
+bool COFFAsmParser::ParseDirectiveSection(StringRef, SMLoc) { + StringRef SectionName; + + if (ParseSectionName(SectionName)) + return TokError("expected identifier in directive"); + + unsigned Flags = COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ | + COFF::IMAGE_SCN_MEM_WRITE; + + if (getLexer().is(AsmToken::Comma)) { + Lex(); + + if (getLexer().isNot(AsmToken::String)) + return TokError("expected string in directive"); + + StringRef FlagsStr = getTok().getStringContents(); + Lex(); + + if (ParseSectionFlags(FlagsStr, &Flags)) + return true; + } + + COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY; + const MCSectionCOFF *Assoc = 0; + StringRef COMDATSymName; + if (getLexer().is(AsmToken::Comma)) { + Lex(); + + Flags |= COFF::IMAGE_SCN_LNK_COMDAT; + if (parseCOMDATTypeAndAssoc(Type, Assoc)) + return true; + + if (getLexer().isNot(AsmToken::Comma)) + return TokError("expected comma in directive"); + Lex(); + + if (getParser().parseIdentifier(COMDATSymName)) + return TokError("expected identifier in directive"); + } + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + SectionKind Kind = computeSectionKind(Flags); + ParseSectionSwitch(SectionName, Flags, Kind, COMDATSymName, Type, Assoc); return false; } @@ -235,6 +444,75 @@ bool COFFAsmParser::ParseDirectiveSecRel32(StringRef, SMLoc) { return false; } +/// ::= [ identifier [ identifier ] ] +bool COFFAsmParser::parseCOMDATTypeAndAssoc(COFF::COMDATType &Type, + const MCSectionCOFF *&Assoc) { + StringRef TypeId = getTok().getIdentifier(); + + Type = StringSwitch<COFF::COMDATType>(TypeId) + .Case("one_only", COFF::IMAGE_COMDAT_SELECT_NODUPLICATES) + .Case("discard", COFF::IMAGE_COMDAT_SELECT_ANY) + .Case("same_size", COFF::IMAGE_COMDAT_SELECT_SAME_SIZE) + .Case("same_contents", COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH) + .Case("associative", COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) + .Case("largest", COFF::IMAGE_COMDAT_SELECT_LARGEST) + .Case("newest", COFF::IMAGE_COMDAT_SELECT_NEWEST) + .Default((COFF::COMDATType)0); + + if (Type == 0) + return TokError(Twine("unrecognized COMDAT type '" + TypeId + "'")); + + Lex(); + + if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) { + SMLoc Loc = getTok().getLoc(); + StringRef AssocName; + if (ParseSectionName(AssocName)) + return TokError("expected associated section name"); + + Assoc = static_cast<const MCSectionCOFF *>( + getContext().getCOFFSection(AssocName)); + if (!Assoc) + return Error(Loc, "cannot associate unknown section '" + AssocName + "'"); + if (!(Assoc->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT)) + return Error(Loc, "associated section must be a COMDAT section"); + if (Assoc->getSelection() == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) + return Error(Loc, "associated section cannot be itself associative"); + } + + return false; +} + +/// ParseDirectiveLinkOnce +/// ::= .linkonce [ identifier [ identifier ] ] +bool COFFAsmParser::ParseDirectiveLinkOnce(StringRef, SMLoc Loc) { + COFF::COMDATType Type = COFF::IMAGE_COMDAT_SELECT_ANY; + const MCSectionCOFF *Assoc = 0; + if (getLexer().is(AsmToken::Identifier)) + if (parseCOMDATTypeAndAssoc(Type, Assoc)) + return true; + + const MCSectionCOFF *Current = static_cast<const MCSectionCOFF *>( + getStreamer().getCurrentSection().first); + + + if (Type == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) { + if (Assoc == Current) + return Error(Loc, "cannot associate a section with itself"); + } + + if (Current->getCharacteristics() & COFF::IMAGE_SCN_LNK_COMDAT) + return Error(Loc, Twine("section '") + Current->getSectionName() + + "' is already
linkonce"); + + Current->setSelection(Type, Assoc); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in directive"); + + return false; +} + bool COFFAsmParser::ParseSEHDirectiveStartProc(StringRef, SMLoc) { StringRef SymbolID; if (getParser().parseIdentifier(SymbolID)) @@ -453,7 +731,7 @@ bool COFFAsmParser::ParseAtUnwindOrAtExcept(bool &unwind, bool &except) { bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) { SMLoc startLoc = getLexer().getLoc(); if (getLexer().is(AsmToken::Percent)) { - const MCRegisterInfo &MRI = getContext().getRegisterInfo(); + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); SMLoc endLoc; unsigned LLVMRegNo; if (getParser().getTargetParser().ParseRegister(LLVMRegNo,startLoc,endLoc)) @@ -473,7 +751,7 @@ bool COFFAsmParser::ParseSEHRegisterNumber(unsigned &RegNo) { return Error(startLoc, "expected non-volatile register"); #endif - int SEHRegNo = MRI.getSEHRegNum(LLVMRegNo); + int SEHRegNo = MRI->getSEHRegNum(LLVMRegNo); if (SEHRegNo < 0) return Error(startLoc,"register can't be represented in SEH unwind info"); RegNo = SEHRegNo; diff --git a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp index 7eb8b74..4c9bafa 100644 --- a/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/DarwinAsmParser.cpp @@ -45,6 +45,8 @@ public: this->MCAsmParserExtension::Initialize(Parser); addDirectiveHandler<&DarwinAsmParser::ParseDirectiveDesc>(".desc"); + addDirectiveHandler<&DarwinAsmParser::ParseDirectiveIndirectSymbol>( + ".indirect_symbol"); addDirectiveHandler<&DarwinAsmParser::ParseDirectiveLsym>(".lsym"); addDirectiveHandler<&DarwinAsmParser::ParseDirectiveSubsectionsViaSymbols>( ".subsections_via_symbols"); @@ -69,6 +71,7 @@ public: ".end_data_region"); // Special section directives. 
+ addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveBss>(".bss"); addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConst>(".const"); addDirectiveHandler<&DarwinAsmParser::ParseSectionDirectiveConstData>( ".const_data"); @@ -163,6 +166,7 @@ public: } bool ParseDirectiveDesc(StringRef, SMLoc); + bool ParseDirectiveIndirectSymbol(StringRef, SMLoc); bool ParseDirectiveDumpOrLoad(StringRef, SMLoc); bool ParseDirectiveLsym(StringRef, SMLoc); bool ParseDirectiveLinkerOption(StringRef, SMLoc); @@ -179,6 +183,10 @@ public: bool ParseDirectiveDataRegionEnd(StringRef, SMLoc); // Named Section Directive + bool ParseSectionDirectiveBss(StringRef, SMLoc) { + return ParseSectionSwitch("__DATA", "__bss"); + } + bool ParseSectionDirectiveConst(StringRef, SMLoc) { return ParseSectionSwitch("__TEXT", "__const"); } @@ -415,6 +423,39 @@ bool DarwinAsmParser::ParseDirectiveDesc(StringRef, SMLoc) { return false; } +/// ParseDirectiveIndirectSymbol +/// ::= .indirect_symbol identifier +bool DarwinAsmParser::ParseDirectiveIndirectSymbol(StringRef, SMLoc Loc) { + const MCSectionMachO *Current = static_cast<const MCSectionMachO *>( + getStreamer().getCurrentSection().first); + unsigned SectionType = Current->getType(); + if (SectionType != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS && + SectionType != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && + SectionType != MCSectionMachO::S_SYMBOL_STUBS) + return Error(Loc, "indirect symbol not in a symbol pointer or stub " + "section"); + + StringRef Name; + if (getParser().parseIdentifier(Name)) + return TokError("expected identifier in .indirect_symbol directive"); + + MCSymbol *Sym = getContext().GetOrCreateSymbol(Name); + + // Assembler local symbols don't make any sense here. Complain loudly. + if (Sym->isTemporary()) + return TokError("non-local symbol required in directive"); + + if (!getStreamer().EmitSymbolAttribute(Sym, MCSA_IndirectSymbol)) + return TokError("unable to emit indirect symbol attribute for: " + Name); + + if (getLexer().isNot(AsmToken::EndOfStatement)) + return TokError("unexpected token in '.indirect_symbol' directive"); + + Lex(); + + return false; +} + /// ParseDirectiveDumpOrLoad /// ::= ( .dump | .load ) "filename" bool DarwinAsmParser::ParseDirectiveDumpOrLoad(StringRef Directive, @@ -593,7 +634,7 @@ bool DarwinAsmParser::ParseDirectiveSecureLogUnique(StringRef, SMLoc IDLoc) { raw_ostream *OS = getContext().getSecureLog(); if (OS == NULL) { std::string Err; - OS = new raw_fd_ostream(SecureLogFile, Err, raw_fd_ostream::F_Append); + OS = new raw_fd_ostream(SecureLogFile, Err, sys::fs::F_Append); if (!Err.empty()) { delete OS; return Error(IDLoc, Twine("can't open secure log file: ") + diff --git a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp index 3134fc3..8807975 100644 --- a/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp +++ b/contrib/llvm/lib/MC/MCParser/ELFAsmParser.cpp @@ -16,6 +16,7 @@ #include "llvm/MC/MCParser/MCAsmLexer.h" #include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" #include "llvm/Support/ELF.h" using namespace llvm; @@ -30,14 +31,11 @@ class ELFAsmParser : public MCAsmParserExtension { getParser().addDirectiveHandler(Directive, Handler); } - bool ParseSectionSwitch(StringRef Section, unsigned Type, - unsigned Flags, SectionKind Kind); - bool SeenIdent; + bool ParseSectionSwitch(StringRef Section, unsigned Type, unsigned Flags, + SectionKind Kind); public: - ELFAsmParser() : SeenIdent(false) { - BracketExpressionsSupported = true; - } +
ELFAsmParser() { BracketExpressionsSupported = true; } virtual void Initialize(MCAsmParser &Parser) { // Call the base implementation. @@ -241,7 +239,6 @@ bool ELFAsmParser::ParseSectionName(StringRef &SectionName) { } for (;;) { - StringRef Tmp; unsigned CurSize; SMLoc PrevLoc = getLexer().getLoc(); @@ -279,14 +276,17 @@ static SectionKind computeSectionKind(unsigned Flags) { return SectionKind::getDataRel(); } -static int parseSectionFlags(StringRef flagsStr) { - int flags = 0; +static unsigned parseSectionFlags(StringRef flagsStr, bool *UseLastGroup) { + unsigned flags = 0; for (unsigned i = 0; i < flagsStr.size(); i++) { switch (flagsStr[i]) { case 'a': flags |= ELF::SHF_ALLOC; break; + case 'e': + flags |= ELF::SHF_EXCLUDE; + break; case 'x': flags |= ELF::SHF_EXECINSTR; break; @@ -311,8 +311,11 @@ static int parseSectionFlags(StringRef flagsStr) { case 'G': flags |= ELF::SHF_GROUP; break; + case '?': + *UseLastGroup = true; + break; default: - return -1; + return -1U; } } @@ -352,6 +355,7 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) { StringRef GroupName; unsigned Flags = 0; const MCExpr *Subsection = 0; + bool UseLastGroup = false; // Set the defaults first. if (SectionName == ".fini" || SectionName == ".init" || @@ -377,13 +381,16 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) { StringRef FlagsStr = getTok().getStringContents(); Lex(); - int extraFlags = parseSectionFlags(FlagsStr); - if (extraFlags < 0) + unsigned extraFlags = parseSectionFlags(FlagsStr, &UseLastGroup); + if (extraFlags == -1U) return TokError("unknown flag"); Flags |= extraFlags; bool Mergeable = Flags & ELF::SHF_MERGE; bool Group = Flags & ELF::SHF_GROUP; + if (Group && UseLastGroup) + return TokError("Section cannot specify a group name while also acting " + "as a member of the last group"); if (getLexer().isNot(AsmToken::Comma)) { if (Mergeable) @@ -392,10 +399,13 @@ bool ELFAsmParser::ParseSectionArguments(bool IsPush) { return TokError("Group section must specify the type"); } else { Lex(); - if (getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At)) - return TokError("expected '@' or '%' before type"); + if (getLexer().is(AsmToken::At) || getLexer().is(AsmToken::Percent) || + getLexer().is(AsmToken::String)) { + if (!getLexer().is(AsmToken::String)) + Lex(); + } else + return TokError("expected '@<type>', '%<type>' or \"<type>\""); - Lex(); if (getParser().parseIdentifier(TypeName)) return TokError("expected identifier in directive"); @@ -461,6 +471,16 @@ EndStmt: return TokError("unknown section type"); } + if (UseLastGroup) { + MCSectionSubPair CurrentSection = getStreamer().getCurrentSection(); + if (const MCSectionELF *Section = + cast_or_null<MCSectionELF>(CurrentSection.first)) + if (const MCSymbol *Group = Section->getGroup()) { + GroupName = Group->getName(); + Flags |= ELF::SHF_GROUP; + } + } + SectionKind Kind = computeSectionKind(Flags); getStreamer().SwitchSection(getContext().getELFSection(SectionName, Type, Flags, Kind, Size, @@ -479,7 +499,11 @@ bool ELFAsmParser::ParseDirectivePrevious(StringRef DirName, SMLoc) { } /// ParseDirectiveELFType +/// ::= .type identifier , STT_<TYPE_IN_UPPER_CASE> +/// ::= .type identifier , #attribute /// ::= .type identifier , @attribute +/// ::= .type identifier , %attribute +/// ::= .type identifier , "attribute" bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) { StringRef Name; if (getParser().parseIdentifier(Name)) @@ -492,26 +516,42 @@ bool ELFAsmParser::ParseDirectiveType(StringRef, SMLoc) { return TokError("unexpected token in '.type' directive"); Lex(); - if
(getLexer().isNot(AsmToken::Percent) && getLexer().isNot(AsmToken::At)) - return TokError("expected '@' or '%' before type"); - Lex(); - StringRef Type; SMLoc TypeLoc; + MCSymbolAttr Attr; + if (getLexer().is(AsmToken::Identifier)) { + TypeLoc = getLexer().getLoc(); + if (getParser().parseIdentifier(Type)) + return TokError("expected symbol type in directive"); + Attr = StringSwitch<MCSymbolAttr>(Type) + .Case("STT_FUNC", MCSA_ELF_TypeFunction) + .Case("STT_OBJECT", MCSA_ELF_TypeObject) + .Case("STT_TLS", MCSA_ELF_TypeTLS) + .Case("STT_COMMON", MCSA_ELF_TypeCommon) + .Case("STT_NOTYPE", MCSA_ELF_TypeNoType) + .Case("STT_GNU_IFUNC", MCSA_ELF_TypeIndFunction) + .Default(MCSA_Invalid); + } else if (getLexer().is(AsmToken::Hash) || getLexer().is(AsmToken::At) || + getLexer().is(AsmToken::Percent) || + getLexer().is(AsmToken::String)) { + if (!getLexer().is(AsmToken::String)) + Lex(); - TypeLoc = getLexer().getLoc(); - if (getParser().parseIdentifier(Type)) - return TokError("expected symbol type in directive"); - - MCSymbolAttr Attr = StringSwitch<MCSymbolAttr>(Type) - .Case("function", MCSA_ELF_TypeFunction) - .Case("object", MCSA_ELF_TypeObject) - .Case("tls_object", MCSA_ELF_TypeTLS) - .Case("common", MCSA_ELF_TypeCommon) - .Case("notype", MCSA_ELF_TypeNoType) - .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject) - .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction) - .Default(MCSA_Invalid); + TypeLoc = getLexer().getLoc(); + if (getParser().parseIdentifier(Type)) + return TokError("expected symbol type in directive"); + Attr = StringSwitch<MCSymbolAttr>(Type) + .Case("function", MCSA_ELF_TypeFunction) + .Case("object", MCSA_ELF_TypeObject) + .Case("tls_object", MCSA_ELF_TypeTLS) + .Case("common", MCSA_ELF_TypeCommon) + .Case("notype", MCSA_ELF_TypeNoType) + .Case("gnu_unique_object", MCSA_ELF_TypeGnuUniqueObject) + .Case("gnu_indirect_function", MCSA_ELF_TypeIndFunction) + .Default(MCSA_Invalid); + } else + return TokError("expected STT_<TYPE_IN_UPPER_CASE>, '#', '@', " + "'%' or \"<type>\""); if (Attr == MCSA_Invalid) return Error(TypeLoc, "unsupported attribute in '.type' directive"); @@ -536,22 +576,7 @@ bool ELFAsmParser::ParseDirectiveIdent(StringRef, SMLoc) { Lex(); - const MCSection *Comment = - getContext().getELFSection(".comment", ELF::SHT_PROGBITS, - ELF::SHF_MERGE | - ELF::SHF_STRINGS, - SectionKind::getReadOnly(), - 1, ""); - - getStreamer().PushSection(); - getStreamer().SwitchSection(Comment); - if (!SeenIdent) { - getStreamer().EmitIntValue(0, 1); - SeenIdent = true; - } - getStreamer().EmitBytes(Data); - getStreamer().EmitIntValue(0, 1); - getStreamer().PopSection(); + getStreamer().EmitIdent(Data); return false; } diff --git a/contrib/llvm/lib/MC/MCPureStreamer.cpp b/contrib/llvm/lib/MC/MCPureStreamer.cpp index 8ae724f..f7bf002 100644 --- a/contrib/llvm/lib/MC/MCPureStreamer.cpp +++ b/contrib/llvm/lib/MC/MCPureStreamer.cpp @@ -29,7 +29,7 @@ private: public: MCPureStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, MCCodeEmitter *Emitter) - : MCObjectStreamer(SK_PureStreamer, Context, TAB, OS, Emitter) {} + : MCObjectStreamer(Context, 0, TAB, OS, Emitter) {} /// @name MCStreamer Interface /// @{ @@ -40,7 +40,7 @@ public: virtual void EmitDebugLabel(MCSymbol *Symbol); virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0, uint64_t Size = 0, unsigned ByteAlignment = 0); - virtual void EmitBytes(StringRef Data, unsigned AddrSpace); + virtual void EmitBytes(StringRef Data); virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0, unsigned ValueSize = 1, unsigned MaxBytesToEmit = 0);
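[Editor's aside: the rewritten ParseDirectiveType above accepts five spellings, ".type sym, STT_FUNC", "#function", "@function", "%function" and "\"function\"", and normalizes all of them to one MCSymbolAttr. A compact standalone sketch of that normalization (illustrative names; in the real parser the '#'/'@'/'%' prefix is consumed as a lexer token rather than stripped from the string):]

    #include <string>

    enum SymbolType { TypeInvalid, TypeFunction, TypeObject, TypeTLS };

    // Accept both the GNU keyword ("function") and the raw ELF constant
    // ("STT_FUNC") spellings, ignoring any leading '#', '@' or '%'.
    static SymbolType symbolTypeFor(std::string S) {
      if (!S.empty() && (S[0] == '#' || S[0] == '@' || S[0] == '%'))
        S.erase(0, 1);
      if (S == "function"   || S == "STT_FUNC")   return TypeFunction;
      if (S == "object"     || S == "STT_OBJECT") return TypeObject;
      if (S == "tls_object" || S == "STT_TLS")    return TypeTLS;
      return TypeInvalid;
    }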
@@ -51,8 +51,9 @@ public: virtual void FinishImpl(); - virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { + virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { report_fatal_error("unsupported directive in pure streamer"); + return false; } virtual void EmitAssemblerFlag(MCAssemblerFlag Flag) { report_fatal_error("unsupported directive in pure streamer"); @@ -93,16 +94,13 @@ public: virtual void EmitFileDirective(StringRef Filename) { report_fatal_error("unsupported directive in pure streamer"); } + virtual void EmitIdent(StringRef IdentString) { + report_fatal_error("unsupported directive in pure streamer"); + } virtual bool EmitDwarfFileDirective(unsigned FileNo, StringRef Directory, StringRef Filename, unsigned CUID = 0) { report_fatal_error("unsupported directive in pure streamer"); } - - /// @} - - static bool classof(const MCStreamer *S) { - return S->getKind() == SK_PureStreamer; - } }; } // end anonymous namespace. @@ -120,7 +118,7 @@ void MCPureStreamer::EmitLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(getCurrentSection().first && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection().first); + AssignSection(Symbol, getCurrentSection().first); MCSymbolData &SD = getAssembler().getOrCreateSymbolData(*Symbol); @@ -149,7 +147,7 @@ void MCPureStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol, report_fatal_error("not yet implemented in pure streamer"); } -void MCPureStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) { +void MCPureStreamer::EmitBytes(StringRef Data) { // TODO: This is exactly the same as WinCOFFStreamer. Consider merging into // MCObjectStreamer. getOrCreateDataFragment()->getContents().append(Data.begin(), Data.end()); diff --git a/contrib/llvm/lib/MC/MCRegisterInfo.cpp b/contrib/llvm/lib/MC/MCRegisterInfo.cpp index 5c71106..ce79cd5 100644 --- a/contrib/llvm/lib/MC/MCRegisterInfo.cpp +++ b/contrib/llvm/lib/MC/MCRegisterInfo.cpp @@ -46,6 +46,18 @@ unsigned MCRegisterInfo::getSubRegIndex(unsigned Reg, unsigned SubReg) const { return 0; } +unsigned MCRegisterInfo::getSubRegIdxSize(unsigned Idx) const { + assert(Idx && Idx < getNumSubRegIndices() && + "This is not a subregister index"); + return SubRegIdxRanges[Idx].Size; +} + +unsigned MCRegisterInfo::getSubRegIdxOffset(unsigned Idx) const { + assert(Idx && Idx < getNumSubRegIndices() && + "This is not a subregister index"); + return SubRegIdxRanges[Idx].Offset; +} + int MCRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const { const DwarfLLVMRegPair *M = isEH ? EHL2DwarfRegs : L2DwarfRegs; unsigned Size = isEH ? EHL2DwarfRegsSize : L2DwarfRegsSize; diff --git a/contrib/llvm/lib/MC/MCRelocationInfo.cpp b/contrib/llvm/lib/MC/MCRelocationInfo.cpp new file mode 100644 index 0000000..53c48ded --- /dev/null +++ b/contrib/llvm/lib/MC/MCRelocationInfo.cpp @@ -0,0 +1,39 @@ +//==-- lib/MC/MCRelocationInfo.cpp -------------------------------*- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCRelocationInfo.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/TargetRegistry.h" +#include "llvm-c/Disassembler.h" + +using namespace llvm; + +MCRelocationInfo::MCRelocationInfo(MCContext &Ctx) + : Ctx(Ctx) { +} + +MCRelocationInfo::~MCRelocationInfo() { +} + +const MCExpr * +MCRelocationInfo::createExprForRelocation(object::RelocationRef Rel) { + return 0; +} + +const MCExpr * +MCRelocationInfo::createExprForCAPIVariantKind(const MCExpr *SubExpr, + unsigned VariantKind) { + if (VariantKind != LLVMDisassembler_VariantKind_None) + return 0; + return SubExpr; +} + +MCRelocationInfo *llvm::createMCRelocationInfo(StringRef TT, MCContext &Ctx) { + return new MCRelocationInfo(Ctx); +} diff --git a/contrib/llvm/lib/MC/MCSectionCOFF.cpp b/contrib/llvm/lib/MC/MCSectionCOFF.cpp index 6cedf06..64aa2c5 100644 --- a/contrib/llvm/lib/MC/MCSectionCOFF.cpp +++ b/contrib/llvm/lib/MC/MCSectionCOFF.cpp @@ -28,6 +28,17 @@ bool MCSectionCOFF::ShouldOmitSectionDirective(StringRef Name, return false; } +void MCSectionCOFF::setSelection(int Selection, + const MCSectionCOFF *Assoc) const { + assert(Selection != 0 && "invalid COMDAT selection type"); + assert((Selection == COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) == + (Assoc != 0) && + "associative COMDAT section must have an associated section"); + this->Selection = Selection; + this->Assoc = Assoc; + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; +} + void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS, const MCExpr *Subsection) const { @@ -63,12 +74,15 @@ void MCSectionCOFF::PrintSwitchToSection(const MCAsmInfo &MAI, case COFF::IMAGE_COMDAT_SELECT_EXACT_MATCH: OS << "\t.linkonce same_contents\n"; break; - //NOTE: as of binutils 2.20, there is no way to specifiy select largest - // with the .linkonce directive. For now, we treat it as an invalid - // comdat selection value. + case COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE: + OS << "\t.linkonce associative " << Assoc->getSectionName() << "\n"; + break; case COFF::IMAGE_COMDAT_SELECT_LARGEST: - // OS << "\t.linkonce largest\n"; - // break; + OS << "\t.linkonce largest\n"; + break; + case COFF::IMAGE_COMDAT_SELECT_NEWEST: + OS << "\t.linkonce newest\n"; + break; default: assert (0 && "unsupported COFF selection type"); break; diff --git a/contrib/llvm/lib/MC/MCSectionELF.cpp b/contrib/llvm/lib/MC/MCSectionELF.cpp index bf1a984..09eb3e7 100644 --- a/contrib/llvm/lib/MC/MCSectionELF.cpp +++ b/contrib/llvm/lib/MC/MCSectionELF.cpp @@ -32,6 +32,29 @@ bool MCSectionELF::ShouldOmitSectionDirective(StringRef Name, return false; } +static void printName(raw_ostream &OS, StringRef Name) { + if (Name.find_first_not_of("0123456789_." + "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == Name.npos) { + OS << Name; + return; + } + OS << '"'; + for (const char *B = Name.begin(), *E = Name.end(); B < E; ++B) { + if (*B == '"') // Unquoted " + OS << "\\\""; + else if (*B != '\\') // Neither " or backslash + OS << *B; + else if (B + 1 == E) // Trailing backslash + OS << "\\\\"; + else { + OS << B[0] << B[1]; // Quoted character + ++B; + } + } + OS << '"'; +} + void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, raw_ostream &OS, const MCExpr *Subsection) const { @@ -44,27 +67,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, return; } - StringRef name = getSectionName(); - if (name.find_first_not_of("0123456789_." 
- "abcdefghijklmnopqrstuvwxyz" - "ABCDEFGHIJKLMNOPQRSTUVWXYZ") == name.npos) { - OS << "\t.section\t" << name; - } else { - OS << "\t.section\t\""; - for (const char *b = name.begin(), *e = name.end(); b < e; ++b) { - if (*b == '"') // Unquoted " - OS << "\\\""; - else if (*b != '\\') // Neither " or backslash - OS << *b; - else if (b + 1 == e) // Trailing backslash - OS << "\\\\"; - else { - OS << b[0] << b[1]; // Quoted character - ++b; - } - } - OS << '"'; - } + OS << "\t.section\t"; + printName(OS, getSectionName()); // Handle the weird solaris syntax if desired. if (MAI.usesSunStyleELFSectionSwitchSyntax() && @@ -75,6 +79,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << ",#execinstr"; if (Flags & ELF::SHF_WRITE) OS << ",#write"; + if (Flags & ELF::SHF_EXCLUDE) + OS << ",#exclude"; if (Flags & ELF::SHF_TLS) OS << ",#tls"; OS << '\n'; @@ -84,6 +90,8 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << ",\""; if (Flags & ELF::SHF_ALLOC) OS << 'a'; + if (Flags & ELF::SHF_EXCLUDE) + OS << 'e'; if (Flags & ELF::SHF_EXECINSTR) OS << 'x'; if (Flags & ELF::SHF_GROUP) @@ -131,8 +139,11 @@ void MCSectionELF::PrintSwitchToSection(const MCAsmInfo &MAI, OS << "," << EntrySize; } - if (Flags & ELF::SHF_GROUP) - OS << "," << Group->getName() << ",comdat"; + if (Flags & ELF::SHF_GROUP) { + OS << ","; + printName(OS, Group->getName()); + OS << ",comdat"; + } OS << '\n'; if (Subsection) diff --git a/contrib/llvm/lib/MC/MCStreamer.cpp b/contrib/llvm/lib/MC/MCStreamer.cpp index 8f1895e..2e1d69b 100644 --- a/contrib/llvm/lib/MC/MCStreamer.cpp +++ b/contrib/llvm/lib/MC/MCStreamer.cpp @@ -10,6 +10,7 @@ #include "llvm/MC/MCStreamer.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Twine.h" +#include "llvm/MC/MCAsmBackend.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -21,10 +22,17 @@ #include using namespace llvm; -MCStreamer::MCStreamer(StreamerKind Kind, MCContext &Ctx) - : Kind(Kind), Context(Ctx), EmitEHFrame(true), EmitDebugFrame(false), - CurrentW64UnwindInfo(0), LastSymbol(0), AutoInitSections(false) { +// Pin the vtables to this file. +MCTargetStreamer::~MCTargetStreamer() {} +void ARMTargetStreamer::anchor() {} + +MCStreamer::MCStreamer(MCContext &Ctx, MCTargetStreamer *TargetStreamer) + : Context(Ctx), TargetStreamer(TargetStreamer), EmitEHFrame(true), + EmitDebugFrame(false), CurrentW64UnwindInfo(0), LastSymbol(0), + AutoInitSections(false) { SectionStack.push_back(std::pair()); + if (TargetStreamer) + TargetStreamer->setStreamer(this); } MCStreamer::~MCStreamer() { @@ -58,7 +66,7 @@ const MCExpr *MCStreamer::BuildSymbolDiff(MCContext &Context, } const MCExpr *MCStreamer::ForceExpAbs(const MCExpr* Expr) { - if (Context.getAsmInfo().hasAggressiveSymbolFolding() || + if (Context.getAsmInfo()->hasAggressiveSymbolFolding() || isa(Expr)) return Expr; @@ -72,6 +80,13 @@ raw_ostream &MCStreamer::GetCommentOS() { return nulls(); } +void MCStreamer::generateCompactUnwindEncodings(MCAsmBackend *MAB) { + for (std::vector::iterator I = FrameInfos.begin(), + E = FrameInfos.end(); I != E; ++I) + I->CompactUnwindEncoding = + (MAB ? 
MAB->generateCompactUnwindEncoding(I->Instructions) : 0); +} + void MCStreamer::EmitDwarfSetLineAddr(int64_t LineDelta, const MCSymbol *Label, int PointerSize) { // emit the sequence to set the address @@ -86,55 +101,49 @@ void MCStreamer::EmitDwarfSetLineAddr(int64_t LineDelta, /// EmitIntValue - Special case of EmitValue that avoids the client having to /// pass in a MCExpr for constant integers. -void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size, - unsigned AddrSpace) { +void MCStreamer::EmitIntValue(uint64_t Value, unsigned Size) { assert(Size <= 8 && "Invalid size"); assert((isUIntN(8 * Size, Value) || isIntN(8 * Size, Value)) && "Invalid size"); char buf[8]; - const bool isLittleEndian = Context.getAsmInfo().isLittleEndian(); + const bool isLittleEndian = Context.getAsmInfo()->isLittleEndian(); for (unsigned i = 0; i != Size; ++i) { unsigned index = isLittleEndian ? i : (Size - i - 1); buf[i] = uint8_t(Value >> (index * 8)); } - EmitBytes(StringRef(buf, Size), AddrSpace); + EmitBytes(StringRef(buf, Size)); } /// EmitULEB128Value - Special case of EmitULEB128Value that avoids the /// client having to pass in a MCExpr for constant integers. -void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned Padding, - unsigned AddrSpace) { +void MCStreamer::EmitULEB128IntValue(uint64_t Value, unsigned Padding) { SmallString<128> Tmp; raw_svector_ostream OSE(Tmp); encodeULEB128(Value, OSE, Padding); - EmitBytes(OSE.str(), AddrSpace); + EmitBytes(OSE.str()); } /// EmitSLEB128Value - Special case of EmitSLEB128Value that avoids the /// client having to pass in a MCExpr for constant integers. -void MCStreamer::EmitSLEB128IntValue(int64_t Value, unsigned AddrSpace) { +void MCStreamer::EmitSLEB128IntValue(int64_t Value) { SmallString<128> Tmp; raw_svector_ostream OSE(Tmp); encodeSLEB128(Value, OSE); - EmitBytes(OSE.str(), AddrSpace); + EmitBytes(OSE.str()); } -void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { +void MCStreamer::EmitAbsValue(const MCExpr *Value, unsigned Size) { const MCExpr *ABS = ForceExpAbs(Value); - EmitValue(ABS, Size, AddrSpace); + EmitValue(ABS, Size); } -void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { - EmitValueImpl(Value, Size, AddrSpace); +void MCStreamer::EmitValue(const MCExpr *Value, unsigned Size) { + EmitValueImpl(Value, Size); } -void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size, - unsigned AddrSpace) { - EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size, - AddrSpace); +void MCStreamer::EmitSymbolValue(const MCSymbol *Sym, unsigned Size) { + EmitValueImpl(MCSymbolRefExpr::Create(Sym, getContext()), Size); } void MCStreamer::EmitGPRel64Value(const MCExpr *Value) { @@ -147,11 +156,15 @@ void MCStreamer::EmitGPRel32Value(const MCExpr *Value) { /// EmitFill - Emit NumBytes bytes worth of the value specified by /// FillValue. This implements directives such as '.space'. -void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue, - unsigned AddrSpace) { +void MCStreamer::EmitFill(uint64_t NumBytes, uint8_t FillValue) { const MCExpr *E = MCConstantExpr::Create(FillValue, getContext()); for (uint64_t i = 0, e = NumBytes; i != e; ++i) - EmitValue(E, 1, AddrSpace); + EmitValue(E, 1); +} + +/// The implementation in this class just redirects to EmitFill. 
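[Editor's aside: EmitIntValue's loop above is the usual endian-aware serialization: output byte i takes bits [8*index, 8*index+8) of the value, with index counted from the low end on little-endian targets and from the high end otherwise. The same loop in isolation (standalone, not the LLVM code):]

    #include <cstdint>

    // Serialize the low 'Size' bytes of 'Value' in the requested byte
    // order, exactly as the EmitIntValue loop above fills its stack buffer.
    static void writeInt(uint8_t *Buf, uint64_t Value, unsigned Size,
                         bool LittleEndian) {
      for (unsigned i = 0; i != Size; ++i) {
        unsigned index = LittleEndian ? i : (Size - i - 1);
        Buf[i] = uint8_t(Value >> (index * 8));
      }
    }
    // writeInt(B, 0x11223344, 4, true)  -> 44 33 22 11
    // writeInt(B, 0x11223344, 4, false) -> 11 22 33 44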
+void MCStreamer::EmitZeros(uint64_t NumBytes) { + EmitFill(NumBytes, 0); } bool MCStreamer::EmitDwarfFileDirective(unsigned FileNo, @@ -185,17 +198,28 @@ void MCStreamer::EmitEHSymAttributes(const MCSymbol *Symbol, MCSymbol *EHSymbol) { } +void MCStreamer::AssignSection(MCSymbol *Symbol, const MCSection *Section) { + if (Section) + Symbol->setSection(*Section); + else + Symbol->setUndefined(); + + // As we emit symbols into a section, track the order so that they can + // be sorted upon later. Zero is reserved to mean 'unemitted'. + SymbolOrdering[Symbol] = 1 + SymbolOrdering.size(); +} + void MCStreamer::EmitLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(getCurrentSection().first && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection().first); + AssignSection(Symbol, getCurrentSection().first); LastSymbol = Symbol; } void MCStreamer::EmitDebugLabel(MCSymbol *Symbol) { assert(!Symbol->isVariable() && "Cannot emit a variable symbol!"); assert(getCurrentSection().first && "Cannot emit before setting section!"); - Symbol->setSection(*getCurrentSection().first); + AssignSection(Symbol, getCurrentSection().first); LastSymbol = Symbol; } @@ -229,7 +253,7 @@ void MCStreamer::RecordProcStart(MCDwarfFrameInfo &Frame) { Frame.Function = LastSymbol; // If the function is externally visible, we need to create a local // symbol to avoid relocations. - StringRef Prefix = getContext().getAsmInfo().getPrivateGlobalPrefix(); + StringRef Prefix = getContext().getAsmInfo()->getPrivateGlobalPrefix(); if (LastSymbol && LastSymbol->getName().startswith(Prefix)) { Frame.Begin = LastSymbol; } else { @@ -382,6 +406,14 @@ void MCStreamer::EmitCFIRegister(int64_t Register1, int64_t Register2) { CurFrame->Instructions.push_back(Instruction); } +void MCStreamer::EmitCFIWindowSave() { + MCSymbol *Label = EmitCFICommon(); + MCCFIInstruction Instruction = + MCCFIInstruction::createWindowSave(Label); + MCDwarfFrameInfo *CurFrame = getCurrentFrameInfo(); + CurFrame->Instructions.push_back(Instruction); +} + void MCStreamer::setCurrentW64UnwindInfo(MCWin64EHUnwindInfo *Frame) { W64UnwindInfos.push_back(Frame); CurrentW64UnwindInfo = W64UnwindInfos.back(); @@ -472,7 +504,9 @@ void MCStreamer::EmitWin64EHSetFrame(unsigned Register, unsigned Offset) { report_fatal_error("Frame register and offset already specified!"); if (Offset & 0x0F) report_fatal_error("Misaligned frame pointer offset!"); - MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, 0, Register, Offset); + MCSymbol *Label = getContext().CreateTempSymbol(); + MCWin64EHInstruction Inst(Win64EH::UOP_SetFPReg, Label, Register, Offset); + EmitLabel(Label); CurFrame->LastFrameInst = CurFrame->Instructions.size(); CurFrame->Instructions.push_back(Inst); } @@ -536,54 +570,10 @@ void MCStreamer::EmitCOFFSecRel32(MCSymbol const *Symbol) { llvm_unreachable("This file format doesn't support this directive"); } -void MCStreamer::EmitFnStart() { - errs() << "Not implemented yet\n"; - abort(); -} - -void MCStreamer::EmitFnEnd() { - errs() << "Not implemented yet\n"; - abort(); -} - -void MCStreamer::EmitCantUnwind() { - errs() << "Not implemented yet\n"; - abort(); -} - -void MCStreamer::EmitHandlerData() { - errs() << "Not implemented yet\n"; - abort(); -} - -void MCStreamer::EmitPersonality(const MCSymbol *Personality) { - errs() << "Not implemented yet\n"; - abort(); -} - -void MCStreamer::EmitSetFP(unsigned FpReg, unsigned SpReg, int64_t Offset) { - errs() << "Not implemented yet\n"; - abort(); -} 
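[Editor's aside: AssignSection above also seeds SymbolOrdering, recording the order in which symbols are emitted and reserving zero to mean "unemitted", so a later default-constructed lookup result is meaningful. The idiom in miniature (a std::map variant that keeps the first index; the real code is keyed by MCSymbol* and simply overwrites on re-emission):]

    #include <map>
    #include <string>

    // Insertion-order bookkeeping: the map's size after inserting a new key
    // is that key's 1-based emission index; a lookup that default-constructs
    // 0 therefore means "never emitted".
    static std::map<std::string, unsigned> SymbolOrdering;

    static void recordEmission(const std::string &Sym) {
      unsigned &Slot = SymbolOrdering[Sym]; // inserts 0 if not yet present
      if (Slot == 0)
        Slot = (unsigned)SymbolOrdering.size();
    }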
- -void MCStreamer::EmitPad(int64_t Offset) { - errs() << "Not implemented yet\n"; - abort(); -} - -void MCStreamer::EmitRegSave(const SmallVectorImpl &RegList, bool) { - errs() << "Not implemented yet\n"; - abort(); -} - -void MCStreamer::EmitTCEntry(const MCSymbol &S) { - llvm_unreachable("Unsupported method"); -} - /// EmitRawText - If this file is backed by an assembly streamer, this dumps /// the specified string in the output .s file. This capability is /// indicated by the hasRawTextSupport() predicate. -void MCStreamer::EmitRawText(StringRef String) { +void MCStreamer::EmitRawTextImpl(StringRef String) { errs() << "EmitRawText called on an MCStreamer that doesn't support it, " " something must not be fully mc'ized\n"; abort(); @@ -591,19 +581,18 @@ void MCStreamer::EmitRawText(StringRef String) { void MCStreamer::EmitRawText(const Twine &T) { SmallString<128> Str; - T.toVector(Str); - EmitRawText(Str.str()); + EmitRawTextImpl(T.toStringRef(Str)); } -void MCStreamer::EmitFrames(bool usingCFI) { +void MCStreamer::EmitFrames(MCAsmBackend *MAB, bool usingCFI) { if (!getNumFrameInfos()) return; if (EmitEHFrame) - MCDwarfFrameEmitter::Emit(*this, usingCFI, true); + MCDwarfFrameEmitter::Emit(*this, MAB, usingCFI, true); if (EmitDebugFrame) - MCDwarfFrameEmitter::Emit(*this, usingCFI, false); + MCDwarfFrameEmitter::Emit(*this, MAB, usingCFI, false); } void MCStreamer::EmitW64Tables() { diff --git a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp index f18828d..8d8e290 100644 --- a/contrib/llvm/lib/MC/MCSubtargetInfo.cpp +++ b/contrib/llvm/lib/MC/MCSubtargetInfo.cpp @@ -27,6 +27,11 @@ MCSubtargetInfo::InitMCProcessorInfo(StringRef CPU, StringRef FS) { FeatureBits = Features.getFeatureBits(CPU, ProcDesc, NumProcs, ProcFeatures, NumFeatures); + InitCPUSchedModel(CPU); +} + +void +MCSubtargetInfo::InitCPUSchedModel(StringRef CPU) { if (!CPU.empty()) CPUSchedModel = getSchedModelForCPU(CPU); else @@ -91,10 +96,8 @@ MCSubtargetInfo::getSchedModelForCPU(StringRef CPU) const { #endif // Find entry - SubtargetInfoKV KV; - KV.Key = CPU.data(); const SubtargetInfoKV *Found = - std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, KV); + std::lower_bound(ProcSchedModels, ProcSchedModels+NumProcs, CPU); if (Found == ProcSchedModels+NumProcs || StringRef(Found->Key) != CPU) { errs() << "'" << CPU << "' is not a recognized processor for this target" diff --git a/contrib/llvm/lib/MC/MCSymbol.cpp b/contrib/llvm/lib/MC/MCSymbol.cpp index b973c57..2416525 100644 --- a/contrib/llvm/lib/MC/MCSymbol.cpp +++ b/contrib/llvm/lib/MC/MCSymbol.cpp @@ -68,12 +68,23 @@ void MCSymbol::print(raw_ostream &OS) const { // The name for this MCSymbol is required to be a valid target name. However, // some targets support quoting names with funny characters. If the name // contains a funny character, then print it quoted. 
- if (!NameNeedsQuoting(getName())) { - OS << getName(); + StringRef Name = getName(); + if (!NameNeedsQuoting(Name)) { + OS << Name; return; } - OS << '"' << getName() << '"'; + OS << '"'; + for (unsigned I = 0, E = Name.size(); I != E; ++I) { + char C = Name[I]; + if (C == '\n') + OS << "\\n"; + else if (C == '"') + OS << "\\\""; + else + OS << C; + } + OS << '"'; } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) diff --git a/contrib/llvm/lib/MC/MCSymbolizer.cpp b/contrib/llvm/lib/MC/MCSymbolizer.cpp new file mode 100644 index 0000000..1020b74 --- /dev/null +++ b/contrib/llvm/lib/MC/MCSymbolizer.cpp @@ -0,0 +1,20 @@ +//===-- llvm/MC/MCSymbolizer.cpp - MCSymbolizer class -----------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/MC/MCSymbolizer.h" +#include "llvm/MC/MCRelocationInfo.h" + +using namespace llvm; + +MCSymbolizer::MCSymbolizer(MCContext &Ctx, OwningPtr &RelInfo) + : Ctx(Ctx), RelInfo(RelInfo.take()) { +} + +MCSymbolizer::~MCSymbolizer() { +} diff --git a/contrib/llvm/lib/MC/MCWin64EH.cpp b/contrib/llvm/lib/MC/MCWin64EH.cpp index c5b637c..b8b07d3 100644 --- a/contrib/llvm/lib/MC/MCWin64EH.cpp +++ b/contrib/llvm/lib/MC/MCWin64EH.cpp @@ -64,7 +64,7 @@ static void EmitAbsDifference(MCStreamer &streamer, MCSymbol *lhs, static void EmitUnwindCode(MCStreamer &streamer, MCSymbol *begin, MCWin64EHInstruction &inst) { - uint8_t b1, b2; + uint8_t b2; uint16_t w; b2 = (inst.getOperation() & 0x0F); switch (inst.getOperation()) { @@ -93,8 +93,7 @@ static void EmitUnwindCode(MCStreamer &streamer, MCSymbol *begin, streamer.EmitIntValue(b2, 1); break; case Win64EH::UOP_SetFPReg: - b1 = inst.getOffset() & 0xF0; - streamer.EmitIntValue(b1, 1); + EmitAbsDifference(streamer, inst.getLabel(), begin); streamer.EmitIntValue(b2, 1); break; case Win64EH::UOP_SaveNonVol: @@ -129,14 +128,29 @@ static void EmitUnwindCode(MCStreamer &streamer, MCSymbol *begin, } } +static void EmitSymbolRefWithOfs(MCStreamer &streamer, + const MCSymbol *Base, + const MCSymbol *Other) { + MCContext &Context = streamer.getContext(); + const MCSymbolRefExpr *BaseRef = MCSymbolRefExpr::Create(Base, Context); + const MCSymbolRefExpr *OtherRef = MCSymbolRefExpr::Create(Other, Context); + const MCExpr *Ofs = MCBinaryExpr::CreateSub(OtherRef, BaseRef, Context); + const MCSymbolRefExpr *BaseRefRel = MCSymbolRefExpr::Create(Base, + MCSymbolRefExpr::VK_COFF_IMGREL32, + Context); + streamer.EmitValue(MCBinaryExpr::CreateAdd(BaseRefRel, Ofs, Context), 4); +} + static void EmitRuntimeFunction(MCStreamer &streamer, const MCWin64EHUnwindInfo *info) { MCContext &context = streamer.getContext(); streamer.EmitValueToAlignment(4); - streamer.EmitValue(MCSymbolRefExpr::Create(info->Begin, context), 4); - streamer.EmitValue(MCSymbolRefExpr::Create(info->End, context), 4); - streamer.EmitValue(MCSymbolRefExpr::Create(info->Symbol, context), 4); + EmitSymbolRefWithOfs(streamer, info->Function, info->Begin); + EmitSymbolRefWithOfs(streamer, info->Function, info->End); + streamer.EmitValue(MCSymbolRefExpr::Create(info->Symbol, + MCSymbolRefExpr::VK_COFF_IMGREL32, + context), 4); } static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) { @@ -145,11 +159,11 @@ static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) { MCContext &context = streamer.getContext(); 
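
The MCSymbol::print change above escapes only the two characters that can corrupt a quoted name in .s output, the newline and the quote itself. A self-contained sketch of that escaping (plain std::string stand-ins, not the MCSymbol interface):

    #include <iostream>
    #include <string>

    // Print a symbol name, quoting it only when required and escaping
    // '\n' and '"' inside the quotes, as the new print() does.
    void printSymbolName(std::ostream &OS, const std::string &Name,
                         bool NeedsQuoting) {
      if (!NeedsQuoting) {
        OS << Name;
        return;
      }
      OS << '"';
      for (char C : Name) {
        if (C == '\n')
          OS << "\\n";
        else if (C == '"')
          OS << "\\\"";
        else
          OS << C;
      }
      OS << '"';
    }

    int main() {
      printSymbolName(std::cout, "odd\"name", true); // prints "odd\"name"
      std::cout << '\n';
    }
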
streamer.EmitValueToAlignment(4); - // Upper 3 bits are the version number (currently 1). - uint8_t flags = 0x01; info->Symbol = context.CreateTempSymbol(); streamer.EmitLabel(info->Symbol); + // Upper 3 bits are the version number (currently 1). + uint8_t flags = 0x01; if (info->ChainedParent) flags |= Win64EH::UNW_ChainInfo << 3; else { @@ -185,20 +199,26 @@ static void EmitUnwindInfo(MCStreamer &streamer, MCWin64EHUnwindInfo *info) { EmitUnwindCode(streamer, info->Begin, inst); } + // For alignment purposes, the instruction array will always have an even + // number of entries, with the final entry potentially unused (in which case + // the array will be one longer than indicated by the count of unwind codes + // field). + if (numCodes & 1) { + streamer.EmitIntValue(0, 2); + } + if (flags & (Win64EH::UNW_ChainInfo << 3)) EmitRuntimeFunction(streamer, info->ChainedParent); else if (flags & ((Win64EH::UNW_TerminateHandler|Win64EH::UNW_ExceptionHandler) << 3)) - streamer.EmitValue(MCSymbolRefExpr::Create(info->ExceptionHandler, context), - 4); - else if (numCodes < 2) { + streamer.EmitValue(MCSymbolRefExpr::Create(info->ExceptionHandler, + MCSymbolRefExpr::VK_COFF_IMGREL32, + context), 4); + else if (numCodes == 0) { // The minimum size of an UNWIND_INFO struct is 8 bytes. If we're not // a chained unwind info, if there is no handler, and if there are fewer // than 2 slots used in the unwind code array, we have to pad to 8 bytes. - if (numCodes == 1) - streamer.EmitIntValue(0, 2); - else - streamer.EmitIntValue(0, 4); + streamer.EmitIntValue(0, 4); } } diff --git a/contrib/llvm/lib/MC/MachObjectWriter.cpp b/contrib/llvm/lib/MC/MachObjectWriter.cpp index a5ba3c3..8234aff 100644 --- a/contrib/llvm/lib/MC/MachObjectWriter.cpp +++ b/contrib/llvm/lib/MC/MachObjectWriter.cpp @@ -20,12 +20,11 @@ #include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCValue.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MachO.h" #include using namespace llvm; -using namespace llvm::object; void MachObjectWriter::reset() { Relocations.clear(); @@ -128,7 +127,7 @@ void MachObjectWriter::WriteHeader(unsigned NumLoadCommands, uint32_t Flags = 0; if (SubsectionsViaSymbols) - Flags |= macho::HF_SubsectionsViaSymbols; + Flags |= MachO::MH_SUBSECTIONS_VIA_SYMBOLS; // struct mach_header (28 bytes) or // struct mach_header_64 (32 bytes) @@ -136,12 +135,12 @@ void MachObjectWriter::WriteHeader(unsigned NumLoadCommands, uint64_t Start = OS.tell(); (void) Start; - Write32(is64Bit() ? macho::HM_Object64 : macho::HM_Object32); + Write32(is64Bit() ? MachO::MH_MAGIC_64 : MachO::MH_MAGIC); Write32(TargetObjectWriter->getCPUType()); Write32(TargetObjectWriter->getCPUSubtype()); - Write32(macho::HFT_Object); + Write32(MachO::MH_OBJECT); Write32(NumLoadCommands); Write32(LoadCommandsSize); Write32(Flags); @@ -149,7 +148,7 @@ void MachObjectWriter::WriteHeader(unsigned NumLoadCommands, Write32(0); // reserved assert(OS.tell() - Start == - (is64Bit() ? macho::Header64Size : macho::Header32Size)); + (is64Bit()?sizeof(MachO::mach_header_64): sizeof(MachO::mach_header))); } /// WriteSegmentLoadCommand - Write a segment load command. @@ -167,12 +166,12 @@ void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections, (void) Start; unsigned SegmentLoadCommandSize = - is64Bit() ? macho::SegmentLoadCommand64Size: - macho::SegmentLoadCommand32Size; - Write32(is64Bit() ? 
macho::LCT_Segment64 : macho::LCT_Segment); + is64Bit() ? sizeof(MachO::segment_command_64): + sizeof(MachO::segment_command); + Write32(is64Bit() ? MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT); Write32(SegmentLoadCommandSize + - NumSections * (is64Bit() ? macho::Section64Size : - macho::Section32Size)); + NumSections * (is64Bit() ? sizeof(MachO::section_64) : + sizeof(MachO::section))); WriteBytes("", 16); if (is64Bit()) { @@ -186,8 +185,10 @@ void MachObjectWriter::WriteSegmentLoadCommand(unsigned NumSections, Write32(SectionDataStartOffset); // file offset Write32(SectionDataSize); // file size } - Write32(0x7); // maxprot - Write32(0x7); // initprot + // maxprot + Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); + // initprot + Write32(MachO::VM_PROT_READ | MachO::VM_PROT_WRITE | MachO::VM_PROT_EXECUTE); Write32(NumSections); Write32(0); // flags @@ -240,8 +241,8 @@ void MachObjectWriter::WriteSection(const MCAssembler &Asm, if (is64Bit()) Write32(0); // reserved3 - assert(OS.tell() - Start == (is64Bit() ? macho::Section64Size : - macho::Section32Size)); + assert(OS.tell() - Start == (is64Bit() ? sizeof(MachO::section_64) : + sizeof(MachO::section))); } void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset, @@ -253,14 +254,14 @@ void MachObjectWriter::WriteSymtabLoadCommand(uint32_t SymbolOffset, uint64_t Start = OS.tell(); (void) Start; - Write32(macho::LCT_Symtab); - Write32(macho::SymtabLoadCommandSize); + Write32(MachO::LC_SYMTAB); + Write32(sizeof(MachO::symtab_command)); Write32(SymbolOffset); Write32(NumSymbols); Write32(StringTableOffset); Write32(StringTableSize); - assert(OS.tell() - Start == macho::SymtabLoadCommandSize); + assert(OS.tell() - Start == sizeof(MachO::symtab_command)); } void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, @@ -276,8 +277,8 @@ void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, uint64_t Start = OS.tell(); (void) Start; - Write32(macho::LCT_Dysymtab); - Write32(macho::DysymtabLoadCommandSize); + Write32(MachO::LC_DYSYMTAB); + Write32(sizeof(MachO::dysymtab_command)); Write32(FirstLocalSymbol); Write32(NumLocalSymbols); Write32(FirstExternalSymbol); @@ -297,7 +298,7 @@ void MachObjectWriter::WriteDysymtabLoadCommand(uint32_t FirstLocalSymbol, Write32(0); // locreloff Write32(0); // nlocrel - assert(OS.tell() - Start == macho::DysymtabLoadCommandSize); + assert(OS.tell() - Start == sizeof(MachO::dysymtab_command)); } void MachObjectWriter::WriteNlist(MachSymbolData &MSD, @@ -312,20 +313,20 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD, // // FIXME: Are the prebound or indirect fields possible here? if (Symbol.isUndefined()) - Type = macho::STT_Undefined; + Type = MachO::N_UNDF; else if (Symbol.isAbsolute()) - Type = macho::STT_Absolute; + Type = MachO::N_ABS; else - Type = macho::STT_Section; + Type = MachO::N_SECT; // FIXME: Set STAB bits. if (Data.isPrivateExtern()) - Type |= macho::STF_PrivateExtern; + Type |= MachO::N_PEXT; // Set external bit. if (Data.isExternal() || Symbol.isUndefined()) - Type |= macho::STF_External; + Type |= MachO::N_EXT; // Compute the symbol address. 
if (Symbol.isDefined()) { @@ -341,7 +342,8 @@ void MachObjectWriter::WriteNlist(MachSymbolData &MSD, assert((1U << Log2Size) == Align && "Invalid 'common' alignment!"); if (Log2Size > 15) report_fatal_error("invalid 'common' alignment '" + - Twine(Align) + "'"); + Twine(Align) + "' for '" + Symbol.getName() + "'", + false); // FIXME: Keep this mask with the SymbolFlags enumeration. Flags = (Flags & 0xF0FF) | (Log2Size << 8); } @@ -369,17 +371,17 @@ void MachObjectWriter::WriteLinkeditLoadCommand(uint32_t Type, (void) Start; Write32(Type); - Write32(macho::LinkeditLoadCommandSize); + Write32(sizeof(MachO::linkedit_data_command)); Write32(DataOffset); Write32(DataSize); - assert(OS.tell() - Start == macho::LinkeditLoadCommandSize); + assert(OS.tell() - Start == sizeof(MachO::linkedit_data_command)); } static unsigned ComputeLinkerOptionsLoadCommandSize( const std::vector &Options, bool is64Bit) { - unsigned Size = sizeof(macho::LinkerOptionsLoadCommand); + unsigned Size = sizeof(MachO::linker_options_command); for (unsigned i = 0, e = Options.size(); i != e; ++i) Size += Options[i].size() + 1; return RoundUpToAlignment(Size, is64Bit ? 8 : 4); @@ -392,10 +394,10 @@ void MachObjectWriter::WriteLinkerOptionsLoadCommand( uint64_t Start = OS.tell(); (void) Start; - Write32(macho::LCT_LinkerOptions); + Write32(MachO::LC_LINKER_OPTIONS); Write32(Size); Write32(Options.size()); - uint64_t BytesWritten = sizeof(macho::LinkerOptionsLoadCommand); + uint64_t BytesWritten = sizeof(MachO::linker_options_command); for (unsigned i = 0, e = Options.size(); i != e; ++i) { // Write each string, including the null byte. const std::string &Option = Options[i]; @@ -428,6 +430,22 @@ void MachObjectWriter::BindIndirectSymbols(MCAssembler &Asm) { // // FIXME: Revisit this when the dust settles. + // Report errors for use of .indirect_symbol not in a symbol pointer section + // or stub section. + for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), + ie = Asm.indirect_symbol_end(); it != ie; ++it) { + const MCSectionMachO &Section = + cast(it->SectionData->getSection()); + + if (Section.getType() != MCSectionMachO::S_NON_LAZY_SYMBOL_POINTERS && + Section.getType() != MCSectionMachO::S_LAZY_SYMBOL_POINTERS && + Section.getType() != MCSectionMachO::S_SYMBOL_STUBS) { + MCSymbol &Symbol = *it->Symbol; + report_fatal_error("indirect symbol '" + Symbol.getName() + + "' not in a symbol pointer or stub section"); + } + } + // Bind non lazy symbol pointers first. unsigned IndirectIndex = 0; for (MCAssembler::indirect_symbol_iterator it = Asm.indirect_symbol_begin(), @@ -723,14 +741,14 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm, // section headers) and the symbol table. unsigned NumLoadCommands = 1; uint64_t LoadCommandsSize = is64Bit() ? - macho::SegmentLoadCommand64Size + NumSections * macho::Section64Size : - macho::SegmentLoadCommand32Size + NumSections * macho::Section32Size; + sizeof(MachO::segment_command_64) + NumSections * sizeof(MachO::section_64): + sizeof(MachO::segment_command) + NumSections * sizeof(MachO::section); // Add the data-in-code load command size, if used. unsigned NumDataRegions = Asm.getDataRegions().size(); if (NumDataRegions) { ++NumLoadCommands; - LoadCommandsSize += macho::LinkeditLoadCommandSize; + LoadCommandsSize += sizeof(MachO::linkedit_data_command); } // Add the symbol table load command sizes, if used. 
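
The WriteObject layout arithmetic that follows is all running totals over fixed struct sizes: load commands follow the header, section data follows the load commands, and each table's offset is the sum of everything before it. A small worked example under assumed 64-bit sizes (mach_header_64 is 32 bytes, symtab_command 24, nlist_64 16):

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t HeaderSize = 32;    // sizeof(MachO::mach_header_64)
      const uint64_t SymtabCmdSize = 24; // sizeof(MachO::symtab_command)
      const uint64_t Nlist64Size = 16;   // sizeof(MachO::nlist_64)

      uint64_t LoadCommandsSize = SymtabCmdSize; // just LC_SYMTAB here
      uint64_t SectionDataStart = HeaderSize + LoadCommandsSize;
      uint64_t SectionDataSize = 0x100;          // assumed section bytes
      uint64_t NumSymbols = 3;

      uint64_t SymbolTableOffset = SectionDataStart + SectionDataSize;
      // The string table is written immediately after the symbol table.
      uint64_t StringTableOffset =
          SymbolTableOffset + NumSymbols * Nlist64Size;
      assert(StringTableOffset == 32 + 24 + 0x100 + 3 * 16);
    }
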
@@ -738,8 +756,8 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
     UndefinedSymbolData.size();
   if (NumSymbols) {
     NumLoadCommands += 2;
-    LoadCommandsSize += (macho::SymtabLoadCommandSize +
-                         macho::DysymtabLoadCommandSize);
+    LoadCommandsSize += (sizeof(MachO::symtab_command) +
+                         sizeof(MachO::dysymtab_command));
   }
 
   // Add the linker option load commands sizes.
@@ -753,8 +771,8 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
 
   // Compute the total size of the section data, as well as its file size and vm
   // size.
-  uint64_t SectionDataStart = (is64Bit() ? macho::Header64Size :
-                               macho::Header32Size) + LoadCommandsSize;
+  uint64_t SectionDataStart = (is64Bit() ? sizeof(MachO::mach_header_64) :
+                               sizeof(MachO::mach_header)) + LoadCommandsSize;
   uint64_t SectionDataSize = 0;
   uint64_t SectionDataFileSize = 0;
   uint64_t VMSize = 0;
@@ -791,11 +809,11 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
   uint64_t RelocTableEnd = SectionDataStart + SectionDataFileSize;
   for (MCAssembler::const_iterator it = Asm.begin(),
          ie = Asm.end(); it != ie; ++it) {
-    std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+    std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
     unsigned NumRelocs = Relocs.size();
     uint64_t SectionStart = SectionDataStart + getSectionAddress(it);
     WriteSection(Asm, Layout, *it, SectionStart, RelocTableEnd, NumRelocs);
-    RelocTableEnd += NumRelocs * macho::RelocationInfoSize;
+    RelocTableEnd += NumRelocs * sizeof(MachO::any_relocation_info);
   }
 
   // Write the data-in-code load command, if used.
@@ -803,7 +821,7 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
   if (NumDataRegions) {
     uint64_t DataRegionsOffset = RelocTableEnd;
     uint64_t DataRegionsSize = NumDataRegions * 8;
-    WriteLinkeditLoadCommand(macho::LCT_DataInCode, DataRegionsOffset,
+    WriteLinkeditLoadCommand(MachO::LC_DATA_IN_CODE, DataRegionsOffset,
                              DataRegionsSize);
   }
 
@@ -830,8 +848,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
     // The string table is written after symbol table.
     uint64_t StringTableOffset =
-      SymbolTableOffset + NumSymTabSymbols * (is64Bit() ? macho::Nlist64Size :
-                                              macho::Nlist32Size);
+      SymbolTableOffset + NumSymTabSymbols * (is64Bit() ?
+                                              sizeof(MachO::nlist_64) :
+                                              sizeof(MachO::nlist));
 
     WriteSymtabLoadCommand(SymbolTableOffset, NumSymTabSymbols,
                            StringTableOffset, StringTable.size());
@@ -864,10 +883,10 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
        ie = Asm.end(); it != ie; ++it) {
     // Write the section relocation entries, in reverse order to match 'as'
     // (approximately, the exact algorithm is more complicated than this).
-    std::vector<macho::RelocationEntry> &Relocs = Relocations[it];
+    std::vector<MachO::any_relocation_info> &Relocs = Relocations[it];
     for (unsigned i = 0, e = Relocs.size(); i != e; ++i) {
-      Write32(Relocs[e - i - 1].Word0);
-      Write32(Relocs[e - i - 1].Word1);
+      Write32(Relocs[e - i - 1].r_word0);
+      Write32(Relocs[e - i - 1].r_word1);
     }
   }
 
@@ -906,9 +925,9 @@ void MachObjectWriter::WriteObject(MCAssembler &Asm,
       // If this symbol is defined and internal, mark it as such.
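
The indirect-symbol write-out below encodes each entry as either a symbol table index or a sentinel: defined-but-internal symbols get INDIRECT_SYMBOL_LOCAL, optionally OR'd with INDIRECT_SYMBOL_ABS. A sketch of just that encoding (the two constants are the standard <mach-o/loader.h> values):

    #include <cstdint>
    #include <iostream>

    const uint32_t IndirectSymbolLocal = 0x80000000; // INDIRECT_SYMBOL_LOCAL
    const uint32_t IndirectSymbolAbs   = 0x40000000; // INDIRECT_SYMBOL_ABS

    // An indirect symbol table entry: a symbol index, or sentinel flags
    // for symbols whose index must not be used by the linker.
    uint32_t indirectEntry(bool Defined, bool External, bool Absolute,
                           uint32_t SymbolIndex) {
      if (Defined && !External) {
        uint32_t Flags = IndirectSymbolLocal;
        if (Absolute)
          Flags |= IndirectSymbolAbs;
        return Flags;
      }
      return SymbolIndex;
    }

    int main() {
      // A defined, internal, absolute symbol: both sentinel bits set.
      std::cout << std::hex << indirectEntry(true, false, true, 0) << '\n';
    }
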
if (it->Symbol->isDefined() && !Asm.getSymbolData(*it->Symbol).isExternal()) { - uint32_t Flags = macho::ISF_Local; + uint32_t Flags = MachO::INDIRECT_SYMBOL_LOCAL; if (it->Symbol->isAbsolute()) - Flags |= macho::ISF_Absolute; + Flags |= MachO::INDIRECT_SYMBOL_ABS; Write32(Flags); continue; } diff --git a/contrib/llvm/lib/MC/SubtargetFeature.cpp b/contrib/llvm/lib/MC/SubtargetFeature.cpp index 7625abd..2fb91f2 100644 --- a/contrib/llvm/lib/MC/SubtargetFeature.cpp +++ b/contrib/llvm/lib/MC/SubtargetFeature.cpp @@ -121,13 +121,10 @@ void SubtargetFeatures::AddFeature(const StringRef String, /// Find KV in array using binary search. static const SubtargetFeatureKV *Find(StringRef S, const SubtargetFeatureKV *A, size_t L) { - // Make the lower bound element we're looking for - SubtargetFeatureKV KV; - KV.Key = S.data(); // Determine the end of the array const SubtargetFeatureKV *Hi = A + L; // Binary search the array - const SubtargetFeatureKV *F = std::lower_bound(A, Hi, KV); + const SubtargetFeatureKV *F = std::lower_bound(A, Hi, S); // If not found then return NULL if (F == Hi || StringRef(F->Key) != S) return NULL; // Return the found array item @@ -353,8 +350,7 @@ void SubtargetFeatures::dump() const { } #endif -/// getDefaultSubtargetFeatures - Return a string listing the features -/// associated with the target triple. +/// Adds the default features for the specified target triple. /// /// FIXME: This is an inelegant way of specifying the features of a /// subtarget. It would be better if we could encode this information diff --git a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp index 518b59e..d9ca86d 100644 --- a/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp +++ b/contrib/llvm/lib/MC/WinCOFFObjectWriter.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/Twine.h" #include "llvm/MC/MCAsmLayout.h" #include "llvm/MC/MCAssembler.h" #include "llvm/MC/MCContext.h" @@ -137,7 +138,7 @@ public: symbol_map SymbolMap; WinCOFFObjectWriter(MCWinCOFFObjectTargetWriter *MOTW, raw_ostream &OS); - ~WinCOFFObjectWriter(); + virtual ~WinCOFFObjectWriter(); COFFSymbol *createSymbol(StringRef Name); COFFSymbol *GetOrCreateCOFFSymbol(const MCSymbol * Symbol); @@ -147,13 +148,12 @@ public: object_t *createCOFFEntity(StringRef Name, list_t &List); void DefineSection(MCSectionData const &SectionData); - void DefineSymbol(MCSymbolData const &SymbolData, - MCAssembler &Assembler); + void DefineSymbol(MCSymbolData const &SymbolData, MCAssembler &Assembler, + const MCAsmLayout &Layout); void MakeSymbolReal(COFFSymbol &S, size_t Index); void MakeSectionReal(COFFSection &S, size_t Number); - bool ExportSection(COFFSection const *S); bool ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm); bool IsPhysicalSection(COFFSection *S); @@ -189,17 +189,6 @@ static inline void write_uint32_le(void *Data, uint32_t const &Value) { Ptr[3] = (Value & 0xFF000000) >> 24; } -static inline void write_uint16_le(void *Data, uint16_t const &Value) { - uint8_t *Ptr = reinterpret_cast(Data); - Ptr[0] = (Value & 0x00FF) >> 0; - Ptr[1] = (Value & 0xFF00) >> 8; -} - -static inline void write_uint8_le(void *Data, uint8_t const &Value) { - uint8_t *Ptr = reinterpret_cast(Data); - Ptr[0] = (Value & 0xFF) >> 0; -} - //------------------------------------------------------------------------------ // Symbol class implementation @@ -410,7 +399,8 @@ void WinCOFFObjectWriter::DefineSection(MCSectionData const 
&SectionData) { /// This function takes a section data object from the assembler /// and creates the associated COFF symbol staging object. void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, - MCAssembler &Assembler) { + MCAssembler &Assembler, + const MCAsmLayout &Layout) { MCSymbol const &Symbol = SymbolData.getSymbol(); COFFSymbol *coff_symbol = GetOrCreateCOFFSymbol(&Symbol); SymbolMap[&Symbol] = coff_symbol; @@ -451,6 +441,12 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, const MCSymbolData &ResSymData = Assembler.getSymbolData(Symbol.AliasedSymbol()); + if (Symbol.isVariable()) { + int64_t Addr; + if (Symbol.getVariableValue()->EvaluateAsAbsolute(Addr, Layout)) + coff_symbol->Data.Value = Addr; + } + coff_symbol->Data.Type = (ResSymData.getFlags() & 0x0000FFFF) >> 0; coff_symbol->Data.StorageClass = (ResSymData.getFlags() & 0x00FF0000) >> 16; @@ -462,7 +458,9 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, external ? COFF::IMAGE_SYM_CLASS_EXTERNAL : COFF::IMAGE_SYM_CLASS_STATIC; } - if (ResSymData.Fragment != NULL) + if (Symbol.isAbsolute() || Symbol.AliasedSymbol().isVariable()) + coff_symbol->Data.SectionNumber = COFF::IMAGE_SYM_ABSOLUTE; + else if (ResSymData.Fragment != NULL) coff_symbol->Section = SectionMap[&ResSymData.Fragment->getParent()->getSection()]; @@ -474,18 +472,21 @@ void WinCOFFObjectWriter::DefineSymbol(MCSymbolData const &SymbolData, /// name into the string table if needed void WinCOFFObjectWriter::MakeSectionReal(COFFSection &S, size_t Number) { if (S.Name.size() > COFF::NameSize) { - size_t StringTableEntry = Strings.insert(S.Name.c_str()); - - // FIXME: Why is this number 999999? This number is never mentioned in the - // spec. I'm assuming this is due to the printed value needing to fit into - // the S.Header.Name field. In which case why not 9999999 (7 9's instead of - // 6)? The spec does not state if this entry should be null terminated in - // this case, and thus this seems to be the best way to do it. I think I - // just solved my own FIXME... - if (StringTableEntry > 999999) - report_fatal_error("COFF string table is greater than 999999 bytes."); - - std::sprintf(S.Header.Name, "/%d", unsigned(StringTableEntry)); + const unsigned Max6DecimalSize = 999999; + const unsigned Max7DecimalSize = 9999999; + uint64_t StringTableEntry = Strings.insert(S.Name.c_str()); + + if (StringTableEntry <= Max6DecimalSize) { + std::sprintf(S.Header.Name, "/%d", unsigned(StringTableEntry)); + } else if (StringTableEntry <= Max7DecimalSize) { + // With seven digits, we have to skip the terminating null. Because + // sprintf always appends it, we use a larger temporary buffer. + char buffer[9] = { }; + std::sprintf(buffer, "/%d", unsigned(StringTableEntry)); + std::memcpy(S.Header.Name, buffer, 8); + } else { + report_fatal_error("COFF string table is greater than 9,999,999 bytes."); + } } else std::memcpy(S.Header.Name, S.Name.c_str(), S.Name.size()); @@ -504,10 +505,6 @@ void WinCOFFObjectWriter::MakeSymbolReal(COFFSymbol &S, size_t Index) { S.Index = Index; } -bool WinCOFFObjectWriter::ExportSection(COFFSection const *S) { - return !S->MCData->getFragmentList().empty(); -} - bool WinCOFFObjectWriter::ExportSymbol(MCSymbolData const &SymbolData, MCAssembler &Asm) { // This doesn't seem to be right. 
Strings referred to from the .data section @@ -621,9 +618,10 @@ void WinCOFFObjectWriter::ExecutePostLayoutBinding(MCAssembler &Asm, DefineSection(*i); for (MCAssembler::const_symbol_iterator i = Asm.symbol_begin(), - e = Asm.symbol_end(); i != e; i++) { + e = Asm.symbol_end(); + i != e; i++) { if (ExportSymbol(*i, Asm)) { - DefineSymbol(*i, Asm); + DefineSymbol(*i, Asm, Layout); } } } @@ -636,8 +634,9 @@ void WinCOFFObjectWriter::RecordRelocation(const MCAssembler &Asm, uint64_t &FixedValue) { assert(Target.getSymA() != NULL && "Relocation must reference a symbol!"); - const MCSymbol *A = &Target.getSymA()->getSymbol(); - MCSymbolData &A_SD = Asm.getSymbolData(*A); + const MCSymbol &Symbol = Target.getSymA()->getSymbol(); + const MCSymbol &A = Symbol.AliasedSymbol(); + MCSymbolData &A_SD = Asm.getSymbolData(A); MCSectionData const *SectionData = Fragment->getParent(); @@ -707,10 +706,13 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, // Assign symbol and section indexes and offsets. Header.NumberOfSections = 0; + DenseMap SectionIndices; for (sections::iterator i = Sections.begin(), e = Sections.end(); i != e; i++) { if (Layout.getSectionAddressSize((*i)->MCData) > 0) { - MakeSectionReal(**i, ++Header.NumberOfSections); + size_t Number = ++Header.NumberOfSections; + SectionIndices[*i] = Number; + MakeSectionReal(**i, Number); } else { (*i)->Number = -1; } @@ -754,6 +756,31 @@ void WinCOFFObjectWriter::WriteObject(MCAssembler &Asm, } } + // Fixup associative COMDAT sections. + for (sections::iterator i = Sections.begin(), + e = Sections.end(); i != e; i++) { + if ((*i)->Symbol->Aux[0].Aux.SectionDefinition.Selection != + COFF::IMAGE_COMDAT_SELECT_ASSOCIATIVE) + continue; + + const MCSectionCOFF &MCSec = static_cast( + (*i)->MCData->getSection()); + + COFFSection *Assoc = SectionMap.lookup(MCSec.getAssocSection()); + if (!Assoc) { + report_fatal_error(Twine("Missing associated COMDAT section ") + + MCSec.getAssocSection()->getSectionName() + + " for section " + MCSec.getSectionName()); + } + + // Skip this section if the associated section is unused. + if (Assoc->Number == -1) + continue; + + (*i)->Symbol->Aux[0].Aux.SectionDefinition.Number = SectionIndices[Assoc]; + } + + // Assign file offsets to COFF object file structures. unsigned offset = 0; @@ -888,6 +915,9 @@ MCWinCOFFObjectTargetWriter::MCWinCOFFObjectTargetWriter(unsigned Machine_) : Machine(Machine_) { } +// Pin the vtable to this file. 
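
For readers unfamiliar with the comment above: without one out-of-line virtual function, every translation unit that uses the class emits its own weak copy of the vtable. Declaring a do-nothing virtual anchor() in the header and defining it in a single .cpp pins the vtable (and RTTI) to that file. A minimal illustration of the idiom:

    // In the header: one deliberately out-of-line virtual function.
    struct TargetWriterBase {
      virtual ~TargetWriterBase() {}
      virtual void anchor(); // no inline body on purpose
    };

    // In exactly one .cpp file: the definition that "pins" the vtable.
    void TargetWriterBase::anchor() {}

    int main() {
      TargetWriterBase B;
      (void)B;
    }
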
+void MCWinCOFFObjectTargetWriter::anchor() {} + //------------------------------------------------------------------------------ // WinCOFFObjectWriter factory function diff --git a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp index 75f343c..5b5aad7 100644 --- a/contrib/llvm/lib/MC/WinCOFFStreamer.cpp +++ b/contrib/llvm/lib/MC/WinCOFFStreamer.cpp @@ -55,7 +55,7 @@ public: virtual void EmitDebugLabel(MCSymbol *Symbol); virtual void EmitAssemblerFlag(MCAssemblerFlag Flag); virtual void EmitThumbFunc(MCSymbol *Func); - virtual void EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); + virtual bool EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute); virtual void EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue); virtual void BeginCOFFSymbolDef(MCSymbol const *Symbol); virtual void EmitCOFFSymbolStorageClass(int StorageClass); @@ -72,13 +72,10 @@ public: virtual void EmitTBSSSymbol(const MCSection *Section, MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment); virtual void EmitFileDirective(StringRef Filename); + virtual void EmitIdent(StringRef IdentString); virtual void EmitWin64EHHandlerData(); virtual void FinishImpl(); - static bool classof(const MCStreamer *S) { - return S->getKind() == SK_WinCOFFStreamer; - } - private: virtual void EmitInstToData(const MCInst &Inst) { MCDataFragment *DF = getOrCreateDataFragment(); @@ -134,8 +131,7 @@ private: WinCOFFStreamer::WinCOFFStreamer(MCContext &Context, MCAsmBackend &MAB, MCCodeEmitter &CE, raw_ostream &OS) - : MCObjectStreamer(SK_WinCOFFStreamer, Context, MAB, OS, &CE), - CurSymbol(NULL) {} + : MCObjectStreamer(Context, 0, MAB, OS, &CE), CurSymbol(NULL) {} void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, unsigned ByteAlignment, bool External) { @@ -155,7 +151,8 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, int Selection = COFF::IMAGE_COMDAT_SELECT_LARGEST; const MCSection *Section = MCStreamer::getContext().getCOFFSection( - SectionName, Characteristics, Selection, SectionKind::getBSS()); + SectionName, Characteristics, SectionKind::getBSS(), Symbol->getName(), + Selection); MCSectionData &SectionData = getAssembler().getOrCreateSectionData(*Section); @@ -164,7 +161,7 @@ void WinCOFFStreamer::AddCommonSymbol(MCSymbol *Symbol, uint64_t Size, SymbolData.setExternal(External); - Symbol->setSection(*Section); + AssignSection(Symbol, Section); if (ByteAlignment != 1) new MCAlignFragment(ByteAlignment, 0, 0, ByteAlignment, &SectionData); @@ -201,7 +198,7 @@ void WinCOFFStreamer::EmitThumbFunc(MCSymbol *Func) { llvm_unreachable("not implemented"); } -void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, +bool WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, MCSymbolAttr Attribute) { assert(Symbol && "Symbol must be non-null!"); assert((Symbol->isInSection() @@ -221,8 +218,10 @@ void WinCOFFStreamer::EmitSymbolAttribute(MCSymbol *Symbol, break; default: - llvm_unreachable("unsupported attribute"); + return false; } + + return true; } void WinCOFFStreamer::EmitSymbolDesc(MCSymbol *Symbol, unsigned DescValue) { @@ -309,6 +308,11 @@ void WinCOFFStreamer::EmitFileDirective(StringRef Filename) { // info will be a much large effort. } +// TODO: Implement this if you want to emit .comment section in COFF obj files. 
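
For context on the TODO that follows: on ELF targets the .ident text lands in the mergeable .comment section, and a purely textual streamer just prints the directive, roughly as sketched below (an illustrative helper, not the MCStreamer interface); the COFF streamer has no standard place for it yet, hence the unreachable stub.

    #include <iostream>
    #include <string>

    // What a text streamer does with EmitIdent: print the directive.
    void emitIdentAsText(std::ostream &OS, const std::string &Ident) {
      OS << "\t.ident\t\"" << Ident << "\"\n";
    }

    int main() {
      emitIdentAsText(std::cout, "clang version 3.4");
    }
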
+void WinCOFFStreamer::EmitIdent(StringRef IdentString) {
+  llvm_unreachable("unsupported directive");
+}
+
 void WinCOFFStreamer::EmitWin64EHHandlerData() {
   MCStreamer::EmitWin64EHHandlerData();
 
@@ -318,6 +322,7 @@ void WinCOFFStreamer::EmitWin64EHHandlerData() {
 }
 
 void WinCOFFStreamer::FinishImpl() {
+  EmitFrames(NULL, true);
   EmitW64Tables();
   MCObjectStreamer::FinishImpl();
 }
diff --git a/contrib/llvm/lib/Object/Archive.cpp b/contrib/llvm/lib/Object/Archive.cpp
index 0e13d05..71efca2 100644
--- a/contrib/llvm/lib/Object/Archive.cpp
+++ b/contrib/llvm/lib/Object/Archive.cpp
@@ -13,33 +13,110 @@
 
 #include "llvm/Object/Archive.h"
 #include "llvm/ADT/APInt.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/Twine.h"
 #include "llvm/Support/Endian.h"
 #include "llvm/Support/MemoryBuffer.h"
 
 using namespace llvm;
 using namespace object;
 
-static const char *Magic = "!<arch>\n";
+static const char *const Magic = "!<arch>\n";
 
-static bool isInternalMember(const ArchiveMemberHeader &amh) {
-  static const char *const internals[] = {
-    "/",
-    "//",
-    "#_LLVM_SYM_TAB_#"
-  };
+void Archive::anchor() { }
+
+StringRef ArchiveMemberHeader::getName() const {
+  char EndCond;
+  if (Name[0] == '/' || Name[0] == '#')
+    EndCond = ' ';
+  else
+    EndCond = '/';
+  llvm::StringRef::size_type end =
+      llvm::StringRef(Name, sizeof(Name)).find(EndCond);
+  if (end == llvm::StringRef::npos)
+    end = sizeof(Name);
+  assert(end <= sizeof(Name) && end > 0);
+  // Don't include the EndCond if there is one.
+  return llvm::StringRef(Name, end);
+}
+
+uint32_t ArchiveMemberHeader::getSize() const {
+  uint32_t Ret;
+  if (llvm::StringRef(Size, sizeof(Size)).rtrim(" ").getAsInteger(10, Ret))
+    llvm_unreachable("Size is not a decimal number.");
+  return Ret;
+}
+
+sys::fs::perms ArchiveMemberHeader::getAccessMode() const {
+  unsigned Ret;
+  if (StringRef(AccessMode, sizeof(AccessMode)).rtrim(" ").getAsInteger(8, Ret))
+    llvm_unreachable("Access mode is not an octal number.");
+  return static_cast<sys::fs::perms>(Ret);
+}
+
+sys::TimeValue ArchiveMemberHeader::getLastModified() const {
+  unsigned Seconds;
+  if (StringRef(LastModified, sizeof(LastModified)).rtrim(" ")
+          .getAsInteger(10, Seconds))
+    llvm_unreachable("Last modified time not a decimal number.");
+
+  sys::TimeValue Ret;
+  Ret.fromEpochTime(Seconds);
+  return Ret;
+}
+
+unsigned ArchiveMemberHeader::getUID() const {
+  unsigned Ret;
+  if (StringRef(UID, sizeof(UID)).rtrim(" ").getAsInteger(10, Ret))
+    llvm_unreachable("UID time not a decimal number.");
+  return Ret;
+}
+
+unsigned ArchiveMemberHeader::getGID() const {
+  unsigned Ret;
+  if (StringRef(GID, sizeof(GID)).rtrim(" ").getAsInteger(10, Ret))
+    llvm_unreachable("GID time not a decimal number.");
+  return Ret;
+}
+
+Archive::Child::Child(const Archive *Parent, const char *Start)
+    : Parent(Parent) {
+  if (!Start)
+    return;
+
+  const ArchiveMemberHeader *Header =
+      reinterpret_cast<const ArchiveMemberHeader *>(Start);
+  Data = StringRef(Start, sizeof(ArchiveMemberHeader) + Header->getSize());
 
-  StringRef name = amh.getName();
-  for (std::size_t i = 0; i < sizeof(internals) / sizeof(*internals); ++i) {
-    if (name == internals[i])
-      return true;
+  // Setup StartOfFile and PaddingBytes.
+  StartOfFile = sizeof(ArchiveMemberHeader);
+  // Don't include attached name.
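
The code that follows handles the BSD long-name scheme the comment refers to: a name field of "#1/<len>" means the real name, <len> bytes long, is attached directly after the header, so the member's data begins that much later. A standalone parser sketch for the length field:

    #include <cassert>
    #include <cstdint>
    #include <string>

    // Parse the "<len>" out of a BSD "#1/<len>" name field.
    bool parseBsdLongName(const std::string &NameField, uint64_t &NameSize) {
      if (NameField.compare(0, 3, "#1/") != 0)
        return false;
      NameSize = 0;
      for (std::string::size_type I = 3;
           I < NameField.size() && NameField[I] != ' '; ++I) {
        if (NameField[I] < '0' || NameField[I] > '9')
          return false;
        NameSize = NameSize * 10 + (NameField[I] - '0');
      }
      return true;
    }

    int main() {
      uint64_t N = 0;
      assert(parseBsdLongName("#1/20", N) && N == 20);
      // StartOfFile is then sizeof(ArchiveMemberHeader) + N.
    }
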
+ StringRef Name = Header->getName(); + if (Name.startswith("#1/")) { + uint64_t NameSize; + if (Name.substr(3).rtrim(" ").getAsInteger(10, NameSize)) + llvm_unreachable("Long name length is not an integer"); + StartOfFile += NameSize; } - return false; } -void Archive::anchor() { } +Archive::Child Archive::Child::getNext() const { + size_t SpaceToSkip = Data.size(); + // If it's odd, add 1 to make it even. + if (SpaceToSkip & 1) + ++SpaceToSkip; + + const char *NextLoc = Data.data() + SpaceToSkip; + + // Check to see if this is past the end of the archive. + if (NextLoc >= Parent->Data->getBufferEnd()) + return Child(Parent, NULL); + + return Child(Parent, NextLoc); +} error_code Archive::Child::getName(StringRef &Result) const { - StringRef name = ToHeader(Data.data())->getName(); + StringRef name = getRawName(); // Check if it's a special name. if (name[0] == '/') { if (name.size() == 1) { // Linker member. @@ -79,7 +156,8 @@ error_code Archive::Child::getName(StringRef &Result) const { uint64_t name_size; if (name.substr(3).rtrim(" ").getAsInteger(10, name_size)) llvm_unreachable("Long name length is not an ingeter"); - Result = Data.substr(sizeof(ArchiveMemberHeader), name_size); + Result = Data.substr(sizeof(ArchiveMemberHeader), name_size) + .rtrim(StringRef("\0", 1)); return object_error::success; } // It's a simple name. @@ -90,6 +168,20 @@ error_code Archive::Child::getName(StringRef &Result) const { return object_error::success; } +error_code Archive::Child::getMemoryBuffer(OwningPtr &Result, + bool FullPath) const { + StringRef Name; + if (error_code ec = getName(Name)) + return ec; + SmallString<128> Path; + Result.reset(MemoryBuffer::getMemBuffer( + getBuffer(), FullPath ? (Twine(Parent->getFileName()) + "(" + Name + ")") + .toStringRef(Path) + : Name, + false)); + return error_code::success(); +} + error_code Archive::Child::getAsBinary(OwningPtr &Result) const { OwningPtr ret; OwningPtr Buff; @@ -102,11 +194,11 @@ error_code Archive::Child::getAsBinary(OwningPtr &Result) const { } Archive::Archive(MemoryBuffer *source, error_code &ec) - : Binary(Binary::ID_Archive, source) { + : Binary(Binary::ID_Archive, source), SymbolTable(end_children()) { // Check for sufficient magic. - if (!source || source->getBufferSize() - < (8 + sizeof(ArchiveMemberHeader) + 2) // Smallest archive. 
- || StringRef(source->getBufferStart(), 8) != Magic) { + assert(source); + if (source->getBufferSize() < 8 || + StringRef(source->getBufferStart(), 8) != Magic) { ec = object_error::invalid_file_type; return; } @@ -115,72 +207,122 @@ Archive::Archive(MemoryBuffer *source, error_code &ec) child_iterator i = begin_children(false); child_iterator e = end_children(); - StringRef name; - if ((ec = i->getName(name))) + if (i == e) { + ec = object_error::success; return; + } + + StringRef Name = i->getRawName(); // Below is the pattern that is used to figure out the archive format // GNU archive format - // First member : / (points to the symbol table ) + // First member : / (may exist, if it exists, points to the symbol table ) // Second member : // (may exist, if it exists, points to the string table) // Note : The string table is used if the filename exceeds 15 characters // BSD archive format - // First member : __.SYMDEF (points to the symbol table) - // There is no string table, if the filename exceeds 15 characters or has a - // embedded space, the filename has #1/, The size represents the size + // First member : __.SYMDEF or "__.SYMDEF SORTED" (the symbol table) + // There is no string table, if the filename exceeds 15 characters or has a + // embedded space, the filename has #1/, The size represents the size // of the filename that needs to be read after the archive header // COFF archive format // First member : / // Second member : / (provides a directory of symbols) - // Third member : // contains the string table, this is present even if the - // string table is empty - if (name == "/") { + // Third member : // (may exist, if it exists, contains the string table) + // Note: Microsoft PE/COFF Spec 8.3 says that the third member is present + // even if the string table is empty. However, lib.exe does not in fact + // seem to create the third member if there's no member whose filename + // exceeds 15 characters. So the third member is optional. + + if (Name == "__.SYMDEF") { + Format = K_BSD; SymbolTable = i; - StringTable = e; - if (i != e) ++i; - if (i == e) { - ec = object_error::parse_failed; - return; - } - if ((ec = i->getName(name))) + ++i; + FirstRegular = i; + ec = object_error::success; + return; + } + + if (Name.startswith("#1/")) { + Format = K_BSD; + // We know this is BSD, so getName will work since there is no string table. 
+ ec = i->getName(Name); + if (ec) return; - if (name[0] != '/') { - Format = K_GNU; - } else if ((name.size() > 1) && (name == "//")) { - Format = K_GNU; - StringTable = i; + if (Name == "__.SYMDEF SORTED") { + SymbolTable = i; ++i; - } else { - Format = K_COFF; - if (i != e) { - SymbolTable = i; - ++i; - } - if (i != e) { - StringTable = i; - } } - } else if (name == "__.SYMDEF") { - Format = K_BSD; + FirstRegular = i; + return; + } + + if (Name == "/") { SymbolTable = i; - StringTable = e; - } + + ++i; + if (i == e) { + ec = object_error::parse_failed; + return; + } + Name = i->getRawName(); + } + + if (Name == "//") { + Format = K_GNU; + StringTable = i; + ++i; + FirstRegular = i; + ec = object_error::success; + return; + } + + if (Name[0] != '/') { + Format = K_GNU; + FirstRegular = i; + ec = object_error::success; + return; + } + + if (Name != "/") { + ec = object_error::parse_failed; + return; + } + + Format = K_COFF; + SymbolTable = i; + + ++i; + if (i == e) { + FirstRegular = i; + ec = object_error::success; + return; + } + + Name = i->getRawName(); + + if (Name == "//") { + StringTable = i; + ++i; + } + + FirstRegular = i; ec = object_error::success; } -Archive::child_iterator Archive::begin_children(bool skip_internal) const { +Archive::child_iterator Archive::begin_children(bool SkipInternal) const { + if (Data->getBufferSize() == 8) // empty archive. + return end_children(); + + if (SkipInternal) + return FirstRegular; + const char *Loc = Data->getBufferStart() + strlen(Magic); - size_t Size = sizeof(ArchiveMemberHeader) + - ToHeader(Loc)->getSize(); - Child c(this, StringRef(Loc, Size)); - // Skip internals at the beginning of an archive. - if (skip_internal && isInternalMember(*ToHeader(Loc))) - return c.getNext(); + Child c(this, Loc); return c; } Archive::child_iterator Archive::end_children() const { - return Child(this, StringRef(0, 0)); + return Child(this, NULL); } error_code Archive::Symbol::getName(StringRef &Result) const { @@ -228,9 +370,7 @@ error_code Archive::Symbol::getMember(child_iterator &Result) const { } const char *Loc = Parent->getData().begin() + Offset; - size_t Size = sizeof(ArchiveMemberHeader) + - ToHeader(Loc)->getSize(); - Result = Child(Parent, StringRef(Loc, Size)); + Result = Child(Parent, Loc); return object_error::success; } @@ -245,6 +385,9 @@ Archive::Symbol Archive::Symbol::getNext() const { } Archive::symbol_iterator Archive::begin_symbols() const { + if (!hasSymbolTable()) + return symbol_iterator(Symbol(this, 0, 0)); + const char *buf = SymbolTable->getBuffer().begin(); if (kind() == K_GNU) { uint32_t symbol_count = 0; @@ -265,11 +408,13 @@ Archive::symbol_iterator Archive::begin_symbols() const { } Archive::symbol_iterator Archive::end_symbols() const { + if (!hasSymbolTable()) + return symbol_iterator(Symbol(this, 0, 0)); + const char *buf = SymbolTable->getBuffer().begin(); uint32_t symbol_count = 0; if (kind() == K_GNU) { symbol_count = *reinterpret_cast(buf); - buf += sizeof(uint32_t) + (symbol_count * (sizeof(uint32_t))); } else if (kind() == K_BSD) { llvm_unreachable("BSD archive format is not supported"); } else { @@ -299,3 +444,7 @@ Archive::child_iterator Archive::findSym(StringRef name) const { } return end_children(); } + +bool Archive::hasSymbolTable() const { + return SymbolTable != end_children(); +} diff --git a/contrib/llvm/lib/Object/Binary.cpp b/contrib/llvm/lib/Object/Binary.cpp index 4e528d8..de57b4c 100644 --- a/contrib/llvm/lib/Object/Binary.cpp +++ b/contrib/llvm/lib/Object/Binary.cpp @@ -14,11 +14,13 @@ #include 
"llvm/Object/Binary.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" // Include headers for createBinary. #include "llvm/Object/Archive.h" #include "llvm/Object/COFF.h" +#include "llvm/Object/MachOUniversal.h" #include "llvm/Object/ObjectFile.h" using namespace llvm; @@ -45,22 +47,19 @@ error_code object::createBinary(MemoryBuffer *Source, OwningPtr scopedSource(Source); if (!Source) return make_error_code(errc::invalid_argument); - if (Source->getBufferSize() < 64) - return object_error::invalid_file_type; - sys::LLVMFileType type = sys::IdentifyFileType(Source->getBufferStart(), - static_cast(Source->getBufferSize())); + sys::fs::file_magic type = sys::fs::identify_magic(Source->getBuffer()); error_code ec; switch (type) { - case sys::Archive_FileType: { + case sys::fs::file_magic::archive: { OwningPtr ret(new Archive(scopedSource.take(), ec)); if (ec) return ec; Result.swap(ret); return object_error::success; } - case sys::ELF_Relocatable_FileType: - case sys::ELF_Executable_FileType: - case sys::ELF_SharedObject_FileType: - case sys::ELF_Core_FileType: { + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::elf_executable: + case sys::fs::file_magic::elf_shared_object: + case sys::fs::file_magic::elf_core: { OwningPtr ret( ObjectFile::createELFObjectFile(scopedSource.take())); if (!ret) @@ -68,15 +67,16 @@ error_code object::createBinary(MemoryBuffer *Source, Result.swap(ret); return object_error::success; } - case sys::Mach_O_Object_FileType: - case sys::Mach_O_Executable_FileType: - case sys::Mach_O_FixedVirtualMemorySharedLib_FileType: - case sys::Mach_O_Core_FileType: - case sys::Mach_O_PreloadExecutable_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: - case sys::Mach_O_DynamicLinker_FileType: - case sys::Mach_O_Bundle_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: { + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::macho_executable: + case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: + case sys::fs::file_magic::macho_core: + case sys::fs::file_magic::macho_preload_executable: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib: + case sys::fs::file_magic::macho_dynamic_linker: + case sys::fs::file_magic::macho_bundle: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: + case sys::fs::file_magic::macho_dsym_companion: { OwningPtr ret( ObjectFile::createMachOObjectFile(scopedSource.take())); if (!ret) @@ -84,15 +84,30 @@ error_code object::createBinary(MemoryBuffer *Source, Result.swap(ret); return object_error::success; } - case sys::COFF_FileType: { - OwningPtr ret(new COFFObjectFile(scopedSource.take(), ec)); + case sys::fs::file_magic::macho_universal_binary: { + OwningPtr ret(new MachOUniversalBinary(scopedSource.take(), ec)); if (ec) return ec; Result.swap(ret); return object_error::success; } - default: // Unrecognized object file format. + case sys::fs::file_magic::coff_object: + case sys::fs::file_magic::coff_import_library: + case sys::fs::file_magic::pecoff_executable: { + OwningPtr ret( + ObjectFile::createCOFFObjectFile(scopedSource.take())); + if (!ret) + return object_error::invalid_file_type; + Result.swap(ret); + return object_error::success; + } + case sys::fs::file_magic::unknown: + case sys::fs::file_magic::bitcode: + case sys::fs::file_magic::windows_resource: { + // Unrecognized object file format. 
      return object_error::invalid_file_type;
+    }
   }
+  llvm_unreachable("Unexpected Binary File Type");
 }
 
 error_code object::createBinary(StringRef Path, OwningPtr<Binary> &Result) {
diff --git a/contrib/llvm/lib/Object/COFFObjectFile.cpp b/contrib/llvm/lib/Object/COFFObjectFile.cpp
index 70fec32..42066c3 100644
--- a/contrib/llvm/lib/Object/COFFObjectFile.cpp
+++ b/contrib/llvm/lib/Object/COFFObjectFile.cpp
@@ -16,6 +16,9 @@
 #include "llvm/ADT/SmallString.h"
 #include "llvm/ADT/StringSwitch.h"
 #include "llvm/ADT/Triple.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cctype>
 
 using namespace llvm;
 using namespace object;
@@ -37,18 +40,19 @@ bool checkSize(const MemoryBuffer *m, error_code &ec, uint64_t size) {
   return true;
 }
 
-// Returns false if any bytes in [addr, addr + size) fall outsize of m.
-bool checkAddr(const MemoryBuffer *m,
-               error_code &ec,
-               uintptr_t addr,
-               uint64_t size) {
-  if (addr + size < addr ||
-      addr + size < size ||
-      addr + size > uintptr_t(m->getBufferEnd())) {
-    ec = object_error::unexpected_eof;
-    return false;
+// Sets Obj unless any bytes in [addr, addr + size) fall outsize of m.
+// Returns unexpected_eof if error.
+template <typename T>
+error_code getObject(const T *&Obj, const MemoryBuffer *M, const uint8_t *Ptr,
+                     const size_t Size = sizeof(T)) {
+  uintptr_t Addr = uintptr_t(Ptr);
+  if (Addr + Size < Addr ||
+      Addr + Size < Size ||
+      Addr + Size > uintptr_t(M->getBufferEnd())) {
+    return object_error::unexpected_eof;
   }
-  return true;
+  Obj = reinterpret_cast<const T *>(Addr);
+  return object_error::success;
 }
 }
 
@@ -58,12 +62,12 @@ const coff_symbol *COFFObjectFile::toSymb(DataRefImpl Symb) const {
 # ifndef NDEBUG
   // Verify that the symbol points to a valid entry in the symbol table.
   uintptr_t offset = uintptr_t(addr) - uintptr_t(base());
-  if (offset < Header->PointerToSymbolTable
-      || offset >= Header->PointerToSymbolTable
-         + (Header->NumberOfSymbols * sizeof(coff_symbol)))
+  if (offset < COFFHeader->PointerToSymbolTable
+      || offset >= COFFHeader->PointerToSymbolTable
+         + (COFFHeader->NumberOfSymbols * sizeof(coff_symbol)))
     report_fatal_error("Symbol was outside of symbol table.");
 
-  assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol)
+  assert((offset - COFFHeader->PointerToSymbolTable) % sizeof(coff_symbol)
          == 0 && "Symbol did not point to the beginning of a symbol");
 # endif
 
@@ -76,7 +80,7 @@ const coff_section *COFFObjectFile::toSec(DataRefImpl Sec) const {
 # ifndef NDEBUG
   // Verify that the section points to a valid entry in the section table.
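
The getObject helper introduced above is the generic form of these checks: validate [Ptr, Ptr + Size) against the buffer before handing out a typed pointer, with the two extra comparisons catching unsigned wrap-around of Addr + Size. A self-contained sketch of the same pattern:

    #include <cassert>
    #include <cstddef>
    #include <cstdint>

    // Bounds-check [Ptr, Ptr + Size) against BufEnd before typing it.
    template <typename T>
    bool getObjectChecked(const T *&Obj, const uint8_t *BufEnd,
                          const uint8_t *Ptr, std::size_t Size = sizeof(T)) {
      uintptr_t Addr = uintptr_t(Ptr);
      if (Addr + Size < Addr ||  // wrapped around the address space
          Addr + Size < Size ||  // ditto, for a tiny Addr
          Addr + Size > uintptr_t(BufEnd))
        return false;            // would read past the buffer
      Obj = reinterpret_cast<const T *>(Addr);
      return true;
    }

    int main() {
      uint8_t Buf[8] = {};
      const uint32_t *P = 0;
      assert(getObjectChecked(P, Buf + sizeof(Buf), Buf));      // fits
      assert(!getObjectChecked(P, Buf + sizeof(Buf), Buf + 6)); // overruns
    }
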
if (addr < SectionTable - || addr >= (SectionTable + Header->NumberOfSections)) + || addr >= (SectionTable + COFFHeader->NumberOfSections)) report_fatal_error("Section was outside of section table."); uintptr_t offset = uintptr_t(addr) - uintptr_t(SectionTable); @@ -108,10 +112,8 @@ error_code COFFObjectFile::getSymbolFileOffset(DataRefImpl Symb, const coff_section *Section = NULL; if (error_code ec = getSection(symb->SectionNumber, Section)) return ec; - char Type; - if (error_code ec = getSymbolNMTypeChar(Symb, Type)) - return ec; - if (Type == 'U' || Type == 'w') + + if (symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) Result = UnknownAddressOrSize; else if (Section) Result = Section->PointerToRawData + symb->Value; @@ -126,10 +128,8 @@ error_code COFFObjectFile::getSymbolAddress(DataRefImpl Symb, const coff_section *Section = NULL; if (error_code ec = getSection(symb->SectionNumber, Section)) return ec; - char Type; - if (error_code ec = getSymbolNMTypeChar(Symb, Type)) - return ec; - if (Type == 'U' || Type == 'w') + + if (symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) Result = UnknownAddressOrSize; else if (Section) Result = Section->VirtualAddress + symb->Value; @@ -149,12 +149,16 @@ error_code COFFObjectFile::getSymbolType(DataRefImpl Symb, if (symb->getComplexType() == COFF::IMAGE_SYM_DTYPE_FUNCTION) { Result = SymbolRef::ST_Function; } else { - char Type; - if (error_code ec = getSymbolNMTypeChar(Symb, Type)) - return ec; - if (Type == 'r' || Type == 'R') { - Result = SymbolRef::ST_Data; + uint32_t Characteristics = 0; + if (symb->SectionNumber > 0) { + const coff_section *Section = NULL; + if (error_code ec = getSection(symb->SectionNumber, Section)) + return ec; + Characteristics = Section->Characteristics; } + if (Characteristics & COFF::IMAGE_SCN_MEM_READ && + ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only. + Result = SymbolRef::ST_Data; } } return object_error::success; @@ -193,10 +197,8 @@ error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb, const coff_section *Section = NULL; if (error_code ec = getSection(symb->SectionNumber, Section)) return ec; - char Type; - if (error_code ec = getSymbolNMTypeChar(Symb, Type)) - return ec; - if (Type == 'U' || Type == 'w') + + if (symb->SectionNumber == COFF::IMAGE_SYM_UNDEFINED) Result = UnknownAddressOrSize; else if (Section) Result = Section->SizeOfRawData - symb->Value; @@ -205,74 +207,6 @@ error_code COFFObjectFile::getSymbolSize(DataRefImpl Symb, return object_error::success; } -error_code COFFObjectFile::getSymbolNMTypeChar(DataRefImpl Symb, - char &Result) const { - const coff_symbol *symb = toSymb(Symb); - StringRef name; - if (error_code ec = getSymbolName(Symb, name)) - return ec; - char ret = StringSwitch(name) - .StartsWith(".debug", 'N') - .StartsWith(".sxdata", 'N') - .Default('?'); - - if (ret != '?') { - Result = ret; - return object_error::success; - } - - uint32_t Characteristics = 0; - if (symb->SectionNumber > 0) { - const coff_section *Section = NULL; - if (error_code ec = getSection(symb->SectionNumber, Section)) - return ec; - Characteristics = Section->Characteristics; - } - - switch (symb->SectionNumber) { - case COFF::IMAGE_SYM_UNDEFINED: - // Check storage classes. - if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_WEAK_EXTERNAL) { - Result = 'w'; - return object_error::success; // Don't do ::toupper. - } else if (symb->Value != 0) // Check for common symbols. 
- ret = 'c'; - else - ret = 'u'; - break; - case COFF::IMAGE_SYM_ABSOLUTE: - ret = 'a'; - break; - case COFF::IMAGE_SYM_DEBUG: - ret = 'n'; - break; - default: - // Check section type. - if (Characteristics & COFF::IMAGE_SCN_CNT_CODE) - ret = 't'; - else if ( Characteristics & COFF::IMAGE_SCN_MEM_READ - && ~Characteristics & COFF::IMAGE_SCN_MEM_WRITE) // Read only. - ret = 'r'; - else if (Characteristics & COFF::IMAGE_SCN_CNT_INITIALIZED_DATA) - ret = 'd'; - else if (Characteristics & COFF::IMAGE_SCN_CNT_UNINITIALIZED_DATA) - ret = 'b'; - else if (Characteristics & COFF::IMAGE_SCN_LNK_INFO) - ret = 'i'; - - // Check for section symbol. - else if ( symb->StorageClass == COFF::IMAGE_SYM_CLASS_STATIC - && symb->Value == 0) - ret = 's'; - } - - if (symb->StorageClass == COFF::IMAGE_SYM_CLASS_EXTERNAL) - ret = ::toupper(static_cast(ret)); - - Result = ret; - return object_error::success; -} - error_code COFFObjectFile::getSymbolSection(DataRefImpl Symb, section_iterator &Result) const { const coff_symbol *symb = toSymb(Symb); @@ -403,7 +337,7 @@ error_code COFFObjectFile::sectionContainsSymbol(DataRefImpl Sec, return object_error::success; } -relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const { +relocation_iterator COFFObjectFile::section_rel_begin(DataRefImpl Sec) const { const coff_section *sec = toSec(Sec); DataRefImpl ret; if (sec->NumberOfRelocations == 0) @@ -414,7 +348,7 @@ relocation_iterator COFFObjectFile::getSectionRelBegin(DataRefImpl Sec) const { return relocation_iterator(RelocationRef(ret, this)); } -relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const { +relocation_iterator COFFObjectFile::section_rel_end(DataRefImpl Sec) const { const coff_section *sec = toSec(Sec); DataRefImpl ret; if (sec->NumberOfRelocations == 0) @@ -428,86 +362,178 @@ relocation_iterator COFFObjectFile::getSectionRelEnd(DataRefImpl Sec) const { return relocation_iterator(RelocationRef(ret, this)); } +// Initialize the pointer to the symbol table. +error_code COFFObjectFile::initSymbolTablePtr() { + if (error_code ec = getObject( + SymbolTable, Data, base() + COFFHeader->PointerToSymbolTable, + COFFHeader->NumberOfSymbols * sizeof(coff_symbol))) + return ec; + + // Find string table. The first four byte of the string table contains the + // total size of the string table, including the size field itself. If the + // string table is empty, the value of the first four byte would be 4. + const uint8_t *StringTableAddr = + base() + COFFHeader->PointerToSymbolTable + + COFFHeader->NumberOfSymbols * sizeof(coff_symbol); + const ulittle32_t *StringTableSizePtr; + if (error_code ec = getObject(StringTableSizePtr, Data, StringTableAddr)) + return ec; + StringTableSize = *StringTableSizePtr; + if (error_code ec = + getObject(StringTable, Data, StringTableAddr, StringTableSize)) + return ec; + + // Check that the string table is null terminated if has any in it. + if (StringTableSize < 4 || + (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) + return object_error::parse_failed; + return object_error::success; +} + +// Returns the file offset for the given RVA. 
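
getRvaPtr below is the usual RVA-to-file-offset walk: find the section whose virtual address range contains the RVA, then rebase onto that section's raw-data position. The same logic over minimal stand-in types:

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Sec {
      uint32_t VirtualAddress, VirtualSize, PointerToRawData;
    };

    // Map an RVA to a file offset via the containing section, if any.
    bool rvaToFileOffset(const std::vector<Sec> &Secs, uint32_t Rva,
                         uint32_t &FileOff) {
      for (const Sec &S : Secs) {
        if (S.VirtualAddress <= Rva &&
            Rva < S.VirtualAddress + S.VirtualSize) {
          FileOff = S.PointerToRawData + (Rva - S.VirtualAddress);
          return true;
        }
      }
      return false; // no section contains this RVA: malformed file
    }

    int main() {
      std::vector<Sec> Secs = {{0x1000, 0x200, 0x400}};
      uint32_t Off = 0;
      assert(rvaToFileOffset(Secs, 0x1010, Off) && Off == 0x410);
    }
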
+error_code COFFObjectFile::getRvaPtr(uint32_t Rva, uintptr_t &Res) const { + error_code ec; + for (section_iterator i = begin_sections(), e = end_sections(); i != e; + i.increment(ec)) { + if (ec) + return ec; + const coff_section *Section = getCOFFSection(i); + uint32_t SectionStart = Section->VirtualAddress; + uint32_t SectionEnd = Section->VirtualAddress + Section->VirtualSize; + if (SectionStart <= Rva && Rva < SectionEnd) { + uint32_t Offset = Rva - SectionStart; + Res = uintptr_t(base()) + Section->PointerToRawData + Offset; + return object_error::success; + } + } + return object_error::parse_failed; +} + +// Returns hint and name fields, assuming \p Rva is pointing to a Hint/Name +// table entry. +error_code COFFObjectFile:: +getHintName(uint32_t Rva, uint16_t &Hint, StringRef &Name) const { + uintptr_t IntPtr = 0; + if (error_code ec = getRvaPtr(Rva, IntPtr)) + return ec; + const uint8_t *Ptr = reinterpret_cast(IntPtr); + Hint = *reinterpret_cast(Ptr); + Name = StringRef(reinterpret_cast(Ptr + 2)); + return object_error::success; +} + +// Find the import table. +error_code COFFObjectFile::initImportTablePtr() { + // First, we get the RVA of the import table. If the file lacks a pointer to + // the import table, do nothing. + const data_directory *DataEntry; + if (getDataDirectory(COFF::IMPORT_TABLE, DataEntry)) + return object_error::success; + + // Do nothing if the pointer to import table is NULL. + if (DataEntry->RelativeVirtualAddress == 0) + return object_error::success; + + uint32_t ImportTableRva = DataEntry->RelativeVirtualAddress; + NumberOfImportDirectory = DataEntry->Size / + sizeof(import_directory_table_entry); + + // Find the section that contains the RVA. This is needed because the RVA is + // the import table's memory address which is different from its file offset. + uintptr_t IntPtr = 0; + if (error_code ec = getRvaPtr(ImportTableRva, IntPtr)) + return ec; + ImportDirectory = reinterpret_cast< + const import_directory_table_entry *>(IntPtr); + + // It's an error if there's no section containing the Import Table RVA. + return object_error::parse_failed; +} + COFFObjectFile::COFFObjectFile(MemoryBuffer *Object, error_code &ec) : ObjectFile(Binary::ID_COFF, Object) - , Header(0) + , COFFHeader(0) + , PE32Header(0) + , DataDirectory(0) , SectionTable(0) , SymbolTable(0) , StringTable(0) - , StringTableSize(0) { + , StringTableSize(0) + , ImportDirectory(0) + , NumberOfImportDirectory(0) { // Check that we at least have enough room for a header. if (!checkSize(Data, ec, sizeof(coff_file_header))) return; - // The actual starting location of the COFF header in the file. This can be - // non-zero in PE/COFF files. - uint64_t HeaderStart = 0; + // The current location in the file where we are looking at. + uint64_t CurPtr = 0; + + // PE header is optional and is present only in executables. If it exists, + // it is placed right after COFF header. + bool hasPEHeader = false; // Check if this is a PE/COFF file. if (base()[0] == 0x4d && base()[1] == 0x5a) { // PE/COFF, seek through MS-DOS compatibility stub and 4-byte // PE signature to find 'normal' COFF header. if (!checkSize(Data, ec, 0x3c + 8)) return; - HeaderStart = *reinterpret_cast(base() + 0x3c); - // Check the PE header. ("PE\0\0") - if (std::memcmp(base() + HeaderStart, "PE\0\0", 4) != 0) { + CurPtr = *reinterpret_cast(base() + 0x3c); + // Check the PE magic bytes. 
("PE\0\0") + if (std::memcmp(base() + CurPtr, "PE\0\0", 4) != 0) { ec = object_error::parse_failed; return; } - HeaderStart += 4; // Skip the PE Header. + CurPtr += 4; // Skip the PE magic bytes. + hasPEHeader = true; } - Header = reinterpret_cast(base() + HeaderStart); - if (!checkAddr(Data, ec, uintptr_t(Header), sizeof(coff_file_header))) - return; - - SectionTable = - reinterpret_cast( base() - + HeaderStart - + sizeof(coff_file_header) - + Header->SizeOfOptionalHeader); - if (!checkAddr(Data, ec, uintptr_t(SectionTable), - Header->NumberOfSections * sizeof(coff_section))) + if ((ec = getObject(COFFHeader, Data, base() + CurPtr))) return; + CurPtr += sizeof(coff_file_header); - if (Header->PointerToSymbolTable != 0) { - SymbolTable = - reinterpret_cast(base() - + Header->PointerToSymbolTable); - if (!checkAddr(Data, ec, uintptr_t(SymbolTable), - Header->NumberOfSymbols * sizeof(coff_symbol))) + if (hasPEHeader) { + if ((ec = getObject(PE32Header, Data, base() + CurPtr))) return; + if (PE32Header->Magic != 0x10b) { + // We only support PE32. If this is PE32 (not PE32+), the magic byte + // should be 0x10b. If this is not PE32, continue as if there's no PE + // header in this file. + PE32Header = 0; + } else if (PE32Header->NumberOfRvaAndSize > 0) { + const uint8_t *addr = base() + CurPtr + sizeof(pe32_header); + uint64_t size = sizeof(data_directory) * PE32Header->NumberOfRvaAndSize; + if ((ec = getObject(DataDirectory, Data, addr, size))) + return; + } + CurPtr += COFFHeader->SizeOfOptionalHeader; + } - // Find string table. - StringTable = reinterpret_cast(base()) - + Header->PointerToSymbolTable - + Header->NumberOfSymbols * sizeof(coff_symbol); - if (!checkAddr(Data, ec, uintptr_t(StringTable), sizeof(ulittle32_t))) + if (!COFFHeader->isImportLibrary()) + if ((ec = getObject(SectionTable, Data, base() + CurPtr, + COFFHeader->NumberOfSections * sizeof(coff_section)))) return; - StringTableSize = *reinterpret_cast(StringTable); - if (!checkAddr(Data, ec, uintptr_t(StringTable), StringTableSize)) + // Initialize the pointer to the symbol table. + if (COFFHeader->PointerToSymbolTable != 0) + if ((ec = initSymbolTablePtr())) return; - // Check that the string table is null terminated if has any in it. - if (StringTableSize < 4 - || (StringTableSize > 4 && StringTable[StringTableSize - 1] != 0)) { - ec = object_error::parse_failed; - return; - } - } + + // Initialize the pointer to the beginning of the import table. + if ((ec = initImportTablePtr())) + return; ec = object_error::success; } symbol_iterator COFFObjectFile::begin_symbols() const { DataRefImpl ret; - ret.p = reinterpret_cast(SymbolTable); + ret.p = reinterpret_cast(SymbolTable); return symbol_iterator(SymbolRef(ret, this)); } symbol_iterator COFFObjectFile::end_symbols() const { // The symbol table ends where the string table begins. 
 symbol_iterator COFFObjectFile::end_symbols() const {
   // The symbol table ends where the string table begins.
   DataRefImpl ret;
-  ret.p = reinterpret_cast<intptr_t>(StringTable);
+  ret.p = reinterpret_cast<uintptr_t>(StringTable);
   return symbol_iterator(SymbolRef(ret, this));
 }
 
@@ -536,16 +562,34 @@ StringRef COFFObjectFile::getLoadName() const {
   return "";
 }
 
+import_directory_iterator COFFObjectFile::import_directory_begin() const {
+  DataRefImpl Imp;
+  Imp.p = reinterpret_cast<uintptr_t>(ImportDirectory);
+  return import_directory_iterator(ImportDirectoryEntryRef(Imp, this));
+}
+
+import_directory_iterator COFFObjectFile::import_directory_end() const {
+  DataRefImpl Imp;
+  if (ImportDirectory) {
+    Imp.p = reinterpret_cast<uintptr_t>(
+        ImportDirectory + (NumberOfImportDirectory - 1));
+  } else {
+    Imp.p = 0;
+  }
+  return import_directory_iterator(ImportDirectoryEntryRef(Imp, this));
+}
+
 section_iterator COFFObjectFile::begin_sections() const {
   DataRefImpl ret;
-  ret.p = reinterpret_cast<intptr_t>(SectionTable);
+  ret.p = reinterpret_cast<uintptr_t>(SectionTable);
   return section_iterator(SectionRef(ret, this));
 }
 
 section_iterator COFFObjectFile::end_sections() const {
   DataRefImpl ret;
-  ret.p = reinterpret_cast<intptr_t>(SectionTable + Header->NumberOfSections);
+  int numSections = COFFHeader->isImportLibrary()
+      ? 0 : COFFHeader->NumberOfSections;
+  ret.p = reinterpret_cast<uintptr_t>(SectionTable + numSections);
   return section_iterator(SectionRef(ret, this));
 }
 
@@ -554,7 +598,7 @@ uint8_t COFFObjectFile::getBytesInAddress() const {
 }
 
 StringRef COFFObjectFile::getFileFormatName() const {
-  switch(Header->Machine) {
+  switch(COFFHeader->Machine) {
   case COFF::IMAGE_FILE_MACHINE_I386:
     return "COFF-i386";
   case COFF::IMAGE_FILE_MACHINE_AMD64:
@@ -565,7 +609,7 @@ StringRef COFFObjectFile::getFileFormatName() const {
 }
 
 unsigned COFFObjectFile::getArch() const {
-  switch(Header->Machine) {
+  switch(COFFHeader->Machine) {
   case COFF::IMAGE_FILE_MACHINE_I386:
     return Triple::x86;
   case COFF::IMAGE_FILE_MACHINE_AMD64:
@@ -575,8 +619,28 @@ unsigned COFFObjectFile::getArch() const {
   }
 }
 
+// This method is kept here because lld uses it. As soon as we make lld
+// use getCOFFHeader, this method will be removed.
 error_code COFFObjectFile::getHeader(const coff_file_header *&Res) const {
-  Res = Header;
+  return getCOFFHeader(Res);
+}
+
+error_code COFFObjectFile::getCOFFHeader(const coff_file_header *&Res) const {
+  Res = COFFHeader;
+  return object_error::success;
+}
+
+error_code COFFObjectFile::getPE32Header(const pe32_header *&Res) const {
+  Res = PE32Header;
+  return object_error::success;
+}
+
+error_code COFFObjectFile::getDataDirectory(uint32_t index,
+                                            const data_directory *&Res) const {
+  // Error if there's no data directory or the index is out of range.
+  if (!DataDirectory || index >= PE32Header->NumberOfRvaAndSize)
+    return object_error::parse_failed;
+  Res = &DataDirectory[index];
   return object_error::success;
 }
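Client code reaches the PE32 optional-header tables through the
getDataDirectory accessor added above. A hedged usage sketch (Obj is assumed
to be an already-parsed COFFObjectFile; the helper and its printing are ours,
not part of the imported sources):

    #include "llvm/Object/COFF.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::object;

    // Print the location of the import table, if the image has one.
    static void dumpImportDataDirectory(const COFFObjectFile &Obj) {
      const data_directory *Dir;
      if (Obj.getDataDirectory(COFF::IMPORT_TABLE, Dir))
        return; // no PE32 header, or the index is out of range
      outs() << "import table: RVA=" << Dir->RelativeVirtualAddress
             << " size=" << Dir->Size << "\n";
    }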
@@ -587,7 +651,7 @@ error_code COFFObjectFile::getSection(int32_t index,
       index == COFF::IMAGE_SYM_ABSOLUTE ||
       index == COFF::IMAGE_SYM_DEBUG)
     Result = NULL;
-  else if (index > 0 && index <= Header->NumberOfSections)
+  else if (index > 0 && index <= COFFHeader->NumberOfSections)
     // We already verified the section table data, so no need to check again.
     Result = SectionTable + (index - 1);
   else
@@ -608,7 +672,7 @@ error_code COFFObjectFile::getString(uint32_t offset,
 
 error_code COFFObjectFile::getSymbol(uint32_t index,
                                      const coff_symbol *&Result) const {
-  if (index < Header->NumberOfSymbols)
+  if (index < COFFHeader->NumberOfSymbols)
     Result = SymbolTable + index;
   else
     return object_error::parse_failed;
@@ -637,19 +701,19 @@ error_code COFFObjectFile::getSymbolName(const coff_symbol *symbol,
 
 ArrayRef<uint8_t> COFFObjectFile::getSymbolAuxData(
                                         const coff_symbol *symbol) const {
   const uint8_t *aux = NULL;
-  
+
   if ( symbol->NumberOfAuxSymbols > 0 ) {
     // AUX data comes immediately after the symbol in COFF
     aux = reinterpret_cast<const uint8_t *>(symbol + 1);
 # ifndef NDEBUG
     // Verify that the aux symbol points to a valid entry in the symbol table.
     uintptr_t offset = uintptr_t(aux) - uintptr_t(base());
-    if (offset < Header->PointerToSymbolTable
-        || offset >= Header->PointerToSymbolTable
-           + (Header->NumberOfSymbols * sizeof(coff_symbol)))
+    if (offset < COFFHeader->PointerToSymbolTable
+        || offset >= COFFHeader->PointerToSymbolTable
+           + (COFFHeader->NumberOfSymbols * sizeof(coff_symbol)))
       report_fatal_error("Aux Symbol data was outside of symbol table.");
 
-    assert((offset - Header->PointerToSymbolTable) % sizeof(coff_symbol)
+    assert((offset - COFFHeader->PointerToSymbolTable) % sizeof(coff_symbol)
            == 0 && "Aux Symbol data did not point to the beginning of a symbol");
 # endif
   }
@@ -712,13 +776,11 @@ error_code COFFObjectFile::getRelocationOffset(DataRefImpl Rel,
   Res = toRel(Rel)->VirtualAddress;
   return object_error::success;
 }
-error_code COFFObjectFile::getRelocationSymbol(DataRefImpl Rel,
-                                               SymbolRef &Res) const {
+symbol_iterator COFFObjectFile::getRelocationSymbol(DataRefImpl Rel) const {
   const coff_relocation* R = toRel(Rel);
   DataRefImpl Symb;
   Symb.p = reinterpret_cast<uintptr_t>(SymbolTable + R->SymbolTableIndex);
-  Res = SymbolRef(Symb, this);
-  return object_error::success;
+  return symbol_iterator(SymbolRef(Symb, this));
 }
 error_code COFFObjectFile::getRelocationType(DataRefImpl Rel,
                                              uint64_t &Res) const {
@@ -740,7 +802,6 @@ const coff_relocation *COFFObjectFile::getCOFFRelocation(
   return toRel(It->getRawDataRefImpl());
 }
 
-
 #define LLVM_COFF_SWITCH_RELOC_TYPE_NAME(enum) \
   case COFF::enum: res = #enum; break;
 
@@ -748,7 +809,7 @@ error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
                                        SmallVectorImpl<char> &Result) const {
   const coff_relocation *reloc = toRel(Rel);
   StringRef res;
-  switch (Header->Machine) {
+  switch (COFFHeader->Machine) {
   case COFF::IMAGE_FILE_MACHINE_AMD64:
     switch (reloc->Type) {
     LLVM_COFF_SWITCH_RELOC_TYPE_NAME(IMAGE_REL_AMD64_ABSOLUTE);
@@ -798,11 +859,6 @@ error_code COFFObjectFile::getRelocationTypeName(DataRefImpl Rel,
 
 #undef LLVM_COFF_SWITCH_RELOC_TYPE_NAME
 
-error_code COFFObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel,
-                                                       int64_t &Res) const {
-  Res = 0;
-  return object_error::success;
-}
 error_code COFFObjectFile::getRelocationValueString(DataRefImpl Rel,
                                       SmallVectorImpl<char> &Result) const {
   const coff_relocation *reloc = toRel(Rel);
@@ -826,6 +882,52 @@ error_code COFFObjectFile::getLibraryPath(DataRefImpl LibData,
   report_fatal_error("getLibraryPath not implemented in COFFObjectFile");
 }
 
+bool ImportDirectoryEntryRef::
+operator==(const ImportDirectoryEntryRef &Other) const {
+  return ImportDirectoryPimpl == Other.ImportDirectoryPimpl;
+}
+
+static const import_directory_table_entry *toImportEntry(DataRefImpl Imp) {
+  return reinterpret_cast<const import_directory_table_entry *>(Imp.p);
+}
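Together with import_directory_begin/end, the entry references below support a
simple walk over an executable's imported DLLs. A sketch under the same
assumptions as the previous example, using the content_iterator conventions
this file already uses for sections (error handling abbreviated):

    #include "llvm/Object/COFF.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace llvm::object;

    // List the name of every DLL in the import directory table.
    static void listImports(const COFFObjectFile &Obj) {
      error_code ec;
      for (import_directory_iterator I = Obj.import_directory_begin(),
                                     E = Obj.import_directory_end();
           I != E; I.increment(ec)) {
        if (ec)
          return;   // iteration failed; give up quietly here
        StringRef Name;
        if (I->getName(Name))
          return;   // entry with an unmappable name RVA
        outs() << "imports " << Name << "\n";
      }
    }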
+error_code
+ImportDirectoryEntryRef::getNext(ImportDirectoryEntryRef &Result) const {
+  const import_directory_table_entry *Dir = toImportEntry(ImportDirectoryPimpl);
+  Dir += 1;
+  DataRefImpl Next;
+  Next.p = reinterpret_cast<uintptr_t>(Dir);
+  Result = ImportDirectoryEntryRef(Next, OwningObject);
+  return object_error::success;
+}
+
+error_code ImportDirectoryEntryRef::
+getImportTableEntry(const import_directory_table_entry *&Result) const {
+  Result = toImportEntry(ImportDirectoryPimpl);
+  return object_error::success;
+}
+
+error_code ImportDirectoryEntryRef::getName(StringRef &Result) const {
+  const import_directory_table_entry *Dir = toImportEntry(ImportDirectoryPimpl);
+  uintptr_t IntPtr = 0;
+  if (error_code ec = OwningObject->getRvaPtr(Dir->NameRVA, IntPtr))
+    return ec;
+  const char *Ptr = reinterpret_cast<const char *>(IntPtr);
+  Result = StringRef(Ptr);
+  return object_error::success;
+}
+
+error_code ImportDirectoryEntryRef::getImportLookupEntry(
+    const import_lookup_table_entry32 *&Result) const {
+  const import_directory_table_entry *Dir = toImportEntry(ImportDirectoryPimpl);
+  uintptr_t IntPtr = 0;
+  if (error_code ec = OwningObject->getRvaPtr(
+          Dir->ImportLookupTableRVA, IntPtr))
+    return ec;
+  Result = reinterpret_cast<const import_lookup_table_entry32 *>(IntPtr);
+  return object_error::success;
+}
+
 namespace llvm {
 
   ObjectFile *ObjectFile::createCOFFObjectFile(MemoryBuffer *Object) {
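getRvaPtr is the linchpin of the import readers above: PE structures store
virtual addresses, not file offsets, so every RVA must be mapped through the
section that contains it. The calculation reduces to the following standalone
sketch (the SectionInfo type is hypothetical; the real code walks
section_iterators over the parsed section table):

    #include <cstdint>
    #include <vector>

    struct SectionInfo {
      uint32_t VirtualAddress;   // where the section is mapped in memory
      uint32_t VirtualSize;      // size of the section's mapped image
      uint32_t PointerToRawData; // where the section's bytes sit in the file
    };

    // Translate an RVA to a file offset via its containing section.
    static bool rvaToFileOffset(const std::vector<SectionInfo> &sections,
                                uint32_t rva, uint32_t &offset) {
      for (size_t i = 0; i != sections.size(); ++i) {
        const SectionInfo &s = sections[i];
        if (s.VirtualAddress <= rva &&
            rva < s.VirtualAddress + s.VirtualSize) {
          offset = s.PointerToRawData + (rva - s.VirtualAddress);
          return true;
        }
      }
      return false; // parse error: no section covers this RVA
    }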
diff --git a/contrib/llvm/lib/Object/COFFYAML.cpp b/contrib/llvm/lib/Object/COFFYAML.cpp
new file mode 100644
index 0000000..e549b4e
--- /dev/null
+++ b/contrib/llvm/lib/Object/COFFYAML.cpp
@@ -0,0 +1,281 @@
+//===- COFFYAML.cpp - COFF YAMLIO implementation --------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of COFF.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/COFFYAML.h"
+
+#define ECase(X) IO.enumCase(Value, #X, COFF::X);
+
+namespace llvm {
+
+namespace COFFYAML {
+Section::Section() { memset(&Header, 0, sizeof(COFF::section)); }
+Symbol::Symbol() { memset(&Header, 0, sizeof(COFF::symbol)); }
+Object::Object() { memset(&Header, 0, sizeof(COFF::header)); }
+}
+
+namespace yaml {
+void ScalarEnumerationTraits<COFF::MachineTypes>::enumeration(
+    IO &IO, COFF::MachineTypes &Value) {
+  ECase(IMAGE_FILE_MACHINE_UNKNOWN);
+  ECase(IMAGE_FILE_MACHINE_AM33);
+  ECase(IMAGE_FILE_MACHINE_AMD64);
+  ECase(IMAGE_FILE_MACHINE_ARM);
+  ECase(IMAGE_FILE_MACHINE_ARMV7);
+  ECase(IMAGE_FILE_MACHINE_EBC);
+  ECase(IMAGE_FILE_MACHINE_I386);
+  ECase(IMAGE_FILE_MACHINE_IA64);
+  ECase(IMAGE_FILE_MACHINE_M32R);
+  ECase(IMAGE_FILE_MACHINE_MIPS16);
+  ECase(IMAGE_FILE_MACHINE_MIPSFPU);
+  ECase(IMAGE_FILE_MACHINE_MIPSFPU16);
+  ECase(IMAGE_FILE_MACHINE_POWERPC);
+  ECase(IMAGE_FILE_MACHINE_POWERPCFP);
+  ECase(IMAGE_FILE_MACHINE_R4000);
+  ECase(IMAGE_FILE_MACHINE_SH3);
+  ECase(IMAGE_FILE_MACHINE_SH3DSP);
+  ECase(IMAGE_FILE_MACHINE_SH4);
+  ECase(IMAGE_FILE_MACHINE_SH5);
+  ECase(IMAGE_FILE_MACHINE_THUMB);
+  ECase(IMAGE_FILE_MACHINE_WCEMIPSV2);
+}
+
+void ScalarEnumerationTraits<COFF::SymbolBaseType>::enumeration(
+    IO &IO, COFF::SymbolBaseType &Value) {
+  ECase(IMAGE_SYM_TYPE_NULL);
+  ECase(IMAGE_SYM_TYPE_VOID);
+  ECase(IMAGE_SYM_TYPE_CHAR);
+  ECase(IMAGE_SYM_TYPE_SHORT);
+  ECase(IMAGE_SYM_TYPE_INT);
+  ECase(IMAGE_SYM_TYPE_LONG);
+  ECase(IMAGE_SYM_TYPE_FLOAT);
+  ECase(IMAGE_SYM_TYPE_DOUBLE);
+  ECase(IMAGE_SYM_TYPE_STRUCT);
+  ECase(IMAGE_SYM_TYPE_UNION);
+  ECase(IMAGE_SYM_TYPE_ENUM);
+  ECase(IMAGE_SYM_TYPE_MOE);
+  ECase(IMAGE_SYM_TYPE_BYTE);
+  ECase(IMAGE_SYM_TYPE_WORD);
+  ECase(IMAGE_SYM_TYPE_UINT);
+  ECase(IMAGE_SYM_TYPE_DWORD);
+}
+
+void ScalarEnumerationTraits<COFF::SymbolStorageClass>::enumeration(
+    IO &IO, COFF::SymbolStorageClass &Value) {
+  ECase(IMAGE_SYM_CLASS_END_OF_FUNCTION);
+  ECase(IMAGE_SYM_CLASS_NULL);
+  ECase(IMAGE_SYM_CLASS_AUTOMATIC);
+  ECase(IMAGE_SYM_CLASS_EXTERNAL);
+  ECase(IMAGE_SYM_CLASS_STATIC);
+  ECase(IMAGE_SYM_CLASS_REGISTER);
+  ECase(IMAGE_SYM_CLASS_EXTERNAL_DEF);
+  ECase(IMAGE_SYM_CLASS_LABEL);
+  ECase(IMAGE_SYM_CLASS_UNDEFINED_LABEL);
+  ECase(IMAGE_SYM_CLASS_MEMBER_OF_STRUCT);
+  ECase(IMAGE_SYM_CLASS_ARGUMENT);
+  ECase(IMAGE_SYM_CLASS_STRUCT_TAG);
+  ECase(IMAGE_SYM_CLASS_MEMBER_OF_UNION);
+  ECase(IMAGE_SYM_CLASS_UNION_TAG);
+  ECase(IMAGE_SYM_CLASS_TYPE_DEFINITION);
+  ECase(IMAGE_SYM_CLASS_UNDEFINED_STATIC);
+  ECase(IMAGE_SYM_CLASS_ENUM_TAG);
+  ECase(IMAGE_SYM_CLASS_MEMBER_OF_ENUM);
+  ECase(IMAGE_SYM_CLASS_REGISTER_PARAM);
+  ECase(IMAGE_SYM_CLASS_BIT_FIELD);
+  ECase(IMAGE_SYM_CLASS_BLOCK);
+  ECase(IMAGE_SYM_CLASS_FUNCTION);
+  ECase(IMAGE_SYM_CLASS_END_OF_STRUCT);
+  ECase(IMAGE_SYM_CLASS_FILE);
+  ECase(IMAGE_SYM_CLASS_SECTION);
+  ECase(IMAGE_SYM_CLASS_WEAK_EXTERNAL);
+  ECase(IMAGE_SYM_CLASS_CLR_TOKEN);
+}
+
+void ScalarEnumerationTraits<COFF::SymbolComplexType>::enumeration(
+    IO &IO, COFF::SymbolComplexType &Value) {
+  ECase(IMAGE_SYM_DTYPE_NULL);
+  ECase(IMAGE_SYM_DTYPE_POINTER);
+  ECase(IMAGE_SYM_DTYPE_FUNCTION);
+  ECase(IMAGE_SYM_DTYPE_ARRAY);
+}
+
+void ScalarEnumerationTraits<COFF::RelocationTypeX86>::enumeration(
+    IO &IO, COFF::RelocationTypeX86 &Value) {
+  ECase(IMAGE_REL_I386_ABSOLUTE);
+  ECase(IMAGE_REL_I386_DIR16);
+  ECase(IMAGE_REL_I386_REL16);
+  ECase(IMAGE_REL_I386_DIR32);
+  ECase(IMAGE_REL_I386_DIR32NB);
+  ECase(IMAGE_REL_I386_SEG12);
+  ECase(IMAGE_REL_I386_SECTION);
+  ECase(IMAGE_REL_I386_SECREL);
+  ECase(IMAGE_REL_I386_TOKEN);
+  ECase(IMAGE_REL_I386_SECREL7);
+  ECase(IMAGE_REL_I386_REL32);
+  ECase(IMAGE_REL_AMD64_ABSOLUTE);
+  ECase(IMAGE_REL_AMD64_ADDR64);
+  ECase(IMAGE_REL_AMD64_ADDR32);
+  ECase(IMAGE_REL_AMD64_ADDR32NB);
+  ECase(IMAGE_REL_AMD64_REL32);
+  ECase(IMAGE_REL_AMD64_REL32_1);
+  ECase(IMAGE_REL_AMD64_REL32_2);
+  ECase(IMAGE_REL_AMD64_REL32_3);
+  ECase(IMAGE_REL_AMD64_REL32_4);
+  ECase(IMAGE_REL_AMD64_REL32_5);
+  ECase(IMAGE_REL_AMD64_SECTION);
+  ECase(IMAGE_REL_AMD64_SECREL);
+  ECase(IMAGE_REL_AMD64_SECREL7);
+  ECase(IMAGE_REL_AMD64_TOKEN);
+  ECase(IMAGE_REL_AMD64_SREL32);
+  ECase(IMAGE_REL_AMD64_PAIR);
+  ECase(IMAGE_REL_AMD64_SSPAN32);
+}
+#undef ECase
+
+#define BCase(X) IO.bitSetCase(Value, #X, COFF::X);
+void ScalarBitSetTraits<COFF::Characteristics>::bitset(
+    IO &IO, COFF::Characteristics &Value) {
+  BCase(IMAGE_FILE_RELOCS_STRIPPED);
+  BCase(IMAGE_FILE_EXECUTABLE_IMAGE);
+  BCase(IMAGE_FILE_LINE_NUMS_STRIPPED);
+  BCase(IMAGE_FILE_LOCAL_SYMS_STRIPPED);
+  BCase(IMAGE_FILE_AGGRESSIVE_WS_TRIM);
+  BCase(IMAGE_FILE_LARGE_ADDRESS_AWARE);
+  BCase(IMAGE_FILE_BYTES_REVERSED_LO);
+  BCase(IMAGE_FILE_32BIT_MACHINE);
+  BCase(IMAGE_FILE_DEBUG_STRIPPED);
+  BCase(IMAGE_FILE_REMOVABLE_RUN_FROM_SWAP);
+  BCase(IMAGE_FILE_NET_RUN_FROM_SWAP);
+  BCase(IMAGE_FILE_SYSTEM);
+  BCase(IMAGE_FILE_DLL);
+  BCase(IMAGE_FILE_UP_SYSTEM_ONLY);
+  BCase(IMAGE_FILE_BYTES_REVERSED_HI);
+}
+
+void ScalarBitSetTraits<COFF::SectionCharacteristics>::bitset(
+    IO &IO, COFF::SectionCharacteristics &Value) {
+  BCase(IMAGE_SCN_TYPE_NO_PAD);
+  BCase(IMAGE_SCN_CNT_CODE);
+  BCase(IMAGE_SCN_CNT_INITIALIZED_DATA);
+  BCase(IMAGE_SCN_CNT_UNINITIALIZED_DATA);
+  BCase(IMAGE_SCN_LNK_OTHER);
+  BCase(IMAGE_SCN_LNK_INFO);
+  BCase(IMAGE_SCN_LNK_REMOVE);
+  BCase(IMAGE_SCN_LNK_COMDAT);
+  BCase(IMAGE_SCN_GPREL);
+  BCase(IMAGE_SCN_MEM_PURGEABLE);
+  BCase(IMAGE_SCN_MEM_16BIT);
+  BCase(IMAGE_SCN_MEM_LOCKED);
+  BCase(IMAGE_SCN_MEM_PRELOAD);
+  BCase(IMAGE_SCN_LNK_NRELOC_OVFL);
+  BCase(IMAGE_SCN_MEM_DISCARDABLE);
+  BCase(IMAGE_SCN_MEM_NOT_CACHED);
+  BCase(IMAGE_SCN_MEM_NOT_PAGED);
+  BCase(IMAGE_SCN_MEM_SHARED);
+  BCase(IMAGE_SCN_MEM_EXECUTE);
+  BCase(IMAGE_SCN_MEM_READ);
+  BCase(IMAGE_SCN_MEM_WRITE);
+}
+#undef BCase
+
+namespace {
+struct NSectionCharacteristics {
+  NSectionCharacteristics(IO &)
+      : Characteristics(COFF::SectionCharacteristics(0)) {}
+  NSectionCharacteristics(IO &, uint32_t C)
+      : Characteristics(COFF::SectionCharacteristics(C)) {}
+  uint32_t denormalize(IO &) { return Characteristics; }
+  COFF::SectionCharacteristics Characteristics;
+};
+
+struct NStorageClass {
+  NStorageClass(IO &) : StorageClass(COFF::SymbolStorageClass(0)) {}
+  NStorageClass(IO &, uint8_t S) : StorageClass(COFF::SymbolStorageClass(S)) {}
+  uint8_t denormalize(IO &) { return StorageClass; }
+
+  COFF::SymbolStorageClass StorageClass;
+};
+
+struct NMachine {
+  NMachine(IO &) : Machine(COFF::MachineTypes(0)) {}
+  NMachine(IO &, uint16_t M) : Machine(COFF::MachineTypes(M)) {}
+  uint16_t denormalize(IO &) { return Machine; }
+  COFF::MachineTypes Machine;
+};
+
+struct NHeaderCharacteristics {
+  NHeaderCharacteristics(IO &) : Characteristics(COFF::Characteristics(0)) {}
+  NHeaderCharacteristics(IO &, uint16_t C)
+      : Characteristics(COFF::Characteristics(C)) {}
+  uint16_t denormalize(IO &) { return Characteristics; }
+
+  COFF::Characteristics Characteristics;
+};
+
+struct NType {
+  NType(IO &) : Type(COFF::RelocationTypeX86(0)) {}
+  NType(IO &, uint16_t T) : Type(COFF::RelocationTypeX86(T)) {}
+  uint16_t denormalize(IO &) { return Type; }
+  COFF::RelocationTypeX86 Type;
+};
+
+}
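These normalization helpers let YAMLIO map the raw integer header fields to
and from their symbolic enum names. As a hedged sketch of how such traits are
consumed (yaml::Input comes from llvm/Support/YAMLTraits.h; the helper
function itself is ours, not part of the imported sources):

    #include "llvm/Object/COFFYAML.h"
    #include "llvm/Support/YAMLTraits.h"
    using namespace llvm;

    // Parse a textual COFF description into the COFFYAML::Object structure
    // whose MappingTraits are defined below.
    static bool parseCOFFDoc(StringRef Buf, COFFYAML::Object &Doc) {
      yaml::Input YIn(Buf);
      YIn >> Doc;
      return !YIn.error();
    }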
+void MappingTraits<COFFYAML::Relocation>::mapping(IO &IO,
+                                                  COFFYAML::Relocation &Rel) {
+  MappingNormalization<NType, uint16_t> NT(IO, Rel.Type);
+
+  IO.mapRequired("VirtualAddress", Rel.VirtualAddress);
+  IO.mapRequired("SymbolName", Rel.SymbolName);
+  IO.mapRequired("Type", NT->Type);
+}
+
+void MappingTraits<COFF::header>::mapping(IO &IO, COFF::header &H) {
+  MappingNormalization<NMachine, uint16_t> NM(IO, H.Machine);
+  MappingNormalization<NHeaderCharacteristics, uint16_t> NC(IO,
+                                                            H.Characteristics);
+
+  IO.mapRequired("Machine", NM->Machine);
+  IO.mapOptional("Characteristics", NC->Characteristics);
+}
+
+void MappingTraits<COFFYAML::Symbol>::mapping(IO &IO, COFFYAML::Symbol &S) {
+  MappingNormalization<NStorageClass, uint8_t> NS(IO, S.Header.StorageClass);
+
+  IO.mapRequired("Name", S.Name);
+  IO.mapRequired("Value", S.Header.Value);
+  IO.mapRequired("SectionNumber", S.Header.SectionNumber);
+  IO.mapRequired("SimpleType", S.SimpleType);
+  IO.mapRequired("ComplexType", S.ComplexType);
+  IO.mapRequired("StorageClass", NS->StorageClass);
+  IO.mapOptional("NumberOfAuxSymbols", S.Header.NumberOfAuxSymbols,
+                 (uint8_t) 0);
+  IO.mapOptional("AuxiliaryData", S.AuxiliaryData, object::yaml::BinaryRef());
+}
+
+void MappingTraits<COFFYAML::Section>::mapping(IO &IO, COFFYAML::Section &Sec) {
+  MappingNormalization<NSectionCharacteristics, uint32_t> NC(
+      IO, Sec.Header.Characteristics);
+  IO.mapRequired("Name", Sec.Name);
+  IO.mapRequired("Characteristics", NC->Characteristics);
+  IO.mapOptional("Alignment", Sec.Alignment);
+  IO.mapRequired("SectionData", Sec.SectionData);
+  IO.mapOptional("Relocations", Sec.Relocations);
+}
+
+void MappingTraits<COFFYAML::Object>::mapping(IO &IO, COFFYAML::Object &Obj) {
+  IO.mapRequired("header", Obj.Header);
+  IO.mapRequired("sections", Obj.Sections);
+  IO.mapRequired("symbols", Obj.Symbols);
+}
+
+}
+}
diff --git a/contrib/llvm/lib/Object/ELF.cpp b/contrib/llvm/lib/Object/ELF.cpp
new file mode 100644
index 0000000..7c80d41
--- /dev/null
+++ b/contrib/llvm/lib/Object/ELF.cpp
@@ -0,0 +1,714 @@
+//===- ELF.cpp - ELF object file implementation -----------------*- C++ -*-===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/ELF.h" + +namespace llvm { +namespace object { + +#define LLVM_ELF_SWITCH_RELOC_TYPE_NAME(enum) \ + case ELF::enum: \ + return #enum; \ + +StringRef getELFRelocationTypeName(uint32_t Machine, uint32_t Type) { + switch (Machine) { + case ELF::EM_X86_64: + switch (Type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_JUMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_RELATIVE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_32S); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPMOD64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSGD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSLD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_DTPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTTPOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PC64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOT64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPCREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPLT64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_PLTOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_SIZE64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_GOTPC32_TLSDESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_TLSDESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_X86_64_IRELATIVE); + default: + break; + } + break; + case ELF::EM_386: + switch (Type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PLT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_JUMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_RELATIVE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_GOTPC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_32PLT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTIE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_PC8); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_PUSH); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GD_POP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_PUSH); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDM_POP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LDO_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_IE_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_LE_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPMOD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DTPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_TPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_GOTDESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_TLS_DESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_386_IRELATIVE); + default: + break; + } + break; + case ELF::EM_MIPS: + switch (Type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_26); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_LITERAL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT5); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SHIFT6); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_DISP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_PAGE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_OFST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GOT_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SUB); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_A); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_INSERT_B); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_DELETE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHER); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_HIGHEST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_CALL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_SCN_DISP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_REL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_ADD_IMMEDIATE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_PJUMP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_RELGOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JALR); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPMOD64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_LDM); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_DTPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_GOTTPREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_TLS_TPREL_LO16); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_JUMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_26_S1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_PC16_S1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_CALL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT_DISP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT_PAGE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_GOT_OFST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_DTPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_DTPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_TPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MICROMIPS_TLS_TPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_MIPS_NUM); + default: + break; + } + break; + case ELF::EM_AARCH64: + switch (Type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ABS16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_PREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G1_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G2_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_UABS_G3); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_MOVW_SABS_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD_PREL_LO19); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_LO21); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_PREL_PG_HI21); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADD_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST8_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TSTBR14); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CONDBR19); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_JUMP26); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_CALL26); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST16_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST32_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST64_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LDST128_ABS_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_ADR_GOT_PAGE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_LD64_GOT_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G1_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_MOVW_DTPREL_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_HI12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_ADD_DTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST8_DTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST16_DTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST32_DTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLD_LDST64_DTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_MOVW_GOTTPREL_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_ADR_GOTTPREL_PAGE21); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD64_GOTTPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSIE_LD_GOTTPREL_PREL19); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G1_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_MOVW_TPREL_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_HI12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_ADD_TPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST8_TPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST16_TPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST32_TPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSLE_LDST64_TPREL_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADR_PAGE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_LD64_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_ADD_LO12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_AARCH64_TLSDESC_CALL); + default: + break; + } + break; + case ELF::EM_ARM: + switch (Type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PC24); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_REL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_PC_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_ABS5); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_SBREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_PC8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_BREL_ADJ); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_SWI8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_XPC25); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_XPC22); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DTPMOD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DTPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_TPOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_JUMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_RELATIVE); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOTOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_BASE_PREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_BREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PLT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_JUMP24); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP24); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_BASE_ABS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PCREL_7_0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PCREL_15_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PCREL_23_15); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SBREL_11_0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SBREL_19_12_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SBREL_27_20_CK); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TARGET1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_SBREL31); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_V4BX); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TARGET2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PREL31); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_ABS_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVT_ABS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_PREL_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVT_PREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_ABS_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVT_ABS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_PREL_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVT_PREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP19); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP6); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_ALU_PREL_11_0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_PC12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ABS32_NOI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_REL32_NOI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G1_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_PC_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_PC_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_PC_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_PC_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_PC_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_PC_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_PC_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_PC_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_PC_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G0_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G1_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ALU_SB_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SB_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SB_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDR_SB_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_SB_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_SB_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDRS_SB_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_SB_G0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_SB_G1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_LDC_SB_G2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_BREL_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVT_BREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_MOVW_BREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_BREL_NC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVT_BREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_MOVW_BREL); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_GOTDESC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_DESCSEQ); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_CALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PLT32_ABS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_ABS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_PREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOT_BREL12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOTOFF12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GOTRELAX); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GNU_VTENTRY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_GNU_VTINHERIT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP11); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_JUMP8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_GD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LDM32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LDO32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_IE32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LE32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LDO12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_LE12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_TLS_IE12GP); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_3); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_4); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_5); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_6); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_7); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_9); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_10); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_11); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_13); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_14); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_PRIVATE_15); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_ME_TOO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_DESCSEQ16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_ARM_THM_TLS_DESCSEQ32); + default: + break; + } + break; + case ELF::EM_HEXAGON: + switch (Type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B22_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B15_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B7_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_0); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_1); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_2); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GPREL16_3); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_HL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B13_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B9_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B32_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B22_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B15_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B13_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B9_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_B7_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_12_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_11_X); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_10_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_9_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_8_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_7_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_32_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_JMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_RELATIVE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_PLT_B22_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPMOD_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_PLT_B22_PCREL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_LO16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_HI16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_6_PCREL_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOTREL_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GOT_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_DTPREL_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_GD_GOT_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_IE_GOT_11_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_32_6_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_16_X); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_HEX_TPREL_11_X); + default: + break; + } + break; + case ELF::EM_PPC: + switch (Type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR24); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR16_HA); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14_BRTAKEN); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_ADDR14_BRNTAKEN); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL24); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14_BRTAKEN); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL14_BRNTAKEN); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TLS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPMOD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPREL16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPREL16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPREL16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_DTPREL32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSGD16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSGD16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSGD16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSGD16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSLD16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSLD16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSLD16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TLSLD16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TPREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TPREL16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TPREL16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_TPREL16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_DTPREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_DTPREL16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_DTPREL16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_GOT_DTPREL16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TLSGD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_TLSLD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC_REL16_HA); + default: + break; + } + break; + case ELF::EM_PPC64: + switch (Type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR24); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR14); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR14_BRTAKEN); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR14_BRNTAKEN); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL24); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL14); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL14_BRTAKEN); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL14_BRNTAKEN); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL32); + 
LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHER); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHERA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHEST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_HIGHESTA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_ADDR16_LO_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT16_LO_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TOC16_LO_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPMOD64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSGD16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TLSLD16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_LO_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_TPREL16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_DTPREL16_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_DTPREL16_LO_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_DTPREL16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_GOT_DTPREL16_HA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_LO_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HIGHER); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HIGHERA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HIGHEST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TPREL16_HIGHESTA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_LO_DS); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HIGHER); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HIGHERA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HIGHEST); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_DTPREL16_HIGHESTA); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLSGD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_TLSLD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL16_LO); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL16_HI); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_PPC64_REL16_HA); + default: + break; 
+ } + break; + case ELF::EM_S390: + switch (Type) { + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_NONE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_8); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_COPY); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GLOB_DAT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_JMP_SLOT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_RELATIVE); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPC); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC16DBL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT16DBL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC32DBL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT32DBL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPCDBL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PC64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLT64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTENT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTOFF16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLTENT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLTOFF16); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLTOFF32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_PLTOFF64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LOAD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GDCALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDCALL); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GD32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GD64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE12); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDM32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDM64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_IE32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_IE64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_IEENT); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LE32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LE64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDO32); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_LDO64); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_DTPMOD); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_DTPOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_TPOFF); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_20); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOT20); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_GOTPLT20); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_TLS_GOTIE20); + LLVM_ELF_SWITCH_RELOC_TYPE_NAME(R_390_IRELATIVE); + default: + break; + } + break; + default: + break; + } + return "Unknown"; +} + +#undef LLVM_ELF_SWITCH_RELOC_TYPE_NAME + +} // end namespace object +} // end namespace llvm diff --git a/contrib/llvm/lib/Object/ELFObjectFile.cpp b/contrib/llvm/lib/Object/ELFObjectFile.cpp index cfe0eb4..15bc6be 100644 --- a/contrib/llvm/lib/Object/ELFObjectFile.cpp +++ 
b/contrib/llvm/lib/Object/ELFObjectFile.cpp
@@ -11,11 +11,10 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/Object/ELF.h"
+#include "llvm/Object/ELFObjectFile.h"
 #include "llvm/Support/MathExtras.h"
 
 namespace llvm {
-
 using namespace object;
 
 // Creates an in-memory object-file by default: createELFObjectFile(Buffer)
@@ -24,7 +23,7 @@ ObjectFile *ObjectFile::createELFObjectFile(MemoryBuffer *Object) {
   error_code ec;
 
   std::size_t MaxAlignment =
-    1ULL << CountTrailingZeros_64(uintptr_t(Object->getBufferStart()));
+    1ULL << countTrailingZeros(uintptr_t(Object->getBufferStart()));
 
   if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB)
 #if !LLVM_IS_UNALIGNED_ACCESS_FAST
diff --git a/contrib/llvm/lib/Object/ELFYAML.cpp b/contrib/llvm/lib/Object/ELFYAML.cpp
new file mode 100644
index 0000000..2f35cf9
--- /dev/null
+++ b/contrib/llvm/lib/Object/ELFYAML.cpp
@@ -0,0 +1,338 @@
+//===- ELFYAML.cpp - ELF YAMLIO implementation ----------------------------===//
+//
+//                     The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines classes for handling the YAML representation of ELF.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Object/ELFYAML.h"
+
+namespace llvm {
+namespace yaml {
+
+void
+ScalarEnumerationTraits<ELFYAML::ELF_ET>::enumeration(IO &IO,
+                                                      ELFYAML::ELF_ET &Value) {
+#define ECase(X) IO.enumCase(Value, #X, ELF::X);
+  ECase(ET_NONE)
+  ECase(ET_REL)
+  ECase(ET_EXEC)
+  ECase(ET_DYN)
+  ECase(ET_CORE)
+#undef ECase
+}
+
+void
+ScalarEnumerationTraits<ELFYAML::ELF_EM>::enumeration(IO &IO,
+                                                      ELFYAML::ELF_EM &Value) {
+#define ECase(X) IO.enumCase(Value, #X, ELF::X);
+  ECase(EM_NONE)
+  ECase(EM_M32)
+  ECase(EM_SPARC)
+  ECase(EM_386)
+  ECase(EM_68K)
+  ECase(EM_88K)
+  ECase(EM_486)
+  ECase(EM_860)
+  ECase(EM_MIPS)
+  ECase(EM_S370)
+  ECase(EM_MIPS_RS3_LE)
+  ECase(EM_PARISC)
+  ECase(EM_VPP500)
+  ECase(EM_SPARC32PLUS)
+  ECase(EM_960)
+  ECase(EM_PPC)
+  ECase(EM_PPC64)
+  ECase(EM_S390)
+  ECase(EM_SPU)
+  ECase(EM_V800)
+  ECase(EM_FR20)
+  ECase(EM_RH32)
+  ECase(EM_RCE)
+  ECase(EM_ARM)
+  ECase(EM_ALPHA)
+  ECase(EM_SH)
+  ECase(EM_SPARCV9)
+  ECase(EM_TRICORE)
+  ECase(EM_ARC)
+  ECase(EM_H8_300)
+  ECase(EM_H8_300H)
+  ECase(EM_H8S)
+  ECase(EM_H8_500)
+  ECase(EM_IA_64)
+  ECase(EM_MIPS_X)
+  ECase(EM_COLDFIRE)
+  ECase(EM_68HC12)
+  ECase(EM_MMA)
+  ECase(EM_PCP)
+  ECase(EM_NCPU)
+  ECase(EM_NDR1)
+  ECase(EM_STARCORE)
+  ECase(EM_ME16)
+  ECase(EM_ST100)
+  ECase(EM_TINYJ)
+  ECase(EM_X86_64)
+  ECase(EM_PDSP)
+  ECase(EM_PDP10)
+  ECase(EM_PDP11)
+  ECase(EM_FX66)
+  ECase(EM_ST9PLUS)
+  ECase(EM_ST7)
+  ECase(EM_68HC16)
+  ECase(EM_68HC11)
+  ECase(EM_68HC08)
+  ECase(EM_68HC05)
+  ECase(EM_SVX)
+  ECase(EM_ST19)
+  ECase(EM_VAX)
+  ECase(EM_CRIS)
+  ECase(EM_JAVELIN)
+  ECase(EM_FIREPATH)
+  ECase(EM_ZSP)
+  ECase(EM_MMIX)
+  ECase(EM_HUANY)
+  ECase(EM_PRISM)
+  ECase(EM_AVR)
+  ECase(EM_FR30)
+  ECase(EM_D10V)
+  ECase(EM_D30V)
+  ECase(EM_V850)
+  ECase(EM_M32R)
+  ECase(EM_MN10300)
+  ECase(EM_MN10200)
+  ECase(EM_PJ)
+  ECase(EM_OPENRISC)
+  ECase(EM_ARC_COMPACT)
+  ECase(EM_XTENSA)
+  ECase(EM_VIDEOCORE)
+  ECase(EM_TMM_GPP)
+  ECase(EM_NS32K)
+  ECase(EM_TPC)
+  ECase(EM_SNP1K)
+  ECase(EM_ST200)
+  ECase(EM_IP2K)
+  ECase(EM_MAX)
+  ECase(EM_CR)
+  ECase(EM_F2MC16)
+  ECase(EM_MSP430)
+  ECase(EM_BLACKFIN)
+  ECase(EM_SE_C33)
+  ECase(EM_SEP)
+  ECase(EM_ARCA)
ECase(EM_UNICORE) + ECase(EM_EXCESS) + ECase(EM_DXP) + ECase(EM_ALTERA_NIOS2) + ECase(EM_CRX) + ECase(EM_XGATE) + ECase(EM_C166) + ECase(EM_M16C) + ECase(EM_DSPIC30F) + ECase(EM_CE) + ECase(EM_M32C) + ECase(EM_TSK3000) + ECase(EM_RS08) + ECase(EM_SHARC) + ECase(EM_ECOG2) + ECase(EM_SCORE7) + ECase(EM_DSP24) + ECase(EM_VIDEOCORE3) + ECase(EM_LATTICEMICO32) + ECase(EM_SE_C17) + ECase(EM_TI_C6000) + ECase(EM_TI_C2000) + ECase(EM_TI_C5500) + ECase(EM_MMDSP_PLUS) + ECase(EM_CYPRESS_M8C) + ECase(EM_R32C) + ECase(EM_TRIMEDIA) + ECase(EM_HEXAGON) + ECase(EM_8051) + ECase(EM_STXP7X) + ECase(EM_NDS32) + ECase(EM_ECOG1) + ECase(EM_ECOG1X) + ECase(EM_MAXQ30) + ECase(EM_XIMO16) + ECase(EM_MANIK) + ECase(EM_CRAYNV2) + ECase(EM_RX) + ECase(EM_METAG) + ECase(EM_MCST_ELBRUS) + ECase(EM_ECOG16) + ECase(EM_CR16) + ECase(EM_ETPU) + ECase(EM_SLE9X) + ECase(EM_L10M) + ECase(EM_K10M) + ECase(EM_AARCH64) + ECase(EM_AVR32) + ECase(EM_STM8) + ECase(EM_TILE64) + ECase(EM_TILEPRO) + ECase(EM_CUDA) + ECase(EM_TILEGX) + ECase(EM_CLOUDSHIELD) + ECase(EM_COREA_1ST) + ECase(EM_COREA_2ND) + ECase(EM_ARC_COMPACT2) + ECase(EM_OPEN8) + ECase(EM_RL78) + ECase(EM_VIDEOCORE5) + ECase(EM_78KOR) + ECase(EM_56800EX) +#undef ECase +} + +void ScalarEnumerationTraits<ELFYAML::ELF_ELFCLASS>::enumeration( + IO &IO, ELFYAML::ELF_ELFCLASS &Value) { +#define ECase(X) IO.enumCase(Value, #X, ELF::X); + // Since the semantics of ELFCLASSNONE is "invalid", just don't accept it + // here. + ECase(ELFCLASS32) + ECase(ELFCLASS64) +#undef ECase +} + +void ScalarEnumerationTraits<ELFYAML::ELF_ELFDATA>::enumeration( + IO &IO, ELFYAML::ELF_ELFDATA &Value) { +#define ECase(X) IO.enumCase(Value, #X, ELF::X); + // Since the semantics of ELFDATANONE is "invalid", just don't accept it + // here. + ECase(ELFDATA2LSB) + ECase(ELFDATA2MSB) +#undef ECase +} + +void ScalarEnumerationTraits<ELFYAML::ELF_ELFOSABI>::enumeration( + IO &IO, ELFYAML::ELF_ELFOSABI &Value) { +#define ECase(X) IO.enumCase(Value, #X, ELF::X); + ECase(ELFOSABI_NONE) + ECase(ELFOSABI_HPUX) + ECase(ELFOSABI_NETBSD) + ECase(ELFOSABI_GNU) + ECase(ELFOSABI_GNU) + ECase(ELFOSABI_HURD) + ECase(ELFOSABI_SOLARIS) + ECase(ELFOSABI_AIX) + ECase(ELFOSABI_IRIX) + ECase(ELFOSABI_FREEBSD) + ECase(ELFOSABI_TRU64) + ECase(ELFOSABI_MODESTO) + ECase(ELFOSABI_OPENBSD) + ECase(ELFOSABI_OPENVMS) + ECase(ELFOSABI_NSK) + ECase(ELFOSABI_AROS) + ECase(ELFOSABI_FENIXOS) + ECase(ELFOSABI_C6000_ELFABI) + ECase(ELFOSABI_C6000_LINUX) + ECase(ELFOSABI_ARM) + ECase(ELFOSABI_STANDALONE) +#undef ECase +} + +void ScalarEnumerationTraits<ELFYAML::ELF_SHT>::enumeration( + IO &IO, ELFYAML::ELF_SHT &Value) { +#define ECase(X) IO.enumCase(Value, #X, ELF::X); + ECase(SHT_NULL) + ECase(SHT_PROGBITS) + // No SHT_SYMTAB. Use the top-level `Symbols` key instead. + // FIXME: Issue a diagnostic with this information.
+ ECase(SHT_STRTAB) + ECase(SHT_RELA) + ECase(SHT_HASH) + ECase(SHT_DYNAMIC) + ECase(SHT_NOTE) + ECase(SHT_NOBITS) + ECase(SHT_REL) + ECase(SHT_SHLIB) + ECase(SHT_DYNSYM) + ECase(SHT_INIT_ARRAY) + ECase(SHT_FINI_ARRAY) + ECase(SHT_PREINIT_ARRAY) + ECase(SHT_GROUP) + ECase(SHT_SYMTAB_SHNDX) +#undef ECase +} + +void ScalarBitSetTraits<ELFYAML::ELF_SHF>::bitset(IO &IO, + ELFYAML::ELF_SHF &Value) { +#define BCase(X) IO.bitSetCase(Value, #X, ELF::X); + BCase(SHF_WRITE) + BCase(SHF_ALLOC) + BCase(SHF_EXCLUDE) + BCase(SHF_EXECINSTR) + BCase(SHF_MERGE) + BCase(SHF_STRINGS) + BCase(SHF_INFO_LINK) + BCase(SHF_LINK_ORDER) + BCase(SHF_OS_NONCONFORMING) + BCase(SHF_GROUP) + BCase(SHF_TLS) +#undef BCase +} + +void ScalarEnumerationTraits<ELFYAML::ELF_STT>::enumeration( + IO &IO, ELFYAML::ELF_STT &Value) { +#define ECase(X) IO.enumCase(Value, #X, ELF::X); + ECase(STT_NOTYPE) + ECase(STT_OBJECT) + ECase(STT_FUNC) + ECase(STT_SECTION) + ECase(STT_FILE) + ECase(STT_COMMON) + ECase(STT_TLS) + ECase(STT_GNU_IFUNC) +#undef ECase +} + +void MappingTraits<ELFYAML::FileHeader>::mapping(IO &IO, + ELFYAML::FileHeader &FileHdr) { + IO.mapRequired("Class", FileHdr.Class); + IO.mapRequired("Data", FileHdr.Data); + IO.mapOptional("OSABI", FileHdr.OSABI, ELFYAML::ELF_ELFOSABI(0)); + IO.mapRequired("Type", FileHdr.Type); + IO.mapRequired("Machine", FileHdr.Machine); + IO.mapOptional("Entry", FileHdr.Entry, Hex64(0)); +} + +void MappingTraits<ELFYAML::Symbol>::mapping(IO &IO, ELFYAML::Symbol &Symbol) { + IO.mapOptional("Name", Symbol.Name, StringRef()); + IO.mapOptional("Type", Symbol.Type, ELFYAML::ELF_STT(0)); + IO.mapOptional("Section", Symbol.Section, StringRef()); + IO.mapOptional("Value", Symbol.Value, Hex64(0)); + IO.mapOptional("Size", Symbol.Size, Hex64(0)); +} + +void MappingTraits<ELFYAML::LocalGlobalWeakSymbols>::mapping( + IO &IO, ELFYAML::LocalGlobalWeakSymbols &Symbols) { + IO.mapOptional("Local", Symbols.Local); + IO.mapOptional("Global", Symbols.Global); + IO.mapOptional("Weak", Symbols.Weak); +} + +void MappingTraits<ELFYAML::Section>::mapping(IO &IO, + ELFYAML::Section &Section) { + IO.mapOptional("Name", Section.Name, StringRef()); + IO.mapRequired("Type", Section.Type); + IO.mapOptional("Flags", Section.Flags, ELFYAML::ELF_SHF(0)); + IO.mapOptional("Address", Section.Address, Hex64(0)); + IO.mapOptional("Content", Section.Content); + IO.mapOptional("Link", Section.Link); + IO.mapOptional("AddressAlign", Section.AddressAlign, Hex64(0)); +} + +void MappingTraits<ELFYAML::Object>::mapping(IO &IO, ELFYAML::Object &Object) { + IO.mapRequired("FileHeader", Object.Header); + IO.mapOptional("Sections", Object.Sections); + IO.mapOptional("Symbols", Object.Symbols); +} + +} // end namespace yaml +} // end namespace llvm diff --git a/contrib/llvm/lib/Object/Error.cpp b/contrib/llvm/lib/Object/Error.cpp index 2594625..47ce38c 100644 --- a/contrib/llvm/lib/Object/Error.cpp +++ b/contrib/llvm/lib/Object/Error.cpp @@ -31,18 +31,20 @@ const char *_object_error_category::name() const { } std::string _object_error_category::message(int ev) const { - switch (ev) { + object_error::Impl E = static_cast<object_error::Impl>(ev); + switch (E) { case object_error::success: return "Success"; + case object_error::arch_not_found: + return "No object file for requested architecture"; case object_error::invalid_file_type: return "The file was not recognized as a valid object file"; case object_error::parse_failed: return "Invalid data was encountered while parsing the file"; case object_error::unexpected_eof: return "The end of the file was unexpectedly encountered"; - default: - llvm_unreachable("An enumerator of object_error does not have a message " - "defined."); } +
llvm_unreachable("An enumerator of object_error does not have a message " + "defined."); } error_condition _object_error_category::default_error_condition(int ev) const { diff --git a/contrib/llvm/lib/Object/MachOObjectFile.cpp b/contrib/llvm/lib/Object/MachOObjectFile.cpp index dfd8d3d..d2cb8bd 100644 --- a/contrib/llvm/lib/Object/MachOObjectFile.cpp +++ b/contrib/llvm/lib/Object/MachOObjectFile.cpp @@ -14,11 +14,11 @@ #include "llvm/Object/MachO.h" #include "llvm/ADT/Triple.h" -#include "llvm/Object/MachOFormat.h" #include "llvm/Support/DataExtractor.h" #include "llvm/Support/Format.h" #include "llvm/Support/Host.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/raw_ostream.h" #include #include #include @@ -29,16 +29,16 @@ using namespace object; namespace llvm { namespace object { -struct SymbolTableEntryBase { - uint32_t StringIndex; - uint8_t Type; - uint8_t SectionIndex; - uint16_t Flags; +struct nlist_base { + uint32_t n_strx; + uint8_t n_type; + uint8_t n_sect; + uint16_t n_desc; }; -struct SectionBase { - char Name[16]; - char SegmentName[16]; +struct section_base { + char sectname[16]; + char segname[16]; }; template @@ -50,167 +50,174 @@ template static void SwapStruct(T &Value); template<> -void SwapStruct(macho::RelocationEntry &H) { - SwapValue(H.Word0); - SwapValue(H.Word1); +void SwapStruct(MachO::any_relocation_info &H) { + SwapValue(H.r_word0); + SwapValue(H.r_word1); } template<> -void SwapStruct(macho::LoadCommand &L) { - SwapValue(L.Type); - SwapValue(L.Size); +void SwapStruct(MachO::load_command &L) { + SwapValue(L.cmd); + SwapValue(L.cmdsize); } template<> -void SwapStruct(SymbolTableEntryBase &S) { - SwapValue(S.StringIndex); - SwapValue(S.Flags); +void SwapStruct(nlist_base &S) { + SwapValue(S.n_strx); + SwapValue(S.n_desc); } template<> -void SwapStruct(macho::Section &S) { - SwapValue(S.Address); - SwapValue(S.Size); - SwapValue(S.Offset); - SwapValue(S.Align); - SwapValue(S.RelocationTableOffset); - SwapValue(S.NumRelocationTableEntries); - SwapValue(S.Flags); - SwapValue(S.Reserved1); - SwapValue(S.Reserved2); +void SwapStruct(MachO::section &S) { + SwapValue(S.addr); + SwapValue(S.size); + SwapValue(S.offset); + SwapValue(S.align); + SwapValue(S.reloff); + SwapValue(S.nreloc); + SwapValue(S.flags); + SwapValue(S.reserved1); + SwapValue(S.reserved2); } template<> -void SwapStruct(macho::Section64 &S) { - SwapValue(S.Address); - SwapValue(S.Size); - SwapValue(S.Offset); - SwapValue(S.Align); - SwapValue(S.RelocationTableOffset); - SwapValue(S.NumRelocationTableEntries); - SwapValue(S.Flags); - SwapValue(S.Reserved1); - SwapValue(S.Reserved2); - SwapValue(S.Reserved3); +void SwapStruct(MachO::section_64 &S) { + SwapValue(S.addr); + SwapValue(S.size); + SwapValue(S.offset); + SwapValue(S.align); + SwapValue(S.reloff); + SwapValue(S.nreloc); + SwapValue(S.flags); + SwapValue(S.reserved1); + SwapValue(S.reserved2); + SwapValue(S.reserved3); } template<> -void SwapStruct(macho::SymbolTableEntry &S) { - SwapValue(S.StringIndex); - SwapValue(S.Flags); - SwapValue(S.Value); +void SwapStruct(MachO::nlist &S) { + SwapValue(S.n_strx); + SwapValue(S.n_desc); + SwapValue(S.n_value); } template<> -void SwapStruct(macho::Symbol64TableEntry &S) { - SwapValue(S.StringIndex); - SwapValue(S.Flags); - SwapValue(S.Value); +void SwapStruct(MachO::nlist_64 &S) { + SwapValue(S.n_strx); + SwapValue(S.n_desc); + SwapValue(S.n_value); } template<> -void SwapStruct(macho::Header &H) { - SwapValue(H.Magic); - SwapValue(H.CPUType); - SwapValue(H.CPUSubtype); - 
SwapValue(H.FileType); - SwapValue(H.NumLoadCommands); - SwapValue(H.SizeOfLoadCommands); - SwapValue(H.Flags); +void SwapStruct(MachO::mach_header &H) { + SwapValue(H.magic); + SwapValue(H.cputype); + SwapValue(H.cpusubtype); + SwapValue(H.filetype); + SwapValue(H.ncmds); + SwapValue(H.sizeofcmds); + SwapValue(H.flags); } template<> -void SwapStruct(macho::Header64Ext &E) { - SwapValue(E.Reserved); +void SwapStruct(MachO::mach_header_64 &H) { + SwapValue(H.magic); + SwapValue(H.cputype); + SwapValue(H.cpusubtype); + SwapValue(H.filetype); + SwapValue(H.ncmds); + SwapValue(H.sizeofcmds); + SwapValue(H.flags); + SwapValue(H.reserved); } template<> -void SwapStruct(macho::SymtabLoadCommand &C) { - SwapValue(C.Type); - SwapValue(C.Size); - SwapValue(C.SymbolTableOffset); - SwapValue(C.NumSymbolTableEntries); - SwapValue(C.StringTableOffset); - SwapValue(C.StringTableSize); +void SwapStruct(MachO::symtab_command &C) { + SwapValue(C.cmd); + SwapValue(C.cmdsize); + SwapValue(C.symoff); + SwapValue(C.nsyms); + SwapValue(C.stroff); + SwapValue(C.strsize); } template<> -void SwapStruct(macho::DysymtabLoadCommand &C) { - SwapValue(C.Type); - SwapValue(C.Size); - SwapValue(C.LocalSymbolsIndex); - SwapValue(C.NumLocalSymbols); - SwapValue(C.ExternalSymbolsIndex); - SwapValue(C.NumExternalSymbols); - SwapValue(C.UndefinedSymbolsIndex); - SwapValue(C.NumUndefinedSymbols); - SwapValue(C.TOCOffset); - SwapValue(C.NumTOCEntries); - SwapValue(C.ModuleTableOffset); - SwapValue(C.NumModuleTableEntries); - SwapValue(C.ReferenceSymbolTableOffset); - SwapValue(C.NumReferencedSymbolTableEntries); - SwapValue(C.IndirectSymbolTableOffset); - SwapValue(C.NumIndirectSymbolTableEntries); - SwapValue(C.ExternalRelocationTableOffset); - SwapValue(C.NumExternalRelocationTableEntries); - SwapValue(C.LocalRelocationTableOffset); - SwapValue(C.NumLocalRelocationTableEntries); +void SwapStruct(MachO::dysymtab_command &C) { + SwapValue(C.cmd); + SwapValue(C.cmdsize); + SwapValue(C.ilocalsym); + SwapValue(C.nlocalsym); + SwapValue(C.iextdefsym); + SwapValue(C.nextdefsym); + SwapValue(C.iundefsym); + SwapValue(C.nundefsym); + SwapValue(C.tocoff); + SwapValue(C.ntoc); + SwapValue(C.modtaboff); + SwapValue(C.nmodtab); + SwapValue(C.extrefsymoff); + SwapValue(C.nextrefsyms); + SwapValue(C.indirectsymoff); + SwapValue(C.nindirectsyms); + SwapValue(C.extreloff); + SwapValue(C.nextrel); + SwapValue(C.locreloff); + SwapValue(C.nlocrel); } template<> -void SwapStruct(macho::LinkeditDataLoadCommand &C) { - SwapValue(C.Type); - SwapValue(C.Size); - SwapValue(C.DataOffset); - SwapValue(C.DataSize); +void SwapStruct(MachO::linkedit_data_command &C) { + SwapValue(C.cmd); + SwapValue(C.cmdsize); + SwapValue(C.dataoff); + SwapValue(C.datasize); } template<> -void SwapStruct(macho::SegmentLoadCommand &C) { - SwapValue(C.Type); - SwapValue(C.Size); - SwapValue(C.VMAddress); - SwapValue(C.VMSize); - SwapValue(C.FileOffset); - SwapValue(C.FileSize); - SwapValue(C.MaxVMProtection); - SwapValue(C.InitialVMProtection); - SwapValue(C.NumSections); - SwapValue(C.Flags); +void SwapStruct(MachO::segment_command &C) { + SwapValue(C.cmd); + SwapValue(C.cmdsize); + SwapValue(C.vmaddr); + SwapValue(C.vmsize); + SwapValue(C.fileoff); + SwapValue(C.filesize); + SwapValue(C.maxprot); + SwapValue(C.initprot); + SwapValue(C.nsects); + SwapValue(C.flags); } template<> -void SwapStruct(macho::Segment64LoadCommand &C) { - SwapValue(C.Type); - SwapValue(C.Size); - SwapValue(C.VMAddress); - SwapValue(C.VMSize); - SwapValue(C.FileOffset); - SwapValue(C.FileSize); - 
SwapValue(C.MaxVMProtection); - SwapValue(C.InitialVMProtection); - SwapValue(C.NumSections); - SwapValue(C.Flags); +void SwapStruct(MachO::segment_command_64 &C) { + SwapValue(C.cmd); + SwapValue(C.cmdsize); + SwapValue(C.vmaddr); + SwapValue(C.vmsize); + SwapValue(C.fileoff); + SwapValue(C.filesize); + SwapValue(C.maxprot); + SwapValue(C.initprot); + SwapValue(C.nsects); + SwapValue(C.flags); } template<> -void SwapStruct(macho::IndirectSymbolTableEntry &C) { - SwapValue(C.Index); +void SwapStruct(uint32_t &C) { + SwapValue(C); } template<> -void SwapStruct(macho::LinkerOptionsLoadCommand &C) { - SwapValue(C.Type); - SwapValue(C.Size); - SwapValue(C.Count); +void SwapStruct(MachO::linker_options_command &C) { + SwapValue(C.cmd); + SwapValue(C.cmdsize); + SwapValue(C.count); } template<> -void SwapStruct(macho::DataInCodeTableEntry &C) { - SwapValue(C.Offset); - SwapValue(C.Length); - SwapValue(C.Kind); +void SwapStruct(MachO::data_in_code_entry &C) { + SwapValue(C.offset); + SwapValue(C.length); + SwapValue(C.kind); } template @@ -226,11 +233,11 @@ static uint32_t getSegmentLoadCommandNumSections(const MachOObjectFile *O, const MachOObjectFile::LoadCommandInfo &L) { if (O->is64Bit()) { - macho::Segment64LoadCommand S = O->getSegment64LoadCommand(L); - return S.NumSections; + MachO::segment_command_64 S = O->getSegment64LoadCommand(L); + return S.nsects; } - macho::SegmentLoadCommand S = O->getSegmentLoadCommand(L); - return S.NumSections; + MachO::segment_command S = O->getSegmentLoadCommand(L); + return S.nsects; } static const char * @@ -239,10 +246,10 @@ getSectionPtr(const MachOObjectFile *O, MachOObjectFile::LoadCommandInfo L, uintptr_t CommandAddr = reinterpret_cast(L.Ptr); bool Is64 = O->is64Bit(); - unsigned SegmentLoadSize = Is64 ? sizeof(macho::Segment64LoadCommand) : - sizeof(macho::SegmentLoadCommand); - unsigned SectionSize = Is64 ? sizeof(macho::Section64) : - sizeof(macho::Section); + unsigned SegmentLoadSize = Is64 ? sizeof(MachO::segment_command_64) : + sizeof(MachO::segment_command); + unsigned SectionSize = Is64 ? 
sizeof(MachO::section_64) : + sizeof(MachO::section); uintptr_t SectionAddr = CommandAddr + SegmentLoadSize + Sec * SectionSize; return reinterpret_cast(SectionAddr); @@ -252,10 +259,10 @@ static const char *getPtr(const MachOObjectFile *O, size_t Offset) { return O->getData().substr(Offset, 1).data(); } -static SymbolTableEntryBase +static nlist_base getSymbolTableEntryBase(const MachOObjectFile *O, DataRefImpl DRI) { const char *P = reinterpret_cast(DRI.p); - return getStruct(O, P); + return getStruct(O, P); } static StringRef parseSegmentOrSectionName(const char *P) { @@ -283,11 +290,11 @@ static void advanceTo(T &it, size_t Val) { } static unsigned getCPUType(const MachOObjectFile *O) { - return O->getHeader().CPUType; + return O->getHeader().cputype; } static void printRelocationTargetName(const MachOObjectFile *O, - const macho::RelocationEntry &RE, + const MachO::any_relocation_info &RE, raw_string_ostream &fmt) { bool IsScattered = O->isRelocationScattered(RE); @@ -339,7 +346,7 @@ static void printRelocationTargetName(const MachOObjectFile *O, StringRef S; bool isExtern = O->getPlainRelocationExternal(RE); - uint64_t Val = O->getAnyRelocationAddress(RE); + uint64_t Val = O->getPlainRelocationSymbolNum(RE); if (isExtern) { symbol_iterator SI = O->begin_symbols(); @@ -347,86 +354,92 @@ static void printRelocationTargetName(const MachOObjectFile *O, SI->getName(S); } else { section_iterator SI = O->begin_sections(); - advanceTo(SI, Val); + // Adjust for the fact that sections are 1-indexed. + advanceTo(SI, Val - 1); SI->getName(S); } fmt << S; } -static uint32_t getPlainRelocationAddress(const macho::RelocationEntry &RE) { - return RE.Word0; +static uint32_t +getPlainRelocationAddress(const MachO::any_relocation_info &RE) { + return RE.r_word0; } static unsigned -getScatteredRelocationAddress(const macho::RelocationEntry &RE) { - return RE.Word0 & 0xffffff; +getScatteredRelocationAddress(const MachO::any_relocation_info &RE) { + return RE.r_word0 & 0xffffff; } static bool getPlainRelocationPCRel(const MachOObjectFile *O, - const macho::RelocationEntry &RE) { + const MachO::any_relocation_info &RE) { if (O->isLittleEndian()) - return (RE.Word1 >> 24) & 1; - return (RE.Word1 >> 7) & 1; + return (RE.r_word1 >> 24) & 1; + return (RE.r_word1 >> 7) & 1; } static bool getScatteredRelocationPCRel(const MachOObjectFile *O, - const macho::RelocationEntry &RE) { - return (RE.Word0 >> 30) & 1; + const MachO::any_relocation_info &RE) { + return (RE.r_word0 >> 30) & 1; } static unsigned getPlainRelocationLength(const MachOObjectFile *O, - const macho::RelocationEntry &RE) { + const MachO::any_relocation_info &RE) { if (O->isLittleEndian()) - return (RE.Word1 >> 25) & 3; - return (RE.Word1 >> 5) & 3; + return (RE.r_word1 >> 25) & 3; + return (RE.r_word1 >> 5) & 3; } static unsigned -getScatteredRelocationLength(const macho::RelocationEntry &RE) { - return (RE.Word0 >> 28) & 3; +getScatteredRelocationLength(const MachO::any_relocation_info &RE) { + return (RE.r_word0 >> 28) & 3; } static unsigned getPlainRelocationType(const MachOObjectFile *O, - const macho::RelocationEntry &RE) { + const MachO::any_relocation_info &RE) { if (O->isLittleEndian()) - return RE.Word1 >> 28; - return RE.Word1 & 0xf; + return RE.r_word1 >> 28; + return RE.r_word1 & 0xf; } -static unsigned getScatteredRelocationType(const macho::RelocationEntry &RE) { - return (RE.Word0 >> 24) & 0xf; +static unsigned +getScatteredRelocationType(const MachO::any_relocation_info &RE) { + return (RE.r_word0 >> 24) & 0xf; } static uint32_t 
getSectionFlags(const MachOObjectFile *O, DataRefImpl Sec) { if (O->is64Bit()) { - macho::Section64 Sect = O->getSection64(Sec); - return Sect.Flags; + MachO::section_64 Sect = O->getSection64(Sec); + return Sect.flags; } - macho::Section Sect = O->getSection(Sec); - return Sect.Flags; + MachO::section Sect = O->getSection(Sec); + return Sect.flags; } MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, bool IsLittleEndian, bool Is64bits, error_code &ec) : ObjectFile(getMachOType(IsLittleEndian, Is64bits), Object), - SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL) { - uint32_t LoadCommandCount = this->getHeader().NumLoadCommands; - macho::LoadCommandType SegmentLoadType = is64Bit() ? - macho::LCT_Segment64 : macho::LCT_Segment; + SymtabLoadCmd(NULL), DysymtabLoadCmd(NULL), DataInCodeLoadCmd(NULL) { + uint32_t LoadCommandCount = this->getHeader().ncmds; + MachO::LoadCommandType SegmentLoadType = is64Bit() ? + MachO::LC_SEGMENT_64 : MachO::LC_SEGMENT; MachOObjectFile::LoadCommandInfo Load = getFirstLoadCommandInfo(); for (unsigned I = 0; ; ++I) { - if (Load.C.Type == macho::LCT_Symtab) { + if (Load.C.cmd == MachO::LC_SYMTAB) { assert(!SymtabLoadCmd && "Multiple symbol tables"); SymtabLoadCmd = Load.Ptr; - } else if (Load.C.Type == macho::LCT_Dysymtab) { + } else if (Load.C.cmd == MachO::LC_DYSYMTAB) { assert(!DysymtabLoadCmd && "Multiple dynamic symbol tables"); DysymtabLoadCmd = Load.Ptr; - } else if (Load.C.Type == SegmentLoadType) { + } else if (Load.C.cmd == MachO::LC_DATA_IN_CODE) { + assert(!DataInCodeLoadCmd && "Multiple data in code tables"); + DataInCodeLoadCmd = Load.Ptr; + } else if (Load.C.cmd == SegmentLoadType) { uint32_t NumSections = getSegmentLoadCommandNumSections(this, Load); for (unsigned J = 0; J < NumSections; ++J) { const char *Sec = getSectionPtr(this, Load, J); @@ -444,8 +457,8 @@ MachOObjectFile::MachOObjectFile(MemoryBuffer *Object, error_code MachOObjectFile::getSymbolNext(DataRefImpl Symb, SymbolRef &Res) const { unsigned SymbolTableEntrySize = is64Bit() ? 
- sizeof(macho::Symbol64TableEntry) : - sizeof(macho::SymbolTableEntry); + sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); Symb.p += SymbolTableEntrySize; Res = SymbolRef(Symb, this); return object_error::success; @@ -454,8 +467,8 @@ error_code MachOObjectFile::getSymbolNext(DataRefImpl Symb, error_code MachOObjectFile::getSymbolName(DataRefImpl Symb, StringRef &Res) const { StringRef StringTable = getStringTableData(); - SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); - const char *Start = &StringTable.data()[Entry.StringIndex]; + nlist_base Entry = getSymbolTableEntryBase(this, Symb); + const char *Start = &StringTable.data()[Entry.n_strx]; Res = StringRef(Start); return object_error::success; } @@ -463,11 +476,11 @@ error_code MachOObjectFile::getSymbolName(DataRefImpl Symb, error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, uint64_t &Res) const { if (is64Bit()) { - macho::Symbol64TableEntry Entry = getSymbol64TableEntry(Symb); - Res = Entry.Value; + MachO::nlist_64 Entry = getSymbol64TableEntry(Symb); + Res = Entry.n_value; } else { - macho::SymbolTableEntry Entry = getSymbolTableEntry(Symb); - Res = Entry.Value; + MachO::nlist Entry = getSymbolTableEntry(Symb); + Res = Entry.n_value; } return object_error::success; } @@ -475,18 +488,18 @@ error_code MachOObjectFile::getSymbolAddress(DataRefImpl Symb, error_code MachOObjectFile::getSymbolFileOffset(DataRefImpl Symb, uint64_t &Res) const { - SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); + nlist_base Entry = getSymbolTableEntryBase(this, Symb); getSymbolAddress(Symb, Res); - if (Entry.SectionIndex) { + if (Entry.n_sect) { uint64_t Delta; DataRefImpl SecRel; - SecRel.d.a = Entry.SectionIndex-1; + SecRel.d.a = Entry.n_sect-1; if (is64Bit()) { - macho::Section64 Sec = getSection64(SecRel); - Delta = Sec.Offset - Sec.Address; + MachO::section_64 Sec = getSection64(SecRel); + Delta = Sec.offset - Sec.addr; } else { - macho::Section Sec = getSection(SecRel); - Delta = Sec.Offset - Sec.Address; + MachO::section Sec = getSection(SecRel); + Delta = Sec.offset - Sec.addr; } Res += Delta; @@ -500,8 +513,8 @@ error_code MachOObjectFile::getSymbolAlignment(DataRefImpl DRI, uint32_t flags; this->getSymbolFlags(DRI, flags); if (flags & SymbolRef::SF_Common) { - SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI); - Result = 1 << MachO::GET_COMM_ALIGN(Entry.Flags); + nlist_base Entry = getSymbolTableEntryBase(this, DRI); + Result = 1 << MachO::GET_COMM_ALIGN(Entry.n_desc); } else { Result = 0; } @@ -514,13 +527,13 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, uint64_t EndOffset = 0; uint8_t SectionIndex; - SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI); + nlist_base Entry = getSymbolTableEntryBase(this, DRI); uint64_t Value; getSymbolAddress(DRI, Value); BeginOffset = Value; - SectionIndex = Entry.SectionIndex; + SectionIndex = Entry.n_sect; if (!SectionIndex) { uint32_t flags = SymbolRef::SF_None; this->getSymbolFlags(DRI, flags); @@ -538,7 +551,7 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, DataRefImpl DRI = I->getRawDataRefImpl(); Entry = getSymbolTableEntryBase(this, DRI); getSymbolAddress(DRI, Value); - if (Entry.SectionIndex == SectionIndex && Value > BeginOffset) + if (Entry.n_sect == SectionIndex && Value > BeginOffset) if (!EndOffset || Value < EndOffset) EndOffset = Value; } @@ -556,73 +569,47 @@ error_code MachOObjectFile::getSymbolSize(DataRefImpl DRI, error_code MachOObjectFile::getSymbolType(DataRefImpl Symb, SymbolRef::Type 
&Res) const { - SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); - uint8_t n_type = Entry.Type; + nlist_base Entry = getSymbolTableEntryBase(this, Symb); + uint8_t n_type = Entry.n_type; Res = SymbolRef::ST_Other; // If this is a STAB debugging symbol, we can do nothing more. - if (n_type & MachO::NlistMaskStab) { + if (n_type & MachO::N_STAB) { Res = SymbolRef::ST_Debug; return object_error::success; } - switch (n_type & MachO::NlistMaskType) { - case MachO::NListTypeUndefined : + switch (n_type & MachO::N_TYPE) { + case MachO::N_UNDF : Res = SymbolRef::ST_Unknown; break; - case MachO::NListTypeSection : + case MachO::N_SECT : Res = SymbolRef::ST_Function; break; } return object_error::success; } -error_code MachOObjectFile::getSymbolNMTypeChar(DataRefImpl Symb, - char &Res) const { - SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); - uint8_t Type = Entry.Type; - uint16_t Flags = Entry.Flags; - - char Char; - switch (Type & macho::STF_TypeMask) { - case macho::STT_Undefined: - Char = 'u'; - break; - case macho::STT_Absolute: - case macho::STT_Section: - Char = 's'; - break; - default: - Char = '?'; - break; - } - - if (Flags & (macho::STF_External | macho::STF_PrivateExtern)) - Char = toupper(static_cast(Char)); - Res = Char; - return object_error::success; -} - error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, uint32_t &Result) const { - SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, DRI); + nlist_base Entry = getSymbolTableEntryBase(this, DRI); - uint8_t MachOType = Entry.Type; - uint16_t MachOFlags = Entry.Flags; + uint8_t MachOType = Entry.n_type; + uint16_t MachOFlags = Entry.n_desc; // TODO: Correctly set SF_ThreadLocal Result = SymbolRef::SF_None; - if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) + if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) Result |= SymbolRef::SF_Undefined; - if (MachOFlags & macho::STF_StabsEntryMask) + if (MachOType & MachO::N_STAB) Result |= SymbolRef::SF_FormatSpecific; - if (MachOType & MachO::NlistMaskExternal) { + if (MachOType & MachO::N_EXT) { Result |= SymbolRef::SF_Global; - if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeUndefined) { + if ((MachOType & MachO::N_TYPE) == MachO::N_UNDF) { uint64_t Value; getSymbolAddress(DRI, Value); if (Value) @@ -630,10 +617,10 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, } } - if (MachOFlags & (MachO::NListDescWeakRef | MachO::NListDescWeakDef)) + if (MachOFlags & (MachO::N_WEAK_REF | MachO::N_WEAK_DEF)) Result |= SymbolRef::SF_Weak; - if ((MachOType & MachO::NlistMaskType) == MachO::NListTypeAbsolute) + if ((MachOType & MachO::N_TYPE) == MachO::N_ABS) Result |= SymbolRef::SF_Absolute; return object_error::success; @@ -642,8 +629,8 @@ error_code MachOObjectFile::getSymbolFlags(DataRefImpl DRI, error_code MachOObjectFile::getSymbolSection(DataRefImpl Symb, section_iterator &Res) const { - SymbolTableEntryBase Entry = getSymbolTableEntryBase(this, Symb); - uint8_t index = Entry.SectionIndex; + nlist_base Entry = getSymbolTableEntryBase(this, Symb); + uint8_t index = Entry.n_sect; if (index == 0) { Res = end_sections(); @@ -678,11 +665,11 @@ MachOObjectFile::getSectionName(DataRefImpl Sec, StringRef &Result) const { error_code MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const { if (is64Bit()) { - macho::Section64 Sect = getSection64(Sec); - Res = Sect.Address; + MachO::section_64 Sect = getSection64(Sec); + Res = Sect.addr; } else { - macho::Section Sect = getSection(Sec); - Res = 
Sect.Address; + MachO::section Sect = getSection(Sec); + Res = Sect.addr; } return object_error::success; } @@ -690,11 +677,11 @@ MachOObjectFile::getSectionAddress(DataRefImpl Sec, uint64_t &Res) const { error_code MachOObjectFile::getSectionSize(DataRefImpl Sec, uint64_t &Res) const { if (is64Bit()) { - macho::Section64 Sect = getSection64(Sec); - Res = Sect.Size; + MachO::section_64 Sect = getSection64(Sec); + Res = Sect.size; } else { - macho::Section Sect = getSection(Sec); - Res = Sect.Size; + MachO::section Sect = getSection(Sec); + Res = Sect.size; } return object_error::success; @@ -706,13 +693,13 @@ MachOObjectFile::getSectionContents(DataRefImpl Sec, StringRef &Res) const { uint64_t Size; if (is64Bit()) { - macho::Section64 Sect = getSection64(Sec); - Offset = Sect.Offset; - Size = Sect.Size; + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.offset; + Size = Sect.size; } else { - macho::Section Sect =getSection(Sec); - Offset = Sect.Offset; - Size = Sect.Size; + MachO::section Sect = getSection(Sec); + Offset = Sect.offset; + Size = Sect.size; } Res = this->getData().substr(Offset, Size); @@ -723,11 +710,11 @@ error_code MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const { uint32_t Align; if (is64Bit()) { - macho::Section64 Sect = getSection64(Sec); - Align = Sect.Align; + MachO::section_64 Sect = getSection64(Sec); + Align = Sect.align; } else { - macho::Section Sect = getSection(Sec); - Align = Sect.Align; + MachO::section Sect = getSection(Sec); + Align = Sect.align; } Res = uint64_t(1) << Align; @@ -737,7 +724,7 @@ MachOObjectFile::getSectionAlignment(DataRefImpl Sec, uint64_t &Res) const { error_code MachOObjectFile::isSectionText(DataRefImpl Sec, bool &Res) const { uint32_t Flags = getSectionFlags(this, Sec); - Res = Flags & macho::SF_PureInstructions; + Res = Flags & MachO::S_ATTR_PURE_INSTRUCTIONS; return object_error::success; } @@ -771,9 +758,9 @@ error_code MachOObjectFile::isSectionVirtual(DataRefImpl Sec, error_code MachOObjectFile::isSectionZeroInit(DataRefImpl Sec, bool &Res) const { uint32_t Flags = getSectionFlags(this, Sec); - unsigned SectionType = Flags & MachO::SectionFlagMaskSectionType; - Res = SectionType == MachO::SectionTypeZeroFill || - SectionType == MachO::SectionTypeZeroFillLarge; + unsigned SectionType = Flags & MachO::SECTION_TYPE; + Res = SectionType == MachO::S_ZEROFILL || + SectionType == MachO::S_GB_ZEROFILL; return object_error::success; } @@ -810,14 +797,14 @@ MachOObjectFile::sectionContainsSymbol(DataRefImpl Sec, DataRefImpl Symb, return object_error::success; } -relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const { +relocation_iterator MachOObjectFile::section_rel_begin(DataRefImpl Sec) const { uint32_t Offset; if (is64Bit()) { - macho::Section64 Sect = getSection64(Sec); - Offset = Sect.RelocationTableOffset; + MachO::section_64 Sect = getSection64(Sec); + Offset = Sect.reloff; } else { - macho::Section Sect = getSection(Sec); - Offset = Sect.RelocationTableOffset; + MachO::section Sect = getSection(Sec); + Offset = Sect.reloff; } DataRefImpl Ret; @@ -826,21 +813,21 @@ relocation_iterator MachOObjectFile::getSectionRelBegin(DataRefImpl Sec) const { } relocation_iterator -MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const { +MachOObjectFile::section_rel_end(DataRefImpl Sec) const { uint32_t Offset; uint32_t Num; if (is64Bit()) { - macho::Section64 Sect = getSection64(Sec); - Offset = Sect.RelocationTableOffset; - Num = Sect.NumRelocationTableEntries; + MachO::section_64 
Sect = getSection64(Sec); + Offset = Sect.reloff; + Num = Sect.nreloc; } else { - macho::Section Sect = getSection(Sec); - Offset = Sect.RelocationTableOffset; - Num = Sect.NumRelocationTableEntries; + MachO::section Sect = getSection(Sec); + Offset = Sect.reloff; + Num = Sect.nreloc; } - const macho::RelocationEntry *P = - reinterpret_cast(getPtr(this, Offset)); + const MachO::any_relocation_info *P = + reinterpret_cast(getPtr(this, Offset)); DataRefImpl Ret; Ret.p = reinterpret_cast(P + Num); @@ -849,8 +836,8 @@ MachOObjectFile::getSectionRelEnd(DataRefImpl Sec) const { error_code MachOObjectFile::getRelocationNext(DataRefImpl Rel, RelocationRef &Res) const { - const macho::RelocationEntry *P = - reinterpret_cast(Rel.p); + const MachO::any_relocation_info *P = + reinterpret_cast(Rel.p); Rel.p = reinterpret_cast(P + 1); Res = RelocationRef(Rel, this); return object_error::success; @@ -863,35 +850,32 @@ MachOObjectFile::getRelocationAddress(DataRefImpl Rel, uint64_t &Res) const { error_code MachOObjectFile::getRelocationOffset(DataRefImpl Rel, uint64_t &Res) const { - macho::RelocationEntry RE = getRelocation(Rel); + MachO::any_relocation_info RE = getRelocation(Rel); Res = getAnyRelocationAddress(RE); return object_error::success; } -error_code -MachOObjectFile::getRelocationSymbol(DataRefImpl Rel, SymbolRef &Res) const { - macho::RelocationEntry RE = getRelocation(Rel); +symbol_iterator +MachOObjectFile::getRelocationSymbol(DataRefImpl Rel) const { + MachO::any_relocation_info RE = getRelocation(Rel); uint32_t SymbolIdx = getPlainRelocationSymbolNum(RE); bool isExtern = getPlainRelocationExternal(RE); - if (!isExtern) { - Res = *end_symbols(); - return object_error::success; - } + if (!isExtern) + return end_symbols(); - macho::SymtabLoadCommand S = getSymtabLoadCommand(); + MachO::symtab_command S = getSymtabLoadCommand(); unsigned SymbolTableEntrySize = is64Bit() ? 
- sizeof(macho::Symbol64TableEntry) : - sizeof(macho::SymbolTableEntry); - uint64_t Offset = S.SymbolTableOffset + SymbolIdx * SymbolTableEntrySize; + sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + uint64_t Offset = S.symoff + SymbolIdx * SymbolTableEntrySize; DataRefImpl Sym; Sym.p = reinterpret_cast(getPtr(this, Offset)); - Res = SymbolRef(Sym, this); - return object_error::success; + return symbol_iterator(SymbolRef(Sym, this)); } error_code MachOObjectFile::getRelocationType(DataRefImpl Rel, uint64_t &Res) const { - macho::RelocationEntry RE = getRelocation(Rel); + MachO::any_relocation_info RE = getRelocation(Rel); Res = getAnyRelocationType(RE); return object_error::success; } @@ -989,16 +973,10 @@ MachOObjectFile::getRelocationTypeName(DataRefImpl Rel, return object_error::success; } -error_code MachOObjectFile::getRelocationAdditionalInfo(DataRefImpl Rel, - int64_t &Res) const { - Res = 0; - return object_error::success; -} - error_code MachOObjectFile::getRelocationValueString(DataRefImpl Rel, SmallVectorImpl &Result) const { - macho::RelocationEntry RE = getRelocation(Rel); + MachO::any_relocation_info RE = getRelocation(Rel); unsigned Arch = this->getArch(); @@ -1015,47 +993,47 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel, bool isPCRel = getAnyRelocationPCRel(RE); switch (Type) { - case macho::RIT_X86_64_GOTLoad: // X86_64_RELOC_GOT_LOAD - case macho::RIT_X86_64_GOT: { // X86_64_RELOC_GOT + case MachO::X86_64_RELOC_GOT_LOAD: + case MachO::X86_64_RELOC_GOT: { printRelocationTargetName(this, RE, fmt); fmt << "@GOT"; if (isPCRel) fmt << "PCREL"; break; } - case macho::RIT_X86_64_Subtractor: { // X86_64_RELOC_SUBTRACTOR + case MachO::X86_64_RELOC_SUBTRACTOR: { DataRefImpl RelNext = Rel; RelNext.d.a++; - macho::RelocationEntry RENext = getRelocation(RelNext); + MachO::any_relocation_info RENext = getRelocation(RelNext); - // X86_64_SUBTRACTOR must be followed by a relocation of type + // X86_64_RELOC_SUBTRACTOR must be followed by a relocation of type // X86_64_RELOC_UNSIGNED. // NOTE: Scattered relocations don't exist on x86_64. unsigned RType = getAnyRelocationType(RENext); - if (RType != 0) + if (RType != MachO::X86_64_RELOC_UNSIGNED) report_fatal_error("Expected X86_64_RELOC_UNSIGNED after " "X86_64_RELOC_SUBTRACTOR."); - // The X86_64_RELOC_UNSIGNED contains the minuend symbol, - // X86_64_SUBTRACTOR contains to the subtrahend. + // The X86_64_RELOC_UNSIGNED contains the minuend symbol; + // X86_64_RELOC_SUBTRACTOR contains the subtrahend. printRelocationTargetName(this, RENext, fmt); fmt << "-"; printRelocationTargetName(this, RE, fmt); break; } - case macho::RIT_X86_64_TLV: + case MachO::X86_64_RELOC_TLV: printRelocationTargetName(this, RE, fmt); fmt << "@TLV"; if (isPCRel) fmt << "P"; break; - case macho::RIT_X86_64_Signed1: // X86_64_RELOC_SIGNED1 + case MachO::X86_64_RELOC_SIGNED_1: printRelocationTargetName(this, RE, fmt); fmt << "-1"; break; - case macho::RIT_X86_64_Signed2: // X86_64_RELOC_SIGNED2 + case MachO::X86_64_RELOC_SIGNED_2: printRelocationTargetName(this, RE, fmt); fmt << "-2"; break; - case macho::RIT_X86_64_Signed4: // X86_64_RELOC_SIGNED4 + case MachO::X86_64_RELOC_SIGNED_4: printRelocationTargetName(this, RE, fmt); fmt << "-4"; break; @@ -1064,21 +1042,22 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel, break; } // X86 and ARM share some relocation types in common. 
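The switch above is what gives llvm-objdump its human-readable Mach-O relocation output; the shared x86/ARM generic cases continue below. As a minimal sketch (not part of the patch) of how a client of this 3.4-era API can drive it through RelocationRef, assuming Obj and SecIndex as inputs and a hypothetical helper name:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Object/MachO.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;
    using namespace object;

    // Print the value string of every relocation in one section, using the
    // section_rel_begin/section_rel_end iterators renamed by this patch.
    static void printSectionRelocs(const MachOObjectFile *Obj,
                                   unsigned SecIndex) {
      error_code ec;
      for (relocation_iterator I = Obj->section_rel_begin(SecIndex),
                               E = Obj->section_rel_end(SecIndex);
           I != E; I.increment(ec)) {
        if (ec)
          break; // iteration failed; stop rather than loop forever
        SmallString<32> Val;
        if (!I->getValueString(Val)) // object_error::success tests false
          outs() << Val << "\n";
      }
    }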
- } else if (Arch == Triple::x86 || Arch == Triple::arm) { + } else if (Arch == Triple::x86 || Arch == Triple::arm || + Arch == Triple::ppc) { // Generic relocation types... switch (Type) { - case macho::RIT_Pair: // GENERIC_RELOC_PAIR - prints no info + case MachO::GENERIC_RELOC_PAIR: // prints no info return object_error::success; - case macho::RIT_Difference: { // GENERIC_RELOC_SECTDIFF + case MachO::GENERIC_RELOC_SECTDIFF: { DataRefImpl RelNext = Rel; RelNext.d.a++; - macho::RelocationEntry RENext = getRelocation(RelNext); + MachO::any_relocation_info RENext = getRelocation(RelNext); // X86 sect diff's must be followed by a relocation of type // GENERIC_RELOC_PAIR. unsigned RType = getAnyRelocationType(RENext); - if (RType != 1) + if (RType != MachO::GENERIC_RELOC_PAIR) report_fatal_error("Expected GENERIC_RELOC_PAIR after " "GENERIC_RELOC_SECTDIFF."); @@ -1089,19 +1068,17 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel, } } - if (Arch == Triple::x86) { - // All X86 relocations that need special printing were already - // handled in the generic code. + if (Arch == Triple::x86 || Arch == Triple::ppc) { switch (Type) { - case macho::RIT_Generic_LocalDifference:{// GENERIC_RELOC_LOCAL_SECTDIFF + case MachO::GENERIC_RELOC_LOCAL_SECTDIFF: { DataRefImpl RelNext = Rel; RelNext.d.a++; - macho::RelocationEntry RENext = getRelocation(RelNext); + MachO::any_relocation_info RENext = getRelocation(RelNext); // X86 sect diff's must be followed by a relocation of type // GENERIC_RELOC_PAIR. unsigned RType = getAnyRelocationType(RENext); - if (RType != 1) + if (RType != MachO::GENERIC_RELOC_PAIR) report_fatal_error("Expected GENERIC_RELOC_PAIR after " "GENERIC_RELOC_LOCAL_SECTDIFF."); @@ -1110,7 +1087,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel, printRelocationTargetName(this, RENext, fmt); break; } - case macho::RIT_Generic_TLV: { + case MachO::GENERIC_RELOC_TLV: { printRelocationTargetName(this, RE, fmt); fmt << "@TLV"; if (IsPCRel) fmt << "P"; @@ -1121,8 +1098,8 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel, } } else { // ARM-specific relocations switch (Type) { - case macho::RIT_ARM_Half: // ARM_RELOC_HALF - case macho::RIT_ARM_HalfDifference: { // ARM_RELOC_HALF_SECTDIFF + case MachO::ARM_RELOC_HALF: + case MachO::ARM_RELOC_HALF_SECTDIFF: { // Half relocations steal a bit from the length field to encode // whether this is an upper16 or a lower16 relocation. bool isUpper = getAnyRelocationLength(RE) >> 1; @@ -1135,14 +1112,14 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel, DataRefImpl RelNext = Rel; RelNext.d.a++; - macho::RelocationEntry RENext = getRelocation(RelNext); + MachO::any_relocation_info RENext = getRelocation(RelNext); // ARM half relocs must be followed by a relocation of type // ARM_RELOC_PAIR. unsigned RType = getAnyRelocationType(RENext); - if (RType != 1) + if (RType != MachO::ARM_RELOC_PAIR) report_fatal_error("Expected ARM_RELOC_PAIR after " - "GENERIC_RELOC_HALF"); + "ARM_RELOC_HALF"); // NOTE: The half of the target virtual address is stashed in the // address field of the secondary relocation, but we can't reverse @@ -1151,7 +1128,7 @@ MachOObjectFile::getRelocationValueString(DataRefImpl Rel, // ARM_RELOC_HALF_SECTDIFF encodes the second section in the // symbol/section pointer of the follow-on relocation. 
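The comment block above spells out the ARM half-relocation convention: bit 1 of the r_length field selects :upper16: versus :lower16:, and the mandatory ARM_RELOC_PAIR that follows stashes the other sixteen bits of the target address in its own address field (the SECTDIFF variant continues below). A short sketch of the reassembly rule under those stated assumptions; the helper and parameter names are hypothetical:

    #include <stdint.h>

    // Inst16 is the 16-bit immediate taken from the movw/movt instruction;
    // PairAddr16 is the half stashed in the ARM_RELOC_PAIR address field.
    static uint32_t rebuildHalfTarget(bool IsUpper, uint16_t Inst16,
                                      uint16_t PairAddr16) {
      // An :upper16: reloc patches the high half, so the pair carries the
      // low half; a :lower16: reloc is the mirror image.
      return IsUpper ? (uint32_t(Inst16) << 16) | PairAddr16
                     : (uint32_t(PairAddr16) << 16) | Inst16;
    }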
- if (Type == macho::RIT_ARM_HalfDifference) { + if (Type == MachO::ARM_RELOC_HALF_SECTDIFF) { fmt << "-"; printRelocationTargetName(this, RENext, fmt); } @@ -1182,17 +1159,17 @@ MachOObjectFile::getRelocationHidden(DataRefImpl Rel, bool &Result) const { // On arches that use the generic relocations, GENERIC_RELOC_PAIR // is always hidden. - if (Arch == Triple::x86 || Arch == Triple::arm) { - if (Type == macho::RIT_Pair) Result = true; + if (Arch == Triple::x86 || Arch == Triple::arm || Arch == Triple::ppc) { + if (Type == MachO::GENERIC_RELOC_PAIR) Result = true; } else if (Arch == Triple::x86_64) { // On x86_64, X86_64_RELOC_UNSIGNED is hidden only when it follows - // an X864_64_RELOC_SUBTRACTOR. - if (Type == macho::RIT_X86_64_Unsigned && Rel.d.a > 0) { + // an X86_64_RELOC_SUBTRACTOR. + if (Type == MachO::X86_64_RELOC_UNSIGNED && Rel.d.a > 0) { DataRefImpl RelPrev = Rel; RelPrev.d.a--; uint64_t PrevType; getRelocationType(RelPrev, PrevType); - if (PrevType == macho::RIT_X86_64_Subtractor) + if (PrevType == MachO::X86_64_RELOC_SUBTRACTOR) Result = true; } } @@ -1215,8 +1192,8 @@ symbol_iterator MachOObjectFile::begin_symbols() const { if (!SymtabLoadCmd) return symbol_iterator(SymbolRef(DRI, this)); - macho::SymtabLoadCommand Symtab = getSymtabLoadCommand(); - DRI.p = reinterpret_cast(getPtr(this, Symtab.SymbolTableOffset)); + MachO::symtab_command Symtab = getSymtabLoadCommand(); + DRI.p = reinterpret_cast(getPtr(this, Symtab.symoff)); return symbol_iterator(SymbolRef(DRI, this)); } @@ -1225,12 +1202,12 @@ symbol_iterator MachOObjectFile::end_symbols() const { if (!SymtabLoadCmd) return symbol_iterator(SymbolRef(DRI, this)); - macho::SymtabLoadCommand Symtab = getSymtabLoadCommand(); + MachO::symtab_command Symtab = getSymtabLoadCommand(); unsigned SymbolTableEntrySize = is64Bit() ? - sizeof(macho::Symbol64TableEntry) : - sizeof(macho::SymbolTableEntry); - unsigned Offset = Symtab.SymbolTableOffset + - Symtab.NumSymbolTableEntries * SymbolTableEntrySize; + sizeof(MachO::nlist_64) : + sizeof(MachO::nlist); + unsigned Offset = Symtab.symoff + + Symtab.nsyms * SymbolTableEntrySize; DRI.p = reinterpret_cast(getPtr(this, Offset)); return symbol_iterator(SymbolRef(DRI, this)); } @@ -1274,66 +1251,91 @@ StringRef MachOObjectFile::getFileFormatName() const { unsigned CPUType = getCPUType(this); if (!is64Bit()) { switch (CPUType) { - case llvm::MachO::CPUTypeI386: + case llvm::MachO::CPU_TYPE_I386: return "Mach-O 32-bit i386"; - case llvm::MachO::CPUTypeARM: + case llvm::MachO::CPU_TYPE_ARM: return "Mach-O arm"; - case llvm::MachO::CPUTypePowerPC: + case llvm::MachO::CPU_TYPE_POWERPC: return "Mach-O 32-bit ppc"; default: - assert((CPUType & llvm::MachO::CPUArchABI64) == 0 && + assert((CPUType & llvm::MachO::CPU_ARCH_ABI64) == 0 && "64-bit object file when we're not 64-bit?"); return "Mach-O 32-bit unknown"; } } // Make sure the cpu type has the correct mask. 
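The assert that follows leans on the Mach-O convention, visible in the renamed constants, that a 64-bit CPU type is its 32-bit counterpart with the CPU_ARCH_ABI64 bit OR'd in (CPU_TYPE_X86_64 is CPU_TYPE_I386 | CPU_ARCH_ABI64, and likewise for PowerPC). A one-line illustration of that mask test, written as a hypothetical helper:

    #include "llvm/Support/MachO.h"
    using namespace llvm;

    // True for CPU_TYPE_X86_64 and CPU_TYPE_POWERPC64; false for their
    // 32-bit counterparts, which never carry the ABI64 flag.
    static bool hasABI64Flag(uint32_t CPUType) {
      return (CPUType & MachO::CPU_ARCH_ABI64) == MachO::CPU_ARCH_ABI64;
    }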
- assert((CPUType & llvm::MachO::CPUArchABI64) - == llvm::MachO::CPUArchABI64 && - "32-bit object file when we're 64-bit?"); + assert((CPUType & llvm::MachO::CPU_ARCH_ABI64) + == llvm::MachO::CPU_ARCH_ABI64 && + "32-bit object file when we're 64-bit?"); switch (CPUType) { - case llvm::MachO::CPUTypeX86_64: + case llvm::MachO::CPU_TYPE_X86_64: return "Mach-O 64-bit x86-64"; - case llvm::MachO::CPUTypePowerPC64: + case llvm::MachO::CPU_TYPE_POWERPC64: return "Mach-O 64-bit ppc64"; default: return "Mach-O 64-bit unknown"; } } -unsigned MachOObjectFile::getArch() const { - switch (getCPUType(this)) { - case llvm::MachO::CPUTypeI386: +Triple::ArchType MachOObjectFile::getArch(uint32_t CPUType) { + switch (CPUType) { + case llvm::MachO::CPU_TYPE_I386: return Triple::x86; - case llvm::MachO::CPUTypeX86_64: + case llvm::MachO::CPU_TYPE_X86_64: return Triple::x86_64; - case llvm::MachO::CPUTypeARM: + case llvm::MachO::CPU_TYPE_ARM: return Triple::arm; - case llvm::MachO::CPUTypePowerPC: + case llvm::MachO::CPU_TYPE_POWERPC: return Triple::ppc; - case llvm::MachO::CPUTypePowerPC64: + case llvm::MachO::CPU_TYPE_POWERPC64: return Triple::ppc64; default: return Triple::UnknownArch; } } +unsigned MachOObjectFile::getArch() const { + return getArch(getCPUType(this)); +} + StringRef MachOObjectFile::getLoadName() const { // TODO: Implement report_fatal_error("get_load_name() unimplemented in MachOObjectFile"); } -relocation_iterator MachOObjectFile::getSectionRelBegin(unsigned Index) const { +relocation_iterator MachOObjectFile::section_rel_begin(unsigned Index) const { DataRefImpl DRI; DRI.d.a = Index; - return getSectionRelBegin(DRI); + return section_rel_begin(DRI); } -relocation_iterator MachOObjectFile::getSectionRelEnd(unsigned Index) const { +relocation_iterator MachOObjectFile::section_rel_end(unsigned Index) const { DataRefImpl DRI; DRI.d.a = Index; - return getSectionRelEnd(DRI); + return section_rel_end(DRI); +} + +dice_iterator MachOObjectFile::begin_dices() const { + DataRefImpl DRI; + if (!DataInCodeLoadCmd) + return dice_iterator(DiceRef(DRI, this)); + + MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand(); + DRI.p = reinterpret_cast(getPtr(this, DicLC.dataoff)); + return dice_iterator(DiceRef(DRI, this)); +} + +dice_iterator MachOObjectFile::end_dices() const { + DataRefImpl DRI; + if (!DataInCodeLoadCmd) + return dice_iterator(DiceRef(DRI, this)); + + MachO::linkedit_data_command DicLC = getDataInCodeLoadCommand(); + unsigned Offset = DicLC.dataoff + DicLC.datasize; + DRI.p = reinterpret_cast(getPtr(this, Offset)); + return dice_iterator(DiceRef(DRI, this)); } StringRef @@ -1344,78 +1346,82 @@ MachOObjectFile::getSectionFinalSegmentName(DataRefImpl Sec) const { ArrayRef MachOObjectFile::getSectionRawName(DataRefImpl Sec) const { - const SectionBase *Base = - reinterpret_cast(Sections[Sec.d.a]); - return ArrayRef(Base->Name); + const section_base *Base = + reinterpret_cast(Sections[Sec.d.a]); + return ArrayRef(Base->sectname); } ArrayRef MachOObjectFile::getSectionRawFinalSegmentName(DataRefImpl Sec) const { - const SectionBase *Base = - reinterpret_cast(Sections[Sec.d.a]); - return ArrayRef(Base->SegmentName); + const section_base *Base = + reinterpret_cast(Sections[Sec.d.a]); + return ArrayRef(Base->segname); } bool -MachOObjectFile::isRelocationScattered(const macho::RelocationEntry &RE) +MachOObjectFile::isRelocationScattered(const MachO::any_relocation_info &RE) const { - if (getCPUType(this) == llvm::MachO::CPUTypeX86_64) + if (getCPUType(this) == MachO::CPU_TYPE_X86_64) 
return false; - return getPlainRelocationAddress(RE) & macho::RF_Scattered; + return getPlainRelocationAddress(RE) & MachO::R_SCATTERED; } -unsigned MachOObjectFile::getPlainRelocationSymbolNum(const macho::RelocationEntry &RE) const { +unsigned MachOObjectFile::getPlainRelocationSymbolNum( + const MachO::any_relocation_info &RE) const { if (isLittleEndian()) - return RE.Word1 & 0xffffff; - return RE.Word1 >> 8; + return RE.r_word1 & 0xffffff; + return RE.r_word1 >> 8; } -bool MachOObjectFile::getPlainRelocationExternal(const macho::RelocationEntry &RE) const { +bool MachOObjectFile::getPlainRelocationExternal( + const MachO::any_relocation_info &RE) const { if (isLittleEndian()) - return (RE.Word1 >> 27) & 1; - return (RE.Word1 >> 4) & 1; + return (RE.r_word1 >> 27) & 1; + return (RE.r_word1 >> 4) & 1; } -bool -MachOObjectFile::getScatteredRelocationScattered(const macho::RelocationEntry &RE) const { - return RE.Word0 >> 31; +bool MachOObjectFile::getScatteredRelocationScattered( + const MachO::any_relocation_info &RE) const { + return RE.r_word0 >> 31; } -uint32_t -MachOObjectFile::getScatteredRelocationValue(const macho::RelocationEntry &RE) const { - return RE.Word1; +uint32_t MachOObjectFile::getScatteredRelocationValue( + const MachO::any_relocation_info &RE) const { + return RE.r_word1; } -unsigned -MachOObjectFile::getAnyRelocationAddress(const macho::RelocationEntry &RE) const { +unsigned MachOObjectFile::getAnyRelocationAddress( + const MachO::any_relocation_info &RE) const { if (isRelocationScattered(RE)) return getScatteredRelocationAddress(RE); return getPlainRelocationAddress(RE); } -unsigned -MachOObjectFile::getAnyRelocationPCRel(const macho::RelocationEntry &RE) const { +unsigned MachOObjectFile::getAnyRelocationPCRel( + const MachO::any_relocation_info &RE) const { if (isRelocationScattered(RE)) return getScatteredRelocationPCRel(this, RE); return getPlainRelocationPCRel(this, RE); } -unsigned -MachOObjectFile::getAnyRelocationLength(const macho::RelocationEntry &RE) const { +unsigned MachOObjectFile::getAnyRelocationLength( + const MachO::any_relocation_info &RE) const { if (isRelocationScattered(RE)) return getScatteredRelocationLength(RE); return getPlainRelocationLength(this, RE); } unsigned -MachOObjectFile::getAnyRelocationType(const macho::RelocationEntry &RE) const { +MachOObjectFile::getAnyRelocationType( + const MachO::any_relocation_info &RE) const { if (isRelocationScattered(RE)) return getScatteredRelocationType(RE); return getPlainRelocationType(this, RE); } SectionRef -MachOObjectFile::getRelocationSection(const macho::RelocationEntry &RE) const { +MachOObjectFile::getRelocationSection( + const MachO::any_relocation_info &RE) const { if (isRelocationScattered(RE) || getPlainRelocationExternal(RE)) return *end_sections(); unsigned SecNum = getPlainRelocationSymbolNum(RE) - 1; @@ -1428,113 +1434,132 @@ MachOObjectFile::LoadCommandInfo MachOObjectFile::getFirstLoadCommandInfo() const { MachOObjectFile::LoadCommandInfo Load; - unsigned HeaderSize = is64Bit() ? macho::Header64Size : macho::Header32Size; + unsigned HeaderSize = is64Bit() ? 
sizeof(MachO::mach_header_64) : + sizeof(MachO::mach_header); Load.Ptr = getPtr(this, HeaderSize); - Load.C = getStruct(this, Load.Ptr); + Load.C = getStruct(this, Load.Ptr); return Load; } MachOObjectFile::LoadCommandInfo MachOObjectFile::getNextLoadCommandInfo(const LoadCommandInfo &L) const { MachOObjectFile::LoadCommandInfo Next; - Next.Ptr = L.Ptr + L.C.Size; - Next.C = getStruct(this, Next.Ptr); + Next.Ptr = L.Ptr + L.C.cmdsize; + Next.C = getStruct(this, Next.Ptr); return Next; } -macho::Section MachOObjectFile::getSection(DataRefImpl DRI) const { - return getStruct(this, Sections[DRI.d.a]); +MachO::section MachOObjectFile::getSection(DataRefImpl DRI) const { + return getStruct(this, Sections[DRI.d.a]); } -macho::Section64 MachOObjectFile::getSection64(DataRefImpl DRI) const { - return getStruct(this, Sections[DRI.d.a]); +MachO::section_64 MachOObjectFile::getSection64(DataRefImpl DRI) const { + return getStruct(this, Sections[DRI.d.a]); } -macho::Section MachOObjectFile::getSection(const LoadCommandInfo &L, +MachO::section MachOObjectFile::getSection(const LoadCommandInfo &L, unsigned Index) const { const char *Sec = getSectionPtr(this, L, Index); - return getStruct(this, Sec); + return getStruct(this, Sec); } -macho::Section64 MachOObjectFile::getSection64(const LoadCommandInfo &L, - unsigned Index) const { +MachO::section_64 MachOObjectFile::getSection64(const LoadCommandInfo &L, + unsigned Index) const { const char *Sec = getSectionPtr(this, L, Index); - return getStruct(this, Sec); + return getStruct(this, Sec); } -macho::SymbolTableEntry +MachO::nlist MachOObjectFile::getSymbolTableEntry(DataRefImpl DRI) const { const char *P = reinterpret_cast(DRI.p); - return getStruct(this, P); + return getStruct(this, P); } -macho::Symbol64TableEntry +MachO::nlist_64 MachOObjectFile::getSymbol64TableEntry(DataRefImpl DRI) const { const char *P = reinterpret_cast(DRI.p); - return getStruct(this, P); + return getStruct(this, P); } -macho::LinkeditDataLoadCommand -MachOObjectFile::getLinkeditDataLoadCommand(const MachOObjectFile::LoadCommandInfo &L) const { - return getStruct(this, L.Ptr); +MachO::linkedit_data_command +MachOObjectFile::getLinkeditDataLoadCommand(const LoadCommandInfo &L) const { + return getStruct(this, L.Ptr); } -macho::SegmentLoadCommand +MachO::segment_command MachOObjectFile::getSegmentLoadCommand(const LoadCommandInfo &L) const { - return getStruct(this, L.Ptr); + return getStruct(this, L.Ptr); } -macho::Segment64LoadCommand +MachO::segment_command_64 MachOObjectFile::getSegment64LoadCommand(const LoadCommandInfo &L) const { - return getStruct(this, L.Ptr); + return getStruct(this, L.Ptr); } -macho::LinkerOptionsLoadCommand +MachO::linker_options_command MachOObjectFile::getLinkerOptionsLoadCommand(const LoadCommandInfo &L) const { - return getStruct(this, L.Ptr); + return getStruct(this, L.Ptr); } -macho::RelocationEntry +MachO::any_relocation_info MachOObjectFile::getRelocation(DataRefImpl Rel) const { const char *P = reinterpret_cast(Rel.p); - return getStruct(this, P); + return getStruct(this, P); +} + +MachO::data_in_code_entry +MachOObjectFile::getDice(DataRefImpl Rel) const { + const char *P = reinterpret_cast(Rel.p); + return getStruct(this, P); } -macho::Header MachOObjectFile::getHeader() const { - return getStruct(this, getPtr(this, 0)); +MachO::mach_header MachOObjectFile::getHeader() const { + return getStruct(this, getPtr(this, 0)); } -macho::Header64Ext MachOObjectFile::getHeader64Ext() const { - return - getStruct(this, getPtr(this, 
sizeof(macho::Header))); +MachO::mach_header_64 MachOObjectFile::getHeader64() const { + return getStruct(this, getPtr(this, 0)); } -macho::IndirectSymbolTableEntry MachOObjectFile::getIndirectSymbolTableEntry( - const macho::DysymtabLoadCommand &DLC, - unsigned Index) const { - uint64_t Offset = DLC.IndirectSymbolTableOffset + - Index * sizeof(macho::IndirectSymbolTableEntry); - return getStruct(this, getPtr(this, Offset)); +uint32_t MachOObjectFile::getIndirectSymbolTableEntry( + const MachO::dysymtab_command &DLC, + unsigned Index) const { + uint64_t Offset = DLC.indirectsymoff + Index * sizeof(uint32_t); + return getStruct(this, getPtr(this, Offset)); } -macho::DataInCodeTableEntry +MachO::data_in_code_entry MachOObjectFile::getDataInCodeTableEntry(uint32_t DataOffset, unsigned Index) const { - uint64_t Offset = DataOffset + Index * sizeof(macho::DataInCodeTableEntry); - return getStruct(this, getPtr(this, Offset)); + uint64_t Offset = DataOffset + Index * sizeof(MachO::data_in_code_entry); + return getStruct(this, getPtr(this, Offset)); } -macho::SymtabLoadCommand MachOObjectFile::getSymtabLoadCommand() const { - return getStruct(this, SymtabLoadCmd); +MachO::symtab_command MachOObjectFile::getSymtabLoadCommand() const { + return getStruct(this, SymtabLoadCmd); } -macho::DysymtabLoadCommand MachOObjectFile::getDysymtabLoadCommand() const { - return getStruct(this, DysymtabLoadCmd); +MachO::dysymtab_command MachOObjectFile::getDysymtabLoadCommand() const { + return getStruct(this, DysymtabLoadCmd); +} + +MachO::linkedit_data_command +MachOObjectFile::getDataInCodeLoadCommand() const { + if (DataInCodeLoadCmd) + return getStruct(this, DataInCodeLoadCmd); + + // If there is no DataInCodeLoadCmd return a load command with zero'ed fields. + MachO::linkedit_data_command Cmd; + Cmd.cmd = MachO::LC_DATA_IN_CODE; + Cmd.cmdsize = sizeof(MachO::linkedit_data_command); + Cmd.dataoff = 0; + Cmd.datasize = 0; + return Cmd; } StringRef MachOObjectFile::getStringTableData() const { - macho::SymtabLoadCommand S = getSymtabLoadCommand(); - return getData().substr(S.StringTableOffset, S.StringTableSize); + MachO::symtab_command S = getSymtabLoadCommand(); + return getData().substr(S.stroff, S.strsize); } bool MachOObjectFile::is64Bit() const { @@ -1557,21 +1582,23 @@ void MachOObjectFile::ReadULEB128s(uint64_t Index, ObjectFile *ObjectFile::createMachOObjectFile(MemoryBuffer *Buffer) { StringRef Magic = Buffer->getBuffer().slice(0, 4); error_code ec; - ObjectFile *Ret; + OwningPtr Ret; if (Magic == "\xFE\xED\xFA\xCE") - Ret = new MachOObjectFile(Buffer, false, false, ec); + Ret.reset(new MachOObjectFile(Buffer, false, false, ec)); else if (Magic == "\xCE\xFA\xED\xFE") - Ret = new MachOObjectFile(Buffer, true, false, ec); + Ret.reset(new MachOObjectFile(Buffer, true, false, ec)); else if (Magic == "\xFE\xED\xFA\xCF") - Ret = new MachOObjectFile(Buffer, false, true, ec); + Ret.reset(new MachOObjectFile(Buffer, false, true, ec)); else if (Magic == "\xCF\xFA\xED\xFE") - Ret = new MachOObjectFile(Buffer, true, true, ec); - else + Ret.reset(new MachOObjectFile(Buffer, true, true, ec)); + else { + delete Buffer; return NULL; + } if (ec) return NULL; - return Ret; + return Ret.take(); } } // end namespace object diff --git a/contrib/llvm/lib/Object/MachOUniversal.cpp b/contrib/llvm/lib/Object/MachOUniversal.cpp new file mode 100644 index 0000000..75160af --- /dev/null +++ b/contrib/llvm/lib/Object/MachOUniversal.cpp @@ -0,0 +1,139 @@ +//===- MachOUniversal.cpp - Mach-O universal binary -------------*- C++ 
-*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the MachOUniversalBinary class. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/MachOUniversal.h" + +#include "llvm/Object/MachO.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/Casting.h" +#include "llvm/Support/Host.h" +#include "llvm/Support/MemoryBuffer.h" + +using namespace llvm; +using namespace object; + +template<typename T> +static void SwapValue(T &Value) { + Value = sys::SwapByteOrder(Value); +} + +template<typename T> +static void SwapStruct(T &Value); + +template<> +void SwapStruct(MachO::fat_header &H) { + SwapValue(H.magic); + SwapValue(H.nfat_arch); +} + +template<> +void SwapStruct(MachO::fat_arch &H) { + SwapValue(H.cputype); + SwapValue(H.cpusubtype); + SwapValue(H.offset); + SwapValue(H.size); + SwapValue(H.align); +} + +template<typename T> +static T getUniversalBinaryStruct(const char *Ptr) { + T Res; + memcpy(&Res, Ptr, sizeof(T)); + // Universal binary headers have big-endian byte order. + if (sys::IsLittleEndianHost) + SwapStruct(Res); + return Res; +} + +MachOUniversalBinary::ObjectForArch::ObjectForArch( + const MachOUniversalBinary *Parent, uint32_t Index) + : Parent(Parent), Index(Index) { + if (Parent == 0 || Index > Parent->getNumberOfObjects()) { + clear(); + } else { + // Parse object header. + StringRef ParentData = Parent->getData(); + const char *HeaderPos = ParentData.begin() + sizeof(MachO::fat_header) + + Index * sizeof(MachO::fat_arch); + Header = getUniversalBinaryStruct<MachO::fat_arch>(HeaderPos); + if (ParentData.size() < Header.offset + Header.size) { + clear(); + } + } +} + +error_code MachOUniversalBinary::ObjectForArch::getAsObjectFile( + OwningPtr<ObjectFile> &Result) const { + if (Parent) { + StringRef ParentData = Parent->getData(); + StringRef ObjectData = ParentData.substr(Header.offset, Header.size); + std::string ObjectName = + Parent->getFileName().str() + ":" + + Triple::getArchTypeName(MachOObjectFile::getArch(Header.cputype)); + MemoryBuffer *ObjBuffer = MemoryBuffer::getMemBuffer( + ObjectData, ObjectName, false); + if (ObjectFile *Obj = ObjectFile::createMachOObjectFile(ObjBuffer)) { + Result.reset(Obj); + return object_error::success; + } + } + return object_error::parse_failed; +} + +void MachOUniversalBinary::anchor() { } + +MachOUniversalBinary::MachOUniversalBinary(MemoryBuffer *Source, + error_code &ec) + : Binary(Binary::ID_MachOUniversalBinary, Source), + NumberOfObjects(0) { + if (Source->getBufferSize() < sizeof(MachO::fat_header)) { + ec = object_error::invalid_file_type; + return; + } + // Check for magic value and sufficient header size.
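// The fat (universal) header is stored big-endian on disk regardless of host
// byte order, which is why SwapStruct is applied on little-endian hosts
// above. A minimal standalone sketch of the same decoding step, assuming
// only the standard library (the struct and helper names here are
// illustrative, not the patch's API):
#include <cstdint>

struct FatHeader { uint32_t magic, nfat_arch; };

// Assemble a 32-bit big-endian value byte by byte, independent of host order.
static uint32_t readBE32(const unsigned char *P) {
  return (uint32_t(P[0]) << 24) | (uint32_t(P[1]) << 16) |
         (uint32_t(P[2]) << 8)  |  uint32_t(P[3]);
}

static FatHeader decodeFatHeader(const unsigned char *Buf) {
  FatHeader H;
  H.magic = readBE32(Buf);          // 0xCAFEBABE (FAT_MAGIC) for a valid file
  H.nfat_arch = readBE32(Buf + 4);  // number of fat_arch entries that follow
  return H;
}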
+ StringRef Buf = getData(); + MachO::fat_header H = getUniversalBinaryStruct<MachO::fat_header>(Buf.begin()); + NumberOfObjects = H.nfat_arch; + uint32_t MinSize = sizeof(MachO::fat_header) + + sizeof(MachO::fat_arch) * NumberOfObjects; + if (H.magic != MachO::FAT_MAGIC || Buf.size() < MinSize) { + ec = object_error::parse_failed; + return; + } + ec = object_error::success; +} + +static bool getCTMForArch(Triple::ArchType Arch, MachO::CPUType &CTM) { + switch (Arch) { + case Triple::x86: CTM = MachO::CPU_TYPE_I386; return true; + case Triple::x86_64: CTM = MachO::CPU_TYPE_X86_64; return true; + case Triple::arm: CTM = MachO::CPU_TYPE_ARM; return true; + case Triple::sparc: CTM = MachO::CPU_TYPE_SPARC; return true; + case Triple::ppc: CTM = MachO::CPU_TYPE_POWERPC; return true; + case Triple::ppc64: CTM = MachO::CPU_TYPE_POWERPC64; return true; + default: return false; + } +} + +error_code +MachOUniversalBinary::getObjectForArch(Triple::ArchType Arch, + OwningPtr<ObjectFile> &Result) const { + MachO::CPUType CTM; + if (!getCTMForArch(Arch, CTM)) + return object_error::arch_not_found; + for (object_iterator I = begin_objects(), E = end_objects(); I != E; ++I) { + if (I->getCPUType() == static_cast<uint32_t>(CTM)) + return I->getAsObjectFile(Result); + } + return object_error::arch_not_found; +}
diff --git a/contrib/llvm/lib/Object/Object.cpp b/contrib/llvm/lib/Object/Object.cpp index 3e2c78e..6941708 100644 --- a/contrib/llvm/lib/Object/Object.cpp +++ b/contrib/llvm/lib/Object/Object.cpp
@@ -219,10 +219,7 @@ uint64_t LLVMGetRelocationOffset(LLVMRelocationIteratorRef RI) { } LLVMSymbolIteratorRef LLVMGetRelocationSymbol(LLVMRelocationIteratorRef RI) { - SymbolRef ret; - if (error_code ec = (*unwrap(RI))->getSymbol(ret)) - report_fatal_error(ec.message()); - + symbol_iterator ret = (*unwrap(RI))->getSymbol(); return wrap(new symbol_iterator(ret)); }
diff --git a/contrib/llvm/lib/Object/ObjectFile.cpp b/contrib/llvm/lib/Object/ObjectFile.cpp index 77fd995..0e626d6 100644 --- a/contrib/llvm/lib/Object/ObjectFile.cpp +++ b/contrib/llvm/lib/Object/ObjectFile.cpp
@@ -14,8 +14,8 @@ #include "llvm/Object/ObjectFile.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Support/Path.h" #include "llvm/Support/system_error.h" using namespace llvm;
@@ -33,35 +33,47 @@ error_code ObjectFile::getSymbolAlignment(DataRefImpl DRI, return object_error::success; } +section_iterator ObjectFile::getRelocatedSection(DataRefImpl Sec) const { + return section_iterator(SectionRef(Sec, this)); +} + ObjectFile *ObjectFile::createObjectFile(MemoryBuffer *Object) { - if (!Object || Object->getBufferSize() < 64) + if (Object->getBufferSize() < 64) { + delete Object; + return 0; + } + + sys::fs::file_magic Type = sys::fs::identify_magic(Object->getBuffer()); + switch (Type) { + case sys::fs::file_magic::unknown: + case sys::fs::file_magic::bitcode: + case sys::fs::file_magic::archive: + case sys::fs::file_magic::macho_universal_binary: + case sys::fs::file_magic::windows_resource: + delete Object; return 0; - sys::LLVMFileType type = sys::IdentifyFileType(Object->getBufferStart(), - static_cast<unsigned>(Object->getBufferSize())); - switch (type) { - case sys::Unknown_FileType: - return 0; - case sys::ELF_Relocatable_FileType: - case sys::ELF_Executable_FileType: - case sys::ELF_SharedObject_FileType: - case sys::ELF_Core_FileType: - return createELFObjectFile(Object); - case sys::Mach_O_Object_FileType: - case sys::Mach_O_Executable_FileType: - case
sys::Mach_O_FixedVirtualMemorySharedLib_FileType: - case sys::Mach_O_Core_FileType: - case sys::Mach_O_PreloadExecutable_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLib_FileType: - case sys::Mach_O_DynamicLinker_FileType: - case sys::Mach_O_Bundle_FileType: - case sys::Mach_O_DynamicallyLinkedSharedLibStub_FileType: - case sys::Mach_O_DSYMCompanion_FileType: - return createMachOObjectFile(Object); - case sys::COFF_FileType: - return createCOFFObjectFile(Object); - default: - llvm_unreachable("Unexpected Object File Type"); + case sys::fs::file_magic::elf_relocatable: + case sys::fs::file_magic::elf_executable: + case sys::fs::file_magic::elf_shared_object: + case sys::fs::file_magic::elf_core: + return createELFObjectFile(Object); + case sys::fs::file_magic::macho_object: + case sys::fs::file_magic::macho_executable: + case sys::fs::file_magic::macho_fixed_virtual_memory_shared_lib: + case sys::fs::file_magic::macho_core: + case sys::fs::file_magic::macho_preload_executable: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib: + case sys::fs::file_magic::macho_dynamic_linker: + case sys::fs::file_magic::macho_bundle: + case sys::fs::file_magic::macho_dynamically_linked_shared_lib_stub: + case sys::fs::file_magic::macho_dsym_companion: + return createMachOObjectFile(Object); + case sys::fs::file_magic::coff_object: + case sys::fs::file_magic::coff_import_library: + case sys::fs::file_magic::pecoff_executable: + return createCOFFObjectFile(Object); } + llvm_unreachable("Unexpected Object File Type"); } ObjectFile *ObjectFile::createObjectFile(StringRef ObjectPath) { diff --git a/contrib/llvm/lib/Object/YAML.cpp b/contrib/llvm/lib/Object/YAML.cpp new file mode 100644 index 0000000..c527bde --- /dev/null +++ b/contrib/llvm/lib/Object/YAML.cpp @@ -0,0 +1,68 @@ +//===- YAML.cpp - YAMLIO utilities for object files -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines utility classes for handling the YAML representation of +// object files. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Object/YAML.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" +#include + +using namespace llvm; +using namespace object::yaml; + +void yaml::ScalarTraits::output( + const object::yaml::BinaryRef &Val, void *, llvm::raw_ostream &Out) { + Val.writeAsHex(Out); +} + +StringRef yaml::ScalarTraits::input( + StringRef Scalar, void *, object::yaml::BinaryRef &Val) { + if (Scalar.size() % 2 != 0) + return "BinaryRef hex string must contain an even number of nybbles."; + // TODO: Can we improve YAMLIO to permit a more accurate diagnostic here? + // (e.g. a caret pointing to the offending character). 
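// The two checks around this point pin down the BinaryRef invariant: an even
// number of characters, all hex digits, so every byte is exactly two nybbles.
// A minimal standalone sketch of the same validate-and-decode step, assuming
// only the standard library (the helper name is illustrative):
#include <cctype>
#include <cstdint>
#include <cstdlib>
#include <string>
#include <vector>

static bool decodeHexString(const std::string &S, std::vector<uint8_t> &Out) {
  if (S.size() % 2 != 0)
    return false;                    // must describe whole bytes
  for (std::string::size_type I = 0; I < S.size(); I += 2) {
    if (!std::isxdigit((unsigned char)S[I]) ||
        !std::isxdigit((unsigned char)S[I + 1]))
      return false;                  // reject non-hex characters
    Out.push_back((uint8_t)std::strtoul(S.substr(I, 2).c_str(), 0, 16));
  }
  return true;
}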
+ for (unsigned I = 0, N = Scalar.size(); I != N; ++I) + if (!isxdigit(Scalar[I])) + return "BinaryRef hex string must contain only hex digits."; + Val = object::yaml::BinaryRef(Scalar); + return StringRef(); +} + +void BinaryRef::writeAsBinary(raw_ostream &OS) const { + if (!DataIsHexString) { + OS.write((const char *)Data.data(), Data.size()); + return; + } + for (unsigned I = 0, N = Data.size(); I != N; I += 2) { + uint8_t Byte; + StringRef((const char *)&Data[I], 2).getAsInteger(16, Byte); + OS.write(Byte); + } +} + +void BinaryRef::writeAsHex(raw_ostream &OS) const { + if (binary_size() == 0) { + OS << "\"\""; + return; + } + if (DataIsHexString) { + OS.write((const char *)Data.data(), Data.size()); + return; + } + for (ArrayRef<uint8_t>::iterator I = Data.begin(), E = Data.end(); I != E; + ++I) { + uint8_t Byte = *I; + OS << hexdigit(Byte >> 4); + OS << hexdigit(Byte & 0xf); + } +}
diff --git a/contrib/llvm/lib/Option/ArgList.cpp b/contrib/llvm/lib/Option/ArgList.cpp index 39b22d7..15f7e8b 100644 --- a/contrib/llvm/lib/Option/ArgList.cpp +++ b/contrib/llvm/lib/Option/ArgList.cpp
@@ -206,6 +206,13 @@ bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier Neg, bool Default) const { return Default; } +bool ArgList::hasFlag(OptSpecifier Pos, OptSpecifier PosAlias, OptSpecifier Neg, + bool Default) const { + if (Arg *A = getLastArg(Pos, PosAlias, Neg)) + return A->getOption().matches(Pos) || A->getOption().matches(PosAlias); + return Default; +} + StringRef ArgList::getLastArgValue(OptSpecifier Id, StringRef Default) const { if (Arg *A = getLastArg(Id))
@@ -226,6 +233,14 @@ void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id) const { } } +void ArgList::AddLastArg(ArgStringList &Output, OptSpecifier Id0, + OptSpecifier Id1) const { + if (Arg *A = getLastArg(Id0, Id1)) { + A->claim(); + A->render(*this, Output); + } +} + void ArgList::AddAllArgs(ArgStringList &Output, OptSpecifier Id0, OptSpecifier Id1, OptSpecifier Id2) const { for (arg_iterator it = filtered_begin(Id0, Id1, Id2),
diff --git a/contrib/llvm/lib/Option/OptTable.cpp b/contrib/llvm/lib/Option/OptTable.cpp index 5c8a0ea..6fa459a 100644 --- a/contrib/llvm/lib/Option/OptTable.cpp +++ b/contrib/llvm/lib/Option/OptTable.cpp
@@ -14,26 +14,27 @@ #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include <algorithm> +#include <cctype> #include <map> using namespace llvm; using namespace llvm::opt; -// Ordering on Info. The ordering is *almost* lexicographic, with two -// exceptions. First, '\0' comes at the end of the alphabet instead of -// the beginning (thus options precede any other options which prefix -// them). Second, for options with the same name, the less permissive -// version should come first; a Flag option should precede a Joined -// option, for example. +namespace llvm { +namespace opt { -static int StrCmpOptionName(const char *A, const char *B) { - char a = *A, b = *B; +// Ordering on Info. The ordering is *almost* case-insensitive lexicographic, +// with one exception. '\0' comes at the end of the alphabet instead of the +// beginning (thus options precede any other options which prefix them). +static int StrCmpOptionNameIgnoreCase(const char *A, const char *B) { + const char *X = A, *Y = B; + char a = tolower(*A), b = tolower(*B); while (a == b) { if (a == '\0') return 0; - a = *++A; - b = *++B; + a = tolower(*++X); + b = tolower(*++Y); } if (a == '\0') // A is a prefix of B.
@@ -45,21 +46,25 @@ static int StrCmpOptionName(const char *A, const char *B) { return (a < b) ?
-1 : 1; } -namespace llvm { -namespace opt { +#ifndef NDEBUG +static int StrCmpOptionName(const char *A, const char *B) { + if (int N = StrCmpOptionNameIgnoreCase(A, B)) + return N; + return strcmp(A, B); +} static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) { if (&A == &B) return false; if (int N = StrCmpOptionName(A.Name, B.Name)) - return N == -1; + return N < 0; for (const char * const *APre = A.Prefixes, * const *BPre = B.Prefixes; *APre != 0 && *BPre != 0; ++APre, ++BPre) { if (int N = StrCmpOptionName(*APre, *BPre)) - return N == -1; + return N < 0; } // Names are the same, check that classes are in order; exactly one @@ -68,22 +73,22 @@ static inline bool operator<(const OptTable::Info &A, const OptTable::Info &B) { "Unexpected classes for options with same name."); return B.Kind == Option::JoinedClass; } +#endif // Support lower_bound between info and an option name. static inline bool operator<(const OptTable::Info &I, const char *Name) { - return StrCmpOptionName(I.Name, Name) == -1; -} -static inline bool operator<(const char *Name, const OptTable::Info &I) { - return StrCmpOptionName(Name, I.Name) == -1; + return StrCmpOptionNameIgnoreCase(I.Name, Name) < 0; } } } OptSpecifier::OptSpecifier(const Option *Opt) : ID(Opt->getID()) {} -OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos) +OptTable::OptTable(const Info *_OptionInfos, unsigned _NumOptionInfos, + bool _IgnoreCase) : OptionInfos(_OptionInfos), NumOptionInfos(_NumOptionInfos), + IgnoreCase(_IgnoreCase), TheInputOptionID(0), TheUnknownOptionID(0), FirstSearchableIndex(0) @@ -160,10 +165,6 @@ const Option OptTable::getOption(OptSpecifier Opt) const { return Option(&getInfo(id), this); } -bool OptTable::isOptionHelpHidden(OptSpecifier id) const { - return getInfo(id).Flags & HelpHidden; -} - static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) { if (Arg == "-") return true; @@ -175,16 +176,25 @@ static bool isInput(const llvm::StringSet<> &Prefixes, StringRef Arg) { } /// \returns Matched size. 0 means no match. -static unsigned matchOption(const OptTable::Info *I, StringRef Str) { +static unsigned matchOption(const OptTable::Info *I, StringRef Str, + bool IgnoreCase) { for (const char * const *Pre = I->Prefixes; *Pre != 0; ++Pre) { StringRef Prefix(*Pre); - if (Str.startswith(Prefix) && Str.substr(Prefix.size()).startswith(I->Name)) - return Prefix.size() + StringRef(I->Name).size(); + if (Str.startswith(Prefix)) { + StringRef Rest = Str.substr(Prefix.size()); + bool Matched = IgnoreCase + ? Rest.startswith_lower(I->Name) + : Rest.startswith(I->Name); + if (Matched) + return Prefix.size() + StringRef(I->Name).size(); + } } return 0; } -Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index) const { +Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index, + unsigned FlagsToInclude, + unsigned FlagsToExclude) const { unsigned Prev = Index; const char *Str = Args.getArgString(Index); @@ -212,13 +222,20 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index) const { unsigned ArgSize = 0; // Scan for first option which is a proper prefix. for (; Start != End; ++Start) - if ((ArgSize = matchOption(Start, Str))) + if ((ArgSize = matchOption(Start, Str, IgnoreCase))) break; if (Start == End) break; + Option Opt(Start, this); + + if (FlagsToInclude && !Opt.hasFlag(FlagsToInclude)) + continue; + if (Opt.hasFlag(FlagsToExclude)) + continue; + // See if this option matches. 
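// matchOption above succeeds when the argument starts with one of the
// option's prefixes followed by its name; the prefix is always compared
// exactly, while the name may be compared case-insensitively (the cl.exe
// style enabled by IgnoreCase). A minimal standalone sketch of that test,
// with illustrative names:
#include <cctype>
#include <cstddef>
#include <string>

static bool startsWith(const std::string &S, const std::string &P,
                       bool IgnoreCase) {
  if (S.size() < P.size())
    return false;
  for (std::size_t I = 0; I != P.size(); ++I) {
    unsigned char A = S[I], B = P[I];
    if (IgnoreCase) { A = std::tolower(A); B = std::tolower(B); }
    if (A != B)
      return false;
  }
  return true;
}

// Returns the matched length ("-" plus "O" gives 2), or 0 for no match.
static unsigned matchOpt(const std::string &Arg, const std::string &Prefix,
                         const std::string &Name, bool IgnoreCase) {
  if (!startsWith(Arg, Prefix, false) ||
      !startsWith(Arg.substr(Prefix.size()), Name, IgnoreCase))
    return 0;
  return (unsigned)(Prefix.size() + Name.size());
}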
- if (Arg *A = Option(Start, this).accept(Args, Index, ArgSize)) + if (Arg *A = Opt.accept(Args, Index, ArgSize)) return A; // Otherwise, see if this argument was missing values. @@ -226,13 +243,20 @@ Arg *OptTable::ParseOneArg(const ArgList &Args, unsigned &Index) const { return 0; } + // If we failed to find an option and this arg started with /, then it's + // probably an input path. + if (Str[0] == '/') + return new Arg(getOption(TheInputOptionID), Str, Index++, Str); + return new Arg(getOption(TheUnknownOptionID), Str, Index++, Str); } -InputArgList *OptTable::ParseArgs(const char* const *ArgBegin, - const char* const *ArgEnd, +InputArgList *OptTable::ParseArgs(const char *const *ArgBegin, + const char *const *ArgEnd, unsigned &MissingArgIndex, - unsigned &MissingArgCount) const { + unsigned &MissingArgCount, + unsigned FlagsToInclude, + unsigned FlagsToExclude) const { InputArgList *Args = new InputArgList(ArgBegin, ArgEnd); // FIXME: Handle '@' args (or at least error on them). @@ -241,13 +265,14 @@ InputArgList *OptTable::ParseArgs(const char* const *ArgBegin, unsigned Index = 0, End = ArgEnd - ArgBegin; while (Index < End) { // Ignore empty arguments (other things may still take them as arguments). - if (Args->getArgString(Index)[0] == '\0') { + StringRef Str = Args->getArgString(Index); + if (Str == "") { ++Index; continue; } unsigned Prev = Index; - Arg *A = ParseOneArg(*Args, Index); + Arg *A = ParseOneArg(*Args, Index, FlagsToInclude, FlagsToExclude); assert(Index > Prev && "Parser failed to consume argument."); // Check for missing argument error. @@ -281,6 +306,7 @@ static std::string getOptionHelpName(const OptTable &Opts, OptSpecifier Id) { break; case Option::SeparateClass: case Option::JoinedOrSeparateClass: + case Option::RemainingArgsClass: Name += ' '; // FALLTHROUGH case Option::JoinedClass: case Option::CommaJoinedClass: @@ -346,8 +372,16 @@ static const char *getOptionHelpGroup(const OptTable &Opts, OptSpecifier Id) { return getOptionHelpGroup(Opts, GroupID); } -void OptTable::PrintHelp(raw_ostream &OS, const char *Name, - const char *Title, bool ShowHidden) const { +void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title, + bool ShowHidden) const { + PrintHelp(OS, Name, Title, /*Include*/ 0, /*Exclude*/ + (ShowHidden ? 0 : HelpHidden)); +} + + +void OptTable::PrintHelp(raw_ostream &OS, const char *Name, const char *Title, + unsigned FlagsToInclude, + unsigned FlagsToExclude) const { OS << "OVERVIEW: " << Title << "\n"; OS << '\n'; OS << "USAGE: " << Name << " [options] \n"; @@ -366,7 +400,10 @@ void OptTable::PrintHelp(raw_ostream &OS, const char *Name, if (getOptionKind(Id) == Option::GroupClass) continue; - if (!ShowHidden && isOptionHelpHidden(Id)) + unsigned Flags = getInfo(Id).Flags; + if (FlagsToInclude && !(Flags & FlagsToInclude)) + continue; + if (Flags & FlagsToExclude) continue; if (const char *Text = getOptionHelpText(Id)) { diff --git a/contrib/llvm/lib/Option/Option.cpp b/contrib/llvm/lib/Option/Option.cpp index 0e22634..7b5ff2b 100644 --- a/contrib/llvm/lib/Option/Option.cpp +++ b/contrib/llvm/lib/Option/Option.cpp @@ -22,12 +22,17 @@ using namespace llvm::opt; Option::Option(const OptTable::Info *info, const OptTable *owner) : Info(info), Owner(owner) { - // Multi-level aliases are not supported, and alias options cannot - // have groups. This just simplifies option tracking, it is not an - // inherent limitation. 
- assert((!Info || !getAlias().isValid() || (!getAlias().getAlias().isValid() && - !getGroup().isValid())) && - "Multi-level aliases and aliases with groups are unsupported."); + // Multi-level aliases are not supported. This just simplifies option + // tracking, it is not an inherent limitation. + assert((!Info || !getAlias().isValid() || !getAlias().getAlias().isValid()) && + "Multi-level aliases are not supported."); + + if (Info && getAliasArgs()) { + assert(getAlias().isValid() && "Only alias options can have alias args."); + assert(getKind() == FlagClass && "Only Flag aliases can have alias args."); + assert(getAlias().getKind() != FlagClass && + "Cannot provide alias args to a flag option."); + } } Option::~Option() { @@ -47,14 +52,17 @@ void Option::dump() const { P(MultiArgClass); P(JoinedOrSeparateClass); P(JoinedAndSeparateClass); + P(RemainingArgsClass); #undef P } - llvm::errs() << " Prefixes:["; - for (const char * const *Pre = Info->Prefixes; *Pre != 0; ++Pre) { - llvm::errs() << '"' << *Pre << (*(Pre + 1) == 0 ? "\"" : "\", "); + if (Info->Prefixes) { + llvm::errs() << " Prefixes:["; + for (const char * const *Pre = Info->Prefixes; *Pre != 0; ++Pre) { + llvm::errs() << '"' << *Pre << (*(Pre + 1) == 0 ? "\"" : "\", "); + } + llvm::errs() << ']'; } - llvm::errs() << ']'; llvm::errs() << " Name:\"" << getName() << '"'; @@ -106,11 +114,22 @@ Arg *Option::accept(const ArgList &Args, } switch (getKind()) { - case FlagClass: + case FlagClass: { if (ArgSize != strlen(Args.getArgString(Index))) return 0; - return new Arg(UnaliasedOption, Spelling, Index++); + Arg *A = new Arg(UnaliasedOption, Spelling, Index++); + if (getAliasArgs()) { + const char *Val = getAliasArgs(); + while (*Val != '\0') { + A->getValues().push_back(Val); + + // Move past the '\0' to the next argument. + Val += strlen(Val) + 1; + } + } + return A; + } case JoinedClass: { const char *Value = Args.getArgString(Index) + ArgSize; return new Arg(UnaliasedOption, Spelling, Index++, Value); @@ -196,6 +215,16 @@ Arg *Option::accept(const ArgList &Args, return new Arg(UnaliasedOption, Spelling, Index - 2, Args.getArgString(Index - 2) + ArgSize, Args.getArgString(Index - 1)); + case RemainingArgsClass: { + // Matches iff this is an exact match. + // FIXME: Avoid strlen. + if (ArgSize != strlen(Args.getArgString(Index))) + return 0; + Arg *A = new Arg(UnaliasedOption, Spelling, Index++); + while (Index < Args.getNumInputArgStrings()) + A->getValues().push_back(Args.getArgString(Index++)); + return A; + } default: llvm_unreachable("Invalid option kind!"); } diff --git a/contrib/llvm/lib/Support/APFloat.cpp b/contrib/llvm/lib/Support/APFloat.cpp index 6182e34..676e2d4 100644 --- a/contrib/llvm/lib/Support/APFloat.cpp +++ b/contrib/llvm/lib/Support/APFloat.cpp @@ -25,7 +25,13 @@ using namespace llvm; -#define convolve(lhs, rhs) ((lhs) * 4 + (rhs)) +/// A macro used to combine two fcCategory enums into one key which can be used +/// in a switch statement to classify how the interaction of two APFloat's +/// categories affects an operation. +/// +/// TODO: If clang source code is ever allowed to use constexpr in its own +/// codebase, change this into a static inline function. +#define PackCategoriesIntoKey(_lhs, _rhs) ((_lhs) * 4 + (_rhs)) /* Assumed in hexadecimal significand parsing, and conversion to hexadecimal strings. */ @@ -38,11 +44,11 @@ namespace llvm { struct fltSemantics { /* The largest E such that 2^E is representable; this matches the definition of IEEE 754. 
*/ - exponent_t maxExponent; + APFloat::ExponentType maxExponent; /* The smallest E such that 2^E is a normalized number; this matches the definition of IEEE 754. */ - exponent_t minExponent; + APFloat::ExponentType minExponent; /* Number of bits in the significand. This includes the integer bit. */ @@ -288,9 +294,9 @@ interpretDecimal(StringRef::iterator begin, StringRef::iterator end, } /* Adjust the exponents for any decimal point. */ - D->exponent += static_cast((dot - p) - (dot > p)); + D->exponent += static_cast((dot - p) - (dot > p)); D->normalizedExponent = (D->exponent + - static_cast((p - D->firstSigDigit) + static_cast((p - D->firstSigDigit) - (dot > D->firstSigDigit && dot < p))); } @@ -313,8 +319,8 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, else if (digitValue < 8 && digitValue > 0) return lfLessThanHalf; - /* Otherwise we need to find the first non-zero digit. */ - while (*p == '0') + // Otherwise we need to find the first non-zero digit. + while (p != end && (*p == '0' || *p == '.')) p++; assert(p != end && "Invalid trailing hexadecimal fraction!"); @@ -580,7 +586,7 @@ APFloat::initialize(const fltSemantics *ourSemantics) void APFloat::freeSignificand() { - if (partCount() > 1) + if (needsCleanup()) delete [] significand.parts; } @@ -592,14 +598,14 @@ APFloat::assign(const APFloat &rhs) sign = rhs.sign; category = rhs.category; exponent = rhs.exponent; - if (category == fcNormal || category == fcNaN) + if (isFiniteNonZero() || category == fcNaN) copySignificand(rhs); } void APFloat::copySignificand(const APFloat &rhs) { - assert(category == fcNormal || category == fcNaN); + assert(isFiniteNonZero() || category == fcNaN); assert(rhs.partCount() >= partCount()); APInt::tcAssign(significandParts(), rhs.significandParts(), @@ -679,12 +685,73 @@ APFloat::operator=(const APFloat &rhs) bool APFloat::isDenormal() const { - return isNormal() && (exponent == semantics->minExponent) && + return isFiniteNonZero() && (exponent == semantics->minExponent) && (APInt::tcExtractBit(significandParts(), semantics->precision - 1) == 0); } bool +APFloat::isSmallest() const { + // The smallest number by magnitude in our format will be the smallest + // denormal, i.e. the floating point number with exponent being minimum + // exponent and significand bitwise equal to 1 (i.e. with MSB equal to 0). + return isFiniteNonZero() && exponent == semantics->minExponent && + significandMSB() == 0; +} + +bool APFloat::isSignificandAllOnes() const { + // Test if the significand excluding the integral bit is all ones. This allows + // us to test for binade boundaries. + const integerPart *Parts = significandParts(); + const unsigned PartCount = partCount(); + for (unsigned i = 0; i < PartCount - 1; i++) + if (~Parts[i]) + return false; + + // Set the unused high bits to all ones when we compare. + const unsigned NumHighBits = + PartCount*integerPartWidth - semantics->precision + 1; + assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " + "fill than integerPartWidth"); + const integerPart HighBitFill = + ~integerPart(0) << (integerPartWidth - NumHighBits); + if (~(Parts[PartCount - 1] | HighBitFill)) + return false; + + return true; +} + +bool APFloat::isSignificandAllZeros() const { + // Test if the significand excluding the integral bit is all zeros. This + // allows us to test for binade boundaries. 
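// Worked example of the high-bit masking used by these helpers, assuming
// IEEE single precision (precision = 24) stored in one 64-bit integerPart:
#include <cassert>
#include <cstdint>

int main() {
  const unsigned PartWidth = 64, Precision = 24;
  // Exclude the integral bit together with the 40 unused storage bits.
  const unsigned NumHighBits = PartWidth - Precision + 1;     // 41
  const uint64_t HighBitMask = ~uint64_t(0) >> NumHighBits;   // low 23 bits
  const uint64_t SigOfOne = UINT64_C(0x800000);  // 1.0f: integral bit only
  // The trailing significand is all zeros, so 1.0f sits on a binade boundary.
  assert((SigOfOne & HighBitMask) == 0);
  return 0;
}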
+ const integerPart *Parts = significandParts(); + const unsigned PartCount = partCount(); + + for (unsigned i = 0; i < PartCount - 1; i++) + if (Parts[i]) + return false; + + const unsigned NumHighBits = + PartCount*integerPartWidth - semantics->precision + 1; + assert(NumHighBits <= integerPartWidth && "Can not have more high bits to " + "clear than integerPartWidth"); + const integerPart HighBitMask = ~integerPart(0) >> NumHighBits; + + if (Parts[PartCount - 1] & HighBitMask) + return false; + + return true; +} + +bool +APFloat::isLargest() const { + // The largest number by magnitude in our format will be the floating point + // number with maximum exponent and with significand that is all ones. + return isFiniteNonZero() && exponent == semantics->maxExponent + && isSignificandAllOnes(); +} + +bool APFloat::bitwiseIsEqual(const APFloat &rhs) const { if (this == &rhs) return true;
@@ -694,7 +761,7 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { return false; if (category==fcZero || category==fcInfinity) return true; - else if (category==fcNormal && exponent!=rhs.exponent) + else if (isFiniteNonZero() && exponent!=rhs.exponent) return false; else { int i = partCount();
@@ -711,6 +778,7 @@ APFloat::bitwiseIsEqual(const APFloat &rhs) const { APFloat::APFloat(const fltSemantics &ourSemantics, integerPart value) { initialize(&ourSemantics); sign = 0; + category = fcNormal; zeroSignificand(); exponent = ourSemantics.precision - 1; significandParts()[0] = value;
@@ -728,17 +796,6 @@ APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) { initialize(&ourSemantics); } -APFloat::APFloat(const fltSemantics &ourSemantics, - fltCategory ourCategory, bool negative) { - initialize(&ourSemantics); - category = ourCategory; - sign = negative; - if (category == fcNormal) - category = fcZero; - else if (ourCategory == fcNaN) - makeNaN(); -} - APFloat::APFloat(const fltSemantics &ourSemantics, StringRef text) { initialize(&ourSemantics); convertFromString(text, rmNearestTiesToEven);
@@ -780,8 +837,6 @@ APFloat::significandParts() const integerPart * APFloat::significandParts() { - assert(category == fcNormal || category == fcNaN); - if (partCount() > 1) return significand.parts; else
@@ -791,7 +846,6 @@ APFloat::significandParts() void APFloat::zeroSignificand() { - category = fcNormal; APInt::tcSet(significandParts(), 0, partCount()); }
@@ -872,7 +926,21 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; exponent += rhs.exponent; + // Assume the operands involved in the multiplication are single-precision + // FP, and the two multiplicands are: + // *this = a23 . a22 ... a0 * 2^e1 + // rhs = b23 . b22 ... b0 * 2^e2 + // the result of multiplication is: + // *this = c47 c46 . c45 ... c0 * 2^(e1+e2) + // Note that there are two significant bits at the left-hand side of the + // radix point. Move the radix point toward left by one bit, and adjust + // exponent accordingly. + exponent += 1; + if (addend) { + // The intermediate result of the multiplication has "2 * precision" + // significant bits; adjust the addend to be consistent with mul result. + // Significand savedSignificand = significand; const fltSemantics *savedSemantics = semantics; fltSemantics extendedSemantics;
@@ -880,8 +948,9 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) unsigned int extendedPrecision; /* Normalize our MSB. */ - extendedPrecision = precision + precision - 1; + extendedPrecision = 2 * precision; if (omsb != extendedPrecision) { + assert(extendedPrecision > omsb); APInt::tcShiftLeft(fullSignificand, newPartsCount, extendedPrecision - omsb); exponent -= extendedPrecision - omsb;
@@ -912,8 +981,18 @@ APFloat::multiplySignificand(const APFloat &rhs, const APFloat *addend) omsb = APInt::tcMSB(fullSignificand, newPartsCount) + 1; } - exponent -= (precision - 1); + // Convert the result having "2 * precision" significant-bits back to the one + // having "precision" significant-bits. First, move the radix point from + // position "2*precision - 1" to "precision - 1". The exponent needs to be + // adjusted by "2*precision - 1" - "precision - 1" = "precision". + exponent -= precision; + // In case MSB resides at the left-hand side of radix point, shift the + // mantissa right by some amount to make sure the MSB resides right before + // the radix point (i.e. "MSB . rest-significant-bits"). + // + // Note that the result is not normalized when "omsb < precision". So, the + // caller needs to call APFloat::normalize() if normalized value is expected. if (omsb > precision) { unsigned int bits, significantParts; lostFraction lf;
@@ -1035,7 +1114,7 @@ lostFraction APFloat::shiftSignificandRight(unsigned int bits) { /* Our exponent should not overflow. */ - assert((exponent_t) (exponent + bits) >= exponent); + assert((ExponentType) (exponent + bits) >= exponent); exponent += bits;
@@ -1064,8 +1143,8 @@ APFloat::compareAbsoluteValue(const APFloat &rhs) const int compare; assert(semantics == rhs.semantics); - assert(category == fcNormal); - assert(rhs.category == fcNormal); + assert(isFiniteNonZero()); + assert(rhs.isFiniteNonZero()); compare = exponent - rhs.exponent;
@@ -1117,7 +1196,7 @@ APFloat::roundAwayFromZero(roundingMode rounding_mode, unsigned int bit) const { /* NaNs and infinities should not have lost fractions. */ - assert(category == fcNormal || category == fcZero); + assert(isFiniteNonZero() || category == fcZero); /* Current callers never pass this so we don't handle it. */ assert(lost_fraction != lfExactlyZero);
@@ -1155,7 +1234,7 @@ APFloat::normalize(roundingMode rounding_mode, unsigned int omsb; /* One, not zero, based MSB. */ int exponentChange; - if (category != fcNormal) + if (!isFiniteNonZero()) return opOK; /* Before rounding normalize the exponent of fcNormal numbers.
*/ @@ -1259,42 +1338,43 @@ APFloat::normalize(roundingMode rounding_mode, APFloat::opStatus APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) { - switch (convolve(category, rhs.category)) { + switch (PackCategoriesIntoKey(category, rhs.category)) { default: llvm_unreachable(0); - case convolve(fcNaN, fcZero): - case convolve(fcNaN, fcNormal): - case convolve(fcNaN, fcInfinity): - case convolve(fcNaN, fcNaN): - case convolve(fcNormal, fcZero): - case convolve(fcInfinity, fcNormal): - case convolve(fcInfinity, fcZero): + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcInfinity, fcZero): return opOK; - case convolve(fcZero, fcNaN): - case convolve(fcNormal, fcNaN): - case convolve(fcInfinity, fcNaN): + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): + sign = false; category = fcNaN; copySignificand(rhs); return opOK; - case convolve(fcNormal, fcInfinity): - case convolve(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcNormal, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcInfinity): category = fcInfinity; sign = rhs.sign ^ subtract; return opOK; - case convolve(fcZero, fcNormal): + case PackCategoriesIntoKey(fcZero, fcNormal): assign(rhs); sign = rhs.sign ^ subtract; return opOK; - case convolve(fcZero, fcZero): + case PackCategoriesIntoKey(fcZero, fcZero): /* Sign depends on rounding mode; handled by caller. */ return opOK; - case convolve(fcInfinity, fcInfinity): + case PackCategoriesIntoKey(fcInfinity, fcInfinity): /* Differently signed infinities can only be validly subtracted. 
*/ if (((sign ^ rhs.sign)!=0) != subtract) { @@ -1304,7 +1384,7 @@ APFloat::addOrSubtractSpecials(const APFloat &rhs, bool subtract) return opOK; - case convolve(fcNormal, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcNormal): return opDivByZero; } } @@ -1385,41 +1465,43 @@ APFloat::addOrSubtractSignificand(const APFloat &rhs, bool subtract) APFloat::opStatus APFloat::multiplySpecials(const APFloat &rhs) { - switch (convolve(category, rhs.category)) { + switch (PackCategoriesIntoKey(category, rhs.category)) { default: llvm_unreachable(0); - case convolve(fcNaN, fcZero): - case convolve(fcNaN, fcNormal): - case convolve(fcNaN, fcInfinity): - case convolve(fcNaN, fcNaN): + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + sign = false; return opOK; - case convolve(fcZero, fcNaN): - case convolve(fcNormal, fcNaN): - case convolve(fcInfinity, fcNaN): + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): + sign = false; category = fcNaN; copySignificand(rhs); return opOK; - case convolve(fcNormal, fcInfinity): - case convolve(fcInfinity, fcNormal): - case convolve(fcInfinity, fcInfinity): + case PackCategoriesIntoKey(fcNormal, fcInfinity): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcInfinity, fcInfinity): category = fcInfinity; return opOK; - case convolve(fcZero, fcNormal): - case convolve(fcNormal, fcZero): - case convolve(fcZero, fcZero): + case PackCategoriesIntoKey(fcZero, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcZero): + case PackCategoriesIntoKey(fcZero, fcZero): category = fcZero; return opOK; - case convolve(fcZero, fcInfinity): - case convolve(fcInfinity, fcZero): + case PackCategoriesIntoKey(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcInfinity, fcZero): makeNaN(); return opInvalidOp; - case convolve(fcNormal, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcNormal): return opOK; } } @@ -1427,41 +1509,40 @@ APFloat::multiplySpecials(const APFloat &rhs) APFloat::opStatus APFloat::divideSpecials(const APFloat &rhs) { - switch (convolve(category, rhs.category)) { + switch (PackCategoriesIntoKey(category, rhs.category)) { default: llvm_unreachable(0); - case convolve(fcNaN, fcZero): - case convolve(fcNaN, fcNormal): - case convolve(fcNaN, fcInfinity): - case convolve(fcNaN, fcNaN): - case convolve(fcInfinity, fcZero): - case convolve(fcInfinity, fcNormal): - case convolve(fcZero, fcInfinity): - case convolve(fcZero, fcNormal): - return opOK; - - case convolve(fcZero, fcNaN): - case convolve(fcNormal, fcNaN): - case convolve(fcInfinity, fcNaN): + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): category = fcNaN; copySignificand(rhs); + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + sign = false; + case PackCategoriesIntoKey(fcInfinity, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcNormal): return opOK; - case convolve(fcNormal, fcInfinity): + case PackCategoriesIntoKey(fcNormal, fcInfinity): category = fcZero; return opOK; - case convolve(fcNormal, fcZero): + case 
PackCategoriesIntoKey(fcNormal, fcZero): category = fcInfinity; return opDivByZero; - case convolve(fcInfinity, fcInfinity): - case convolve(fcZero, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcZero): makeNaN(); return opInvalidOp; - case convolve(fcNormal, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcNormal): return opOK; } } @@ -1469,35 +1550,36 @@ APFloat::divideSpecials(const APFloat &rhs) APFloat::opStatus APFloat::modSpecials(const APFloat &rhs) { - switch (convolve(category, rhs.category)) { + switch (PackCategoriesIntoKey(category, rhs.category)) { default: llvm_unreachable(0); - case convolve(fcNaN, fcZero): - case convolve(fcNaN, fcNormal): - case convolve(fcNaN, fcInfinity): - case convolve(fcNaN, fcNaN): - case convolve(fcZero, fcInfinity): - case convolve(fcZero, fcNormal): - case convolve(fcNormal, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + case PackCategoriesIntoKey(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcInfinity): return opOK; - case convolve(fcZero, fcNaN): - case convolve(fcNormal, fcNaN): - case convolve(fcInfinity, fcNaN): + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): + sign = false; category = fcNaN; copySignificand(rhs); return opOK; - case convolve(fcNormal, fcZero): - case convolve(fcInfinity, fcZero): - case convolve(fcInfinity, fcNormal): - case convolve(fcInfinity, fcInfinity): - case convolve(fcZero, fcZero): + case PackCategoriesIntoKey(fcNormal, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcInfinity, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcZero): makeNaN(); return opInvalidOp; - case convolve(fcNormal, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcNormal): return opOK; } } @@ -1578,7 +1660,7 @@ APFloat::multiply(const APFloat &rhs, roundingMode rounding_mode) sign ^= rhs.sign; fs = multiplySpecials(rhs); - if (category == fcNormal) { + if (isFiniteNonZero()) { lostFraction lost_fraction = multiplySignificand(rhs, 0); fs = normalize(rounding_mode, lost_fraction); if (lost_fraction != lfExactlyZero) @@ -1597,7 +1679,7 @@ APFloat::divide(const APFloat &rhs, roundingMode rounding_mode) sign ^= rhs.sign; fs = divideSpecials(rhs); - if (category == fcNormal) { + if (isFiniteNonZero()) { lostFraction lost_fraction = divideSignificand(rhs); fs = normalize(rounding_mode, lost_fraction); if (lost_fraction != lfExactlyZero) @@ -1651,7 +1733,7 @@ APFloat::mod(const APFloat &rhs, roundingMode rounding_mode) opStatus fs; fs = modSpecials(rhs); - if (category == fcNormal && rhs.category == fcNormal) { + if (isFiniteNonZero() && rhs.isFiniteNonZero()) { APFloat V = *this; unsigned int origSign = sign; @@ -1697,9 +1779,9 @@ APFloat::fusedMultiplyAdd(const APFloat &multiplicand, /* If and only if all arguments are normal do we need to do an extended-precision calculation. 
*/ - if (category == fcNormal && - multiplicand.category == fcNormal && - addend.category == fcNormal) { + if (isFiniteNonZero() && + multiplicand.isFiniteNonZero() && + addend.isFiniteNonZero()) { lostFraction lost_fraction; lost_fraction = multiplySignificand(multiplicand, &addend); @@ -1736,7 +1818,7 @@ APFloat::opStatus APFloat::roundToIntegral(roundingMode rounding_mode) { // If the exponent is large enough, we know that this value is already // integral, and the arithmetic below would potentially cause it to saturate // to +/-Inf. Bail out early instead. - if (category == fcNormal && exponent+1 >= (int)semanticsPrecision(*semantics)) + if (isFiniteNonZero() && exponent+1 >= (int)semanticsPrecision(*semantics)) return opOK; // The algorithm here is quite simple: we add 2^(p-1), where p is the @@ -1780,36 +1862,36 @@ APFloat::compare(const APFloat &rhs) const assert(semantics == rhs.semantics); - switch (convolve(category, rhs.category)) { + switch (PackCategoriesIntoKey(category, rhs.category)) { default: llvm_unreachable(0); - case convolve(fcNaN, fcZero): - case convolve(fcNaN, fcNormal): - case convolve(fcNaN, fcInfinity): - case convolve(fcNaN, fcNaN): - case convolve(fcZero, fcNaN): - case convolve(fcNormal, fcNaN): - case convolve(fcInfinity, fcNaN): + case PackCategoriesIntoKey(fcNaN, fcZero): + case PackCategoriesIntoKey(fcNaN, fcNormal): + case PackCategoriesIntoKey(fcNaN, fcInfinity): + case PackCategoriesIntoKey(fcNaN, fcNaN): + case PackCategoriesIntoKey(fcZero, fcNaN): + case PackCategoriesIntoKey(fcNormal, fcNaN): + case PackCategoriesIntoKey(fcInfinity, fcNaN): return cmpUnordered; - case convolve(fcInfinity, fcNormal): - case convolve(fcInfinity, fcZero): - case convolve(fcNormal, fcZero): + case PackCategoriesIntoKey(fcInfinity, fcNormal): + case PackCategoriesIntoKey(fcInfinity, fcZero): + case PackCategoriesIntoKey(fcNormal, fcZero): if (sign) return cmpLessThan; else return cmpGreaterThan; - case convolve(fcNormal, fcInfinity): - case convolve(fcZero, fcInfinity): - case convolve(fcZero, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcInfinity): + case PackCategoriesIntoKey(fcZero, fcNormal): if (rhs.sign) return cmpGreaterThan; else return cmpLessThan; - case convolve(fcInfinity, fcInfinity): + case PackCategoriesIntoKey(fcInfinity, fcInfinity): if (sign == rhs.sign) return cmpEqual; else if (sign) @@ -1817,10 +1899,10 @@ APFloat::compare(const APFloat &rhs) const else return cmpGreaterThan; - case convolve(fcZero, fcZero): + case PackCategoriesIntoKey(fcZero, fcZero): return cmpEqual; - case convolve(fcNormal, fcNormal): + case PackCategoriesIntoKey(fcNormal, fcNormal): break; } @@ -1877,8 +1959,25 @@ APFloat::convert(const fltSemantics &toSemantics, X86SpecialNan = true; } + // If this is a truncation of a denormal number, and the target semantics + // has larger exponent range than the source semantics (this can happen + // when truncating from PowerPC double-double to double format), the + // right shift could lose result mantissa bits. Adjust exponent instead + // of performing excessive shift. 
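// The adjustment below leans on the identity
//   (sig >> k) * 2^(e + k) == sig * 2^e   whenever the low k bits of sig are 0,
// i.e. an exponent change can stand in for a shift that would otherwise lose
// bits. A minimal standalone check of that identity:
#include <cassert>
#include <cstdint>

int main() {
  const uint64_t Sig = 0x28;         // binary 101000: low bits clear
  const int Exp = -130, K = 3;
  assert(((Sig >> K) << K) == Sig);  // the right shift loses nothing here
  assert((Sig >> K) == 0x5 && Exp + K == -127);  // same value, new split
  return 0;
}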
+ if (shift < 0 && isFiniteNonZero()) { + int exponentChange = significandMSB() + 1 - fromSemantics.precision; + if (exponent + exponentChange < toSemantics.minExponent) + exponentChange = toSemantics.minExponent - exponent; + if (exponentChange < shift) + exponentChange = shift; + if (exponentChange < 0) { + shift -= exponentChange; + exponent += exponentChange; + } + } + // If this is a truncation, perform the shift before we narrow the storage. - if (shift < 0 && (category==fcNormal || category==fcNaN)) + if (shift < 0 && (isFiniteNonZero() || category==fcNaN)) lostFraction = shiftRight(significandParts(), oldPartCount, -shift); // Fix the storage so it can hold to new value. @@ -1887,14 +1986,14 @@ APFloat::convert(const fltSemantics &toSemantics, integerPart *newParts; newParts = new integerPart[newPartCount]; APInt::tcSet(newParts, 0, newPartCount); - if (category==fcNormal || category==fcNaN) + if (isFiniteNonZero() || category==fcNaN) APInt::tcAssign(newParts, significandParts(), oldPartCount); freeSignificand(); significand.parts = newParts; } else if (newPartCount == 1 && oldPartCount != 1) { // Switch to built-in storage for a single part. integerPart newPart = 0; - if (category==fcNormal || category==fcNaN) + if (isFiniteNonZero() || category==fcNaN) newPart = significandParts()[0]; freeSignificand(); significand.part = newPart; @@ -1905,10 +2004,10 @@ APFloat::convert(const fltSemantics &toSemantics, // If this is an extension, perform the shift now that the storage is // available. - if (shift > 0 && (category==fcNormal || category==fcNaN)) + if (shift > 0 && (isFiniteNonZero() || category==fcNaN)) APInt::tcShiftLeft(significandParts(), newPartCount, shift); - if (category == fcNormal) { + if (isFiniteNonZero()) { fs = normalize(rounding_mode, lostFraction); *losesInfo = (fs != opOK); } else if (category == fcNaN) { @@ -2204,56 +2303,46 @@ APFloat::opStatus APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) { lostFraction lost_fraction = lfExactlyZero; - integerPart *significand; - unsigned int bitPos, partsCount; - StringRef::iterator dot, firstSignificantDigit; + category = fcNormal; zeroSignificand(); exponent = 0; - category = fcNormal; - significand = significandParts(); - partsCount = partCount(); - bitPos = partsCount * integerPartWidth; + integerPart *significand = significandParts(); + unsigned partsCount = partCount(); + unsigned bitPos = partsCount * integerPartWidth; + bool computedTrailingFraction = false; - /* Skip leading zeroes and any (hexa)decimal point. */ + // Skip leading zeroes and any (hexa)decimal point. StringRef::iterator begin = s.begin(); StringRef::iterator end = s.end(); + StringRef::iterator dot; StringRef::iterator p = skipLeadingZeroesAndAnyDot(begin, end, &dot); - firstSignificantDigit = p; + StringRef::iterator firstSignificantDigit = p; - for (; p != end;) { + while (p != end) { integerPart hex_value; if (*p == '.') { assert(dot == end && "String contains multiple dots"); dot = p++; - if (p == end) { - break; - } + continue; } hex_value = hexDigitValue(*p); - if (hex_value == -1U) { + if (hex_value == -1U) break; - } p++; - if (p == end) { - break; - } else { - /* Store the number whilst 4-bit nibbles remain. 
*/ - if (bitPos) { - bitPos -= 4; - hex_value <<= bitPos % integerPartWidth; - significand[bitPos / integerPartWidth] |= hex_value; - } else { - lost_fraction = trailingHexadecimalFraction(p, end, hex_value); - while (p != end && hexDigitValue(*p) != -1U) - p++; - break; - } + // Store the number while we have space. + if (bitPos) { + bitPos -= 4; + hex_value <<= bitPos % integerPartWidth; + significand[bitPos / integerPartWidth] |= hex_value; + } else if (!computedTrailingFraction) { + lost_fraction = trailingHexadecimalFraction(p, end, hex_value); + computedTrailingFraction = true; } } @@ -2316,8 +2405,8 @@ APFloat::roundSignificandWithExponent(const integerPart *decSigParts, excessPrecision = calcSemantics.precision - semantics->precision; truncatedBits = excessPrecision; - APFloat decSig(calcSemantics, fcZero, sign); - APFloat pow5(calcSemantics, fcZero, false); + APFloat decSig = APFloat::getZero(calcSemantics, sign); + APFloat pow5(calcSemantics); sigStatus = decSig.convertFromUnsignedParts(decSigParts, sigPartCount, rmNearestTiesToEven); @@ -2402,7 +2491,14 @@ APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) 42039/12655 < L < 28738/8651 [ numerator <= 65536 ] */ - if (decDigitValue(*D.firstSigDigit) >= 10U) { + // Test if we have a zero number allowing for strings with no null terminators + // and zero decimals with non-zero exponents. + // + // We computed firstSigDigit by ignoring all zeros and dots. Thus if + // D->firstSigDigit equals str.end(), every digit must be a zero and there can + // be at most one dot. On the other hand, if we have a zero with a non-zero + // exponent, then we know that D.firstSigDigit will be non-numeric. + if (D.firstSigDigit == str.end() || decDigitValue(*D.firstSigDigit) >= 10U) { category = fcZero; fs = opOK; @@ -2419,6 +2515,7 @@ APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) (D.normalizedExponent + 1) * 28738 <= 8651 * (semantics->minExponent - (int) semantics->precision)) { /* Underflow to zero and round. */ + category = fcNormal; zeroSignificand(); fs = normalize(rounding_mode, lfLessThanHalf); @@ -2485,11 +2582,40 @@ APFloat::convertFromDecimalString(StringRef str, roundingMode rounding_mode) return fs; } +bool +APFloat::convertFromStringSpecials(StringRef str) { + if (str.equals("inf") || str.equals("INFINITY")) { + makeInf(false); + return true; + } + + if (str.equals("-inf") || str.equals("-INFINITY")) { + makeInf(true); + return true; + } + + if (str.equals("nan") || str.equals("NaN")) { + makeNaN(false, false); + return true; + } + + if (str.equals("-nan") || str.equals("-NaN")) { + makeNaN(false, true); + return true; + } + + return false; +} + APFloat::opStatus APFloat::convertFromString(StringRef str, roundingMode rounding_mode) { assert(!str.empty() && "Invalid string length"); + // Handle special cases. + if (convertFromStringSpecials(str)) + return opOK; + /* Handle a leading minus sign. */ StringRef::iterator p = str.begin(); size_t slen = str.size(); @@ -2686,7 +2812,7 @@ APFloat::convertNormalToHexString(char *dst, unsigned int hexDigits, } hash_code llvm::hash_value(const APFloat &Arg) { - if (Arg.category != APFloat::fcNormal) + if (!Arg.isFiniteNonZero()) return hash_combine((uint8_t)Arg.category, // NaN has no sign, fix it at zero. Arg.isNaN() ? 
(uint8_t)0 : (uint8_t)Arg.sign, @@ -2717,7 +2843,7 @@ APFloat::convertF80LongDoubleAPFloatToAPInt() const uint64_t myexponent, mysignificand; - if (category==fcNormal) { + if (isFiniteNonZero()) { myexponent = exponent+16383; //bias mysignificand = significandParts()[0]; if (myexponent==1 && !(mysignificand & 0x8000000000000000ULL)) @@ -2774,7 +2900,7 @@ APFloat::convertPPCDoubleDoubleAPFloatToAPInt() const // just set the second double to zero. Otherwise, re-convert back to // the extended format and compute the difference. This now should // convert exactly to double. - if (u.category == fcNormal && losesInfo) { + if (u.isFiniteNonZero() && losesInfo) { fs = u.convert(extendedSemantics, rmNearestTiesToEven, &losesInfo); assert(fs == opOK && !losesInfo); (void)fs; @@ -2800,7 +2926,7 @@ APFloat::convertQuadrupleAPFloatToAPInt() const uint64_t myexponent, mysignificand, mysignificand2; - if (category==fcNormal) { + if (isFiniteNonZero()) { myexponent = exponent+16383; //bias mysignificand = significandParts()[0]; mysignificand2 = significandParts()[1]; @@ -2836,7 +2962,7 @@ APFloat::convertDoubleAPFloatToAPInt() const uint64_t myexponent, mysignificand; - if (category==fcNormal) { + if (isFiniteNonZero()) { myexponent = exponent+1023; //bias mysignificand = *significandParts(); if (myexponent==1 && !(mysignificand & 0x10000000000000LL)) @@ -2866,7 +2992,7 @@ APFloat::convertFloatAPFloatToAPInt() const uint32_t myexponent, mysignificand; - if (category==fcNormal) { + if (isFiniteNonZero()) { myexponent = exponent+127; //bias mysignificand = (uint32_t)*significandParts(); if (myexponent == 1 && !(mysignificand & 0x800000)) @@ -2895,7 +3021,7 @@ APFloat::convertHalfAPFloatToAPInt() const uint32_t myexponent, mysignificand; - if (category==fcNormal) { + if (isFiniteNonZero()) { myexponent = exponent+15; //bias mysignificand = (uint32_t)*significandParts(); if (myexponent == 1 && !(mysignificand & 0x400)) @@ -3018,7 +3144,7 @@ APFloat::initFromPPCDoubleDoubleAPInt(const APInt &api) (void)fs; // Unless we have a special case, add in second double. - if (category == fcNormal) { + if (isFiniteNonZero()) { APFloat v(IEEEdouble, APInt(64, i2)); fs = v.convert(PPCDoubleDouble, rmNearestTiesToEven, &losesInfo); assert(fs == opOK && !losesInfo); @@ -3211,55 +3337,75 @@ APFloat::getAllOnesValue(unsigned BitWidth, bool isIEEE) } } -APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { - APFloat Val(Sem, fcNormal, Negative); - +/// Make this number the largest magnitude normal number in the given +/// semantics. +void APFloat::makeLargest(bool Negative) { // We want (in interchange format): // sign = {Negative} // exponent = 1..10 // significand = 1..1 + category = fcNormal; + sign = Negative; + exponent = semantics->maxExponent; - Val.exponent = Sem.maxExponent; // unbiased + // Use memset to set all but the highest integerPart to all ones. + integerPart *significand = significandParts(); + unsigned PartCount = partCount(); + memset(significand, 0xFF, sizeof(integerPart)*(PartCount - 1)); - // 1-initialize all bits.... - Val.zeroSignificand(); - integerPart *significand = Val.significandParts(); - unsigned N = partCountForBits(Sem.precision); - for (unsigned i = 0; i != N; ++i) - significand[i] = ~((integerPart) 0); + // Set the high integerPart especially setting all unused top bits for + // internal consistency. 
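// For IEEE double (precision = 53, maxExponent = 1023) this recipe yields the
// familiar DBL_MAX encoding; a standalone check against the C library:
#include <cassert>
#include <cfloat>
#include <cstdint>
#include <cstring>

int main() {
  // sign 0, biased exponent 0x7FE (largest finite), fraction all ones.
  const uint64_t Bits = UINT64_C(0x7FEFFFFFFFFFFFFF);
  double Largest;
  std::memcpy(&Largest, &Bits, sizeof(double));  // reinterpret the bit pattern
  assert(Largest == DBL_MAX);
  return 0;
}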
+ const unsigned NumUnusedHighBits = + PartCount*integerPartWidth - semantics->precision; + significand[PartCount - 1] = ~integerPart(0) >> NumUnusedHighBits; +} + +/// Make this number the smallest magnitude denormal number in the given +/// semantics. +void APFloat::makeSmallest(bool Negative) { + // We want (in interchange format): + // sign = {Negative} + // exponent = 0..0 + // significand = 0..01 + category = fcNormal; + sign = Negative; + exponent = semantics->minExponent; + APInt::tcSet(significandParts(), 1, partCount()); +} - // ...and then clear the top bits for internal consistency. - if (Sem.precision % integerPartWidth != 0) - significand[N-1] &= - (((integerPart) 1) << (Sem.precision % integerPartWidth)) - 1; +APFloat APFloat::getLargest(const fltSemantics &Sem, bool Negative) { + // We want (in interchange format): + // sign = {Negative} + // exponent = 1..10 + // significand = 1..1 + APFloat Val(Sem, uninitialized); + Val.makeLargest(Negative); return Val; } APFloat APFloat::getSmallest(const fltSemantics &Sem, bool Negative) { - APFloat Val(Sem, fcNormal, Negative); - // We want (in interchange format): // sign = {Negative} // exponent = 0..0 // significand = 0..01 - - Val.exponent = Sem.minExponent; // unbiased - Val.zeroSignificand(); - Val.significandParts()[0] = 1; + APFloat Val(Sem, uninitialized); + Val.makeSmallest(Negative); return Val; } APFloat APFloat::getSmallestNormalized(const fltSemantics &Sem, bool Negative) { - APFloat Val(Sem, fcNormal, Negative); + APFloat Val(Sem, uninitialized); // We want (in interchange format): // sign = {Negative} // exponent = 0..0 // significand = 10..0 - Val.exponent = Sem.minExponent; + Val.category = fcNormal; Val.zeroSignificand(); + Val.sign = Negative; + Val.exponent = Sem.minExponent; Val.significandParts()[partCountForBits(Sem.precision)-1] |= (((integerPart) 1) << ((Sem.precision - 1) % integerPartWidth)); @@ -3400,11 +3546,14 @@ void APFloat::toString(SmallVectorImpl &Str, // Set FormatPrecision if zero. We want to do this before we // truncate trailing zeros, as those are part of the precision. if (!FormatPrecision) { - // It's an interesting question whether to use the nominal - // precision or the active precision here for denormals. + // We use enough digits so the number can be round-tripped back to an + // APFloat. The formula comes from "How to Print Floating-Point Numbers + // Accurately" by Steele and White. + // FIXME: Using a formula based purely on the precision is conservative; + // we can print fewer digits depending on the actual value being printed. - // FormatPrecision = ceil(significandBits / lg_2(10)) - FormatPrecision = (semantics->precision * 59 + 195) / 196; + // FormatPrecision = 2 + floor(significandBits / lg_2(10)) + FormatPrecision = 2 + semantics->precision * 59 / 196; } // Ignore trailing binary zeros. @@ -3564,7 +3713,7 @@ void APFloat::toString(SmallVectorImpl &Str, bool APFloat::getExactInverse(APFloat *inv) const { // Special floats and denormals have no exact inverse. - if (category != fcNormal) + if (!isFiniteNonZero()) return false; // Check that the number is a power of two by making sure that only the @@ -3579,10 +3728,10 @@ bool APFloat::getExactInverse(APFloat *inv) const { // Avoid multiplication with a denormal, it is not safe on all platforms and // may be slower than a normal division. 
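// getExactInverse only succeeds for powers of two whose reciprocal is itself
// a normal number; for those, the division is exact. A standalone
// illustration of why the power-of-two restriction matters:
#include <cassert>

int main() {
  const double X = 0.125;      // 2^-3: a single significand bit set
  const double Inv = 1.0 / X;  // exactly 8.0, no rounding occurs
  assert(Inv == 8.0 && X * Inv == 1.0);
  // Non-powers of two have no exact inverse; 1/10 is already rounded, which
  // is the classic reason 0.1 + 0.2 != 0.3 in binary floating point.
  assert(0.1 + 0.2 != 0.3);
  return 0;
}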
- if (reciprocal.significandMSB() + 1 < reciprocal.semantics->precision) + if (reciprocal.isDenormal()) return false; - assert(reciprocal.category == fcNormal && + assert(reciprocal.isFiniteNonZero() && reciprocal.significandLSB() == reciprocal.semantics->precision - 1); if (inv) @@ -3590,3 +3739,148 @@ bool APFloat::getExactInverse(APFloat *inv) const { return true; } + +bool APFloat::isSignaling() const { + if (!isNaN()) + return false; + + // IEEE-754R 2008 6.2.1: A signaling NaN bit string should be encoded with the + // first bit of the trailing significand being 0. + return !APInt::tcExtractBit(significandParts(), semantics->precision - 2); +} + +/// IEEE-754R 2008 5.3.1: nextUp/nextDown. +/// +/// *NOTE* since nextDown(x) = -nextUp(-x), we only implement nextUp with +/// appropriate sign switching before/after the computation. +APFloat::opStatus APFloat::next(bool nextDown) { + // If we are performing nextDown, swap sign so we have -x. + if (nextDown) + changeSign(); + + // Compute nextUp(x) + opStatus result = opOK; + + // Handle each float category separately. + switch (category) { + case fcInfinity: + // nextUp(+inf) = +inf + if (!isNegative()) + break; + // nextUp(-inf) = -getLargest() + makeLargest(true); + break; + case fcNaN: + // IEEE-754R 2008 6.2 Par 2: nextUp(sNaN) = qNaN. Set Invalid flag. + // IEEE-754R 2008 6.2: nextUp(qNaN) = qNaN. Must be identity so we do not + // change the payload. + if (isSignaling()) { + result = opInvalidOp; + // For consistency, propogate the sign of the sNaN to the qNaN. + makeNaN(false, isNegative(), 0); + } + break; + case fcZero: + // nextUp(pm 0) = +getSmallest() + makeSmallest(false); + break; + case fcNormal: + // nextUp(-getSmallest()) = -0 + if (isSmallest() && isNegative()) { + APInt::tcSet(significandParts(), 0, partCount()); + category = fcZero; + exponent = 0; + break; + } + + // nextUp(getLargest()) == INFINITY + if (isLargest() && !isNegative()) { + APInt::tcSet(significandParts(), 0, partCount()); + category = fcInfinity; + exponent = semantics->maxExponent + 1; + break; + } + + // nextUp(normal) == normal + inc. + if (isNegative()) { + // If we are negative, we need to decrement the significand. + + // We only cross a binade boundary that requires adjusting the exponent + // if: + // 1. exponent != semantics->minExponent. This implies we are not in the + // smallest binade or are dealing with denormals. + // 2. Our significand excluding the integral bit is all zeros. + bool WillCrossBinadeBoundary = + exponent != semantics->minExponent && isSignificandAllZeros(); + + // Decrement the significand. + // + // We always do this since: + // 1. If we are dealing with a non binade decrement, by definition we + // just decrement the significand. + // 2. If we are dealing with a normal -> normal binade decrement, since + // we have an explicit integral bit the fact that all bits but the + // integral bit are zero implies that subtracting one will yield a + // significand with 0 integral bit and 1 in all other spots. Thus we + // must just adjust the exponent and set the integral bit to 1. + // 3. If we are dealing with a normal -> denormal binade decrement, + // since we set the integral bit to 0 when we represent denormals, we + // just decrement the significand. + integerPart *Parts = significandParts(); + APInt::tcDecrement(Parts, partCount()); + + if (WillCrossBinadeBoundary) { + // Our result is a normal number. Do the following: + // 1. Set the integral bit to 1. + // 2. Decrement the exponent. 
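For reference, the same binade crossing is visible through the C library's nextafter on the host (a small standalone demonstration, independent of APFloat):

#include <cmath>
#include <cstdio>

int main() {
  // 2.0f is the smallest member of its binade; stepping toward zero
  // crosses the boundary and re-fills every significand bit.
  float Down = std::nextafterf(2.0f, 0.0f);
  std::printf("%a -> %a\n", 2.0f, Down);  // prints: 0x1p+1 -> 0x1.fffffep+0
  return 0;
}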
+ APInt::tcSetBit(Parts, semantics->precision - 1); + exponent--; + } + } else { + // If we are positive, we need to increment the significand. + + // We only cross a binade boundary that requires adjusting the exponent if + // the input is not a denormal and all of said input's significand bits + // are set. If all of said conditions are true: clear the significand, set + // the integral bit to 1, and increment the exponent. If we have a + // denormal always increment since moving denormals and the numbers in the + // smallest normal binade have the same exponent in our representation. + bool WillCrossBinadeBoundary = !isDenormal() && isSignificandAllOnes(); + + if (WillCrossBinadeBoundary) { + integerPart *Parts = significandParts(); + APInt::tcSet(Parts, 0, partCount()); + APInt::tcSetBit(Parts, semantics->precision - 1); + assert(exponent != semantics->maxExponent && + "We can not increment an exponent beyond the maxExponent allowed" + " by the given floating point semantics."); + exponent++; + } else { + incrementSignificand(); + } + } + break; + } + + // If we are performing nextDown, swap sign so we have -nextUp(-x) + if (nextDown) + changeSign(); + + return result; +} + +void +APFloat::makeInf(bool Negative) { + category = fcInfinity; + sign = Negative; + exponent = semantics->maxExponent + 1; + APInt::tcSet(significandParts(), 0, partCount()); +} + +void +APFloat::makeZero(bool Negative) { + category = fcZero; + sign = Negative; + exponent = semantics->minExponent-1; + APInt::tcSet(significandParts(), 0, partCount()); +} diff --git a/contrib/llvm/lib/Support/APInt.cpp b/contrib/llvm/lib/Support/APInt.cpp index e853475..89f96bd 100644 --- a/contrib/llvm/lib/Support/APInt.cpp +++ b/contrib/llvm/lib/Support/APInt.cpp @@ -692,14 +692,14 @@ unsigned APInt::countLeadingZerosSlowCase() const { unsigned i = getNumWords(); integerPart MSW = pVal[i-1] & MSWMask; if (MSW) - return CountLeadingZeros_64(MSW) - (APINT_BITS_PER_WORD - BitsInMSW); + return llvm::countLeadingZeros(MSW) - (APINT_BITS_PER_WORD - BitsInMSW); unsigned Count = BitsInMSW; for (--i; i > 0u; --i) { if (pVal[i-1] == 0) Count += APINT_BITS_PER_WORD; else { - Count += CountLeadingZeros_64(pVal[i-1]); + Count += llvm::countLeadingZeros(pVal[i-1]); break; } } @@ -735,13 +735,13 @@ unsigned APInt::countLeadingOnes() const { unsigned APInt::countTrailingZeros() const { if (isSingleWord()) - return std::min(unsigned(CountTrailingZeros_64(VAL)), BitWidth); + return std::min(unsigned(llvm::countTrailingZeros(VAL)), BitWidth); unsigned Count = 0; unsigned i = 0; for (; i < getNumWords() && pVal[i] == 0; ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) - Count += CountTrailingZeros_64(pVal[i]); + Count += llvm::countTrailingZeros(pVal[i]); return std::min(Count, BitWidth); } @@ -1512,7 +1512,7 @@ static void KnuthDiv(unsigned *u, unsigned *v, unsigned *q, unsigned* r, // and v so that its high bits are shifted to the top of v's range without // overflow. Note that this can require an extra word in u so that u must // be of length m+n+1. 
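The shift computed next is just the leading-zero count of the divisor's top digit; a freestanding equivalent (a sketch under KnuthDiv's own preconditions: 32-bit digits, non-zero top digit; normalizeShift is an invented name):

#include <cstdint>

// After shifting v left by this amount its most significant bit is set,
// which keeps Knuth's quotient-digit estimate within 2 of the true digit.
static unsigned normalizeShift(uint32_t TopDigit) {
  unsigned Shift = 0;
  while (!(TopDigit & 0x80000000u)) {  // precondition: TopDigit != 0
    TopDigit <<= 1;
    ++Shift;
  }
  return Shift;
}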
- unsigned shift = CountLeadingZeros_32(v[n-1]); + unsigned shift = countLeadingZeros(v[n-1]); unsigned v_carry = 0; unsigned u_carry = 0; if (shift) { @@ -2304,24 +2304,7 @@ namespace { static unsigned int partMSB(integerPart value) { - unsigned int n, msb; - - if (value == 0) - return -1U; - - n = integerPartWidth / 2; - - msb = 0; - do { - if (value >> n) { - value >>= n; - msb += n; - } - - n >>= 1; - } while (n); - - return msb; + return findLastSet(value, ZB_Max); } /* Returns the bit number of the least significant set bit of a @@ -2329,24 +2312,7 @@ namespace { static unsigned int partLSB(integerPart value) { - unsigned int n, lsb; - - if (value == 0) - return -1U; - - lsb = integerPartWidth - 1; - n = integerPartWidth / 2; - - do { - if (value << n) { - value <<= n; - lsb -= n; - } - - n >>= 1; - } while (n); - - return lsb; + return findFirstSet(value, ZB_Max); } } @@ -2888,6 +2854,20 @@ APInt::tcIncrement(integerPart *dst, unsigned int parts) return i == parts; } +/* Decrement a bignum in-place, return the borrow flag. */ +integerPart +APInt::tcDecrement(integerPart *dst, unsigned int parts) { + for (unsigned int i = 0; i < parts; i++) { + // If the current word is non-zero, then the decrement has no effect on the + // higher-order words of the integer and no borrow can occur. Exit early. + if (dst[i]--) + return 0; + } + // If every word was zero, then there is a borrow. + return 1; +} + + /* Set the least significant BITS bits of a bignum, clear the rest. */ void diff --git a/contrib/llvm/lib/Support/Allocator.cpp b/contrib/llvm/lib/Support/Allocator.cpp index 3c4191b..6e7a541 100644 --- a/contrib/llvm/lib/Support/Allocator.cpp +++ b/contrib/llvm/lib/Support/Allocator.cpp @@ -26,6 +26,10 @@ BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold, : SlabSize(size), SizeThreshold(std::min(size, threshold)), Allocator(allocator), CurSlab(0), BytesAllocated(0) { } +BumpPtrAllocator::BumpPtrAllocator(size_t size, size_t threshold) + : SlabSize(size), SizeThreshold(std::min(size, threshold)), + Allocator(DefaultSlabAllocator), CurSlab(0), BytesAllocated(0) { } + BumpPtrAllocator::~BumpPtrAllocator() { DeallocateSlabs(CurSlab); } @@ -167,9 +171,6 @@ void BumpPtrAllocator::PrintStats() const { << " (includes alignment, etc)\n"; } -MallocSlabAllocator BumpPtrAllocator::DefaultSlabAllocator = - MallocSlabAllocator(); - SlabAllocator::~SlabAllocator() { } MallocSlabAllocator::~MallocSlabAllocator() { } diff --git a/contrib/llvm/lib/Support/BlockFrequency.cpp b/contrib/llvm/lib/Support/BlockFrequency.cpp index 84a993e..00efe90 100644 --- a/contrib/llvm/lib/Support/BlockFrequency.cpp +++ b/contrib/llvm/lib/Support/BlockFrequency.cpp @@ -18,76 +18,94 @@ using namespace llvm; -namespace { - -/// mult96bit - Multiply FREQ by N and store result in W array. -void mult96bit(uint64_t freq, uint32_t N, uint64_t W[2]) { +/// Multiply FREQ by N and store result in W array. +static void mult96bit(uint64_t freq, uint32_t N, uint32_t W[3]) { uint64_t u0 = freq & UINT32_MAX; uint64_t u1 = freq >> 32; - // Represent 96-bit value as w[2]:w[1]:w[0]; - uint32_t w[3] = { 0, 0, 0 }; - + // Represent 96-bit value as W[2]:W[1]:W[0]; uint64_t t = u0 * N; uint64_t k = t >> 32; - w[0] = t; + W[0] = t; t = u1 * N + k; - w[1] = t; - w[2] = t >> 32; - - // W[1] - higher bits. - // W[0] - lower bits. - W[0] = w[0] + ((uint64_t) w[1] << 32); - W[1] = w[2]; + W[1] = t; + W[2] = t >> 32; } - -/// div96bit - Divide 96-bit value stored in W array by D. Return 64-bit frequency. 
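On hosts with a 128-bit integer type, the digit layout produced by mult96bit can be cross-checked directly (a sketch; __uint128_t is a GCC/Clang extension and checkMult96 an invented helper, neither used by the code here):

#include <cassert>
#include <cstdint>

// W[2]:W[1]:W[0] must equal freq * N viewed as three little-endian digits.
static void checkMult96(uint64_t Freq, uint32_t N, const uint32_t W[3]) {
  __uint128_t P = (__uint128_t)Freq * N;
  assert(W[0] == uint32_t(P) &&
         W[1] == uint32_t(P >> 32) &&
         W[2] == uint32_t(P >> 64));
}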
-uint64_t div96bit(uint64_t W[2], uint32_t D) { - uint64_t y = W[0]; - uint64_t x = W[1]; - int i; - - for (i = 1; i <= 64 && x; ++i) { - uint32_t t = (int)x >> 31; - x = (x << 1) | (y >> 63); - y = y << 1; - if ((x | t) >= D) { - x -= D; - ++y; +/// Divide 96-bit value stored in W[2]:W[1]:W[0] by D. Since our word size is a +/// 32 bit unsigned integer, we can use a short division algorithm. +static uint64_t divrem96bit(uint32_t W[3], uint32_t D, uint32_t *Rout) { + // We assume that W[2] is non-zero since if W[2] is not then the user should + // just use hardware division. + assert(W[2] && "This routine assumes that W[2] is non-zero since if W[2] is " + "zero, the caller should just use 64/32 hardware."); + uint32_t Q[3] = { 0, 0, 0 }; + + // The generalized short division algorithm sets i to m + n - 1, where n is + // the number of words in the divisior and m is the number of words by which + // the divident exceeds the divisor (i.e. m + n == the length of the dividend + // in words). Due to our assumption that W[2] is non-zero, we know that the + // dividend is of length 3 implying since n is 1 that m = 2. Thus we set i to + // m + n - 1 = 2 + 1 - 1 = 2. + uint32_t R = 0; + for (int i = 2; i >= 0; --i) { + uint64_t PartialD = uint64_t(R) << 32 | W[i]; + if (PartialD == 0) { + Q[i] = 0; + R = 0; + } else if (PartialD < D) { + Q[i] = 0; + R = uint32_t(PartialD); + } else if (PartialD == D) { + Q[i] = 1; + R = 0; + } else { + Q[i] = uint32_t(PartialD / D); + R = uint32_t(PartialD - (Q[i] * D)); } } - return y << (64 - i + 1); -} + // If Q[2] is non-zero, then we overflowed. + uint64_t Result; + if (Q[2]) { + Result = UINT64_MAX; + R = D; + } else { + // Form the final uint64_t result, avoiding endianness issues. + Result = uint64_t(Q[0]) | (uint64_t(Q[1]) << 32); + } + + if (Rout) + *Rout = R; + return Result; } +uint32_t BlockFrequency::scale(uint32_t N, uint32_t D) { + assert(D != 0 && "Division by zero"); -BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { - uint32_t n = Prob.getNumerator(); - uint32_t d = Prob.getDenominator(); - - assert(n <= d && "Probability must be less or equal to 1."); - - // Calculate Frequency * n. - uint64_t mulLo = (Frequency & UINT32_MAX) * n; - uint64_t mulHi = (Frequency >> 32) * n; - uint64_t mulRes = (mulHi << 32) + mulLo; - - // If there was overflow use 96-bit operations. - if (mulHi > UINT32_MAX || mulRes < mulLo) { - // 96-bit value represented as W[1]:W[0]. - uint64_t W[2]; - - // Probability is less or equal to 1 which means that results must fit - // 64-bit. - mult96bit(Frequency, n, W); - Frequency = div96bit(W, d); - return *this; + // Calculate Frequency * N. + uint64_t MulLo = (Frequency & UINT32_MAX) * N; + uint64_t MulHi = (Frequency >> 32) * N; + uint64_t MulRes = (MulHi << 32) + MulLo; + + // If the product fits in 64 bits, just use built-in division. + if (MulHi <= UINT32_MAX && MulRes >= MulLo) { + Frequency = MulRes / D; + return MulRes % D; } - Frequency = mulRes / d; + // Product overflowed, use 96-bit operations. + // 96-bit value represented as W[2]:W[1]:W[0]. 
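With that same 128-bit extension, the whole overflow path collapses to one expression; the portable 96-bit code below exists because the extension is not universally available (a sketch; note the in-tree version additionally saturates to UINT64_MAX on overflow):

#include <cstdint>

static uint64_t scale128(uint64_t Freq, uint32_t N, uint32_t D) {
  return (uint64_t)(((__uint128_t)Freq * N) / D);  // no saturation here
}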
+ uint32_t W[3]; + uint32_t R; + mult96bit(Frequency, N, W); + Frequency = divrem96bit(W, D, &R); + return R; +} + +BlockFrequency &BlockFrequency::operator*=(const BranchProbability &Prob) { + scale(Prob.getNumerator(), Prob.getDenominator()); return *this; } @@ -98,6 +116,17 @@ BlockFrequency::operator*(const BranchProbability &Prob) const { return Freq; } +BlockFrequency &BlockFrequency::operator/=(const BranchProbability &Prob) { + scale(Prob.getDenominator(), Prob.getNumerator()); + return *this; +} + +BlockFrequency BlockFrequency::operator/(const BranchProbability &Prob) const { + BlockFrequency Freq(Frequency); + Freq /= Prob; + return Freq; +} + BlockFrequency &BlockFrequency::operator+=(const BlockFrequency &Freq) { uint64_t Before = Freq.Frequency; Frequency += Freq.Frequency; @@ -116,8 +145,21 @@ BlockFrequency::operator+(const BlockFrequency &Prob) const { return Freq; } +uint32_t BlockFrequency::scale(const BranchProbability &Prob) { + return scale(Prob.getNumerator(), Prob.getDenominator()); +} + void BlockFrequency::print(raw_ostream &OS) const { - OS << Frequency; + // Convert fixed-point number to decimal. + OS << Frequency / getEntryFrequency() << "."; + uint64_t Rem = Frequency % getEntryFrequency(); + uint64_t Eps = 1; + do { + Rem *= 10; + Eps *= 10; + OS << Rem / getEntryFrequency(); + Rem = Rem % getEntryFrequency(); + } while (Rem >= Eps/2); } namespace llvm { diff --git a/contrib/llvm/lib/Support/CommandLine.cpp b/contrib/llvm/lib/Support/CommandLine.cpp index 18d3db5..44a88d8 100644 --- a/contrib/llvm/lib/Support/CommandLine.cpp +++ b/contrib/llvm/lib/Support/CommandLine.cpp @@ -17,12 +17,14 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/CommandLine.h" +#include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Twine.h" #include "llvm/Config/config.h" +#include "llvm/Support/ConvertUTF.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Host.h" @@ -58,6 +60,7 @@ TEMPLATE_INSTANTIATION(class opt); TEMPLATE_INSTANTIATION(class opt); } } // end namespace llvm::cl +// Pin the vtables to this file. void GenericOptionValue::anchor() {} void OptionValue::anchor() {} void OptionValue::anchor() {} @@ -72,6 +75,7 @@ void parser::anchor() {} void parser::anchor() {} void parser::anchor() {} void parser::anchor() {} +void StringSaver::anchor() {} //===----------------------------------------------------------------------===// @@ -435,39 +439,248 @@ static bool EatsUnboundedNumberOfValues(const Option *O) { O->getNumOccurrencesFlag() == cl::OneOrMore; } -/// ParseCStringVector - Break INPUT up wherever one or more -/// whitespace characters are found, and store the resulting tokens in -/// OUTPUT. The tokens stored in OUTPUT are dynamically allocated -/// using strdup(), so it is the caller's responsibility to free() -/// them later. +static bool isWhitespace(char C) { + return strchr(" \t\n\r\f\v", C); +} + +static bool isQuote(char C) { + return C == '\"' || C == '\''; +} + +static bool isGNUSpecial(char C) { + return strchr("\\\"\' ", C); +} + +void cl::TokenizeGNUCommandLine(StringRef Src, StringSaver &Saver, + SmallVectorImpl &NewArgv) { + SmallString<128> Token; + for (size_t I = 0, E = Src.size(); I != E; ++I) { + // Consume runs of whitespace. 
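Taken together, the tokenizer's rules give, for a few sample inputs (illustrative):

//   a\ b "c d"    ->  two tokens:  a b  and  c d   (escaped space joins)
//   'it\'s'       ->  one token:   it's            (backslash escapes ')
//   --opt="x y"   ->  one token:   --opt=x y       (quote opens mid-token)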
+ if (Token.empty()) { + while (I != E && isWhitespace(Src[I])) + ++I; + if (I == E) break; + } + + // Backslashes can escape backslashes, spaces, and other quotes. Otherwise + // they are literal. This makes it much easier to read Windows file paths. + if (I + 1 < E && Src[I] == '\\' && isGNUSpecial(Src[I + 1])) { + ++I; // Skip the escape. + Token.push_back(Src[I]); + continue; + } + + // Consume a quoted string. + if (isQuote(Src[I])) { + char Quote = Src[I++]; + while (I != E && Src[I] != Quote) { + // Backslashes are literal, unless they escape a special character. + if (Src[I] == '\\' && I + 1 != E && isGNUSpecial(Src[I + 1])) + ++I; + Token.push_back(Src[I]); + ++I; + } + if (I == E) break; + continue; + } + + // End the token if this is whitespace. + if (isWhitespace(Src[I])) { + if (!Token.empty()) + NewArgv.push_back(Saver.SaveString(Token.c_str())); + Token.clear(); + continue; + } + + // This is a normal character. Append it. + Token.push_back(Src[I]); + } + + // Append the last token after hitting EOF with no whitespace. + if (!Token.empty()) + NewArgv.push_back(Saver.SaveString(Token.c_str())); +} + +/// Backslashes are interpreted in a rather complicated way in the Windows-style +/// command line, because backslashes are used both to separate path and to +/// escape double quote. This method consumes runs of backslashes as well as the +/// following double quote if it's escaped. +/// +/// * If an even number of backslashes is followed by a double quote, one +/// backslash is output for every pair of backslashes, and the last double +/// quote remains unconsumed. The double quote will later be interpreted as +/// the start or end of a quoted string in the main loop outside of this +/// function. +/// +/// * If an odd number of backslashes is followed by a double quote, one +/// backslash is output for every pair of backslashes, and a double quote is +/// output for the last pair of backslash-double quote. The double quote is +/// consumed in this case. /// -static void ParseCStringVector(std::vector &OutputVector, - const char *Input) { - // Characters which will be treated as token separators: - StringRef Delims = " \v\f\t\r\n"; - - StringRef WorkStr(Input); - while (!WorkStr.empty()) { - // If the first character is a delimiter, strip them off. - if (Delims.find(WorkStr[0]) != StringRef::npos) { - size_t Pos = WorkStr.find_first_not_of(Delims); - if (Pos == StringRef::npos) Pos = WorkStr.size(); - WorkStr = WorkStr.substr(Pos); +/// * Otherwise, backslashes are interpreted literally. +static size_t parseBackslash(StringRef Src, size_t I, SmallString<128> &Token) { + size_t E = Src.size(); + int BackslashCount = 0; + // Skip the backslashes. + do { + ++I; + ++BackslashCount; + } while (I != E && Src[I] == '\\'); + + bool FollowedByDoubleQuote = (I != E && Src[I] == '"'); + if (FollowedByDoubleQuote) { + Token.append(BackslashCount / 2, '\\'); + if (BackslashCount % 2 == 0) + return I - 1; + Token.push_back('"'); + return I; + } + Token.append(BackslashCount, '\\'); + return I - 1; +} + +void cl::TokenizeWindowsCommandLine(StringRef Src, StringSaver &Saver, + SmallVectorImpl &NewArgv) { + SmallString<128> Token; + + // This is a small state machine to consume characters until it reaches the + // end of the source string. + enum { INIT, UNQUOTED, QUOTED } State = INIT; + for (size_t I = 0, E = Src.size(); I != E; ++I) { + // INIT state indicates that the current input index is at the start of + // the string or between tokens. 
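Worked instances of the backslash rule documented above (illustrative):

//   a\\b      ->  a\\b     no quote follows: both backslashes are literal
//   a\\"b c   ->  a\b c    even run: one backslash; the quote then opens
//                          a quoted region covering "b c"
//   a\"b      ->  a"b      odd run: the quote itself is escaped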
+ if (State == INIT) { + if (isWhitespace(Src[I])) + continue; + if (Src[I] == '"') { + State = QUOTED; + continue; + } + if (Src[I] == '\\') { + I = parseBackslash(Src, I, Token); + State = UNQUOTED; + continue; + } + Token.push_back(Src[I]); + State = UNQUOTED; continue; } - // Find position of first delimiter. - size_t Pos = WorkStr.find_first_of(Delims); - if (Pos == StringRef::npos) Pos = WorkStr.size(); + // UNQUOTED state means that it's reading a token not quoted by double + // quotes. + if (State == UNQUOTED) { + // Whitespace means the end of the token. + if (isWhitespace(Src[I])) { + NewArgv.push_back(Saver.SaveString(Token.c_str())); + Token.clear(); + State = INIT; + continue; + } + if (Src[I] == '"') { + State = QUOTED; + continue; + } + if (Src[I] == '\\') { + I = parseBackslash(Src, I, Token); + continue; + } + Token.push_back(Src[I]); + continue; + } - // Everything from 0 to Pos is the next word to copy. - char *NewStr = (char*)malloc(Pos+1); - memcpy(NewStr, WorkStr.data(), Pos); - NewStr[Pos] = 0; - OutputVector.push_back(NewStr); + // QUOTED state means that it's reading a token quoted by double quotes. + if (State == QUOTED) { + if (Src[I] == '"') { + State = UNQUOTED; + continue; + } + if (Src[I] == '\\') { + I = parseBackslash(Src, I, Token); + continue; + } + Token.push_back(Src[I]); + } + } + // Append the last token after hitting EOF with no whitespace. + if (!Token.empty()) + NewArgv.push_back(Saver.SaveString(Token.c_str())); +} - WorkStr = WorkStr.substr(Pos); +static bool ExpandResponseFile(const char *FName, StringSaver &Saver, + TokenizerCallback Tokenizer, + SmallVectorImpl &NewArgv) { + OwningPtr MemBuf; + if (MemoryBuffer::getFile(FName, MemBuf)) + return false; + StringRef Str(MemBuf->getBufferStart(), MemBuf->getBufferSize()); + + // If we have a UTF-16 byte order mark, convert to UTF-8 for parsing. + ArrayRef BufRef(MemBuf->getBufferStart(), MemBuf->getBufferEnd()); + std::string UTF8Buf; + if (hasUTF16ByteOrderMark(BufRef)) { + if (!convertUTF16ToUTF8String(BufRef, UTF8Buf)) + return false; + Str = StringRef(UTF8Buf); } + + // Tokenize the contents into NewArgv. + Tokenizer(Str, Saver, NewArgv); + + return true; +} + +/// \brief Expand response files on a command line recursively using the given +/// StringSaver and tokenization strategy. +bool cl::ExpandResponseFiles(StringSaver &Saver, TokenizerCallback Tokenizer, + SmallVectorImpl &Argv) { + unsigned RspFiles = 0; + bool AllExpanded = false; + + // Don't cache Argv.size() because it can change. + for (unsigned I = 0; I != Argv.size(); ) { + const char *Arg = Argv[I]; + if (Arg[0] != '@') { + ++I; + continue; + } + + // If we have too many response files, leave some unexpanded. This avoids + // crashing on self-referential response files. + if (RspFiles++ > 20) + return false; + + // Replace this response file argument with the tokenization of its + // contents. Nested response files are expanded in subsequent iterations. + // FIXME: If a nested response file uses a relative path, is it relative to + // the cwd of the process or the response file? 
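Typical use pairs this with the GNU tokenizer and a strdup-backed saver such as the StrDupSaver defined later in this file (a sketch, given the usual main() argc/argv; flags.rsp is a hypothetical response file):

//   $ cat flags.rsp
//   -O2 @more.rsp
llvm::SmallVector<const char *, 16> Argv(argv, argv + argc);
StrDupSaver Saver;
llvm::cl::ExpandResponseFiles(Saver, llvm::cl::TokenizeGNUCommandLine, Argv);
// Any "@flags.rsp" in Argv has been replaced by its tokenized contents,
// with a nested @more.rsp expanded on a later pass of the loop.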
+ SmallVector ExpandedArgv; + if (!ExpandResponseFile(Arg + 1, Saver, Tokenizer, ExpandedArgv)) { + AllExpanded = false; + continue; + } + Argv.erase(Argv.begin() + I); + Argv.insert(Argv.begin() + I, ExpandedArgv.begin(), ExpandedArgv.end()); + } + return AllExpanded; +} + +namespace { + class StrDupSaver : public StringSaver { + std::vector Dups; + public: + ~StrDupSaver() { + for (std::vector::iterator I = Dups.begin(), E = Dups.end(); + I != E; ++I) { + char *Dup = *I; + free(Dup); + } + } + const char *SaveString(const char *Str) LLVM_OVERRIDE { + char *Dup = strdup(Str); + Dups.push_back(Dup); + return Dup; + } + }; } /// ParseEnvironmentOptions - An alternative entry point to the @@ -488,56 +701,15 @@ void cl::ParseEnvironmentOptions(const char *progName, const char *envVar, // Get program's "name", which we wouldn't know without the caller // telling us. - std::vector newArgv; - newArgv.push_back(strdup(progName)); + SmallVector newArgv; + StrDupSaver Saver; + newArgv.push_back(Saver.SaveString(progName)); // Parse the value of the environment variable into a "command line" // and hand it off to ParseCommandLineOptions(). - ParseCStringVector(newArgv, envValue); + TokenizeGNUCommandLine(envValue, Saver, newArgv); int newArgc = static_cast(newArgv.size()); ParseCommandLineOptions(newArgc, &newArgv[0], Overview); - - // Free all the strdup()ed strings. - for (std::vector::iterator i = newArgv.begin(), e = newArgv.end(); - i != e; ++i) - free(*i); -} - - -/// ExpandResponseFiles - Copy the contents of argv into newArgv, -/// substituting the contents of the response files for the arguments -/// of type @file. -static void ExpandResponseFiles(unsigned argc, const char*const* argv, - std::vector& newArgv) { - for (unsigned i = 1; i != argc; ++i) { - const char *arg = argv[i]; - - if (arg[0] == '@') { - sys::PathWithStatus respFile(++arg); - - // Check that the response file is not empty (mmap'ing empty - // files can be problematic). - const sys::FileStatus *FileStat = respFile.getFileStatus(); - if (FileStat && FileStat->getSize() != 0) { - - // If we could open the file, parse its contents, otherwise - // pass the @file option verbatim. - - // TODO: we should also support recursive loading of response files, - // since this is how gcc behaves. (From their man page: "The file may - // itself contain additional @file options; any such options will be - // processed recursively.") - - // Mmap the response file into memory. - OwningPtr respFilePtr; - if (!MemoryBuffer::getFile(respFile.c_str(), respFilePtr)) { - ParseCStringVector(newArgv, respFilePtr->getBufferStart()); - continue; - } - } - } - newArgv.push_back(strdup(arg)); - } } void cl::ParseCommandLineOptions(int argc, const char * const *argv, @@ -552,9 +724,11 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv, "No options specified!"); // Expand response files. - std::vector newArgv; - newArgv.push_back(strdup(argv[0])); - ExpandResponseFiles(argc, argv, newArgv); + SmallVector newArgv; + for (int i = 0; i != argc; ++i) + newArgv.push_back(argv[i]); + StrDupSaver Saver; + ExpandResponseFiles(Saver, TokenizeGNUCommandLine, newArgv); argv = &newArgv[0]; argc = static_cast(newArgv.size()); @@ -848,12 +1022,6 @@ void cl::ParseCommandLineOptions(int argc, const char * const *argv, PositionalOpts.clear(); MoreHelp->clear(); - // Free the memory allocated by ExpandResponseFiles. - // Free all the strdup()ed strings. 
- for (std::vector::iterator i = newArgv.begin(), e = newArgv.end(); - i != e; ++i) - free(*i); - // If we had an error processing our arguments, don't let the program execute if (ErrorParsing) exit(1); } @@ -913,11 +1081,20 @@ size_t alias::getOptionWidth() const { return std::strlen(ArgStr)+6; } +static void printHelpStr(StringRef HelpStr, size_t Indent, + size_t FirstLineIndentedBy) { + std::pair Split = HelpStr.split('\n'); + outs().indent(Indent - FirstLineIndentedBy) << " - " << Split.first << "\n"; + while (!Split.second.empty()) { + Split = Split.second.split('\n'); + outs().indent(Indent) << Split.first << "\n"; + } +} + // Print out the option for the alias. void alias::printOptionInfo(size_t GlobalWidth) const { - size_t L = std::strlen(ArgStr); outs() << " -" << ArgStr; - outs().indent(GlobalWidth-L-6) << " - " << HelpStr << "\n"; + printHelpStr(HelpStr, GlobalWidth, std::strlen(ArgStr) + 6); } //===----------------------------------------------------------------------===// @@ -946,7 +1123,7 @@ void basic_parser_impl::printOptionInfo(const Option &O, if (const char *ValName = getValueName()) outs() << "=<" << getValueStr(O, ValName) << '>'; - outs().indent(GlobalWidth-getOptionWidth(O)) << " - " << O.HelpStr << '\n'; + printHelpStr(O.HelpStr, GlobalWidth, getOptionWidth(O)); } void basic_parser_impl::printOptionName(const Option &O, @@ -1087,9 +1264,8 @@ size_t generic_parser_base::getOptionWidth(const Option &O) const { void generic_parser_base::printOptionInfo(const Option &O, size_t GlobalWidth) const { if (O.hasArgStr()) { - size_t L = std::strlen(O.ArgStr); outs() << " -" << O.ArgStr; - outs().indent(GlobalWidth-L-6) << " - " << O.HelpStr << '\n'; + printHelpStr(O.HelpStr, GlobalWidth, std::strlen(O.ArgStr) + 6); for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { size_t NumSpaces = GlobalWidth-strlen(getOption(i))-8; @@ -1100,9 +1276,9 @@ void generic_parser_base::printOptionInfo(const Option &O, if (O.HelpStr[0]) outs() << " " << O.HelpStr << '\n'; for (unsigned i = 0, e = getNumOptions(); i != e; ++i) { - size_t L = std::strlen(getOption(i)); - outs() << " -" << getOption(i); - outs().indent(GlobalWidth-L-8) << " - " << getDescription(i) << '\n'; + const char *Option = getOption(i); + outs() << " -" << Option; + printHelpStr(getDescription(i), GlobalWidth, std::strlen(Option) + 8); } } } diff --git a/contrib/llvm/lib/Support/Compression.cpp b/contrib/llvm/lib/Support/Compression.cpp index fd8a874..b5ddb70 100644 --- a/contrib/llvm/lib/Support/Compression.cpp +++ b/contrib/llvm/lib/Support/Compression.cpp @@ -81,6 +81,10 @@ zlib::Status zlib::uncompress(StringRef InputBuffer, return Res; } +uint32_t zlib::crc32(StringRef Buffer) { + return ::crc32(0, (const Bytef *)Buffer.data(), Buffer.size()); +} + #else bool zlib::isAvailable() { return false; } zlib::Status zlib::compress(StringRef InputBuffer, @@ -93,5 +97,8 @@ zlib::Status zlib::uncompress(StringRef InputBuffer, size_t UncompressedSize) { return zlib::StatusUnsupported; } +uint32_t zlib::crc32(StringRef Buffer) { + llvm_unreachable("zlib::crc32 is unavailable"); +} #endif diff --git a/contrib/llvm/lib/Support/ConstantRange.cpp b/contrib/llvm/lib/Support/ConstantRange.cpp index 5c58950..265b6e9 100644 --- a/contrib/llvm/lib/Support/ConstantRange.cpp +++ b/contrib/llvm/lib/Support/ConstantRange.cpp @@ -38,13 +38,14 @@ ConstantRange::ConstantRange(uint32_t BitWidth, bool Full) { /// Initialize a range to hold the single specified value. 
/// -ConstantRange::ConstantRange(const APInt &V) : Lower(V), Upper(V + 1) {} +ConstantRange::ConstantRange(APIntMoveTy V) + : Lower(llvm_move(V)), Upper(Lower + 1) {} -ConstantRange::ConstantRange(const APInt &L, const APInt &U) : - Lower(L), Upper(U) { - assert(L.getBitWidth() == U.getBitWidth() && +ConstantRange::ConstantRange(APIntMoveTy L, APIntMoveTy U) + : Lower(llvm_move(L)), Upper(llvm_move(U)) { + assert(Lower.getBitWidth() == Upper.getBitWidth() && "ConstantRange with unequal bit widths"); - assert((L != U || (L.isMaxValue() || L.isMinValue())) && + assert((Lower != Upper || (Lower.isMaxValue() || Lower.isMinValue())) && "Lower == Upper, but they aren't min or max value!"); } @@ -143,9 +144,6 @@ bool ConstantRange::isSignWrappedSet() const { /// getSetSize - Return the number of elements in this set. /// APInt ConstantRange::getSetSize() const { - if (isEmptySet()) - return APInt(getBitWidth()+1, 0); - if (isFullSet()) { APInt Size(getBitWidth()+1, 0); Size.setBit(getBitWidth()); @@ -432,7 +430,7 @@ ConstantRange ConstantRange::zeroExtend(uint32_t DstTySize) const { APInt LowerExt(DstTySize, 0); if (!Upper) // special case: [X, 0) -- not really wrapping around LowerExt = Lower.zext(DstTySize); - return ConstantRange(LowerExt, APInt(DstTySize, 1).shl(SrcTySize)); + return ConstantRange(LowerExt, APInt::getOneBitSet(DstTySize, SrcTySize)); } return ConstantRange(Lower.zext(DstTySize), Upper.zext(DstTySize)); @@ -447,6 +445,11 @@ ConstantRange ConstantRange::signExtend(uint32_t DstTySize) const { unsigned SrcTySize = getBitWidth(); assert(SrcTySize < DstTySize && "Not a value extension"); + + // special case: [X, INT_MIN) -- not really wrapping around + if (Upper.isMinSignedValue()) + return ConstantRange(Lower.sext(DstTySize), Upper.zext(DstTySize)); + if (isFullSet() || isSignWrappedSet()) { return ConstantRange(APInt::getHighBitsSet(DstTySize,DstTySize-SrcTySize+1), APInt::getLowBitsSet(DstTySize, SrcTySize-1) + 1); diff --git a/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp b/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp index 458fbb0..e45335d 100644 --- a/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp +++ b/contrib/llvm/lib/Support/ConvertUTFWrapper.cpp @@ -8,6 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/SwapByteOrder.h" +#include +#include namespace llvm { @@ -72,5 +75,57 @@ bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr) { return true; } +bool hasUTF16ByteOrderMark(ArrayRef S) { + return (S.size() >= 2 && + ((S[0] == '\xff' && S[1] == '\xfe') || + (S[0] == '\xfe' && S[1] == '\xff'))); +} + +bool convertUTF16ToUTF8String(ArrayRef SrcBytes, std::string &Out) { + assert(Out.empty()); + + // Error out on an uneven byte count. + if (SrcBytes.size() % 2) + return false; + + // Avoid OOB by returning early on empty input. + if (SrcBytes.empty()) + return true; + + const UTF16 *Src = reinterpret_cast(SrcBytes.begin()); + const UTF16 *SrcEnd = reinterpret_cast(SrcBytes.end()); + + // Byteswap if necessary. + std::vector ByteSwapped; + if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_SWAPPED) { + ByteSwapped.insert(ByteSwapped.end(), Src, SrcEnd); + for (unsigned I = 0, E = ByteSwapped.size(); I != E; ++I) + ByteSwapped[I] = llvm::sys::SwapByteOrder_16(ByteSwapped[I]); + Src = &ByteSwapped[0]; + SrcEnd = &ByteSwapped[ByteSwapped.size() - 1] + 1; + } + + // Skip the BOM for conversion. 
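From the caller's side the whole pipeline (byte-order check, swap, BOM skip, conversion) is one call; a usage sketch:

#include "llvm/ADT/ArrayRef.h"
#include "llvm/Support/ConvertUTF.h"
#include <string>

// UTF-16LE BOM followed by "Hi"; the conversion strips the BOM.
static const char Bytes[] = { '\xff', '\xfe', 'H', '\0', 'i', '\0' };

bool demo() {
  std::string Out;
  bool OK = llvm::convertUTF16ToUTF8String(
      llvm::ArrayRef<char>(Bytes, sizeof(Bytes)), Out);
  return OK && Out == "Hi";  // true
}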
+ if (Src[0] == UNI_UTF16_BYTE_ORDER_MARK_NATIVE) + Src++; + + // Just allocate enough space up front. We'll shrink it later. + Out.resize(SrcBytes.size() * UNI_MAX_UTF8_BYTES_PER_CODE_POINT); + UTF8 *Dst = reinterpret_cast(&Out[0]); + UTF8 *DstEnd = Dst + Out.size(); + + ConversionResult CR = + ConvertUTF16toUTF8(&Src, SrcEnd, &Dst, DstEnd, strictConversion); + assert(CR != targetExhausted); + + if (CR != conversionOK) { + Out.clear(); + return false; + } + + Out.resize(reinterpret_cast(Dst) - &Out[0]); + return true; +} + } // end namespace llvm diff --git a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp index 182c362..92c370d 100644 --- a/contrib/llvm/lib/Support/CrashRecoveryContext.cpp +++ b/contrib/llvm/lib/Support/CrashRecoveryContext.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" #include "llvm/Support/ThreadLocal.h" #include @@ -21,27 +22,34 @@ namespace { struct CrashRecoveryContextImpl; -static sys::ThreadLocal CurrentContext; +static ManagedStatic > CurrentContext; struct CrashRecoveryContextImpl { CrashRecoveryContext *CRC; std::string Backtrace; ::jmp_buf JumpBuffer; volatile unsigned Failed : 1; + unsigned SwitchedThread : 1; public: CrashRecoveryContextImpl(CrashRecoveryContext *CRC) : CRC(CRC), - Failed(false) { - CurrentContext.set(this); + Failed(false), + SwitchedThread(false) { + CurrentContext->set(this); } ~CrashRecoveryContextImpl() { - CurrentContext.erase(); + if (!SwitchedThread) + CurrentContext->erase(); } + /// \brief Called when the separate crash-recovery thread was finished, to + /// indicate that we don't need to clear the thread-local CurrentContext. + void setSwitchedThread() { SwitchedThread = true; } + void HandleCrash() { // Eliminate the current context entry, to avoid re-entering in case the // cleanup code crashes. - CurrentContext.erase(); + CurrentContext->erase(); assert(!Failed && "Crash recovery context already failed!"); Failed = true; @@ -55,10 +63,10 @@ public: } -static sys::Mutex gCrashRecoveryContexMutex; +static ManagedStatic gCrashRecoveryContextMutex; static bool gCrashRecoveryEnabled = false; -static sys::ThreadLocal +static ManagedStatic > tlIsRecoveringFromCrash; CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} @@ -66,7 +74,7 @@ CrashRecoveryContextCleanup::~CrashRecoveryContextCleanup() {} CrashRecoveryContext::~CrashRecoveryContext() { // Reclaim registered resources. 
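The ManagedStatic conversion above defers construction to first use and ties destruction to llvm_shutdown(); the pattern in miniature (a sketch, with invented names):

#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/Mutex.h"

static llvm::ManagedStatic<llvm::sys::Mutex> GMutex;  // no constructor run yet

void lockedWork() {
  // operator* constructs the mutex on first use, in a thread-safe way.
  llvm::sys::ScopedLock Lock(*GMutex);
  // ... protected work ...
}
// llvm::llvm_shutdown() later destroys every ManagedStatic in reverse order.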
CrashRecoveryContextCleanup *i = head; - tlIsRecoveringFromCrash.set(head); + tlIsRecoveringFromCrash->set(head); while (i) { CrashRecoveryContextCleanup *tmp = i; i = tmp->next; @@ -74,21 +82,21 @@ CrashRecoveryContext::~CrashRecoveryContext() { tmp->recoverResources(); delete tmp; } - tlIsRecoveringFromCrash.erase(); + tlIsRecoveringFromCrash->erase(); CrashRecoveryContextImpl *CRCI = (CrashRecoveryContextImpl *) Impl; delete CRCI; } bool CrashRecoveryContext::isRecoveringFromCrash() { - return tlIsRecoveringFromCrash.get() != 0; + return tlIsRecoveringFromCrash->get() != 0; } CrashRecoveryContext *CrashRecoveryContext::GetCurrent() { if (!gCrashRecoveryEnabled) return 0; - const CrashRecoveryContextImpl *CRCI = CurrentContext.get(); + const CrashRecoveryContextImpl *CRCI = CurrentContext->get(); if (!CRCI) return 0; @@ -147,7 +155,7 @@ CrashRecoveryContext::unregisterCleanup(CrashRecoveryContextCleanup *cleanup) { static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) { // Lookup the current thread local recovery object. - const CrashRecoveryContextImpl *CRCI = CurrentContext.get(); + const CrashRecoveryContextImpl *CRCI = CurrentContext->get(); if (!CRCI) { // Something has gone horribly wrong, so let's just tell everyone @@ -175,7 +183,7 @@ static LONG CALLBACK ExceptionHandler(PEXCEPTION_POINTERS ExceptionInfo) static sys::ThreadLocal sCurrentExceptionHandle; void CrashRecoveryContext::Enable() { - sys::ScopedLock L(gCrashRecoveryContexMutex); + sys::ScopedLock L(*gCrashRecoveryContextMutex); if (gCrashRecoveryEnabled) return; @@ -191,7 +199,7 @@ void CrashRecoveryContext::Enable() { } void CrashRecoveryContext::Disable() { - sys::ScopedLock L(gCrashRecoveryContexMutex); + sys::ScopedLock L(*gCrashRecoveryContextMutex); if (!gCrashRecoveryEnabled) return; @@ -229,7 +237,7 @@ static struct sigaction PrevActions[NumSignals]; static void CrashRecoverySignalHandler(int Signal) { // Lookup the current thread local recovery object. 
- const CrashRecoveryContextImpl *CRCI = CurrentContext.get(); + const CrashRecoveryContextImpl *CRCI = CurrentContext->get(); if (!CRCI) { // We didn't find a crash recovery context -- this means either we got a @@ -260,7 +268,7 @@ static void CrashRecoverySignalHandler(int Signal) { } void CrashRecoveryContext::Enable() { - sys::ScopedLock L(gCrashRecoveryContexMutex); + sys::ScopedLock L(*gCrashRecoveryContextMutex); if (gCrashRecoveryEnabled) return; @@ -279,7 +287,7 @@ void CrashRecoveryContext::Enable() { } void CrashRecoveryContext::Disable() { - sys::ScopedLock L(gCrashRecoveryContexMutex); + sys::ScopedLock L(*gCrashRecoveryContextMutex); if (!gCrashRecoveryEnabled) return; @@ -342,5 +350,7 @@ bool CrashRecoveryContext::RunSafelyOnThread(void (*Fn)(void*), void *UserData, unsigned RequestedStackSize) { RunSafelyOnThreadInfo Info = { Fn, UserData, this, false }; llvm_execute_on_thread(RunSafelyOnThread_Dispatch, &Info, RequestedStackSize); + if (CrashRecoveryContextImpl *CRC = (CrashRecoveryContextImpl *)Impl) + CRC->setSwitchedThread(); return Info.Result; } diff --git a/contrib/llvm/lib/Support/DataStream.cpp b/contrib/llvm/lib/Support/DataStream.cpp index 0a02281..0bd0c68 100644 --- a/contrib/llvm/lib/Support/DataStream.cpp +++ b/contrib/llvm/lib/Support/DataStream.cpp @@ -17,6 +17,7 @@ #define DEBUG_TYPE "Data-stream" #include "llvm/Support/DataStream.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Program.h" #include "llvm/Support/system_error.h" #include @@ -27,7 +28,6 @@ #else #include #endif -#include using namespace llvm; // Interface goals: @@ -66,18 +66,11 @@ public: error_code OpenFile(const std::string &Filename) { if (Filename == "-") { Fd = 0; - sys::Program::ChangeStdinToBinary(); + sys::ChangeStdinToBinary(); return error_code::success(); } - - int OpenFlags = O_RDONLY; -#ifdef O_BINARY - OpenFlags |= O_BINARY; // Open input file in binary mode on win32. -#endif - Fd = ::open(Filename.c_str(), OpenFlags); - if (Fd == -1) - return error_code(errno, posix_category()); - return error_code::success(); + + return sys::fs::openFileForRead(Filename, Fd); } }; diff --git a/contrib/llvm/lib/Support/Disassembler.cpp b/contrib/llvm/lib/Support/Disassembler.cpp index b3244fa..27df3a9 100644 --- a/contrib/llvm/lib/Support/Disassembler.cpp +++ b/contrib/llvm/lib/Support/Disassembler.cpp @@ -41,10 +41,10 @@ bool llvm::sys::hasDisassembler() std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length, uint64_t pc) { - std::stringstream res; - #if (defined (__i386__) || defined (__amd64__) || defined (__x86_64__)) \ && USE_UDIS86 + std::stringstream res; + unsigned bits; # if defined(__i386__) bits = 32; @@ -66,9 +66,9 @@ std::string llvm::sys::disassembleBuffer(uint8_t* start, size_t length, while (ud_disassemble(&ud_obj)) { res << ud_insn_off(&ud_obj) << ":\t" << ud_insn_asm(&ud_obj) << "\n"; } -#else - res << "No disassembler available. See configure help for options.\n"; -#endif return res.str(); +#else + return "No disassembler available. 
See configure help for options.\n"; +#endif } diff --git a/contrib/llvm/lib/Support/Dwarf.cpp b/contrib/llvm/lib/Support/Dwarf.cpp index 0a24883..c000b63 100644 --- a/contrib/llvm/lib/Support/Dwarf.cpp +++ b/contrib/llvm/lib/Support/Dwarf.cpp @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" + using namespace llvm; using namespace dwarf; @@ -59,8 +61,8 @@ const char *llvm::dwarf::TagString(unsigned Tag) { case DW_TAG_namelist_item: return "DW_TAG_namelist_item"; case DW_TAG_packed_type: return "DW_TAG_packed_type"; case DW_TAG_subprogram: return "DW_TAG_subprogram"; - case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter"; - case DW_TAG_template_value_parameter:return "DW_TAG_template_value_parameter"; + case DW_TAG_template_type_parameter: return "DW_TAG_template_type_parameter"; + case DW_TAG_template_value_parameter: return "DW_TAG_template_value_parameter"; case DW_TAG_thrown_type: return "DW_TAG_thrown_type"; case DW_TAG_try_block: return "DW_TAG_try_block"; case DW_TAG_variant_part: return "DW_TAG_variant_part"; @@ -230,6 +232,7 @@ const char *llvm::dwarf::AttributeString(unsigned Attribute) { case DW_AT_body_end: return "DW_AT_body_end"; case DW_AT_GNU_vector: return "DW_AT_GNU_vector"; case DW_AT_GNU_template_name: return "DW_AT_GNU_template_name"; + case DW_AT_GNU_odr_signature: return "DW_AT_GNU_odr_signature"; case DW_AT_MIPS_assumed_size: return "DW_AT_MIPS_assumed_size"; case DW_AT_lo_user: return "DW_AT_lo_user"; case DW_AT_hi_user: return "DW_AT_hi_user"; @@ -723,3 +726,51 @@ const char *llvm::dwarf::CallFrameString(unsigned Encoding) { } return 0; } + +const char *llvm::dwarf::AtomTypeString(unsigned AT) { + switch (AT) { + case dwarf::DW_ATOM_null: + return "DW_ATOM_null"; + case dwarf::DW_ATOM_die_offset: + return "DW_ATOM_die_offset"; + case DW_ATOM_cu_offset: + return "DW_ATOM_cu_offset"; + case DW_ATOM_die_tag: + return "DW_ATOM_die_tag"; + case DW_ATOM_type_flags: + return "DW_ATOM_type_flags"; + } + return 0; +} + +const char *llvm::dwarf::GDBIndexEntryKindString(GDBIndexEntryKind Kind) { + switch (Kind) { + case GIEK_NONE: + return "NONE"; + case GIEK_TYPE: + return "TYPE"; + case GIEK_VARIABLE: + return "VARIABLE"; + case GIEK_FUNCTION: + return "FUNCTION"; + case GIEK_OTHER: + return "OTHER"; + case GIEK_UNUSED5: + return "UNUSED5"; + case GIEK_UNUSED6: + return "UNUSED6"; + case GIEK_UNUSED7: + return "UNUSED7"; + } + llvm_unreachable("Unknown GDBIndexEntryKind value"); +} + +const char *llvm::dwarf::GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage) { + switch (Linkage) { + case GIEL_EXTERNAL: + return "EXTERNAL"; + case GIEL_STATIC: + return "STATIC"; + } + llvm_unreachable("Unknown GDBIndexEntryLinkage value"); +} diff --git a/contrib/llvm/lib/Support/DynamicLibrary.cpp b/contrib/llvm/lib/Support/DynamicLibrary.cpp index f14cb45..a825c68 100644 --- a/contrib/llvm/lib/Support/DynamicLibrary.cpp +++ b/contrib/llvm/lib/Support/DynamicLibrary.cpp @@ -14,39 +14,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/ManagedStatic.h" #include "llvm/ADT/DenseSet.h" #include "llvm/ADT/StringMap.h" #include "llvm/Config/config.h" #include "llvm/Support/Mutex.h" +#include "llvm-c/Support.h" #include #include // Collection of symbol name/value pairs to be searched prior to any libraries. 
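Explicit symbols registered this way win over every opened library in SearchForAddressOfSymbol; for example (MyHook is a hypothetical callback):

extern "C" void MyHook();  // hypothetical function to expose

void registerHook() {
  llvm::sys::DynamicLibrary::AddSymbol("MyHook", (void *)&MyHook);
  void *P = llvm::sys::DynamicLibrary::SearchForAddressOfSymbol("MyHook");
  // P == (void *)&MyHook, found without consulting dlsym() at all.
}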
-static llvm::StringMap *ExplicitSymbols = 0; - -namespace { - -struct ExplicitSymbolsDeleter { - ~ExplicitSymbolsDeleter() { - delete ExplicitSymbols; - } -}; - -} - -static ExplicitSymbolsDeleter Dummy; - - -static llvm::sys::SmartMutex& getMutex() { - static llvm::sys::SmartMutex HandlesMutex; - return HandlesMutex; -} +static llvm::ManagedStatic > ExplicitSymbols; +static llvm::ManagedStatic > SymbolsMutex; void llvm::sys::DynamicLibrary::AddSymbol(StringRef symbolName, void *symbolValue) { - SmartScopedLock lock(getMutex()); - if (ExplicitSymbols == 0) - ExplicitSymbols = new StringMap(); + SmartScopedLock lock(*SymbolsMutex); (*ExplicitSymbols)[symbolName] = symbolValue; } @@ -72,7 +55,7 @@ static DenseSet *OpenedHandles = 0; DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, std::string *errMsg) { - SmartScopedLock lock(getMutex()); + SmartScopedLock lock(*SymbolsMutex); void *handle = dlopen(filename, RTLD_LAZY|RTLD_GLOBAL); if (handle == 0) { @@ -126,10 +109,10 @@ void *SearchForAddressOfSpecialSymbol(const char* symbolName); } void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { - SmartScopedLock Lock(getMutex()); + SmartScopedLock Lock(*SymbolsMutex); // First check symbols added via AddSymbol(). - if (ExplicitSymbols) { + if (ExplicitSymbols.isConstructed()) { StringMap::iterator i = ExplicitSymbols->find(symbolName); if (i != ExplicitSymbols->end()) @@ -187,3 +170,11 @@ void* DynamicLibrary::SearchForAddressOfSymbol(const char *symbolName) { } #endif // LLVM_ON_WIN32 + +//===----------------------------------------------------------------------===// +// C API. +//===----------------------------------------------------------------------===// + +LLVMBool LLVMLoadLibraryPermanently(const char* Filename) { + return llvm::sys::DynamicLibrary::LoadLibraryPermanently(Filename); +} diff --git a/contrib/llvm/lib/Support/Errno.cpp b/contrib/llvm/lib/Support/Errno.cpp index 730220f..1eefa3e 100644 --- a/contrib/llvm/lib/Support/Errno.cpp +++ b/contrib/llvm/lib/Support/Errno.cpp @@ -14,8 +14,6 @@ #include "llvm/Support/Errno.h" #include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/raw_ostream.h" - -#if HAVE_STRING_H #include #if HAVE_ERRNO_H @@ -41,28 +39,27 @@ std::string StrError(int errnum) { char buffer[MaxErrStrLen]; buffer[0] = '\0'; std::string str; + if (errnum == 0) + return str; + #ifdef HAVE_STRERROR_R // strerror_r is thread-safe. - if (errnum) -# if defined(__GLIBC__) && defined(_GNU_SOURCE) - // glibc defines its own incompatible version of strerror_r - // which may not use the buffer supplied. - str = strerror_r(errnum,buffer,MaxErrStrLen-1); -# else - strerror_r(errnum,buffer,MaxErrStrLen-1); - str = buffer; -# endif +#if defined(__GLIBC__) && defined(_GNU_SOURCE) + // glibc defines its own incompatible version of strerror_r + // which may not use the buffer supplied. + str = strerror_r(errnum, buffer, MaxErrStrLen - 1); +#else + strerror_r(errnum, buffer, MaxErrStrLen - 1); + str = buffer; +#endif #elif HAVE_DECL_STRERROR_S // "Windows Secure API" - if (errnum) { - strerror_s(buffer, MaxErrStrLen - 1, errnum); - str = buffer; - } + strerror_s(buffer, MaxErrStrLen - 1, errnum); + str = buffer; #elif defined(HAVE_STRERROR) // Copy the thread un-safe result of strerror into // the buffer as fast as possible to minimize impact // of collision of strerror in multiple threads. 
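The quirk handled here is easy to trip over: with _GNU_SOURCE, glibc's strerror_r returns a char* that may ignore the caller's buffer, while the POSIX version returns int and always fills it (an illustrative sketch; which signature is in effect depends on the platform, and describeErrno is an invented name):

#include <cstring>

const char *describeErrno(int Err, char *Buf, unsigned Len) {
  return strerror_r(Err, Buf, Len);  // GNU signature assumed here
}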
- if (errnum) - str = strerror(errnum); + str = strerror(errnum); #else // Strange that this system doesn't even have strerror // but, oh well, just use a generic message @@ -75,5 +72,3 @@ std::string StrError(int errnum) { } // namespace sys } // namespace llvm - -#endif // HAVE_STRING_H diff --git a/contrib/llvm/lib/Support/ErrorHandling.cpp b/contrib/llvm/lib/Support/ErrorHandling.cpp index f4b591e..1eafb96 100644 --- a/contrib/llvm/lib/Support/ErrorHandling.cpp +++ b/contrib/llvm/lib/Support/ErrorHandling.cpp @@ -20,6 +20,7 @@ #include "llvm/Support/Signals.h" #include "llvm/Support/Threading.h" #include "llvm/Support/raw_ostream.h" +#include "llvm-c/Core.h" #include #include @@ -96,4 +97,25 @@ void llvm::llvm_unreachable_internal(const char *msg, const char *file, dbgs() << " at " << file << ":" << line; dbgs() << "!\n"; abort(); +#ifdef LLVM_BUILTIN_UNREACHABLE + // Windows systems and possibly others don't declare abort() to be noreturn, + // so use the unreachable builtin to avoid a Clang self-host warning. + LLVM_BUILTIN_UNREACHABLE; +#endif +} + +static void bindingsErrorHandler(void *user_data, const std::string& reason, + bool gen_crash_diag) { + LLVMFatalErrorHandler handler = + LLVM_EXTENSION reinterpret_cast(user_data); + handler(reason.c_str()); +} + +void LLVMInstallFatalErrorHandler(LLVMFatalErrorHandler Handler) { + install_fatal_error_handler(bindingsErrorHandler, + LLVM_EXTENSION reinterpret_cast(Handler)); +} + +void LLVMResetFatalErrorHandler() { + remove_fatal_error_handler(); } diff --git a/contrib/llvm/lib/Support/FileOutputBuffer.cpp b/contrib/llvm/lib/Support/FileOutputBuffer.cpp index 1ee69b6..ed084fa 100644 --- a/contrib/llvm/lib/Support/FileOutputBuffer.cpp +++ b/contrib/llvm/lib/Support/FileOutputBuffer.cpp @@ -62,11 +62,16 @@ error_code FileOutputBuffer::create(StringRef FilePath, if (EC) return EC; + unsigned Mode = sys::fs::all_read | sys::fs::all_write; + // If requested, make the output file executable. + if (Flags & F_executable) + Mode |= sys::fs::all_exe; + // Create new file in same directory but with random name. SmallString<128> TempFilePath; int FD; - EC = sys::fs::unique_file(Twine(FilePath) + ".tmp%%%%%%%", - FD, TempFilePath, false, 0644); + EC = sys::fs::createUniqueFile(Twine(FilePath) + ".tmp%%%%%%%", FD, + TempFilePath, Mode); if (EC) return EC; @@ -75,26 +80,6 @@ error_code FileOutputBuffer::create(StringRef FilePath, if (EC) return EC; - // If requested, make the output file executable. - if ( Flags & F_executable ) { - sys::fs::file_status Stat2; - EC = sys::fs::status(Twine(TempFilePath), Stat2); - if (EC) - return EC; - - sys::fs::perms new_perms = Stat2.permissions(); - if ( new_perms & sys::fs::owner_read ) - new_perms |= sys::fs::owner_exe; - if ( new_perms & sys::fs::group_read ) - new_perms |= sys::fs::group_exe; - if ( new_perms & sys::fs::others_read ) - new_perms |= sys::fs::others_exe; - new_perms |= sys::fs::add_perms; - EC = sys::fs::permissions(Twine(TempFilePath), new_perms); - if (EC) - return EC; - } - Result.reset(new FileOutputBuffer(MappedFile.get(), FilePath, TempFilePath)); if (Result) MappedFile.take(); diff --git a/contrib/llvm/lib/Support/FileUtilities.cpp b/contrib/llvm/lib/Support/FileUtilities.cpp index 4d7b239..7f5d540 100644 --- a/contrib/llvm/lib/Support/FileUtilities.cpp +++ b/contrib/llvm/lib/Support/FileUtilities.cpp @@ -171,43 +171,20 @@ static bool CompareNumbers(const char *&F1P, const char *&F2P, /// error occurs, allowing the caller to distinguish between a failed diff and a /// file system error. 
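With the StringRef interface introduced here, the call becomes (file names illustrative):

std::string Error;
int Res = llvm::DiffFilesWithTolerance("expected.txt", "actual.txt",
                                       /*AbsTol=*/0.0, /*RelTol=*/1e-6,
                                       &Error);
// Res: 0 = equal within tolerance, 1 = files differ,
//      2 = file system error (Error holds the message).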
/// -int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA, - const sys::PathWithStatus &FileB, +int llvm::DiffFilesWithTolerance(StringRef NameA, + StringRef NameB, double AbsTol, double RelTol, std::string *Error) { - const sys::FileStatus *FileAStat = FileA.getFileStatus(false, Error); - if (!FileAStat) - return 2; - const sys::FileStatus *FileBStat = FileB.getFileStatus(false, Error); - if (!FileBStat) - return 2; - - // Check for zero length files because some systems croak when you try to - // mmap an empty file. - size_t A_size = FileAStat->getSize(); - size_t B_size = FileBStat->getSize(); - - // If they are both zero sized then they're the same - if (A_size == 0 && B_size == 0) - return 0; - - // If only one of them is zero sized then they can't be the same - if ((A_size == 0 || B_size == 0)) { - if (Error) - *Error = "Files differ: one is zero-sized, the other isn't"; - return 1; - } - // Now its safe to mmap the files into memory because both files // have a non-zero size. OwningPtr F1; - if (error_code ec = MemoryBuffer::getFile(FileA.c_str(), F1)) { + if (error_code ec = MemoryBuffer::getFile(NameA, F1)) { if (Error) *Error = ec.message(); return 2; } OwningPtr F2; - if (error_code ec = MemoryBuffer::getFile(FileB.c_str(), F2)) { + if (error_code ec = MemoryBuffer::getFile(NameB, F2)) { if (Error) *Error = ec.message(); return 2; @@ -220,6 +197,8 @@ int llvm::DiffFilesWithTolerance(const sys::PathWithStatus &FileA, const char *File2End = F2->getBufferEnd(); const char *F1P = File1Start; const char *F2P = File2Start; + uint64_t A_size = F1->getBufferSize(); + uint64_t B_size = F2->getBufferSize(); // Are the buffers identical? Common case: Handle this efficiently. if (A_size == B_size && diff --git a/contrib/llvm/lib/Support/FormattedStream.cpp b/contrib/llvm/lib/Support/FormattedStream.cpp index 231ae48..9febf66 100644 --- a/contrib/llvm/lib/Support/FormattedStream.cpp +++ b/contrib/llvm/lib/Support/FormattedStream.cpp @@ -17,38 +17,43 @@ using namespace llvm; -/// CountColumns - Examine the given char sequence and figure out which -/// column we end up in after output. +/// UpdatePosition - Examine the given char sequence and figure out which +/// column we end up in after output, and how many line breaks are contained. /// -static unsigned CountColumns(unsigned Column, const char *Ptr, size_t Size) { - // Keep track of the current column by scanning the string for - // special characters +static void UpdatePosition(std::pair &Position, const char *Ptr, size_t Size) { + unsigned &Column = Position.first; + unsigned &Line = Position.second; + // Keep track of the current column and line by scanning the string for + // special characters for (const char *End = Ptr + Size; Ptr != End; ++Ptr) { ++Column; - if (*Ptr == '\n' || *Ptr == '\r') + switch (*Ptr) { + case '\n': + Line += 1; + case '\r': Column = 0; - else if (*Ptr == '\t') + break; + case '\t': // Assumes tab stop = 8 characters. Column += (8 - (Column & 0x7)) & 0x7; + break; + } } - - return Column; } -/// ComputeColumn - Examine the current output and figure out which -/// column we end up in after output. -void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) { +/// ComputePosition - Examine the current output and update line and column +/// counts. +void formatted_raw_ostream::ComputePosition(const char *Ptr, size_t Size) { // If our previous scan pointer is inside the buffer, assume we already // scanned those bytes. 
This depends on raw_ostream to not change our buffer // in unexpected ways. - if (Ptr <= Scanned && Scanned <= Ptr + Size) { + if (Ptr <= Scanned && Scanned <= Ptr + Size) // Scan all characters added since our last scan to determine the new // column. - ColumnScanned = CountColumns(ColumnScanned, Scanned, - Size - (Scanned - Ptr)); - } else - ColumnScanned = CountColumns(ColumnScanned, Ptr, Size); + UpdatePosition(Position, Scanned, Size - (Scanned - Ptr)); + else + UpdatePosition(Position, Ptr, Size); // Update the scanning pointer. Scanned = Ptr + Size; @@ -60,16 +65,16 @@ void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) { /// formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) { // Figure out what's in the buffer and add it to the column count. - ComputeColumn(getBufferStart(), GetNumBytesInBuffer()); + ComputePosition(getBufferStart(), GetNumBytesInBuffer()); // Output spaces until we reach the desired column. - indent(std::max(int(NewCol - ColumnScanned), 1)); + indent(std::max(int(NewCol - getColumn()), 1)); return *this; } void formatted_raw_ostream::write_impl(const char *Ptr, size_t Size) { // Figure out what's in the buffer and add it to the column count. - ComputeColumn(Ptr, Size); + ComputePosition(Ptr, Size); // Write the data to the underlying stream (which is unbuffered, so // the data will be immediately written out). diff --git a/contrib/llvm/lib/Support/GraphWriter.cpp b/contrib/llvm/lib/Support/GraphWriter.cpp index bff182f..85be415 100644 --- a/contrib/llvm/lib/Support/GraphWriter.cpp +++ b/contrib/llvm/lib/Support/GraphWriter.cpp @@ -14,6 +14,7 @@ #include "llvm/Support/GraphWriter.h" #include "llvm/Config/config.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Path.h" #include "llvm/Support/Program.h" using namespace llvm; @@ -64,31 +65,46 @@ StringRef llvm::DOT::getColorString(unsigned ColorNumber) { return Colors[ColorNumber % NumColors]; } +std::string llvm::createGraphFilename(const Twine &Name, int &FD) { + FD = -1; + SmallString<128> Filename; + error_code EC = sys::fs::createTemporaryFile(Name, "dot", FD, Filename); + if (EC) { + errs() << "Error: " << EC.message() << "\n"; + return ""; + } + + errs() << "Writing '" << Filename << "'... "; + return Filename.str(); +} + // Execute the graph viewer. Return true if successful. static bool LLVM_ATTRIBUTE_UNUSED -ExecGraphViewer(const sys::Path &ExecPath, std::vector &args, - const sys::Path &Filename, bool wait, std::string &ErrMsg) { +ExecGraphViewer(StringRef ExecPath, std::vector &args, + StringRef Filename, bool wait, std::string &ErrMsg) { if (wait) { - if (sys::Program::ExecuteAndWait(ExecPath, &args[0],0,0,0,0,&ErrMsg)) { + if (sys::ExecuteAndWait(ExecPath, &args[0],0,0,0,0,&ErrMsg)) { errs() << "Error: " << ErrMsg << "\n"; return false; } - Filename.eraseFromDisk(); + bool Existed; + sys::fs::remove(Filename, Existed); errs() << " done. 
\n"; } else { - sys::Program::ExecuteNoWait(ExecPath, &args[0],0,0,0,&ErrMsg); + sys::ExecuteNoWait(ExecPath, &args[0],0,0,0,&ErrMsg); errs() << "Remember to erase graph file: " << Filename.str() << "\n"; } return true; } -void llvm::DisplayGraph(const sys::Path &Filename, bool wait, +void llvm::DisplayGraph(StringRef FilenameRef, bool wait, GraphProgram::Name program) { + std::string Filename = FilenameRef; wait &= !ViewBackground; std::string ErrMsg; #if HAVE_GRAPHVIZ - sys::Path Graphviz(LLVM_PATH_GRAPHVIZ); + std::string Graphviz(LLVM_PATH_GRAPHVIZ); std::vector args; args.push_back(Graphviz.c_str()); @@ -99,9 +115,9 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, if (!ExecGraphViewer(Graphviz, args, Filename, wait, ErrMsg)) return; -#elif HAVE_XDOT_PY +#elif HAVE_XDOT std::vector args; - args.push_back(LLVM_PATH_XDOT_PY); + args.push_back(LLVM_PATH_XDOT); args.push_back(Filename.c_str()); switch (program) { @@ -115,53 +131,51 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, args.push_back(0); errs() << "Running 'xdot.py' program... "; - if (!ExecGraphViewer(sys::Path(LLVM_PATH_XDOT_PY), args, Filename, wait, ErrMsg)) + if (!ExecGraphViewer(LLVM_PATH_XDOT, args, Filename, wait, ErrMsg)) return; #elif (HAVE_GV && (HAVE_DOT || HAVE_FDP || HAVE_NEATO || \ HAVE_TWOPI || HAVE_CIRCO)) - sys::Path PSFilename = Filename; - PSFilename.appendSuffix("ps"); - - sys::Path prog; + std::string PSFilename = Filename + ".ps"; + std::string prog; // Set default grapher #if HAVE_CIRCO - prog = sys::Path(LLVM_PATH_CIRCO); + prog = LLVM_PATH_CIRCO; #endif #if HAVE_TWOPI - prog = sys::Path(LLVM_PATH_TWOPI); + prog = LLVM_PATH_TWOPI; #endif #if HAVE_NEATO - prog = sys::Path(LLVM_PATH_NEATO); + prog = LLVM_PATH_NEATO; #endif #if HAVE_FDP - prog = sys::Path(LLVM_PATH_FDP); + prog = LLVM_PATH_FDP; #endif #if HAVE_DOT - prog = sys::Path(LLVM_PATH_DOT); + prog = LLVM_PATH_DOT; #endif // Find which program the user wants #if HAVE_DOT if (program == GraphProgram::DOT) - prog = sys::Path(LLVM_PATH_DOT); + prog = LLVM_PATH_DOT; #endif #if (HAVE_FDP) if (program == GraphProgram::FDP) - prog = sys::Path(LLVM_PATH_FDP); + prog = LLVM_PATH_FDP; #endif #if (HAVE_NEATO) if (program == GraphProgram::NEATO) - prog = sys::Path(LLVM_PATH_NEATO); + prog = LLVM_PATH_NEATO; #endif #if (HAVE_TWOPI) if (program == GraphProgram::TWOPI) - prog = sys::Path(LLVM_PATH_TWOPI); + prog = LLVM_PATH_TWOPI; #endif #if (HAVE_CIRCO) if (program == GraphProgram::CIRCO) - prog = sys::Path(LLVM_PATH_CIRCO); + prog = LLVM_PATH_CIRCO; #endif std::vector args; @@ -174,12 +188,12 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, args.push_back(PSFilename.c_str()); args.push_back(0); - errs() << "Running '" << prog.str() << "' program... "; + errs() << "Running '" << prog << "' program... "; if (!ExecGraphViewer(prog, args, Filename, wait, ErrMsg)) return; - sys::Path gv(LLVM_PATH_GV); + std::string gv(LLVM_PATH_GV); args.clear(); args.push_back(gv.c_str()); args.push_back(PSFilename.c_str()); @@ -191,7 +205,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, return; #elif HAVE_DOTTY - sys::Path dotty(LLVM_PATH_DOTTY); + std::string dotty(LLVM_PATH_DOTTY); std::vector args; args.push_back(dotty.c_str()); @@ -205,5 +219,8 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait, errs() << "Running 'dotty' program... 
"; if (!ExecGraphViewer(dotty, args, Filename, wait, ErrMsg)) return; +#else + (void)Filename; + (void)ErrMsg; #endif } diff --git a/contrib/llvm/lib/Support/Host.cpp b/contrib/llvm/lib/Support/Host.cpp index a7c7a95..6e9a5c9 100644 --- a/contrib/llvm/lib/Support/Host.cpp +++ b/contrib/llvm/lib/Support/Host.cpp @@ -52,8 +52,54 @@ using namespace llvm; /// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the /// specified arguments. If we can't run cpuid on the host, return true. -static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, - unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { +static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, unsigned *rEBX, + unsigned *rECX, unsigned *rEDX) { +#if defined(__GNUC__) || defined(__clang__) + #if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) + // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. + asm ("movq\t%%rbx, %%rsi\n\t" + "cpuid\n\t" + "xchgq\t%%rbx, %%rsi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; + #elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86) + asm ("movl\t%%ebx, %%esi\n\t" + "cpuid\n\t" + "xchgl\t%%ebx, %%esi\n\t" + : "=a" (*rEAX), + "=S" (*rEBX), + "=c" (*rECX), + "=d" (*rEDX) + : "a" (value)); + return false; +// pedantic #else returns to appease -Wunreachable-code (so we don't generate +// postprocessed code that looks like "return true; return false;") + #else + return true; + #endif +#elif defined(_MSC_VER) + // The MSVC intrinsic is portable across x86 and x64. + int registers[4]; + __cpuid(registers, value); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; +#else + return true; +#endif +} + +/// GetX86CpuIDAndInfoEx - Execute the specified cpuid with subleaf and return the +/// 4 values in the specified arguments. If we can't run cpuid on the host, +/// return true. +bool GetX86CpuIDAndInfoEx(unsigned value, unsigned subleaf, unsigned *rEAX, + unsigned *rEBX, unsigned *rECX, unsigned *rEDX) { #if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64) #if defined(__GNUC__) // gcc doesn't know cpuid would clobber ebx/rbx. Preseve it manually. 
@@ -64,16 +110,22 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, "=S" (*rEBX), "=c" (*rECX), "=d" (*rEDX) - : "a" (value)); + : "a" (value), + "c" (subleaf)); return false; #elif defined(_MSC_VER) - int registers[4]; - __cpuid(registers, value); - *rEAX = registers[0]; - *rEBX = registers[1]; - *rECX = registers[2]; - *rEDX = registers[3]; - return false; + // __cpuidex was added in MSVC++ 9.0 SP1 + #if (_MSC_VER > 1500) || (_MSC_VER == 1500 && _MSC_FULL_VER >= 150030729) + int registers[4]; + __cpuidex(registers, value, subleaf); + *rEAX = registers[0]; + *rEBX = registers[1]; + *rECX = registers[2]; + *rEDX = registers[3]; + return false; + #else + return true; + #endif #else return true; #endif @@ -86,11 +138,13 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, "=S" (*rEBX), "=c" (*rECX), "=d" (*rEDX) - : "a" (value)); + : "a" (value), + "c" (subleaf)); return false; #elif defined(_MSC_VER) __asm { mov eax,value + mov ecx,subleaf cpuid mov esi,rEAX mov dword ptr [esi],eax @@ -102,8 +156,6 @@ static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX, mov dword ptr [esi],edx } return false; -// pedantic #else returns to appease -Wunreachable-code (so we don't generate -// postprocessed code that looks like "return true; return false;") #else return true; #endif @@ -148,21 +200,27 @@ std::string sys::getHostCPUName() { unsigned Model = 0; DetectX86FamilyModel(EAX, Family, Model); + union { + unsigned u[3]; + char c[12]; + } text; + + GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); + + unsigned MaxLeaf = EAX; bool HasSSE3 = (ECX & 0x1); + bool HasSSE41 = (ECX & 0x80000); // If CPUID indicates support for XSAVE, XRESTORE and AVX, and XGETBV // indicates that the AVX registers will be saved and restored on context // switch, then we have full AVX support. const unsigned AVXBits = (1 << 27) | (1 << 28); bool HasAVX = ((ECX & AVXBits) == AVXBits) && OSHasAVXSupport(); + bool HasAVX2 = HasAVX && MaxLeaf >= 0x7 && + !GetX86CpuIDAndInfoEx(0x7, 0x0, &EAX, &EBX, &ECX, &EDX) && + (EBX & 0x20); GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX); bool Em64T = (EDX >> 29) & 0x1; - union { - unsigned u[3]; - char c[12]; - } text; - - GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1); if (memcmp(text.c, "GenuineIntel", 12) == 0) { switch (Family) { case 3: @@ -244,7 +302,8 @@ std::string sys::getHostCPUName() { // 17h. All processors are manufactured using the 45 nm process. // // 45nm: Penryn , Wolfdale, Yorkfield (XE) - return "penryn"; + // Not all Penryn processors support SSE 4.1 (such as the Pentium brand) + return HasSSE41 ? "penryn" : "core2"; case 26: // Intel Core i7 processor and Intel Xeon processor. All // processors are manufactured using the 45 nm process. @@ -269,10 +328,20 @@ std::string sys::getHostCPUName() { // Ivy Bridge: case 58: + case 62: // Ivy Bridge EP // Not all Ivy Bridge processors support AVX (such as the Pentium // versions instead of the i7 versions). return HasAVX ? "core-avx-i" : "corei7"; + // Haswell: + case 60: + case 63: + case 69: + case 70: + // Not all Haswell processors support AVX too (such as the Pentium + // versions instead of the i7 versions). + return HasAVX2 ? "core-avx2" : "corei7"; + case 28: // Most 45 nm Intel Atom processors case 38: // 45 nm Atom Lincroft case 39: // 32 nm Atom Medfield @@ -280,6 +349,12 @@ std::string sys::getHostCPUName() { case 54: // 32 nm Atom Midview return "atom"; + // Atom Silvermont codes from the Intel software optimization guide. 
+ case 55: + case 74: + case 77: + return "slm"; + default: return (Em64T) ? "x86-64" : "i686"; } case 15: { @@ -357,9 +432,11 @@ std::string sys::getHostCPUName() { case 21: if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback. return "btver1"; - if (Model > 15 && Model <= 31) - return "bdver2"; - return "bdver1"; + if (Model >= 0x30) + return "bdver3"; // 30h-3Fh: Steamroller + if (Model >= 0x10) + return "bdver2"; // 10h-1Fh: Piledriver + return "bdver1"; // 00h-0Fh: Bulldozer case 22: if (!HasAVX) // If the OS doesn't support AVX provide a sane fallback. return "btver1"; @@ -544,6 +621,48 @@ std::string sys::getHostCPUName() { return "generic"; } +#elif defined(__linux__) && defined(__s390x__) +std::string sys::getHostCPUName() { + // STIDP is a privileged operation, so use /proc/cpuinfo instead. + // Note: We cannot mmap /proc/cpuinfo here and then process the resulting + // memory buffer because the 'file' has 0 size (it can be read from only + // as a stream). + + std::string Err; + DataStreamer *DS = getDataFileStreamer("/proc/cpuinfo", &Err); + if (!DS) { + DEBUG(dbgs() << "Unable to open /proc/cpuinfo: " << Err << "\n"); + return "generic"; + } + + // The "processor 0:" line comes after a fair amount of other information, + // including a cache breakdown, but this should be plenty. + char buffer[2048]; + size_t CPUInfoSize = DS->GetBytes((unsigned char*) buffer, sizeof(buffer)); + delete DS; + + StringRef Str(buffer, CPUInfoSize); + SmallVector Lines; + Str.split(Lines, "\n"); + for (unsigned I = 0, E = Lines.size(); I != E; ++I) { + if (Lines[I].startswith("processor ")) { + size_t Pos = Lines[I].find("machine = "); + if (Pos != StringRef::npos) { + Pos += sizeof("machine = ") - 1; + unsigned int Id; + if (!Lines[I].drop_front(Pos).getAsInteger(10, Id)) { + if (Id >= 2827) + return "zEC12"; + if (Id >= 2817) + return "z196"; + } + } + break; + } + } + + return "generic"; +} #else std::string sys::getHostCPUName() { return "generic"; @@ -570,41 +689,31 @@ bool sys::getHostCPUFeatures(StringMap &Features) { SmallVector Lines; Str.split(Lines, "\n"); - // Look for the CPU implementer line. - StringRef Implementer; - for (unsigned I = 0, E = Lines.size(); I != E; ++I) - if (Lines[I].startswith("CPU implementer")) - Implementer = Lines[I].substr(15).ltrim("\t :"); - - if (Implementer == "0x41") { // ARM Ltd. - SmallVector CPUFeatures; + SmallVector CPUFeatures; - // Look for the CPU features. - for (unsigned I = 0, E = Lines.size(); I != E; ++I) - if (Lines[I].startswith("Features")) { - Lines[I].split(CPUFeatures, " "); - break; - } - - for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { - StringRef LLVMFeatureStr = StringSwitch(CPUFeatures[I]) - .Case("half", "fp16") - .Case("neon", "neon") - .Case("vfpv3", "vfp3") - .Case("vfpv3d16", "d16") - .Case("vfpv4", "vfp4") - .Case("idiva", "hwdiv-arm") - .Case("idivt", "hwdiv") - .Default(""); - - if (LLVMFeatureStr != "") - Features.GetOrCreateValue(LLVMFeatureStr).setValue(true); + // Look for the CPU features. 
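Both the new s390x detector above and the ARM feature scan being rewritten here share one shape: read /proc/cpuinfo as text, split it into lines, and key off a line prefix. The loop that follows walks the split lines looking for "Features"; the same shape, sketched standalone in plain C++ with the same feature table (no LLVM types; the sample input is made up):

    #include <cstdio>
    #include <sstream>
    #include <string>

    // Map a kernel feature token to the LLVM feature name, as the
    // StringSwitch below does. Unknown tokens map to "".
    static const char *mapFeature(const std::string &F) {
      if (F == "half")  return "fp16";
      if (F == "neon")  return "neon";
      if (F == "vfpv3") return "vfp3";
      if (F == "idivt") return "hwdiv";
      return "";
    }

    int main() {
      // Stand-in for the first few lines of /proc/cpuinfo on an ARM box.
      std::string CpuInfo =
          "processor : 0\n"
          "Features : half thumb fastmult vfpv3 neon idivt\n";
      std::istringstream In(CpuInfo);
      std::string Line;
      while (std::getline(In, Line)) {
        if (Line.compare(0, 8, "Features") != 0)
          continue;
        std::istringstream Words(Line);
        std::string W;
        while (Words >> W) {
          const char *LLVMName = mapFeature(W);
          if (*LLVMName)
            std::printf("+%s\n", LLVMName); // e.g. +vfp3 +neon +hwdiv
        }
        break; // Only the first "Features" line matters.
      }
      return 0;
    }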
+ for (unsigned I = 0, E = Lines.size(); I != E; ++I) + if (Lines[I].startswith("Features")) { + Lines[I].split(CPUFeatures, " "); + break; } - return true; + for (unsigned I = 0, E = CPUFeatures.size(); I != E; ++I) { + StringRef LLVMFeatureStr = StringSwitch(CPUFeatures[I]) + .Case("half", "fp16") + .Case("neon", "neon") + .Case("vfpv3", "vfp3") + .Case("vfpv3d16", "d16") + .Case("vfpv4", "vfp4") + .Case("idiva", "hwdiv-arm") + .Case("idivt", "hwdiv") + .Default(""); + + if (LLVMFeatureStr != "") + Features.GetOrCreateValue(LLVMFeatureStr).setValue(true); } - return false; + return true; } #else bool sys::getHostCPUFeatures(StringMap &Features){ @@ -613,7 +722,7 @@ bool sys::getHostCPUFeatures(StringMap &Features){ #endif std::string sys::getProcessTriple() { - Triple PT(LLVM_HOST_TRIPLE); + Triple PT(Triple::normalize(LLVM_HOST_TRIPLE)); if (sizeof(void *) == 8 && PT.isArch32Bit()) PT = PT.get64BitArchVariant(); diff --git a/contrib/llvm/lib/Support/Locale.cpp b/contrib/llvm/lib/Support/Locale.cpp index 17b9b6c..35ddf7f 100644 --- a/contrib/llvm/lib/Support/Locale.cpp +++ b/contrib/llvm/lib/Support/Locale.cpp @@ -1,10 +1,31 @@ #include "llvm/Support/Locale.h" -#include "llvm/Config/config.h" +#include "llvm/Support/Unicode.h" -#ifdef __APPLE__ -#include "LocaleXlocale.inc" -#elif LLVM_ON_WIN32 -#include "LocaleWindows.inc" +namespace llvm { +namespace sys { +namespace locale { + +int columnWidth(StringRef Text) { +#if LLVM_ON_WIN32 + return Text.size(); #else -#include "LocaleGeneric.inc" + return llvm::sys::unicode::columnWidthUTF8(Text); #endif +} + +bool isPrint(int UCS) { +#if LLVM_ON_WIN32 + // Restrict characters that we'll try to print to the the lower part of ASCII + // except for the control characters (0x20 - 0x7E). In general one can not + // reliably output code points U+0080 and higher using narrow character C/C++ + // output functions in Windows, because the meaning of the upper 128 codes is + // determined by the active code page in the console. 
+ return ' ' <= UCS && UCS <= '~'; +#else + return llvm::sys::unicode::isPrintable(UCS); +#endif +} + +} // namespace locale +} // namespace sys +} // namespace llvm diff --git a/contrib/llvm/lib/Support/LocaleGeneric.inc b/contrib/llvm/lib/Support/LocaleGeneric.inc deleted file mode 100644 index 278deee..0000000 --- a/contrib/llvm/lib/Support/LocaleGeneric.inc +++ /dev/null @@ -1,17 +0,0 @@ -#include - -namespace llvm { -namespace sys { -namespace locale { - -int columnWidth(StringRef s) { - return s.size(); -} - -bool isPrint(int c) { - return iswprint(c); -} - -} -} -} diff --git a/contrib/llvm/lib/Support/LocaleWindows.inc b/contrib/llvm/lib/Support/LocaleWindows.inc deleted file mode 100644 index 28e429c..0000000 --- a/contrib/llvm/lib/Support/LocaleWindows.inc +++ /dev/null @@ -1,15 +0,0 @@ -namespace llvm { -namespace sys { -namespace locale { - -int columnWidth(StringRef s) { - return s.size(); -} - -bool isPrint(int c) { - return ' ' <= c && c <= '~'; -} - -} -} -} diff --git a/contrib/llvm/lib/Support/LocaleXlocale.inc b/contrib/llvm/lib/Support/LocaleXlocale.inc deleted file mode 100644 index 389fe3d..0000000 --- a/contrib/llvm/lib/Support/LocaleXlocale.inc +++ /dev/null @@ -1,61 +0,0 @@ -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/Support/ManagedStatic.h" -#include -#include - - -namespace { - struct locale_holder { - locale_holder() - : l(newlocale(LC_CTYPE_MASK,"en_US.UTF-8",LC_GLOBAL_LOCALE)) - { - assert(NULL!=l); - } - ~locale_holder() { - freelocale(l); - } - - int mbswidth(llvm::SmallString<16> s) const { - // this implementation assumes no '\0' in s - assert(s.size()==strlen(s.c_str())); - - size_t size = mbstowcs_l(NULL,s.c_str(),0,l); - assert(size!=(size_t)-1); - if (size==0) - return 0; - llvm::SmallVector ws(size); - size = mbstowcs_l(&ws[0],s.c_str(),ws.size(),l); - assert(ws.size()==size); - return wcswidth_l(&ws[0],ws.size(),l); - } - - int isprint(int c) const { - return iswprint_l(c,l); - } - - private: - - locale_t l; - }; - - llvm::ManagedStatic l; -} - -namespace llvm { -namespace sys { -namespace locale { - -int columnWidth(StringRef s) { - int width = l->mbswidth(s); - assert(width>=0); - return width; -} - -bool isPrint(int c) { - return l->isprint(c); -} - -} -} -} diff --git a/contrib/llvm/lib/Support/LockFileManager.cpp b/contrib/llvm/lib/Support/LockFileManager.cpp index 2917e27..eeec274 100644 --- a/contrib/llvm/lib/Support/LockFileManager.cpp +++ b/contrib/llvm/lib/Support/LockFileManager.cpp @@ -7,9 +7,11 @@ // //===----------------------------------------------------------------------===// #include "llvm/Support/LockFileManager.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/raw_ostream.h" -#include #include #include #if LLVM_ON_WIN32 @@ -35,16 +37,20 @@ LockFileManager::readLockFile(StringRef LockFileName) { // Read the owning host and PID out of the lock file. If it appears that the // owning process is dead, the lock file is invalid. 
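The replacement below trades the std::ifstream parse for a MemoryBuffer read plus hand-rolled tokenizing. The same parse, sketched standalone; the one-line "hostname pid" format is inferred from this code, so treat it as an assumption:

    #include <cstdio>
    #include <cstdlib>
    #include <string>

    // Take the first space-separated token as the host, then parse the
    // remainder as a base-10 PID, as the patched readLockFile() does.
    // Returns false if no positive PID is present.
    static bool parseLockFile(const std::string &Buf, std::string &Host,
                              int &PID) {
      size_t Space = Buf.find(' ');
      if (Space == std::string::npos)
        return false;
      Host = Buf.substr(0, Space);
      size_t Start = Buf.find_first_not_of(' ', Space);
      if (Start == std::string::npos)
        return false;
      char *End = 0;
      long P = std::strtol(Buf.c_str() + Start, &End, 10);
      if (End == Buf.c_str() + Start || P <= 0)
        return false;
      PID = (int)P;
      return true;
    }

    int main() {
      std::string Host;
      int PID;
      if (parseLockFile("buildhost 12345\n", Host, PID))
        std::printf("owner %s pid %d\n", Host.c_str(), PID);
      return 0;
    }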
- int PID = 0; - std::string Hostname; - std::ifstream Input(LockFileName.str().c_str()); - if (Input >> Hostname >> PID && PID > 0 && - processStillExecuting(Hostname, PID)) - return std::make_pair(Hostname, PID); + OwningPtr MB; + if (MemoryBuffer::getFile(LockFileName, MB)) + return None; + + StringRef Hostname; + StringRef PIDStr; + tie(Hostname, PIDStr) = getToken(MB->getBuffer(), " "); + PIDStr = PIDStr.substr(PIDStr.find_first_not_of(" ")); + int PID; + if (!PIDStr.getAsInteger(10, PID)) + return std::make_pair(std::string(Hostname), PID); // Delete the lock file. It's invalid anyway. - bool Existed; - sys::fs::remove(LockFileName, Existed); + sys::fs::remove(LockFileName); return None; } @@ -78,10 +84,9 @@ LockFileManager::LockFileManager(StringRef FileName) UniqueLockFileName += "-%%%%%%%%"; int UniqueLockFileID; if (error_code EC - = sys::fs::unique_file(UniqueLockFileName.str(), - UniqueLockFileID, - UniqueLockFileName, - /*makeAbsolute=*/false)) { + = sys::fs::createUniqueFile(UniqueLockFileName.str(), + UniqueLockFileID, + UniqueLockFileName)) { Error = EC; return; } diff --git a/contrib/llvm/lib/Support/MD5.cpp b/contrib/llvm/lib/Support/MD5.cpp new file mode 100644 index 0000000..514466c --- /dev/null +++ b/contrib/llvm/lib/Support/MD5.cpp @@ -0,0 +1,286 @@ +/* + * This code is derived from (original license follows): + * + * This is an OpenSSL-compatible implementation of the RSA Data Security, Inc. + * MD5 Message-Digest Algorithm (RFC 1321). + * + * Homepage: + * http://openwall.info/wiki/people/solar/software/public-domain-source-code/md5 + * + * Author: + * Alexander Peslyak, better known as Solar Designer + * + * This software was written by Alexander Peslyak in 2001. No copyright is + * claimed, and the software is hereby placed in the public domain. + * In case this attempt to disclaim copyright and place the software in the + * public domain is deemed null and void, then the software is + * Copyright (c) 2001 Alexander Peslyak and it is hereby released to the + * general public under the following terms: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted. + * + * There's ABSOLUTELY NO WARRANTY, express or implied. + * + * (This is a heavily cut-down "BSD license".) + * + * This differs from Colin Plumb's older public domain implementation in that + * no exactly 32-bit integer data type is required (any 32-bit or wider + * unsigned integer data type will do), there's no compile-time endianness + * configuration, and the function prototypes match OpenSSL's. No code from + * Colin Plumb's implementation has been reused; this comment merely compares + * the properties of the two independent implementations. + * + * The primary goals of this implementation are portability and ease of use. + * It is meant to be fast, but not as fast as possible. Some known + * optimizations are not included to reduce source code size and avoid + * compile-time configuration. + */ + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MD5.h" +#include "llvm/Support/raw_ostream.h" +#include + +// The basic MD5 functions. + +// F and G are optimized compared to their RFC 1321 definitions for +// architectures that lack an AND-NOT instruction, just like in Colin Plumb's +// implementation. 
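That claim is easy to confirm mechanically: the rewritten F and G have the same truth tables as their RFC 1321 definitions while needing no AND-NOT. A self-contained check (one bit suffices, since the operations are all bitwise):

    #include <cassert>
    #include <cstdio>

    int main() {
      for (unsigned x = 0; x <= 1; ++x)
        for (unsigned y = 0; y <= 1; ++y)
          for (unsigned z = 0; z <= 1; ++z) {
            // RFC 1321: F = (x & y) | (~x & z), G = (x & z) | (y & ~z).
            unsigned F_ref = ((x & y) | (~x & z)) & 1;
            unsigned G_ref = ((x & z) | (y & ~z)) & 1;
            // Optimized forms used here (no NOT needed).
            unsigned F_opt = (z ^ (x & (y ^ z))) & 1;
            unsigned G_opt = (y ^ (z & (x ^ y))) & 1;
            assert(F_ref == F_opt && G_ref == G_opt);
          }
      std::printf("F/G identities hold\n");
      return 0;
    }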
+#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) +#define G(x, y, z) ((y) ^ ((z) & ((x) ^ (y)))) +#define H(x, y, z) ((x) ^ (y) ^ (z)) +#define I(x, y, z) ((y) ^ ((x) | ~(z))) + +// The MD5 transformation for all four rounds. +#define STEP(f, a, b, c, d, x, t, s) \ + (a) += f((b), (c), (d)) + (x) + (t); \ + (a) = (((a) << (s)) | (((a) & 0xffffffff) >> (32 - (s)))); \ + (a) += (b); + +// SET reads 4 input bytes in little-endian byte order and stores them +// in a properly aligned word in host byte order. +#define SET(n) \ + (block[(n)] = \ + (MD5_u32plus) ptr[(n) * 4] | ((MD5_u32plus) ptr[(n) * 4 + 1] << 8) | \ + ((MD5_u32plus) ptr[(n) * 4 + 2] << 16) | \ + ((MD5_u32plus) ptr[(n) * 4 + 3] << 24)) +#define GET(n) (block[(n)]) + +namespace llvm { + +/// \brief This processes one or more 64-byte data blocks, but does NOT update +///the bit counters. There are no alignment requirements. +const uint8_t *MD5::body(ArrayRef Data) { + const uint8_t *ptr; + MD5_u32plus a, b, c, d; + MD5_u32plus saved_a, saved_b, saved_c, saved_d; + unsigned long Size = Data.size(); + + ptr = Data.data(); + + a = this->a; + b = this->b; + c = this->c; + d = this->d; + + do { + saved_a = a; + saved_b = b; + saved_c = c; + saved_d = d; + + // Round 1 + STEP(F, a, b, c, d, SET(0), 0xd76aa478, 7) + STEP(F, d, a, b, c, SET(1), 0xe8c7b756, 12) + STEP(F, c, d, a, b, SET(2), 0x242070db, 17) + STEP(F, b, c, d, a, SET(3), 0xc1bdceee, 22) + STEP(F, a, b, c, d, SET(4), 0xf57c0faf, 7) + STEP(F, d, a, b, c, SET(5), 0x4787c62a, 12) + STEP(F, c, d, a, b, SET(6), 0xa8304613, 17) + STEP(F, b, c, d, a, SET(7), 0xfd469501, 22) + STEP(F, a, b, c, d, SET(8), 0x698098d8, 7) + STEP(F, d, a, b, c, SET(9), 0x8b44f7af, 12) + STEP(F, c, d, a, b, SET(10), 0xffff5bb1, 17) + STEP(F, b, c, d, a, SET(11), 0x895cd7be, 22) + STEP(F, a, b, c, d, SET(12), 0x6b901122, 7) + STEP(F, d, a, b, c, SET(13), 0xfd987193, 12) + STEP(F, c, d, a, b, SET(14), 0xa679438e, 17) + STEP(F, b, c, d, a, SET(15), 0x49b40821, 22) + + // Round 2 + STEP(G, a, b, c, d, GET(1), 0xf61e2562, 5) + STEP(G, d, a, b, c, GET(6), 0xc040b340, 9) + STEP(G, c, d, a, b, GET(11), 0x265e5a51, 14) + STEP(G, b, c, d, a, GET(0), 0xe9b6c7aa, 20) + STEP(G, a, b, c, d, GET(5), 0xd62f105d, 5) + STEP(G, d, a, b, c, GET(10), 0x02441453, 9) + STEP(G, c, d, a, b, GET(15), 0xd8a1e681, 14) + STEP(G, b, c, d, a, GET(4), 0xe7d3fbc8, 20) + STEP(G, a, b, c, d, GET(9), 0x21e1cde6, 5) + STEP(G, d, a, b, c, GET(14), 0xc33707d6, 9) + STEP(G, c, d, a, b, GET(3), 0xf4d50d87, 14) + STEP(G, b, c, d, a, GET(8), 0x455a14ed, 20) + STEP(G, a, b, c, d, GET(13), 0xa9e3e905, 5) + STEP(G, d, a, b, c, GET(2), 0xfcefa3f8, 9) + STEP(G, c, d, a, b, GET(7), 0x676f02d9, 14) + STEP(G, b, c, d, a, GET(12), 0x8d2a4c8a, 20) + + // Round 3 + STEP(H, a, b, c, d, GET(5), 0xfffa3942, 4) + STEP(H, d, a, b, c, GET(8), 0x8771f681, 11) + STEP(H, c, d, a, b, GET(11), 0x6d9d6122, 16) + STEP(H, b, c, d, a, GET(14), 0xfde5380c, 23) + STEP(H, a, b, c, d, GET(1), 0xa4beea44, 4) + STEP(H, d, a, b, c, GET(4), 0x4bdecfa9, 11) + STEP(H, c, d, a, b, GET(7), 0xf6bb4b60, 16) + STEP(H, b, c, d, a, GET(10), 0xbebfbc70, 23) + STEP(H, a, b, c, d, GET(13), 0x289b7ec6, 4) + STEP(H, d, a, b, c, GET(0), 0xeaa127fa, 11) + STEP(H, c, d, a, b, GET(3), 0xd4ef3085, 16) + STEP(H, b, c, d, a, GET(6), 0x04881d05, 23) + STEP(H, a, b, c, d, GET(9), 0xd9d4d039, 4) + STEP(H, d, a, b, c, GET(12), 0xe6db99e5, 11) + STEP(H, c, d, a, b, GET(15), 0x1fa27cf8, 16) + STEP(H, b, c, d, a, GET(2), 0xc4ac5665, 23) + + // Round 4 + STEP(I, a, b, c, d, GET(0), 0xf4292244, 6) + STEP(I, d, 
a, b, c, GET(7), 0x432aff97, 10) + STEP(I, c, d, a, b, GET(14), 0xab9423a7, 15) + STEP(I, b, c, d, a, GET(5), 0xfc93a039, 21) + STEP(I, a, b, c, d, GET(12), 0x655b59c3, 6) + STEP(I, d, a, b, c, GET(3), 0x8f0ccc92, 10) + STEP(I, c, d, a, b, GET(10), 0xffeff47d, 15) + STEP(I, b, c, d, a, GET(1), 0x85845dd1, 21) + STEP(I, a, b, c, d, GET(8), 0x6fa87e4f, 6) + STEP(I, d, a, b, c, GET(15), 0xfe2ce6e0, 10) + STEP(I, c, d, a, b, GET(6), 0xa3014314, 15) + STEP(I, b, c, d, a, GET(13), 0x4e0811a1, 21) + STEP(I, a, b, c, d, GET(4), 0xf7537e82, 6) + STEP(I, d, a, b, c, GET(11), 0xbd3af235, 10) + STEP(I, c, d, a, b, GET(2), 0x2ad7d2bb, 15) + STEP(I, b, c, d, a, GET(9), 0xeb86d391, 21) + + a += saved_a; + b += saved_b; + c += saved_c; + d += saved_d; + + ptr += 64; + } while (Size -= 64); + + this->a = a; + this->b = b; + this->c = c; + this->d = d; + + return ptr; +} + +MD5::MD5() + : a(0x67452301), b(0xefcdab89), c(0x98badcfe), d(0x10325476), hi(0), lo(0) { +} + +/// Incrementally add the bytes in \p Data to the hash. +void MD5::update(ArrayRef Data) { + MD5_u32plus saved_lo; + unsigned long used, free; + const uint8_t *Ptr = Data.data(); + unsigned long Size = Data.size(); + + saved_lo = lo; + if ((lo = (saved_lo + Size) & 0x1fffffff) < saved_lo) + hi++; + hi += Size >> 29; + + used = saved_lo & 0x3f; + + if (used) { + free = 64 - used; + + if (Size < free) { + memcpy(&buffer[used], Ptr, Size); + return; + } + + memcpy(&buffer[used], Ptr, free); + Ptr = Ptr + free; + Size -= free; + body(ArrayRef(buffer, 64)); + } + + if (Size >= 64) { + Ptr = body(ArrayRef(Ptr, Size & ~(unsigned long) 0x3f)); + Size &= 0x3f; + } + + memcpy(buffer, Ptr, Size); +} + +/// Add the bytes in the StringRef \p Str to the hash. +// Note that this isn't a string and so this won't include any trailing NULL +// bytes. +void MD5::update(StringRef Str) { + ArrayRef SVal((const uint8_t *)Str.data(), Str.size()); + update(SVal); +} + +/// \brief Finish the hash and place the resulting hash into \p result. +/// \param result is assumed to be a minimum of 16-bytes in size. +void MD5::final(MD5Result &result) { + unsigned long used, free; + + used = lo & 0x3f; + + buffer[used++] = 0x80; + + free = 64 - used; + + if (free < 8) { + memset(&buffer[used], 0, free); + body(ArrayRef(buffer, 64)); + used = 0; + free = 64; + } + + memset(&buffer[used], 0, free - 8); + + lo <<= 3; + buffer[56] = lo; + buffer[57] = lo >> 8; + buffer[58] = lo >> 16; + buffer[59] = lo >> 24; + buffer[60] = hi; + buffer[61] = hi >> 8; + buffer[62] = hi >> 16; + buffer[63] = hi >> 24; + + body(ArrayRef(buffer, 64)); + + result[0] = a; + result[1] = a >> 8; + result[2] = a >> 16; + result[3] = a >> 24; + result[4] = b; + result[5] = b >> 8; + result[6] = b >> 16; + result[7] = b >> 24; + result[8] = c; + result[9] = c >> 8; + result[10] = c >> 16; + result[11] = c >> 24; + result[12] = d; + result[13] = d >> 8; + result[14] = d >> 16; + result[15] = d >> 24; +} + +void MD5::stringifyResult(MD5Result &result, SmallString<32> &Str) { + raw_svector_ostream Res(Str); + for (int i = 0; i < 16; ++i) + Res << format("%.2x", result[i]); +} + +} diff --git a/contrib/llvm/lib/Support/MemoryBuffer.cpp b/contrib/llvm/lib/Support/MemoryBuffer.cpp index 7c5ab96..dcd5529 100644 --- a/contrib/llvm/lib/Support/MemoryBuffer.cpp +++ b/contrib/llvm/lib/Support/MemoryBuffer.cpp @@ -33,8 +33,7 @@ #include #else #include -// Simplistic definitinos of these macros to allow files to be read with -// MapInFilePages. +// Simplistic definitinos of these macros for use in getOpenFile. 
#ifndef S_ISREG #define S_ISREG(x) (1) #endif @@ -42,7 +41,6 @@ #define S_ISBLK(x) (0) #endif #endif -#include using namespace llvm; //===----------------------------------------------------------------------===// @@ -174,20 +172,12 @@ error_code MemoryBuffer::getFileOrSTDIN(StringRef Filename, return getFile(Filename, result, FileSize); } -error_code MemoryBuffer::getFileOrSTDIN(const char *Filename, - OwningPtr &result, - int64_t FileSize) { - if (strcmp(Filename, "-") == 0) - return getSTDIN(result); - return getFile(Filename, result, FileSize); -} - //===----------------------------------------------------------------------===// // MemoryBuffer::getFile implementation. //===----------------------------------------------------------------------===// namespace { -/// \brief Memorry maps a file descriptor using sys::fs::mapped_file_region. +/// \brief Memory maps a file descriptor using sys::fs::mapped_file_region. /// /// This handles converting the offset into a legal offset on the platform. class MemoryBufferMMapFile : public MemoryBuffer { @@ -227,7 +217,7 @@ public: }; } -static error_code getMemoryBufferForStream(int FD, +static error_code getMemoryBufferForStream(int FD, StringRef BufferName, OwningPtr &result) { const ssize_t ChunkSize = 4096*4; @@ -248,41 +238,36 @@ static error_code getMemoryBufferForStream(int FD, return error_code::success(); } -error_code MemoryBuffer::getFile(StringRef Filename, +static error_code getFileAux(const char *Filename, + OwningPtr &result, int64_t FileSize, + bool RequiresNullTerminator); + +error_code MemoryBuffer::getFile(Twine Filename, OwningPtr &result, int64_t FileSize, bool RequiresNullTerminator) { // Ensure the path is null terminated. - SmallString<256> PathBuf(Filename.begin(), Filename.end()); - return MemoryBuffer::getFile(PathBuf.c_str(), result, FileSize, - RequiresNullTerminator); + SmallString<256> PathBuf; + StringRef NullTerminatedName = Filename.toNullTerminatedStringRef(PathBuf); + return getFileAux(NullTerminatedName.data(), result, FileSize, + RequiresNullTerminator); } -error_code MemoryBuffer::getFile(const char *Filename, - OwningPtr &result, - int64_t FileSize, - bool RequiresNullTerminator) { - // FIXME: Review if this check is unnecessary on windows as well. -#ifdef LLVM_ON_WIN32 - // First check that the "file" is not a directory - bool is_dir = false; - error_code err = sys::fs::is_directory(Filename, is_dir); - if (err) - return err; - if (is_dir) - return make_error_code(errc::is_a_directory); -#endif - - int OpenFlags = O_RDONLY; -#ifdef O_BINARY - OpenFlags |= O_BINARY; // Open input file in binary mode on win32. -#endif - int FD = ::open(Filename, OpenFlags); - if (FD == -1) - return error_code(errno, posix_category()); - - error_code ret = getOpenFile(FD, Filename, result, FileSize, FileSize, - 0, RequiresNullTerminator); +static error_code getOpenFileImpl(int FD, const char *Filename, + OwningPtr &Result, + uint64_t FileSize, uint64_t MapSize, + int64_t Offset, bool RequiresNullTerminator); + +static error_code getFileAux(const char *Filename, + OwningPtr &result, int64_t FileSize, + bool RequiresNullTerminator) { + int FD; + error_code EC = sys::fs::openFileForRead(Filename, FD); + if (EC) + return EC; + + error_code ret = getOpenFileImpl(FD, Filename, result, FileSize, FileSize, 0, + RequiresNullTerminator); close(FD); return ret; } @@ -295,7 +280,7 @@ static bool shouldUseMmap(int FD, int PageSize) { // We don't use mmap for small files because this can severely fragment our // address space. 
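A standalone sketch of the decision the next lines implement; the 16 KiB floor and the page-multiple rule come from this hunk, while the function name and parameter list are illustrative only:

    #include <cstdio>

    static bool worthMmapping(size_t FileSize, size_t MapSize, size_t Offset,
                              size_t PageSize, bool RequiresNullTerminator) {
      // Tiny mappings fragment the address space for little gain.
      if (MapSize < 4 * 4096 || MapSize < PageSize)
        return false;
      if (!RequiresNullTerminator)
        return true;
      // A NUL can only come for free from the zero fill of the last page,
      // so the map must end exactly at EOF...
      if (Offset + MapSize != FileSize)
        return false;
      // ...and EOF must not be page-aligned, or there is no slack byte.
      return (FileSize & (PageSize - 1)) != 0;
    }

    int main() {
      std::printf("%d\n", worthMmapping(100000, 100000, 0, 4096, true)); // 1
      std::printf("%d\n", worthMmapping(131072, 131072, 0, 4096, true)); // 0: page multiple
      std::printf("%d\n", worthMmapping(2048, 2048, 0, 4096, true));     // 0: too small
      return 0;
    }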
- if (MapSize < 4096*4) + if (MapSize < 4 * 4096 || MapSize < (unsigned)PageSize) return false; if (!RequiresNullTerminator) @@ -307,12 +292,11 @@ static bool shouldUseMmap(int FD, // FIXME: this chunk of code is duplicated, but it avoids a fstat when // RequiresNullTerminator = false and MapSize != -1. if (FileSize == size_t(-1)) { - struct stat FileInfo; - // TODO: This should use fstat64 when available. - if (fstat(FD, &FileInfo) == -1) { - return error_code(errno, posix_category()); - } - FileSize = FileInfo.st_size; + sys::fs::file_status Status; + error_code EC = sys::fs::status(FD, Status); + if (EC) + return EC; + FileSize = Status.getSize(); } // If we need a null terminator and the end of the map is inside the file, @@ -322,6 +306,15 @@ static bool shouldUseMmap(int FD, if (End != FileSize) return false; +#if defined(_WIN32) || defined(__CYGWIN__) + // Don't peek the next page if file is multiple of *physical* pagesize(4k) + // but is not multiple of AllocationGranularity(64k), + // when a null terminator is required. + // FIXME: It's not good to hardcode 4096 here. dwPageSize shows 4096. + if ((FileSize & (4096 - 1)) == 0) + return false; +#endif + // Don't try to map files that are exactly a multiple of the system page size // if we need a null terminator. if ((FileSize & (PageSize -1)) == 0) @@ -330,11 +323,10 @@ static bool shouldUseMmap(int FD, return true; } -error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, - OwningPtr &result, - uint64_t FileSize, uint64_t MapSize, - int64_t Offset, - bool RequiresNullTerminator) { +static error_code getOpenFileImpl(int FD, const char *Filename, + OwningPtr &result, + uint64_t FileSize, uint64_t MapSize, + int64_t Offset, bool RequiresNullTerminator) { static int PageSize = sys::process::get_self()->page_size(); // Default is to map the full file. @@ -342,20 +334,20 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, // If we don't know the file size, use fstat to find out. fstat on an open // file descriptor is cheaper than stat on a random path. if (FileSize == uint64_t(-1)) { - struct stat FileInfo; - // TODO: This should use fstat64 when available. - if (fstat(FD, &FileInfo) == -1) { - return error_code(errno, posix_category()); - } + sys::fs::file_status Status; + error_code EC = sys::fs::status(FD, Status); + if (EC) + return EC; // If this not a file or a block device (e.g. it's a named pipe // or character device), we can't trust the size. Create the memory // buffer by copying off the stream. 
- if (!S_ISREG(FileInfo.st_mode) && !S_ISBLK(FileInfo.st_mode)) { + sys::fs::file_type Type = Status.type(); + if (Type != sys::fs::file_type::regular_file && + Type != sys::fs::file_type::block_file) return getMemoryBufferForStream(FD, Filename, result); - } - FileSize = FileInfo.st_size; + FileSize = Status.getSize(); } MapSize = FileSize; } @@ -411,6 +403,20 @@ error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, return error_code::success(); } +error_code MemoryBuffer::getOpenFile(int FD, const char *Filename, + OwningPtr &Result, + uint64_t FileSize, + bool RequiresNullTerminator) { + return getOpenFileImpl(FD, Filename, Result, FileSize, FileSize, 0, + RequiresNullTerminator); +} + +error_code MemoryBuffer::getOpenFileSlice(int FD, const char *Filename, + OwningPtr &Result, + uint64_t MapSize, int64_t Offset) { + return getOpenFileImpl(FD, Filename, Result, -1, MapSize, Offset, false); +} + //===----------------------------------------------------------------------===// // MemoryBuffer::getSTDIN implementation. //===----------------------------------------------------------------------===// @@ -420,7 +426,7 @@ error_code MemoryBuffer::getSTDIN(OwningPtr &result) { // // FIXME: That isn't necessarily true, we should try to mmap stdin and // fallback if it fails. - sys::Program::ChangeStdinToBinary(); + sys::ChangeStdinToBinary(); return getMemoryBufferForStream(0, "", result); } diff --git a/contrib/llvm/lib/Support/MemoryObject.cpp b/contrib/llvm/lib/Support/MemoryObject.cpp index b20ab89..02b5b50 100644 --- a/contrib/llvm/lib/Support/MemoryObject.cpp +++ b/contrib/llvm/lib/Support/MemoryObject.cpp @@ -15,8 +15,7 @@ MemoryObject::~MemoryObject() { int MemoryObject::readBytes(uint64_t address, uint64_t size, - uint8_t* buf, - uint64_t* copied) const { + uint8_t* buf) const { uint64_t current = address; uint64_t limit = getBase() + getExtent(); @@ -30,8 +29,5 @@ int MemoryObject::readBytes(uint64_t address, current++; } - if (copied) - *copied = current - address; - return 0; } diff --git a/contrib/llvm/lib/Support/Path.cpp b/contrib/llvm/lib/Support/Path.cpp index d070375..c869b30 100644 --- a/contrib/llvm/lib/Support/Path.cpp +++ b/contrib/llvm/lib/Support/Path.cpp @@ -1,4 +1,4 @@ -//===-- Path.cpp - Implement OS Path Concept --------------------*- C++ -*-===// +//===-- Path.cpp - Implement OS Path Concept ------------------------------===// // // The LLVM Compiler Infrastructure // @@ -7,80 +7,900 @@ // //===----------------------------------------------------------------------===// // -// This header file implements the operating system Path concept. +// This file implements the operating system Path API. // //===----------------------------------------------------------------------===// #include "llvm/Support/Path.h" -#include "llvm/Config/config.h" #include "llvm/Support/Endian.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FileSystem.h" -#include +#include +#include #include -#include -using namespace llvm; -using namespace sys; +#include + +#if !defined(_MSC_VER) && !defined(__MINGW32__) +#include +#else +#include +#endif + namespace { -using support::ulittle32_t; + using llvm::StringRef; + using llvm::sys::path::is_separator; + +#ifdef LLVM_ON_WIN32 + const char *separators = "\\/"; + const char prefered_separator = '\\'; +#else + const char separators = '/'; + const char prefered_separator = '/'; +#endif + + StringRef find_first_component(StringRef path) { + // Look for this first component in the following order. 
+ // * empty (in this case we return an empty string) + // * either C: or {//,\\}net. + // * {/,\} + // * {.,..} + // * {file,directory}name + + if (path.empty()) + return path; + +#ifdef LLVM_ON_WIN32 + // C: + if (path.size() >= 2 && std::isalpha(static_cast(path[0])) && + path[1] == ':') + return path.substr(0, 2); +#endif + + // //net + if ((path.size() > 2) && + is_separator(path[0]) && + path[0] == path[1] && + !is_separator(path[2])) { + // Find the next directory separator. + size_t end = path.find_first_of(separators, 2); + return path.substr(0, end); + } + + // {/,\} + if (is_separator(path[0])) + return path.substr(0, 1); + + if (path.startswith("..")) + return path.substr(0, 2); + + if (path[0] == '.') + return path.substr(0, 1); + + // * {file,directory}name + size_t end = path.find_first_of(separators); + return path.substr(0, end); + } + + size_t filename_pos(StringRef str) { + if (str.size() == 2 && + is_separator(str[0]) && + str[0] == str[1]) + return 0; + + if (str.size() > 0 && is_separator(str[str.size() - 1])) + return str.size() - 1; + + size_t pos = str.find_last_of(separators, str.size() - 1); + +#ifdef LLVM_ON_WIN32 + if (pos == StringRef::npos) + pos = str.find_last_of(':', str.size() - 2); +#endif + + if (pos == StringRef::npos || + (pos == 1 && is_separator(str[0]))) + return 0; + + return pos + 1; + } + + size_t root_dir_start(StringRef str) { + // case "c:/" +#ifdef LLVM_ON_WIN32 + if (str.size() > 2 && + str[1] == ':' && + is_separator(str[2])) + return 2; +#endif + + // case "//" + if (str.size() == 2 && + is_separator(str[0]) && + str[0] == str[1]) + return StringRef::npos; + + // case "//net" + if (str.size() > 3 && + is_separator(str[0]) && + str[0] == str[1] && + !is_separator(str[2])) { + return str.find_first_of(separators, 2); + } + + // case "/" + if (str.size() > 0 && is_separator(str[0])) + return 0; + + return StringRef::npos; + } + + size_t parent_path_end(StringRef path) { + size_t end_pos = filename_pos(path); + + bool filename_was_sep = path.size() > 0 && is_separator(path[end_pos]); + + // Skip separators except for root dir. + size_t root_dir_pos = root_dir_start(path.substr(0, end_pos)); + + while(end_pos > 0 && + (end_pos - 1) != root_dir_pos && + is_separator(path[end_pos - 1])) + --end_pos; + + if (end_pos == 1 && root_dir_pos == 0 && filename_was_sep) + return StringRef::npos; + + return end_pos; + } +} // end unnamed namespace + +enum FSEntity { + FS_Dir, + FS_File, + FS_Name +}; + +// Implemented in Unix/Path.inc and Windows/Path.inc. +static llvm::error_code +createUniqueEntity(const llvm::Twine &Model, int &ResultFD, + llvm::SmallVectorImpl &ResultPath, + bool MakeAbsolute, unsigned Mode, FSEntity Type); + +namespace llvm { +namespace sys { +namespace path { + +const_iterator begin(StringRef path) { + const_iterator i; + i.Path = path; + i.Component = find_first_component(path); + i.Position = 0; + return i; } -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only TRULY operating system -//=== independent code. -//===----------------------------------------------------------------------===// +const_iterator end(StringRef path) { + const_iterator i; + i.Path = path; + i.Position = path.size(); + return i; +} + +const_iterator &const_iterator::operator++() { + assert(Position < Path.size() && "Tried to increment past end!"); + + // Increment Position to past the current component + Position += Component.size(); + + // Check for end. 
+ if (Position == Path.size()) { + Component = StringRef(); + return *this; + } + + // Both POSIX and Windows treat paths that begin with exactly two separators + // specially. + bool was_net = Component.size() > 2 && + is_separator(Component[0]) && + Component[1] == Component[0] && + !is_separator(Component[2]); + + // Handle separators. + if (is_separator(Path[Position])) { + // Root dir. + if (was_net +#ifdef LLVM_ON_WIN32 + // c:/ + || Component.endswith(":") +#endif + ) { + Component = Path.substr(Position, 1); + return *this; + } + + // Skip extra separators. + while (Position != Path.size() && + is_separator(Path[Position])) { + ++Position; + } + + // Treat trailing '/' as a '.'. + if (Position == Path.size()) { + --Position; + Component = "."; + return *this; + } + } + + // Find next component. + size_t end_pos = Path.find_first_of(separators, Position); + Component = Path.slice(Position, end_pos); + + return *this; +} + +const_iterator &const_iterator::operator--() { + // If we're at the end and the previous char was a '/', return '.'. + if (Position == Path.size() && + Path.size() > 1 && + is_separator(Path[Position - 1]) +#ifdef LLVM_ON_WIN32 + && Path[Position - 2] != ':' +#endif + ) { + --Position; + Component = "."; + return *this; + } + + // Skip separators unless it's the root directory. + size_t root_dir_pos = root_dir_start(Path); + size_t end_pos = Position; + + while(end_pos > 0 && + (end_pos - 1) != root_dir_pos && + is_separator(Path[end_pos - 1])) + --end_pos; + + // Find next separator. + size_t start_pos = filename_pos(Path.substr(0, end_pos)); + Component = Path.slice(start_pos, end_pos); + Position = start_pos; + return *this; +} + +bool const_iterator::operator==(const const_iterator &RHS) const { + return Path.begin() == RHS.Path.begin() && + Position == RHS.Position; +} + +bool const_iterator::operator!=(const const_iterator &RHS) const { + return !(*this == RHS); +} + +ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const { + return Position - RHS.Position; +} + +const StringRef root_path(StringRef path) { + const_iterator b = begin(path), + pos = b, + e = end(path); + if (b != e) { + bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0]; + bool has_drive = +#ifdef LLVM_ON_WIN32 + b->endswith(":"); +#else + false; +#endif + + if (has_net || has_drive) { + if ((++pos != e) && is_separator((*pos)[0])) { + // {C:/,//net/}, so get the first two components. + return path.substr(0, b->size() + pos->size()); + } else { + // just {C:,//net}, return the first component. + return *b; + } + } + + // POSIX style root directory. + if (is_separator((*b)[0])) { + return *b; + } + } + + return StringRef(); +} + +const StringRef root_name(StringRef path) { + const_iterator b = begin(path), + e = end(path); + if (b != e) { + bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0]; + bool has_drive = +#ifdef LLVM_ON_WIN32 + b->endswith(":"); +#else + false; +#endif + + if (has_net || has_drive) { + // just {C:,//net}, return the first component. + return *b; + } + } + + // No path or no name. + return StringRef(); +} + +const StringRef root_directory(StringRef path) { + const_iterator b = begin(path), + pos = b, + e = end(path); + if (b != e) { + bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0]; + bool has_drive = +#ifdef LLVM_ON_WIN32 + b->endswith(":"); +#else + false; +#endif + + if ((has_net || has_drive) && + // {C:,//net}, skip to the next component. 
+ (++pos != e) && is_separator((*pos)[0])) { + return *pos; + } + + // POSIX style root directory. + if (!has_net && is_separator((*b)[0])) { + return *b; + } + } + + // No path or no root. + return StringRef(); +} + +const StringRef relative_path(StringRef path) { + StringRef root = root_path(path); + return path.substr(root.size()); +} + +void append(SmallVectorImpl &path, const Twine &a, + const Twine &b, + const Twine &c, + const Twine &d) { + SmallString<32> a_storage; + SmallString<32> b_storage; + SmallString<32> c_storage; + SmallString<32> d_storage; + + SmallVector components; + if (!a.isTriviallyEmpty()) components.push_back(a.toStringRef(a_storage)); + if (!b.isTriviallyEmpty()) components.push_back(b.toStringRef(b_storage)); + if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage)); + if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage)); + + for (SmallVectorImpl::const_iterator i = components.begin(), + e = components.end(); + i != e; ++i) { + bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]); + bool component_has_sep = !i->empty() && is_separator((*i)[0]); + bool is_root_name = has_root_name(*i); + + if (path_has_sep) { + // Strip separators from beginning of component. + size_t loc = i->find_first_not_of(separators); + StringRef c = i->substr(loc); + + // Append it. + path.append(c.begin(), c.end()); + continue; + } + + if (!component_has_sep && !(path.empty() || is_root_name)) { + // Add a separator. + path.push_back(prefered_separator); + } + + path.append(i->begin(), i->end()); + } +} + +void append(SmallVectorImpl &path, + const_iterator begin, const_iterator end) { + for (; begin != end; ++begin) + path::append(path, *begin); +} + +const StringRef parent_path(StringRef path) { + size_t end_pos = parent_path_end(path); + if (end_pos == StringRef::npos) + return StringRef(); + else + return path.substr(0, end_pos); +} + +void remove_filename(SmallVectorImpl &path) { + size_t end_pos = parent_path_end(StringRef(path.begin(), path.size())); + if (end_pos != StringRef::npos) + path.set_size(end_pos); +} + +void replace_extension(SmallVectorImpl &path, const Twine &extension) { + StringRef p(path.begin(), path.size()); + SmallString<32> ext_storage; + StringRef ext = extension.toStringRef(ext_storage); + + // Erase existing extension. + size_t pos = p.find_last_of('.'); + if (pos != StringRef::npos && pos >= filename_pos(p)) + path.set_size(pos); -bool Path::operator==(const Path &that) const { - return path == that.path; + // Append '.' if needed. + if (ext.size() > 0 && ext[0] != '.') + path.push_back('.'); + + // Append extension. + path.append(ext.begin(), ext.end()); +} + +void native(const Twine &path, SmallVectorImpl &result) { + assert((!path.isSingleStringRef() || + path.getSingleStringRef().data() != result.data()) && + "path and result are not allowed to overlap!"); + // Clear result. 
+ result.clear(); + path.toVector(result); + native(result); +} + +void native(SmallVectorImpl &path) { +#ifdef LLVM_ON_WIN32 + std::replace(path.begin(), path.end(), '/', '\\'); +#endif } -bool Path::operator<(const Path& that) const { - return path < that.path; +const StringRef filename(StringRef path) { + return *(--end(path)); } -LLVMFileType -sys::IdentifyFileType(const char *magic, unsigned length) { - assert(magic && "Invalid magic number string"); - assert(length >=4 && "Invalid magic number length"); - switch ((unsigned char)magic[0]) { +const StringRef stem(StringRef path) { + StringRef fname = filename(path); + size_t pos = fname.find_last_of('.'); + if (pos == StringRef::npos) + return fname; + else + if ((fname.size() == 1 && fname == ".") || + (fname.size() == 2 && fname == "..")) + return fname; + else + return fname.substr(0, pos); +} + +const StringRef extension(StringRef path) { + StringRef fname = filename(path); + size_t pos = fname.find_last_of('.'); + if (pos == StringRef::npos) + return StringRef(); + else + if ((fname.size() == 1 && fname == ".") || + (fname.size() == 2 && fname == "..")) + return StringRef(); + else + return fname.substr(pos); +} + +bool is_separator(char value) { + switch(value) { +#ifdef LLVM_ON_WIN32 + case '\\': // fall through +#endif + case '/': return true; + default: return false; + } +} + +void system_temp_directory(bool erasedOnReboot, SmallVectorImpl &result) { + result.clear(); + +#ifdef __APPLE__ + // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR. + int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR + : _CS_DARWIN_USER_CACHE_DIR; + size_t ConfLen = confstr(ConfName, 0, 0); + if (ConfLen > 0) { + do { + result.resize(ConfLen); + ConfLen = confstr(ConfName, result.data(), result.size()); + } while (ConfLen > 0 && ConfLen != result.size()); + + if (ConfLen > 0) { + assert(result.back() == 0); + result.pop_back(); + return; + } + + result.clear(); + } +#endif + + // Check whether the temporary directory is specified by an environment + // variable. + const char *EnvironmentVariable; +#ifdef LLVM_ON_WIN32 + EnvironmentVariable = "TEMP"; +#else + EnvironmentVariable = "TMPDIR"; +#endif + if (char *RequestedDir = getenv(EnvironmentVariable)) { + result.append(RequestedDir, RequestedDir + strlen(RequestedDir)); + return; + } + + // Fall back to a system default. 
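The lines that follow finish a three-step lookup: Darwin's confstr query first, then an environment variable, then a hard-coded default. The same precedence, condensed to generic POSIX (illustrative only, not the LLVM function):

    #include <cstdio>
    #include <cstdlib>
    #include <string>

    // Honor $TMPDIR first, then fall back to /tmp for reboot-erased
    // storage or /var/tmp for storage expected to persist.
    static std::string tempDir(bool ErasedOnReboot) {
      if (const char *Requested = std::getenv("TMPDIR"))
        return Requested;
      return ErasedOnReboot ? "/tmp" : "/var/tmp";
    }

    int main() {
      std::printf("scratch: %s\n", tempDir(true).c_str());
      std::printf("cache:   %s\n", tempDir(false).c_str());
      return 0;
    }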
+ const char *DefaultResult; +#ifdef LLVM_ON_WIN32 + (void)erasedOnReboot; + DefaultResult = "C:\\TEMP"; +#else + if (erasedOnReboot) + DefaultResult = "/tmp"; + else + DefaultResult = "/var/tmp"; +#endif + result.append(DefaultResult, DefaultResult + strlen(DefaultResult)); +} + +bool has_root_name(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !root_name(p).empty(); +} + +bool has_root_directory(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !root_directory(p).empty(); +} + +bool has_root_path(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !root_path(p).empty(); +} + +bool has_relative_path(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !relative_path(p).empty(); +} + +bool has_filename(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !filename(p).empty(); +} + +bool has_parent_path(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !parent_path(p).empty(); +} + +bool has_stem(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !stem(p).empty(); +} + +bool has_extension(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + return !extension(p).empty(); +} + +bool is_absolute(const Twine &path) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + bool rootDir = has_root_directory(p), +#ifdef LLVM_ON_WIN32 + rootName = has_root_name(p); +#else + rootName = true; +#endif + + return rootDir && rootName; +} + +bool is_relative(const Twine &path) { + return !is_absolute(path); +} + +} // end namespace path + +namespace fs { + +error_code getUniqueID(const Twine Path, UniqueID &Result) { + file_status Status; + error_code EC = status(Path, Status); + if (EC) + return EC; + Result = Status.getUniqueID(); + return error_code::success(); +} + +error_code createUniqueFile(const Twine &Model, int &ResultFd, + SmallVectorImpl &ResultPath, unsigned Mode) { + return createUniqueEntity(Model, ResultFd, ResultPath, false, Mode, FS_File); +} + +error_code createUniqueFile(const Twine &Model, + SmallVectorImpl &ResultPath) { + int Dummy; + return createUniqueEntity(Model, Dummy, ResultPath, false, 0, FS_Name); +} + +static error_code createTemporaryFile(const Twine &Model, int &ResultFD, + llvm::SmallVectorImpl &ResultPath, + FSEntity Type) { + SmallString<128> Storage; + StringRef P = Model.toNullTerminatedStringRef(Storage); + assert(P.find_first_of(separators) == StringRef::npos && + "Model must be a simple filename."); + // Use P.begin() so that createUniqueEntity doesn't need to recreate Storage. + return createUniqueEntity(P.begin(), ResultFD, ResultPath, + true, owner_read | owner_write, Type); +} + +static error_code +createTemporaryFile(const Twine &Prefix, StringRef Suffix, int &ResultFD, + llvm::SmallVectorImpl &ResultPath, + FSEntity Type) { + const char *Middle = Suffix.empty() ? 
"-%%%%%%" : "-%%%%%%."; + return createTemporaryFile(Prefix + Middle + Suffix, ResultFD, ResultPath, + Type); +} + + +error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, + int &ResultFD, + SmallVectorImpl &ResultPath) { + return createTemporaryFile(Prefix, Suffix, ResultFD, ResultPath, FS_File); +} + +error_code createTemporaryFile(const Twine &Prefix, StringRef Suffix, + SmallVectorImpl &ResultPath) { + int Dummy; + return createTemporaryFile(Prefix, Suffix, Dummy, ResultPath, FS_Name); +} + + +// This is a mkdtemp with a different pattern. We use createUniqueEntity mostly +// for consistency. We should try using mkdtemp. +error_code createUniqueDirectory(const Twine &Prefix, + SmallVectorImpl &ResultPath) { + int Dummy; + return createUniqueEntity(Prefix + "-%%%%%%", Dummy, ResultPath, + true, 0, FS_Dir); +} + +error_code make_absolute(SmallVectorImpl &path) { + StringRef p(path.data(), path.size()); + + bool rootDirectory = path::has_root_directory(p), +#ifdef LLVM_ON_WIN32 + rootName = path::has_root_name(p); +#else + rootName = true; +#endif + + // Already absolute. + if (rootName && rootDirectory) + return error_code::success(); + + // All of the following conditions will need the current directory. + SmallString<128> current_dir; + if (error_code ec = current_path(current_dir)) return ec; + + // Relative path. Prepend the current directory. + if (!rootName && !rootDirectory) { + // Append path to the current directory. + path::append(current_dir, p); + // Set path to the result. + path.swap(current_dir); + return error_code::success(); + } + + if (!rootName && rootDirectory) { + StringRef cdrn = path::root_name(current_dir); + SmallString<128> curDirRootName(cdrn.begin(), cdrn.end()); + path::append(curDirRootName, p); + // Set path to the result. 
+ path.swap(curDirRootName); + return error_code::success(); + } + + if (rootName && !rootDirectory) { + StringRef pRootName = path::root_name(p); + StringRef bRootDirectory = path::root_directory(current_dir); + StringRef bRelativePath = path::relative_path(current_dir); + StringRef pRelativePath = path::relative_path(p); + + SmallString<128> res; + path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath); + path.swap(res); + return error_code::success(); + } + + llvm_unreachable("All rootName and rootDirectory combinations should have " + "occurred above!"); +} + +error_code create_directories(const Twine &path, bool &existed) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); + + StringRef parent = path::parent_path(p); + if (!parent.empty()) { + bool parent_exists; + if (error_code ec = fs::exists(parent, parent_exists)) return ec; + + if (!parent_exists) + if (error_code ec = create_directories(parent, existed)) return ec; + } + + return create_directory(p, existed); +} + +bool exists(file_status status) { + return status_known(status) && status.type() != file_type::file_not_found; +} + +bool status_known(file_status s) { + return s.type() != file_type::status_error; +} + +bool is_directory(file_status status) { + return status.type() == file_type::directory_file; +} + +error_code is_directory(const Twine &path, bool &result) { + file_status st; + if (error_code ec = status(path, st)) + return ec; + result = is_directory(st); + return error_code::success(); +} + +bool is_regular_file(file_status status) { + return status.type() == file_type::regular_file; +} + +error_code is_regular_file(const Twine &path, bool &result) { + file_status st; + if (error_code ec = status(path, st)) + return ec; + result = is_regular_file(st); + return error_code::success(); +} + +bool is_symlink(file_status status) { + return status.type() == file_type::symlink_file; +} + +error_code is_symlink(const Twine &path, bool &result) { + file_status st; + if (error_code ec = status(path, st)) + return ec; + result = is_symlink(st); + return error_code::success(); +} + +bool is_other(file_status status) { + return exists(status) && + !is_regular_file(status) && + !is_directory(status) && + !is_symlink(status); +} + +void directory_entry::replace_filename(const Twine &filename, file_status st) { + SmallString<128> path(Path.begin(), Path.end()); + path::remove_filename(path); + path::append(path, filename); + Path = path.str(); + Status = st; +} + +error_code has_magic(const Twine &path, const Twine &magic, bool &result) { + SmallString<32> MagicStorage; + StringRef Magic = magic.toStringRef(MagicStorage); + SmallString<32> Buffer; + + if (error_code ec = get_magic(path, Magic.size(), Buffer)) { + if (ec == errc::value_too_large) { + // Magic.size() > file_size(Path). + result = false; + return error_code::success(); + } + return ec; + } + + result = Magic == Buffer; + return error_code::success(); +} + +/// @brief Identify the magic in magic. 
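The dispatch scheme of the function below, rendered standalone: switch on the first byte, verify the rest of the signature, and for ELF read the 16-bit e_type field in the file's own byte order. Only three of the formats are shown, and the enum is this sketch's own:

    #include <cstdio>
    #include <cstring>

    enum Kind { Unknown, Bitcode, Archive, ElfReloc, ElfExec, ElfShared, ElfCore };

    static Kind identify(const unsigned char *P, size_t N) {
      if (N < 4)
        return Unknown;
      switch (P[0]) {
      case 'B': // 'B' 'C' 0xC0 0xDE: LLVM bitcode.
        if (P[1] == 'C' && P[2] == 0xC0 && P[3] == 0xDE)
          return Bitcode;
        break;
      case '!': // "!<arch>\n": ar(1) archive.
        if (N >= 8 && std::memcmp(P, "!<arch>\n", 8) == 0)
          return Archive;
        break;
      case 0x7f: // "\177ELF": e_type at offset 16, endian-dependent.
        if (N >= 18 && std::memcmp(P + 1, "ELF", 3) == 0) {
          bool MSB = P[5] == 2; // EI_DATA == ELFDATA2MSB
          unsigned char Hi = MSB ? P[16] : P[17];
          unsigned char Lo = MSB ? P[17] : P[16];
          if (Hi == 0) {
            if (Lo == 1) return ElfReloc;
            if (Lo == 2) return ElfExec;
            if (Lo == 3) return ElfShared;
            if (Lo == 4) return ElfCore;
          }
        }
        break;
      }
      return Unknown;
    }

    int main() {
      const unsigned char BC[] = { 'B', 'C', 0xC0, 0xDE };
      std::printf("bitcode? %d\n", identify(BC, sizeof(BC)) == Bitcode);
      return 0;
    }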
+file_magic identify_magic(StringRef Magic) {
+  if (Magic.size() < 4)
+    return file_magic::unknown;
+  switch ((unsigned char)Magic[0]) {
+  case 0x00: {
+    // COFF short import library file
+    if (Magic[1] == (char)0x00 && Magic[2] == (char)0xff &&
+        Magic[3] == (char)0xff)
+      return file_magic::coff_import_library;
+    // Windows resource file
+    const char Expected[] = { 0, 0, 0, 0, '\x20', 0, 0, 0, '\xff' };
+    if (Magic.size() >= sizeof(Expected) &&
+        memcmp(Magic.data(), Expected, sizeof(Expected)) == 0)
+      return file_magic::windows_resource;
+    // 0x0000 = COFF unknown machine type
+    if (Magic[1] == 0)
+      return file_magic::coff_object;
+    break;
+  }
   case 0xDE:  // 0x0B17C0DE = BC wrapper
-    if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
-        magic[3] == (char)0x0B)
-      return Bitcode_FileType;
+    if (Magic[1] == (char)0xC0 && Magic[2] == (char)0x17 &&
+        Magic[3] == (char)0x0B)
+      return file_magic::bitcode;
     break;
   case 'B':
-    if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
-      return Bitcode_FileType;
+    if (Magic[1] == 'C' && Magic[2] == (char)0xC0 && Magic[3] == (char)0xDE)
+      return file_magic::bitcode;
     break;
   case '!':
-    if (length >= 8)
-      if (memcmp(magic,"!<arch>\n",8) == 0)
-        return Archive_FileType;
+    if (Magic.size() >= 8)
+      if (memcmp(Magic.data(),"!<arch>\n",8) == 0)
+        return file_magic::archive;
     break;

   case '\177':
-    if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
-      bool Data2MSB = magic[5] == 2;
+    if (Magic.size() >= 18 && Magic[1] == 'E' && Magic[2] == 'L' &&
+        Magic[3] == 'F') {
+      bool Data2MSB = Magic[5] == 2;
       unsigned high = Data2MSB ? 16 : 17;
       unsigned low  = Data2MSB ? 17 : 16;
-      if (length >= 18 && magic[high] == 0)
-        switch (magic[low]) {
+      if (Magic[high] == 0)
+        switch (Magic[low]) {
         default: break;
-        case 1: return ELF_Relocatable_FileType;
-        case 2: return ELF_Executable_FileType;
-        case 3: return ELF_SharedObject_FileType;
-        case 4: return ELF_Core_FileType;
+        case 1: return file_magic::elf_relocatable;
+        case 2: return file_magic::elf_executable;
+        case 3: return file_magic::elf_shared_object;
+        case 4: return file_magic::elf_core;
         }
     }
     break;

   case 0xCA:
-    if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
-        magic[3] == char(0xBE)) {
+    if (Magic[1] == char(0xFE) && Magic[2] == char(0xBA) &&
+        Magic[3] == char(0xBE)) {
       // This is complicated by an overlap with Java class files.
       // See the Mach-O section in /usr/share/file/magic for details.
-      if (length >= 8 && magic[7] < 43)
-        // FIXME: Universal Binary of any type.
- return Mach_O_DynamicallyLinkedSharedLib_FileType; + if (Magic.size() >= 8 && Magic[7] < 43) + return file_magic::macho_universal_binary; } break; @@ -91,29 +911,29 @@ sys::IdentifyFileType(const char *magic, unsigned length) { case 0xCE: case 0xCF: { uint16_t type = 0; - if (magic[0] == char(0xFE) && magic[1] == char(0xED) && - magic[2] == char(0xFA) && - (magic[3] == char(0xCE) || magic[3] == char(0xCF))) { + if (Magic[0] == char(0xFE) && Magic[1] == char(0xED) && + Magic[2] == char(0xFA) && + (Magic[3] == char(0xCE) || Magic[3] == char(0xCF))) { /* Native endian */ - if (length >= 16) type = magic[14] << 8 | magic[15]; - } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) && - magic[1] == char(0xFA) && magic[2] == char(0xED) && - magic[3] == char(0xFE)) { + if (Magic.size() >= 16) type = Magic[14] << 8 | Magic[15]; + } else if ((Magic[0] == char(0xCE) || Magic[0] == char(0xCF)) && + Magic[1] == char(0xFA) && Magic[2] == char(0xED) && + Magic[3] == char(0xFE)) { /* Reverse endian */ - if (length >= 14) type = magic[13] << 8 | magic[12]; + if (Magic.size() >= 14) type = Magic[13] << 8 | Magic[12]; } switch (type) { default: break; - case 1: return Mach_O_Object_FileType; - case 2: return Mach_O_Executable_FileType; - case 3: return Mach_O_FixedVirtualMemorySharedLib_FileType; - case 4: return Mach_O_Core_FileType; - case 5: return Mach_O_PreloadExecutable_FileType; - case 6: return Mach_O_DynamicallyLinkedSharedLib_FileType; - case 7: return Mach_O_DynamicLinker_FileType; - case 8: return Mach_O_Bundle_FileType; - case 9: return Mach_O_DynamicallyLinkedSharedLibStub_FileType; - case 10: return Mach_O_DSYMCompanion_FileType; + case 1: return file_magic::macho_object; + case 2: return file_magic::macho_executable; + case 3: return file_magic::macho_fixed_virtual_memory_shared_lib; + case 4: return file_magic::macho_core; + case 5: return file_magic::macho_preload_executable; + case 6: return file_magic::macho_dynamically_linked_shared_lib; + case 7: return file_magic::macho_dynamic_linker; + case 8: return file_magic::macho_bundle; + case 9: return file_magic::macho_dynamic_linker; + case 10: return file_magic::macho_dsym_companion; } break; } @@ -123,170 +943,94 @@ sys::IdentifyFileType(const char *magic, unsigned length) { case 0x66: // MPS R4000 Windows case 0x50: // mc68K case 0x4c: // 80386 Windows - if (magic[1] == 0x01) - return COFF_FileType; + if (Magic[1] == 0x01) + return file_magic::coff_object; case 0x90: // PA-RISC Windows case 0x68: // mc68K Windows - if (magic[1] == 0x02) - return COFF_FileType; + if (Magic[1] == 0x02) + return file_magic::coff_object; break; case 0x4d: // Possible MS-DOS stub on Windows PE file - if (magic[1] == 0x5a) { - uint32_t off = *reinterpret_cast(magic + 0x3c); + if (Magic[1] == 0x5a) { + uint32_t off = + *reinterpret_cast(Magic.data() + 0x3c); // PE/COFF file, either EXE or DLL. - if (off < length && memcmp(magic + off, "PE\0\0",4) == 0) - return COFF_FileType; + if (off < Magic.size() && memcmp(Magic.data() + off, "PE\0\0",4) == 0) + return file_magic::pecoff_executable; } break; case 0x64: // x86-64 Windows. 
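// A hedged standalone sketch of the Mach-O check handled above: the magic
// 0xFEEDFACE (32-bit) or 0xFEEDFACF (64-bit) may appear in either byte order,
// and the 32-bit filetype field at offset 12 must be read accordingly (the
// code above reads only its low 16 bits). read_filetype is an illustrative
// name, not part of this patch.
#include <cstdint>
#include <cstring>

static uint32_t read_filetype(const unsigned char *Hdr, size_t Size) {
  if (Size < 16)
    return 0;
  uint32_t BE = (uint32_t)Hdr[0] << 24 | Hdr[1] << 16 | Hdr[2] << 8 | Hdr[3];
  if (BE == 0xFEEDFACEu || BE == 0xFEEDFACFu)  // big-endian file
    return (uint32_t)Hdr[12] << 24 | Hdr[13] << 16 | Hdr[14] << 8 | Hdr[15];
  uint32_t LE = (uint32_t)Hdr[3] << 24 | Hdr[2] << 16 | Hdr[1] << 8 | Hdr[0];
  if (LE == 0xFEEDFACEu || LE == 0xFEEDFACFu)  // little-endian file
    return (uint32_t)Hdr[15] << 24 | Hdr[14] << 16 | Hdr[13] << 8 | Hdr[12];
  return 0; // not Mach-O
}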
- if (magic[1] == char(0x86)) - return COFF_FileType; + if (Magic[1] == char(0x86)) + return file_magic::coff_object; break; default: break; } - return Unknown_FileType; + return file_magic::unknown; } -bool -Path::isArchive() const { - fs::file_magic type; - if (fs::identify_magic(str(), type)) - return false; - return type == fs::file_magic::archive; -} +error_code identify_magic(const Twine &path, file_magic &result) { + SmallString<32> Magic; + error_code ec = get_magic(path, Magic.capacity(), Magic); + if (ec && ec != errc::value_too_large) + return ec; -bool -Path::isDynamicLibrary() const { - fs::file_magic type; - if (fs::identify_magic(str(), type)) - return false; - switch (type) { - default: return false; - case fs::file_magic::macho_fixed_virtual_memory_shared_lib: - case fs::file_magic::macho_dynamically_linked_shared_lib: - case fs::file_magic::macho_dynamically_linked_shared_lib_stub: - case fs::file_magic::elf_shared_object: - case fs::file_magic::pecoff_executable: return true; - } + result = identify_magic(Magic); + return error_code::success(); } -bool -Path::isObjectFile() const { - fs::file_magic type; - if (fs::identify_magic(str(), type) || type == fs::file_magic::unknown) - return false; - return true; -} - -Path -Path::FindLibrary(std::string& name) { - std::vector LibPaths; - GetSystemLibraryPaths(LibPaths); - for (unsigned i = 0; i < LibPaths.size(); ++i) { - sys::Path FullPath(LibPaths[i]); - FullPath.appendComponent("lib" + name + LTDL_SHLIB_EXT); - if (FullPath.isDynamicLibrary()) - return FullPath; - FullPath.eraseSuffix(); - FullPath.appendSuffix("a"); - if (FullPath.isArchive()) - return FullPath; +namespace { +error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) { + if (ft == file_type::directory_file) { + // This code would be a lot better with exceptions ;/. + error_code ec; + directory_iterator i(path, ec); + if (ec) return ec; + for (directory_iterator e; i != e; i.increment(ec)) { + if (ec) return ec; + file_status st; + if (error_code ec = i->status(st)) return ec; + if (error_code ec = remove_all_r(i->path(), st.type(), count)) return ec; + } + bool obviously_this_exists; + if (error_code ec = remove(path, obviously_this_exists)) return ec; + assert(obviously_this_exists); + ++count; // Include the directory itself in the items removed. 
+ } else { + bool obviously_this_exists; + if (error_code ec = remove(path, obviously_this_exists)) return ec; + assert(obviously_this_exists); + ++count; } - return sys::Path(); -} -StringRef Path::GetDLLSuffix() { - return &(LTDL_SHLIB_EXT[1]); + return error_code::success(); } +} // end unnamed namespace -void -Path::appendSuffix(StringRef suffix) { - if (!suffix.empty()) { - path.append("."); - path.append(suffix); - } -} - -bool -Path::isBitcodeFile() const { - fs::file_magic type; - if (fs::identify_magic(str(), type)) - return false; - return type == fs::file_magic::bitcode; -} - -bool Path::hasMagicNumber(StringRef Magic) const { - std::string actualMagic; - if (getMagicNumber(actualMagic, static_cast(Magic.size()))) - return Magic == actualMagic; - return false; -} - -static void getPathList(const char*path, std::vector& Paths) { - const char* at = path; - const char* delim = strchr(at, PathSeparator); - Path tmpPath; - while (delim != 0) { - std::string tmp(at, size_t(delim-at)); - if (tmpPath.set(tmp)) - if (tmpPath.canRead()) - Paths.push_back(tmpPath); - at = delim + 1; - delim = strchr(at, PathSeparator); - } +error_code remove_all(const Twine &path, uint32_t &num_removed) { + SmallString<128> path_storage; + StringRef p = path.toStringRef(path_storage); - if (*at != 0) - if (tmpPath.set(std::string(at))) - if (tmpPath.canRead()) - Paths.push_back(tmpPath); + file_status fs; + if (error_code ec = status(path, fs)) + return ec; + num_removed = 0; + return remove_all_r(p, fs.type(), num_removed); } -static StringRef getDirnameCharSep(StringRef path, const char *Sep) { - assert(Sep[0] != '\0' && Sep[1] == '\0' && - "Sep must be a 1-character string literal."); - if (path.empty()) - return "."; - - // If the path is all slashes, return a single slash. - // Otherwise, remove all trailing slashes. - - signed pos = static_cast(path.size()) - 1; - - while (pos >= 0 && path[pos] == Sep[0]) - --pos; - - if (pos < 0) - return path[0] == Sep[0] ? Sep : "."; - - // Any slashes left? - signed i = 0; - - while (i < pos && path[i] != Sep[0]) - ++i; - - if (i == pos) // No slashes? Return "." - return "."; - - // There is at least one slash left. Remove all trailing non-slashes. - while (pos >= 0 && path[pos] != Sep[0]) - --pos; - - // Remove any trailing slashes. - while (pos >= 0 && path[pos] == Sep[0]) - --pos; - - if (pos < 0) - return path[0] == Sep[0] ? Sep : "."; - - return path.substr(0, pos+1); +error_code directory_entry::status(file_status &result) const { + return fs::status(Path, result); } -// Include the truly platform-specific parts of this class. +} // end namespace fs +} // end namespace sys +} // end namespace llvm + +// Include the truly platform-specific parts. #if defined(LLVM_ON_UNIX) #include "Unix/Path.inc" #endif diff --git a/contrib/llvm/lib/Support/PathV2.cpp b/contrib/llvm/lib/Support/PathV2.cpp deleted file mode 100644 index ac53a9e9..0000000 --- a/contrib/llvm/lib/Support/PathV2.cpp +++ /dev/null @@ -1,949 +0,0 @@ -//===-- PathV2.cpp - Implement OS Path Concept ------------------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the operating system PathV2 API. 
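// A rough POSIX-only sketch of the recursion implemented by remove_all_r
// above, counting removed entries the same way. remove_tree is an
// illustrative name, and error handling is reduced to a bool.
#include <dirent.h>
#include <string>
#include <sys/stat.h>
#include <unistd.h>

static bool remove_tree(const std::string &Path, unsigned &Count) {
  struct stat St;
  if (lstat(Path.c_str(), &St) != 0)
    return false;
  if (S_ISDIR(St.st_mode)) {
    if (DIR *D = opendir(Path.c_str())) {
      while (struct dirent *E = readdir(D)) {
        std::string Name = E->d_name;
        if (Name == "." || Name == "..")
          continue;
        if (!remove_tree(Path + "/" + Name, Count)) {
          closedir(D);
          return false;
        }
      }
      closedir(D);
    }
    if (rmdir(Path.c_str()) != 0)
      return false;
  } else if (unlink(Path.c_str()) != 0) {
    return false;
  }
  ++Count; // Count the entry itself, directories included.
  return true;
}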
-// -//===----------------------------------------------------------------------===// - -#include "llvm/Support/PathV2.h" -#include "llvm/Support/Endian.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FileSystem.h" -#include -#include -#include -#ifdef __APPLE__ -#include -#endif - -namespace { - using llvm::StringRef; - using llvm::sys::path::is_separator; - -#ifdef LLVM_ON_WIN32 - const char *separators = "\\/"; - const char prefered_separator = '\\'; -#else - const char separators = '/'; - const char prefered_separator = '/'; -#endif - - StringRef find_first_component(StringRef path) { - // Look for this first component in the following order. - // * empty (in this case we return an empty string) - // * either C: or {//,\\}net. - // * {/,\} - // * {.,..} - // * {file,directory}name - - if (path.empty()) - return path; - -#ifdef LLVM_ON_WIN32 - // C: - if (path.size() >= 2 && std::isalpha(static_cast(path[0])) && - path[1] == ':') - return path.substr(0, 2); -#endif - - // //net - if ((path.size() > 2) && - is_separator(path[0]) && - path[0] == path[1] && - !is_separator(path[2])) { - // Find the next directory separator. - size_t end = path.find_first_of(separators, 2); - return path.substr(0, end); - } - - // {/,\} - if (is_separator(path[0])) - return path.substr(0, 1); - - if (path.startswith("..")) - return path.substr(0, 2); - - if (path[0] == '.') - return path.substr(0, 1); - - // * {file,directory}name - size_t end = path.find_first_of(separators, 2); - return path.substr(0, end); - } - - size_t filename_pos(StringRef str) { - if (str.size() == 2 && - is_separator(str[0]) && - str[0] == str[1]) - return 0; - - if (str.size() > 0 && is_separator(str[str.size() - 1])) - return str.size() - 1; - - size_t pos = str.find_last_of(separators, str.size() - 1); - -#ifdef LLVM_ON_WIN32 - if (pos == StringRef::npos) - pos = str.find_last_of(':', str.size() - 2); -#endif - - if (pos == StringRef::npos || - (pos == 1 && is_separator(str[0]))) - return 0; - - return pos + 1; - } - - size_t root_dir_start(StringRef str) { - // case "c:/" -#ifdef LLVM_ON_WIN32 - if (str.size() > 2 && - str[1] == ':' && - is_separator(str[2])) - return 2; -#endif - - // case "//" - if (str.size() == 2 && - is_separator(str[0]) && - str[0] == str[1]) - return StringRef::npos; - - // case "//net" - if (str.size() > 3 && - is_separator(str[0]) && - str[0] == str[1] && - !is_separator(str[2])) { - return str.find_first_of(separators, 2); - } - - // case "/" - if (str.size() > 0 && is_separator(str[0])) - return 0; - - return StringRef::npos; - } - - size_t parent_path_end(StringRef path) { - size_t end_pos = filename_pos(path); - - bool filename_was_sep = path.size() > 0 && is_separator(path[end_pos]); - - // Skip separators except for root dir. 
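// A small standalone sketch of the component scan that find_first_component
// performs above, for POSIX-style paths only; first_component is an
// illustrative name. "//net" (exactly two leading separators followed by a
// name) is kept as a single component, as in the deleted code.
#include <string>

static std::string first_component(const std::string &P) {
  if (P.empty())
    return P;
  if (P.size() > 2 && P[0] == '/' && P[1] == '/' && P[2] != '/')
    return P.substr(0, P.find('/', 2));   // "//net"
  if (P[0] == '/')
    return "/";                           // root directory
  return P.substr(0, P.find('/'));        // ".", "..", or a name
}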
- size_t root_dir_pos = root_dir_start(path.substr(0, end_pos)); - - while(end_pos > 0 && - (end_pos - 1) != root_dir_pos && - is_separator(path[end_pos - 1])) - --end_pos; - - if (end_pos == 1 && root_dir_pos == 0 && filename_was_sep) - return StringRef::npos; - - return end_pos; - } -} // end unnamed namespace - -namespace llvm { -namespace sys { -namespace path { - -const_iterator begin(StringRef path) { - const_iterator i; - i.Path = path; - i.Component = find_first_component(path); - i.Position = 0; - return i; -} - -const_iterator end(StringRef path) { - const_iterator i; - i.Path = path; - i.Position = path.size(); - return i; -} - -const_iterator &const_iterator::operator++() { - assert(Position < Path.size() && "Tried to increment past end!"); - - // Increment Position to past the current component - Position += Component.size(); - - // Check for end. - if (Position == Path.size()) { - Component = StringRef(); - return *this; - } - - // Both POSIX and Windows treat paths that begin with exactly two separators - // specially. - bool was_net = Component.size() > 2 && - is_separator(Component[0]) && - Component[1] == Component[0] && - !is_separator(Component[2]); - - // Handle separators. - if (is_separator(Path[Position])) { - // Root dir. - if (was_net -#ifdef LLVM_ON_WIN32 - // c:/ - || Component.endswith(":") -#endif - ) { - Component = Path.substr(Position, 1); - return *this; - } - - // Skip extra separators. - while (Position != Path.size() && - is_separator(Path[Position])) { - ++Position; - } - - // Treat trailing '/' as a '.'. - if (Position == Path.size()) { - --Position; - Component = "."; - return *this; - } - } - - // Find next component. - size_t end_pos = Path.find_first_of(separators, Position); - Component = Path.slice(Position, end_pos); - - return *this; -} - -const_iterator &const_iterator::operator--() { - // If we're at the end and the previous char was a '/', return '.'. - if (Position == Path.size() && - Path.size() > 1 && - is_separator(Path[Position - 1]) -#ifdef LLVM_ON_WIN32 - && Path[Position - 2] != ':' -#endif - ) { - --Position; - Component = "."; - return *this; - } - - // Skip separators unless it's the root directory. - size_t root_dir_pos = root_dir_start(Path); - size_t end_pos = Position; - - while(end_pos > 0 && - (end_pos - 1) != root_dir_pos && - is_separator(Path[end_pos - 1])) - --end_pos; - - // Find next separator. - size_t start_pos = filename_pos(Path.substr(0, end_pos)); - Component = Path.slice(start_pos, end_pos); - Position = start_pos; - return *this; -} - -bool const_iterator::operator==(const const_iterator &RHS) const { - return Path.begin() == RHS.Path.begin() && - Position == RHS.Position; -} - -bool const_iterator::operator!=(const const_iterator &RHS) const { - return !(*this == RHS); -} - -ptrdiff_t const_iterator::operator-(const const_iterator &RHS) const { - return Position - RHS.Position; -} - -const StringRef root_path(StringRef path) { - const_iterator b = begin(path), - pos = b, - e = end(path); - if (b != e) { - bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0]; - bool has_drive = -#ifdef LLVM_ON_WIN32 - b->endswith(":"); -#else - false; -#endif - - if (has_net || has_drive) { - if ((++pos != e) && is_separator((*pos)[0])) { - // {C:/,//net/}, so get the first two components. - return path.substr(0, b->size() + pos->size()); - } else { - // just {C:,//net}, return the first component. - return *b; - } - } - - // POSIX style root directory. 
- if (is_separator((*b)[0])) { - return *b; - } - } - - return StringRef(); -} - -const StringRef root_name(StringRef path) { - const_iterator b = begin(path), - e = end(path); - if (b != e) { - bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0]; - bool has_drive = -#ifdef LLVM_ON_WIN32 - b->endswith(":"); -#else - false; -#endif - - if (has_net || has_drive) { - // just {C:,//net}, return the first component. - return *b; - } - } - - // No path or no name. - return StringRef(); -} - -const StringRef root_directory(StringRef path) { - const_iterator b = begin(path), - pos = b, - e = end(path); - if (b != e) { - bool has_net = b->size() > 2 && is_separator((*b)[0]) && (*b)[1] == (*b)[0]; - bool has_drive = -#ifdef LLVM_ON_WIN32 - b->endswith(":"); -#else - false; -#endif - - if ((has_net || has_drive) && - // {C:,//net}, skip to the next component. - (++pos != e) && is_separator((*pos)[0])) { - return *pos; - } - - // POSIX style root directory. - if (!has_net && is_separator((*b)[0])) { - return *b; - } - } - - // No path or no root. - return StringRef(); -} - -const StringRef relative_path(StringRef path) { - StringRef root = root_path(path); - return path.substr(root.size()); -} - -void append(SmallVectorImpl &path, const Twine &a, - const Twine &b, - const Twine &c, - const Twine &d) { - SmallString<32> a_storage; - SmallString<32> b_storage; - SmallString<32> c_storage; - SmallString<32> d_storage; - - SmallVector components; - if (!a.isTriviallyEmpty()) components.push_back(a.toStringRef(a_storage)); - if (!b.isTriviallyEmpty()) components.push_back(b.toStringRef(b_storage)); - if (!c.isTriviallyEmpty()) components.push_back(c.toStringRef(c_storage)); - if (!d.isTriviallyEmpty()) components.push_back(d.toStringRef(d_storage)); - - for (SmallVectorImpl::const_iterator i = components.begin(), - e = components.end(); - i != e; ++i) { - bool path_has_sep = !path.empty() && is_separator(path[path.size() - 1]); - bool component_has_sep = !i->empty() && is_separator((*i)[0]); - bool is_root_name = has_root_name(*i); - - if (path_has_sep) { - // Strip separators from beginning of component. - size_t loc = i->find_first_not_of(separators); - StringRef c = i->substr(loc); - - // Append it. - path.append(c.begin(), c.end()); - continue; - } - - if (!component_has_sep && !(path.empty() || is_root_name)) { - // Add a separator. - path.push_back(prefered_separator); - } - - path.append(i->begin(), i->end()); - } -} - -void append(SmallVectorImpl &path, - const_iterator begin, const_iterator end) { - for (; begin != end; ++begin) - path::append(path, *begin); -} - -const StringRef parent_path(StringRef path) { - size_t end_pos = parent_path_end(path); - if (end_pos == StringRef::npos) - return StringRef(); - else - return path.substr(0, end_pos); -} - -void remove_filename(SmallVectorImpl &path) { - size_t end_pos = parent_path_end(StringRef(path.begin(), path.size())); - if (end_pos != StringRef::npos) - path.set_size(end_pos); -} - -void replace_extension(SmallVectorImpl &path, const Twine &extension) { - StringRef p(path.begin(), path.size()); - SmallString<32> ext_storage; - StringRef ext = extension.toStringRef(ext_storage); - - // Erase existing extension. - size_t pos = p.find_last_of('.'); - if (pos != StringRef::npos && pos >= filename_pos(p)) - path.set_size(pos); - - // Append '.' if needed. - if (ext.size() > 0 && ext[0] != '.') - path.push_back('.'); - - // Append extension. 
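// The replace_extension logic here, as a standalone sketch over std::string
// (set_extension is an illustrative name). A dot only counts as starting an
// extension if it falls inside the final path component.
#include <string>

static void set_extension(std::string &Path, const std::string &Ext) {
  size_t Slash = Path.find_last_of('/');
  size_t NameStart = (Slash == std::string::npos) ? 0 : Slash + 1;
  size_t Dot = Path.find_last_of('.');
  if (Dot != std::string::npos && Dot >= NameStart)
    Path.resize(Dot);          // erase the old extension
  if (!Ext.empty() && Ext[0] != '.')
    Path += '.';               // add the dot if the caller omitted it
  Path += Ext;
}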
- path.append(ext.begin(), ext.end()); -} - -void native(const Twine &path, SmallVectorImpl &result) { - // Clear result. - result.clear(); -#ifdef LLVM_ON_WIN32 - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - result.reserve(p.size()); - for (StringRef::const_iterator i = p.begin(), - e = p.end(); - i != e; - ++i) { - if (*i == '/') - result.push_back('\\'); - else - result.push_back(*i); - } -#else - path.toVector(result); -#endif -} - -const StringRef filename(StringRef path) { - return *(--end(path)); -} - -const StringRef stem(StringRef path) { - StringRef fname = filename(path); - size_t pos = fname.find_last_of('.'); - if (pos == StringRef::npos) - return fname; - else - if ((fname.size() == 1 && fname == ".") || - (fname.size() == 2 && fname == "..")) - return fname; - else - return fname.substr(0, pos); -} - -const StringRef extension(StringRef path) { - StringRef fname = filename(path); - size_t pos = fname.find_last_of('.'); - if (pos == StringRef::npos) - return StringRef(); - else - if ((fname.size() == 1 && fname == ".") || - (fname.size() == 2 && fname == "..")) - return StringRef(); - else - return fname.substr(pos); -} - -bool is_separator(char value) { - switch(value) { -#ifdef LLVM_ON_WIN32 - case '\\': // fall through -#endif - case '/': return true; - default: return false; - } -} - -void system_temp_directory(bool erasedOnReboot, SmallVectorImpl &result) { - result.clear(); - -#ifdef __APPLE__ - // On Darwin, use DARWIN_USER_TEMP_DIR or DARWIN_USER_CACHE_DIR. - int ConfName = erasedOnReboot? _CS_DARWIN_USER_TEMP_DIR - : _CS_DARWIN_USER_CACHE_DIR; - size_t ConfLen = confstr(ConfName, 0, 0); - if (ConfLen > 0) { - do { - result.resize(ConfLen); - ConfLen = confstr(ConfName, result.data(), result.size()); - } while (ConfLen > 0 && ConfLen != result.size()); - - if (ConfLen > 0) { - assert(result.back() == 0); - result.pop_back(); - return; - } - - result.clear(); - } -#endif - - // Check whether the temporary directory is specified by an environment - // variable. - const char *EnvironmentVariable; -#ifdef LLVM_ON_WIN32 - EnvironmentVariable = "TEMP"; -#else - EnvironmentVariable = "TMPDIR"; -#endif - if (char *RequestedDir = getenv(EnvironmentVariable)) { - result.append(RequestedDir, RequestedDir + strlen(RequestedDir)); - return; - } - - // Fall back to a system default. 
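// A condensed standalone sketch of the same lookup order, assuming a POSIX
// environment (temp_dir is an illustrative name): honor $TMPDIR first, then
// fall back to /tmp or /var/tmp depending on the desired persistence.
#include <cstdlib>
#include <string>

static std::string temp_dir(bool ErasedOnReboot) {
  if (const char *Env = std::getenv("TMPDIR"))
    return Env;
  return ErasedOnReboot ? "/tmp" : "/var/tmp";
}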
- const char *DefaultResult; -#ifdef LLVM_ON_WIN32 - (void)erasedOnReboot; - DefaultResult = "C:\\TEMP"; -#else - if (erasedOnReboot) - DefaultResult = "/tmp"; - else - DefaultResult = "/var/tmp"; -#endif - result.append(DefaultResult, DefaultResult + strlen(DefaultResult)); -} - -bool has_root_name(const Twine &path) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - return !root_name(p).empty(); -} - -bool has_root_directory(const Twine &path) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - return !root_directory(p).empty(); -} - -bool has_root_path(const Twine &path) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - return !root_path(p).empty(); -} - -bool has_relative_path(const Twine &path) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - return !relative_path(p).empty(); -} - -bool has_filename(const Twine &path) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - return !filename(p).empty(); -} - -bool has_parent_path(const Twine &path) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - return !parent_path(p).empty(); -} - -bool has_stem(const Twine &path) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - return !stem(p).empty(); -} - -bool has_extension(const Twine &path) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - return !extension(p).empty(); -} - -bool is_absolute(const Twine &path) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - bool rootDir = has_root_directory(p), -#ifdef LLVM_ON_WIN32 - rootName = has_root_name(p); -#else - rootName = true; -#endif - - return rootDir && rootName; -} - -bool is_relative(const Twine &path) { - return !is_absolute(path); -} - -} // end namespace path - -namespace fs { - -error_code make_absolute(SmallVectorImpl &path) { - StringRef p(path.data(), path.size()); - - bool rootDirectory = path::has_root_directory(p), -#ifdef LLVM_ON_WIN32 - rootName = path::has_root_name(p); -#else - rootName = true; -#endif - - // Already absolute. - if (rootName && rootDirectory) - return error_code::success(); - - // All of the following conditions will need the current directory. - SmallString<128> current_dir; - if (error_code ec = current_path(current_dir)) return ec; - - // Relative path. Prepend the current directory. - if (!rootName && !rootDirectory) { - // Append path to the current directory. - path::append(current_dir, p); - // Set path to the result. - path.swap(current_dir); - return error_code::success(); - } - - if (!rootName && rootDirectory) { - StringRef cdrn = path::root_name(current_dir); - SmallString<128> curDirRootName(cdrn.begin(), cdrn.end()); - path::append(curDirRootName, p); - // Set path to the result. 
- path.swap(curDirRootName); - return error_code::success(); - } - - if (rootName && !rootDirectory) { - StringRef pRootName = path::root_name(p); - StringRef bRootDirectory = path::root_directory(current_dir); - StringRef bRelativePath = path::relative_path(current_dir); - StringRef pRelativePath = path::relative_path(p); - - SmallString<128> res; - path::append(res, pRootName, bRootDirectory, bRelativePath, pRelativePath); - path.swap(res); - return error_code::success(); - } - - llvm_unreachable("All rootName and rootDirectory combinations should have " - "occurred above!"); -} - -error_code create_directories(const Twine &path, bool &existed) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - StringRef parent = path::parent_path(p); - if (!parent.empty()) { - bool parent_exists; - if (error_code ec = fs::exists(parent, parent_exists)) return ec; - - if (!parent_exists) - if (error_code ec = create_directories(parent, existed)) return ec; - } - - return create_directory(p, existed); -} - -bool exists(file_status status) { - return status_known(status) && status.type() != file_type::file_not_found; -} - -bool status_known(file_status s) { - return s.type() != file_type::status_error; -} - -bool is_directory(file_status status) { - return status.type() == file_type::directory_file; -} - -error_code is_directory(const Twine &path, bool &result) { - file_status st; - if (error_code ec = status(path, st)) - return ec; - result = is_directory(st); - return error_code::success(); -} - -bool is_regular_file(file_status status) { - return status.type() == file_type::regular_file; -} - -error_code is_regular_file(const Twine &path, bool &result) { - file_status st; - if (error_code ec = status(path, st)) - return ec; - result = is_regular_file(st); - return error_code::success(); -} - -bool is_symlink(file_status status) { - return status.type() == file_type::symlink_file; -} - -error_code is_symlink(const Twine &path, bool &result) { - file_status st; - if (error_code ec = status(path, st)) - return ec; - result = is_symlink(st); - return error_code::success(); -} - -bool is_other(file_status status) { - return exists(status) && - !is_regular_file(status) && - !is_directory(status) && - !is_symlink(status); -} - -void directory_entry::replace_filename(const Twine &filename, file_status st) { - SmallString<128> path(Path.begin(), Path.end()); - path::remove_filename(path); - path::append(path, filename); - Path = path.str(); - Status = st; -} - -error_code has_magic(const Twine &path, const Twine &magic, bool &result) { - SmallString<32> MagicStorage; - StringRef Magic = magic.toStringRef(MagicStorage); - SmallString<32> Buffer; - - if (error_code ec = get_magic(path, Magic.size(), Buffer)) { - if (ec == errc::value_too_large) { - // Magic.size() > file_size(Path). - result = false; - return error_code::success(); - } - return ec; - } - - result = Magic == Buffer; - return error_code::success(); -} - -/// @brief Identify the magic in magic. 
-file_magic identify_magic(StringRef magic) {
-  if (magic.size() < 4)
-    return file_magic::unknown;
-  switch ((unsigned char)magic[0]) {
-  case 0xDE:  // 0x0B17C0DE = BC wrapper
-    if (magic[1] == (char)0xC0 && magic[2] == (char)0x17 &&
-        magic[3] == (char)0x0B)
-      return file_magic::bitcode;
-    break;
-  case 'B':
-    if (magic[1] == 'C' && magic[2] == (char)0xC0 && magic[3] == (char)0xDE)
-      return file_magic::bitcode;
-    break;
-  case '!':
-    if (magic.size() >= 8)
-      if (memcmp(magic.data(),"!<arch>\n",8) == 0)
-        return file_magic::archive;
-    break;
-
-  case '\177':
-    if (magic[1] == 'E' && magic[2] == 'L' && magic[3] == 'F') {
-      bool Data2MSB = magic[5] == 2;
-      unsigned high = Data2MSB ? 16 : 17;
-      unsigned low  = Data2MSB ? 17 : 16;
-      if (magic.size() >= 18 && magic[high] == 0)
-        switch (magic[low]) {
-        default: break;
-        case 1: return file_magic::elf_relocatable;
-        case 2: return file_magic::elf_executable;
-        case 3: return file_magic::elf_shared_object;
-        case 4: return file_magic::elf_core;
-        }
-    }
-    break;
-
-  case 0xCA:
-    if (magic[1] == char(0xFE) && magic[2] == char(0xBA) &&
-        magic[3] == char(0xBE)) {
-      // This is complicated by an overlap with Java class files.
-      // See the Mach-O section in /usr/share/file/magic for details.
-      if (magic.size() >= 8 && magic[7] < 43)
-        // FIXME: Universal Binary of any type.
-        return file_magic::macho_dynamically_linked_shared_lib;
-    }
-    break;
-
-    // The two magic numbers for mach-o are:
-    // 0xfeedface - 32-bit mach-o
-    // 0xfeedfacf - 64-bit mach-o
-  case 0xFE:
-  case 0xCE:
-  case 0xCF: {
-    uint16_t type = 0;
-    if (magic[0] == char(0xFE) && magic[1] == char(0xED) &&
-        magic[2] == char(0xFA) &&
-        (magic[3] == char(0xCE) || magic[3] == char(0xCF))) {
-      /* Native endian */
-      if (magic.size() >= 16) type = magic[14] << 8 | magic[15];
-    } else if ((magic[0] == char(0xCE) || magic[0] == char(0xCF)) &&
-               magic[1] == char(0xFA) && magic[2] == char(0xED) &&
-               magic[3] == char(0xFE)) {
-      /* Reverse endian */
-      if (magic.size() >= 14) type = magic[13] << 8 | magic[12];
-    }
-    switch (type) {
-    default: break;
-    case 1: return file_magic::macho_object;
-    case 2: return file_magic::macho_executable;
-    case 3: return file_magic::macho_fixed_virtual_memory_shared_lib;
-    case 4: return file_magic::macho_core;
-    case 5: return file_magic::macho_preload_executable;
-    case 6: return file_magic::macho_dynamically_linked_shared_lib;
-    case 7: return file_magic::macho_dynamic_linker;
-    case 8: return file_magic::macho_bundle;
-    case 9: return file_magic::macho_dynamic_linker;
-    case 10: return file_magic::macho_dsym_companion;
-    }
-    break;
-  }
-  case 0xF0: // PowerPC Windows
-  case 0x83: // Alpha 32-bit
-  case 0x84: // Alpha 64-bit
-  case 0x66: // MPS R4000 Windows
-  case 0x50: // mc68K
-  case 0x4c: // 80386 Windows
-    if (magic[1] == 0x01)
-      return file_magic::coff_object;
-
-  case 0x90: // PA-RISC Windows
-  case 0x68: // mc68K Windows
-    if (magic[1] == 0x02)
-      return file_magic::coff_object;
-    break;
-
-  case 0x4d: // Possible MS-DOS stub on Windows PE file
-    if (magic[1] == 0x5a) {
-      uint32_t off =
-        *reinterpret_cast<const support::ulittle32_t*>(magic.data() + 0x3c);
-      // PE/COFF file, either EXE or DLL.
-      if (off < magic.size() && memcmp(magic.data() + off, "PE\0\0",4) == 0)
-        return file_magic::pecoff_executable;
-    }
-    break;
-
-  case 0x64: // x86-64 Windows.
- if (magic[1] == char(0x86)) - return file_magic::coff_object; - break; - - default: - break; - } - return file_magic::unknown; -} - -error_code identify_magic(const Twine &path, file_magic &result) { - SmallString<32> Magic; - error_code ec = get_magic(path, Magic.capacity(), Magic); - if (ec && ec != errc::value_too_large) - return ec; - - result = identify_magic(Magic); - return error_code::success(); -} - -namespace { -error_code remove_all_r(StringRef path, file_type ft, uint32_t &count) { - if (ft == file_type::directory_file) { - // This code would be a lot better with exceptions ;/. - error_code ec; - directory_iterator i(path, ec); - if (ec) return ec; - for (directory_iterator e; i != e; i.increment(ec)) { - if (ec) return ec; - file_status st; - if (error_code ec = i->status(st)) return ec; - if (error_code ec = remove_all_r(i->path(), st.type(), count)) return ec; - } - bool obviously_this_exists; - if (error_code ec = remove(path, obviously_this_exists)) return ec; - assert(obviously_this_exists); - ++count; // Include the directory itself in the items removed. - } else { - bool obviously_this_exists; - if (error_code ec = remove(path, obviously_this_exists)) return ec; - assert(obviously_this_exists); - ++count; - } - - return error_code::success(); -} -} // end unnamed namespace - -error_code remove_all(const Twine &path, uint32_t &num_removed) { - SmallString<128> path_storage; - StringRef p = path.toStringRef(path_storage); - - file_status fs; - if (error_code ec = status(path, fs)) - return ec; - num_removed = 0; - return remove_all_r(p, fs.type(), num_removed); -} - -error_code directory_entry::status(file_status &result) const { - return fs::status(Path, result); -} - -} // end namespace fs -} // end namespace sys -} // end namespace llvm - -// Include the truly platform-specific parts. -#if defined(LLVM_ON_UNIX) -#include "Unix/PathV2.inc" -#endif -#if defined(LLVM_ON_WIN32) -#include "Windows/PathV2.inc" -#endif diff --git a/contrib/llvm/lib/Support/PrettyStackTrace.cpp b/contrib/llvm/lib/Support/PrettyStackTrace.cpp index 23ee5ab..722f4ca 100644 --- a/contrib/llvm/lib/Support/PrettyStackTrace.cpp +++ b/contrib/llvm/lib/Support/PrettyStackTrace.cpp @@ -15,10 +15,12 @@ #include "llvm/Support/PrettyStackTrace.h" #include "llvm/ADT/SmallString.h" #include "llvm/Config/config.h" // Get autoconf configuration settings +#include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Signals.h" #include "llvm/Support/ThreadLocal.h" #include "llvm/Support/Watchdog.h" #include "llvm/Support/raw_ostream.h" +#include "llvm-c/Core.h" #ifdef HAVE_CRASHREPORTERCLIENT_H #include @@ -26,12 +28,7 @@ using namespace llvm; -namespace llvm { - bool DisablePrettyStackTrace = false; -} - -// FIXME: This should be thread local when llvm supports threads. -static sys::ThreadLocal PrettyStackTraceHead; +static ManagedStatic > PrettyStackTraceHead; static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){ unsigned NextID = 0; @@ -49,12 +46,12 @@ static unsigned PrintStack(const PrettyStackTraceEntry *Entry, raw_ostream &OS){ /// PrintCurStackTrace - Print the current stack trace to the specified stream. static void PrintCurStackTrace(raw_ostream &OS) { // Don't print an empty trace. - if (PrettyStackTraceHead.get() == 0) return; + if (PrettyStackTraceHead->get() == 0) return; // If there are pretty stack frames registered, walk and emit them. 
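// The pattern being reworked in PrettyStackTrace.cpp here is a per-thread,
// intrusively linked stack of scope entries. A minimal standalone form,
// using C++11 thread_local as a stand-in for the ThreadLocal/ManagedStatic
// machinery; ScopedNote and dump_notes are illustrative names.
#include <cstdio>

struct ScopedNote {
  const char *Msg;
  ScopedNote *Next;
  static thread_local ScopedNote *Head;
  explicit ScopedNote(const char *M) : Msg(M), Next(Head) { Head = this; }
  ~ScopedNote() { Head = Next; } // entries must unwind in LIFO order
};
thread_local ScopedNote *ScopedNote::Head = nullptr;

static void dump_notes() {
  for (ScopedNote *N = ScopedNote::Head; N; N = N->Next)
    std::fprintf(stderr, "\t%s\n", N->Msg);
}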
OS << "Stack dump:\n"; - PrintStack(PrettyStackTraceHead.get(), OS); + PrintStack(PrettyStackTraceHead->get(), OS); OS.flush(); } @@ -102,26 +99,28 @@ static void CrashHandler(void *) { #endif } -static bool RegisterCrashPrinter() { - if (!DisablePrettyStackTrace) - sys::AddSignalHandler(CrashHandler, 0); - return false; -} - PrettyStackTraceEntry::PrettyStackTraceEntry() { - // The first time this is called, we register the crash printer. - static bool HandlerRegistered = RegisterCrashPrinter(); - (void)HandlerRegistered; - // Link ourselves. - NextEntry = PrettyStackTraceHead.get(); - PrettyStackTraceHead.set(this); + NextEntry = PrettyStackTraceHead->get(); + PrettyStackTraceHead->set(this); } PrettyStackTraceEntry::~PrettyStackTraceEntry() { - assert(PrettyStackTraceHead.get() == this && + // Do nothing if PrettyStackTraceHead is uninitialized. This can only happen + // if a shutdown occurred after we created the PrettyStackTraceEntry. That + // does occur in the following idiom: + // + // PrettyStackTraceProgram X(...); + // llvm_shutdown_obj Y; + // + // Without this check, we may end up removing ourselves from the stack trace + // after PrettyStackTraceHead has already been destroyed. + if (!PrettyStackTraceHead.isConstructed()) + return; + + assert(PrettyStackTraceHead->get() == this && "Pretty stack trace entry destruction is out of order"); - PrettyStackTraceHead.set(getNextEntry()); + PrettyStackTraceHead->set(getNextEntry()); } void PrettyStackTraceString::print(raw_ostream &OS) const { @@ -135,3 +134,18 @@ void PrettyStackTraceProgram::print(raw_ostream &OS) const { OS << ArgV[i] << ' '; OS << '\n'; } + +static bool RegisterCrashPrinter() { + sys::AddSignalHandler(CrashHandler, 0); + return false; +} + +void llvm::EnablePrettyStackTrace() { + // The first time this is called, we register the crash printer. + static bool HandlerRegistered = RegisterCrashPrinter(); + (void)HandlerRegistered; +} + +void LLVMEnablePrettyStackTrace() { + EnablePrettyStackTrace(); +} diff --git a/contrib/llvm/lib/Support/Process.cpp b/contrib/llvm/lib/Support/Process.cpp index 2c0d37b..d5168f0 100644 --- a/contrib/llvm/lib/Support/Process.cpp +++ b/contrib/llvm/lib/Support/Process.cpp @@ -80,6 +80,24 @@ TimeValue self_process::get_wall_time() const { #endif +#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m" + +#define ALLCOLORS(FGBG,BOLD) {\ + COLOR(FGBG, "0", BOLD),\ + COLOR(FGBG, "1", BOLD),\ + COLOR(FGBG, "2", BOLD),\ + COLOR(FGBG, "3", BOLD),\ + COLOR(FGBG, "4", BOLD),\ + COLOR(FGBG, "5", BOLD),\ + COLOR(FGBG, "6", BOLD),\ + COLOR(FGBG, "7", BOLD)\ + } + +static const char colorcodes[2][2][8][10] = { + { ALLCOLORS("3",""), ALLCOLORS("3","1;") }, + { ALLCOLORS("4",""), ALLCOLORS("4","1;") } +}; + // Include the platform-specific parts of this class. #ifdef LLVM_ON_UNIX #include "Unix/Process.inc" diff --git a/contrib/llvm/lib/Support/Program.cpp b/contrib/llvm/lib/Support/Program.cpp index 201d5c0..83f2ec4 100644 --- a/contrib/llvm/lib/Support/Program.cpp +++ b/contrib/llvm/lib/Support/Program.cpp @@ -22,33 +22,40 @@ using namespace sys; //=== independent code. 
//===----------------------------------------------------------------------===// -int -Program::ExecuteAndWait(const Path& path, - const char** args, - const char** envp, - const Path** redirects, - unsigned secondsToWait, - unsigned memoryLimit, - std::string* ErrMsg, +static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, + const char **env, const StringRef **Redirects, + unsigned memoryLimit, std::string *ErrMsg); + +int sys::ExecuteAndWait(StringRef Program, const char **args, const char **envp, + const StringRef **redirects, unsigned secondsToWait, + unsigned memoryLimit, std::string *ErrMsg, bool *ExecutionFailed) { - Program prg; - if (prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg)) { - if (ExecutionFailed) *ExecutionFailed = false; - return prg.Wait(path, secondsToWait, ErrMsg); + ProcessInfo PI; + if (Execute(PI, Program, args, envp, redirects, memoryLimit, ErrMsg)) { + if (ExecutionFailed) + *ExecutionFailed = false; + ProcessInfo Result = Wait(PI, secondsToWait, true, ErrMsg); + return Result.ReturnCode; } - if (ExecutionFailed) *ExecutionFailed = true; + + if (ExecutionFailed) + *ExecutionFailed = true; + return -1; } -void -Program::ExecuteNoWait(const Path& path, - const char** args, - const char** envp, - const Path** redirects, - unsigned memoryLimit, - std::string* ErrMsg) { - Program prg; - prg.Execute(path, args, envp, redirects, memoryLimit, ErrMsg); +ProcessInfo sys::ExecuteNoWait(StringRef Program, const char **args, + const char **envp, const StringRef **redirects, + unsigned memoryLimit, std::string *ErrMsg, + bool *ExecutionFailed) { + ProcessInfo PI; + if (ExecutionFailed) + *ExecutionFailed = false; + if (!Execute(PI, Program, args, envp, redirects, memoryLimit, ErrMsg)) + if (ExecutionFailed) + *ExecutionFailed = true; + + return PI; } // Include the platform-specific parts of this class. diff --git a/contrib/llvm/lib/Support/Regex.cpp b/contrib/llvm/lib/Support/Regex.cpp index efc8b90..5413641 100644 --- a/contrib/llvm/lib/Support/Regex.cpp +++ b/contrib/llvm/lib/Support/Regex.cpp @@ -43,7 +43,7 @@ bool Regex::isValid(std::string &Error) { size_t len = llvm_regerror(error, preg, NULL, 0); - Error.resize(len); + Error.resize(len - 1); llvm_regerror(error, preg, &Error[0], len); return false; } @@ -168,3 +168,10 @@ std::string Regex::sub(StringRef Repl, StringRef String, return Res; } + +bool Regex::isLiteralERE(StringRef Str) { + // Check for regex metacharacters. This list was derived from our regex + // implementation in regcomp.c and double checked against the POSIX extended + // regular expression specification. 
+ return Str.find_first_of("()^$|*+?.[]\\{}") == StringRef::npos; +} diff --git a/contrib/llvm/lib/Support/SmallPtrSet.cpp b/contrib/llvm/lib/Support/SmallPtrSet.cpp index f0fed77..dd417b4 100644 --- a/contrib/llvm/lib/Support/SmallPtrSet.cpp +++ b/contrib/llvm/lib/Support/SmallPtrSet.cpp @@ -202,8 +202,13 @@ void SmallPtrSetImpl::CopyFrom(const SmallPtrSetImpl &RHS) { } else if (CurArraySize != RHS.CurArraySize) { if (isSmall()) CurArray = (const void**)malloc(sizeof(void*) * RHS.CurArraySize); - else - CurArray = (const void**)realloc(CurArray, sizeof(void*)*RHS.CurArraySize); + else { + const void **T = (const void**)realloc(CurArray, + sizeof(void*) * RHS.CurArraySize); + if (!T) + free(CurArray); + CurArray = T; + } assert(CurArray && "Failed to allocate memory?"); } diff --git a/contrib/llvm/lib/Support/SourceMgr.cpp b/contrib/llvm/lib/Support/SourceMgr.cpp index fac3cad..d4b94f8 100644 --- a/contrib/llvm/lib/Support/SourceMgr.cpp +++ b/contrib/llvm/lib/Support/SourceMgr.cpp @@ -52,9 +52,9 @@ SourceMgr::~SourceMgr() { /// AddIncludeFile - Search for a file with the specified name in the current /// directory or in one of the IncludeDirs. If no file is found, this returns /// ~0, otherwise it returns the buffer ID of the stacked file. -unsigned SourceMgr::AddIncludeFile(const std::string &Filename, - SMLoc IncludeLoc, - std::string &IncludedFile) { +size_t SourceMgr::AddIncludeFile(const std::string &Filename, + SMLoc IncludeLoc, + std::string &IncludedFile) { OwningPtr NewBuf; IncludedFile = Filename; MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf); @@ -65,7 +65,7 @@ unsigned SourceMgr::AddIncludeFile(const std::string &Filename, MemoryBuffer::getFile(IncludedFile.c_str(), NewBuf); } - if (NewBuf == 0) return ~0U; + if (!NewBuf) return ~0U; return AddNewSourceBuffer(NewBuf.take(), IncludeLoc); } @@ -211,7 +211,8 @@ SMDiagnostic SourceMgr::GetMessage(SMLoc Loc, SourceMgr::DiagKind Kind, LineStr, ColRanges, FixIts); } -void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, +void SourceMgr::PrintMessage(raw_ostream &OS, SMLoc Loc, + SourceMgr::DiagKind Kind, const Twine &Msg, ArrayRef Ranges, ArrayRef FixIts, bool ShowColors) const { SMDiagnostic Diagnostic = GetMessage(Loc, Kind, Msg, Ranges, FixIts); @@ -222,8 +223,6 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, return; } - raw_ostream &OS = errs(); - if (Loc != SMLoc()) { int CurBuf = FindBufferContainingLoc(Loc); assert(CurBuf != -1 && "Invalid or unspecified location!"); @@ -233,6 +232,12 @@ void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, Diagnostic.print(0, OS, ShowColors); } +void SourceMgr::PrintMessage(SMLoc Loc, SourceMgr::DiagKind Kind, + const Twine &Msg, ArrayRef Ranges, + ArrayRef FixIts, bool ShowColors) const { + PrintMessage(llvm::errs(), Loc, Kind, Msg, Ranges, FixIts, ShowColors); +} + //===----------------------------------------------------------------------===// // SMDiagnostic Implementation //===----------------------------------------------------------------------===// @@ -465,7 +470,7 @@ void SMDiagnostic::print(const char *ProgName, raw_ostream &S, if (FixItInsertionLine.empty()) return; - for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i != e; ++i) { + for (size_t i = 0, e = FixItInsertionLine.size(), OutCol = 0; i < e; ++i) { if (i >= LineContents.size() || LineContents[i] != '\t') { S << FixItInsertionLine[i]; ++OutCol; diff --git a/contrib/llvm/lib/Support/StreamableMemoryObject.cpp b/contrib/llvm/lib/Support/StreamableMemoryObject.cpp 
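// The SmallPtrSet::CopyFrom change above is the classic realloc leak fix:
// on failure realloc returns null while the original block stays live, so
// assigning the result directly would leak it. A minimal sketch of the safe
// pattern (grow is an illustrative name):
#include <cstddef>
#include <cstdlib>

static void *grow(void *Ptr, size_t NewSize) {
  void *T = std::realloc(Ptr, NewSize);
  if (!T)
    std::free(Ptr); // reallocation failed; release the old block ourselves
  return T;         // caller must check for null before using
}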
index 59e27a2..2ed7c5c 100644 --- a/contrib/llvm/lib/Support/StreamableMemoryObject.cpp +++ b/contrib/llvm/lib/Support/StreamableMemoryObject.cpp @@ -31,8 +31,7 @@ public: virtual int readByte(uint64_t address, uint8_t* ptr) const LLVM_OVERRIDE; virtual int readBytes(uint64_t address, uint64_t size, - uint8_t* buf, - uint64_t* copied) const LLVM_OVERRIDE; + uint8_t *buf) const LLVM_OVERRIDE; virtual const uint8_t *getPointer(uint64_t address, uint64_t size) const LLVM_OVERRIDE; virtual bool isValidAddress(uint64_t address) const LLVM_OVERRIDE { @@ -67,11 +66,9 @@ int RawMemoryObject::readByte(uint64_t address, uint8_t* ptr) const { int RawMemoryObject::readBytes(uint64_t address, uint64_t size, - uint8_t* buf, - uint64_t* copied) const { + uint8_t *buf) const { if (!validAddress(address) || !validAddress(address + size - 1)) return -1; memcpy(buf, (uint8_t *)(uintptr_t)(address + FirstChar), size); - if (copied) *copied = size; return size; } @@ -111,11 +108,9 @@ int StreamingMemoryObject::readByte(uint64_t address, uint8_t* ptr) const { int StreamingMemoryObject::readBytes(uint64_t address, uint64_t size, - uint8_t* buf, - uint64_t* copied) const { + uint8_t *buf) const { if (!fetchToPos(address + size - 1)) return -1; memcpy(buf, &Bytes[address + BytesSkipped], size); - if (copied) *copied = size; return 0; } diff --git a/contrib/llvm/lib/Support/StringRef.cpp b/contrib/llvm/lib/Support/StringRef.cpp index d7a0bfa..bfae754 100644 --- a/contrib/llvm/lib/Support/StringRef.cpp +++ b/contrib/llvm/lib/Support/StringRef.cpp @@ -37,20 +37,39 @@ static bool ascii_isdigit(char x) { return x >= '0' && x <= '9'; } -/// compare_lower - Compare strings, ignoring case. -int StringRef::compare_lower(StringRef RHS) const { - for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { - unsigned char LHC = ascii_tolower(Data[I]); - unsigned char RHC = ascii_tolower(RHS.Data[I]); +// strncasecmp() is not available on non-POSIX systems, so define an +// alternative function here. +static int ascii_strncasecmp(const char *LHS, const char *RHS, size_t Length) { + for (size_t I = 0; I < Length; ++I) { + unsigned char LHC = ascii_tolower(LHS[I]); + unsigned char RHC = ascii_tolower(RHS[I]); if (LHC != RHC) return LHC < RHC ? -1 : 1; } + return 0; +} +/// compare_lower - Compare strings, ignoring case. +int StringRef::compare_lower(StringRef RHS) const { + if (int Res = ascii_strncasecmp(Data, RHS.Data, min(Length, RHS.Length))) + return Res; if (Length == RHS.Length) return 0; return Length < RHS.Length ? -1 : 1; } +/// Check if this string starts with the given \p Prefix, ignoring case. +bool StringRef::startswith_lower(StringRef Prefix) const { + return Length >= Prefix.Length && + ascii_strncasecmp(Data, Prefix.Data, Prefix.Length) == 0; +} + +/// Check if this string ends with the given \p Suffix, ignoring case. +bool StringRef::endswith_lower(StringRef Suffix) const { + return Length >= Suffix.Length && + ascii_strncasecmp(end() - Suffix.Length, Suffix.Data, Suffix.Length) == 0; +} + /// compare_numeric - Compare strings, handle embedded numbers. int StringRef::compare_numeric(StringRef RHS) const { for (size_t I = 0, E = min(Length, RHS.Length); I != E; ++I) { @@ -85,7 +104,7 @@ int StringRef::compare_numeric(StringRef RHS) const { // Compute the edit distance between the two given strings. 
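// The portable case-insensitive comparison added to StringRef.cpp above, in
// standalone form. It deliberately lowercases only ASCII so the result is
// locale-independent (ascii_lower and ncasecmp are illustrative names).
#include <cstddef>

static unsigned char ascii_lower(unsigned char C) {
  return (C >= 'A' && C <= 'Z') ? C - 'A' + 'a' : C;
}

static int ncasecmp(const char *L, const char *R, size_t N) {
  for (size_t I = 0; I < N; ++I) {
    unsigned char LC = ascii_lower(L[I]), RC = ascii_lower(R[I]);
    if (LC != RC)
      return LC < RC ? -1 : 1;
  }
  return 0;
}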
unsigned StringRef::edit_distance(llvm::StringRef Other, bool AllowReplacements, - unsigned MaxEditDistance) { + unsigned MaxEditDistance) const { return llvm::ComputeEditDistance( llvm::ArrayRef(data(), size()), llvm::ArrayRef(Other.data(), Other.size()), diff --git a/contrib/llvm/lib/Support/StringRefMemoryObject.cpp b/contrib/llvm/lib/Support/StringRefMemoryObject.cpp new file mode 100644 index 0000000..e035ed1 --- /dev/null +++ b/contrib/llvm/lib/Support/StringRefMemoryObject.cpp @@ -0,0 +1,29 @@ +//===- lib/Support/StringRefMemoryObject.cpp --------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/StringRefMemoryObject.h" + +using namespace llvm; + +int StringRefMemoryObject::readByte(uint64_t Addr, uint8_t *Byte) const { + if (Addr >= Base + getExtent() || Addr < Base) + return -1; + *Byte = Bytes[Addr - Base]; + return 0; +} + +int StringRefMemoryObject::readBytes(uint64_t Addr, + uint64_t Size, + uint8_t *Buf) const { + uint64_t Offset = Addr - Base; + if (Addr >= Base + getExtent() || Offset + Size > getExtent() || Addr < Base) + return -1; + memcpy(Buf, Bytes.data() + Offset, Size); + return 0; +} diff --git a/contrib/llvm/lib/Support/SystemUtils.cpp b/contrib/llvm/lib/Support/SystemUtils.cpp index 54b5e97..2036364 100644 --- a/contrib/llvm/lib/Support/SystemUtils.cpp +++ b/contrib/llvm/lib/Support/SystemUtils.cpp @@ -31,25 +31,3 @@ bool llvm::CheckBitcodeOutputToConsole(raw_ostream &stream_to_check, } return false; } - -/// PrependMainExecutablePath - Prepend the path to the program being executed -/// to \p ExeName, given the value of argv[0] and the address of main() -/// itself. This allows us to find another LLVM tool if it is built in the same -/// directory. An empty string is returned on error; note that this function -/// just mainpulates the path and doesn't check for executability. -/// @brief Find a named executable. -sys::Path llvm::PrependMainExecutablePath(const std::string &ExeName, - const char *Argv0, void *MainAddr) { - // Check the directory that the calling program is in. We can do - // this if ProgramPath contains at least one / character, indicating that it - // is a relative path to the executable itself. 
- sys::Path Result = sys::Path::GetMainExecutable(Argv0, MainAddr); - Result.eraseComponent(); - - if (!Result.isEmpty()) { - Result.appendComponent(ExeName); - Result.appendSuffix(sys::Path::GetEXESuffix()); - } - - return Result; -} diff --git a/contrib/llvm/lib/Support/TargetRegistry.cpp b/contrib/llvm/lib/Support/TargetRegistry.cpp index 9c81327..0c90c17 100644 --- a/contrib/llvm/lib/Support/TargetRegistry.cpp +++ b/contrib/llvm/lib/Support/TargetRegistry.cpp @@ -135,9 +135,9 @@ const Target *TargetRegistry::getClosestTargetForJIT(std::string &Error) { return TheTarget; } -static int TargetArraySortFn(const void *LHS, const void *RHS) { - typedef std::pair pair_ty; - return ((const pair_ty*)LHS)->first.compare(((const pair_ty*)RHS)->first); +static int TargetArraySortFn(const std::pair *LHS, + const std::pair *RHS) { + return LHS->first.compare(RHS->first); } void TargetRegistry::printRegisteredTargetsForVersion() { diff --git a/contrib/llvm/lib/Support/ThreadLocal.cpp b/contrib/llvm/lib/Support/ThreadLocal.cpp index 0587aae..868b6ea 100644 --- a/contrib/llvm/lib/Support/ThreadLocal.cpp +++ b/contrib/llvm/lib/Support/ThreadLocal.cpp @@ -23,7 +23,7 @@ // Define all methods as no-ops if threading is explicitly disabled namespace llvm { using namespace sys; -ThreadLocalImpl::ThreadLocalImpl() { } +ThreadLocalImpl::ThreadLocalImpl() : data() { } ThreadLocalImpl::~ThreadLocalImpl() { } void ThreadLocalImpl::setInstance(const void* d) { typedef int SIZE_TOO_BIG[sizeof(d) <= sizeof(data) ? 1 : -1]; diff --git a/contrib/llvm/lib/Support/Timer.cpp b/contrib/llvm/lib/Support/Timer.cpp index 896d869..100b21e 100644 --- a/contrib/llvm/lib/Support/Timer.cpp +++ b/contrib/llvm/lib/Support/Timer.cpp @@ -66,8 +66,8 @@ raw_ostream *llvm::CreateInfoOutputFile() { // compensate for this, the test-suite Makefiles have code to delete the // info output file before running commands which write to it. std::string Error; - raw_ostream *Result = new raw_fd_ostream(OutputFilename.c_str(), - Error, raw_fd_ostream::F_Append); + raw_ostream *Result = + new raw_fd_ostream(OutputFilename.c_str(), Error, sys::fs::F_Append); if (Error.empty()) return Result; diff --git a/contrib/llvm/lib/Support/ToolOutputFile.cpp b/contrib/llvm/lib/Support/ToolOutputFile.cpp index e7ca927..5c1268a 100644 --- a/contrib/llvm/lib/Support/ToolOutputFile.cpp +++ b/contrib/llvm/lib/Support/ToolOutputFile.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Support/ToolOutputFile.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Signals.h" using namespace llvm; @@ -19,25 +20,30 @@ tool_output_file::CleanupInstaller::CleanupInstaller(const char *filename) : Filename(filename), Keep(false) { // Arrange for the file to be deleted if the process is killed. if (Filename != "-") - sys::RemoveFileOnSignal(sys::Path(Filename)); + sys::RemoveFileOnSignal(Filename); } tool_output_file::CleanupInstaller::~CleanupInstaller() { // Delete the file if the client hasn't told us not to. - if (!Keep && Filename != "-") - sys::Path(Filename).eraseFromDisk(); + if (!Keep && Filename != "-") { + bool Existed; + sys::fs::remove(Filename, Existed); + } // Ok, the file is successfully written and closed, or deleted. There's no // further need to clean it up on signals. 
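// The CleanupInstaller idiom in miniature: delete the output file on every
// exit path unless the caller marks it good. A standalone sketch assuming
// POSIX unlink (OutputGuard is an illustrative name):
#include <string>
#include <unistd.h>
#include <utility>

class OutputGuard {
  std::string Filename;
  bool Keep;
public:
  explicit OutputGuard(std::string F) : Filename(std::move(F)), Keep(false) {}
  void commit() { Keep = true; } // call once the file is fully written
  ~OutputGuard() {
    if (!Keep && Filename != "-")
      unlink(Filename.c_str()); // best effort; errors ignored on cleanup
  }
};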
if (Filename != "-") - sys::DontRemoveFileOnSignal(sys::Path(Filename)); + sys::DontRemoveFileOnSignal(Filename); } tool_output_file::tool_output_file(const char *filename, std::string &ErrorInfo, - unsigned Flags) - : Installer(filename), - OS(filename, ErrorInfo, Flags) { + sys::fs::OpenFlags Flags) + : Installer(filename), OS(filename, ErrorInfo, Flags) { // If open fails, no cleanup is needed. if (!ErrorInfo.empty()) Installer.Keep = true; } + +tool_output_file::tool_output_file(const char *Filename, int FD) + : Installer(Filename), OS(FD, true) { +} diff --git a/contrib/llvm/lib/Support/Triple.cpp b/contrib/llvm/lib/Support/Triple.cpp index 412e34c..6c978a0 100644 --- a/contrib/llvm/lib/Support/Triple.cpp +++ b/contrib/llvm/lib/Support/Triple.cpp @@ -28,6 +28,7 @@ const char *Triple::getArchTypeName(ArchType Kind) { case mips64el:return "mips64el"; case msp430: return "msp430"; case ppc64: return "powerpc64"; + case ppc64le: return "powerpc64le"; case ppc: return "powerpc"; case r600: return "r600"; case sparc: return "sparc"; @@ -38,7 +39,6 @@ const char *Triple::getArchTypeName(ArchType Kind) { case x86: return "i386"; case x86_64: return "x86_64"; case xcore: return "xcore"; - case mblaze: return "mblaze"; case nvptx: return "nvptx"; case nvptx64: return "nvptx64"; case le32: return "le32"; @@ -61,10 +61,9 @@ const char *Triple::getArchTypePrefix(ArchType Kind) { case thumb: return "arm"; case ppc64: + case ppc64le: case ppc: return "ppc"; - case mblaze: return "mblaze"; - case mips: case mipsel: case mips64: @@ -104,6 +103,7 @@ const char *Triple::getVendorTypeName(VendorType Kind) { case BGQ: return "bgq"; case Freescale: return "fsl"; case IBM: return "ibm"; + case NVIDIA: return "nvidia"; } llvm_unreachable("Invalid VendorType!"); @@ -135,6 +135,8 @@ const char *Triple::getOSTypeName(OSType Kind) { case CNK: return "cnk"; case Bitrig: return "bitrig"; case AIX: return "aix"; + case CUDA: return "cuda"; + case NVCL: return "nvcl"; } llvm_unreachable("Invalid OSType"); @@ -168,7 +170,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) { .Case("ppc64", ppc64) .Case("ppc32", ppc) .Case("ppc", ppc) - .Case("mblaze", mblaze) + .Case("ppc64le", ppc64le) .Case("r600", r600) .Case("hexagon", hexagon) .Case("sparc", sparc) @@ -198,7 +200,7 @@ const char *Triple::getArchNameForAssembler() { .Case("x86_64", "x86_64") .Case("powerpc", "ppc") .Case("powerpc64", "ppc64") - .Cases("mblaze", "microblaze", "mblaze") + .Case("powerpc64le", "ppc64le") .Case("arm", "arm") .Cases("armv4t", "thumbv4t", "armv4t") .Cases("armv5", "armv5e", "thumbv5", "thumbv5e", "armv5") @@ -219,10 +221,10 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Cases("i386", "i486", "i586", "i686", Triple::x86) // FIXME: Do we need to support these? 
.Cases("i786", "i886", "i986", Triple::x86) - .Cases("amd64", "x86_64", Triple::x86_64) + .Cases("amd64", "x86_64", "x86_64h", Triple::x86_64) .Case("powerpc", Triple::ppc) .Cases("powerpc64", "ppu", Triple::ppc64) - .Case("mblaze", Triple::mblaze) + .Case("powerpc64le", Triple::ppc64le) .Case("aarch64", Triple::aarch64) .Cases("arm", "xscale", Triple::arm) // FIXME: It would be good to replace these with explicit names for all the @@ -239,7 +241,7 @@ static Triple::ArchType parseArch(StringRef ArchName) { .Case("hexagon", Triple::hexagon) .Case("s390x", Triple::systemz) .Case("sparc", Triple::sparc) - .Case("sparcv9", Triple::sparcv9) + .Cases("sparcv9", "sparc64", Triple::sparcv9) .Case("tce", Triple::tce) .Case("xcore", Triple::xcore) .Case("nvptx", Triple::nvptx) @@ -260,6 +262,7 @@ static Triple::VendorType parseVendor(StringRef VendorName) { .Case("bgq", Triple::BGQ) .Case("fsl", Triple::Freescale) .Case("ibm", Triple::IBM) + .Case("nvidia", Triple::NVIDIA) .Default(Triple::UnknownVendor); } @@ -287,6 +290,8 @@ static Triple::OSType parseOS(StringRef OSName) { .StartsWith("cnk", Triple::CNK) .StartsWith("bitrig", Triple::Bitrig) .StartsWith("aix", Triple::AIX) + .StartsWith("cuda", Triple::CUDA) + .StartsWith("nvcl", Triple::NVCL) .Default(Triple::UnknownOS); } @@ -672,7 +677,6 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::arm: case llvm::Triple::hexagon: case llvm::Triple::le32: - case llvm::Triple::mblaze: case llvm::Triple::mips: case llvm::Triple::mipsel: case llvm::Triple::nvptx: @@ -691,6 +695,7 @@ static unsigned getArchPointerBitWidth(llvm::Triple::ArchType Arch) { case llvm::Triple::mips64el: case llvm::Triple::nvptx64: case llvm::Triple::ppc64: + case llvm::Triple::ppc64le: case llvm::Triple::sparcv9: case llvm::Triple::systemz: case llvm::Triple::x86_64: @@ -719,6 +724,7 @@ Triple Triple::get32BitArchVariant() const { case Triple::aarch64: case Triple::msp430: case Triple::systemz: + case Triple::ppc64le: T.setArch(UnknownArch); break; @@ -727,7 +733,6 @@ Triple Triple::get32BitArchVariant() const { case Triple::arm: case Triple::hexagon: case Triple::le32: - case Triple::mblaze: case Triple::mips: case Triple::mipsel: case Triple::nvptx: @@ -760,7 +765,6 @@ Triple Triple::get64BitArchVariant() const { case Triple::arm: case Triple::hexagon: case Triple::le32: - case Triple::mblaze: case Triple::msp430: case Triple::r600: case Triple::tce: @@ -775,6 +779,7 @@ Triple Triple::get64BitArchVariant() const { case Triple::mips64el: case Triple::nvptx64: case Triple::ppc64: + case Triple::ppc64le: case Triple::sparcv9: case Triple::systemz: case Triple::x86_64: diff --git a/contrib/llvm/lib/Support/Unicode.cpp b/contrib/llvm/lib/Support/Unicode.cpp new file mode 100644 index 0000000..b719bd8 --- /dev/null +++ b/contrib/llvm/lib/Support/Unicode.cpp @@ -0,0 +1,367 @@ +//===- llvm/Support/Unicode.cpp - Unicode character properties -*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements functions that allow querying certain properties of +// Unicode characters. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Support/Unicode.h" +#include "llvm/Support/ConvertUTF.h" +#include "llvm/Support/UnicodeCharRanges.h" + +namespace llvm { +namespace sys { +namespace unicode { + +bool isPrintable(int UCS) { + // Sorted list of non-overlapping intervals of code points that are not + // supposed to be printable. + static const UnicodeCharRange NonPrintableRanges[] = { + { 0x0000, 0x001F }, { 0x007F, 0x009F }, { 0x034F, 0x034F }, + { 0x0378, 0x0379 }, { 0x037F, 0x0383 }, { 0x038B, 0x038B }, + { 0x038D, 0x038D }, { 0x03A2, 0x03A2 }, { 0x0528, 0x0530 }, + { 0x0557, 0x0558 }, { 0x0560, 0x0560 }, { 0x0588, 0x0588 }, + { 0x058B, 0x058E }, { 0x0590, 0x0590 }, { 0x05C8, 0x05CF }, + { 0x05EB, 0x05EF }, { 0x05F5, 0x0605 }, { 0x061C, 0x061D }, + { 0x06DD, 0x06DD }, { 0x070E, 0x070F }, { 0x074B, 0x074C }, + { 0x07B2, 0x07BF }, { 0x07FB, 0x07FF }, { 0x082E, 0x082F }, + { 0x083F, 0x083F }, { 0x085C, 0x085D }, { 0x085F, 0x089F }, + { 0x08A1, 0x08A1 }, { 0x08AD, 0x08E3 }, { 0x08FF, 0x08FF }, + { 0x0978, 0x0978 }, { 0x0980, 0x0980 }, { 0x0984, 0x0984 }, + { 0x098D, 0x098E }, { 0x0991, 0x0992 }, { 0x09A9, 0x09A9 }, + { 0x09B1, 0x09B1 }, { 0x09B3, 0x09B5 }, { 0x09BA, 0x09BB }, + { 0x09C5, 0x09C6 }, { 0x09C9, 0x09CA }, { 0x09CF, 0x09D6 }, + { 0x09D8, 0x09DB }, { 0x09DE, 0x09DE }, { 0x09E4, 0x09E5 }, + { 0x09FC, 0x0A00 }, { 0x0A04, 0x0A04 }, { 0x0A0B, 0x0A0E }, + { 0x0A11, 0x0A12 }, { 0x0A29, 0x0A29 }, { 0x0A31, 0x0A31 }, + { 0x0A34, 0x0A34 }, { 0x0A37, 0x0A37 }, { 0x0A3A, 0x0A3B }, + { 0x0A3D, 0x0A3D }, { 0x0A43, 0x0A46 }, { 0x0A49, 0x0A4A }, + { 0x0A4E, 0x0A50 }, { 0x0A52, 0x0A58 }, { 0x0A5D, 0x0A5D }, + { 0x0A5F, 0x0A65 }, { 0x0A76, 0x0A80 }, { 0x0A84, 0x0A84 }, + { 0x0A8E, 0x0A8E }, { 0x0A92, 0x0A92 }, { 0x0AA9, 0x0AA9 }, + { 0x0AB1, 0x0AB1 }, { 0x0AB4, 0x0AB4 }, { 0x0ABA, 0x0ABB }, + { 0x0AC6, 0x0AC6 }, { 0x0ACA, 0x0ACA }, { 0x0ACE, 0x0ACF }, + { 0x0AD1, 0x0ADF }, { 0x0AE4, 0x0AE5 }, { 0x0AF2, 0x0B00 }, + { 0x0B04, 0x0B04 }, { 0x0B0D, 0x0B0E }, { 0x0B11, 0x0B12 }, + { 0x0B29, 0x0B29 }, { 0x0B31, 0x0B31 }, { 0x0B34, 0x0B34 }, + { 0x0B3A, 0x0B3B }, { 0x0B45, 0x0B46 }, { 0x0B49, 0x0B4A }, + { 0x0B4E, 0x0B55 }, { 0x0B58, 0x0B5B }, { 0x0B5E, 0x0B5E }, + { 0x0B64, 0x0B65 }, { 0x0B78, 0x0B81 }, { 0x0B84, 0x0B84 }, + { 0x0B8B, 0x0B8D }, { 0x0B91, 0x0B91 }, { 0x0B96, 0x0B98 }, + { 0x0B9B, 0x0B9B }, { 0x0B9D, 0x0B9D }, { 0x0BA0, 0x0BA2 }, + { 0x0BA5, 0x0BA7 }, { 0x0BAB, 0x0BAD }, { 0x0BBA, 0x0BBD }, + { 0x0BC3, 0x0BC5 }, { 0x0BC9, 0x0BC9 }, { 0x0BCE, 0x0BCF }, + { 0x0BD1, 0x0BD6 }, { 0x0BD8, 0x0BE5 }, { 0x0BFB, 0x0C00 }, + { 0x0C04, 0x0C04 }, { 0x0C0D, 0x0C0D }, { 0x0C11, 0x0C11 }, + { 0x0C29, 0x0C29 }, { 0x0C34, 0x0C34 }, { 0x0C3A, 0x0C3C }, + { 0x0C45, 0x0C45 }, { 0x0C49, 0x0C49 }, { 0x0C4E, 0x0C54 }, + { 0x0C57, 0x0C57 }, { 0x0C5A, 0x0C5F }, { 0x0C64, 0x0C65 }, + { 0x0C70, 0x0C77 }, { 0x0C80, 0x0C81 }, { 0x0C84, 0x0C84 }, + { 0x0C8D, 0x0C8D }, { 0x0C91, 0x0C91 }, { 0x0CA9, 0x0CA9 }, + { 0x0CB4, 0x0CB4 }, { 0x0CBA, 0x0CBB }, { 0x0CC5, 0x0CC5 }, + { 0x0CC9, 0x0CC9 }, { 0x0CCE, 0x0CD4 }, { 0x0CD7, 0x0CDD }, + { 0x0CDF, 0x0CDF }, { 0x0CE4, 0x0CE5 }, { 0x0CF0, 0x0CF0 }, + { 0x0CF3, 0x0D01 }, { 0x0D04, 0x0D04 }, { 0x0D0D, 0x0D0D }, + { 0x0D11, 0x0D11 }, { 0x0D3B, 0x0D3C }, { 0x0D45, 0x0D45 }, + { 0x0D49, 0x0D49 }, { 0x0D4F, 0x0D56 }, { 0x0D58, 0x0D5F }, + { 0x0D64, 0x0D65 }, { 0x0D76, 0x0D78 }, { 0x0D80, 0x0D81 }, + { 0x0D84, 0x0D84 }, { 0x0D97, 0x0D99 }, { 0x0DB2, 0x0DB2 }, + { 0x0DBC, 0x0DBC }, { 0x0DBE, 0x0DBF }, { 0x0DC7, 0x0DC9 
}, + { 0x0DCB, 0x0DCE }, { 0x0DD5, 0x0DD5 }, { 0x0DD7, 0x0DD7 }, + { 0x0DE0, 0x0DF1 }, { 0x0DF5, 0x0E00 }, { 0x0E3B, 0x0E3E }, + { 0x0E5C, 0x0E80 }, { 0x0E83, 0x0E83 }, { 0x0E85, 0x0E86 }, + { 0x0E89, 0x0E89 }, { 0x0E8B, 0x0E8C }, { 0x0E8E, 0x0E93 }, + { 0x0E98, 0x0E98 }, { 0x0EA0, 0x0EA0 }, { 0x0EA4, 0x0EA4 }, + { 0x0EA6, 0x0EA6 }, { 0x0EA8, 0x0EA9 }, { 0x0EAC, 0x0EAC }, + { 0x0EBA, 0x0EBA }, { 0x0EBE, 0x0EBF }, { 0x0EC5, 0x0EC5 }, + { 0x0EC7, 0x0EC7 }, { 0x0ECE, 0x0ECF }, { 0x0EDA, 0x0EDB }, + { 0x0EE0, 0x0EFF }, { 0x0F48, 0x0F48 }, { 0x0F6D, 0x0F70 }, + { 0x0F98, 0x0F98 }, { 0x0FBD, 0x0FBD }, { 0x0FCD, 0x0FCD }, + { 0x0FDB, 0x0FFF }, { 0x10C6, 0x10C6 }, { 0x10C8, 0x10CC }, + { 0x10CE, 0x10CF }, { 0x115F, 0x1160 }, { 0x1249, 0x1249 }, + { 0x124E, 0x124F }, { 0x1257, 0x1257 }, { 0x1259, 0x1259 }, + { 0x125E, 0x125F }, { 0x1289, 0x1289 }, { 0x128E, 0x128F }, + { 0x12B1, 0x12B1 }, { 0x12B6, 0x12B7 }, { 0x12BF, 0x12BF }, + { 0x12C1, 0x12C1 }, { 0x12C6, 0x12C7 }, { 0x12D7, 0x12D7 }, + { 0x1311, 0x1311 }, { 0x1316, 0x1317 }, { 0x135B, 0x135C }, + { 0x137D, 0x137F }, { 0x139A, 0x139F }, { 0x13F5, 0x13FF }, + { 0x169D, 0x169F }, { 0x16F1, 0x16FF }, { 0x170D, 0x170D }, + { 0x1715, 0x171F }, { 0x1737, 0x173F }, { 0x1754, 0x175F }, + { 0x176D, 0x176D }, { 0x1771, 0x1771 }, { 0x1774, 0x177F }, + { 0x17B4, 0x17B5 }, { 0x17DE, 0x17DF }, { 0x17EA, 0x17EF }, + { 0x17FA, 0x17FF }, { 0x180B, 0x180D }, { 0x180F, 0x180F }, + { 0x181A, 0x181F }, { 0x1878, 0x187F }, { 0x18AB, 0x18AF }, + { 0x18F6, 0x18FF }, { 0x191D, 0x191F }, { 0x192C, 0x192F }, + { 0x193C, 0x193F }, { 0x1941, 0x1943 }, { 0x196E, 0x196F }, + { 0x1975, 0x197F }, { 0x19AC, 0x19AF }, { 0x19CA, 0x19CF }, + { 0x19DB, 0x19DD }, { 0x1A1C, 0x1A1D }, { 0x1A5F, 0x1A5F }, + { 0x1A7D, 0x1A7E }, { 0x1A8A, 0x1A8F }, { 0x1A9A, 0x1A9F }, + { 0x1AAE, 0x1AFF }, { 0x1B4C, 0x1B4F }, { 0x1B7D, 0x1B7F }, + { 0x1BF4, 0x1BFB }, { 0x1C38, 0x1C3A }, { 0x1C4A, 0x1C4C }, + { 0x1C80, 0x1CBF }, { 0x1CC8, 0x1CCF }, { 0x1CF7, 0x1CFF }, + { 0x1DE7, 0x1DFB }, { 0x1F16, 0x1F17 }, { 0x1F1E, 0x1F1F }, + { 0x1F46, 0x1F47 }, { 0x1F4E, 0x1F4F }, { 0x1F58, 0x1F58 }, + { 0x1F5A, 0x1F5A }, { 0x1F5C, 0x1F5C }, { 0x1F5E, 0x1F5E }, + { 0x1F7E, 0x1F7F }, { 0x1FB5, 0x1FB5 }, { 0x1FC5, 0x1FC5 }, + { 0x1FD4, 0x1FD5 }, { 0x1FDC, 0x1FDC }, { 0x1FF0, 0x1FF1 }, + { 0x1FF5, 0x1FF5 }, { 0x1FFF, 0x1FFF }, { 0x200B, 0x200F }, + { 0x202A, 0x202E }, { 0x2060, 0x206F }, { 0x2072, 0x2073 }, + { 0x208F, 0x208F }, { 0x209D, 0x209F }, { 0x20BB, 0x20CF }, + { 0x20F1, 0x20FF }, { 0x218A, 0x218F }, { 0x23F4, 0x23FF }, + { 0x2427, 0x243F }, { 0x244B, 0x245F }, { 0x2700, 0x2700 }, + { 0x2B4D, 0x2B4F }, { 0x2B5A, 0x2BFF }, { 0x2C2F, 0x2C2F }, + { 0x2C5F, 0x2C5F }, { 0x2CF4, 0x2CF8 }, { 0x2D26, 0x2D26 }, + { 0x2D28, 0x2D2C }, { 0x2D2E, 0x2D2F }, { 0x2D68, 0x2D6E }, + { 0x2D71, 0x2D7E }, { 0x2D97, 0x2D9F }, { 0x2DA7, 0x2DA7 }, + { 0x2DAF, 0x2DAF }, { 0x2DB7, 0x2DB7 }, { 0x2DBF, 0x2DBF }, + { 0x2DC7, 0x2DC7 }, { 0x2DCF, 0x2DCF }, { 0x2DD7, 0x2DD7 }, + { 0x2DDF, 0x2DDF }, { 0x2E3C, 0x2E7F }, { 0x2E9A, 0x2E9A }, + { 0x2EF4, 0x2EFF }, { 0x2FD6, 0x2FEF }, { 0x2FFC, 0x2FFF }, + { 0x3040, 0x3040 }, { 0x3097, 0x3098 }, { 0x3100, 0x3104 }, + { 0x312E, 0x3130 }, { 0x3164, 0x3164 }, { 0x318F, 0x318F }, + { 0x31BB, 0x31BF }, { 0x31E4, 0x31EF }, { 0x321F, 0x321F }, + { 0x32FF, 0x32FF }, { 0x4DB6, 0x4DBF }, { 0x9FCD, 0x9FFF }, + { 0xA48D, 0xA48F }, { 0xA4C7, 0xA4CF }, { 0xA62C, 0xA63F }, + { 0xA698, 0xA69E }, { 0xA6F8, 0xA6FF }, { 0xA78F, 0xA78F }, + { 0xA794, 0xA79F }, { 0xA7AB, 0xA7F7 }, { 0xA82C, 0xA82F }, + { 0xA83A, 
0xA83F }, { 0xA878, 0xA87F }, { 0xA8C5, 0xA8CD }, + { 0xA8DA, 0xA8DF }, { 0xA8FC, 0xA8FF }, { 0xA954, 0xA95E }, + { 0xA97D, 0xA97F }, { 0xA9CE, 0xA9CE }, { 0xA9DA, 0xA9DD }, + { 0xA9E0, 0xA9FF }, { 0xAA37, 0xAA3F }, { 0xAA4E, 0xAA4F }, + { 0xAA5A, 0xAA5B }, { 0xAA7C, 0xAA7F }, { 0xAAC3, 0xAADA }, + { 0xAAF7, 0xAB00 }, { 0xAB07, 0xAB08 }, { 0xAB0F, 0xAB10 }, + { 0xAB17, 0xAB1F }, { 0xAB27, 0xAB27 }, { 0xAB2F, 0xABBF }, + { 0xABEE, 0xABEF }, { 0xABFA, 0xABFF }, { 0xD7A4, 0xD7AF }, + { 0xD7C7, 0xD7CA }, { 0xD7FC, 0xDFFF }, { 0xFA6E, 0xFA6F }, + { 0xFADA, 0xFAFF }, { 0xFB07, 0xFB12 }, { 0xFB18, 0xFB1C }, + { 0xFB37, 0xFB37 }, { 0xFB3D, 0xFB3D }, { 0xFB3F, 0xFB3F }, + { 0xFB42, 0xFB42 }, { 0xFB45, 0xFB45 }, { 0xFBC2, 0xFBD2 }, + { 0xFD40, 0xFD4F }, { 0xFD90, 0xFD91 }, { 0xFDC8, 0xFDEF }, + { 0xFDFE, 0xFE0F }, { 0xFE1A, 0xFE1F }, { 0xFE27, 0xFE2F }, + { 0xFE53, 0xFE53 }, { 0xFE67, 0xFE67 }, { 0xFE6C, 0xFE6F }, + { 0xFE75, 0xFE75 }, { 0xFEFD, 0xFEFF }, { 0xFF00, 0xFF00 }, + { 0xFFA0, 0xFFA0 }, { 0xFFBF, 0xFFC1 }, { 0xFFC8, 0xFFC9 }, + { 0xFFD0, 0xFFD1 }, { 0xFFD8, 0xFFD9 }, { 0xFFDD, 0xFFDF }, + { 0xFFE7, 0xFFE7 }, { 0xFFEF, 0xFFFB }, { 0xFFFE, 0xFFFF }, + { 0x1000C, 0x1000C }, { 0x10027, 0x10027 }, { 0x1003B, 0x1003B }, + { 0x1003E, 0x1003E }, { 0x1004E, 0x1004F }, { 0x1005E, 0x1007F }, + { 0x100FB, 0x100FF }, { 0x10103, 0x10106 }, { 0x10134, 0x10136 }, + { 0x1018B, 0x1018F }, { 0x1019C, 0x101CF }, { 0x101FE, 0x1027F }, + { 0x1029D, 0x1029F }, { 0x102D1, 0x102FF }, { 0x1031F, 0x1031F }, + { 0x10324, 0x1032F }, { 0x1034B, 0x1037F }, { 0x1039E, 0x1039E }, + { 0x103C4, 0x103C7 }, { 0x103D6, 0x103FF }, { 0x1049E, 0x1049F }, + { 0x104AA, 0x107FF }, { 0x10806, 0x10807 }, { 0x10809, 0x10809 }, + { 0x10836, 0x10836 }, { 0x10839, 0x1083B }, { 0x1083D, 0x1083E }, + { 0x10856, 0x10856 }, { 0x10860, 0x108FF }, { 0x1091C, 0x1091E }, + { 0x1093A, 0x1093E }, { 0x10940, 0x1097F }, { 0x109B8, 0x109BD }, + { 0x109C0, 0x109FF }, { 0x10A04, 0x10A04 }, { 0x10A07, 0x10A0B }, + { 0x10A14, 0x10A14 }, { 0x10A18, 0x10A18 }, { 0x10A34, 0x10A37 }, + { 0x10A3B, 0x10A3E }, { 0x10A48, 0x10A4F }, { 0x10A59, 0x10A5F }, + { 0x10A80, 0x10AFF }, { 0x10B36, 0x10B38 }, { 0x10B56, 0x10B57 }, + { 0x10B73, 0x10B77 }, { 0x10B80, 0x10BFF }, { 0x10C49, 0x10E5F }, + { 0x10E7F, 0x10FFF }, { 0x1104E, 0x11051 }, { 0x11070, 0x1107F }, + { 0x110BD, 0x110BD }, { 0x110C2, 0x110CF }, { 0x110E9, 0x110EF }, + { 0x110FA, 0x110FF }, { 0x11135, 0x11135 }, { 0x11144, 0x1117F }, + { 0x111C9, 0x111CF }, { 0x111DA, 0x1167F }, { 0x116B8, 0x116BF }, + { 0x116CA, 0x11FFF }, { 0x1236F, 0x123FF }, { 0x12463, 0x1246F }, + { 0x12474, 0x12FFF }, { 0x1342F, 0x167FF }, { 0x16A39, 0x16EFF }, + { 0x16F45, 0x16F4F }, { 0x16F7F, 0x16F8E }, { 0x16FA0, 0x1AFFF }, + { 0x1B002, 0x1CFFF }, { 0x1D0F6, 0x1D0FF }, { 0x1D127, 0x1D128 }, + { 0x1D173, 0x1D17A }, { 0x1D1DE, 0x1D1FF }, { 0x1D246, 0x1D2FF }, + { 0x1D357, 0x1D35F }, { 0x1D372, 0x1D3FF }, { 0x1D455, 0x1D455 }, + { 0x1D49D, 0x1D49D }, { 0x1D4A0, 0x1D4A1 }, { 0x1D4A3, 0x1D4A4 }, + { 0x1D4A7, 0x1D4A8 }, { 0x1D4AD, 0x1D4AD }, { 0x1D4BA, 0x1D4BA }, + { 0x1D4BC, 0x1D4BC }, { 0x1D4C4, 0x1D4C4 }, { 0x1D506, 0x1D506 }, + { 0x1D50B, 0x1D50C }, { 0x1D515, 0x1D515 }, { 0x1D51D, 0x1D51D }, + { 0x1D53A, 0x1D53A }, { 0x1D53F, 0x1D53F }, { 0x1D545, 0x1D545 }, + { 0x1D547, 0x1D549 }, { 0x1D551, 0x1D551 }, { 0x1D6A6, 0x1D6A7 }, + { 0x1D7CC, 0x1D7CD }, { 0x1D800, 0x1EDFF }, { 0x1EE04, 0x1EE04 }, + { 0x1EE20, 0x1EE20 }, { 0x1EE23, 0x1EE23 }, { 0x1EE25, 0x1EE26 }, + { 0x1EE28, 0x1EE28 }, { 0x1EE33, 0x1EE33 }, { 0x1EE38, 0x1EE38 }, + { 
0x1EE3A, 0x1EE3A }, { 0x1EE3C, 0x1EE41 }, { 0x1EE43, 0x1EE46 }, + { 0x1EE48, 0x1EE48 }, { 0x1EE4A, 0x1EE4A }, { 0x1EE4C, 0x1EE4C }, + { 0x1EE50, 0x1EE50 }, { 0x1EE53, 0x1EE53 }, { 0x1EE55, 0x1EE56 }, + { 0x1EE58, 0x1EE58 }, { 0x1EE5A, 0x1EE5A }, { 0x1EE5C, 0x1EE5C }, + { 0x1EE5E, 0x1EE5E }, { 0x1EE60, 0x1EE60 }, { 0x1EE63, 0x1EE63 }, + { 0x1EE65, 0x1EE66 }, { 0x1EE6B, 0x1EE6B }, { 0x1EE73, 0x1EE73 }, + { 0x1EE78, 0x1EE78 }, { 0x1EE7D, 0x1EE7D }, { 0x1EE7F, 0x1EE7F }, + { 0x1EE8A, 0x1EE8A }, { 0x1EE9C, 0x1EEA0 }, { 0x1EEA4, 0x1EEA4 }, + { 0x1EEAA, 0x1EEAA }, { 0x1EEBC, 0x1EEEF }, { 0x1EEF2, 0x1EFFF }, + { 0x1F02C, 0x1F02F }, { 0x1F094, 0x1F09F }, { 0x1F0AF, 0x1F0B0 }, + { 0x1F0BF, 0x1F0C0 }, { 0x1F0D0, 0x1F0D0 }, { 0x1F0E0, 0x1F0FF }, + { 0x1F10B, 0x1F10F }, { 0x1F12F, 0x1F12F }, { 0x1F16C, 0x1F16F }, + { 0x1F19B, 0x1F1E5 }, { 0x1F203, 0x1F20F }, { 0x1F23B, 0x1F23F }, + { 0x1F249, 0x1F24F }, { 0x1F252, 0x1F2FF }, { 0x1F321, 0x1F32F }, + { 0x1F336, 0x1F336 }, { 0x1F37D, 0x1F37F }, { 0x1F394, 0x1F39F }, + { 0x1F3C5, 0x1F3C5 }, { 0x1F3CB, 0x1F3DF }, { 0x1F3F1, 0x1F3FF }, + { 0x1F43F, 0x1F43F }, { 0x1F441, 0x1F441 }, { 0x1F4F8, 0x1F4F8 }, + { 0x1F4FD, 0x1F4FF }, { 0x1F53E, 0x1F53F }, { 0x1F544, 0x1F54F }, + { 0x1F568, 0x1F5FA }, { 0x1F641, 0x1F644 }, { 0x1F650, 0x1F67F }, + { 0x1F6C6, 0x1F6FF }, { 0x1F774, 0x1FFFF }, { 0x2A6D7, 0x2A6FF }, + { 0x2B735, 0x2B73F }, { 0x2B81E, 0x2F7FF }, { 0x2FA1E, 0xF0000 }, + { 0xFFFFE, 0xFFFFF }, { 0x10FFFE, 0x10FFFF } + }; + static const UnicodeCharSet NonPrintables(NonPrintableRanges); + + return UCS >= 0 && UCS <= 0x10FFFF && !NonPrintables.contains(UCS); +} + +/// Gets the number of positions a character is likely to occupy when output +/// on a terminal ("character width"). This depends on the implementation of the +/// terminal, and there's no standard definition of character width. +/// The implementation defines it in a way that is expected to be compatible +/// with a generic Unicode-capable terminal. +/// \return Character width: +/// * ErrorNonPrintableCharacter (-1) for non-printable characters (as +/// identified by isPrintable); +/// * 0 for non-spacing and enclosing combining marks; +/// * 2 for CJK characters excluding halfwidth forms; +/// * 1 for all remaining characters. 
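To make the four width classes documented above concrete before the definition that follows, a caller can exercise the public entry point directly; a minimal sketch, assuming that ErrorNonPrintableCharacter is exposed alongside columnWidthUTF8 in llvm/Support/Unicode.h and that the escape-encoded literals decode to the code points named in the comments:

    #include "llvm/Support/Unicode.h"
    #include <cassert>

    using namespace llvm::sys::unicode;

    int main() {
      assert(columnWidthUTF8("abc") == 3);          // plain characters: one column each
      assert(columnWidthUTF8("\xE4\xB8\xAD") == 2); // U+4E2D, a CJK ideograph: two columns
      assert(columnWidthUTF8("e\xCC\x81") == 1);    // U+0301, a combining mark: zero columns
      assert(columnWidthUTF8("\x07") == ErrorNonPrintableCharacter); // BEL is non-printable
      return 0;
    }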
+static inline int charWidth(int UCS) +{ + if (!isPrintable(UCS)) + return ErrorNonPrintableCharacter; + + // Sorted list of non-spacing and enclosing combining mark intervals as + // defined in "3.6 Combination" of + // http://www.unicode.org/versions/Unicode6.2.0/UnicodeStandard-6.2.pdf + static const UnicodeCharRange CombiningCharacterRanges[] = { + { 0x0300, 0x036F }, { 0x0483, 0x0489 }, { 0x0591, 0x05BD }, + { 0x05BF, 0x05BF }, { 0x05C1, 0x05C2 }, { 0x05C4, 0x05C5 }, + { 0x05C7, 0x05C7 }, { 0x0610, 0x061A }, { 0x064B, 0x065F }, + { 0x0670, 0x0670 }, { 0x06D6, 0x06DC }, { 0x06DF, 0x06E4 }, + { 0x06E7, 0x06E8 }, { 0x06EA, 0x06ED }, { 0x0711, 0x0711 }, + { 0x0730, 0x074A }, { 0x07A6, 0x07B0 }, { 0x07EB, 0x07F3 }, + { 0x0816, 0x0819 }, { 0x081B, 0x0823 }, { 0x0825, 0x0827 }, + { 0x0829, 0x082D }, { 0x0859, 0x085B }, { 0x08E4, 0x08FE }, + { 0x0900, 0x0902 }, { 0x093A, 0x093A }, { 0x093C, 0x093C }, + { 0x0941, 0x0948 }, { 0x094D, 0x094D }, { 0x0951, 0x0957 }, + { 0x0962, 0x0963 }, { 0x0981, 0x0981 }, { 0x09BC, 0x09BC }, + { 0x09C1, 0x09C4 }, { 0x09CD, 0x09CD }, { 0x09E2, 0x09E3 }, + { 0x0A01, 0x0A02 }, { 0x0A3C, 0x0A3C }, { 0x0A41, 0x0A42 }, + { 0x0A47, 0x0A48 }, { 0x0A4B, 0x0A4D }, { 0x0A51, 0x0A51 }, + { 0x0A70, 0x0A71 }, { 0x0A75, 0x0A75 }, { 0x0A81, 0x0A82 }, + { 0x0ABC, 0x0ABC }, { 0x0AC1, 0x0AC5 }, { 0x0AC7, 0x0AC8 }, + { 0x0ACD, 0x0ACD }, { 0x0AE2, 0x0AE3 }, { 0x0B01, 0x0B01 }, + { 0x0B3C, 0x0B3C }, { 0x0B3F, 0x0B3F }, { 0x0B41, 0x0B44 }, + { 0x0B4D, 0x0B4D }, { 0x0B56, 0x0B56 }, { 0x0B62, 0x0B63 }, + { 0x0B82, 0x0B82 }, { 0x0BC0, 0x0BC0 }, { 0x0BCD, 0x0BCD }, + { 0x0C3E, 0x0C40 }, { 0x0C46, 0x0C48 }, { 0x0C4A, 0x0C4D }, + { 0x0C55, 0x0C56 }, { 0x0C62, 0x0C63 }, { 0x0CBC, 0x0CBC }, + { 0x0CBF, 0x0CBF }, { 0x0CC6, 0x0CC6 }, { 0x0CCC, 0x0CCD }, + { 0x0CE2, 0x0CE3 }, { 0x0D41, 0x0D44 }, { 0x0D4D, 0x0D4D }, + { 0x0D62, 0x0D63 }, { 0x0DCA, 0x0DCA }, { 0x0DD2, 0x0DD4 }, + { 0x0DD6, 0x0DD6 }, { 0x0E31, 0x0E31 }, { 0x0E34, 0x0E3A }, + { 0x0E47, 0x0E4E }, { 0x0EB1, 0x0EB1 }, { 0x0EB4, 0x0EB9 }, + { 0x0EBB, 0x0EBC }, { 0x0EC8, 0x0ECD }, { 0x0F18, 0x0F19 }, + { 0x0F35, 0x0F35 }, { 0x0F37, 0x0F37 }, { 0x0F39, 0x0F39 }, + { 0x0F71, 0x0F7E }, { 0x0F80, 0x0F84 }, { 0x0F86, 0x0F87 }, + { 0x0F8D, 0x0F97 }, { 0x0F99, 0x0FBC }, { 0x0FC6, 0x0FC6 }, + { 0x102D, 0x1030 }, { 0x1032, 0x1037 }, { 0x1039, 0x103A }, + { 0x103D, 0x103E }, { 0x1058, 0x1059 }, { 0x105E, 0x1060 }, + { 0x1071, 0x1074 }, { 0x1082, 0x1082 }, { 0x1085, 0x1086 }, + { 0x108D, 0x108D }, { 0x109D, 0x109D }, { 0x135D, 0x135F }, + { 0x1712, 0x1714 }, { 0x1732, 0x1734 }, { 0x1752, 0x1753 }, + { 0x1772, 0x1773 }, { 0x17B4, 0x17B5 }, { 0x17B7, 0x17BD }, + { 0x17C6, 0x17C6 }, { 0x17C9, 0x17D3 }, { 0x17DD, 0x17DD }, + { 0x180B, 0x180D }, { 0x18A9, 0x18A9 }, { 0x1920, 0x1922 }, + { 0x1927, 0x1928 }, { 0x1932, 0x1932 }, { 0x1939, 0x193B }, + { 0x1A17, 0x1A18 }, { 0x1A56, 0x1A56 }, { 0x1A58, 0x1A5E }, + { 0x1A60, 0x1A60 }, { 0x1A62, 0x1A62 }, { 0x1A65, 0x1A6C }, + { 0x1A73, 0x1A7C }, { 0x1A7F, 0x1A7F }, { 0x1B00, 0x1B03 }, + { 0x1B34, 0x1B34 }, { 0x1B36, 0x1B3A }, { 0x1B3C, 0x1B3C }, + { 0x1B42, 0x1B42 }, { 0x1B6B, 0x1B73 }, { 0x1B80, 0x1B81 }, + { 0x1BA2, 0x1BA5 }, { 0x1BA8, 0x1BA9 }, { 0x1BAB, 0x1BAB }, + { 0x1BE6, 0x1BE6 }, { 0x1BE8, 0x1BE9 }, { 0x1BED, 0x1BED }, + { 0x1BEF, 0x1BF1 }, { 0x1C2C, 0x1C33 }, { 0x1C36, 0x1C37 }, + { 0x1CD0, 0x1CD2 }, { 0x1CD4, 0x1CE0 }, { 0x1CE2, 0x1CE8 }, + { 0x1CED, 0x1CED }, { 0x1CF4, 0x1CF4 }, { 0x1DC0, 0x1DE6 }, + { 0x1DFC, 0x1DFF }, { 0x20D0, 0x20F0 }, { 0x2CEF, 0x2CF1 }, + { 0x2D7F, 0x2D7F }, { 0x2DE0, 0x2DFF 
}, { 0x302A, 0x302D }, + { 0x3099, 0x309A }, { 0xA66F, 0xA672 }, { 0xA674, 0xA67D }, + { 0xA69F, 0xA69F }, { 0xA6F0, 0xA6F1 }, { 0xA802, 0xA802 }, + { 0xA806, 0xA806 }, { 0xA80B, 0xA80B }, { 0xA825, 0xA826 }, + { 0xA8C4, 0xA8C4 }, { 0xA8E0, 0xA8F1 }, { 0xA926, 0xA92D }, + { 0xA947, 0xA951 }, { 0xA980, 0xA982 }, { 0xA9B3, 0xA9B3 }, + { 0xA9B6, 0xA9B9 }, { 0xA9BC, 0xA9BC }, { 0xAA29, 0xAA2E }, + { 0xAA31, 0xAA32 }, { 0xAA35, 0xAA36 }, { 0xAA43, 0xAA43 }, + { 0xAA4C, 0xAA4C }, { 0xAAB0, 0xAAB0 }, { 0xAAB2, 0xAAB4 }, + { 0xAAB7, 0xAAB8 }, { 0xAABE, 0xAABF }, { 0xAAC1, 0xAAC1 }, + { 0xAAEC, 0xAAED }, { 0xAAF6, 0xAAF6 }, { 0xABE5, 0xABE5 }, + { 0xABE8, 0xABE8 }, { 0xABED, 0xABED }, { 0xFB1E, 0xFB1E }, + { 0xFE00, 0xFE0F }, { 0xFE20, 0xFE26 }, { 0x101FD, 0x101FD }, + { 0x10A01, 0x10A03 }, { 0x10A05, 0x10A06 }, { 0x10A0C, 0x10A0F }, + { 0x10A38, 0x10A3A }, { 0x10A3F, 0x10A3F }, { 0x11001, 0x11001 }, + { 0x11038, 0x11046 }, { 0x11080, 0x11081 }, { 0x110B3, 0x110B6 }, + { 0x110B9, 0x110BA }, { 0x11100, 0x11102 }, { 0x11127, 0x1112B }, + { 0x1112D, 0x11134 }, { 0x11180, 0x11181 }, { 0x111B6, 0x111BE }, + { 0x116AB, 0x116AB }, { 0x116AD, 0x116AD }, { 0x116B0, 0x116B5 }, + { 0x116B7, 0x116B7 }, { 0x16F8F, 0x16F92 }, { 0x1D167, 0x1D169 }, + { 0x1D17B, 0x1D182 }, { 0x1D185, 0x1D18B }, { 0x1D1AA, 0x1D1AD }, + { 0x1D242, 0x1D244 }, { 0xE0100, 0xE01EF }, + }; + static const UnicodeCharSet CombiningCharacters(CombiningCharacterRanges); + + if (CombiningCharacters.contains(UCS)) + return 0; + + static const UnicodeCharRange DoubleWidthCharacterRanges[] = { + // Hangul Jamo + { 0x1100, 0x11FF }, + // Deprecated fullwidth angle brackets + { 0x2329, 0x232A }, + // CJK Misc, CJK Unified Ideographs, Yijing Hexagrams, Yi + // excluding U+303F (IDEOGRAPHIC HALF FILL SPACE) + { 0x2E80, 0x303E }, { 0x3040, 0xA4CF }, + // Hangul + { 0xAC00, 0xD7A3 }, { 0xD7B0, 0xD7C6 }, { 0xD7CB, 0xD7FB }, + // CJK Unified Ideographs + { 0xF900, 0xFAFF }, + // Vertical forms + { 0xFE10, 0xFE19 }, + // CJK Compatibility Forms + Small Form Variants + { 0xFE30, 0xFE6F }, + // Fullwidth forms + { 0xFF01, 0xFF60 }, { 0xFFE0, 0xFFE6 }, + // CJK Unified Ideographs + { 0x20000, 0x2A6DF }, { 0x2A700, 0x2B81F }, { 0x2F800, 0x2FA1F } + }; + static const UnicodeCharSet DoubleWidthCharacters(DoubleWidthCharacterRanges); + + if (DoubleWidthCharacters.contains(UCS)) + return 2; + return 1; +} + +int columnWidthUTF8(StringRef Text) { + unsigned ColumnWidth = 0; + unsigned Length; + for (size_t i = 0, e = Text.size(); i < e; i += Length) { + Length = getNumBytesForUTF8(Text[i]); + if (Length <= 0 || i + Length > Text.size()) + return ErrorInvalidUTF8; + UTF32 buf[1]; + const UTF8 *Start = reinterpret_cast<const UTF8 *>(Text.data() + i); + UTF32 *Target = &buf[0]; + if (conversionOK != ConvertUTF8toUTF32(&Start, Start + Length, &Target, + Target + 1, strictConversion)) + return ErrorInvalidUTF8; + int Width = charWidth(buf[0]); + if (Width < 0) + return ErrorNonPrintableCharacter; + ColumnWidth += Width; + } + return ColumnWidth; +} + +} // namespace unicode +} // namespace sys +} // namespace llvm + diff --git a/contrib/llvm/lib/Support/Unix/Memory.inc b/contrib/llvm/lib/Support/Unix/Memory.inc index 2bb9bf1..dcfd76e 100644 --- a/contrib/llvm/lib/Support/Unix/Memory.inc +++ b/contrib/llvm/lib/Support/Unix/Memory.inc @@ -32,7 +32,11 @@ # endif #endif +#ifdef __APPLE__ extern "C" void sys_icache_invalidate(const void *Addr, size_t len); +#else +extern "C" void __clear_cache(void *, void*); +#endif namespace { @@ -267,6 +271,9 @@ bool Memory::setExecutable (MemoryBlock
&M, std::string *ErrMsg) { kern_return_t kr = vm_protect(mach_task_self(), (vm_address_t)M.Address, (vm_size_t)M.Size, 0, VM_PROT_READ | VM_PROT_EXECUTE | VM_PROT_COPY); return KERN_SUCCESS == kr; +#elif defined(__arm__) || defined(__aarch64__) + Memory::InvalidateInstructionCache(M.Address, M.Size); + return true; #else return true; #endif diff --git a/contrib/llvm/lib/Support/Unix/Path.inc b/contrib/llvm/lib/Support/Unix/Path.inc index 6a5ebb8..c9dc871 100644 --- a/contrib/llvm/lib/Support/Unix/Path.inc +++ b/contrib/llvm/lib/Support/Unix/Path.inc @@ -1,4 +1,4 @@ -//===- llvm/Support/Unix/Path.cpp - Unix Path Implementation -----*- C++ -*-===// +//===- llvm/Support/Unix/Path.inc - Unix Path Implementation ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// // -// This file implements the Unix specific portion of the Path class. +// This file implements the Unix specific implementation of the Path API. // //===----------------------------------------------------------------------===// @@ -17,6 +17,9 @@ //===----------------------------------------------------------------------===// #include "Unix.h" +#include "llvm/Support/Process.h" +#include <limits.h> +#include <stdio.h> #if HAVE_SYS_STAT_H #include <sys/stat.h> #endif @@ -26,15 +29,6 @@ #ifdef HAVE_SYS_MMAN_H #include <sys/mman.h> #endif -#ifdef HAVE_SYS_STAT_H -#include <sys/stat.h> -#endif -#if HAVE_UTIME_H -#include <utime.h> -#endif -#if HAVE_TIME_H -#include <time.h> -#endif #if HAVE_DIRENT_H # include <dirent.h> # define NAMLEN(dirent) strlen((dirent)->d_name) @@ -52,217 +46,143 @@ # endif #endif -#if HAVE_DLFCN_H -#include <dlfcn.h> -#endif - #ifdef __APPLE__ #include <mach-o/dyld.h> #endif +// Both stdio.h and cstdio are included via different paths and +// stdcxx's cstdio doesn't include stdio.h, so it doesn't #undef the macros +// either. +#undef ferror +#undef feof + // For GNU Hurd -#if defined(__GNU__) && !defined(MAXPATHLEN) -# define MAXPATHLEN 4096 +#if defined(__GNU__) && !defined(PATH_MAX) # define PATH_MAX 4096 #endif -// Put in a hack for Cygwin which falsely reports that the mkdtemp function -// is available when it is not. -#ifdef __CYGWIN__ -# undef HAVE_MKDTEMP -#endif +using namespace llvm; namespace { -inline bool lastIsSlash(const std::string& path) { - return !path.empty() && path[path.length() - 1] == '/'; -} - -} + /// This class automatically closes the given file descriptor when it goes out + /// of scope. You can take back explicit ownership of the file descriptor by + /// calling take(). The destructor does not verify that close was successful. + /// Therefore, never allow this class to call close on a file descriptor that + /// has been read from or written to. + struct AutoFD { + int FileDescriptor; + + AutoFD(int fd) : FileDescriptor(fd) {} + ~AutoFD() { + if (FileDescriptor >= 0) + ::close(FileDescriptor); + } -namespace llvm { -using namespace sys; + int take() { + int ret = FileDescriptor; + FileDescriptor = -1; + return ret; + } -const char sys::PathSeparator = ':'; + operator int() const {return FileDescriptor;} + }; + + error_code TempDir(SmallVectorImpl<char> &result) { + // FIXME: Don't use TMPDIR if program is SUID or SGID enabled.
+ const char *dir = 0; + (dir = std::getenv("TMPDIR" )) || + (dir = std::getenv("TMP" )) || + (dir = std::getenv("TEMP" )) || + (dir = std::getenv("TEMPDIR")) || +#ifdef P_tmpdir + (dir = P_tmpdir) || +#endif + (dir = "/tmp"); -StringRef Path::GetEXESuffix() { - return StringRef(); + result.clear(); + StringRef d(dir); + result.append(d.begin(), d.end()); + return error_code::success(); + } } -Path::Path(StringRef p) - : path(p) {} - -Path::Path(const char *StrStart, unsigned StrLen) - : path(StrStart, StrLen) {} - -Path& -Path::operator=(StringRef that) { - path.assign(that.data(), that.size()); - return *this; -} +static error_code createUniqueEntity(const Twine &Model, int &ResultFD, + SmallVectorImpl<char> &ResultPath, + bool MakeAbsolute, unsigned Mode, + FSEntity Type) { + SmallString<128> ModelStorage; + Model.toVector(ModelStorage); + + if (MakeAbsolute) { + // Make model absolute by prepending a temp directory if it's not already. + bool absolute = sys::path::is_absolute(Twine(ModelStorage)); + if (!absolute) { + SmallString<128> TDir; + if (error_code ec = TempDir(TDir)) return ec; + sys::path::append(TDir, Twine(ModelStorage)); + ModelStorage.swap(TDir); + } + } -bool -Path::isValid() const { - // Empty paths are considered invalid here. - // This code doesn't check MAXPATHLEN because there's no need. Nothing in - // LLVM manipulates Paths with fixed-sizes arrays, and if the OS can't - // handle names longer than some limit, it'll report this on demand using - // ENAMETOLONG. - return !path.empty(); -} + // From here on, DO NOT modify model. It may be needed if the randomly chosen + // path already exists. + ResultPath = ModelStorage; + // Null terminate. + ResultPath.push_back(0); + ResultPath.pop_back(); + +retry_random_path: + // Replace '%' with random chars. + for (unsigned i = 0, e = ModelStorage.size(); i != e; ++i) { + if (ModelStorage[i] == '%') + ResultPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15]; + } -bool -Path::isAbsolute(const char *NameStart, unsigned NameLen) { - assert(NameStart); - if (NameLen == 0) - return false; - return NameStart[0] == '/'; -} + // Try to open + create the file. + switch (Type) { + case FS_File: { + int RandomFD = ::open(ResultPath.begin(), O_RDWR | O_CREAT | O_EXCL, Mode); + if (RandomFD == -1) { + int SavedErrno = errno; + // If the file existed, try again, otherwise, error. + if (SavedErrno == errc::file_exists) + goto retry_random_path; + return error_code(SavedErrno, system_category()); + } -bool -Path::isAbsolute() const { - if (path.empty()) - return false; - return path[0] == '/'; -} - -Path -Path::GetRootDirectory() { - Path result; - result.set("/"); - return result; -} - -Path -Path::GetTemporaryDirectory(std::string *ErrMsg) { -#if defined(HAVE_MKDTEMP) - // The best way is with mkdtemp but that's not available on many systems, - // Linux and FreeBSD have it. Others probably won't. - char pathname[] = "/tmp/llvm_XXXXXX"; - if (0 == mkdtemp(pathname)) { - MakeErrMsg(ErrMsg, - std::string(pathname) + ": can't create temporary directory"); - return Path(); - } - return Path(pathname); -#elif defined(HAVE_MKSTEMP) - // If no mkdtemp is available, mkstemp can be used to create a temporary file - // which is then removed and created as a directory. We prefer this over - // mktemp because of mktemp's inherent security and threading risks. We still - // have a slight race condition from the time the temporary file is created to - // the time it is re-created as a directory.
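The retry_random_path loop in createUniqueEntity above is the core of the new unique-name machinery: every '%' in the model is rewritten with a random hex digit, and a collision (EEXIST from open(2), or an already-existing path for FS_Name/FS_Dir) simply sends control back to the label for another roll. A minimal sketch of how a caller reaches it, assuming this helper sits behind a sys::fs::createUniqueFile wrapper declared in llvm/Support/FileSystem.h in this release:

    #include "llvm/ADT/SmallString.h"
    #include "llvm/Support/FileSystem.h"
    #include "llvm/Support/raw_ostream.h"
    #include <unistd.h>

    using namespace llvm;

    int main() {
      int FD;
      SmallString<128> Path;
      // Each '%' becomes one random hex digit; on EEXIST the helper retries.
      if (error_code EC = sys::fs::createUniqueFile("test-%%%%%%.o", FD, Path)) {
        errs() << "createUniqueFile: " << EC.message() << "\n";
        return 1;
      }
      outs() << "created " << Path.str() << "\n";
      ::close(FD);                          // the caller owns the descriptor
      bool Existed;
      sys::fs::remove(Path.str(), Existed); // remove the sketch's scratch file
      return 0;
    }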
- char pathname[] = "/tmp/llvm_XXXXXX"; - int fd = 0; - if (-1 == (fd = mkstemp(pathname))) { - MakeErrMsg(ErrMsg, - std::string(pathname) + ": can't create temporary directory"); - return Path(); - } - ::close(fd); - ::unlink(pathname); // start race condition, ignore errors - if (-1 == ::mkdir(pathname, S_IRWXU)) { // end race condition - MakeErrMsg(ErrMsg, - std::string(pathname) + ": can't create temporary directory"); - return Path(); - } - return Path(pathname); -#elif defined(HAVE_MKTEMP) - // If a system doesn't have mkdtemp(3) or mkstemp(3) but it does have - // mktemp(3) then we'll assume that system (e.g. AIX) has a reasonable - // implementation of mktemp(3) and doesn't follow BSD 4.3's lead of replacing - // the XXXXXX with the pid of the process and a letter. That leads to only - // twenty six temporary files that can be generated. - char pathname[] = "/tmp/llvm_XXXXXX"; - char *TmpName = ::mktemp(pathname); - if (TmpName == 0) { - MakeErrMsg(ErrMsg, - std::string(TmpName) + ": can't create unique directory name"); - return Path(); - } - if (-1 == ::mkdir(TmpName, S_IRWXU)) { - MakeErrMsg(ErrMsg, - std::string(TmpName) + ": can't create temporary directory"); - return Path(); + ResultFD = RandomFD; + return error_code::success(); } - return Path(TmpName); -#else - // This is the worst case implementation. tempnam(3) leaks memory unless it's - // on an SVID2 (or later) system. On BSD 4.3 it leaks. tmpnam(3) has thread - // issues. The mktemp(3) function doesn't have enough variability in the - // temporary name generated. So, we provide our own implementation that - // increments an integer from a random number seeded by the current time. This - // should be sufficiently unique that we don't have many collisions between - // processes. Generally LLVM processes don't run very long and don't use very - // many temporary files so this shouldn't be a big issue for LLVM. - static time_t num = ::time(0); - char pathname[MAXPATHLEN]; - do { - num++; - sprintf(pathname, "/tmp/llvm_%010u", unsigned(num)); - } while ( 0 == access(pathname, F_OK ) ); - if (-1 == ::mkdir(pathname, S_IRWXU)) { - MakeErrMsg(ErrMsg, - std::string(pathname) + ": can't create temporary directory"); - return Path(); - } - return Path(pathname); -#endif -} -void -Path::GetSystemLibraryPaths(std::vector<sys::Path>& Paths) { -#ifdef LTDL_SHLIBPATH_VAR - char* env_var = getenv(LTDL_SHLIBPATH_VAR); - if (env_var != 0) { - getPathList(env_var,Paths); - } -#endif - // FIXME: Should this look at LD_LIBRARY_PATH too?
- Paths.push_back(sys::Path("/usr/local/lib/")); - Paths.push_back(sys::Path("/usr/X11R6/lib/")); - Paths.push_back(sys::Path("/usr/lib/")); - Paths.push_back(sys::Path("/lib/")); -} - -void -Path::GetBitcodeLibraryPaths(std::vector<sys::Path>& Paths) { - char * env_var = getenv("LLVM_LIB_SEARCH_PATH"); - if (env_var != 0) { - getPathList(env_var,Paths); - } -#ifdef LLVM_LIBDIR - { - Path tmpPath; - if (tmpPath.set(LLVM_LIBDIR)) - if (tmpPath.canRead()) - Paths.push_back(tmpPath); + case FS_Name: { + bool Exists; + error_code EC = sys::fs::exists(ResultPath.begin(), Exists); + if (EC) + return EC; + if (Exists) + goto retry_random_path; + return error_code::success(); } -#endif - GetSystemLibraryPaths(Paths); -} -Path -Path::GetUserHomeDirectory() { - const char* home = getenv("HOME"); - Path result; - if (home && result.set(home)) - return result; - result.set("/"); - return result; -} - -Path -Path::GetCurrentDirectory() { - char pathname[MAXPATHLEN]; - if (!getcwd(pathname, MAXPATHLEN)) { - assert(false && "Could not query current working directory."); - return Path(); + case FS_Dir: { + bool Existed; + error_code EC = sys::fs::create_directory(ResultPath.begin(), Existed); + if (EC) + return EC; + if (Existed) + goto retry_random_path; + return error_code::success(); } - - return Path(pathname); + } + llvm_unreachable("Invalid Type"); } +namespace llvm { +namespace sys { +namespace fs { #if defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) || \ - defined(__linux__) || defined(__CYGWIN__) + defined(__linux__) || defined(__CYGWIN__) || defined(__DragonFly__) static int test_dir(char buf[PATH_MAX], char ret[PATH_MAX], const char *dir, const char *bin) @@ -318,7 +238,7 @@ getprogpath(char ret[PATH_MAX], const char *bin) /// GetMainExecutable - Return the path to the main executable, given the /// value of argv[0] from program startup. -Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { +std::string getMainExecutable(const char *argv0, void *MainAddr) { #if defined(__APPLE__) // On OS X the executable path is saved to the stack by dyld. Reading it // from there is much faster than calling dladdr, especially for large @@ -328,14 +248,15 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { if (_NSGetExecutablePath(exe_path, &size) == 0) { char link_path[MAXPATHLEN]; if (realpath(exe_path, link_path)) - return Path(link_path); + return link_path; } #elif defined(__FreeBSD__) || defined (__NetBSD__) || defined(__Bitrig__) || \ - defined(__OpenBSD__) || defined(__minix) || defined(__FreeBSD_kernel__) + defined(__OpenBSD__) || defined(__minix) || defined(__DragonFly__) || \ + defined(__FreeBSD_kernel__) char exe_path[PATH_MAX]; if (getprogpath(exe_path, argv0) != NULL) - return Path(exe_path); + return exe_path; #elif defined(__linux__) || defined(__CYGWIN__) char exe_path[MAXPATHLEN]; StringRef aPath("/proc/self/exe"); @@ -343,558 +264,534 @@ Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { // /proc is not always mounted under Linux (chroot for example). ssize_t len = readlink(aPath.str().c_str(), exe_path, sizeof(exe_path)); if (len >= 0) - return Path(StringRef(exe_path, len)); + return StringRef(exe_path, len); } else { // Fall back to the classical detection. if (getprogpath(exe_path, argv0) != NULL) - return Path(exe_path); + return exe_path; } #elif defined(HAVE_DLFCN_H) // Use dladdr to get executable path if available.
Dl_info DLInfo; int err = dladdr(MainAddr, &DLInfo); if (err == 0) - return Path(); + return ""; // If the filename is a symlink, we need to resolve and return the location of // the actual executable. char link_path[MAXPATHLEN]; if (realpath(DLInfo.dli_fname, link_path)) - return Path(link_path); + return link_path; #else #error GetMainExecutable is not implemented on this host yet. #endif - return Path(); + return ""; } +TimeValue file_status::getLastModificationTime() const { + TimeValue Ret; + Ret.fromEpochTime(fs_st_mtime); + return Ret; +} -StringRef Path::getDirname() const { - return getDirnameCharSep(path, "/"); +UniqueID file_status::getUniqueID() const { + return UniqueID(fs_st_dev, fs_st_ino); } -StringRef -Path::getBasename() const { - // Find the last slash - std::string::size_type slash = path.rfind('/'); - if (slash == std::string::npos) - slash = 0; - else - slash++; +error_code current_path(SmallVectorImpl<char> &result) { + result.clear(); - std::string::size_type dot = path.rfind('.'); - if (dot == std::string::npos || dot < slash) - return StringRef(path).substr(slash); - else - return StringRef(path).substr(slash, dot - slash); -} + const char *pwd = ::getenv("PWD"); + llvm::sys::fs::file_status PWDStatus, DotStatus; + if (pwd && llvm::sys::path::is_absolute(pwd) && + !llvm::sys::fs::status(pwd, PWDStatus) && + !llvm::sys::fs::status(".", DotStatus) && + PWDStatus.getUniqueID() == DotStatus.getUniqueID()) { + result.append(pwd, pwd + strlen(pwd)); + return error_code::success(); + } -StringRef -Path::getSuffix() const { - // Find the last slash - std::string::size_type slash = path.rfind('/'); - if (slash == std::string::npos) - slash = 0; - else - slash++; +#ifdef MAXPATHLEN + result.reserve(MAXPATHLEN); +#else +// For GNU Hurd + result.reserve(1024); +#endif - std::string::size_type dot = path.rfind('.'); - if (dot == std::string::npos || dot < slash) - return StringRef(); - else - return StringRef(path).substr(dot + 1); -} + while (true) { + if (::getcwd(result.data(), result.capacity()) == 0) { + // See if there was a real error. + if (errno != errc::not_enough_memory) + return error_code(errno, system_category()); + // Otherwise there just wasn't enough space. + result.reserve(result.capacity() * 2); + } else + break; + } -bool Path::getMagicNumber(std::string &Magic, unsigned len) const { - assert(len < 1024 && "Request for magic string too long"); - char Buf[1025]; - int fd = ::open(path.c_str(), O_RDONLY); - if (fd < 0) - return false; - ssize_t bytes_read = ::read(fd, Buf, len); - ::close(fd); - if (ssize_t(len) != bytes_read) - return false; - Magic.assign(Buf, len); - return true; + result.set_size(strlen(result.data())); + return error_code::success(); } -bool -Path::exists() const { - return 0 == access(path.c_str(), F_OK ); -} +error_code create_directory(const Twine &path, bool &existed) { + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); -bool -Path::isDirectory() const { - struct stat buf; - if (0 != stat(path.c_str(), &buf)) - return false; - return ((buf.st_mode & S_IFMT) == S_IFDIR) ?
true : false; -} + if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) { + if (errno != errc::file_exists) + return error_code(errno, system_category()); + existed = true; + } else + existed = false; -bool -Path::isSymLink() const { - struct stat buf; - if (0 != lstat(path.c_str(), &buf)) - return false; - return S_ISLNK(buf.st_mode); + return error_code::success(); } +error_code create_hard_link(const Twine &to, const Twine &from) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toNullTerminatedStringRef(from_storage); + StringRef t = to.toNullTerminatedStringRef(to_storage); -bool -Path::canRead() const { - return 0 == access(path.c_str(), R_OK); -} + if (::link(t.begin(), f.begin()) == -1) + return error_code(errno, system_category()); -bool -Path::canWrite() const { - return 0 == access(path.c_str(), W_OK); + return error_code::success(); } -bool -Path::isRegularFile() const { - // Get the status so we can determine if it's a file or directory - struct stat buf; - - if (0 != stat(path.c_str(), &buf)) - return false; +error_code create_symlink(const Twine &to, const Twine &from) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toNullTerminatedStringRef(from_storage); + StringRef t = to.toNullTerminatedStringRef(to_storage); - if (S_ISREG(buf.st_mode)) - return true; + if (::symlink(t.begin(), f.begin()) == -1) + return error_code(errno, system_category()); - return false; + return error_code::success(); } -bool -Path::canExecute() const { - if (0 != access(path.c_str(), R_OK | X_OK )) - return false; +error_code remove(const Twine &path, bool &existed) { + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); + struct stat buf; - if (0 != stat(path.c_str(), &buf)) - return false; - if (!S_ISREG(buf.st_mode)) - return false; - return true; -} + if (stat(p.begin(), &buf) != 0) { + if (errno != errc::no_such_file_or_directory) + return error_code(errno, system_category()); + existed = false; + return error_code::success(); + } -StringRef -Path::getLast() const { - // Find the last slash - size_t pos = path.rfind('/'); + // Note: this check catches strange situations. In all cases, LLVM should + // only be involved in the creation and deletion of regular files. This + // check ensures that what we're trying to erase is a regular file. It + // effectively prevents LLVM from erasing things like /dev/null, any block + // special file, or other things that aren't "regular" files. 
+ if (!S_ISREG(buf.st_mode) && !S_ISDIR(buf.st_mode)) + return make_error_code(errc::operation_not_permitted); - // Handle the corner cases - if (pos == std::string::npos) - return path; + if (::remove(p.begin()) == -1) { + if (errno != errc::no_such_file_or_directory) + return error_code(errno, system_category()); + existed = false; + } else + existed = true; - // If the last character is a slash - if (pos == path.length()-1) { - // Find the second to last slash - size_t pos2 = path.rfind('/', pos-1); - if (pos2 == std::string::npos) - return StringRef(path).substr(0,pos); - else - return StringRef(path).substr(pos2+1,pos-pos2-1); - } - // Return everything after the last slash - return StringRef(path).substr(pos+1); + return error_code::success(); } -const FileStatus * -PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const { - if (!fsIsValid || update) { - struct stat buf; - if (0 != stat(path.c_str(), &buf)) { - MakeErrMsg(ErrStr, path + ": can't get status of file"); - return 0; - } - status.fileSize = buf.st_size; - status.modTime.fromEpochTime(buf.st_mtime); - status.mode = buf.st_mode; - status.user = buf.st_uid; - status.group = buf.st_gid; - status.uniqueID = uint64_t(buf.st_ino); - status.isDir = S_ISDIR(buf.st_mode); - status.isFile = S_ISREG(buf.st_mode); - fsIsValid = true; - } - return &status; -} +error_code rename(const Twine &from, const Twine &to) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toNullTerminatedStringRef(from_storage); + StringRef t = to.toNullTerminatedStringRef(to_storage); -static bool AddPermissionBits(const Path &File, int bits) { - // Get the umask value from the operating system. We want to use it - // when changing the file's permissions. Since calling umask() sets - // the umask and returns its old value, we must call it a second - // time to reset it to the user's preference. - int mask = umask(0777); // The arg. to umask is arbitrary. - umask(mask); // Restore the umask. + if (::rename(f.begin(), t.begin()) == -1) + return error_code(errno, system_category()); - // Get the file's current mode. - struct stat buf; - if (0 != stat(File.c_str(), &buf)) - return false; - // Change the file to have whichever permissions bits from 'bits' - // that the umask would not disable. 
- if ((chmod(File.c_str(), (buf.st_mode | (bits & ~mask)))) == -1) - return false; - return true; + return error_code::success(); } -bool Path::makeReadableOnDisk(std::string* ErrMsg) { - if (!AddPermissionBits(*this, 0444)) - return MakeErrMsg(ErrMsg, path + ": can't make file readable"); - return false; -} +error_code resize_file(const Twine &path, uint64_t size) { + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); -bool Path::makeWriteableOnDisk(std::string* ErrMsg) { - if (!AddPermissionBits(*this, 0222)) - return MakeErrMsg(ErrMsg, path + ": can't make file writable"); - return false; -} + if (::truncate(p.begin(), size) == -1) + return error_code(errno, system_category()); -bool Path::makeExecutableOnDisk(std::string* ErrMsg) { - if (!AddPermissionBits(*this, 0111)) - return MakeErrMsg(ErrMsg, path + ": can't make file executable"); - return false; + return error_code::success(); } -bool -Path::getDirectoryContents(std::set<Path>& result, std::string* ErrMsg) const { - DIR* direntries = ::opendir(path.c_str()); - if (direntries == 0) - return MakeErrMsg(ErrMsg, path + ": can't open directory"); - - std::string dirPath = path; - if (!lastIsSlash(dirPath)) - dirPath += '/'; +error_code exists(const Twine &path, bool &result) { + SmallString<128> path_storage; + StringRef p = path.toNullTerminatedStringRef(path_storage); - result.clear(); - struct dirent* de = ::readdir(direntries); - for ( ; de != 0; de = ::readdir(direntries)) { - if (de->d_name[0] != '.') { - Path aPath(dirPath + (const char*)de->d_name); - struct stat st; - if (0 != lstat(aPath.path.c_str(), &st)) { - if (S_ISLNK(st.st_mode)) - continue; // dangling symlink -- ignore - return MakeErrMsg(ErrMsg, - aPath.path + ": can't determine file object type"); - } - result.insert(aPath); - } - } + if (::access(p.begin(), F_OK) == -1) { + if (errno != errc::no_such_file_or_directory) + return error_code(errno, system_category()); + result = false; + } else + result = true; - closedir(direntries); - return false; + return error_code::success(); } -bool -Path::set(StringRef a_path) { - if (a_path.empty()) - return false; - path = a_path; - return true; +bool can_write(const Twine &Path) { + SmallString<128> PathStorage; + StringRef P = Path.toNullTerminatedStringRef(PathStorage); + return 0 == access(P.begin(), W_OK); } -bool -Path::appendComponent(StringRef name) { - if (name.empty()) +bool can_execute(const Twine &Path) { + SmallString<128> PathStorage; + StringRef P = Path.toNullTerminatedStringRef(PathStorage); + + if (0 != access(P.begin(), R_OK | X_OK)) + return false; + struct stat buf; + if (0 != stat(P.begin(), &buf)) + return false; + if (!S_ISREG(buf.st_mode)) return false; - if (!lastIsSlash(path)) - path += '/'; - path += name; return true; } -bool -Path::eraseComponent() { - size_t slashpos = path.rfind('/',path.size()); - if (slashpos == 0 || slashpos == std::string::npos) { - path.erase(); - return true; - } - if (slashpos == path.size() - 1) - slashpos = path.rfind('/',slashpos-1); - if (slashpos == std::string::npos) { - path.erase(); - return true; - } - path.erase(slashpos); - return true; +bool equivalent(file_status A, file_status B) { + assert(status_known(A) && status_known(B)); + return A.fs_st_dev == B.fs_st_dev && + A.fs_st_ino == B.fs_st_ino; } -bool -Path::eraseSuffix() { - size_t dotpos = path.rfind('.',path.size()); - size_t slashpos = path.rfind('/',path.size()); - if (dotpos != std::string::npos) { - if (slashpos == std::string::npos || dotpos > slashpos+1) {
path.erase(dotpos, path.size()-dotpos); - return true; - } - } - return false; +error_code equivalent(const Twine &A, const Twine &B, bool &result) { + file_status fsA, fsB; + if (error_code ec = status(A, fsA)) return ec; + if (error_code ec = status(B, fsB)) return ec; + result = equivalent(fsA, fsB); + return error_code::success(); } -static bool createDirectoryHelper(char* beg, char* end, bool create_parents) { +static error_code fillStatus(int StatRet, const struct stat &Status, + file_status &Result) { + if (StatRet != 0) { + error_code ec(errno, system_category()); + if (ec == errc::no_such_file_or_directory) + Result = file_status(file_type::file_not_found); + else + Result = file_status(file_type::status_error); + return ec; + } - if (access(beg, R_OK | W_OK) == 0) - return false; + file_type Type = file_type::type_unknown; + + if (S_ISDIR(Status.st_mode)) + Type = file_type::directory_file; + else if (S_ISREG(Status.st_mode)) + Type = file_type::regular_file; + else if (S_ISBLK(Status.st_mode)) + Type = file_type::block_file; + else if (S_ISCHR(Status.st_mode)) + Type = file_type::character_file; + else if (S_ISFIFO(Status.st_mode)) + Type = file_type::fifo_file; + else if (S_ISSOCK(Status.st_mode)) + Type = file_type::socket_file; + + perms Perms = static_cast<perms>(Status.st_mode); + Result = + file_status(Type, Perms, Status.st_dev, Status.st_ino, Status.st_mtime, + Status.st_uid, Status.st_gid, Status.st_size); + + return error_code::success(); +} + +error_code status(const Twine &Path, file_status &Result) { + SmallString<128> PathStorage; + StringRef P = Path.toNullTerminatedStringRef(PathStorage); + + struct stat Status; + int StatRet = ::stat(P.begin(), &Status); + return fillStatus(StatRet, Status, Result); +} + +error_code status(int FD, file_status &Result) { + struct stat Status; + int StatRet = ::fstat(FD, &Status); + return fillStatus(StatRet, Status, Result); +} + +error_code setLastModificationAndAccessTime(int FD, TimeValue Time) { +#if defined(HAVE_FUTIMENS) + timespec Times[2]; + Times[0].tv_sec = Time.toPosixTime(); + Times[0].tv_nsec = 0; + Times[1] = Times[0]; + if (::futimens(FD, Times)) +#elif defined(HAVE_FUTIMES) + timeval Times[2]; + Times[0].tv_sec = Time.toPosixTime(); + Times[0].tv_usec = 0; + Times[1] = Times[0]; + if (::futimes(FD, Times)) +#else +#error Missing futimes() and futimens() +#endif + return error_code(errno, system_category()); + return error_code::success(); +} + +error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { + AutoFD ScopedFD(FD); + if (!CloseFD) + ScopedFD.take(); + + // Figure out how large the file is. + struct stat FileInfo; + if (fstat(FD, &FileInfo) == -1) + return error_code(errno, system_category()); + uint64_t FileSize = FileInfo.st_size; + + if (Size == 0) + Size = FileSize; + else if (FileSize < Size) { + // We need to grow the file. + if (ftruncate(FD, Size) == -1) + return error_code(errno, system_category()); + } - if (create_parents) { + int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE; + int prot = (Mode == readonly) ?
PROT_READ : (PROT_READ | PROT_WRITE); +#ifdef MAP_FILE + flags |= MAP_FILE; +#endif + Mapping = ::mmap(0, Size, prot, flags, FD, Offset); + if (Mapping == MAP_FAILED) + return error_code(errno, system_category()); + return error_code::success(); +} + +mapped_file_region::mapped_file_region(const Twine &path, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() { + // Make sure that the requested size fits within SIZE_T. + if (length > std::numeric_limits<size_t>::max()) { + ec = make_error_code(errc::invalid_argument); + return; + } - char* c = end; + SmallString<128> path_storage; + StringRef name = path.toNullTerminatedStringRef(path_storage); + int oflags = (mode == readonly) ? O_RDONLY : O_RDWR; + int ofd = ::open(name.begin(), oflags); + if (ofd == -1) { + ec = error_code(errno, system_category()); + return; + } - for (; c != beg; --c) - if (*c == '/') { + ec = init(ofd, true, offset); + if (ec) + Mapping = 0; +} + +mapped_file_region::mapped_file_region(int fd, + bool closefd, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() { + // Make sure that the requested size fits within SIZE_T. + if (length > std::numeric_limits<size_t>::max()) { + ec = make_error_code(errc::invalid_argument); + return; + } - // Recurse to handle the parent directory. - *c = '\0'; - bool x = createDirectoryHelper(beg, c, create_parents); - *c = '/'; + ec = init(fd, closefd, offset); + if (ec) + Mapping = 0; +} - // Return if we encountered an error. - if (x) - return true; +mapped_file_region::~mapped_file_region() { + if (Mapping) + ::munmap(Mapping, Size); +} - break; - } - } +#if LLVM_HAS_RVALUE_REFERENCES +mapped_file_region::mapped_file_region(mapped_file_region &&other) + : Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) { + other.Mapping = 0; +} +#endif - return mkdir(beg, S_IRWXU | S_IRWXG) != 0; +mapped_file_region::mapmode mapped_file_region::flags() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Mode; } -bool -Path::createDirectoryOnDisk( bool create_parents, std::string* ErrMsg ) { - // Get a writeable copy of the path name - std::string pathname(path); +uint64_t mapped_file_region::size() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Size; } - // Null-terminate the last component - size_t lastchar = path.length() - 1 ; +char *mapped_file_region::data() const { + assert(Mapping && "Mapping failed but used anyway!"); + assert(Mode != readonly && "Cannot get non const data for readonly mapping!"); + return reinterpret_cast<char*>(Mapping); +} - if (pathname[lastchar] != '/') - ++lastchar; +const char *mapped_file_region::const_data() const { + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast<const char*>(Mapping); +} - pathname[lastchar] = '\0'; +int mapped_file_region::alignment() { + return process::get_self()->page_size(); +} - if (createDirectoryHelper(&pathname[0], &pathname[lastchar], create_parents)) - return MakeErrMsg(ErrMsg, pathname + ": can't create directory"); +error_code detail::directory_iterator_construct(detail::DirIterState &it, + StringRef path){ + SmallString<128> path_null(path); + DIR *directory = ::opendir(path_null.c_str()); + if (directory == 0) + return error_code(errno, system_category()); - return false; + it.IterationHandle = reinterpret_cast<intptr_t>(directory); + // Add something for replace_filename to replace.
+ path::append(path_null, "."); + it.CurrentEntry = directory_entry(path_null.str()); + return directory_iterator_increment(it); } -bool -Path::createFileOnDisk(std::string* ErrMsg) { - // Create the file - int fd = ::creat(path.c_str(), S_IRUSR | S_IWUSR); - if (fd < 0) - return MakeErrMsg(ErrMsg, path + ": can't create file"); - ::close(fd); - return false; +error_code detail::directory_iterator_destruct(detail::DirIterState &it) { + if (it.IterationHandle) + ::closedir(reinterpret_cast<DIR *>(it.IterationHandle)); + it.IterationHandle = 0; + it.CurrentEntry = directory_entry(); + return error_code::success(); } -bool -Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { - // Make this into a unique file name - if (makeUnique( reuse_current, ErrMsg )) - return true; +error_code detail::directory_iterator_increment(detail::DirIterState &it) { + errno = 0; + dirent *cur_dir = ::readdir(reinterpret_cast<DIR *>(it.IterationHandle)); + if (cur_dir == 0 && errno != 0) { + return error_code(errno, system_category()); + } else if (cur_dir != 0) { + StringRef name(cur_dir->d_name, NAMLEN(cur_dir)); + if ((name.size() == 1 && name[0] == '.') || + (name.size() == 2 && name[0] == '.' && name[1] == '.')) + return directory_iterator_increment(it); + it.CurrentEntry.replace_filename(name); + } else + return directory_iterator_destruct(it); - // create the file - int fd = ::open(path.c_str(), O_WRONLY|O_CREAT|O_TRUNC, 0666); - if (fd < 0) - return MakeErrMsg(ErrMsg, path + ": can't create temporary file"); - ::close(fd); - return false; + return error_code::success(); } -bool -Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { - // Get the status so we can determine if it's a file or directory. - struct stat buf; - if (0 != stat(path.c_str(), &buf)) { - MakeErrMsg(ErrStr, path + ": can't get status of file"); - return true; - } +error_code get_magic(const Twine &path, uint32_t len, + SmallVectorImpl<char> &result) { + SmallString<128> PathStorage; + StringRef Path = path.toNullTerminatedStringRef(PathStorage); + result.set_size(0); - // Note: this check catches strange situations. In all cases, LLVM should - // only be involved in the creation and deletion of regular files. This - // check ensures that what we're trying to erase is a regular file. It - // effectively prevents LLVM from erasing things like /dev/null, any block - // special file, or other things that aren't "regular" files. - if (S_ISREG(buf.st_mode)) { - if (unlink(path.c_str()) != 0) - return MakeErrMsg(ErrStr, path + ": can't destroy file"); - return false; - } + // Open path. + std::FILE *file = std::fopen(Path.data(), "rb"); + if (file == 0) + return error_code(errno, system_category()); - if (!S_ISDIR(buf.st_mode)) { - if (ErrStr) *ErrStr = "not a file or directory"; - return true; - } + // Reserve storage. + result.reserve(len); - if (remove_contents) { - // Recursively descend the directory to remove its contents. - std::string cmd = "/bin/rm -rf " + path; - if (system(cmd.c_str()) != 0) { - MakeErrMsg(ErrStr, path + ": failed to recursively remove directory."); - return true; + // Read magic! + size_t size = std::fread(result.data(), 1, len, file); + if (std::ferror(file) != 0) { + std::fclose(file); + return error_code(errno, system_category()); + } else if (size != len) { + if (std::feof(file) != 0) { + std::fclose(file); + result.set_size(size); + return make_error_code(errc::value_too_large); } - return false; - } - - // Otherwise, try to just remove the one directory.
- std::string pathname(path); - size_t lastchar = path.length() - 1; - if (pathname[lastchar] == '/') - pathname[lastchar] = '\0'; - else - pathname[lastchar+1] = '\0'; - - if (rmdir(pathname.c_str()) != 0) - return MakeErrMsg(ErrStr, pathname + ": can't erase directory"); - return false; -} - -bool -Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) { - if (0 != ::rename(path.c_str(), newName.c_str())) - return MakeErrMsg(ErrMsg, std::string("can't rename '") + path + "' as '" + - newName.str() + "'"); - return false; -} - -bool -Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrStr) const { - struct utimbuf utb; - utb.actime = si.modTime.toPosixTime(); - utb.modtime = utb.actime; - if (0 != ::utime(path.c_str(),&utb)) - return MakeErrMsg(ErrStr, path + ": can't set file modification time"); - if (0 != ::chmod(path.c_str(),si.mode)) - return MakeErrMsg(ErrStr, path + ": can't set mode"); - return false; -} - -bool -sys::CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg){ - int inFile = -1; - int outFile = -1; - inFile = ::open(Src.c_str(), O_RDONLY); - if (inFile == -1) - return MakeErrMsg(ErrMsg, Src.str() + - ": can't open source file to copy"); - - outFile = ::open(Dest.c_str(), O_WRONLY|O_CREAT, 0666); - if (outFile == -1) { - ::close(inFile); - return MakeErrMsg(ErrMsg, Dest.str() + - ": can't create destination file for copy"); } - - char Buffer[16*1024]; - while (ssize_t Amt = ::read(inFile, Buffer, 16*1024)) { - if (Amt == -1) { - if (errno != EINTR && errno != EAGAIN) { - ::close(inFile); - ::close(outFile); - return MakeErrMsg(ErrMsg, Src.str()+": can't read source file"); - } - } else { - char *BufPtr = Buffer; - while (Amt) { - ssize_t AmtWritten = ::write(outFile, BufPtr, Amt); - if (AmtWritten == -1) { - if (errno != EINTR && errno != EAGAIN) { - ::close(inFile); - ::close(outFile); - return MakeErrMsg(ErrMsg, Dest.str() + - ": can't write destination file"); - } - } else { - Amt -= AmtWritten; - BufPtr += AmtWritten; - } - } - } + std::fclose(file); + result.set_size(size); + return error_code::success(); +} + +error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, + bool map_writable, void *&result) { + SmallString<128> path_storage; + StringRef name = path.toNullTerminatedStringRef(path_storage); + int oflags = map_writable ? O_RDWR : O_RDONLY; + int ofd = ::open(name.begin(), oflags); + if ( ofd == -1 ) + return error_code(errno, system_category()); + AutoFD fd(ofd); + int flags = map_writable ? MAP_SHARED : MAP_PRIVATE; + int prot = map_writable ? (PROT_READ|PROT_WRITE) : PROT_READ; +#ifdef MAP_FILE + flags |= MAP_FILE; +#endif + result = ::mmap(0, size, prot, flags, fd, file_offset); + if (result == MAP_FAILED) { + return error_code(errno, system_category()); } - ::close(inFile); - ::close(outFile); - return false; -} - -bool -Path::makeUnique(bool reuse_current, std::string* ErrMsg) { - bool Exists; - if (reuse_current && (fs::exists(path, Exists) || !Exists)) - return false; // File doesn't exist already, just use it! - - // Append an XXXXXX pattern to the end of the file for use with mkstemp, - // mktemp or our own implementation. - // This uses std::vector instead of SmallVector to avoid a dependence on - // libSupport. And performance isn't critical here. 
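
The deleted sys::CopyFile above is the classic POSIX copy loop: read(2) and write(2) may be interrupted (EINTR), told to come back later (EAGAIN), or transfer fewer bytes than requested, so the writer advances a cursor until the whole buffer has been flushed. The write half of that loop as a hedged standalone sketch (write_all is a hypothetical helper):

    #include <unistd.h>
    #include <cerrno>
    #include <cstddef>

    bool write_all(int fd, const char *buf, size_t len) {
      while (len > 0) {
        ssize_t n = ::write(fd, buf, len);
        if (n < 0) {
          if (errno == EINTR || errno == EAGAIN)
            continue;                     // transient condition: retry the write
          return false;                   // a real error
        }
        buf += n;                         // short write: advance and loop
        len -= n;
      }
      return true;
    }
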
- std::vector Buf; - Buf.resize(path.size()+8); - char *FNBuffer = &Buf[0]; - path.copy(FNBuffer,path.size()); - bool isdir; - if (!fs::is_directory(path, isdir) && isdir) - strcpy(FNBuffer+path.size(), "/XXXXXX"); - else - strcpy(FNBuffer+path.size(), "-XXXXXX"); + + return error_code::success(); +} -#if defined(HAVE_MKSTEMP) - int TempFD; - if ((TempFD = mkstemp(FNBuffer)) == -1) - return MakeErrMsg(ErrMsg, path + ": can't make unique filename"); +error_code unmap_file_pages(void *base, size_t size) { + if ( ::munmap(base, size) == -1 ) + return error_code(errno, system_category()); + + return error_code::success(); +} - // We don't need to hold the temp file descriptor... we will trust that no one - // will overwrite/delete the file before we can open it again. - close(TempFD); +error_code openFileForRead(const Twine &Name, int &ResultFD) { + SmallString<128> Storage; + StringRef P = Name.toNullTerminatedStringRef(Storage); + while ((ResultFD = open(P.begin(), O_RDONLY)) < 0) { + if (errno != EINTR) + return error_code(errno, system_category()); + } + return error_code::success(); +} - // Save the name - path = FNBuffer; +error_code openFileForWrite(const Twine &Name, int &ResultFD, + sys::fs::OpenFlags Flags, unsigned Mode) { + // Verify that we don't have both "append" and "excl". + assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) && + "Cannot specify both 'excl' and 'append' file creation flags!"); - // By default mkstemp sets the mode to 0600, so update mode bits now. - AddPermissionBits (*this, 0666); -#elif defined(HAVE_MKTEMP) - // If we don't have mkstemp, use the old and obsolete mktemp function. - if (mktemp(FNBuffer) == 0) - return MakeErrMsg(ErrMsg, path + ": can't make unique filename"); + int OpenFlags = O_WRONLY | O_CREAT; - // Save the name - path = FNBuffer; -#else - // Okay, looks like we have to do it all by our lonesome. - static unsigned FCounter = 0; - // Try to initialize with unique value. - if (FCounter == 0) FCounter = ((unsigned)getpid() & 0xFFFF) << 8; - char* pos = strstr(FNBuffer, "XXXXXX"); - do { - if (++FCounter > 0xFFFFFF) { - return MakeErrMsg(ErrMsg, - path + ": can't make unique filename: too many files"); - } - sprintf(pos, "%06X", FCounter); - path = FNBuffer; - } while (exists()); - // POSSIBLE SECURITY BUG: An attacker can easily guess the name and exploit - // LLVM. 
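
The deleted makeUnique prefers mkstemp(3), which atomically picks a unique name and creates the file, over mktemp(3) and over the hand-rolled counter fallback whose own comment above flags it as a security bug. A minimal sketch of the mkstemp path (hypothetical helper; like the original, it closes the descriptor and keeps only the name, the small leap of faith the old comment admits to):

    #include <stdlib.h>
    #include <unistd.h>
    #include <string>

    bool make_unique_file(std::string &path) {
      std::string buf = path + "-XXXXXX"; // mkstemp needs a trailing XXXXXX template
      int fd = mkstemp(&buf[0]);          // fills in the Xs, creates the file 0600
      if (fd == -1)
        return false;
      ::close(fd);                        // keep only the name, as the old code did
      path = buf;
      return true;
    }
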
-#endif - return false; -} + if (Flags & F_Append) + OpenFlags |= O_APPEND; + else + OpenFlags |= O_TRUNC; -const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) { - int Flags = MAP_PRIVATE; -#ifdef MAP_FILE - Flags |= MAP_FILE; -#endif - void *BasePtr = ::mmap(0, FileSize, PROT_READ, Flags, FD, Offset); - if (BasePtr == MAP_FAILED) - return 0; - return (const char*)BasePtr; -} + if (Flags & F_Excl) + OpenFlags |= O_EXCL; -void Path::UnMapFilePages(const char *BasePtr, size_t FileSize) { - const void *Addr = static_cast(BasePtr); - ::munmap(const_cast(Addr), FileSize); + SmallString<128> Storage; + StringRef P = Name.toNullTerminatedStringRef(Storage); + while ((ResultFD = open(P.begin(), OpenFlags, Mode)) < 0) { + if (errno != EINTR) + return error_code(errno, system_category()); + } + return error_code::success(); } -} // end llvm namespace +} // end namespace fs +} // end namespace sys +} // end namespace llvm diff --git a/contrib/llvm/lib/Support/Unix/PathV2.inc b/contrib/llvm/lib/Support/Unix/PathV2.inc deleted file mode 100644 index 7e0aead..0000000 --- a/contrib/llvm/lib/Support/Unix/PathV2.inc +++ /dev/null @@ -1,693 +0,0 @@ -//===- llvm/Support/Unix/PathV2.cpp - Unix Path Implementation --*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Unix specific implementation of the PathV2 API. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic UNIX code that -//=== is guaranteed to work on *all* UNIX variants. -//===----------------------------------------------------------------------===// - -#include "Unix.h" -#include "llvm/Support/Process.h" -#if HAVE_SYS_STAT_H -#include -#endif -#if HAVE_FCNTL_H -#include -#endif -#ifdef HAVE_SYS_MMAN_H -#include -#endif -#if HAVE_DIRENT_H -# include -# define NAMLEN(dirent) strlen((dirent)->d_name) -#else -# define dirent direct -# define NAMLEN(dirent) (dirent)->d_namlen -# if HAVE_SYS_NDIR_H -# include -# endif -# if HAVE_SYS_DIR_H -# include -# endif -# if HAVE_NDIR_H -# include -# endif -#endif -#if HAVE_STDIO_H -#include -#endif -#if HAVE_LIMITS_H -#include -#endif - -// Both stdio.h and cstdio are included via different pathes and -// stdcxx's cstdio doesn't include stdio.h, so it doesn't #undef the macros -// either. -#undef ferror -#undef feof - -// For GNU Hurd -#if defined(__GNU__) && !defined(PATH_MAX) -# define PATH_MAX 4096 -#endif - -using namespace llvm; - -namespace { - /// This class automatically closes the given file descriptor when it goes out - /// of scope. You can take back explicit ownership of the file descriptor by - /// calling take(). The destructor does not verify that close was successful. - /// Therefore, never allow this class to call close on a file descriptor that - /// has been read from or written to. 
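
openFileForRead and openFileForWrite above both wrap open(2) in a retry loop, because a signal delivered while the call is in progress can make it fail with EINTR before a descriptor is produced. The idiom as a standalone helper (open_noeintr is an illustrative name):

    #include <fcntl.h>
    #include <sys/stat.h>
    #include <cerrno>

    int open_noeintr(const char *path, int flags, mode_t mode = 0) {
      int fd;
      do {
        fd = ::open(path, flags, mode);
      } while (fd < 0 && errno == EINTR); // interrupted by a signal: just retry
      return fd;                          // -1 with errno set on a real failure
    }
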
- struct AutoFD { - int FileDescriptor; - - AutoFD(int fd) : FileDescriptor(fd) {} - ~AutoFD() { - if (FileDescriptor >= 0) - ::close(FileDescriptor); - } - - int take() { - int ret = FileDescriptor; - FileDescriptor = -1; - return ret; - } - - operator int() const {return FileDescriptor;} - }; - - error_code TempDir(SmallVectorImpl &result) { - // FIXME: Don't use TMPDIR if program is SUID or SGID enabled. - const char *dir = 0; - (dir = std::getenv("TMPDIR" )) || - (dir = std::getenv("TMP" )) || - (dir = std::getenv("TEMP" )) || - (dir = std::getenv("TEMPDIR")) || -#ifdef P_tmpdir - (dir = P_tmpdir) || -#endif - (dir = "/tmp"); - - result.clear(); - StringRef d(dir); - result.append(d.begin(), d.end()); - return error_code::success(); - } -} - -namespace llvm { -namespace sys { -namespace fs { - -error_code current_path(SmallVectorImpl &result) { -#ifdef MAXPATHLEN - result.reserve(MAXPATHLEN); -#else -// For GNU Hurd - result.reserve(1024); -#endif - - while (true) { - if (::getcwd(result.data(), result.capacity()) == 0) { - // See if there was a real error. - if (errno != errc::not_enough_memory) - return error_code(errno, system_category()); - // Otherwise there just wasn't enough space. - result.reserve(result.capacity() * 2); - } else - break; - } - - result.set_size(strlen(result.data())); - return error_code::success(); -} - -error_code copy_file(const Twine &from, const Twine &to, copy_option copt) { - // Get arguments. - SmallString<128> from_storage; - SmallString<128> to_storage; - StringRef f = from.toNullTerminatedStringRef(from_storage); - StringRef t = to.toNullTerminatedStringRef(to_storage); - - const size_t buf_sz = 32768; - char buffer[buf_sz]; - int from_file = -1, to_file = -1; - - // Open from. - if ((from_file = ::open(f.begin(), O_RDONLY)) < 0) - return error_code(errno, system_category()); - AutoFD from_fd(from_file); - - // Stat from. - struct stat from_stat; - if (::stat(f.begin(), &from_stat) != 0) - return error_code(errno, system_category()); - - // Setup to flags. - int to_flags = O_CREAT | O_WRONLY; - if (copt == copy_option::fail_if_exists) - to_flags |= O_EXCL; - - // Open to. - if ((to_file = ::open(t.begin(), to_flags, from_stat.st_mode)) < 0) - return error_code(errno, system_category()); - AutoFD to_fd(to_file); - - // Copy! - ssize_t sz, sz_read = 1, sz_write; - while (sz_read > 0 && - (sz_read = ::read(from_fd, buffer, buf_sz)) > 0) { - // Allow for partial writes - see Advanced Unix Programming (2nd Ed.), - // Marc Rochkind, Addison-Wesley, 2004, page 94 - sz_write = 0; - do { - if ((sz = ::write(to_fd, buffer + sz_write, sz_read - sz_write)) < 0) { - sz_read = sz; // cause read loop termination. - break; // error. - } - sz_write += sz; - } while (sz_write < sz_read); - } - - // After all the file operations above the return value of close actually - // matters. - if (::close(from_fd.take()) < 0) sz_read = -1; - if (::close(to_fd.take()) < 0) sz_read = -1; - - // Check for errors. - if (sz_read < 0) - return error_code(errno, system_category()); - - return error_code::success(); -} - -error_code create_directory(const Twine &path, bool &existed) { - SmallString<128> path_storage; - StringRef p = path.toNullTerminatedStringRef(path_storage); - - if (::mkdir(p.begin(), S_IRWXU | S_IRWXG) == -1) { - if (errno != errc::file_exists) - return error_code(errno, system_category()); - existed = true; - } else - existed = false; - - return error_code::success(); -} - -error_code create_hard_link(const Twine &to, const Twine &from) { - // Get arguments. 
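
The deleted current_path above reserves a starting capacity and doubles it whenever getcwd(3) reports that the buffer is too small. The same grow-and-retry loop in isolation (this sketch tests for the POSIX ERANGE convention; the deleted code compares against the not_enough_memory condition instead):

    #include <unistd.h>
    #include <cerrno>
    #include <string>
    #include <vector>

    bool current_dir(std::string &out) {
      std::vector<char> buf(256);
      while (::getcwd(&buf[0], buf.size()) == 0) {
        if (errno != ERANGE)
          return false;                   // a real error
        buf.resize(buf.size() * 2);       // just not enough space: double and retry
      }
      out = &buf[0];
      return true;
    }
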
- SmallString<128> from_storage; - SmallString<128> to_storage; - StringRef f = from.toNullTerminatedStringRef(from_storage); - StringRef t = to.toNullTerminatedStringRef(to_storage); - - if (::link(t.begin(), f.begin()) == -1) - return error_code(errno, system_category()); - - return error_code::success(); -} - -error_code create_symlink(const Twine &to, const Twine &from) { - // Get arguments. - SmallString<128> from_storage; - SmallString<128> to_storage; - StringRef f = from.toNullTerminatedStringRef(from_storage); - StringRef t = to.toNullTerminatedStringRef(to_storage); - - if (::symlink(t.begin(), f.begin()) == -1) - return error_code(errno, system_category()); - - return error_code::success(); -} - -error_code remove(const Twine &path, bool &existed) { - SmallString<128> path_storage; - StringRef p = path.toNullTerminatedStringRef(path_storage); - - if (::remove(p.begin()) == -1) { - if (errno != errc::no_such_file_or_directory) - return error_code(errno, system_category()); - existed = false; - } else - existed = true; - - return error_code::success(); -} - -error_code rename(const Twine &from, const Twine &to) { - // Get arguments. - SmallString<128> from_storage; - SmallString<128> to_storage; - StringRef f = from.toNullTerminatedStringRef(from_storage); - StringRef t = to.toNullTerminatedStringRef(to_storage); - - if (::rename(f.begin(), t.begin()) == -1) { - // If it's a cross device link, copy then delete, otherwise return the error - if (errno == EXDEV) { - if (error_code ec = copy_file(from, to, copy_option::overwrite_if_exists)) - return ec; - bool Existed; - if (error_code ec = remove(from, Existed)) - return ec; - } else - return error_code(errno, system_category()); - } - - return error_code::success(); -} - -error_code resize_file(const Twine &path, uint64_t size) { - SmallString<128> path_storage; - StringRef p = path.toNullTerminatedStringRef(path_storage); - - if (::truncate(p.begin(), size) == -1) - return error_code(errno, system_category()); - - return error_code::success(); -} - -error_code exists(const Twine &path, bool &result) { - SmallString<128> path_storage; - StringRef p = path.toNullTerminatedStringRef(path_storage); - - if (::access(p.begin(), F_OK) == -1) { - if (errno != errc::no_such_file_or_directory) - return error_code(errno, system_category()); - result = false; - } else - result = true; - - return error_code::success(); -} - -bool equivalent(file_status A, file_status B) { - assert(status_known(A) && status_known(B)); - return A.fs_st_dev == B.fs_st_dev && - A.fs_st_ino == B.fs_st_ino; -} - -error_code equivalent(const Twine &A, const Twine &B, bool &result) { - file_status fsA, fsB; - if (error_code ec = status(A, fsA)) return ec; - if (error_code ec = status(B, fsB)) return ec; - result = equivalent(fsA, fsB); - return error_code::success(); -} - -error_code file_size(const Twine &path, uint64_t &result) { - SmallString<128> path_storage; - StringRef p = path.toNullTerminatedStringRef(path_storage); - - struct stat status; - if (::stat(p.begin(), &status) == -1) - return error_code(errno, system_category()); - if (!S_ISREG(status.st_mode)) - return make_error_code(errc::operation_not_permitted); - - result = status.st_size; - return error_code::success(); -} - -error_code status(const Twine &path, file_status &result) { - SmallString<128> path_storage; - StringRef p = path.toNullTerminatedStringRef(path_storage); - - struct stat status; - if (::stat(p.begin(), &status) != 0) { - error_code ec(errno, system_category()); - if (ec == 
errc::no_such_file_or_directory) - result = file_status(file_type::file_not_found); - else - result = file_status(file_type::status_error); - return ec; - } - - perms prms = static_cast(status.st_mode & perms_mask); - - if (S_ISDIR(status.st_mode)) - result = file_status(file_type::directory_file, prms); - else if (S_ISREG(status.st_mode)) - result = file_status(file_type::regular_file, prms); - else if (S_ISBLK(status.st_mode)) - result = file_status(file_type::block_file, prms); - else if (S_ISCHR(status.st_mode)) - result = file_status(file_type::character_file, prms); - else if (S_ISFIFO(status.st_mode)) - result = file_status(file_type::fifo_file, prms); - else if (S_ISSOCK(status.st_mode)) - result = file_status(file_type::socket_file, prms); - else - result = file_status(file_type::type_unknown, prms); - - result.fs_st_dev = status.st_dev; - result.fs_st_ino = status.st_ino; - - return error_code::success(); -} - -// Modifies permissions on a file. -error_code permissions(const Twine &path, perms prms) { - if ((prms & add_perms) && (prms & remove_perms)) - llvm_unreachable("add_perms and remove_perms are mutually exclusive"); - - // Get current permissions - file_status info; - if (error_code ec = status(path, info)) { - return ec; - } - - // Set updated permissions. - SmallString<128> path_storage; - StringRef p = path.toNullTerminatedStringRef(path_storage); - perms permsToSet; - if (prms & add_perms) { - permsToSet = (info.permissions() | prms) & perms_mask; - } else if (prms & remove_perms) { - permsToSet = (info.permissions() & ~prms) & perms_mask; - } else { - permsToSet = prms & perms_mask; - } - if (::chmod(p.begin(), static_cast(permsToSet))) { - return error_code(errno, system_category()); - } - - return error_code::success(); -} - -// Since this is most often used for temporary files, mode defaults to 0600. -error_code unique_file(const Twine &model, int &result_fd, - SmallVectorImpl &result_path, - bool makeAbsolute, unsigned mode) { - SmallString<128> Model; - model.toVector(Model); - // Null terminate. - Model.c_str(); - - if (makeAbsolute) { - // Make model absolute by prepending a temp directory if it's not already. - bool absolute = path::is_absolute(Twine(Model)); - if (!absolute) { - SmallString<128> TDir; - if (error_code ec = TempDir(TDir)) return ec; - path::append(TDir, Twine(Model)); - Model.swap(TDir); - } - } - - // From here on, DO NOT modify model. It may be needed if the randomly chosen - // path already exists. - SmallString<128> RandomPath = Model; - -retry_random_path: - // Replace '%' with random chars. - for (unsigned i = 0, e = Model.size(); i != e; ++i) { - if (Model[i] == '%') - RandomPath[i] = "0123456789abcdef"[sys::Process::GetRandomNumber() & 15]; - } - - // Make sure we don't fall into an infinite loop by constantly trying - // to create the parent path. - bool TriedToCreateParent = false; - - // Try to open + create the file. -rety_open_create: - int RandomFD = ::open(RandomPath.c_str(), O_RDWR | O_CREAT | O_EXCL, mode); - if (RandomFD == -1) { - int SavedErrno = errno; - // If the file existed, try again, otherwise, error. - if (SavedErrno == errc::file_exists) - goto retry_random_path; - // If path prefix doesn't exist, try to create it. 
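
The deleted unique_file replaces each '%' in the model with a random hex digit and relies on O_CREAT|O_EXCL to detect that it lost a race, retrying with a fresh name; the hunk continuing below additionally creates missing parent directories. The core retry as a reduced sketch (rand() stands in for the stronger Process::GetRandomNumber used above):

    #include <fcntl.h>
    #include <cerrno>
    #include <cstdlib>
    #include <string>

    int open_unique(const std::string &model) { // model contains '%' placeholders
      for (;;) {
        std::string name = model;
        for (size_t i = 0; i < name.size(); ++i)
          if (name[i] == '%')
            name[i] = "0123456789abcdef"[std::rand() & 15];
        int fd = ::open(name.c_str(), O_RDWR | O_CREAT | O_EXCL, 0600);
        if (fd != -1)
          return fd;                      // O_EXCL succeeded: the name is ours
        if (errno != EEXIST)
          return -1;                      // a real error: give up
      }                                   // somebody raced us: pick a new name
    }
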
- if (SavedErrno == errc::no_such_file_or_directory && !TriedToCreateParent) { - TriedToCreateParent = true; - StringRef p(RandomPath); - SmallString<64> dir_to_create; - for (path::const_iterator i = path::begin(p), - e = --path::end(p); i != e; ++i) { - path::append(dir_to_create, *i); - bool Exists; - if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec; - if (!Exists) { - // Don't try to create network paths. - if (i->size() > 2 && (*i)[0] == '/' && - (*i)[1] == '/' && - (*i)[2] != '/') - return make_error_code(errc::no_such_file_or_directory); - if (::mkdir(dir_to_create.c_str(), 0700) == -1 && - errno != errc::file_exists) - return error_code(errno, system_category()); - } - } - goto rety_open_create; - } - - return error_code(SavedErrno, system_category()); - } - - // Make the path absolute. - char real_path_buff[PATH_MAX + 1]; - if (realpath(RandomPath.c_str(), real_path_buff) == NULL) { - int error = errno; - ::close(RandomFD); - ::unlink(RandomPath.c_str()); - return error_code(error, system_category()); - } - - result_path.clear(); - StringRef d(real_path_buff); - result_path.append(d.begin(), d.end()); - - result_fd = RandomFD; - return error_code::success(); -} - -error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { - AutoFD ScopedFD(FD); - if (!CloseFD) - ScopedFD.take(); - - // Figure out how large the file is. - struct stat FileInfo; - if (fstat(FD, &FileInfo) == -1) - return error_code(errno, system_category()); - uint64_t FileSize = FileInfo.st_size; - - if (Size == 0) - Size = FileSize; - else if (FileSize < Size) { - // We need to grow the file. - if (ftruncate(FD, Size) == -1) - return error_code(errno, system_category()); - } - - int flags = (Mode == readwrite) ? MAP_SHARED : MAP_PRIVATE; - int prot = (Mode == readonly) ? PROT_READ : (PROT_READ | PROT_WRITE); -#ifdef MAP_FILE - flags |= MAP_FILE; -#endif - Mapping = ::mmap(0, Size, prot, flags, FD, Offset); - if (Mapping == MAP_FAILED) - return error_code(errno, system_category()); - return error_code::success(); -} - -mapped_file_region::mapped_file_region(const Twine &path, - mapmode mode, - uint64_t length, - uint64_t offset, - error_code &ec) - : Mode(mode) - , Size(length) - , Mapping() { - // Make sure that the requested size fits within SIZE_T. - if (length > std::numeric_limits::max()) { - ec = make_error_code(errc::invalid_argument); - return; - } - - SmallString<128> path_storage; - StringRef name = path.toNullTerminatedStringRef(path_storage); - int oflags = (mode == readonly) ? O_RDONLY : O_RDWR; - int ofd = ::open(name.begin(), oflags); - if (ofd == -1) { - ec = error_code(errno, system_category()); - return; - } - - ec = init(ofd, true, offset); - if (ec) - Mapping = 0; -} - -mapped_file_region::mapped_file_region(int fd, - bool closefd, - mapmode mode, - uint64_t length, - uint64_t offset, - error_code &ec) - : Mode(mode) - , Size(length) - , Mapping() { - // Make sure that the requested size fits within SIZE_T. 
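
mapped_file_region::init (the surviving copy earlier in this patch as well as this deleted one) sizes the mapping from fstat(2), grows the file with ftruncate(2) when the caller asked for more than is on disk, and only then calls mmap(2). Condensed into a hedged sketch, with the error handling collapsed into the MAP_FAILED sentinel:

    #include <sys/mman.h>
    #include <sys/stat.h>
    #include <unistd.h>
    #include <cstdint>

    void *map_file(int fd, size_t &size, bool writable) {
      struct stat st;
      if (::fstat(fd, &st) == -1)
        return MAP_FAILED;
      if (size == 0)
        size = st.st_size;                // default: map the whole file
      else if (uint64_t(st.st_size) < size && ::ftruncate(fd, size) == -1)
        return MAP_FAILED;                // grow the file to the mapping size
      int prot  = writable ? PROT_READ | PROT_WRITE : PROT_READ;
      int flags = writable ? MAP_SHARED : MAP_PRIVATE;
      return ::mmap(0, size, prot, flags, fd, 0);
    }
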
- if (length > std::numeric_limits::max()) { - ec = make_error_code(errc::invalid_argument); - return; - } - - ec = init(fd, closefd, offset); - if (ec) - Mapping = 0; -} - -mapped_file_region::~mapped_file_region() { - if (Mapping) - ::munmap(Mapping, Size); -} - -#if LLVM_HAS_RVALUE_REFERENCES -mapped_file_region::mapped_file_region(mapped_file_region &&other) - : Mode(other.Mode), Size(other.Size), Mapping(other.Mapping) { - other.Mapping = 0; -} -#endif - -mapped_file_region::mapmode mapped_file_region::flags() const { - assert(Mapping && "Mapping failed but used anyway!"); - return Mode; -} - -uint64_t mapped_file_region::size() const { - assert(Mapping && "Mapping failed but used anyway!"); - return Size; -} - -char *mapped_file_region::data() const { - assert(Mapping && "Mapping failed but used anyway!"); - assert(Mode != readonly && "Cannot get non const data for readonly mapping!"); - return reinterpret_cast(Mapping); -} - -const char *mapped_file_region::const_data() const { - assert(Mapping && "Mapping failed but used anyway!"); - return reinterpret_cast(Mapping); -} - -int mapped_file_region::alignment() { - return process::get_self()->page_size(); -} - -error_code detail::directory_iterator_construct(detail::DirIterState &it, - StringRef path){ - SmallString<128> path_null(path); - DIR *directory = ::opendir(path_null.c_str()); - if (directory == 0) - return error_code(errno, system_category()); - - it.IterationHandle = reinterpret_cast(directory); - // Add something for replace_filename to replace. - path::append(path_null, "."); - it.CurrentEntry = directory_entry(path_null.str()); - return directory_iterator_increment(it); -} - -error_code detail::directory_iterator_destruct(detail::DirIterState &it) { - if (it.IterationHandle) - ::closedir(reinterpret_cast(it.IterationHandle)); - it.IterationHandle = 0; - it.CurrentEntry = directory_entry(); - return error_code::success(); -} - -error_code detail::directory_iterator_increment(detail::DirIterState &it) { - errno = 0; - dirent *cur_dir = ::readdir(reinterpret_cast(it.IterationHandle)); - if (cur_dir == 0 && errno != 0) { - return error_code(errno, system_category()); - } else if (cur_dir != 0) { - StringRef name(cur_dir->d_name, NAMLEN(cur_dir)); - if ((name.size() == 1 && name[0] == '.') || - (name.size() == 2 && name[0] == '.' && name[1] == '.')) - return directory_iterator_increment(it); - it.CurrentEntry.replace_filename(name); - } else - return directory_iterator_destruct(it); - - return error_code::success(); -} - -error_code get_magic(const Twine &path, uint32_t len, - SmallVectorImpl &result) { - SmallString<128> PathStorage; - StringRef Path = path.toNullTerminatedStringRef(PathStorage); - result.set_size(0); - - // Open path. - std::FILE *file = std::fopen(Path.data(), "rb"); - if (file == 0) - return error_code(errno, system_category()); - - // Reserve storage. - result.reserve(len); - - // Read magic! 
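
Once the fread(3) below returns, get_magic has to separate three outcomes: a hard I/O error (ferror), a file shorter than the requested magic length (feof plus a short count), and plain success. That decision tree in isolation (read_magic is a hypothetical name):

    #include <cstdio>
    #include <cstddef>

    long read_magic(const char *path, char *buf, size_t want) {
      std::FILE *f = std::fopen(path, "rb");
      if (!f)
        return -1;
      size_t got = std::fread(buf, 1, want, f);
      bool bad = std::ferror(f) != 0;     // a real read error, not just EOF
      std::fclose(f);
      if (bad)
        return -1;
      return long(got);                   // may be < want for a short file
    }
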
- size_t size = std::fread(result.data(), 1, len, file); - if (std::ferror(file) != 0) { - std::fclose(file); - return error_code(errno, system_category()); - } else if (size != result.size()) { - if (std::feof(file) != 0) { - std::fclose(file); - result.set_size(size); - return make_error_code(errc::value_too_large); - } - } - std::fclose(file); - result.set_size(len); - return error_code::success(); -} - -error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, - bool map_writable, void *&result) { - SmallString<128> path_storage; - StringRef name = path.toNullTerminatedStringRef(path_storage); - int oflags = map_writable ? O_RDWR : O_RDONLY; - int ofd = ::open(name.begin(), oflags); - if ( ofd == -1 ) - return error_code(errno, system_category()); - AutoFD fd(ofd); - int flags = map_writable ? MAP_SHARED : MAP_PRIVATE; - int prot = map_writable ? (PROT_READ|PROT_WRITE) : PROT_READ; -#ifdef MAP_FILE - flags |= MAP_FILE; -#endif - result = ::mmap(0, size, prot, flags, fd, file_offset); - if (result == MAP_FAILED) { - return error_code(errno, system_category()); - } - - return error_code::success(); -} - -error_code unmap_file_pages(void *base, size_t size) { - if ( ::munmap(base, size) == -1 ) - return error_code(errno, system_category()); - - return error_code::success(); -} - - -} // end namespace fs -} // end namespace sys -} // end namespace llvm diff --git a/contrib/llvm/lib/Support/Unix/Process.inc b/contrib/llvm/lib/Support/Unix/Process.inc index 9a4454f..c5778e7 100644 --- a/contrib/llvm/lib/Support/Unix/Process.inc +++ b/contrib/llvm/lib/Support/Unix/Process.inc @@ -13,6 +13,9 @@ #include "Unix.h" #include "llvm/ADT/Hashing.h" +#include "llvm/ADT/StringRef.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" #include "llvm/Support/TimeValue.h" #ifdef HAVE_SYS_TIME_H #include @@ -86,13 +89,10 @@ TimeValue self_process::get_system_time() const { return getRUsageTimes().second; } +// On Cygwin, getpagesize() returns 64k(AllocationGranularity) and +// offset in mmap(3) should be aligned to the AllocationGranularity. static unsigned getPageSize() { -#if defined(__CYGWIN__) - // On Cygwin, getpagesize() returns 64k but the page size for the purposes of - // memory protection and mmap() is 4k. 
- // See http://www.cygwin.com/ml/cygwin/2009-01/threads.html#00492 - const int page_size = 0x1000; -#elif defined(HAVE_GETPAGESIZE) +#if defined(HAVE_GETPAGESIZE) const int page_size = ::getpagesize(); #elif defined(HAVE_SYSCONF) long page_size = ::sysconf(_SC_PAGE_SIZE); @@ -138,14 +138,6 @@ void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time, llvm::tie(user_time, sys_time) = getRUsageTimes(); } -int Process::GetCurrentUserId() { - return getuid(); -} - -int Process::GetCurrentGroupId() { - return getgid(); -} - #if defined(HAVE_MACH_MACH_H) && !defined(__GNU__) #include #endif @@ -190,6 +182,22 @@ void Process::PreventCoreFiles() { #endif } +Optional Process::GetEnv(StringRef Name) { + std::string NameStr = Name.str(); + const char *Val = ::getenv(NameStr.c_str()); + if (!Val) + return None; + return std::string(Val); +} + +error_code Process::GetArgumentVector(SmallVectorImpl &ArgsOut, + ArrayRef ArgsIn, + SpecificBumpPtrAllocator &) { + ArgsOut.append(ArgsIn.begin(), ArgsIn.end()); + + return error_code::success(); +} + bool Process::StandardInIsUserInput() { return FileDescriptorIsDisplayed(STDIN_FILENO); } @@ -224,8 +232,6 @@ static unsigned getColumns(int FileID) { #if defined(HAVE_SYS_IOCTL_H) && defined(HAVE_TERMIOS_H) // Try to determine the width of the terminal. struct winsize ws; - // Zero-fill ws to avoid a false positive from MemorySanitizer. - memset(&ws, 0, sizeof(ws)); if (ioctl(FileID, TIOCGWINSZ, &ws) == 0) Columns = ws.ws_col; #endif @@ -247,22 +253,61 @@ unsigned Process::StandardErrColumns() { return getColumns(2); } -static bool terminalHasColors() { - if (const char *term = std::getenv("TERM")) { - // Most modern terminals support ANSI escape sequences for colors. - // We could check terminfo, or have a list of known terms that support - // colors, but that would be overkill. - // The user can always ask for no colors by setting TERM to dumb, or - // using a commandline flag. - return strcmp(term, "dumb") != 0; - } +#ifdef HAVE_TERMINFO +// We manually declare these extern functions because finding the correct +// headers from various terminfo, curses, or other sources is harder than +// writing their specs down. +extern "C" int setupterm(char *term, int filedes, int *errret); +extern "C" struct term *set_curterm(struct term *termp); +extern "C" int del_curterm(struct term *termp); +extern "C" int tigetnum(char *capname); +#endif + +static bool terminalHasColors(int fd) { +#ifdef HAVE_TERMINFO + // First, acquire a global lock because these C routines are thread hostile. + static sys::Mutex M; + MutexGuard G(M); + + int errret = 0; + if (setupterm((char *)0, fd, &errret) != 0) + // Regardless of why, if we can't get terminfo, we shouldn't try to print + // colors. + return false; + + // Test whether the terminal as set up supports color output. How to do this + // isn't entirely obvious. We can use the curses routine 'has_colors' but it + // would be nice to avoid a dependency on curses proper when we can make do + // with a minimal terminfo parsing library. Also, we don't really care whether + // the terminal supports the curses-specific color changing routines, merely + // if it will interpret ANSI color escape codes in a reasonable way. Thus, the + // strategy here is just to query the baseline colors capability and if it + // supports colors at all to assume it will translate the escape codes into + // whatever range of colors it does support. We can add more detailed tests + // here if users report them as necessary. 
+ // + // The 'tigetnum' routine returns -2 or -1 on errors, and might return 0 if + // the terminfo says that no colors are supported. + bool HasColors = tigetnum(const_cast("colors")) > 0; + + // Now extract the structure allocated by setupterm and free its memory + // through a really silly dance. + struct term *termp = set_curterm((struct term *)0); + (void)del_curterm(termp); // Drop any errors here. + + // Return true if we found a color capabilities for the current terminal. + if (HasColors) + return true; +#endif + + // Otherwise, be conservative. return false; } bool Process::FileDescriptorHasColors(int fd) { // A file descriptor has colors if it is displayed and the terminal has // colors. - return FileDescriptorIsDisplayed(fd) && terminalHasColors(); + return FileDescriptorIsDisplayed(fd) && terminalHasColors(fd); } bool Process::StandardOutHasColors() { @@ -273,29 +318,15 @@ bool Process::StandardErrHasColors() { return FileDescriptorHasColors(STDERR_FILENO); } +void Process::UseANSIEscapeCodes(bool /*enable*/) { + // No effect. +} + bool Process::ColorNeedsFlush() { // No, we use ANSI escape sequences. return false; } -#define COLOR(FGBG, CODE, BOLD) "\033[0;" BOLD FGBG CODE "m" - -#define ALLCOLORS(FGBG,BOLD) {\ - COLOR(FGBG, "0", BOLD),\ - COLOR(FGBG, "1", BOLD),\ - COLOR(FGBG, "2", BOLD),\ - COLOR(FGBG, "3", BOLD),\ - COLOR(FGBG, "4", BOLD),\ - COLOR(FGBG, "5", BOLD),\ - COLOR(FGBG, "6", BOLD),\ - COLOR(FGBG, "7", BOLD)\ - } - -static const char colorcodes[2][2][8][10] = { - { ALLCOLORS("3",""), ALLCOLORS("3","1;") }, - { ALLCOLORS("4",""), ALLCOLORS("4","1;") } -}; - const char *Process::OutputColor(char code, bool bold, bool bg) { return colorcodes[bg?1:0][bold?1:0][code&7]; } diff --git a/contrib/llvm/lib/Support/Unix/Program.inc b/contrib/llvm/lib/Support/Unix/Program.inc index aa03d48..78b2971 100644 --- a/contrib/llvm/lib/Support/Unix/Program.inc +++ b/contrib/llvm/lib/Support/Unix/Program.inc @@ -36,6 +36,9 @@ #include #endif #ifdef HAVE_POSIX_SPAWN +#ifdef __sun__ +#define _RESTRICT_KYWD +#endif #include #if !defined(__APPLE__) extern char **environ; @@ -47,20 +50,16 @@ namespace llvm { using namespace sys; -Program::Program() : Data_(0) {} - -Program::~Program() {} +ProcessInfo::ProcessInfo() : Pid(0), ReturnCode(0) {} // This function just uses the PATH environment variable to find the program. -Path -Program::FindProgramByName(const std::string& progName) { +std::string +sys::FindProgramByName(const std::string& progName) { // Check some degenerate cases if (progName.length() == 0) // no program - return Path(); - Path temp; - if (!temp.set(progName)) // invalid name - return Path(); + return ""; + std::string temp = progName; // Use the given path verbatim if it contains any slashes; this matches // the behavior of sh(1) and friends. if (progName.find('/') != std::string::npos) @@ -72,7 +71,7 @@ Program::FindProgramByName(const std::string& progName) { // Get the path. If its empty, we can't do anything to find it. const char *PathStr = getenv("PATH"); if (PathStr == 0) - return Path(); + return ""; // Now we have a colon separated list of directories to search; try them. size_t PathLen = strlen(PathStr); @@ -81,12 +80,10 @@ Program::FindProgramByName(const std::string& progName) { const char *Colon = std::find(PathStr, PathStr+PathLen, ':'); // Check to see if this first directory contains the executable... 
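
The loop continuing below checks one $PATH directory per iteration. The whole search, reduced to a standalone sketch (illustrative names; the real code uses sys::fs::can_execute rather than access(2)):

    #include <unistd.h>
    #include <cstdlib>
    #include <sstream>
    #include <string>

    std::string find_in_path(const std::string &prog) {
      if (prog.find('/') != std::string::npos)
        return prog;                      // names with slashes are used verbatim
      const char *path = std::getenv("PATH");
      if (!path)
        return std::string();
      std::istringstream ss(path);
      std::string dir;
      while (std::getline(ss, dir, ':')) { // PATH is a colon-separated list
        if (dir.empty())
          continue;                       // empty entry (the cwd) skipped for brevity
        std::string cand = dir + "/" + prog;
        if (::access(cand.c_str(), X_OK) == 0)
          return cand;                    // found an executable candidate
      }
      return std::string();
    }
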
- Path FilePath; - if (FilePath.set(std::string(PathStr,Colon))) { - FilePath.appendComponent(progName); - if (FilePath.canExecute()) - return FilePath; // Found the executable! - } + SmallString<128> FilePath(PathStr,Colon); + sys::path::append(FilePath, progName); + if (sys::fs::can_execute(Twine(FilePath))) + return FilePath.str(); // Found the executable! // Nope it wasn't in this directory, check the next path in the list! PathLen -= Colon-PathStr; @@ -98,23 +95,23 @@ Program::FindProgramByName(const std::string& progName) { PathLen--; } } - return Path(); + return ""; } -static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { +static bool RedirectIO(const StringRef *Path, int FD, std::string* ErrMsg) { if (Path == 0) // Noop return false; - const char *File; - if (Path->isEmpty()) + std::string File; + if (Path->empty()) // Redirect empty paths to /dev/null File = "/dev/null"; else - File = Path->c_str(); + File = *Path; // Open the file - int InFD = open(File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666); + int InFD = open(File.c_str(), FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666); if (InFD == -1) { - MakeErrMsg(ErrMsg, "Cannot open file '" + std::string(File) + "' for " + MakeErrMsg(ErrMsg, "Cannot open file '" + File + "' for " + (FD == 0 ? "input" : "output")); return true; } @@ -130,19 +127,20 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) { } #ifdef HAVE_POSIX_SPAWN -static bool RedirectIO_PS(const Path *Path, int FD, std::string *ErrMsg, +static bool RedirectIO_PS(const std::string *Path, int FD, std::string *ErrMsg, posix_spawn_file_actions_t *FileActions) { if (Path == 0) // Noop return false; const char *File; - if (Path->isEmpty()) + if (Path->empty()) // Redirect empty paths to /dev/null File = "/dev/null"; else File = Path->c_str(); - if (int Err = posix_spawn_file_actions_addopen(FileActions, FD, - File, FD == 0 ? O_RDONLY : O_WRONLY|O_CREAT, 0666)) + if (int Err = posix_spawn_file_actions_addopen( + FileActions, FD, File, + FD == 0 ? O_RDONLY : O_WRONLY | O_CREAT, 0666)) return MakeErrMsg(ErrMsg, "Cannot dup2", Err); return false; } @@ -180,10 +178,18 @@ static void SetMemoryLimits (unsigned size) #endif } -bool -Program::Execute(const Path &path, const char **args, const char **envp, - const Path **redirects, unsigned memoryLimit, - std::string *ErrMsg) { +} + +static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, + const char **envp, const StringRef **redirects, + unsigned memoryLimit, std::string *ErrMsg) { + if (!llvm::sys::fs::exists(Program)) { + if (ErrMsg) + *ErrMsg = std::string("Executable \"") + Program.str() + + std::string("\" doesn't exist!"); + return false; + } + // If this OS has posix_spawn and there is no memory limit being implied, use // posix_spawn. It is more efficient than fork/exec. #ifdef HAVE_POSIX_SPAWN @@ -191,18 +197,32 @@ Program::Execute(const Path &path, const char **args, const char **envp, posix_spawn_file_actions_t FileActionsStore; posix_spawn_file_actions_t *FileActions = 0; + // If we call posix_spawn_file_actions_addopen we have to make sure the + // c strings we pass to it stay alive until the call to posix_spawn, + // so we copy any StringRefs into this variable. 
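
RedirectIO above opens the requested file (or /dev/null for an empty path); the half of the function elided from this hunk then has to splice that descriptor onto fd 0, 1 or 2. A generic sketch of such a redirection, stated as an assumption about the elided part (dup2 is the conventional tool):

    #include <fcntl.h>
    #include <unistd.h>

    bool redirect_fd(const char *file, int fd) {
      int in = ::open(file, fd == 0 ? O_RDONLY : O_WRONLY | O_CREAT, 0666);
      if (in == -1)
        return false;
      bool ok = ::dup2(in, fd) != -1;     // make fd refer to the opened file
      ::close(in);                        // the extra descriptor is not needed
      return ok;
    }
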
+ std::string RedirectsStorage[3]; + if (redirects) { + std::string *RedirectsStr[3] = {0, 0, 0}; + for (int I = 0; I < 3; ++I) { + if (redirects[I]) { + RedirectsStorage[I] = *redirects[I]; + RedirectsStr[I] = &RedirectsStorage[I]; + } + } + FileActions = &FileActionsStore; posix_spawn_file_actions_init(FileActions); // Redirect stdin/stdout. - if (RedirectIO_PS(redirects[0], 0, ErrMsg, FileActions) || - RedirectIO_PS(redirects[1], 1, ErrMsg, FileActions)) + if (RedirectIO_PS(RedirectsStr[0], 0, ErrMsg, FileActions) || + RedirectIO_PS(RedirectsStr[1], 1, ErrMsg, FileActions)) return false; if (redirects[1] == 0 || redirects[2] == 0 || *redirects[1] != *redirects[2]) { // Just redirect stderr - if (RedirectIO_PS(redirects[2], 2, ErrMsg, FileActions)) return false; + if (RedirectIO_PS(RedirectsStr[2], 2, ErrMsg, FileActions)) + return false; } else { // If stdout and stderr should go to the same place, redirect stderr // to the FD already open for stdout. @@ -222,7 +242,7 @@ Program::Execute(const Path &path, const char **args, const char **envp, // Explicitly initialized to prevent what appears to be a valgrind false // positive. pid_t PID = 0; - int Err = posix_spawn(&PID, path.c_str(), FileActions, /*attrp*/0, + int Err = posix_spawn(&PID, Program.str().c_str(), FileActions, /*attrp*/0, const_cast(args), const_cast(envp)); if (FileActions) @@ -231,7 +251,8 @@ Program::Execute(const Path &path, const char **args, const char **envp, if (Err) return !MakeErrMsg(ErrMsg, "posix_spawn failed", Err); - Data_ = reinterpret_cast(PID); + PI.Pid = PID; + return true; } #endif @@ -272,12 +293,13 @@ Program::Execute(const Path &path, const char **args, const char **envp, } // Execute! + std::string PathStr = Program; if (envp != 0) - execve(path.c_str(), + execve(PathStr.c_str(), const_cast(args), const_cast(envp)); else - execv(path.c_str(), + execv(PathStr.c_str(), const_cast(args)); // If the execve() failed, we should exit. Follow Unix protocol and // return 127 if the executable was not found, and 126 otherwise. @@ -293,62 +315,71 @@ Program::Execute(const Path &path, const char **args, const char **envp, break; } - Data_ = reinterpret_cast(child); + PI.Pid = child; return true; } -int -Program::Wait(const sys::Path &path, - unsigned secondsToWait, - std::string* ErrMsg) -{ +namespace llvm { + +ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, + bool WaitUntilTerminates, std::string *ErrMsg) { #ifdef HAVE_SYS_WAIT_H struct sigaction Act, Old; - - if (Data_ == 0) { - MakeErrMsg(ErrMsg, "Process not started!"); - return -1; - } - - // Install a timeout handler. The handler itself does nothing, but the simple - // fact of having a handler at all causes the wait below to return with EINTR, - // unlike if we used SIG_IGN. - if (secondsToWait) { + assert(PI.Pid && "invalid pid to wait on, process not started?"); + + int WaitPidOptions = 0; + pid_t ChildPid = PI.Pid; + if (WaitUntilTerminates) { + SecondsToWait = 0; + ChildPid = -1; // mimic a wait() using waitpid() + } else if (SecondsToWait) { + // Install a timeout handler. The handler itself does nothing, but the + // simple fact of having a handler at all causes the wait below to return + // with EINTR, unlike if we used SIG_IGN. memset(&Act, 0, sizeof(Act)); Act.sa_handler = TimeOutHandler; sigemptyset(&Act.sa_mask); sigaction(SIGALRM, &Act, &Old); - alarm(secondsToWait); - } + alarm(SecondsToWait); + } else if (SecondsToWait == 0) + WaitPidOptions = WNOHANG; // Parent process: Wait for the child process to terminate. 
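
Execute above takes the posix_spawn(3) route whenever no memory limit is requested, since it is cheaper than fork/exec. Stripped of the redirection plumbing, that step amounts to the following (illustrative helper; as the conditional above notes, Darwin declares environ differently):

    #include <spawn.h>
    #include <sys/types.h>

    extern char **environ;

    bool spawn_child(const char *prog, char *const argv[], pid_t &pid) {
      int err = ::posix_spawn(&pid, prog, /*file_actions=*/0, /*attrp=*/0,
                              argv, environ);
      return err == 0;  // posix_spawn returns an errno value, not -1 plus errno
    }
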
int status; - uint64_t pid = reinterpret_cast(Data_); - pid_t child = static_cast(pid); - while (waitpid(pid, &status, 0) != child) - if (secondsToWait && errno == EINTR) { - // Kill the child. - kill(child, SIGKILL); - - // Turn off the alarm and restore the signal handler - alarm(0); - sigaction(SIGALRM, &Old, 0); - - // Wait for child to die - if (wait(&status) != child) - MakeErrMsg(ErrMsg, "Child timed out but wouldn't die"); - else - MakeErrMsg(ErrMsg, "Child timed out", 0); - - return -2; // Timeout detected - } else if (errno != EINTR) { - MakeErrMsg(ErrMsg, "Error waiting for child process"); - return -1; + ProcessInfo WaitResult; + WaitResult.Pid = waitpid(ChildPid, &status, WaitPidOptions); + if (WaitResult.Pid != PI.Pid) { + if (WaitResult.Pid == 0) { + // Non-blocking wait. + return WaitResult; + } else { + if (SecondsToWait && errno == EINTR) { + // Kill the child. + kill(PI.Pid, SIGKILL); + + // Turn off the alarm and restore the signal handler + alarm(0); + sigaction(SIGALRM, &Old, 0); + + // Wait for child to die + if (wait(&status) != ChildPid) + MakeErrMsg(ErrMsg, "Child timed out but wouldn't die"); + else + MakeErrMsg(ErrMsg, "Child timed out", 0); + + WaitResult.ReturnCode = -2; // Timeout detected + return WaitResult; + } else if (errno != EINTR) { + MakeErrMsg(ErrMsg, "Error waiting for child process"); + WaitResult.ReturnCode = -1; + return WaitResult; + } } + } // We exited normally without timeout, so turn off the timer. - if (secondsToWait) { + if (SecondsToWait && !WaitUntilTerminates) { alarm(0); sigaction(SIGALRM, &Old, 0); } @@ -358,24 +389,19 @@ Program::Wait(const sys::Path &path, int result = 0; if (WIFEXITED(status)) { result = WEXITSTATUS(status); -#ifdef HAVE_POSIX_SPAWN - // The posix_spawn child process returns 127 on any kind of error. - // Following the POSIX convention for command-line tools (which posix_spawn - // itself apparently does not), check to see if the failure was due to some - // reason other than the file not existing, and return 126 in this case. - bool Exists; - if (result == 127 && !llvm::sys::fs::exists(path.str(), Exists) && Exists) - result = 126; -#endif + WaitResult.ReturnCode = result; + if (result == 127) { if (ErrMsg) *ErrMsg = llvm::sys::StrError(ENOENT); - return -1; + WaitResult.ReturnCode = -1; + return WaitResult; } if (result == 126) { if (ErrMsg) *ErrMsg = "Program could not be executed"; - return -1; + WaitResult.ReturnCode = -1; + return WaitResult; } } else if (WIFSIGNALED(status)) { if (ErrMsg) { @@ -387,27 +413,27 @@ Program::Wait(const sys::Path &path, } // Return a special value to indicate that the process received an unhandled // signal during execution as opposed to failing to execute. - return -2; + WaitResult.ReturnCode = -2; } - return result; #else if (ErrMsg) *ErrMsg = "Program::Wait is not implemented on this platform yet!"; - return -1; + WaitResult.ReturnCode = -2; #endif + return WaitResult; } -error_code Program::ChangeStdinToBinary(){ +error_code sys::ChangeStdinToBinary(){ // Do nothing, as Unix doesn't differentiate between text and binary. return make_error_code(errc::success); } -error_code Program::ChangeStdoutToBinary(){ +error_code sys::ChangeStdoutToBinary(){ // Do nothing, as Unix doesn't differentiate between text and binary. return make_error_code(errc::success); } -error_code Program::ChangeStderrToBinary(){ +error_code sys::ChangeStderrToBinary(){ // Do nothing, as Unix doesn't differentiate between text and binary. 
return make_error_code(errc::success); } @@ -432,5 +458,4 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef Args) { } return true; } - } diff --git a/contrib/llvm/lib/Support/Unix/Signals.inc b/contrib/llvm/lib/Support/Unix/Signals.inc index 64d1fc1..b4c78d6 100644 --- a/contrib/llvm/lib/Support/Unix/Signals.inc +++ b/contrib/llvm/lib/Support/Unix/Signals.inc @@ -55,8 +55,7 @@ static std::vector > CallBacksToRun; static const int IntSigs[] = { SIGHUP, SIGINT, SIGPIPE, SIGTERM, SIGUSR1, SIGUSR2 }; -static const int *const IntSigsEnd = - IntSigs + sizeof(IntSigs) / sizeof(IntSigs[0]); +static const int *const IntSigsEnd = array_endof(IntSigs); // KillSigs - Signals that represent that we have a bug, and our prompt // termination has been ordered. @@ -75,8 +74,7 @@ static const int KillSigs[] = { , SIGEMT #endif }; -static const int *const KillSigsEnd = - KillSigs + sizeof(KillSigs) / sizeof(KillSigs[0]); +static const int *const KillSigsEnd = array_endof(KillSigs); static unsigned NumRegisteredSignals = 0; static struct { @@ -211,11 +209,11 @@ void llvm::sys::SetInterruptFunction(void (*IF)()) { } // RemoveFileOnSignal - The public API -bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, +bool llvm::sys::RemoveFileOnSignal(StringRef Filename, std::string* ErrMsg) { SignalsMutex.acquire(); std::string *OldPtr = FilesToRemove.empty() ? 0 : &FilesToRemove[0]; - FilesToRemove.push_back(Filename.str()); + FilesToRemove.push_back(Filename); // We want to call 'c_str()' on every std::string in this vector so that if // the underlying implementation requires a re-allocation, it happens here @@ -235,10 +233,10 @@ bool llvm::sys::RemoveFileOnSignal(const sys::Path &Filename, } // DontRemoveFileOnSignal - The public API -void llvm::sys::DontRemoveFileOnSignal(const sys::Path &Filename) { +void llvm::sys::DontRemoveFileOnSignal(StringRef Filename) { SignalsMutex.acquire(); std::vector::reverse_iterator RI = - std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename.str()); + std::find(FilesToRemove.rbegin(), FilesToRemove.rend(), Filename); std::vector::iterator I = FilesToRemove.end(); if (RI != FilesToRemove.rend()) I = FilesToRemove.erase(RI.base()-1); @@ -335,7 +333,7 @@ static void PrintStackTraceSignalHandler(void *) { void llvm::sys::PrintStackTraceOnErrorSignal() { AddSignalHandler(PrintStackTraceSignalHandler, 0); -#if defined(__APPLE__) +#if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES) // Environment variable to disable any kind of crash dialog. if (getenv("LLVM_DISABLE_CRASH_REPORT")) { mach_port_t self = mach_task_self(); @@ -361,7 +359,7 @@ void llvm::sys::PrintStackTraceOnErrorSignal() { // the same linkage unit by just defining our own versions of the assert handler // and abort. 
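
sys::Wait earlier installs a real, do-nothing SIGALRM handler rather than SIG_IGN precisely so that the blocking waitpid(2) gets interrupted with EINTR when the alarm fires. That timeout idiom in isolation (hypothetical helper):

    #include <signal.h>
    #include <sys/wait.h>
    #include <unistd.h>
    #include <cerrno>

    static void on_alarm(int) {}          // the handler itself does nothing

    pid_t wait_with_timeout(pid_t child, int *status, unsigned seconds) {
      struct sigaction act = {}, old;
      act.sa_handler = on_alarm;          // a handler, not SIG_IGN: wait gets EINTR
      sigemptyset(&act.sa_mask);
      ::sigaction(SIGALRM, &act, &old);
      ::alarm(seconds);
      pid_t r = ::waitpid(child, status, 0);
      int saved = errno;                  // alarm(0)/sigaction may clobber errno
      ::alarm(0);
      ::sigaction(SIGALRM, &old, 0);
      errno = saved;
      return r;                           // -1 with EINTR means the timeout fired
    }
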
-#ifdef __APPLE__ +#if defined(__APPLE__) && defined(ENABLE_CRASH_OVERRIDES) #include #include diff --git a/contrib/llvm/lib/Support/Unix/ThreadLocal.inc b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc index 2b4c901..f14d0fa 100644 --- a/contrib/llvm/lib/Support/Unix/ThreadLocal.inc +++ b/contrib/llvm/lib/Support/Unix/ThreadLocal.inc @@ -18,7 +18,7 @@ namespace llvm { using namespace sys; -ThreadLocalImpl::ThreadLocalImpl() { } +ThreadLocalImpl::ThreadLocalImpl() : data() { } ThreadLocalImpl::~ThreadLocalImpl() { } void ThreadLocalImpl::setInstance(const void* d) { data = const_cast(d);} const void* ThreadLocalImpl::getInstance() { return data; } diff --git a/contrib/llvm/lib/Support/Unix/TimeValue.inc b/contrib/llvm/lib/Support/Unix/TimeValue.inc index df8558b..80532b0 100644 --- a/contrib/llvm/lib/Support/Unix/TimeValue.inc +++ b/contrib/llvm/lib/Support/Unix/TimeValue.inc @@ -22,18 +22,13 @@ namespace llvm { using namespace sys; std::string TimeValue::str() const { - char buffer[32]; - - time_t ourTime = time_t(this->toEpochTime()); -#ifdef __hpux -// note that the following line needs -D_REENTRANT on HP-UX to be picked up - asctime_r(localtime(&ourTime), buffer); -#else - ::asctime_r(::localtime(&ourTime), buffer); -#endif - - std::string result(buffer); - return result.substr(0,24); + time_t OurTime = time_t(this->toEpochTime()); + struct tm Storage; + struct tm *LT = ::localtime_r(&OurTime, &Storage); + assert(LT); + char Buffer[25]; + strftime(Buffer, 25, "%b %e %H:%M %Y", LT); + return std::string(Buffer); } TimeValue TimeValue::now() { diff --git a/contrib/llvm/lib/Support/Unix/Unix.h b/contrib/llvm/lib/Support/Unix/Unix.h index 051f56f..ba688e3 100644 --- a/contrib/llvm/lib/Support/Unix/Unix.h +++ b/contrib/llvm/lib/Support/Unix/Unix.h @@ -22,28 +22,22 @@ #include "llvm/Config/config.h" // Get autoconf configuration settings #include "llvm/Support/Errno.h" #include +#include #include #include #include #include #include +#include #ifdef HAVE_UNISTD_H #include #endif -#ifdef HAVE_SYS_TYPES_H -#include -#endif - #ifdef HAVE_SYS_PARAM_H #include #endif -#ifdef HAVE_ASSERT_H -#include -#endif - #ifdef HAVE_SYS_TIME_H # include #endif @@ -53,6 +47,10 @@ # include #endif +#ifdef HAVE_DLFCN_H +# include +#endif + #ifndef WEXITSTATUS # define WEXITSTATUS(stat_val) ((unsigned)(stat_val) >> 8) #endif diff --git a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc index 83da82a..5a7b219 100644 --- a/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc +++ b/contrib/llvm/lib/Support/Windows/DynamicLibrary.inc @@ -71,7 +71,7 @@ extern "C" { DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, std::string *errMsg) { - SmartScopedLock lock(getMutex()); + SmartScopedLock lock(*SymbolsMutex); if (!filename) { // When no file is specified, enumerate all DLLs and EXEs in the process. @@ -83,8 +83,15 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, // This is mostly to ensure that the return value still shows up as "valid". 
return DynamicLibrary(&OpenedHandles); } + + SmallVector filenameUnicode; + if (error_code ec = windows::UTF8ToUTF16(filename, filenameUnicode)) { + SetLastError(ec.value()); + MakeErrMsg(errMsg, std::string(filename) + ": Can't convert to UTF-16: "); + return DynamicLibrary(); + } - HMODULE a_handle = LoadLibrary(filename); + HMODULE a_handle = LoadLibraryW(filenameUnicode.data()); if (a_handle == 0) { MakeErrMsg(errMsg, std::string(filename) + ": Can't open : "); @@ -114,10 +121,10 @@ DynamicLibrary DynamicLibrary::getPermanentLibrary(const char *filename, #undef EXPLICIT_SYMBOL2 void* DynamicLibrary::SearchForAddressOfSymbol(const char* symbolName) { - SmartScopedLock Lock(getMutex()); + SmartScopedLock Lock(*SymbolsMutex); // First check symbols added via AddSymbol(). - if (ExplicitSymbols) { + if (ExplicitSymbols.isConstructed()) { StringMap::iterator i = ExplicitSymbols->find(symbolName); if (i != ExplicitSymbols->end()) diff --git a/contrib/llvm/lib/Support/Windows/Memory.inc b/contrib/llvm/lib/Support/Windows/Memory.inc index 4c5aebd..1260452 100644 --- a/contrib/llvm/lib/Support/Windows/Memory.inc +++ b/contrib/llvm/lib/Support/Windows/Memory.inc @@ -82,7 +82,7 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, uintptr_t Start = NearBlock ? reinterpret_cast(NearBlock->base()) + NearBlock->size() - : NULL; + : 0; // If the requested address is not aligned to the allocation granularity, // round up to get beyond NearBlock. VirtualAlloc would have rounded down. @@ -106,7 +106,7 @@ MemoryBlock Memory::allocateMappedMemory(size_t NumBytes, MemoryBlock Result; Result.Address = PA; Result.Size = NumBlocks*Granularity; - ; + if (Flags & MF_EXEC) Memory::InvalidateInstructionCache(Result.Address, Result.Size); diff --git a/contrib/llvm/lib/Support/Windows/Path.inc b/contrib/llvm/lib/Support/Windows/Path.inc index f4898e6..0b39198 100644 --- a/contrib/llvm/lib/Support/Windows/Path.inc +++ b/contrib/llvm/lib/Support/Windows/Path.inc @@ -1,4 +1,4 @@ -//===- llvm/Support/Win32/Path.cpp - Win32 Path Implementation ---*- C++ -*-===// +//===- llvm/Support/Windows/Path.inc - Windows Path Impl --------*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -7,920 +7,1116 @@ // //===----------------------------------------------------------------------===// // -// This file provides the Win32 specific implementation of the Path class. +// This file implements the Windows specific implementation of the Path API. // //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic Win32 code that -//=== is guaranteed to work on *all* Win32 variants. +//=== WARNING: Implementation here must contain only generic Windows code that +//=== is guaranteed to work on *all* Windows variants. //===----------------------------------------------------------------------===// +#include "llvm/ADT/STLExtras.h" #include "Windows.h" -#include -#include +#include +#include +#include +#include -// We need to undo a macro defined in Windows.h, otherwise we won't compile: -#undef CopyFile -#undef GetCurrentDirectory +#undef max -// Windows happily accepts either forward or backward slashes, though any path -// returned by a Win32 API will have backward slashes. As LLVM code basically -// assumes forward slashes are used, backward slashs are converted where they -// can be introduced into a path. 
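
Earlier in this hunk, getPermanentLibrary converts the file name to UTF-16 and calls LoadLibraryW so that non-ASCII paths survive the trip into the Win32 API. The windows::UTF8ToUTF16 helper it uses is presumably built on the Win32 MultiByteToWideChar call; a self-contained sketch of the same conversion (hypothetical helper, error handling reduced):

    #include <windows.h>
    #include <string>

    std::wstring to_utf16(const std::string &s) {
      int n = MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, 0, 0);
      if (n <= 0)
        return std::wstring();            // conversion failed
      std::wstring w(n, L'\0');           // n counts the terminating NUL
      MultiByteToWideChar(CP_UTF8, 0, s.c_str(), -1, &w[0], n);
      w.resize(n - 1);                    // drop the embedded NUL
      return w;
    }
    // e.g. HMODULE h = LoadLibraryW(to_utf16(filename).c_str());
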
-// -// Another invariant is that a path ends with a slash if and only if the path -// is a root directory. Any other use of a trailing slash is stripped. Unlike -// in Unix, Windows has a rather complicated notion of a root path and this -// invariant helps simply the code. - -static void FlipBackSlashes(std::string& s) { - for (size_t i = 0; i < s.size(); i++) - if (s[i] == '\\') - s[i] = '/'; -} +// MinGW doesn't define this. +#ifndef _ERRNO_T_DEFINED +#define _ERRNO_T_DEFINED +typedef int errno_t; +#endif -namespace llvm { -namespace sys { +#ifdef _MSC_VER +# pragma comment(lib, "advapi32.lib") // This provides CryptAcquireContextW. +#endif -const char PathSeparator = ';'; +using namespace llvm; -StringRef Path::GetEXESuffix() { - return "exe"; -} +using llvm::sys::windows::UTF8ToUTF16; +using llvm::sys::windows::UTF16ToUTF8; -Path::Path(llvm::StringRef p) - : path(p) { - FlipBackSlashes(path); -} +namespace { + typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)( + /*__in*/ LPCWSTR lpSymlinkFileName, + /*__in*/ LPCWSTR lpTargetFileName, + /*__in*/ DWORD dwFlags); -Path::Path(const char *StrStart, unsigned StrLen) - : path(StrStart, StrLen) { - FlipBackSlashes(path); -} + PtrCreateSymbolicLinkW create_symbolic_link_api = + PtrCreateSymbolicLinkW(::GetProcAddress( + ::GetModuleHandleW(L"Kernel32.dll"), "CreateSymbolicLinkW")); -Path& -Path::operator=(StringRef that) { - path.assign(that.data(), that.size()); - FlipBackSlashes(path); - return *this; -} + error_code TempDir(SmallVectorImpl &result) { + retry_temp_dir: + DWORD len = ::GetTempPathW(result.capacity(), result.begin()); -bool -Path::isValid() const { - if (path.empty()) - return false; + if (len == 0) + return windows_error(::GetLastError()); - size_t len = path.size(); - // If there is a null character, it and all its successors are ignored. - size_t pos = path.find_first_of('\0'); - if (pos != std::string::npos) - len = pos; - - // If there is a colon, it must be the second character, preceded by a letter - // and followed by something. - pos = path.rfind(':',len); - size_t rootslash = 0; - if (pos != std::string::npos) { - if (pos != 1 || !isalpha(static_cast(path[0])) || len < 3) - return false; - rootslash = 2; - } + if (len > result.capacity()) { + result.reserve(len); + goto retry_temp_dir; + } - // Look for a UNC path, and if found adjust our notion of the root slash. - if (len > 3 && path[0] == '/' && path[1] == '/') { - rootslash = path.find('/', 2); - if (rootslash == std::string::npos) - rootslash = 0; + result.set_size(len); + return error_code::success(); } - // Check for illegal characters. - if (path.find_first_of("\\<>\"|\001\002\003\004\005\006\007\010\011\012" - "\013\014\015\016\017\020\021\022\023\024\025\026" - "\027\030\031\032\033\034\035\036\037") - != std::string::npos) - return false; - - // Remove trailing slash, unless it's a root slash. - if (len > rootslash+1 && path[len-1] == '/') - path.erase(--len); - - // Check each component for legality. - for (pos = 0; pos < len; ++pos) { - // A component may not end in a space. - if (path[pos] == ' ') { - if (pos+1 == len || path[pos+1] == '/' || path[pos+1] == '\0') - return false; + bool is_separator(const wchar_t value) { + switch (value) { + case L'\\': + case L'/': + return true; + default: + return false; } + } +} - // A component may not end in a period. - if (path[pos] == '.') { - if (pos+1 == len || path[pos+1] == '/') { - // Unless it is the pseudo-directory "."... 
- if (pos == 0 || path[pos-1] == '/' || path[pos-1] == ':') - return true; - // or "..". - if (pos > 0 && path[pos-1] == '.') { - if (pos == 1 || path[pos-2] == '/' || path[pos-2] == ':') - return true; - } - return false; +// FIXME: mode should be used here and default to user r/w only, +// it currently comes in as a UNIX mode. +static error_code createUniqueEntity(const Twine &model, int &result_fd, + SmallVectorImpl<char> &result_path, + bool makeAbsolute, unsigned mode, + FSEntity Type) { + // Use result_path as temp storage. + result_path.set_size(0); + StringRef m = model.toStringRef(result_path); + + SmallVector<wchar_t, 128> model_utf16; + if (error_code ec = UTF8ToUTF16(m, model_utf16)) return ec; + + if (makeAbsolute) { + // Make model absolute by prepending a temp directory if it's not already. + bool absolute = sys::path::is_absolute(m); + + if (!absolute) { + SmallVector<wchar_t, 64> temp_dir; + if (error_code ec = TempDir(temp_dir)) return ec; + // Handle c: by removing it. + if (model_utf16.size() > 2 && model_utf16[1] == L':') { + model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2); } + model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end()); } } - return true; -} + // Replace '%' with random chars. From here on, DO NOT modify model. It may be + // needed if the randomly chosen path already exists. + SmallVector<wchar_t, 128> random_path_utf16; + + // Get a Crypto Provider for CryptGenRandom. + HCRYPTPROV HCPC; + if (!::CryptAcquireContextW(&HCPC, + NULL, + NULL, + PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT)) + return windows_error(::GetLastError()); + ScopedCryptContext CryptoProvider(HCPC); + +retry_random_path: + random_path_utf16.set_size(0); + for (SmallVectorImpl<wchar_t>::const_iterator i = model_utf16.begin(), + e = model_utf16.end(); + i != e; ++i) { + if (*i == L'%') { + BYTE val = 0; + if (!::CryptGenRandom(CryptoProvider, 1, &val)) + return windows_error(::GetLastError()); + random_path_utf16.push_back(L"0123456789abcdef"[val & 15]); + } + else + random_path_utf16.push_back(*i); + } + // Make random_path_utf16 null terminated. + random_path_utf16.push_back(0); + random_path_utf16.pop_back(); + + HANDLE TempFileHandle = INVALID_HANDLE_VALUE; + + switch (Type) { + case FS_File: { + // Try to create + open the path. + TempFileHandle = + ::CreateFileW(random_path_utf16.begin(), GENERIC_READ | GENERIC_WRITE, + FILE_SHARE_READ, NULL, + // Return ERROR_FILE_EXISTS if the file + // already exists. + CREATE_NEW, FILE_ATTRIBUTE_TEMPORARY, NULL); + if (TempFileHandle == INVALID_HANDLE_VALUE) { + // If the file existed, try again, otherwise, error. + error_code ec = windows_error(::GetLastError()); + if (ec == windows_error::file_exists) + goto retry_random_path; + + return ec; + } -void Path::makeAbsolute() { - TCHAR FullPath[MAX_PATH + 1] = {0}; - LPTSTR FilePart = NULL; + // Convert the Windows API file handle into a C-runtime handle. + int fd = ::_open_osfhandle(intptr_t(TempFileHandle), 0); + if (fd == -1) { + ::CloseHandle(TempFileHandle); + ::DeleteFileW(random_path_utf16.begin()); + // MSDN doesn't say anything about _open_osfhandle setting errno or + // GetLastError(), so just return invalid_handle.
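// As the comment above notes, MSDN leaves _open_osfhandle's failure reporting
// unspecified, so the patch unwinds by hand: close the HANDLE, delete the
// half-created file, and return a generic error. A minimal sketch of that
// wrap-a-HANDLE-in-a-CRT-descriptor step; openHandleAsFd is a hypothetical
// helper, not part of the patch:

#include <windows.h>
#include <io.h>

static int openHandleAsFd(HANDLE H, const wchar_t *CleanupPath) {
  int FD = ::_open_osfhandle(reinterpret_cast<intptr_t>(H), 0);
  if (FD == -1) {
    // On failure we still own the HANDLE: release it and remove the
    // temporary file we just created before reporting the error.
    ::CloseHandle(H);
    ::DeleteFileW(CleanupPath);
    return -1;
  }
  // On success the CRT owns H; a later _close(FD) releases both.
  return FD;
}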
+ return windows_error::invalid_handle; + } - DWORD RetLength = ::GetFullPathNameA(path.c_str(), - sizeof(FullPath)/sizeof(FullPath[0]), - FullPath, &FilePart); + result_fd = fd; + break; + } - if (0 == RetLength) { - // FIXME: Report the error GetLastError() - assert(0 && "Unable to make absolute path!"); - } else if (RetLength > MAX_PATH) { - // FIXME: Report too small buffer (needed RetLength bytes). - assert(0 && "Unable to make absolute path!"); - } else { - path = FullPath; + case FS_Name: { + DWORD attributes = ::GetFileAttributesW(random_path_utf16.begin()); + if (attributes != INVALID_FILE_ATTRIBUTES) + goto retry_random_path; + error_code EC = make_error_code(windows_error(::GetLastError())); + if (EC != windows_error::file_not_found && + EC != windows_error::path_not_found) + return EC; + break; } -} -bool -Path::isAbsolute(const char *NameStart, unsigned NameLen) { - assert(NameStart); - // FIXME: This does not handle correctly an absolute path starting from - // a drive letter or in UNC format. - switch (NameLen) { - case 0: - return false; - case 1: - case 2: - return NameStart[0] == '/'; - default: - return - (NameStart[0] == '/' || (NameStart[1] == ':' && NameStart[2] == '/')) || - (NameStart[0] == '\\' || (NameStart[1] == ':' && NameStart[2] == '\\')); + case FS_Dir: + if (!::CreateDirectoryW(random_path_utf16.begin(), NULL)) { + error_code EC = windows_error(::GetLastError()); + if (EC != windows_error::already_exists) + return EC; + goto retry_random_path; + } + break; } -} -bool -Path::isAbsolute() const { - // FIXME: This does not handle correctly an absolute path starting from - // a drive letter or in UNC format. - switch (path.length()) { - case 0: - return false; - case 1: - case 2: - return path[0] == '/'; - default: - return path[0] == '/' || (path[1] == ':' && path[2] == '/'); + // Set result_path to the utf-8 representation of the path. + if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(), + random_path_utf16.size(), result_path)) { + switch (Type) { + case FS_File: + ::CloseHandle(TempFileHandle); + ::DeleteFileW(random_path_utf16.begin()); + case FS_Name: + break; + case FS_Dir: + ::RemoveDirectoryW(random_path_utf16.begin()); + break; + } + return ec; } + + return error_code::success(); } -static Path *TempDirectory; +namespace llvm { +namespace sys { +namespace fs { -Path -Path::GetTemporaryDirectory(std::string* ErrMsg) { - if (TempDirectory) { -#if defined(_MSC_VER) - // Visual Studio gets confused and emits a diagnostic about calling exists, - // even though this is the implementation for PathV1. Temporarily - // disable the deprecated warning message - #pragma warning(push) - #pragma warning(disable:4996) -#endif - assert(TempDirectory->exists() && "Who has removed TempDirectory?"); -#if defined(_MSC_VER) - #pragma warning(pop) -#endif - return *TempDirectory; - } +std::string getMainExecutable(const char *argv0, void *MainExecAddr) { + SmallVector PathName; + DWORD Size = ::GetModuleFileNameW(NULL, PathName.data(), PathName.capacity()); - char pathname[MAX_PATH]; - if (!GetTempPath(MAX_PATH, pathname)) { - if (ErrMsg) - *ErrMsg = "Can't determine temporary directory"; - return Path(); - } + // A zero return value indicates a failure other than insufficient space. + if (Size == 0) + return ""; - Path result; - result.set(pathname); + // Insufficient space is determined by a return value equal to the size of + // the buffer passed in. 
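// GetModuleFileNameW signals truncation only by returning exactly the buffer
// size, which is why the code above gives up when Size == capacity. Callers
// that want the full path regardless usually grow and retry instead; a sketch
// of that loop, assuming std::vector rather than SmallVector:

#include <windows.h>
#include <string>
#include <vector>

static std::wstring moduleFileName() {
  std::vector<wchar_t> Buf(MAX_PATH);
  for (;;) {
    DWORD Len = ::GetModuleFileNameW(NULL, &Buf[0], (DWORD)Buf.size());
    if (Len == 0)
      return std::wstring();             // Hard failure.
    if (Len < Buf.size())
      return std::wstring(&Buf[0], Len); // Fit without truncation.
    Buf.resize(Buf.size() * 2);          // Truncated: grow and retry.
  }
}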
+ if (Size == PathName.capacity()) + return ""; - // Append a subdirectory based on our process id so multiple LLVMs don't - // step on each other's toes. -#ifdef __MINGW32__ - // Mingw's Win32 header files are broken. - sprintf(pathname, "LLVM_%u", unsigned(GetCurrentProcessId())); -#else - sprintf(pathname, "LLVM_%u", GetCurrentProcessId()); -#endif - result.appendComponent(pathname); - - // If there's a directory left over from a previous LLVM execution that - // happened to have the same process id, get rid of it. - result.eraseFromDisk(true); - - // And finally (re-)create the empty directory. - result.createDirectoryOnDisk(false); - TempDirectory = new Path(result); - return *TempDirectory; -} - -// FIXME: the following set of functions don't map to Windows very well. -Path -Path::GetRootDirectory() { - // This is the only notion that that Windows has of a root directory. Nothing - // is here except for drives. - return Path("file:///"); -} - -void -Path::GetSystemLibraryPaths(std::vector& Paths) { - char buff[MAX_PATH]; - // Generic form of C:\Windows\System32 - HRESULT res = SHGetFolderPathA(NULL, - CSIDL_FLAG_CREATE | CSIDL_SYSTEM, - NULL, - SHGFP_TYPE_CURRENT, - buff); - if (res != S_OK) { - assert(0 && "Failed to get system directory"); - return; - } - Paths.push_back(sys::Path(buff)); - - // Reset buff. - buff[0] = 0; - // Generic form of C:\Windows - res = SHGetFolderPathA(NULL, - CSIDL_FLAG_CREATE | CSIDL_WINDOWS, - NULL, - SHGFP_TYPE_CURRENT, - buff); - if (res != S_OK) { - assert(0 && "Failed to get windows directory"); - return; - } - Paths.push_back(sys::Path(buff)); -} + // On success, GetModuleFileNameW returns the number of characters written to + // the buffer not including the NULL terminator. + PathName.set_size(Size); -void -Path::GetBitcodeLibraryPaths(std::vector& Paths) { - char * env_var = getenv("LLVM_LIB_SEARCH_PATH"); - if (env_var != 0) { - getPathList(env_var,Paths); - } -#ifdef LLVM_LIBDIR - { - Path tmpPath; - if (tmpPath.set(LLVM_LIBDIR)) - if (tmpPath.canRead()) - Paths.push_back(tmpPath); - } -#endif - GetSystemLibraryPaths(Paths); -} + // Convert the result from UTF-16 to UTF-8. + SmallVector PathNameUTF8; + if (UTF16ToUTF8(PathName.data(), PathName.size(), PathNameUTF8)) + return ""; -Path -Path::GetUserHomeDirectory() { - char buff[MAX_PATH]; - HRESULT res = SHGetFolderPathA(NULL, - CSIDL_FLAG_CREATE | CSIDL_APPDATA, - NULL, - SHGFP_TYPE_CURRENT, - buff); - if (res != S_OK) - assert(0 && "Failed to get user home directory"); - return Path(buff); + return std::string(PathNameUTF8.data()); } -Path -Path::GetCurrentDirectory() { - char pathname[MAX_PATH]; - ::GetCurrentDirectoryA(MAX_PATH,pathname); - return Path(pathname); +UniqueID file_status::getUniqueID() const { + // The file is uniquely identified by the volume serial number along + // with the 64-bit file identifier. + uint64_t FileID = (static_cast(FileIndexHigh) << 32ULL) | + static_cast(FileIndexLow); + + return UniqueID(VolumeSerialNumber, FileID); } -/// GetMainExecutable - Return the path to the main executable, given the -/// value of argv[0] from program startup. -Path Path::GetMainExecutable(const char *argv0, void *MainAddr) { - char pathname[MAX_PATH]; - DWORD ret = ::GetModuleFileNameA(NULL, pathname, MAX_PATH); - return ret != MAX_PATH ? 
Path(pathname) : Path(); +TimeValue file_status::getLastModificationTime() const { + ULARGE_INTEGER UI; + UI.LowPart = LastWriteTimeLow; + UI.HighPart = LastWriteTimeHigh; + + TimeValue Ret; + Ret.fromWin32Time(UI.QuadPart); + return Ret; } +error_code current_path(SmallVectorImpl &result) { + SmallVector cur_path; + DWORD len = MAX_PATH; -// FIXME: the above set of functions don't map to Windows very well. + do { + cur_path.reserve(len); + len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data()); + + // A zero return value indicates a failure other than insufficient space. + if (len == 0) + return windows_error(::GetLastError()); + // If there's insufficient space, the len returned is larger than the len + // given. + } while (len > cur_path.capacity()); -StringRef Path::getDirname() const { - return getDirnameCharSep(path, "/"); + // On success, GetCurrentDirectoryW returns the number of characters not + // including the null-terminator. + cur_path.set_size(len); + return UTF16ToUTF8(cur_path.begin(), cur_path.size(), result); } -StringRef -Path::getBasename() const { - // Find the last slash - size_t slash = path.rfind('/'); - if (slash == std::string::npos) - slash = 0; - else - slash++; +error_code create_directory(const Twine &path, bool &existed) { + SmallString<128> path_storage; + SmallVector path_utf16; - size_t dot = path.rfind('.'); - if (dot == std::string::npos || dot < slash) - return StringRef(path).substr(slash); - else - return StringRef(path).substr(slash, dot - slash); -} + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; -StringRef -Path::getSuffix() const { - // Find the last slash - size_t slash = path.rfind('/'); - if (slash == std::string::npos) - slash = 0; - else - slash++; + if (!::CreateDirectoryW(path_utf16.begin(), NULL)) { + error_code ec = windows_error(::GetLastError()); + if (ec == windows_error::already_exists) + existed = true; + else + return ec; + } else + existed = false; - size_t dot = path.rfind('.'); - if (dot == std::string::npos || dot < slash) - return StringRef(""); - else - return StringRef(path).substr(dot + 1); + return error_code::success(); } -bool -Path::exists() const { - DWORD attr = GetFileAttributes(path.c_str()); - return attr != INVALID_FILE_ATTRIBUTES; -} +error_code create_hard_link(const Twine &to, const Twine &from) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toStringRef(from_storage); + StringRef t = to.toStringRef(to_storage); + + // Convert to utf-16. + SmallVector wide_from; + SmallVector wide_to; + if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; + if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; -bool -Path::isDirectory() const { - DWORD attr = GetFileAttributes(path.c_str()); - return (attr != INVALID_FILE_ATTRIBUTES) && - (attr & FILE_ATTRIBUTE_DIRECTORY); + if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL)) + return windows_error(::GetLastError()); + + return error_code::success(); } -bool -Path::isSymLink() const { - DWORD attributes = GetFileAttributes(path.c_str()); +error_code create_symlink(const Twine &to, const Twine &from) { + // Only do it if the function is available at runtime. + if (!create_symbolic_link_api) + return make_error_code(errc::function_not_supported); - if (attributes == INVALID_FILE_ATTRIBUTES) - // There's no sane way to report this :(. - assert(0 && "GetFileAttributes returned INVALID_FILE_ATTRIBUTES"); + // Get arguments. 
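// create_symbolic_link_api, tested just above, was resolved once at static
// initialization time with GetProcAddress (see the anonymous namespace near
// the top of this file); that keeps the file loadable on pre-Vista Windows,
// where kernel32.dll exports no CreateSymbolicLinkW. A minimal sketch of the
// same resolve-at-runtime pattern; getSymlinkFn is a hypothetical name:

#include <windows.h>

typedef BOOLEAN (WINAPI *SymlinkFn)(LPCWSTR, LPCWSTR, DWORD);

static SymlinkFn getSymlinkFn() {
  // GetModuleHandleW is enough here: kernel32.dll is always already loaded,
  // so no LoadLibrary call (and no matching FreeLibrary) is needed.
  HMODULE Kernel32 = ::GetModuleHandleW(L"kernel32.dll");
  if (!Kernel32)
    return NULL;
  return reinterpret_cast<SymlinkFn>(
      ::GetProcAddress(Kernel32, "CreateSymbolicLinkW"));
}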
+ SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toStringRef(from_storage); + StringRef t = to.toStringRef(to_storage); - // This isn't exactly what defines a NTFS symlink, but it is only true for - // paths that act like a symlink. - return attributes & FILE_ATTRIBUTE_REPARSE_POINT; -} + // Convert to utf-16. + SmallVector<wchar_t, 128> wide_from; + SmallVector<wchar_t, 128> wide_to; + if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; + if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; -bool -Path::canRead() const { - // FIXME: take security attributes into account. - DWORD attr = GetFileAttributes(path.c_str()); - return attr != INVALID_FILE_ATTRIBUTES; -} + if (!create_symbolic_link_api(wide_from.begin(), wide_to.begin(), 0)) + return windows_error(::GetLastError()); -bool -Path::canWrite() const { - // FIXME: take security attributes into account. - DWORD attr = GetFileAttributes(path.c_str()); - return (attr != INVALID_FILE_ATTRIBUTES) && !(attr & FILE_ATTRIBUTE_READONLY); + return error_code::success(); } -bool -Path::canExecute() const { - // FIXME: take security attributes into account. - DWORD attr = GetFileAttributes(path.c_str()); - return attr != INVALID_FILE_ATTRIBUTES; +error_code remove(const Twine &path, bool &existed) { + SmallString<128> path_storage; + SmallVector<wchar_t, 128> path_utf16; + + file_status st; + error_code EC = status(path, st); + if (EC) { + if (EC == windows_error::file_not_found || + EC == windows_error::path_not_found) { + existed = false; + return error_code::success(); + } + return EC; + } + + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + if (st.type() == file_type::directory_file) { + if (!::RemoveDirectoryW(c_str(path_utf16))) { + error_code ec = windows_error(::GetLastError()); + if (ec != windows_error::file_not_found) + return ec; + existed = false; + } else + existed = true; + } else { + if (!::DeleteFileW(c_str(path_utf16))) { + error_code ec = windows_error(::GetLastError()); + if (ec != windows_error::file_not_found) + return ec; + existed = false; + } else + existed = true; + } + + return error_code::success(); } -bool -Path::isRegularFile() const { - bool res; - if (fs::is_regular_file(path, res)) - return false; - return res; +error_code rename(const Twine &from, const Twine &to) { + // Get arguments. + SmallString<128> from_storage; + SmallString<128> to_storage; + StringRef f = from.toStringRef(from_storage); + StringRef t = to.toStringRef(to_storage); + + // Convert to utf-16. + SmallVector<wchar_t, 128> wide_from; + SmallVector<wchar_t, 128> wide_to; + if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; + if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; + + error_code ec = error_code::success(); + for (int i = 0; i < 2000; i++) { + if (::MoveFileExW(wide_from.begin(), wide_to.begin(), + MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)) + return error_code::success(); + ec = windows_error(::GetLastError()); + if (ec != windows_error::access_denied) + break; + // Retry MoveFile() at ACCESS_DENIED. + // System scanners (eg. indexer) might open the source file when + // It is written and closed.
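// This rename() loop retries for up to roughly two seconds because background
// scanners (antivirus, the indexer) briefly open freshly written files, which
// makes MoveFileExW fail with ERROR_ACCESS_DENIED even though the rename
// would succeed a moment later. A condensed sketch of the same bounded-retry
// idiom; renameWithRetries is a hypothetical name, not part of the patch:

#include <windows.h>

static bool renameWithRetries(LPCWSTR From, LPCWSTR To) {
  for (int I = 0; I < 2000; ++I) {
    if (::MoveFileExW(From, To,
                      MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
      return true;
    if (::GetLastError() != ERROR_ACCESS_DENIED)
      return false; // A real failure; retrying won't help.
    ::Sleep(1);     // Transient sharing conflict: back off briefly.
  }
  return false;     // Still locked after ~2s of retries.
}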
+ ::Sleep(1); + } + + return ec; } -StringRef -Path::getLast() const { - // Find the last slash - size_t pos = path.rfind('/'); +error_code resize_file(const Twine &path, uint64_t size) { + SmallString<128> path_storage; + SmallVector path_utf16; - // Handle the corner cases - if (pos == std::string::npos) - return path; + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; - // If the last character is a slash, we have a root directory - if (pos == path.length()-1) - return path; + int fd = ::_wopen(path_utf16.begin(), O_BINARY | _O_RDWR, S_IWRITE); + if (fd == -1) + return error_code(errno, generic_category()); +#ifdef HAVE__CHSIZE_S + errno_t error = ::_chsize_s(fd, size); +#else + errno_t error = ::_chsize(fd, size); +#endif + ::close(fd); + return error_code(error, generic_category()); +} - // Return everything after the last slash - return StringRef(path).substr(pos+1); +error_code exists(const Twine &path, bool &result) { + SmallString<128> path_storage; + SmallVector path_utf16; + + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + DWORD attributes = ::GetFileAttributesW(path_utf16.begin()); + + if (attributes == INVALID_FILE_ATTRIBUTES) { + // See if the file didn't actually exist. + error_code ec = make_error_code(windows_error(::GetLastError())); + if (ec != windows_error::file_not_found && + ec != windows_error::path_not_found) + return ec; + result = false; + } else + result = true; + return error_code::success(); } -const FileStatus * -PathWithStatus::getFileStatus(bool update, std::string *ErrStr) const { - if (!fsIsValid || update) { - WIN32_FILE_ATTRIBUTE_DATA fi; - if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) { - MakeErrMsg(ErrStr, "getStatusInfo():" + std::string(path) + - ": Can't get status: "); - return 0; - } +bool can_write(const Twine &Path) { + // FIXME: take security attributes into account. + SmallString<128> PathStorage; + SmallVector PathUtf16; - status.fileSize = fi.nFileSizeHigh; - status.fileSize <<= sizeof(fi.nFileSizeHigh)*8; - status.fileSize += fi.nFileSizeLow; + if (UTF8ToUTF16(Path.toStringRef(PathStorage), PathUtf16)) + return false; - status.mode = fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY ? 0555 : 0777; - status.user = 9999; // Not applicable to Windows, so... - status.group = 9999; // Not applicable to Windows, so... + DWORD Attr = ::GetFileAttributesW(PathUtf16.begin()); + return (Attr != INVALID_FILE_ATTRIBUTES) && !(Attr & FILE_ATTRIBUTE_READONLY); +} - // FIXME: this is only unique if the file is accessed by the same file path. - // How do we do this for C:\dir\file and ..\dir\file ? Unix has inode - // numbers, but the concept doesn't exist in Windows. - status.uniqueID = 0; - for (unsigned i = 0; i < path.length(); ++i) - status.uniqueID += path[i]; +bool can_execute(const Twine &Path) { + SmallString<128> PathStorage; + SmallVector PathUtf16; - ULARGE_INTEGER ui; - ui.LowPart = fi.ftLastWriteTime.dwLowDateTime; - ui.HighPart = fi.ftLastWriteTime.dwHighDateTime; - status.modTime.fromWin32Time(ui.QuadPart); + if (UTF8ToUTF16(Path.toStringRef(PathStorage), PathUtf16)) + return false; - status.isDir = fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY; - fsIsValid = true; - } - return &status; + DWORD Attr = ::GetFileAttributesW(PathUtf16.begin()); + return Attr != INVALID_FILE_ATTRIBUTES; } -bool Path::makeReadableOnDisk(std::string* ErrMsg) { - // All files are readable on Windows (ignoring security attributes). 
- return false; +bool equivalent(file_status A, file_status B) { + assert(status_known(A) && status_known(B)); + return A.FileIndexHigh == B.FileIndexHigh && + A.FileIndexLow == B.FileIndexLow && + A.FileSizeHigh == B.FileSizeHigh && + A.FileSizeLow == B.FileSizeLow && + A.LastWriteTimeHigh == B.LastWriteTimeHigh && + A.LastWriteTimeLow == B.LastWriteTimeLow && + A.VolumeSerialNumber == B.VolumeSerialNumber; } -bool Path::makeWriteableOnDisk(std::string* ErrMsg) { - DWORD attr = GetFileAttributes(path.c_str()); +error_code equivalent(const Twine &A, const Twine &B, bool &result) { + file_status fsA, fsB; + if (error_code ec = status(A, fsA)) return ec; + if (error_code ec = status(B, fsB)) return ec; + result = equivalent(fsA, fsB); + return error_code::success(); +} - // If it doesn't exist, we're done. - if (attr == INVALID_FILE_ATTRIBUTES) - return false; +static bool isReservedName(StringRef path) { + // This list of reserved names comes from MSDN, at: + // http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx + static const char *sReservedNames[] = { "nul", "con", "prn", "aux", + "com1", "com2", "com3", "com4", "com5", "com6", + "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", + "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9" }; + + // First, check to see if this is a device namespace, which always + // starts with \\.\, since device namespaces are not legal file paths. + if (path.startswith("\\\\.\\")) + return true; - if (attr & FILE_ATTRIBUTE_READONLY) { - if (!SetFileAttributes(path.c_str(), attr & ~FILE_ATTRIBUTE_READONLY)) { - MakeErrMsg(ErrMsg, std::string(path) + ": Can't make file writable: "); + // Then compare against the list of ancient reserved names + for (size_t i = 0; i < array_lengthof(sReservedNames); ++i) { + if (path.equals_lower(sReservedNames[i])) return true; - } } - return false; -} -bool Path::makeExecutableOnDisk(std::string* ErrMsg) { - // All files are executable on Windows (ignoring security attributes). + // The path isn't what we consider reserved. return false; } -bool -Path::getDirectoryContents(std::set& result, std::string* ErrMsg) const { - WIN32_FILE_ATTRIBUTE_DATA fi; - if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) { - MakeErrMsg(ErrMsg, path + ": can't get status of file"); - return true; +static error_code getStatus(HANDLE FileHandle, file_status &Result) { + if (FileHandle == INVALID_HANDLE_VALUE) + goto handle_status_error; + + switch (::GetFileType(FileHandle)) { + default: + llvm_unreachable("Don't know anything about this file type"); + case FILE_TYPE_UNKNOWN: { + DWORD Err = ::GetLastError(); + if (Err != NO_ERROR) + return windows_error(Err); + Result = file_status(file_type::type_unknown); + return error_code::success(); + } + case FILE_TYPE_DISK: + break; + case FILE_TYPE_CHAR: + Result = file_status(file_type::character_file); + return error_code::success(); + case FILE_TYPE_PIPE: + Result = file_status(file_type::fifo_file); + return error_code::success(); } - if (!(fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY)) { - if (ErrMsg) - *ErrMsg = path + ": not a directory"; - return true; + BY_HANDLE_FILE_INFORMATION Info; + if (!::GetFileInformationByHandle(FileHandle, &Info)) + goto handle_status_error; + + { + file_type Type = (Info.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) + ? 
file_type::directory_file + : file_type::regular_file; + Result = + file_status(Type, Info.ftLastWriteTime.dwHighDateTime, + Info.ftLastWriteTime.dwLowDateTime, + Info.dwVolumeSerialNumber, Info.nFileSizeHigh, + Info.nFileSizeLow, Info.nFileIndexHigh, Info.nFileIndexLow); + return error_code::success(); } - result.clear(); - WIN32_FIND_DATA fd; - std::string searchpath = path; - if (path.size() == 0 || searchpath[path.size()-1] == '/') - searchpath += "*"; +handle_status_error: + error_code EC = windows_error(::GetLastError()); + if (EC == windows_error::file_not_found || + EC == windows_error::path_not_found) + Result = file_status(file_type::file_not_found); + else if (EC == windows_error::sharing_violation) + Result = file_status(file_type::type_unknown); else - searchpath += "/*"; + Result = file_status(file_type::status_error); + return EC; +} - HANDLE h = FindFirstFile(searchpath.c_str(), &fd); - if (h == INVALID_HANDLE_VALUE) { - if (GetLastError() == ERROR_FILE_NOT_FOUND) - return true; // not really an error, now is it? - MakeErrMsg(ErrMsg, path + ": Can't read directory: "); - return true; - } +error_code status(const Twine &path, file_status &result) { + SmallString<128> path_storage; + SmallVector path_utf16; - do { - if (fd.cFileName[0] == '.') - continue; - Path aPath(path); - aPath.appendComponent(&fd.cFileName[0]); - result.insert(aPath); - } while (FindNextFile(h, &fd)); - - DWORD err = GetLastError(); - FindClose(h); - if (err != ERROR_NO_MORE_FILES) { - SetLastError(err); - MakeErrMsg(ErrMsg, path + ": Can't read directory: "); - return true; + StringRef path8 = path.toStringRef(path_storage); + if (isReservedName(path8)) { + result = file_status(file_type::character_file); + return error_code::success(); } - return false; -} -bool -Path::set(StringRef a_path) { - if (a_path.empty()) - return false; - std::string save(path); - path = a_path; - FlipBackSlashes(path); - if (!isValid()) { - path = save; - return false; + if (error_code ec = UTF8ToUTF16(path8, path_utf16)) + return ec; + + DWORD attr = ::GetFileAttributesW(path_utf16.begin()); + if (attr == INVALID_FILE_ATTRIBUTES) + return getStatus(INVALID_HANDLE_VALUE, result); + + // Handle reparse points. + if (attr & FILE_ATTRIBUTE_REPARSE_POINT) { + ScopedFileHandle h( + ::CreateFileW(path_utf16.begin(), + 0, // Attributes only. + FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, + OPEN_EXISTING, + FILE_FLAG_BACKUP_SEMANTICS, + 0)); + if (!h) + return getStatus(INVALID_HANDLE_VALUE, result); } - return true; + + ScopedFileHandle h( + ::CreateFileW(path_utf16.begin(), 0, // Attributes only. 
+ FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, + NULL, OPEN_EXISTING, FILE_FLAG_BACKUP_SEMANTICS, 0)); + if (!h) + return getStatus(INVALID_HANDLE_VALUE, result); + + return getStatus(h, result); } -bool -Path::appendComponent(StringRef name) { - if (name.empty()) - return false; - std::string save(path); - if (!path.empty()) { - size_t last = path.size() - 1; - if (path[last] != '/') - path += '/'; - } - path += name; - if (!isValid()) { - path = save; - return false; - } - return true; +error_code status(int FD, file_status &Result) { + HANDLE FileHandle = reinterpret_cast(_get_osfhandle(FD)); + return getStatus(FileHandle, Result); } -bool -Path::eraseComponent() { - size_t slashpos = path.rfind('/',path.size()); - if (slashpos == path.size() - 1 || slashpos == std::string::npos) - return false; - std::string save(path); - path.erase(slashpos); - if (!isValid()) { - path = save; - return false; - } - return true; +error_code setLastModificationAndAccessTime(int FD, TimeValue Time) { + ULARGE_INTEGER UI; + UI.QuadPart = Time.toWin32Time(); + FILETIME FT; + FT.dwLowDateTime = UI.LowPart; + FT.dwHighDateTime = UI.HighPart; + HANDLE FileHandle = reinterpret_cast(_get_osfhandle(FD)); + if (!SetFileTime(FileHandle, NULL, &FT, &FT)) + return windows_error(::GetLastError()); + return error_code::success(); } -bool -Path::eraseSuffix() { - size_t dotpos = path.rfind('.',path.size()); - size_t slashpos = path.rfind('/',path.size()); - if (dotpos != std::string::npos) { - if (slashpos == std::string::npos || dotpos > slashpos+1) { - std::string save(path); - path.erase(dotpos, path.size()-dotpos); - if (!isValid()) { - path = save; - return false; - } - return true; - } +error_code get_magic(const Twine &path, uint32_t len, + SmallVectorImpl &result) { + SmallString<128> path_storage; + SmallVector path_utf16; + result.set_size(0); + + // Convert path to UTF-16. + if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), + path_utf16)) + return ec; + + // Open file. + HANDLE file = ::CreateFileW(c_str(path_utf16), + GENERIC_READ, + FILE_SHARE_READ, + NULL, + OPEN_EXISTING, + FILE_ATTRIBUTE_READONLY, + NULL); + if (file == INVALID_HANDLE_VALUE) + return windows_error(::GetLastError()); + + // Allocate buffer. + result.reserve(len); + + // Get magic! + DWORD bytes_read = 0; + BOOL read_success = ::ReadFile(file, result.data(), len, &bytes_read, NULL); + error_code ec = windows_error(::GetLastError()); + ::CloseHandle(file); + if (!read_success || (bytes_read != len)) { + // Set result size to the number of bytes read if it's valid. + if (bytes_read <= len) + result.set_size(bytes_read); + // ERROR_HANDLE_EOF is mapped to errc::value_too_large. + return ec; } - return false; -} -inline bool PathMsg(std::string* ErrMsg, const char* pathname, const char*msg) { - if (ErrMsg) - *ErrMsg = std::string(pathname) + ": " + std::string(msg); - return true; + result.set_size(len); + return error_code::success(); } -bool -Path::createDirectoryOnDisk(bool create_parents, std::string* ErrMsg) { - // Get a writeable copy of the path name - size_t len = path.length(); - char *pathname = reinterpret_cast(_alloca(len+2)); - path.copy(pathname, len); - pathname[len] = 0; - - // Make sure it ends with a slash. - if (len == 0 || pathname[len - 1] != '/') { - pathname[len] = '/'; - pathname[++len] = 0; +error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { + FileDescriptor = FD; + // Make sure that the requested size fits within SIZE_T. 
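// mapped_file_region::init(), which begins here, has to hand 64-bit sizes and
// offsets to CreateFileMappingW and MapViewOfFile as separate high/low DWORD
// halves, visible a few lines below as (Offset + Size) >> 32 and
// (Offset + Size) & 0xffffffff. A sketch of the same split done through
// ULARGE_INTEGER, which this file already uses for FILETIME math; mapAt is a
// hypothetical helper, not part of the patch:

#include <windows.h>
#include <stdint.h>

static LPVOID mapAt(HANDLE Mapping, DWORD Access, uint64_t Offset,
                    SIZE_T Length) {
  ULARGE_INTEGER Off;
  Off.QuadPart = Offset; // One 64-bit store; HighPart/LowPart are the halves.
  return ::MapViewOfFile(Mapping, Access, Off.HighPart, Off.LowPart, Length);
}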
+ if (Size > std::numeric_limits::max()) { + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else + ::CloseHandle(FileHandle); + return make_error_code(errc::invalid_argument); } - // Determine starting point for initial / search. - char *next = pathname; - if (pathname[0] == '/' && pathname[1] == '/') { - // Skip host name. - next = strchr(pathname+2, '/'); - if (next == NULL) - return PathMsg(ErrMsg, pathname, "badly formed remote directory"); + DWORD flprotect; + switch (Mode) { + case readonly: flprotect = PAGE_READONLY; break; + case readwrite: flprotect = PAGE_READWRITE; break; + case priv: flprotect = PAGE_WRITECOPY; break; + } - // Skip share name. - next = strchr(next+1, '/'); - if (next == NULL) - return PathMsg(ErrMsg, pathname,"badly formed remote directory"); + FileMappingHandle = + ::CreateFileMappingW(FileHandle, 0, flprotect, + (Offset + Size) >> 32, + (Offset + Size) & 0xffffffff, + 0); + if (FileMappingHandle == NULL) { + error_code ec = windows_error(GetLastError()); + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else + ::CloseHandle(FileHandle); + return ec; + } - next++; - if (*next == 0) - return PathMsg(ErrMsg, pathname, "badly formed remote directory"); + DWORD dwDesiredAccess; + switch (Mode) { + case readonly: dwDesiredAccess = FILE_MAP_READ; break; + case readwrite: dwDesiredAccess = FILE_MAP_WRITE; break; + case priv: dwDesiredAccess = FILE_MAP_COPY; break; + } + Mapping = ::MapViewOfFile(FileMappingHandle, + dwDesiredAccess, + Offset >> 32, + Offset & 0xffffffff, + Size); + if (Mapping == NULL) { + error_code ec = windows_error(GetLastError()); + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else + ::CloseHandle(FileHandle); + return ec; + } - } else { - if (pathname[1] == ':') - next += 2; // skip drive letter - if (*next == '/') - next++; // skip root directory - } - - // If we're supposed to create intermediate directories - if (create_parents) { - // Loop through the directory components until we're done - while (*next) { - next = strchr(next, '/'); - *next = 0; - if (!CreateDirectory(pathname, NULL) && - GetLastError() != ERROR_ALREADY_EXISTS) - return MakeErrMsg(ErrMsg, - std::string(pathname) + ": Can't create directory: "); - *next++ = '/'; - } - } else { - // Drop trailing slash. - pathname[len-1] = 0; - if (!CreateDirectory(pathname, NULL) && - GetLastError() != ERROR_ALREADY_EXISTS) { - return MakeErrMsg(ErrMsg, std::string(pathname) + - ": Can't create directory: "); + if (Size == 0) { + MEMORY_BASIC_INFORMATION mbi; + SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi)); + if (Result == 0) { + error_code ec = windows_error(GetLastError()); + ::UnmapViewOfFile(Mapping); + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); + } else + ::CloseHandle(FileHandle); + return ec; } + Size = mbi.RegionSize; } - return false; -} - -bool -Path::createFileOnDisk(std::string* ErrMsg) { - // Create the file - HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW, - FILE_ATTRIBUTE_NORMAL, NULL); - if (h == INVALID_HANDLE_VALUE) - return MakeErrMsg(ErrMsg, path + ": Can't create file: "); - CloseHandle(h); - return false; + // Close all the handles except for the view. It will keep the other handles + // alive. + ::CloseHandle(FileMappingHandle); + if (FileDescriptor) { + if (CloseFD) + _close(FileDescriptor); // Also closes FileHandle. 
+ } else + ::CloseHandle(FileHandle); + return error_code::success(); } -bool -Path::eraseFromDisk(bool remove_contents, std::string *ErrStr) const { - WIN32_FILE_ATTRIBUTE_DATA fi; - if (!GetFileAttributesEx(path.c_str(), GetFileExInfoStandard, &fi)) - return true; +mapped_file_region::mapped_file_region(const Twine &path, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() + , FileDescriptor() + , FileHandle(INVALID_HANDLE_VALUE) + , FileMappingHandle() { + SmallString<128> path_storage; + SmallVector path_utf16; + + // Convert path to UTF-16. + if ((ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))) + return; - if (fi.dwFileAttributes & FILE_ATTRIBUTE_DIRECTORY) { - // If it doesn't exist, we're done. - bool Exists; - if (fs::exists(path, Exists) || !Exists) - return false; + // Get file handle for creating a file mapping. + FileHandle = ::CreateFileW(c_str(path_utf16), + Mode == readonly ? GENERIC_READ + : GENERIC_READ | GENERIC_WRITE, + Mode == readonly ? FILE_SHARE_READ + : 0, + 0, + Mode == readonly ? OPEN_EXISTING + : OPEN_ALWAYS, + Mode == readonly ? FILE_ATTRIBUTE_READONLY + : FILE_ATTRIBUTE_NORMAL, + 0); + if (FileHandle == INVALID_HANDLE_VALUE) { + ec = windows_error(::GetLastError()); + return; + } - char *pathname = reinterpret_cast(_alloca(path.length()+3)); - int lastchar = path.length() - 1 ; - path.copy(pathname, lastchar+1); - - // Make path end with '/*'. - if (pathname[lastchar] != '/') - pathname[++lastchar] = '/'; - pathname[lastchar+1] = '*'; - pathname[lastchar+2] = 0; - - if (remove_contents) { - WIN32_FIND_DATA fd; - HANDLE h = FindFirstFile(pathname, &fd); - - // It's a bad idea to alter the contents of a directory while enumerating - // its contents. So build a list of its contents first, then destroy them. - - if (h != INVALID_HANDLE_VALUE) { - std::vector list; - - do { - if (strcmp(fd.cFileName, ".") == 0) - continue; - if (strcmp(fd.cFileName, "..") == 0) - continue; - - Path aPath(path); - aPath.appendComponent(&fd.cFileName[0]); - list.push_back(aPath); - } while (FindNextFile(h, &fd)); - - DWORD err = GetLastError(); - FindClose(h); - if (err != ERROR_NO_MORE_FILES) { - SetLastError(err); - return MakeErrMsg(ErrStr, path + ": Can't read directory: "); - } - - for (std::vector::iterator I = list.begin(); I != list.end(); - ++I) { - Path &aPath = *I; - aPath.eraseFromDisk(true); - } - } else { - if (GetLastError() != ERROR_FILE_NOT_FOUND) - return MakeErrMsg(ErrStr, path + ": Can't read directory: "); - } - } + FileDescriptor = 0; + ec = init(FileDescriptor, true, offset); + if (ec) { + Mapping = FileMappingHandle = 0; + FileHandle = INVALID_HANDLE_VALUE; + FileDescriptor = 0; + } +} - pathname[lastchar] = 0; - if (!RemoveDirectory(pathname)) - return MakeErrMsg(ErrStr, - std::string(pathname) + ": Can't destroy directory: "); - return false; - } else { - // Read-only files cannot be deleted on Windows. Must remove the read-only - // attribute first. 
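// The PathV1 code being removed here (comment above, implementation just
// below) captures a Windows rule that the new API still has to respect:
// DeleteFile fails on a file with the read-only attribute set, so the
// attribute must be cleared first. A minimal wide-character sketch;
// deleteEvenIfReadOnly is a hypothetical name, not part of the patch:

#include <windows.h>

static bool deleteEvenIfReadOnly(LPCWSTR Path) {
  DWORD Attrs = ::GetFileAttributesW(Path);
  if (Attrs == INVALID_FILE_ATTRIBUTES)
    return false; // Missing or inaccessible.
  if (Attrs & FILE_ATTRIBUTE_READONLY)
    ::SetFileAttributesW(Path, Attrs & ~FILE_ATTRIBUTE_READONLY);
  return ::DeleteFileW(Path) != 0;
}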
- if (fi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) { - if (!SetFileAttributes(path.c_str(), - fi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY)) - return MakeErrMsg(ErrStr, path + ": Can't destroy file: "); - } +mapped_file_region::mapped_file_region(int fd, + bool closefd, + mapmode mode, + uint64_t length, + uint64_t offset, + error_code &ec) + : Mode(mode) + , Size(length) + , Mapping() + , FileDescriptor(fd) + , FileHandle(INVALID_HANDLE_VALUE) + , FileMappingHandle() { + FileHandle = reinterpret_cast(_get_osfhandle(fd)); + if (FileHandle == INVALID_HANDLE_VALUE) { + if (closefd) + _close(FileDescriptor); + FileDescriptor = 0; + ec = make_error_code(errc::bad_file_descriptor); + return; + } - if (!DeleteFile(path.c_str())) - return MakeErrMsg(ErrStr, path + ": Can't destroy file: "); - return false; + ec = init(FileDescriptor, closefd, offset); + if (ec) { + Mapping = FileMappingHandle = 0; + FileHandle = INVALID_HANDLE_VALUE; + FileDescriptor = 0; } } -bool Path::getMagicNumber(std::string& Magic, unsigned len) const { - assert(len < 1024 && "Request for magic string too long"); - char* buf = reinterpret_cast(alloca(len)); +mapped_file_region::~mapped_file_region() { + if (Mapping) + ::UnmapViewOfFile(Mapping); +} + +#if LLVM_HAS_RVALUE_REFERENCES +mapped_file_region::mapped_file_region(mapped_file_region &&other) + : Mode(other.Mode) + , Size(other.Size) + , Mapping(other.Mapping) + , FileDescriptor(other.FileDescriptor) + , FileHandle(other.FileHandle) + , FileMappingHandle(other.FileMappingHandle) { + other.Mapping = other.FileMappingHandle = 0; + other.FileHandle = INVALID_HANDLE_VALUE; + other.FileDescriptor = 0; +} +#endif - HANDLE h = CreateFile(path.c_str(), - GENERIC_READ, - FILE_SHARE_READ, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL); - if (h == INVALID_HANDLE_VALUE) - return false; +mapped_file_region::mapmode mapped_file_region::flags() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Mode; +} - DWORD nRead = 0; - BOOL ret = ReadFile(h, buf, len, &nRead, NULL); - CloseHandle(h); +uint64_t mapped_file_region::size() const { + assert(Mapping && "Mapping failed but used anyway!"); + return Size; +} - if (!ret || nRead != len) - return false; +char *mapped_file_region::data() const { + assert(Mode != readonly && "Cannot get non const data for readonly mapping!"); + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast(Mapping); +} - Magic = std::string(buf, len); - return true; +const char *mapped_file_region::const_data() const { + assert(Mapping && "Mapping failed but used anyway!"); + return reinterpret_cast(Mapping); } -bool -Path::renamePathOnDisk(const Path& newName, std::string* ErrMsg) { - if (!MoveFileEx(path.c_str(), newName.c_str(), MOVEFILE_REPLACE_EXISTING)) - return MakeErrMsg(ErrMsg, "Can't move '" + path + "' to '" + newName.path - + "': "); - return false; +int mapped_file_region::alignment() { + SYSTEM_INFO SysInfo; + ::GetSystemInfo(&SysInfo); + return SysInfo.dwAllocationGranularity; } -bool -Path::setStatusInfoOnDisk(const FileStatus &si, std::string *ErrMsg) const { - // FIXME: should work on directories also. 
- if (!si.isFile) { - return true; - } +error_code detail::directory_iterator_construct(detail::DirIterState &it, + StringRef path){ + SmallVector path_utf16; - HANDLE h = CreateFile(path.c_str(), - FILE_READ_ATTRIBUTES | FILE_WRITE_ATTRIBUTES, - FILE_SHARE_READ | FILE_SHARE_WRITE | FILE_SHARE_DELETE, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_NORMAL, - NULL); - if (h == INVALID_HANDLE_VALUE) - return true; + if (error_code ec = UTF8ToUTF16(path, + path_utf16)) + return ec; - BY_HANDLE_FILE_INFORMATION bhfi; - if (!GetFileInformationByHandle(h, &bhfi)) { - DWORD err = GetLastError(); - CloseHandle(h); - SetLastError(err); - return MakeErrMsg(ErrMsg, path + ": GetFileInformationByHandle: "); - } - - ULARGE_INTEGER ui; - ui.QuadPart = si.modTime.toWin32Time(); - FILETIME ft; - ft.dwLowDateTime = ui.LowPart; - ft.dwHighDateTime = ui.HighPart; - BOOL ret = SetFileTime(h, NULL, &ft, &ft); - DWORD err = GetLastError(); - CloseHandle(h); - if (!ret) { - SetLastError(err); - return MakeErrMsg(ErrMsg, path + ": SetFileTime: "); - } - - // Best we can do with Unix permission bits is to interpret the owner - // writable bit. - if (si.mode & 0200) { - if (bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY) { - if (!SetFileAttributes(path.c_str(), - bhfi.dwFileAttributes & ~FILE_ATTRIBUTE_READONLY)) - return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: "); - } + // Convert path to the format that Windows is happy with. + if (path_utf16.size() > 0 && + !is_separator(path_utf16[path.size() - 1]) && + path_utf16[path.size() - 1] != L':') { + path_utf16.push_back(L'\\'); + path_utf16.push_back(L'*'); } else { - if (!(bhfi.dwFileAttributes & FILE_ATTRIBUTE_READONLY)) { - if (!SetFileAttributes(path.c_str(), - bhfi.dwFileAttributes | FILE_ATTRIBUTE_READONLY)) - return MakeErrMsg(ErrMsg, path + ": SetFileAttributes: "); - } + path_utf16.push_back(L'*'); } - return false; + // Get the first directory entry. + WIN32_FIND_DATAW FirstFind; + ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind)); + if (!FindHandle) + return windows_error(::GetLastError()); + + size_t FilenameLen = ::wcslen(FirstFind.cFileName); + while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') || + (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' && + FirstFind.cFileName[1] == L'.')) + if (!::FindNextFileW(FindHandle, &FirstFind)) { + error_code ec = windows_error(::GetLastError()); + // Check for end. + if (ec == windows_error::no_more_files) + return detail::directory_iterator_destruct(it); + return ec; + } else + FilenameLen = ::wcslen(FirstFind.cFileName); + + // Construct the current directory entry. + SmallString<128> directory_entry_name_utf8; + if (error_code ec = UTF16ToUTF8(FirstFind.cFileName, + ::wcslen(FirstFind.cFileName), + directory_entry_name_utf8)) + return ec; + + it.IterationHandle = intptr_t(FindHandle.take()); + SmallString<128> directory_entry_path(path); + path::append(directory_entry_path, directory_entry_name_utf8.str()); + it.CurrentEntry = directory_entry(directory_entry_path.str()); + + return error_code::success(); } -bool -CopyFile(const sys::Path &Dest, const sys::Path &Src, std::string* ErrMsg) { - // Can't use CopyFile macro defined in Windows.h because it would mess up the - // above line. We use the expansion it would have in a non-UNICODE build. 
- if (!::CopyFileA(Src.c_str(), Dest.c_str(), false)) - return MakeErrMsg(ErrMsg, "Can't copy '" + Src.str() + - "' to '" + Dest.str() + "': "); - return false; +error_code detail::directory_iterator_destruct(detail::DirIterState &it) { + if (it.IterationHandle != 0) + // Closes the handle if it's valid. + ScopedFindHandle close(HANDLE(it.IterationHandle)); + it.IterationHandle = 0; + it.CurrentEntry = directory_entry(); + return error_code::success(); } -bool -Path::makeUnique(bool reuse_current, std::string* ErrMsg) { - bool Exists; - if (reuse_current && (fs::exists(path, Exists) || !Exists)) - return false; // File doesn't exist already, just use it! - - // Reserve space for -XXXXXX at the end. - char *FNBuffer = (char*) alloca(path.size()+8); - unsigned offset = path.size(); - path.copy(FNBuffer, offset); - - // Find a numeric suffix that isn't used by an existing file. Assume there - // won't be more than 1 million files with the same prefix. Probably a safe - // bet. - static int FCounter = -1; - if (FCounter < 0) { - // Give arbitrary initial seed. - // FIXME: We should use sys::fs::unique_file() in future. - LARGE_INTEGER cnt64; - DWORD x = GetCurrentProcessId(); - x = (x << 16) | (x >> 16); - if (QueryPerformanceCounter(&cnt64)) // RDTSC - x ^= cnt64.HighPart ^ cnt64.LowPart; - FCounter = x % 1000000; +error_code detail::directory_iterator_increment(detail::DirIterState &it) { + WIN32_FIND_DATAW FindData; + if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) { + error_code ec = windows_error(::GetLastError()); + // Check for end. + if (ec == windows_error::no_more_files) + return detail::directory_iterator_destruct(it); + return ec; } - do { - sprintf(FNBuffer+offset, "-%06u", FCounter); - if (++FCounter > 999999) - FCounter = 0; - path = FNBuffer; - } while (!fs::exists(path, Exists) && Exists); - return false; -} -bool -Path::createTemporaryFileOnDisk(bool reuse_current, std::string* ErrMsg) { - // Make this into a unique file name - makeUnique(reuse_current, ErrMsg); + size_t FilenameLen = ::wcslen(FindData.cFileName); + if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') || + (FilenameLen == 2 && FindData.cFileName[0] == L'.' && + FindData.cFileName[1] == L'.')) + return directory_iterator_increment(it); - // Now go and create it - HANDLE h = CreateFile(path.c_str(), GENERIC_WRITE, 0, NULL, CREATE_NEW, - FILE_ATTRIBUTE_NORMAL, NULL); - if (h == INVALID_HANDLE_VALUE) - return MakeErrMsg(ErrMsg, path + ": can't create file"); + SmallString<128> directory_entry_path_utf8; + if (error_code ec = UTF16ToUTF8(FindData.cFileName, + ::wcslen(FindData.cFileName), + directory_entry_path_utf8)) + return ec; - CloseHandle(h); - return false; + it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8)); + return error_code::success(); } -/// MapInFilePages - Not yet implemented on win32. -const char *Path::MapInFilePages(int FD, size_t FileSize, off_t Offset) { - return 0; +error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, + bool map_writable, void *&result) { + assert(0 && "NOT IMPLEMENTED"); + return windows_error::invalid_function; } -/// MapInFilePages - Not yet implemented on win32. 
-void Path::UnMapFilePages(const char *Base, size_t FileSize) { assert(0 && "NOT IMPLEMENTED"); +error_code unmap_file_pages(void *base, size_t size) { assert(0 && "NOT IMPLEMENTED"); + return windows_error::invalid_function; +} + +error_code openFileForRead(const Twine &Name, int &ResultFD) { + SmallString<128> PathStorage; + SmallVector<wchar_t, 128> PathUTF16; + + if (error_code EC = UTF8ToUTF16(Name.toStringRef(PathStorage), + PathUTF16)) + return EC; + + HANDLE H = ::CreateFileW(PathUTF16.begin(), GENERIC_READ, + FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, + OPEN_EXISTING, FILE_ATTRIBUTE_NORMAL, NULL); + if (H == INVALID_HANDLE_VALUE) { + error_code EC = windows_error(::GetLastError()); + // Provide a better error message when trying to open directories. + // This only runs if we failed to open the file, so there is probably + // no performances issues. + if (EC != windows_error::access_denied) + return EC; + if (is_directory(Name)) + return error_code(errc::is_a_directory, posix_category()); + return EC; + } + + int FD = ::_open_osfhandle(intptr_t(H), 0); + if (FD == -1) { + ::CloseHandle(H); + return windows_error::invalid_handle; + } + + ResultFD = FD; + return error_code::success(); +} + +error_code openFileForWrite(const Twine &Name, int &ResultFD, + sys::fs::OpenFlags Flags, unsigned Mode) { + // Verify that we don't have both "append" and "excl". + assert((!(Flags & sys::fs::F_Excl) || !(Flags & sys::fs::F_Append)) && + "Cannot specify both 'excl' and 'append' file creation flags!"); + + SmallString<128> PathStorage; + SmallVector<wchar_t, 128> PathUTF16; + + if (error_code EC = UTF8ToUTF16(Name.toStringRef(PathStorage), + PathUTF16)) + return EC; + + DWORD CreationDisposition; + if (Flags & F_Excl) + CreationDisposition = CREATE_NEW; + else if (Flags & F_Append) + CreationDisposition = OPEN_ALWAYS; + else + CreationDisposition = CREATE_ALWAYS; + + HANDLE H = ::CreateFileW(PathUTF16.begin(), GENERIC_WRITE, + FILE_SHARE_READ | FILE_SHARE_WRITE, NULL, + CreationDisposition, FILE_ATTRIBUTE_NORMAL, NULL); + + if (H == INVALID_HANDLE_VALUE) { + error_code EC = windows_error(::GetLastError()); + // Provide a better error message when trying to open directories. + // This only runs if we failed to open the file, so there is probably + // no performances issues. + if (EC != windows_error::access_denied) + return EC; + if (is_directory(Name)) + return error_code(errc::is_a_directory, posix_category()); + return EC; + } + + int OpenFlags = 0; + if (Flags & F_Append) + OpenFlags |= _O_APPEND; + + if (!(Flags & F_Binary)) + OpenFlags |= _O_TEXT; + + int FD = ::_open_osfhandle(intptr_t(H), OpenFlags); + if (FD == -1) { + ::CloseHandle(H); + return windows_error::invalid_handle; + } + + ResultFD = FD; + return error_code::success(); } +} // end namespace fs +namespace windows { +llvm::error_code UTF8ToUTF16(llvm::StringRef utf8, + llvm::SmallVectorImpl<wchar_t> &utf16) { + int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + utf8.begin(), utf8.size(), + utf16.begin(), 0); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + utf16.reserve(len + 1); + utf16.set_size(len); + + len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, + utf8.begin(), utf8.size(), + utf16.begin(), utf16.size()); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + // Make utf16 null terminated. + utf16.push_back(0); + utf16.pop_back(); + + return llvm::error_code::success(); } + +llvm::error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, + llvm::SmallVectorImpl<char> &utf8) { + // Get length.
+ int len = ::WideCharToMultiByte(CP_UTF8, 0, + utf16, utf16_len, + utf8.begin(), 0, + NULL, NULL); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + utf8.reserve(len); + utf8.set_size(len); + + // Now do the actual conversion. + len = ::WideCharToMultiByte(CP_UTF8, 0, + utf16, utf16_len, + utf8.data(), utf8.size(), + NULL, NULL); + + if (len == 0) + return llvm::windows_error(::GetLastError()); + + // Make utf8 null terminated. + utf8.push_back(0); + utf8.pop_back(); + + return llvm::error_code::success(); } +} // end namespace windows +} // end namespace sys +} // end namespace llvm diff --git a/contrib/llvm/lib/Support/Windows/PathV2.inc b/contrib/llvm/lib/Support/Windows/PathV2.inc deleted file mode 100644 index 23f3d14..0000000 --- a/contrib/llvm/lib/Support/Windows/PathV2.inc +++ /dev/null @@ -1,1022 +0,0 @@ -//===- llvm/Support/Windows/PathV2.inc - Windows Path Impl ------*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the Windows specific implementation of the PathV2 API. -// -//===----------------------------------------------------------------------===// - -//===----------------------------------------------------------------------===// -//=== WARNING: Implementation here must contain only generic Windows code that -//=== is guaranteed to work on *all* Windows variants. -//===----------------------------------------------------------------------===// - -#include "Windows.h" -#include -#include -#include -#include - -#undef max - -// MinGW doesn't define this. -#ifndef _ERRNO_T_DEFINED -#define _ERRNO_T_DEFINED -typedef int errno_t; -#endif - -using namespace llvm; - -namespace { - typedef BOOLEAN (WINAPI *PtrCreateSymbolicLinkW)( - /*__in*/ LPCWSTR lpSymlinkFileName, - /*__in*/ LPCWSTR lpTargetFileName, - /*__in*/ DWORD dwFlags); - - PtrCreateSymbolicLinkW create_symbolic_link_api = PtrCreateSymbolicLinkW( - ::GetProcAddress(::GetModuleHandleA("kernel32.dll"), - "CreateSymbolicLinkW")); - - error_code UTF8ToUTF16(StringRef utf8, SmallVectorImpl &utf16) { - int len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, - utf8.begin(), utf8.size(), - utf16.begin(), 0); - - if (len == 0) - return windows_error(::GetLastError()); - - utf16.reserve(len + 1); - utf16.set_size(len); - - len = ::MultiByteToWideChar(CP_UTF8, MB_ERR_INVALID_CHARS, - utf8.begin(), utf8.size(), - utf16.begin(), utf16.size()); - - if (len == 0) - return windows_error(::GetLastError()); - - // Make utf16 null terminated. - utf16.push_back(0); - utf16.pop_back(); - - return error_code::success(); - } - - error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, - SmallVectorImpl &utf8) { - // Get length. - int len = ::WideCharToMultiByte(CP_UTF8, 0, - utf16, utf16_len, - utf8.begin(), 0, - NULL, NULL); - - if (len == 0) - return windows_error(::GetLastError()); - - utf8.reserve(len); - utf8.set_size(len); - - // Now do the actual conversion. - len = ::WideCharToMultiByte(CP_UTF8, 0, - utf16, utf16_len, - utf8.data(), utf8.size(), - NULL, NULL); - - if (len == 0) - return windows_error(::GetLastError()); - - // Make utf8 null terminated. 
- utf8.push_back(0); - utf8.pop_back(); - - return error_code::success(); - } - - error_code TempDir(SmallVectorImpl &result) { - retry_temp_dir: - DWORD len = ::GetTempPathW(result.capacity(), result.begin()); - - if (len == 0) - return windows_error(::GetLastError()); - - if (len > result.capacity()) { - result.reserve(len); - goto retry_temp_dir; - } - - result.set_size(len); - return error_code::success(); - } - - bool is_separator(const wchar_t value) { - switch (value) { - case L'\\': - case L'/': - return true; - default: - return false; - } - } -} - -namespace llvm { -namespace sys { -namespace fs { - -error_code current_path(SmallVectorImpl &result) { - SmallVector cur_path; - cur_path.reserve(128); -retry_cur_dir: - DWORD len = ::GetCurrentDirectoryW(cur_path.capacity(), cur_path.data()); - - // A zero return value indicates a failure other than insufficient space. - if (len == 0) - return windows_error(::GetLastError()); - - // If there's insufficient space, the len returned is larger than the len - // given. - if (len > cur_path.capacity()) { - cur_path.reserve(len); - goto retry_cur_dir; - } - - cur_path.set_size(len); - // cur_path now holds the current directory in utf-16. Convert to utf-8. - - // Find out how much space we need. Sadly, this function doesn't return the - // size needed unless you tell it the result size is 0, which means you - // _always_ have to call it twice. - len = ::WideCharToMultiByte(CP_UTF8, 0, - cur_path.data(), cur_path.size(), - result.data(), 0, - NULL, NULL); - - if (len == 0) - return make_error_code(windows_error(::GetLastError())); - - result.reserve(len); - result.set_size(len); - // Now do the actual conversion. - len = ::WideCharToMultiByte(CP_UTF8, 0, - cur_path.data(), cur_path.size(), - result.data(), result.size(), - NULL, NULL); - if (len == 0) - return windows_error(::GetLastError()); - - return error_code::success(); -} - -error_code copy_file(const Twine &from, const Twine &to, copy_option copt) { - // Get arguments. - SmallString<128> from_storage; - SmallString<128> to_storage; - StringRef f = from.toStringRef(from_storage); - StringRef t = to.toStringRef(to_storage); - - // Convert to utf-16. - SmallVector wide_from; - SmallVector wide_to; - if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; - if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; - - // Copy the file. - BOOL res = ::CopyFileW(wide_from.begin(), wide_to.begin(), - copt != copy_option::overwrite_if_exists); - - if (res == 0) - return windows_error(::GetLastError()); - - return error_code::success(); -} - -error_code create_directory(const Twine &path, bool &existed) { - SmallString<128> path_storage; - SmallVector path_utf16; - - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) - return ec; - - if (!::CreateDirectoryW(path_utf16.begin(), NULL)) { - error_code ec = windows_error(::GetLastError()); - if (ec == windows_error::already_exists) - existed = true; - else - return ec; - } else - existed = false; - - return error_code::success(); -} - -error_code create_hard_link(const Twine &to, const Twine &from) { - // Get arguments. - SmallString<128> from_storage; - SmallString<128> to_storage; - StringRef f = from.toStringRef(from_storage); - StringRef t = to.toStringRef(to_storage); - - // Convert to utf-16. 
- SmallVector wide_from; - SmallVector wide_to; - if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; - if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; - - if (!::CreateHardLinkW(wide_from.begin(), wide_to.begin(), NULL)) - return windows_error(::GetLastError()); - - return error_code::success(); -} - -error_code create_symlink(const Twine &to, const Twine &from) { - // Only do it if the function is available at runtime. - if (!create_symbolic_link_api) - return make_error_code(errc::function_not_supported); - - // Get arguments. - SmallString<128> from_storage; - SmallString<128> to_storage; - StringRef f = from.toStringRef(from_storage); - StringRef t = to.toStringRef(to_storage); - - // Convert to utf-16. - SmallVector wide_from; - SmallVector wide_to; - if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; - if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; - - if (!create_symbolic_link_api(wide_from.begin(), wide_to.begin(), 0)) - return windows_error(::GetLastError()); - - return error_code::success(); -} - -error_code remove(const Twine &path, bool &existed) { - SmallString<128> path_storage; - SmallVector path_utf16; - - file_status st; - if (error_code ec = status(path, st)) - return ec; - - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) - return ec; - - if (st.type() == file_type::directory_file) { - if (!::RemoveDirectoryW(c_str(path_utf16))) { - error_code ec = windows_error(::GetLastError()); - if (ec != windows_error::file_not_found) - return ec; - existed = false; - } else - existed = true; - } else { - if (!::DeleteFileW(c_str(path_utf16))) { - error_code ec = windows_error(::GetLastError()); - if (ec != windows_error::file_not_found) - return ec; - existed = false; - } else - existed = true; - } - - return error_code::success(); -} - -error_code rename(const Twine &from, const Twine &to) { - // Get arguments. - SmallString<128> from_storage; - SmallString<128> to_storage; - StringRef f = from.toStringRef(from_storage); - StringRef t = to.toStringRef(to_storage); - - // Convert to utf-16. - SmallVector wide_from; - SmallVector wide_to; - if (error_code ec = UTF8ToUTF16(f, wide_from)) return ec; - if (error_code ec = UTF8ToUTF16(t, wide_to)) return ec; - - error_code ec = error_code::success(); - for (int i = 0; i < 2000; i++) { - if (::MoveFileExW(wide_from.begin(), wide_to.begin(), - MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING)) - return error_code::success(); - ec = windows_error(::GetLastError()); - if (ec != windows_error::access_denied) - break; - // Retry MoveFile() at ACCESS_DENIED. - // System scanners (eg. indexer) might open the source file when - // It is written and closed. 
- ::Sleep(1); - } - - return ec; -} - -error_code resize_file(const Twine &path, uint64_t size) { - SmallString<128> path_storage; - SmallVector path_utf16; - - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) - return ec; - - int fd = ::_wopen(path_utf16.begin(), O_BINARY | _O_RDWR, S_IWRITE); - if (fd == -1) - return error_code(errno, generic_category()); -#ifdef HAVE__CHSIZE_S - errno_t error = ::_chsize_s(fd, size); -#else - errno_t error = ::_chsize(fd, size); -#endif - ::close(fd); - return error_code(error, generic_category()); -} - -error_code exists(const Twine &path, bool &result) { - SmallString<128> path_storage; - SmallVector path_utf16; - - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) - return ec; - - DWORD attributes = ::GetFileAttributesW(path_utf16.begin()); - - if (attributes == INVALID_FILE_ATTRIBUTES) { - // See if the file didn't actually exist. - error_code ec = make_error_code(windows_error(::GetLastError())); - if (ec != windows_error::file_not_found && - ec != windows_error::path_not_found) - return ec; - result = false; - } else - result = true; - return error_code::success(); -} - -bool equivalent(file_status A, file_status B) { - assert(status_known(A) && status_known(B)); - return A.FileIndexHigh == B.FileIndexHigh && - A.FileIndexLow == B.FileIndexLow && - A.FileSizeHigh == B.FileSizeHigh && - A.FileSizeLow == B.FileSizeLow && - A.LastWriteTimeHigh == B.LastWriteTimeHigh && - A.LastWriteTimeLow == B.LastWriteTimeLow && - A.VolumeSerialNumber == B.VolumeSerialNumber; -} - -error_code equivalent(const Twine &A, const Twine &B, bool &result) { - file_status fsA, fsB; - if (error_code ec = status(A, fsA)) return ec; - if (error_code ec = status(B, fsB)) return ec; - result = equivalent(fsA, fsB); - return error_code::success(); -} - -error_code file_size(const Twine &path, uint64_t &result) { - SmallString<128> path_storage; - SmallVector path_utf16; - - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) - return ec; - - WIN32_FILE_ATTRIBUTE_DATA FileData; - if (!::GetFileAttributesExW(path_utf16.begin(), - ::GetFileExInfoStandard, - &FileData)) - return windows_error(::GetLastError()); - - result = - (uint64_t(FileData.nFileSizeHigh) << (sizeof(FileData.nFileSizeLow) * 8)) - + FileData.nFileSizeLow; - - return error_code::success(); -} - -static bool isReservedName(StringRef path) { - // This list of reserved names comes from MSDN, at: - // http://msdn.microsoft.com/en-us/library/aa365247%28v=vs.85%29.aspx - static const char *sReservedNames[] = { "nul", "con", "prn", "aux", - "com1", "com2", "com3", "com4", "com5", "com6", - "com7", "com8", "com9", "lpt1", "lpt2", "lpt3", - "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9" }; - - // First, check to see if this is a device namespace, which always - // starts with \\.\, since device namespaces are not legal file paths. - if (path.startswith("\\\\.\\")) - return true; - - // Then compare against the list of ancient reserved names - for (size_t i = 0; i < sizeof(sReservedNames) / sizeof(const char *); ++i) { - if (path.equals_lower(sReservedNames[i])) - return true; - } - - // The path isn't what we consider reserved. 
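// The rename() loop above exists because MoveFileExW() can fail with a
// transient ACCESS_DENIED while a scanner still holds the source file
// open. A condensed standalone sketch of that retry pattern (the
// 2000 x 1 ms budget mirrors the loop above; the helper name is
// illustrative):
#include <windows.h>

// Returns true once the move succeeds, false on a permanent error or when
// the retry budget is exhausted.
static bool RenameWithRetry(const wchar_t *From, const wchar_t *To) {
  for (int I = 0; I < 2000; ++I) {
    if (::MoveFileExW(From, To,
                      MOVEFILE_COPY_ALLOWED | MOVEFILE_REPLACE_EXISTING))
      return true;
    // Only ACCESS_DENIED is worth retrying; anything else is permanent.
    if (::GetLastError() != ERROR_ACCESS_DENIED)
      return false;
    ::Sleep(1); // Give the other process a chance to close its handle.
  }
  return false;
}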
- return false; -} - -error_code status(const Twine &path, file_status &result) { - SmallString<128> path_storage; - SmallVector path_utf16; - - StringRef path8 = path.toStringRef(path_storage); - if (isReservedName(path8)) { - result = file_status(file_type::character_file); - return error_code::success(); - } - - if (error_code ec = UTF8ToUTF16(path8, path_utf16)) - return ec; - - DWORD attr = ::GetFileAttributesW(path_utf16.begin()); - if (attr == INVALID_FILE_ATTRIBUTES) - goto handle_status_error; - - // Handle reparse points. - if (attr & FILE_ATTRIBUTE_REPARSE_POINT) { - ScopedFileHandle h( - ::CreateFileW(path_utf16.begin(), - 0, // Attributes only. - FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, - OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS, - 0)); - if (!h) - goto handle_status_error; - } - - if (attr & FILE_ATTRIBUTE_DIRECTORY) - result = file_status(file_type::directory_file); - else { - result = file_status(file_type::regular_file); - ScopedFileHandle h( - ::CreateFileW(path_utf16.begin(), - 0, // Attributes only. - FILE_SHARE_DELETE | FILE_SHARE_READ | FILE_SHARE_WRITE, - NULL, - OPEN_EXISTING, - FILE_FLAG_BACKUP_SEMANTICS, - 0)); - if (!h) - goto handle_status_error; - BY_HANDLE_FILE_INFORMATION Info; - if (!::GetFileInformationByHandle(h, &Info)) - goto handle_status_error; - result.FileIndexHigh = Info.nFileIndexHigh; - result.FileIndexLow = Info.nFileIndexLow; - result.FileSizeHigh = Info.nFileSizeHigh; - result.FileSizeLow = Info.nFileSizeLow; - result.LastWriteTimeHigh = Info.ftLastWriteTime.dwHighDateTime; - result.LastWriteTimeLow = Info.ftLastWriteTime.dwLowDateTime; - result.VolumeSerialNumber = Info.dwVolumeSerialNumber; - } - - return error_code::success(); - -handle_status_error: - error_code ec = windows_error(::GetLastError()); - if (ec == windows_error::file_not_found || - ec == windows_error::path_not_found) - result = file_status(file_type::file_not_found); - else if (ec == windows_error::sharing_violation) - result = file_status(file_type::type_unknown); - else { - result = file_status(file_type::status_error); - return ec; - } - - return error_code::success(); -} - - -// Modifies permissions on a file. -error_code permissions(const Twine &path, perms prms) { -#if 0 // verify code below before enabling: - // If the permissions bits are not trying to modify - // "write" permissions, there is nothing to do. - if (!(prms & (owner_write|group_write|others_write))) - return error_code::success(); - - SmallString<128> path_storage; - SmallVector path_utf16; - - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) - return ec; - - DWORD attributes = ::GetFileAttributesW(path_utf16.begin()); - - if (prms & add_perms) { - attributes &= ~FILE_ATTRIBUTE_READONLY; - } - else if (prms & remove_perms) { - attributes |= FILE_ATTRIBUTE_READONLY; - } - else { - assert(0 && "neither add_perms or remove_perms is set"); - } - - if ( ! ::SetFileAttributesW(path_utf16.begin(), attributes)) - return windows_error(::GetLastError()); -#endif - return error_code::success(); -} - - -// FIXME: mode should be used here and default to user r/w only, -// it currently comes in as a UNIX mode. -error_code unique_file(const Twine &model, int &result_fd, - SmallVectorImpl &result_path, - bool makeAbsolute, unsigned mode) { - // Use result_path as temp storage. 
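// status() above special-cases reserved DOS device names (and the \\.\
// device namespace) so they never touch the filesystem. A standalone
// sketch of the same check using the plain CRT (_stricmp is the MSVC
// case-insensitive compare; the helper name is illustrative):
#include <string.h>

static bool IsReservedDeviceName(const char *Path) {
  static const char *const Names[] = {
    "nul", "con", "prn", "aux",
    "com1", "com2", "com3", "com4", "com5", "com6", "com7", "com8", "com9",
    "lpt1", "lpt2", "lpt3", "lpt4", "lpt5", "lpt6", "lpt7", "lpt8", "lpt9"
  };
  // Device namespace paths are never legal file paths.
  if (strncmp(Path, "\\\\.\\", 4) == 0)
    return true;
  for (size_t I = 0; I < sizeof(Names) / sizeof(Names[0]); ++I)
    if (_stricmp(Path, Names[I]) == 0)
      return true;
  return false;
}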
- result_path.set_size(0); - StringRef m = model.toStringRef(result_path); - - SmallVector model_utf16; - if (error_code ec = UTF8ToUTF16(m, model_utf16)) return ec; - - if (makeAbsolute) { - // Make model absolute by prepending a temp directory if it's not already. - bool absolute = path::is_absolute(m); - - if (!absolute) { - SmallVector temp_dir; - if (error_code ec = TempDir(temp_dir)) return ec; - // Handle c: by removing it. - if (model_utf16.size() > 2 && model_utf16[1] == L':') { - model_utf16.erase(model_utf16.begin(), model_utf16.begin() + 2); - } - model_utf16.insert(model_utf16.begin(), temp_dir.begin(), temp_dir.end()); - } - } - - // Replace '%' with random chars. From here on, DO NOT modify model. It may be - // needed if the randomly chosen path already exists. - SmallVector random_path_utf16; - - // Get a Crypto Provider for CryptGenRandom. - HCRYPTPROV HCPC; - if (!::CryptAcquireContextW(&HCPC, - NULL, - NULL, - PROV_RSA_FULL, - CRYPT_VERIFYCONTEXT)) - return windows_error(::GetLastError()); - ScopedCryptContext CryptoProvider(HCPC); - -retry_random_path: - random_path_utf16.set_size(0); - for (SmallVectorImpl::const_iterator i = model_utf16.begin(), - e = model_utf16.end(); - i != e; ++i) { - if (*i == L'%') { - BYTE val = 0; - if (!::CryptGenRandom(CryptoProvider, 1, &val)) - return windows_error(::GetLastError()); - random_path_utf16.push_back("0123456789abcdef"[val & 15]); - } - else - random_path_utf16.push_back(*i); - } - // Make random_path_utf16 null terminated. - random_path_utf16.push_back(0); - random_path_utf16.pop_back(); - - // Make sure we don't fall into an infinite loop by constantly trying - // to create the parent path. - bool TriedToCreateParent = false; - - // Try to create + open the path. -retry_create_file: - HANDLE TempFileHandle = ::CreateFileW(random_path_utf16.begin(), - GENERIC_READ | GENERIC_WRITE, - FILE_SHARE_READ, - NULL, - // Return ERROR_FILE_EXISTS if the file - // already exists. - CREATE_NEW, - FILE_ATTRIBUTE_TEMPORARY, - NULL); - if (TempFileHandle == INVALID_HANDLE_VALUE) { - // If the file existed, try again, otherwise, error. - error_code ec = windows_error(::GetLastError()); - if (ec == windows_error::file_exists) - goto retry_random_path; - // Check for non-existing parent directories. - if (ec == windows_error::path_not_found && !TriedToCreateParent) { - TriedToCreateParent = true; - - // Create the directories using result_path as temp storage. - if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(), - random_path_utf16.size(), result_path)) - return ec; - StringRef p(result_path.begin(), result_path.size()); - SmallString<64> dir_to_create; - for (path::const_iterator i = path::begin(p), - e = --path::end(p); i != e; ++i) { - path::append(dir_to_create, *i); - bool Exists; - if (error_code ec = exists(Twine(dir_to_create), Exists)) return ec; - if (!Exists) { - // If c: doesn't exist, bail. - if (i->endswith(":")) - return ec; - - SmallVector dir_to_create_utf16; - if (error_code ec = UTF8ToUTF16(dir_to_create, dir_to_create_utf16)) - return ec; - - // Create the directory. - if (!::CreateDirectoryW(dir_to_create_utf16.begin(), NULL)) - return windows_error(::GetLastError()); - } - } - goto retry_create_file; - } - return ec; - } - - // Set result_path to the utf-8 representation of the path. 
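// unique_file() above turns a model such as "prefix-%%%%%%.o" into a
// concrete path by replacing each '%' with a random hex digit drawn from
// CryptGenRandom, retrying whenever the chosen path already exists. A
// standalone sketch of just the substitution step (illustrative helper):
#include <windows.h>
#include <wincrypt.h>
#include <string>

// Replace every '%' in Model with a cryptographically random hex digit.
// Returns false if no random bytes could be obtained.
static bool ExpandModel(std::wstring &Model) {
  HCRYPTPROV Prov;
  if (!::CryptAcquireContextW(&Prov, NULL, NULL, PROV_RSA_FULL,
                              CRYPT_VERIFYCONTEXT))
    return false;
  bool OK = true;
  for (size_t I = 0, E = Model.size(); I != E && OK; ++I) {
    if (Model[I] != L'%')
      continue;
    BYTE Val = 0;
    OK = ::CryptGenRandom(Prov, 1, &Val) != 0;
    Model[I] = L"0123456789abcdef"[Val & 15];
  }
  ::CryptReleaseContext(Prov, 0);
  return OK;
}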
- if (error_code ec = UTF16ToUTF8(random_path_utf16.begin(), - random_path_utf16.size(), result_path)) { - ::CloseHandle(TempFileHandle); - ::DeleteFileW(random_path_utf16.begin()); - return ec; - } - - // Convert the Windows API file handle into a C-runtime handle. - int fd = ::_open_osfhandle(intptr_t(TempFileHandle), 0); - if (fd == -1) { - ::CloseHandle(TempFileHandle); - ::DeleteFileW(random_path_utf16.begin()); - // MSDN doesn't say anything about _open_osfhandle setting errno or - // GetLastError(), so just return invalid_handle. - return windows_error::invalid_handle; - } - - result_fd = fd; - return error_code::success(); -} - -error_code get_magic(const Twine &path, uint32_t len, - SmallVectorImpl &result) { - SmallString<128> path_storage; - SmallVector path_utf16; - result.set_size(0); - - // Convert path to UTF-16. - if (error_code ec = UTF8ToUTF16(path.toStringRef(path_storage), - path_utf16)) - return ec; - - // Open file. - HANDLE file = ::CreateFileW(c_str(path_utf16), - GENERIC_READ, - FILE_SHARE_READ, - NULL, - OPEN_EXISTING, - FILE_ATTRIBUTE_READONLY, - NULL); - if (file == INVALID_HANDLE_VALUE) - return windows_error(::GetLastError()); - - // Allocate buffer. - result.reserve(len); - - // Get magic! - DWORD bytes_read = 0; - BOOL read_success = ::ReadFile(file, result.data(), len, &bytes_read, NULL); - error_code ec = windows_error(::GetLastError()); - ::CloseHandle(file); - if (!read_success || (bytes_read != len)) { - // Set result size to the number of bytes read if it's valid. - if (bytes_read <= len) - result.set_size(bytes_read); - // ERROR_HANDLE_EOF is mapped to errc::value_too_large. - return ec; - } - - result.set_size(len); - return error_code::success(); -} - -error_code mapped_file_region::init(int FD, bool CloseFD, uint64_t Offset) { - FileDescriptor = FD; - // Make sure that the requested size fits within SIZE_T. 
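// The tail of unique_file() above hands the freshly created HANDLE to
// _open_osfhandle() so the rest of LLVM can treat it as an ordinary CRT
// file descriptor. A standalone sketch of that conversion (illustrative
// helper; note that _open_osfhandle takes ownership only on success):
#include <windows.h>
#include <io.h>
#include <fcntl.h>

// Wrap a native file HANDLE in a CRT descriptor, closing the handle
// ourselves if the wrapping fails.
static int HandleToFd(HANDLE H) {
  int FD = ::_open_osfhandle(reinterpret_cast<intptr_t>(H), _O_BINARY);
  if (FD == -1)
    ::CloseHandle(H);
  return FD;
}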
- if (Size > std::numeric_limits::max()) { - if (FileDescriptor) { - if (CloseFD) - _close(FileDescriptor); - } else - ::CloseHandle(FileHandle); - return make_error_code(errc::invalid_argument); - } - - DWORD flprotect; - switch (Mode) { - case readonly: flprotect = PAGE_READONLY; break; - case readwrite: flprotect = PAGE_READWRITE; break; - case priv: flprotect = PAGE_WRITECOPY; break; - default: llvm_unreachable("invalid mapping mode"); - } - - FileMappingHandle = ::CreateFileMapping(FileHandle, - 0, - flprotect, - Size >> 32, - Size & 0xffffffff, - 0); - if (FileMappingHandle == NULL) { - error_code ec = windows_error(GetLastError()); - if (FileDescriptor) { - if (CloseFD) - _close(FileDescriptor); - } else - ::CloseHandle(FileHandle); - return ec; - } - - DWORD dwDesiredAccess; - switch (Mode) { - case readonly: dwDesiredAccess = FILE_MAP_READ; break; - case readwrite: dwDesiredAccess = FILE_MAP_WRITE; break; - case priv: dwDesiredAccess = FILE_MAP_COPY; break; - default: llvm_unreachable("invalid mapping mode"); - } - Mapping = ::MapViewOfFile(FileMappingHandle, - dwDesiredAccess, - Offset >> 32, - Offset & 0xffffffff, - Size); - if (Mapping == NULL) { - error_code ec = windows_error(GetLastError()); - ::CloseHandle(FileMappingHandle); - if (FileDescriptor) { - if (CloseFD) - _close(FileDescriptor); - } else - ::CloseHandle(FileHandle); - return ec; - } - - if (Size == 0) { - MEMORY_BASIC_INFORMATION mbi; - SIZE_T Result = VirtualQuery(Mapping, &mbi, sizeof(mbi)); - if (Result == 0) { - error_code ec = windows_error(GetLastError()); - ::UnmapViewOfFile(Mapping); - ::CloseHandle(FileMappingHandle); - if (FileDescriptor) { - if (CloseFD) - _close(FileDescriptor); - } else - ::CloseHandle(FileHandle); - return ec; - } - Size = mbi.RegionSize; - } - - // Close all the handles except for the view. It will keep the other handles - // alive. - ::CloseHandle(FileMappingHandle); - if (FileDescriptor) { - if (CloseFD) - _close(FileDescriptor); // Also closes FileHandle. - } else - ::CloseHandle(FileHandle); - return error_code::success(); -} - -mapped_file_region::mapped_file_region(const Twine &path, - mapmode mode, - uint64_t length, - uint64_t offset, - error_code &ec) - : Mode(mode) - , Size(length) - , Mapping() - , FileDescriptor() - , FileHandle(INVALID_HANDLE_VALUE) - , FileMappingHandle() { - SmallString<128> path_storage; - SmallVector path_utf16; - - // Convert path to UTF-16. - if ((ec = UTF8ToUTF16(path.toStringRef(path_storage), path_utf16))) - return; - - // Get file handle for creating a file mapping. - FileHandle = ::CreateFileW(c_str(path_utf16), - Mode == readonly ? GENERIC_READ - : GENERIC_READ | GENERIC_WRITE, - Mode == readonly ? FILE_SHARE_READ - : 0, - 0, - Mode == readonly ? OPEN_EXISTING - : OPEN_ALWAYS, - Mode == readonly ? 
FILE_ATTRIBUTE_READONLY - : FILE_ATTRIBUTE_NORMAL, - 0); - if (FileHandle == INVALID_HANDLE_VALUE) { - ec = windows_error(::GetLastError()); - return; - } - - FileDescriptor = 0; - ec = init(FileDescriptor, true, offset); - if (ec) { - Mapping = FileMappingHandle = 0; - FileHandle = INVALID_HANDLE_VALUE; - FileDescriptor = 0; - } -} - -mapped_file_region::mapped_file_region(int fd, - bool closefd, - mapmode mode, - uint64_t length, - uint64_t offset, - error_code &ec) - : Mode(mode) - , Size(length) - , Mapping() - , FileDescriptor(fd) - , FileHandle(INVALID_HANDLE_VALUE) - , FileMappingHandle() { - FileHandle = reinterpret_cast(_get_osfhandle(fd)); - if (FileHandle == INVALID_HANDLE_VALUE) { - if (closefd) - _close(FileDescriptor); - FileDescriptor = 0; - ec = make_error_code(errc::bad_file_descriptor); - return; - } - - ec = init(FileDescriptor, closefd, offset); - if (ec) { - Mapping = FileMappingHandle = 0; - FileHandle = INVALID_HANDLE_VALUE; - FileDescriptor = 0; - } -} - -mapped_file_region::~mapped_file_region() { - if (Mapping) - ::UnmapViewOfFile(Mapping); -} - -#if LLVM_HAS_RVALUE_REFERENCES -mapped_file_region::mapped_file_region(mapped_file_region &&other) - : Mode(other.Mode) - , Size(other.Size) - , Mapping(other.Mapping) - , FileDescriptor(other.FileDescriptor) - , FileHandle(other.FileHandle) - , FileMappingHandle(other.FileMappingHandle) { - other.Mapping = other.FileMappingHandle = 0; - other.FileHandle = INVALID_HANDLE_VALUE; - other.FileDescriptor = 0; -} -#endif - -mapped_file_region::mapmode mapped_file_region::flags() const { - assert(Mapping && "Mapping failed but used anyway!"); - return Mode; -} - -uint64_t mapped_file_region::size() const { - assert(Mapping && "Mapping failed but used anyway!"); - return Size; -} - -char *mapped_file_region::data() const { - assert(Mode != readonly && "Cannot get non const data for readonly mapping!"); - assert(Mapping && "Mapping failed but used anyway!"); - return reinterpret_cast(Mapping); -} - -const char *mapped_file_region::const_data() const { - assert(Mapping && "Mapping failed but used anyway!"); - return reinterpret_cast(Mapping); -} - -int mapped_file_region::alignment() { - SYSTEM_INFO SysInfo; - ::GetSystemInfo(&SysInfo); - return SysInfo.dwAllocationGranularity; -} - -error_code detail::directory_iterator_construct(detail::DirIterState &it, - StringRef path){ - SmallVector path_utf16; - - if (error_code ec = UTF8ToUTF16(path, - path_utf16)) - return ec; - - // Convert path to the format that Windows is happy with. - if (path_utf16.size() > 0 && - !is_separator(path_utf16[path.size() - 1]) && - path_utf16[path.size() - 1] != L':') { - path_utf16.push_back(L'\\'); - path_utf16.push_back(L'*'); - } else { - path_utf16.push_back(L'*'); - } - - // Get the first directory entry. - WIN32_FIND_DATAW FirstFind; - ScopedFindHandle FindHandle(::FindFirstFileW(c_str(path_utf16), &FirstFind)); - if (!FindHandle) - return windows_error(::GetLastError()); - - size_t FilenameLen = ::wcslen(FirstFind.cFileName); - while ((FilenameLen == 1 && FirstFind.cFileName[0] == L'.') || - (FilenameLen == 2 && FirstFind.cFileName[0] == L'.' && - FirstFind.cFileName[1] == L'.')) - if (!::FindNextFileW(FindHandle, &FirstFind)) { - error_code ec = windows_error(::GetLastError()); - // Check for end. - if (ec == windows_error::no_more_files) - return detail::directory_iterator_destruct(it); - return ec; - } else - FilenameLen = ::wcslen(FirstFind.cFileName); - - // Construct the current directory entry. 
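// directory_iterator_construct() above appends "\*" to the query and then
// keeps calling FindNextFileW() until something other than the "." and
// ".." pseudo-entries comes back. A standalone sketch of that enumeration
// (illustrative helper, error handling reduced to an empty result):
#include <windows.h>
#include <string>
#include <vector>

static std::vector<std::wstring> ListDir(const std::wstring &Dir) {
  std::vector<std::wstring> Names;
  WIN32_FIND_DATAW FD;
  HANDLE H = ::FindFirstFileW((Dir + L"\\*").c_str(), &FD);
  if (H == INVALID_HANDLE_VALUE)
    return Names;
  do {
    std::wstring Name = FD.cFileName;
    // Skip the "." and ".." pseudo-entries, as the code above does.
    if (Name != L"." && Name != L"..")
      Names.push_back(Name);
  } while (::FindNextFileW(H, &FD));
  ::FindClose(H);
  return Names;
}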
- SmallString<128> directory_entry_name_utf8; - if (error_code ec = UTF16ToUTF8(FirstFind.cFileName, - ::wcslen(FirstFind.cFileName), - directory_entry_name_utf8)) - return ec; - - it.IterationHandle = intptr_t(FindHandle.take()); - SmallString<128> directory_entry_path(path); - path::append(directory_entry_path, directory_entry_name_utf8.str()); - it.CurrentEntry = directory_entry(directory_entry_path.str()); - - return error_code::success(); -} - -error_code detail::directory_iterator_destruct(detail::DirIterState &it) { - if (it.IterationHandle != 0) - // Closes the handle if it's valid. - ScopedFindHandle close(HANDLE(it.IterationHandle)); - it.IterationHandle = 0; - it.CurrentEntry = directory_entry(); - return error_code::success(); -} - -error_code detail::directory_iterator_increment(detail::DirIterState &it) { - WIN32_FIND_DATAW FindData; - if (!::FindNextFileW(HANDLE(it.IterationHandle), &FindData)) { - error_code ec = windows_error(::GetLastError()); - // Check for end. - if (ec == windows_error::no_more_files) - return detail::directory_iterator_destruct(it); - return ec; - } - - size_t FilenameLen = ::wcslen(FindData.cFileName); - if ((FilenameLen == 1 && FindData.cFileName[0] == L'.') || - (FilenameLen == 2 && FindData.cFileName[0] == L'.' && - FindData.cFileName[1] == L'.')) - return directory_iterator_increment(it); - - SmallString<128> directory_entry_path_utf8; - if (error_code ec = UTF16ToUTF8(FindData.cFileName, - ::wcslen(FindData.cFileName), - directory_entry_path_utf8)) - return ec; - - it.CurrentEntry.replace_filename(Twine(directory_entry_path_utf8)); - return error_code::success(); -} - -error_code map_file_pages(const Twine &path, off_t file_offset, size_t size, - bool map_writable, void *&result) { - assert(0 && "NOT IMPLEMENTED"); - return windows_error::invalid_function; -} - -error_code unmap_file_pages(void *base, size_t size) { - assert(0 && "NOT IMPLEMENTED"); - return windows_error::invalid_function; -} - - - -} // end namespace fs -} // end namespace sys -} // end namespace llvm diff --git a/contrib/llvm/lib/Support/Windows/Process.inc b/contrib/llvm/lib/Support/Windows/Process.inc index ad94128..f9a3db9 100644 --- a/contrib/llvm/lib/Support/Windows/Process.inc +++ b/contrib/llvm/lib/Support/Windows/Process.inc @@ -11,18 +11,25 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/Allocator.h" + #include "Windows.h" #include #include #include #include +#include #ifdef __MINGW32__ #if (HAVE_LIBPSAPI != 1) #error "libpsapi.a should be present" #endif + #if (HAVE_LIBSHELL32 != 1) + #error "libshell32.a should be present" + #endif #else #pragma comment(lib, "psapi.lib") + #pragma comment(lib, "shell32.lib") #endif //===----------------------------------------------------------------------===// @@ -40,7 +47,7 @@ using namespace sys; process::id_type self_process::get_id() { - return GetCurrentProcess(); + return GetCurrentProcessId(); } static TimeValue getTimeValueFromFILETIME(FILETIME Time) { @@ -83,6 +90,8 @@ static unsigned getPageSize() { // that LLVM ought to run as 64-bits on a 64-bit system, anyway. SYSTEM_INFO info; GetSystemInfo(&info); + // FIXME: FileOffset in MapViewOfFile() should be aligned to not dwPageSize, + // but dwAllocationGranularity. 
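// The FIXME above points at a real constraint: the file offset handed to
// MapViewOfFile() must be a multiple of the allocation granularity
// (typically 64 KiB), not of the page size this function returns. A sketch
// of rounding an offset down before mapping (illustrative helper; the
// caller adds the remainder back to the returned view pointer):
#include <windows.h>

static unsigned long long AlignForMapView(unsigned long long Offset) {
  SYSTEM_INFO Info;
  ::GetSystemInfo(&Info);
  // dwAllocationGranularity is a power of two, so masking rounds down.
  return Offset & ~(unsigned long long)(Info.dwAllocationGranularity - 1);
}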
return static_cast(info.dwPageSize); } @@ -119,28 +128,89 @@ void Process::GetTimeUsage(TimeValue &elapsed, TimeValue &user_time, sys_time = getTimeValueFromFILETIME(KernelTime); } -int Process::GetCurrentUserId() -{ - return 65536; -} - -int Process::GetCurrentGroupId() -{ - return 65536; -} - // Some LLVM programs such as bugpoint produce core files as a normal part of -// their operation. To prevent the disk from filling up, this configuration item -// does what's necessary to prevent their generation. +// their operation. To prevent the disk from filling up, this configuration +// item does what's necessary to prevent their generation. void Process::PreventCoreFiles() { - // Windows doesn't do core files, but it does do modal pop-up message - // boxes. As this method is used by bugpoint, preventing these pop-ups - // is the moral equivalent of suppressing core files. + // Windows does have the concept of core files, called minidumps. However, + // disabling minidumps for a particular application extends past the lifetime + // of that application, which is the incorrect behavior for this API. + // Additionally, the APIs require elevated privileges to disable and re- + // enable minidumps, which makes this untenable. For more information, see + // WerAddExcludedApplication and WerRemoveExcludedApplication (Vista and + // later). + // + // Windows also has modal pop-up message boxes. As this method is used by + // bugpoint, preventing these pop-ups is additionally important. SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX | SEM_NOOPENFILEERRORBOX); } +/// Returns the environment variable \arg Name's value as a string encoded in +/// UTF-8. \arg Name is assumed to be in UTF-8 encoding. +Optional Process::GetEnv(StringRef Name) { + // Convert the argument to UTF-16 to pass it to _wgetenv(). + SmallVector NameUTF16; + if (error_code ec = windows::UTF8ToUTF16(Name, NameUTF16)) + return None; + + // Environment variable can be encoded in non-UTF8 encoding, and there's no + // way to know what the encoding is. The only reliable way to look up + // multibyte environment variable is to use GetEnvironmentVariableW(). + SmallVector Buf; + size_t Size = MAX_PATH; + do { + Buf.reserve(Size); + Size = + GetEnvironmentVariableW(NameUTF16.data(), Buf.data(), Buf.capacity()); + if (Size == 0) + return None; + + // Try again with larger buffer. + } while (Size > Buf.capacity()); + Buf.set_size(Size); + + // Convert the result from UTF-16 to UTF-8. 
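// GetEnv() above cannot assume any maximum value length, so it sizes its
// buffer with the usual grow-and-retry loop: when the buffer is too small,
// GetEnvironmentVariableW() returns the required length including the
// terminator. A standalone sketch of that loop (illustrative helper):
#include <windows.h>
#include <string>

// Returns the value of a wide environment variable, or an empty string if
// it is unset.
static std::wstring GetEnvW(const wchar_t *Name) {
  std::wstring Buf(MAX_PATH, L'\0');
  for (;;) {
    DWORD Len = ::GetEnvironmentVariableW(Name, &Buf[0], (DWORD)Buf.size());
    if (Len == 0)
      return std::wstring(); // Unset, or a real error.
    if (Len < Buf.size()) {  // Success: Len excludes the terminator.
      Buf.resize(Len);
      return Buf;
    }
    Buf.resize(Len);         // Too small: Len includes the terminator.
  }
}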
+ SmallVector Res; + if (error_code ec = windows::UTF16ToUTF8(Buf.data(), Size, Res)) + return None; + return std::string(Res.data()); +} + +error_code +Process::GetArgumentVector(SmallVectorImpl &Args, + ArrayRef, + SpecificBumpPtrAllocator &ArgAllocator) { + int NewArgCount; + error_code ec; + + wchar_t **UnicodeCommandLine = CommandLineToArgvW(GetCommandLineW(), + &NewArgCount); + if (!UnicodeCommandLine) + return windows_error(::GetLastError()); + + Args.reserve(NewArgCount); + + for (int i = 0; i < NewArgCount; ++i) { + SmallVector NewArgString; + ec = windows::UTF16ToUTF8(UnicodeCommandLine[i], + wcslen(UnicodeCommandLine[i]), + NewArgString); + if (ec) + break; + + char *Buffer = ArgAllocator.Allocate(NewArgString.size() + 1); + ::memcpy(Buffer, NewArgString.data(), NewArgString.size() + 1); + Args.push_back(Buffer); + } + LocalFree(UnicodeCommandLine); + if (ec) + return ec; + + return error_code::success(); +} + bool Process::StandardInIsUserInput() { return FileDescriptorIsDisplayed(0); } @@ -187,6 +257,11 @@ bool Process::StandardErrHasColors() { return FileDescriptorHasColors(2); } +static bool UseANSI = false; +void Process::UseANSIEscapeCodes(bool enable) { + UseANSI = enable; +} + namespace { class DefaultColors { @@ -208,10 +283,12 @@ DefaultColors defaultColors; } bool Process::ColorNeedsFlush() { - return true; + return !UseANSI; } const char *Process::OutputBold(bool bg) { + if (UseANSI) return "\033[1m"; + WORD colors = DefaultColors::GetCurrentColor(); if (bg) colors |= BACKGROUND_INTENSITY; @@ -222,6 +299,8 @@ const char *Process::OutputBold(bool bg) { } const char *Process::OutputColor(char code, bool bold, bool bg) { + if (UseANSI) return colorcodes[bg?1:0][bold?1:0][code&7]; + WORD colors; if (bg) { colors = ((code&1) ? BACKGROUND_RED : 0) | @@ -247,6 +326,8 @@ static WORD GetConsoleTextAttribute(HANDLE hConsoleOutput) { } const char *Process::OutputReverse() { + if (UseANSI) return "\033[7m"; + const WORD attributes = GetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE)); @@ -273,6 +354,7 @@ const char *Process::OutputReverse() { } const char *Process::ResetColor() { + if (UseANSI) return "\033[0m"; SetConsoleTextAttribute(GetStdHandle(STD_OUTPUT_HANDLE), defaultColors()); return 0; } diff --git a/contrib/llvm/lib/Support/Windows/Program.inc b/contrib/llvm/lib/Support/Windows/Program.inc index 619ae5d..dc09738 100644 --- a/contrib/llvm/lib/Support/Windows/Program.inc +++ b/contrib/llvm/lib/Support/Windows/Program.inc @@ -12,6 +12,8 @@ //===----------------------------------------------------------------------===// #include "Windows.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/Support/FileSystem.h" #include #include #include @@ -22,37 +24,17 @@ //=== and must not be UNIX code //===----------------------------------------------------------------------===// -namespace { - struct Win32ProcessInfo { - HANDLE hProcess; - DWORD dwProcessId; - }; -} - namespace llvm { using namespace sys; -Program::Program() : Data_(0) {} - -Program::~Program() { - if (Data_) { - Win32ProcessInfo* wpi = reinterpret_cast(Data_); - CloseHandle(wpi->hProcess); - delete wpi; - Data_ = 0; - } -} +ProcessInfo::ProcessInfo() : ProcessHandle(0), Pid(0), ReturnCode(0) {} // This function just uses the PATH environment variable to find the program. 
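// The UseANSIEscapeCodes() switch above makes the color routines emit
// portable ANSI escape sequences instead of poking Win32 console
// attributes, which is what the rest of this hunk returns for bold,
// reverse, and reset. The escapes involved, in a tiny sketch (illustrative
// only; they render on an ANSI-capable terminal or pipe consumer):
#include <stdio.h>

static const char *const AnsiBold  = "\033[1m"; // bold / bright
static const char *const AnsiRev   = "\033[7m"; // reverse video
static const char *const AnsiReset = "\033[0m"; // restore defaults

static void PrintHighlighted(const char *Msg) {
  // Unlike SetConsoleTextAttribute(), nothing has to be restored through
  // the console API afterwards, which is why ColorNeedsFlush() is false in
  // ANSI mode.
  printf("%s%s%s\n", AnsiBold, Msg, AnsiReset);
}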
-Path -Program::FindProgramByName(const std::string& progName) { - +std::string sys::FindProgramByName(const std::string &progName) { // Check some degenerate cases if (progName.length() == 0) // no program - return Path(); - Path temp; - if (!temp.set(progName)) // invalid name - return Path(); + return ""; + std::string temp = progName; // Return paths with slashes verbatim. if (progName.find('\\') != std::string::npos || progName.find('/') != std::string::npos) @@ -60,58 +42,60 @@ Program::FindProgramByName(const std::string& progName) { // At this point, the file name is valid and does not contain slashes. // Let Windows search for it. - char buffer[MAX_PATH]; - char *dummy = NULL; - DWORD len = SearchPath(NULL, progName.c_str(), ".exe", MAX_PATH, - buffer, &dummy); - - // See if it wasn't found. - if (len == 0) - return Path(); - - // See if we got the entire path. - if (len < MAX_PATH) - return Path(buffer); - - // Buffer was too small; grow and retry. - while (true) { - char *b = reinterpret_cast(_alloca(len+1)); - DWORD len2 = SearchPath(NULL, progName.c_str(), ".exe", len+1, b, &dummy); - - // It is unlikely the search failed, but it's always possible some file - // was added or removed since the last search, so be paranoid... - if (len2 == 0) - return Path(); - else if (len2 <= len) - return Path(b); - - len = len2; - } + SmallVector progNameUnicode; + if (windows::UTF8ToUTF16(progName, progNameUnicode)) + return ""; + + SmallVector buffer; + DWORD len = MAX_PATH; + do { + buffer.reserve(len); + len = ::SearchPathW(NULL, progNameUnicode.data(), L".exe", + buffer.capacity(), buffer.data(), NULL); + + // See if it wasn't found. + if (len == 0) + return ""; + + // Buffer was too small; grow and retry. + } while (len > buffer.capacity()); + + buffer.set_size(len); + SmallVector result; + if (windows::UTF16ToUTF8(buffer.begin(), buffer.size(), result)) + return ""; + + return std::string(result.data(), result.size()); } -static HANDLE RedirectIO(const Path *path, int fd, std::string* ErrMsg) { +static HANDLE RedirectIO(const StringRef *path, int fd, std::string* ErrMsg) { HANDLE h; if (path == 0) { - DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd), - GetCurrentProcess(), &h, - 0, TRUE, DUPLICATE_SAME_ACCESS); + if (!DuplicateHandle(GetCurrentProcess(), (HANDLE)_get_osfhandle(fd), + GetCurrentProcess(), &h, + 0, TRUE, DUPLICATE_SAME_ACCESS)) + return INVALID_HANDLE_VALUE; return h; } - const char *fname; - if (path->isEmpty()) + std::string fname; + if (path->empty()) fname = "NUL"; else - fname = path->c_str(); + fname = *path; SECURITY_ATTRIBUTES sa; sa.nLength = sizeof(sa); sa.lpSecurityDescriptor = 0; sa.bInheritHandle = TRUE; - h = CreateFile(fname, fd ? GENERIC_WRITE : GENERIC_READ, FILE_SHARE_READ, - &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS, - FILE_ATTRIBUTE_NORMAL, NULL); + SmallVector fnameUnicode; + if (windows::UTF8ToUTF16(fname, fnameUnicode)) + return INVALID_HANDLE_VALUE; + + h = CreateFileW(fnameUnicode.data(), fd ? GENERIC_WRITE : GENERIC_READ, + FILE_SHARE_READ, &sa, fd == 0 ? OPEN_EXISTING : CREATE_ALWAYS, + FILE_ATTRIBUTE_NORMAL, NULL); if (h == INVALID_HANDLE_VALUE) { MakeErrMsg(ErrMsg, std::string(fname) + ": Can't open file for " + (fd ? 
"input: " : "output: ")); @@ -181,22 +165,12 @@ static unsigned int ArgLenWithQuotes(const char *Str) { return len; } +} -bool -Program::Execute(const Path& path, - const char** args, - const char** envp, - const Path** redirects, - unsigned memoryLimit, - std::string* ErrMsg) { - if (Data_) { - Win32ProcessInfo* wpi = reinterpret_cast(Data_); - CloseHandle(wpi->hProcess); - delete wpi; - Data_ = 0; - } - - if (!path.canExecute()) { +static bool Execute(ProcessInfo &PI, StringRef Program, const char **args, + const char **envp, const StringRef **redirects, + unsigned memoryLimit, std::string *ErrMsg) { + if (!sys::fs::can_execute(Program)) { if (ErrMsg) *ErrMsg = "program not executable"; return false; @@ -213,8 +187,8 @@ Program::Execute(const Path& path, } // Now build the command line. - char *command = reinterpret_cast(_alloca(len+1)); - char *p = command; + OwningArrayPtr command(new char[len+1]); + char *p = command.get(); for (unsigned i = 0; args[i]; i++) { const char *arg = args[i]; @@ -245,34 +219,28 @@ Program::Execute(const Path& path, *p = 0; // The pointer to the environment block for the new process. - char *envblock = 0; + std::vector EnvBlock; if (envp) { // An environment block consists of a null-terminated block of // null-terminated strings. Convert the array of environment variables to // an environment block by concatenating them. + for (unsigned i = 0; envp[i]; ++i) { + SmallVector EnvString; + if (error_code ec = windows::UTF8ToUTF16(envp[i], EnvString)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, "Unable to convert environment variable to UTF-16"); + return false; + } - // First, determine the length of the environment block. - len = 0; - for (unsigned i = 0; envp[i]; i++) - len += strlen(envp[i]) + 1; - - // Now build the environment block. - envblock = reinterpret_cast(_alloca(len+1)); - p = envblock; - - for (unsigned i = 0; envp[i]; i++) { - const char *ev = envp[i]; - size_t len = strlen(ev) + 1; - memcpy(p, ev, len); - p += len; + EnvBlock.insert(EnvBlock.end(), EnvString.begin(), EnvString.end()); + EnvBlock.push_back(0); } - - *p = 0; + EnvBlock.push_back(0); } // Create a child process. - STARTUPINFO si; + STARTUPINFOW si; memset(&si, 0, sizeof(si)); si.cb = sizeof(si); si.hStdInput = INVALID_HANDLE_VALUE; @@ -296,9 +264,14 @@ Program::Execute(const Path& path, if (redirects[1] && redirects[2] && *(redirects[1]) == *(redirects[2])) { // If stdout and stderr should go to the same place, redirect stderr // to the handle already open for stdout. 
- DuplicateHandle(GetCurrentProcess(), si.hStdOutput, - GetCurrentProcess(), &si.hStdError, - 0, TRUE, DUPLICATE_SAME_ACCESS); + if (!DuplicateHandle(GetCurrentProcess(), si.hStdOutput, + GetCurrentProcess(), &si.hStdError, + 0, TRUE, DUPLICATE_SAME_ACCESS)) { + CloseHandle(si.hStdInput); + CloseHandle(si.hStdOutput); + MakeErrMsg(ErrMsg, "can't dup stderr to stdout"); + return false; + } } else { // Just redirect stderr si.hStdError = RedirectIO(redirects[2], 2, ErrMsg); @@ -316,8 +289,27 @@ Program::Execute(const Path& path, fflush(stdout); fflush(stderr); - BOOL rc = CreateProcess(path.c_str(), command, NULL, NULL, TRUE, 0, - envblock, NULL, &si, &pi); + + SmallVector ProgramUtf16; + if (error_code ec = windows::UTF8ToUTF16(Program, ProgramUtf16)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, + std::string("Unable to convert application name to UTF-16")); + return false; + } + + SmallVector CommandUtf16; + if (error_code ec = windows::UTF8ToUTF16(command.get(), CommandUtf16)) { + SetLastError(ec.value()); + MakeErrMsg(ErrMsg, + std::string("Unable to convert command-line to UTF-16")); + return false; + } + + BOOL rc = CreateProcessW(ProgramUtf16.data(), CommandUtf16.data(), 0, 0, + TRUE, CREATE_UNICODE_ENVIRONMENT, + EnvBlock.empty() ? 0 : EnvBlock.data(), 0, &si, + &pi); DWORD err = GetLastError(); // Regardless of whether the process got created or not, we are done with @@ -330,13 +322,12 @@ Program::Execute(const Path& path, if (!rc) { SetLastError(err); MakeErrMsg(ErrMsg, std::string("Couldn't execute program '") + - path.str() + "'"); + Program.str() + "'"); return false; } - Win32ProcessInfo* wpi = new Win32ProcessInfo; - wpi->hProcess = pi.hProcess; - wpi->dwProcessId = pi.dwProcessId; - Data_ = wpi; + + PI.Pid = pi.dwProcessId; + PI.ProcessHandle = pi.hProcess; // Make sure these get closed no matter what. ScopedCommonHandle hThread(pi.hThread); @@ -344,7 +335,7 @@ Program::Execute(const Path& path, // Assign the process to a job if a memory limit is defined. ScopedJobHandle hJob; if (memoryLimit != 0) { - hJob = CreateJobObject(0, 0); + hJob = CreateJobObjectW(0, 0); bool success = false; if (hJob) { JOBOBJECT_EXTENDED_LIMIT_INFORMATION jeli; @@ -369,72 +360,84 @@ Program::Execute(const Path& path, return true; } -int -Program::Wait(const Path &path, - unsigned secondsToWait, - std::string* ErrMsg) { - if (Data_ == 0) { - MakeErrMsg(ErrMsg, "Process not started!"); - return -1; - } - - Win32ProcessInfo* wpi = reinterpret_cast(Data_); - HANDLE hProcess = wpi->hProcess; - - // Wait for the process to terminate. - DWORD millisecondsToWait = INFINITE; - if (secondsToWait > 0) - millisecondsToWait = secondsToWait * 1000; - - if (WaitForSingleObject(hProcess, millisecondsToWait) == WAIT_TIMEOUT) { - if (!TerminateProcess(hProcess, 1)) { - MakeErrMsg(ErrMsg, "Failed to terminate timed-out program."); - // -2 indicates a crash or timeout as opposed to failure to execute. 
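// The memoryLimit path in Execute() above caps the child with a job
// object: the process is assigned to a job whose extended limit carries a
// per-process memory ceiling. A standalone sketch of the mechanism
// (illustrative helper; the limit is in MiB, matching the code above, and
// the returned job handle is the caller's to close):
#include <windows.h>
#include <string.h>

static HANDLE LimitProcessMemory(HANDLE Process, unsigned LimitMiB) {
  HANDLE Job = ::CreateJobObjectW(NULL, NULL);
  if (!Job)
    return NULL;
  JOBOBJECT_EXTENDED_LIMIT_INFORMATION Jeli;
  memset(&Jeli, 0, sizeof(Jeli));
  Jeli.BasicLimitInformation.LimitFlags = JOB_OBJECT_LIMIT_PROCESS_MEMORY;
  Jeli.ProcessMemoryLimit = (SIZE_T)LimitMiB * 1024 * 1024;
  if (!::SetInformationJobObject(Job, JobObjectExtendedLimitInformation,
                                 &Jeli, sizeof(Jeli)) ||
      !::AssignProcessToJobObject(Job, Process)) {
    ::CloseHandle(Job);
    return NULL;
  }
  return Job;
}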
- return -2; +namespace llvm { +ProcessInfo sys::Wait(const ProcessInfo &PI, unsigned SecondsToWait, + bool WaitUntilChildTerminates, std::string *ErrMsg) { + assert(PI.Pid && "invalid pid to wait on, process not started?"); + assert(PI.ProcessHandle && + "invalid process handle to wait on, process not started?"); + DWORD milliSecondsToWait = 0; + if (WaitUntilChildTerminates) + milliSecondsToWait = INFINITE; + else if (SecondsToWait > 0) + milliSecondsToWait = SecondsToWait * 1000; + + ProcessInfo WaitResult = PI; + DWORD WaitStatus = WaitForSingleObject(PI.ProcessHandle, milliSecondsToWait); + if (WaitStatus == WAIT_TIMEOUT) { + if (SecondsToWait) { + if (!TerminateProcess(PI.ProcessHandle, 1)) { + if (ErrMsg) + MakeErrMsg(ErrMsg, "Failed to terminate timed-out program."); + + // -2 indicates a crash or timeout as opposed to failure to execute. + WaitResult.ReturnCode = -2; + CloseHandle(PI.ProcessHandle); + return WaitResult; + } + WaitForSingleObject(PI.ProcessHandle, INFINITE); + CloseHandle(PI.ProcessHandle); + } else { + // Non-blocking wait. + return ProcessInfo(); } - WaitForSingleObject(hProcess, INFINITE); } // Get its exit status. DWORD status; - BOOL rc = GetExitCodeProcess(hProcess, &status); + BOOL rc = GetExitCodeProcess(PI.ProcessHandle, &status); DWORD err = GetLastError(); + CloseHandle(PI.ProcessHandle); if (!rc) { SetLastError(err); - MakeErrMsg(ErrMsg, "Failed getting status for program."); + if (ErrMsg) + MakeErrMsg(ErrMsg, "Failed getting status for program."); + // -2 indicates a crash or timeout as opposed to failure to execute. - return -2; + WaitResult.ReturnCode = -2; + return WaitResult; } if (!status) - return 0; + return WaitResult; // Pass 10(Warning) and 11(Error) to the callee as negative value. if ((status & 0xBFFF0000U) == 0x80000000U) - return (int)status; - - if (status & 0xFF) - return status & 0x7FFFFFFF; + WaitResult.ReturnCode = static_cast(status); + else if (status & 0xFF) + WaitResult.ReturnCode = status & 0x7FFFFFFF; + else + WaitResult.ReturnCode = 1; - return 1; + return WaitResult; } -error_code Program::ChangeStdinToBinary(){ +error_code sys::ChangeStdinToBinary(){ int result = _setmode( _fileno(stdin), _O_BINARY ); if (result == -1) return error_code(errno, generic_category()); return make_error_code(errc::success); } -error_code Program::ChangeStdoutToBinary(){ +error_code sys::ChangeStdoutToBinary(){ int result = _setmode( _fileno(stdout), _O_BINARY ); if (result == -1) return error_code(errno, generic_category()); return make_error_code(errc::success); } -error_code Program::ChangeStderrToBinary(){ +error_code sys::ChangeStderrToBinary(){ int result = _setmode( _fileno(stderr), _O_BINARY ); if (result == -1) return error_code(errno, generic_category()); @@ -456,5 +459,4 @@ bool llvm::sys::argumentsFitWithinSystemLimits(ArrayRef Args) { } return true; } - } diff --git a/contrib/llvm/lib/Support/Windows/RWMutex.inc b/contrib/llvm/lib/Support/Windows/RWMutex.inc index 9593923..c431844 100644 --- a/contrib/llvm/lib/Support/Windows/RWMutex.inc +++ b/contrib/llvm/lib/Support/Windows/RWMutex.inc @@ -48,8 +48,7 @@ static bool loadSRW() { if (!sChecked) { sChecked = true; - HMODULE hLib = ::LoadLibrary(TEXT("Kernel32")); - if (hLib) { + if (HMODULE hLib = ::GetModuleHandleW(L"Kernel32.dll")) { fpInitializeSRWLock = (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, "InitializeSRWLock"); @@ -65,7 +64,6 @@ static bool loadSRW() { fpReleaseSRWLockShared = (VOID (WINAPI *)(PSRWLOCK))::GetProcAddress(hLib, "ReleaseSRWLockShared"); - 
::FreeLibrary(hLib); if (fpInitializeSRWLock != NULL) { sHasSRW = true; diff --git a/contrib/llvm/lib/Support/Windows/Signals.inc b/contrib/llvm/lib/Support/Windows/Signals.inc index b18b4d1..4b40d51 100644 --- a/contrib/llvm/lib/Support/Windows/Signals.inc +++ b/contrib/llvm/lib/Support/Windows/Signals.inc @@ -11,6 +11,8 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Support/FileSystem.h" + #include "Windows.h" #include #include @@ -133,7 +135,7 @@ typedef PVOID (WINAPI *fpSymFunctionTableAccess64)(HANDLE, DWORD64); static fpSymFunctionTableAccess64 SymFunctionTableAccess64; static bool load64BitDebugHelp(void) { - HMODULE hLib = ::LoadLibrary("Dbghelp.dll"); + HMODULE hLib = ::LoadLibraryW(L"Dbghelp.dll"); if (hLib) { StackWalk64 = (fpStackWalk64) ::GetProcAddress(hLib, "StackWalk64"); @@ -158,7 +160,7 @@ static BOOL WINAPI LLVMConsoleCtrlHandler(DWORD dwCtrlType); // InterruptFunction - The function to call if ctrl-c is pressed. static void (*InterruptFunction)() = 0; -static std::vector *FilesToRemove = NULL; +static std::vector *FilesToRemove = NULL; static std::vector > *CallBacksToRun = 0; static bool RegisteredUnhandledExceptionFilter = false; static bool CleanupExecuted = false; @@ -191,34 +193,6 @@ static int AvoidMessageBoxHook(int ReportType, char *Message, int *Return) { return TRUE; } -/// CRTReportHook - Function called on a CRT debugging event. -static int CRTReportHook(int ReportType, char *Message, int *Return) { - // Don't cause a DebugBreak() on return. - if (Return) - *Return = 0; - - switch (ReportType) { - default: - case _CRT_ASSERT: - fprintf(stderr, "CRT assert: %s\n", Message); - // FIXME: Is there a way to just crash? Perhaps throw to the unhandled - // exception code? Perhaps SetErrorMode() handles this. - _exit(3); - break; - case _CRT_ERROR: - fprintf(stderr, "CRT error: %s\n", Message); - // FIXME: Is there a way to just crash? Perhaps throw to the unhandled - // exception code? Perhaps SetErrorMode() handles this. - _exit(3); - break; - case _CRT_WARN: - fprintf(stderr, "CRT warn: %s\n", Message); - break; - } - - // Don't call _CrtDbgReport. - return TRUE; -} #endif static void RegisterHandler() { @@ -251,19 +225,10 @@ static void RegisterHandler() { OldFilter = SetUnhandledExceptionFilter(LLVMUnhandledExceptionFilter); SetConsoleCtrlHandler(LLVMConsoleCtrlHandler, TRUE); -#ifdef _MSC_VER - const char *EnableMsgbox = getenv("LLVM_ENABLE_CRT_REPORT"); - if (!EnableMsgbox || strcmp("0", EnableMsgbox) == 0) { - // Setting a report hook overrides the default behavior of popping an "abort, - // retry, or ignore" dialog. - _CrtSetReportHook(AvoidMessageBoxHook); - } -#endif - // Environment variable to disable any kind of crash dialog. 
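// The RWMutex change above swaps LoadLibrary("Kernel32") for
// GetModuleHandleW(L"Kernel32.dll"): kernel32 is always mapped into a
// Win32 process, so there is nothing to load, and the FreeLibrary() call,
// which could in principle have invalidated the function pointers just
// fetched, can simply go away. The general probe-for-an-optional-API
// pattern in a standalone sketch (IsWow64Process is just a convenient
// example of an API that older systems lack):
#include <windows.h>

typedef BOOL (WINAPI *IsWow64ProcessFn)(HANDLE, PBOOL);

static IsWow64ProcessFn GetIsWow64Process() {
  // No LoadLibrary/FreeLibrary pairing needed: kernel32 is always present.
  HMODULE Kernel32 = ::GetModuleHandleW(L"kernel32.dll");
  if (!Kernel32)
    return NULL;
  return (IsWow64ProcessFn)::GetProcAddress(Kernel32, "IsWow64Process");
}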
if (getenv("LLVM_DISABLE_CRASH_REPORT")) { #ifdef _MSC_VER - _CrtSetReportHook(CRTReportHook); + _CrtSetReportHook(AvoidMessageBoxHook); #endif SetErrorMode(SEM_FAILCRITICALERRORS | SEM_NOGPFAULTERRORBOX | @@ -276,7 +241,7 @@ static void RegisterHandler() { } // RemoveFileOnSignal - The public API -bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) { +bool sys::RemoveFileOnSignal(StringRef Filename, std::string* ErrMsg) { RegisterHandler(); if (CleanupExecuted) { @@ -286,7 +251,7 @@ bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) { } if (FilesToRemove == NULL) - FilesToRemove = new std::vector; + FilesToRemove = new std::vector; FilesToRemove->push_back(Filename); @@ -295,14 +260,14 @@ bool sys::RemoveFileOnSignal(const sys::Path &Filename, std::string* ErrMsg) { } // DontRemoveFileOnSignal - The public API -void sys::DontRemoveFileOnSignal(const sys::Path &Filename) { +void sys::DontRemoveFileOnSignal(StringRef Filename) { if (FilesToRemove == NULL) return; RegisterHandler(); FilesToRemove->push_back(Filename); - std::vector::reverse_iterator I = + std::vector::reverse_iterator I = std::find(FilesToRemove->rbegin(), FilesToRemove->rend(), Filename); if (I != FilesToRemove->rend()) FilesToRemove->erase(I.base()-1); @@ -352,7 +317,8 @@ static void Cleanup() { if (FilesToRemove != NULL) while (!FilesToRemove->empty()) { - FilesToRemove->back().eraseFromDisk(); + bool Existed; + llvm::sys::fs::remove(FilesToRemove->back(), Existed); FilesToRemove->pop_back(); } diff --git a/contrib/llvm/lib/Support/Windows/TimeValue.inc b/contrib/llvm/lib/Support/Windows/TimeValue.inc index 1227552..98b07d6 100644 --- a/contrib/llvm/lib/Support/Windows/TimeValue.inc +++ b/contrib/llvm/lib/Support/Windows/TimeValue.inc @@ -12,10 +12,11 @@ //===----------------------------------------------------------------------===// #include "Windows.h" +#include #include -namespace llvm { -using namespace sys; +using namespace llvm; +using namespace llvm::sys; //===----------------------------------------------------------------------===// //=== WARNING: Implementation here must contain only Win32 specific code. @@ -31,21 +32,28 @@ TimeValue TimeValue::now() { } std::string TimeValue::str() const { + struct tm *LT; #ifdef __MINGW32__ - // This ban may be lifted by either: - // (i) a future MinGW version other than 1.0 inherents the __time64_t type, or - // (ii) configure tests for either the time_t or __time64_t type. - time_t ourTime = time_t(this->toEpochTime()); - struct tm *lt = ::localtime(&ourTime); + // Old versions of mingw don't have _localtime64_s. Remove this once we drop support + // for them. + time_t OurTime = time_t(this->toEpochTime()); + LT = ::localtime(&OurTime); + assert(LT); #else - __time64_t ourTime = this->toEpochTime(); - struct tm *lt = ::_localtime64(&ourTime); + struct tm Storage; + __time64_t OurTime = this->toEpochTime(); + int Error = ::_localtime64_s(&Storage, &OurTime); + assert(!Error); + LT = &Storage; #endif - char buffer[25]; - strftime(buffer, 25, "%a %b %d %H:%M:%S %Y", lt); - return std::string(buffer); -} - - + char Buffer[25]; + // FIXME: the windows version of strftime doesn't support %e + strftime(Buffer, 25, "%b %d %H:%M %Y", LT); + assert((Buffer[3] == ' ' && isdigit(Buffer[5]) && Buffer[6] == ' ') && + "Unexpected format in strftime()!"); + // Emulate %e on %d to mute '0'. 
+ if (Buffer[4] == '0') + Buffer[4] = ' '; + return std::string(Buffer); } diff --git a/contrib/llvm/lib/Support/Windows/Windows.h b/contrib/llvm/lib/Support/Windows/Windows.h index 5c1da0d..1f3417d 100644 --- a/contrib/llvm/lib/Support/Windows/Windows.h +++ b/contrib/llvm/lib/Support/Windows/Windows.h @@ -24,22 +24,31 @@ #define _WIN32_IE 0x0600 // MinGW at it again. #define WIN32_LEAN_AND_MEAN +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringRef.h" #include "llvm/Config/config.h" // Get build system configuration settings +#include "llvm/Support/Compiler.h" +#include "llvm/Support/system_error.h" #include #include -#include #include #include +#include inline bool MakeErrMsg(std::string* ErrMsg, const std::string& prefix) { if (!ErrMsg) return true; char *buffer = NULL; - FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER|FORMAT_MESSAGE_FROM_SYSTEM, - NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL); - *ErrMsg = prefix + buffer; + DWORD R = FormatMessage(FORMAT_MESSAGE_ALLOCATE_BUFFER | + FORMAT_MESSAGE_FROM_SYSTEM, + NULL, GetLastError(), 0, (LPSTR)&buffer, 1, NULL); + if (R) + *ErrMsg = prefix + buffer; + else + *ErrMsg = prefix + "Unknown error"; + LocalFree(buffer); - return true; + return R != 0; } template @@ -75,7 +84,7 @@ public: } // True if Handle is valid. - operator bool() const { + LLVM_EXPLICIT operator bool() const { return HandleTraits::IsValid(Handle) ? true : false; } @@ -147,4 +156,13 @@ c_str(SmallVectorImpl &str) { str.pop_back(); return str.data(); } + +namespace sys { +namespace windows { +error_code UTF8ToUTF16(StringRef utf8, + SmallVectorImpl &utf16); +error_code UTF16ToUTF8(const wchar_t *utf16, size_t utf16_len, + SmallVectorImpl &utf8); +} // end namespace windows +} // end namespace sys } // end namespace llvm. diff --git a/contrib/llvm/lib/Support/YAMLParser.cpp b/contrib/llvm/lib/Support/YAMLParser.cpp index 213f5e1..9495cd4 100644 --- a/contrib/llvm/lib/Support/YAMLParser.cpp +++ b/contrib/llvm/lib/Support/YAMLParser.cpp @@ -96,6 +96,15 @@ static EncodingInfo getUnicodeEncoding(StringRef Input) { namespace llvm { namespace yaml { +/// Pin the vtables to this file. +void Node::anchor() {} +void NullNode::anchor() {} +void ScalarNode::anchor() {} +void KeyValueNode::anchor() {} +void MappingNode::anchor() {} +void SequenceNode::anchor() {} +void AliasNode::anchor() {} + /// Token - A single YAML token. struct Token : ilist_node { enum TokenKind { @@ -1070,14 +1079,22 @@ bool Scanner::scanDirective() { Current = skip_while(&Scanner::skip_ns_char, Current); StringRef Name(NameStart, Current - NameStart); Current = skip_while(&Scanner::skip_s_white, Current); - + + Token T; if (Name == "YAML") { Current = skip_while(&Scanner::skip_ns_char, Current); - Token T; T.Kind = Token::TK_VersionDirective; T.Range = StringRef(Start, Current - Start); TokenQueue.push_back(T); return true; + } else if(Name == "TAG") { + Current = skip_while(&Scanner::skip_ns_char, Current); + Current = skip_while(&Scanner::skip_s_white, Current); + Current = skip_while(&Scanner::skip_ns_char, Current); + T.Kind = Token::TK_TagDirective; + T.Range = StringRef(Start, Current - Start); + TokenQueue.push_back(T); + return true; } return false; } @@ -1564,10 +1581,6 @@ void Stream::printError(Node *N, const Twine &Msg) { , Ranges); } -void Stream::handleYAMLDirective(const Token &t) { - // TODO: Ensure version is 1.x. 
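// The TimeValue::str() change above works around the Windows CRT, whose
// strftime() lacks the "%e" conversion (day of month, space padded): it
// formats with the zero-padded "%d" and then blanks a leading '0' by hand.
// A standalone sketch of that emulation (illustrative helper):
#include <time.h>
#include <string>

static std::string FormatTime(const struct tm *LT) {
  char Buffer[25];
  strftime(Buffer, sizeof(Buffer), "%b %d %H:%M %Y", LT);
  // Buffer[4] is the tens digit of the day ("Jan 05 ..."); "%e" would have
  // produced a space there instead of '0'.
  if (Buffer[4] == '0')
    Buffer[4] = ' ';
  return std::string(Buffer);
}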
-} - document_iterator Stream::begin() { if (CurrentDoc) report_fatal_error("Can only iterate over the stream once"); @@ -1588,14 +1601,59 @@ void Stream::skip() { i->skip(); } -Node::Node(unsigned int Type, OwningPtr &D, StringRef A) +Node::Node(unsigned int Type, OwningPtr &D, StringRef A, StringRef T) : Doc(D) , TypeID(Type) - , Anchor(A) { + , Anchor(A) + , Tag(T) { SMLoc Start = SMLoc::getFromPointer(peekNext().Range.begin()); SourceRange = SMRange(Start, Start); } +std::string Node::getVerbatimTag() const { + StringRef Raw = getRawTag(); + if (!Raw.empty() && Raw != "!") { + std::string Ret; + if (Raw.find_last_of('!') == 0) { + Ret = Doc->getTagMap().find("!")->second; + Ret += Raw.substr(1); + return llvm_move(Ret); + } else if (Raw.startswith("!!")) { + Ret = Doc->getTagMap().find("!!")->second; + Ret += Raw.substr(2); + return llvm_move(Ret); + } else { + StringRef TagHandle = Raw.substr(0, Raw.find_last_of('!') + 1); + std::map::const_iterator It = + Doc->getTagMap().find(TagHandle); + if (It != Doc->getTagMap().end()) + Ret = It->second; + else { + Token T; + T.Kind = Token::TK_Tag; + T.Range = TagHandle; + setError(Twine("Unknown tag handle ") + TagHandle, T); + } + Ret += Raw.substr(Raw.find_last_of('!') + 1); + return llvm_move(Ret); + } + } + + switch (getType()) { + case NK_Null: + return "tag:yaml.org,2002:null"; + case NK_Scalar: + // TODO: Tag resolution. + return "tag:yaml.org,2002:str"; + case NK_Mapping: + return "tag:yaml.org,2002:map"; + case NK_Sequence: + return "tag:yaml.org,2002:seq"; + } + + return ""; +} + Token &Node::peekNext() { return Doc->peekNext(); } @@ -1999,6 +2057,10 @@ void SequenceNode::increment() { } Document::Document(Stream &S) : stream(S), Root(0) { + // Tag maps starts with two default mappings. + TagMap["!"] = "!"; + TagMap["!!"] = "tag:yaml.org,2002:"; + if (parseDirectives()) expectToken(Token::TK_DocumentStart); Token &T = peekNext(); @@ -2042,6 +2104,7 @@ Node *Document::parseBlockNode() { Token T = peekNext(); // Handle properties. Token AnchorInfo; + Token TagInfo; parse_property: switch (T.Kind) { case Token::TK_Alias: @@ -2056,7 +2119,11 @@ parse_property: T = peekNext(); goto parse_property; case Token::TK_Tag: - getNext(); // Skip TK_Tag. + if (TagInfo.Kind == Token::TK_Tag) { + setError("Already encountered a tag for this node!", T); + return 0; + } + TagInfo = getNext(); // Consume TK_Tag. T = peekNext(); goto parse_property; default: @@ -2070,42 +2137,49 @@ parse_property: // Don't eat the TK_BlockEntry, SequenceNode needs it. 
return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Indentless); case Token::TK_BlockSequenceStart: getNext(); return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Block); case Token::TK_BlockMappingStart: getNext(); return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Block); case Token::TK_FlowSequenceStart: getNext(); return new (NodeAllocator) SequenceNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , SequenceNode::ST_Flow); case Token::TK_FlowMappingStart: getNext(); return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Flow); case Token::TK_Scalar: getNext(); return new (NodeAllocator) ScalarNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , T.Range); case Token::TK_Key: // Don't eat the TK_Key, KeyValueNode expects it. return new (NodeAllocator) MappingNode( stream.CurrentDoc , AnchorInfo.Range.substr(1) + , TagInfo.Range , MappingNode::MT_Inline); case Token::TK_DocumentStart: case Token::TK_DocumentEnd: @@ -2126,10 +2200,10 @@ bool Document::parseDirectives() { while (true) { Token T = peekNext(); if (T.Kind == Token::TK_TagDirective) { - handleTagDirective(getNext()); + parseTAGDirective(); isDirective = true; } else if (T.Kind == Token::TK_VersionDirective) { - stream.handleYAMLDirective(getNext()); + parseYAMLDirective(); isDirective = true; } else break; @@ -2137,6 +2211,21 @@ bool Document::parseDirectives() { return isDirective; } +void Document::parseYAMLDirective() { + getNext(); // Eat %YAML +} + +void Document::parseTAGDirective() { + Token Tag = getNext(); // %TAG + StringRef T = Tag.Range; + // Strip %TAG + T = T.substr(T.find_first_of(" \t")).ltrim(" \t"); + std::size_t HandleEnd = T.find_first_of(" \t"); + StringRef TagHandle = T.substr(0, HandleEnd); + StringRef TagPrefix = T.substr(HandleEnd).ltrim(" \t"); + TagMap[TagHandle] = TagPrefix; +} + bool Document::expectToken(int TK) { Token T = getNext(); if (T.Kind != TK) { diff --git a/contrib/llvm/lib/Support/YAMLTraits.cpp b/contrib/llvm/lib/Support/YAMLTraits.cpp index 9da2aa7..42bff96 100644 --- a/contrib/llvm/lib/Support/YAMLTraits.cpp +++ b/contrib/llvm/lib/Support/YAMLTraits.cpp @@ -15,6 +15,7 @@ #include "llvm/Support/YAMLParser.h" #include "llvm/Support/raw_ostream.h" #include +#include using namespace llvm; using namespace yaml; @@ -40,32 +41,43 @@ void IO::setContext(void *Context) { // Input //===----------------------------------------------------------------------===// -Input::Input(StringRef InputContent, void *Ctxt) - : IO(Ctxt), +Input::Input(StringRef InputContent, + void *Ctxt, + SourceMgr::DiagHandlerTy DiagHandler, + void *DiagHandlerCtxt) + : IO(Ctxt), Strm(new Stream(InputContent, SrcMgr)), CurrentNode(NULL) { + if (DiagHandler) + SrcMgr.setDiagHandler(DiagHandler, DiagHandlerCtxt); DocIterator = Strm->begin(); } Input::~Input() { - } error_code Input::error() { return EC; } -void Input::setDiagHandler(SourceMgr::DiagHandlerTy Handler, void *Ctxt) { - SrcMgr.setDiagHandler(Handler, Ctxt); -} +// Pin the vtables to this file. 
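// The anchor() functions added here, like the ones added to YAMLParser.cpp
// above, are LLVM's usual vtable-pinning idiom: a class whose virtual
// functions are all inline forces every translation unit that uses it to
// emit its own copy of the vtable and RTTI. Declaring one trivial virtual
// function in the header and defining it in exactly one .cpp pins those
// symbols to that file. The idiom in isolation (Widget is a made-up class):

// In the header: one deliberately non-inline virtual member.
struct Widget {
  virtual ~Widget() {}
  virtual void anchor(); // Declared here, defined in one .cpp only.
};

// In a single .cpp file: this definition anchors Widget's vtable.
void Widget::anchor() {}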
+// Pin the vtables to this file. +void Input::HNode::anchor() {} +void Input::EmptyHNode::anchor() {} +void Input::ScalarHNode::anchor() {} -bool Input::outputting() { +bool Input::outputting() const { return false; } bool Input::setCurrentDocument() { if (DocIterator != Strm->end()) { Node *N = DocIterator->getRoot(); + if (!N) { + assert(Strm->failed() && "Root is NULL iff parsing failed"); + EC = make_error_code(errc::invalid_argument); + return false; + } + if (isa<NullNode>(N)) { // Empty files are allowed and ignored ++DocIterator; @@ -82,10 +94,21 @@ void Input::nextDocument() { ++DocIterator; } +bool Input::mapTag(StringRef Tag, bool Default) { + std::string foundTag = CurrentNode->_node->getVerbatimTag(); + if (foundTag.empty()) { + // If no tag found and 'Tag' is the default, say it was found. + return Default; + } + // Return true iff found tag matches supplied tag. + return Tag.equals(foundTag); +} + void Input::beginMapping() { if (EC) return; - MapHNode *MN = dyn_cast<MapHNode>(CurrentNode); + // CurrentNode can be null if the document is empty. + MapHNode *MN = dyn_cast_or_null<MapHNode>(CurrentNode); if (MN) { MN->ValidKeys.clear(); } @@ -96,6 +119,15 @@ bool Input::preflightKey(const char *Key, bool Required, bool, bool &UseDefault, UseDefault = false; if (EC) return false; + + // CurrentNode is null for empty documents, which is an error if required + // nodes are present. + if (!CurrentNode) { + if (Required) + EC = make_error_code(errc::invalid_argument); + return false; + } + MapHNode *MN = dyn_cast<MapHNode>(CurrentNode); if (!MN) { setError(CurrentNode, "not a mapping"); @@ -122,13 +154,14 @@ void Input::postflightKey(void *saveInfo) { void Input::endMapping() { if (EC) return; - MapHNode *MN = dyn_cast<MapHNode>(CurrentNode); + // CurrentNode can be null if the document is empty. + MapHNode *MN = dyn_cast_or_null<MapHNode>(CurrentNode); if (!MN) return; for (MapHNode::NameToNode::iterator i = MN->Mapping.begin(), End = MN->Mapping.end(); i != End; ++i) { - if (!MN->isValidKey(i->first)) { - setError(i->second, Twine("unknown key '") + i->first + "'"); + if (!MN->isValidKey(i->first())) { + setError(i->second, Twine("unknown key '") + i->first() + "'"); break; } } @@ -263,6 +296,7 @@ void Input::scalarString(StringRef &S) { } void Input::setError(HNode *hnode, const Twine &message) { + assert(hnode && "HNode must not be NULL"); this->setError(hnode->_node, message); } @@ -322,7 +356,7 @@ Input::HNode *Input::createHNodes(Node *N) { } bool Input::MapHNode::isValidKey(StringRef Key) { - for (SmallVector<const char *, 4>::iterator i = ValidKeys.begin(), + for (SmallVectorImpl<const char *>::iterator i = ValidKeys.begin(), End = ValidKeys.end(); i != End; ++i) { if (Key.equals(*i)) return true; @@ -334,6 +368,10 @@ void Input::setError(const Twine &Message) { this->setError(CurrentNode, Message); } +bool Input::canElideEmptySequence() { + return false; +} + Input::MapHNode::~MapHNode() { for (MapHNode::NameToNode::iterator i = Mapping.begin(), End = Mapping.end(); i != End; ++i) { @@ -368,7 +406,7 @@ Output::Output(raw_ostream &yout, void *context) Output::~Output() { } -bool Output::outputting() { +bool Output::outputting() const { return true; } @@ -377,6 +415,14 @@ void Output::beginMapping() { NeedsNewLine = true; } +bool Output::mapTag(StringRef Tag, bool Use) { + if (Use) { + this->output(" "); + this->output(Tag); + } + return Use; +} + void Output::endMapping() { StateStack.pop_back(); } @@ -505,9 +551,20 @@ void Output::endBitSetScalar() { } void Output::scalarString(StringRef &S) { + const char ScalarSafeChars[] = "abcdefghijklmnopqrstuvwxyz" + "ABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789_-/^., \t"; + this->newLineCheck(); - if (S.find('\n') == StringRef::npos) { - // No embedded new-line chars, just print string. + if (S.empty()) { + // Print '' for the empty string because leaving the field empty is not + // allowed. + this->outputUpToEndOfLine("''"); + return; + } + if (S.find_first_not_of(ScalarSafeChars) == StringRef::npos && + !isspace(S.front()) && !isspace(S.back())) { + // If the string consists only of safe characters, print it out without + // quotes. this->outputUpToEndOfLine(S); return; } @@ -532,6 +589,19 @@ void Output::scalarString(StringRef &S) { void Output::setError(const Twine &message) { } +bool Output::canElideEmptySequence() { + // Normally, with an optional key/value where the value is an empty sequence, + // the whole key/value can be left unwritten. But that produces invalid YAML + // if the key/value is the only thing in the map and the map is used in + // a sequence. This detects if this sequence is the first key/value + // in a map that is itself embedded in a sequence. + if (StateStack.size() < 2) + return true; + if (StateStack.back() != inMapFirstKey) + return true; + return (StateStack[StateStack.size()-2] != inSeq); +} + void Output::output(StringRef s) { Column += s.size(); Out << s;
diff --git a/contrib/llvm/lib/Support/raw_ostream.cpp b/contrib/llvm/lib/Support/raw_ostream.cpp index a433088..cb96489 100644 --- a/contrib/llvm/lib/Support/raw_ostream.cpp +++ b/contrib/llvm/lib/Support/raw_ostream.cpp @@ -18,6 +18,7 @@ #include "llvm/Config/config.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FileSystem.h" #include "llvm/Support/Format.h" #include "llvm/Support/Process.h" #include "llvm/Support/Program.h" @@ -25,14 +26,15 @@ #include #include #include -#include -#if defined(HAVE_UNISTD_H) -# include <unistd.h> -#endif +// <fcntl.h> may provide O_BINARY. #if defined(HAVE_FCNTL_H) # include <fcntl.h> #endif + +#if defined(HAVE_UNISTD_H) +# include <unistd.h> +#endif #if defined(HAVE_SYS_UIO_H) && defined(HAVE_WRITEV) # include <sys/uio.h> #endif @@ -43,7 +45,6 @@ #if defined(_MSC_VER) #include <io.h> -#include <fcntl.h> #ifndef STDIN_FILENO # define STDIN_FILENO 0 #endif @@ -424,14 +425,9 @@ void format_object_base::home() { /// stream should be immediately destroyed; the string will be empty /// if no error occurred. raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, - unsigned Flags) - : Error(false), UseAtomicWrites(false), pos(0) -{ + sys::fs::OpenFlags Flags) + : Error(false), UseAtomicWrites(false), pos(0) { assert(Filename != 0 && "Filename is null"); - // Verify that we don't have both "append" and "excl". - assert((!(Flags & F_Excl) || !(Flags & F_Append)) && - "Cannot specify both 'excl' and 'append' file creation flags!"); - ErrorInfo.clear(); // Handle "-" as stdout. Note that when we do this, we consider ourself @@ -441,32 +437,20 @@ raw_fd_ostream::raw_fd_ostream(const char *Filename, std::string &ErrorInfo, FD = STDOUT_FILENO; // If user requested binary then put stdout into binary mode if // possible. - if (Flags & F_Binary) - sys::Program::ChangeStdoutToBinary(); + if (Flags & sys::fs::F_Binary) + sys::ChangeStdoutToBinary(); // Close stdout when we're done, to detect any output errors. ShouldClose = true; return; } - int OpenFlags = O_WRONLY|O_CREAT; -#ifdef O_BINARY - if (Flags & F_Binary) - OpenFlags |= O_BINARY; -#endif - - if (Flags & F_Append) - OpenFlags |= O_APPEND; - else - OpenFlags |= O_TRUNC; - if (Flags & F_Excl) - OpenFlags |= O_EXCL; + error_code EC = sys::fs::openFileForWrite(Filename, FD, Flags); - while ((FD = open(Filename, OpenFlags, 0664)) < 0) { - if (errno != EINTR) { - ErrorInfo = "Error opening output file '" + std::string(Filename) + "'"; - ShouldClose = false; - return; - } + if (EC) { + ErrorInfo = "Error opening output file '" + std::string(Filename) + "': " + + EC.message(); + ShouldClose = false; + return; } // Ok, we successfully opened the file, so it'll need to be closed. diff --git a/contrib/llvm/lib/TableGen/Main.cpp b/contrib/llvm/lib/TableGen/Main.cpp index dc4167b..7fe47bc 100644 --- a/contrib/llvm/lib/TableGen/Main.cpp +++ b/contrib/llvm/lib/TableGen/Main.cpp @@ -83,7 +83,7 @@ int TableGenMain(char *argv0, TableGenMainFn *MainFn) { // Parse the input file. OwningPtr<MemoryBuffer> File; if (error_code ec = - MemoryBuffer::getFileOrSTDIN(InputFilename.c_str(), File)) { + MemoryBuffer::getFileOrSTDIN(InputFilename, File)) { errs() << "Could not open input file '" << InputFilename << "': " << ec.message() <<"\n"; return 1; diff --git a/contrib/llvm/lib/TableGen/Record.cpp b/contrib/llvm/lib/TableGen/Record.cpp index 9ad2053..431f4aa 100644 --- a/contrib/llvm/lib/TableGen/Record.cpp +++ b/contrib/llvm/lib/TableGen/Record.cpp @@ -557,9 +557,23 @@ Init *BitsInit::resolveReferences(Record &R, const RecordVal *RV) const { return const_cast<BitsInit *>(this); } +namespace { + template <typename T> + class Pool : public T { + public: + ~Pool(); + }; + template <typename T> + Pool<T>::~Pool() { + for (typename T::iterator I = this->begin(), E = this->end(); I != E; ++I) { + typename T::value_type &Item = *I; + delete Item.second; + } + } +} + IntInit *IntInit::get(int64_t V) { - typedef DenseMap<int64_t, IntInit *> Pool; - static Pool ThePool; + static Pool<DenseMap<int64_t, IntInit *> > ThePool; IntInit *&I = ThePool[V]; if (!I) I = new IntInit(V); @@ -586,8 +600,7 @@ IntInit::convertInitializerBitRange(const std::vector<unsigned> &Bits) const { void StringInit::anchor() { } StringInit *StringInit::get(StringRef V) { - typedef StringMap<StringInit *> Pool; - static Pool ThePool; + static Pool<StringMap<StringInit *> > ThePool; StringInit *&I = ThePool[V]; if (!I) I = new StringInit(V); @@ -726,9 +739,7 @@ Init *OpInit::getBit(unsigned Bit) const { UnOpInit *UnOpInit::get(UnaryOp opc, Init *lhs, RecTy *Type) { typedef std::pair<std::pair<unsigned, Init *>, RecTy *> Key; - - typedef DenseMap<Key, UnOpInit *> Pool; - static Pool ThePool; + static Pool<DenseMap<Key, UnOpInit *> > ThePool; Key TheKey(std::make_pair(std::make_pair(opc, lhs), Type)); @@ -873,8 +884,7 @@ BinOpInit *BinOpInit::get(BinaryOp opc, Init *lhs, RecTy * > Key; - typedef DenseMap<Key, BinOpInit *> Pool; - static Pool ThePool; + static Pool<DenseMap<Key, BinOpInit *> > ThePool; Key TheKey(std::make_pair(std::make_pair(std::make_pair(opc, lhs), rhs), Type)); @@ -1298,8 +1308,7 @@ VarInit *VarInit::get(const std::string &VN, RecTy *T) { VarInit *VarInit::get(Init *VN, RecTy *T) { typedef std::pair<RecTy *, Init *> Key; - typedef DenseMap<Key, VarInit *> Pool; - static Pool ThePool; + static Pool<DenseMap<Key, VarInit *> > ThePool; Key TheKey(std::make_pair(T, VN)); diff --git a/contrib/llvm/lib/TableGen/TGParser.cpp b/contrib/llvm/lib/TableGen/TGParser.cpp index 86ad2a6..daac574 100644 --- a/contrib/llvm/lib/TableGen/TGParser.cpp +++ b/contrib/llvm/lib/TableGen/TGParser.cpp @@ -1271,10 +1271,11 @@ Init *TGParser::ParseSimpleValue(Record *CurRec, RecTy *ItemType, if (ItemType != 0) { ListRecTy *ListType = dyn_cast<ListRecTy>(ItemType); if (ListType == 0) { - std::stringstream s; - s << "Type mismatch for list, 
expected list type, got " - << ItemType->getAsString(); - TokError(s.str()); + std::string s; + raw_string_ostream ss(s); + ss << "Type mismatch for list, expected list type, got " + << ItemType->getAsString(); + TokError(ss.str()); return 0; } GivenListTy = ListType; @@ -2495,6 +2496,9 @@ bool TGParser::ParseDefm(MultiClass *CurMultiClass) { if (Lex.getCode() != tgtok::comma) break; Lex.Lex(); // eat ','. + if (Lex.getCode() != tgtok::Id) + return TokError("expected identifier"); + SubClassLoc = Lex.getLoc(); // A defm can inherit from regular classes (non-multiclass) as diff --git a/contrib/llvm/lib/Target/AArch64/AArch64.td b/contrib/llvm/lib/Target/AArch64/AArch64.td index e17052b..9c2c69a 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64.td @@ -21,8 +21,11 @@ include "llvm/Target/Target.td" // AArch64 Subtarget features. // +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", "true", + "Enable ARMv8 FP">; + def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", - "Enable Advanced SIMD instructions">; + "Enable Advanced SIMD instructions", [FeatureFPARMv8]>; def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", "Enable cryptographic instructions">; @@ -33,7 +36,7 @@ def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", include "AArch64Schedule.td" -def : Processor<"generic", GenericItineraries, [FeatureNEON, FeatureCrypto]>; +def : Processor<"generic", GenericItineraries, [FeatureFPARMv8]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp index 47ebb82..d59ca56 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.cpp @@ -27,32 +27,23 @@ using namespace llvm; -MachineLocation -AArch64AsmPrinter::getDebugValueLocation(const MachineInstr *MI) const { - // See emitFrameIndexDebugValue in InstrInfo for where this instruction is - // expected to be created. - assert(MI->getNumOperands() == 4 && MI->getOperand(0).isReg() - && MI->getOperand(1).isImm() && "unexpected custom DBG_VALUE"); - return MachineLocation(MI->getOperand(0).getReg(), - MI->getOperand(1).getImm()); -} - /// Try to print a floating-point register as if it belonged to a specified /// register-class. For example the inline asm operand modifier "b" requires its /// argument to be printed as "bN". static bool printModifiedFPRAsmOperand(const MachineOperand &MO, const TargetRegisterInfo *TRI, - const TargetRegisterClass &RegClass, - raw_ostream &O) { + char RegType, raw_ostream &O) { if (!MO.isReg()) return true; for (MCRegAliasIterator AR(MO.getReg(), TRI, true); AR.isValid(); ++AR) { - if (RegClass.contains(*AR)) { - O << AArch64InstPrinter::getRegisterName(*AR); + if (AArch64::FPR8RegClass.contains(*AR)) { + O << RegType << TRI->getEncodingValue(MO.getReg()); return false; } } + + // The register doesn't correspond to anything floating-point like. 
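Looking back at the Record.cpp hunks a little above: every interning cache there becomes a Pool, a thin wrapper whose destructor deletes the owned values when the static map is destroyed. A minimal sketch of the pattern, with std::map standing in for DenseMap and illustrative names:

    // Sketch of the interning-pool pattern from the Record.cpp change.
    #include <cassert>
    #include <map>

    template <typename T>
    class Pool : public T {
    public:
      ~Pool() {
        for (typename T::iterator I = this->begin(), E = this->end(); I != E; ++I)
          delete I->second;   // reclaim interned objects at static destruction
      }
    };

    struct IntInit { long V; explicit IntInit(long V) : V(V) {} };

    static IntInit *getIntInit(long V) {
      static Pool<std::map<long, IntInit *> > ThePool;
      IntInit *&I = ThePool[V];
      if (!I)
        I = new IntInit(V);   // create once, reuse thereafter
      return I;
    }

    int main() {
      assert(getIntInit(42) == getIntInit(42)); // interned: same object back
    }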
return true; } @@ -91,9 +82,9 @@ bool AArch64AsmPrinter::printSymbolicAddress(const MachineOperand &MO, StringRef Modifier; switch (MO.getType()) { default: - llvm_unreachable("Unexpected operand for symbolic address constraint"); + return true; case MachineOperand::MO_GlobalAddress: - Name = Mang->getSymbol(MO.getGlobal())->getName(); + Name = getSymbol(MO.getGlobal())->getName(); // Global variables may be accessed either via a GOT or in various fun and // interesting TLS-model specific ways. Set the prefix modifier as @@ -155,57 +146,29 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, unsigned AsmVariant, const char *ExtraCode, raw_ostream &O) { const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); - if (!ExtraCode || !ExtraCode[0]) { - // There's actually no operand modifier, which leads to a slightly eclectic - // set of behaviour which we have to handle here. - const MachineOperand &MO = MI->getOperand(OpNum); - switch (MO.getType()) { - default: - llvm_unreachable("Unexpected operand for inline assembly"); - case MachineOperand::MO_Register: - // GCC prints the unmodified operand of a 'w' constraint as the vector - // register. Technically, we could allocate the argument as a VPR128, but - // that leads to extremely dodgy copies being generated to get the data - // there. - if (printModifiedFPRAsmOperand(MO, TRI, AArch64::VPR128RegClass, O)) - O << AArch64InstPrinter::getRegisterName(MO.getReg()); - break; - case MachineOperand::MO_Immediate: - O << '#' << MO.getImm(); - break; - case MachineOperand::MO_FPImmediate: - assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected"); - O << "#0.0"; - break; - case MachineOperand::MO_BlockAddress: - case MachineOperand::MO_ConstantPoolIndex: - case MachineOperand::MO_GlobalAddress: - case MachineOperand::MO_ExternalSymbol: - return printSymbolicAddress(MO, false, "", O); - } - return false; - } - // We have a real modifier to handle. + if (!ExtraCode) + ExtraCode = ""; + switch(ExtraCode[0]) { default: - // See if this is a generic operand - return AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O); - case 'c': // Don't print "#" before an immediate operand. - if (!MI->getOperand(OpNum).isImm()) - return true; - O << MI->getOperand(OpNum).getImm(); - return false; + if (!AsmPrinter::PrintAsmOperand(MI, OpNum, AsmVariant, ExtraCode, O)) + return false; + break; case 'w': // Output 32-bit general register operand, constant zero as wzr, or stack // pointer as wsp. Ignored when used with other operand types. - return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::GPR32RegClass, O); + if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::GPR32RegClass, O)) + return false; + break; case 'x': // Output 64-bit general register operand, constant zero as xzr, or stack // pointer as sp. Ignored when used with other operand types. - return printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::GPR64RegClass, O); + if (!printModifiedGPRAsmOperand(MI->getOperand(OpNum), TRI, + AArch64::GPR64RegClass, O)) + return false; + break; case 'H': // Output higher numbered of a 64-bit general register pair case 'Q': @@ -221,40 +184,65 @@ bool AArch64AsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // copies ...). llvm_unreachable("FIXME: Unimplemented register pairs"); case 'b': - // Output 8-bit FP/SIMD scalar register operand, prefixed with b. 
- return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR8RegClass, O); case 'h': - // Output 16-bit FP/SIMD scalar register operand, prefixed with h. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR16RegClass, O); case 's': - // Output 32-bit FP/SIMD scalar register operand, prefixed with s. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR32RegClass, O); case 'd': - // Output 64-bit FP/SIMD scalar register operand, prefixed with d. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR64RegClass, O); case 'q': - // Output 128-bit FP/SIMD scalar register operand, prefixed with q. - return printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, - AArch64::FPR128RegClass, O); + if (!printModifiedFPRAsmOperand(MI->getOperand(OpNum), TRI, + ExtraCode[0], O)) + return false; + break; case 'A': // Output symbolic address with appropriate relocation modifier (also // suitable for ADRP). - return printSymbolicAddress(MI->getOperand(OpNum), false, "", O); + if (!printSymbolicAddress(MI->getOperand(OpNum), false, "", O)) + return false; + break; case 'L': // Output bits 11:0 of symbolic address with appropriate :lo12: relocation // modifier. - return printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O); + if (!printSymbolicAddress(MI->getOperand(OpNum), true, "lo12", O)) + return false; + break; case 'G': // Output bits 23:12 of symbolic address with appropriate :hi12: relocation // modifier (currently only for TLS local exec). - return printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O); + if (!printSymbolicAddress(MI->getOperand(OpNum), true, "hi12", O)) + return false; + break; + case 'a': + return PrintAsmMemoryOperand(MI, OpNum, AsmVariant, ExtraCode, O); } + // There's actually no operand modifier, which leads to a slightly eclectic + // set of behaviour which we have to handle here. + const MachineOperand &MO = MI->getOperand(OpNum); + switch (MO.getType()) { + default: + llvm_unreachable("Unexpected operand for inline assembly"); + case MachineOperand::MO_Register: + // GCC prints the unmodified operand of a 'w' constraint as the vector + // register. Technically, we could allocate the argument as a VPR128, but + // that leads to extremely dodgy copies being generated to get the data + // there. + if (printModifiedFPRAsmOperand(MO, TRI, 'v', O)) + O << AArch64InstPrinter::getRegisterName(MO.getReg()); + break; + case MachineOperand::MO_Immediate: + O << '#' << MO.getImm(); + break; + case MachineOperand::MO_FPImmediate: + assert(MO.getFPImm()->isExactlyValue(0.0) && "Only FP 0.0 expected"); + O << "#0.0"; + break; + case MachineOperand::MO_BlockAddress: + case MachineOperand::MO_ConstantPoolIndex: + case MachineOperand::MO_GlobalAddress: + case MachineOperand::MO_ExternalSymbol: + return printSymbolicAddress(MO, false, "", O); + } + return false; } bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, @@ -271,24 +259,6 @@ bool AArch64AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI, return false; } -void AArch64AsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { - unsigned NOps = MI->getNumOperands(); - assert(NOps==4); - OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast(MI->getOperand(NOps-1).getMetadata())); - OS << V.getName(); - OS << " <- "; - // Frame address. Currently handles register +- offset only. 
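The modifier consolidation above replaces five per-width register classes with a single helper that prints the modifier letter followed by the register's hardware encoding (via TRI->getEncodingValue). A toy version of just the formatting step (names ours, not the AsmPrinter's):

    // Sketch: 'b' + encoding 3 -> "b3", 'q' + encoding 3 -> "q3".
    #include <cassert>
    #include <sstream>
    #include <string>

    static std::string printFPROperand(char RegType, unsigned Encoding) {
      std::ostringstream OS;
      OS << RegType << Encoding;   // modifier letter selects the scalar width
      return OS.str();
    }

    int main() {
      assert(printFPROperand('b', 3) == "b3");  // 8-bit scalar view of V3
      assert(printFPROperand('q', 3) == "q3");  // 128-bit view of V3
    }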
- assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - OS << '[' << AArch64InstPrinter::getRegisterName(MI->getOperand(0).getReg()); - OS << '+' << MI->getOperand(1).getImm(); - OS << ']'; - OS << "+" << MI->getOperand(NOps - 2).getImm(); -} - - #include "AArch64GenMCPseudoLowering.inc" void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { @@ -296,18 +266,6 @@ void AArch64AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (emitPseudoExpansionLowering(OutStreamer, MI)) return; - switch (MI->getOpcode()) { - case AArch64::DBG_VALUE: { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - SmallString<128> TmpStr; - raw_svector_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; - } - } - MCInst TmpInst; LowerAArch64MachineInstrToMCInst(MI, TmpInst, *this); OutStreamer.EmitInstruction(TmpInst); @@ -329,7 +287,7 @@ void AArch64AsmPrinter::EmitEndOfAsmFile(Module &M) { for (unsigned i = 0, e = Stubs.size(); i != e; ++i) { OutStreamer.EmitLabel(Stubs[i].first); OutStreamer.EmitSymbolValue(Stubs[i].second.getPointer(), - TD->getPointerSize(0), 0); + TD->getPointerSize(0)); } Stubs.clear(); } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h index af0c9fe..824f003 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64AsmPrinter.h @@ -55,8 +55,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { unsigned AsmVariant, const char *ExtraCode, raw_ostream &O); - void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - /// printSymbolicAddress - Given some kind of reasonably bare symbolic /// reference, print out the appropriate asm string to represent it. If /// appropriate, a relocation-specifier will be produced, composed of a @@ -67,8 +65,6 @@ class LLVM_LIBRARY_VISIBILITY AArch64AsmPrinter : public AsmPrinter { bool PrintImmediatePrefix, StringRef Suffix, raw_ostream &O); - MachineLocation getDebugValueLocation(const MachineInstr *MI) const; - virtual const char *getPassName() const { return "AArch64 Assembly Printer"; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp b/contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp index 71233ba..11e7f41 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64BranchFixupPass.cpp @@ -87,7 +87,7 @@ namespace { // If the block size isn't a multiple of the known bits, assume the // worst case padding. if (Size & ((1u << Bits) - 1)) - Bits = CountTrailingZeros_32(Size); + Bits = countTrailingZeros(Size); return Bits; } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td b/contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td index b880d83..a2a9f3f 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64CallingConv.td @@ -59,9 +59,9 @@ def CC_A64_APCS : CallingConv<[ // Canonicalise the various types that live in different floating-point // registers. This makes sense because the PCS does not distinguish Short // Vectors and Floating-point types. 
- CCIfType<[v2i8], CCBitConvertToType>, - CCIfType<[v4i8, v2i16], CCBitConvertToType>, - CCIfType<[v8i8, v4i16, v2i32, v2f32], CCBitConvertToType>, + CCIfType<[v1i16, v2i8], CCBitConvertToType>, + CCIfType<[v1i32, v4i8, v2i16, v1f32], CCBitConvertToType>, + CCIfType<[v8i8, v4i16, v2i32, v2f32, v1i64, v1f64], CCBitConvertToType>, CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], CCBitConvertToType>, @@ -70,7 +70,8 @@ def CC_A64_APCS : CallingConv<[ // argument is allocated to the least significant bits of register // v[NSRN]. The NSRN is incremented by one. The argument has now been // allocated." - CCIfType<[f16], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[v1i8], CCAssignToReg<[B0, B1, B2, B3, B4, B5, B6, B7]>>, + CCIfType<[f16], CCAssignToReg<[H0, H1, H2, H3, H4, H5, H6, H7]>>, CCIfType<[f32], CCAssignToReg<[S0, S1, S2, S3, S4, S5, S6, S7]>>, CCIfType<[f64], CCAssignToReg<[D0, D1, D2, D3, D4, D5, D6, D7]>>, CCIfType<[f128], CCAssignToReg<[Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7]>>, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp index daa7f1d..7318230 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp @@ -54,7 +54,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { DebugLoc DL = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); MachineModuleInfo &MMI = MF.getMMI(); - std::vector &Moves = MMI.getFrameMoves(); + const MCRegisterInfo *MRI = MMI.getContext().getRegisterInfo(); bool NeedsFrameMoves = MMI.hasDebugInfo() || MF.getFunction()->needsUnwindTableEntry(); @@ -97,8 +97,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { .addSym(SPLabel); MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(AArch64::XSP, NumInitialBytes); - Moves.push_back(MachineMove(SPLabel, Dst, Src)); + unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); + MMI.addFrameInst( + MCCFIInstruction::createDefCfa(SPLabel, Reg, -NumInitialBytes)); } // Otherwise we need to set the frame pointer and/or add a second stack @@ -131,9 +132,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { MCSymbol *FPLabel = MMI.getContext().CreateTempSymbol(); BuildMI(MBB, MBBI, DL, TII.get(TargetOpcode::PROLOG_LABEL)) .addSym(FPLabel); - MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(AArch64::X29, -MFI->getObjectOffset(X29FrameIdx)); - Moves.push_back(MachineMove(FPLabel, Dst, Src)); + unsigned Reg = MRI->getDwarfRegNum(AArch64::X29, true); + unsigned Offset = MFI->getObjectOffset(X29FrameIdx); + MMI.addFrameInst(MCCFIInstruction::createDefCfa(FPLabel, Reg, Offset)); } FPNeedsSetting = false; @@ -164,8 +165,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { .addSym(CSLabel); MachineLocation Dst(MachineLocation::VirtualFP); - MachineLocation Src(AArch64::XSP, NumResidualBytes + NumInitialBytes); - Moves.push_back(MachineMove(CSLabel, Dst, Src)); + unsigned Reg = MRI->getDwarfRegNum(AArch64::XSP, true); + unsigned Offset = NumResidualBytes + NumInitialBytes; + MMI.addFrameInst(MCCFIInstruction::createDefCfa(CSLabel, Reg, -Offset)); } // And any callee-saved registers (it's fine to leave them to the end here, @@ -180,10 +182,9 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF) const { for (std::vector::const_iterator I = CSI.begin(), E = CSI.end(); I != E; ++I) { - MachineLocation Dst(MachineLocation::VirtualFP, - 
MFI->getObjectOffset(I->getFrameIdx())); - MachineLocation Src(I->getReg()); - Moves.push_back(MachineMove(CSLabel, Dst, Src)); + unsigned Offset = MFI->getObjectOffset(I->getFrameIdx()); + unsigned Reg = MRI->getDwarfRegNum(I->getReg(), true); + MMI.addFrameInst(MCCFIInstruction::createOffset(CSLabel, Reg, Offset)); } } } @@ -424,7 +425,7 @@ AArch64FrameLowering::emitFrameMemOps(bool isPrologue, MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, const std::vector &CSI, const TargetRegisterInfo *TRI, - LoadStoreMethod PossClasses[], + const LoadStoreMethod PossClasses[], unsigned NumClasses) const { DebugLoc DL = MBB.findDebugLoc(MBBI); MachineFunction &MF = *MBB.getParent(); @@ -527,11 +528,11 @@ AArch64FrameLowering::spillCalleeSavedRegisters(MachineBasicBlock &MBB, if (CSI.empty()) return false; - static LoadStoreMethod PossibleClasses[] = { + static const LoadStoreMethod PossibleClasses[] = { {&AArch64::GPR64RegClass, AArch64::LSPair64_STR, AArch64::LS64_STR}, {&AArch64::FPR64RegClass, AArch64::LSFPPair64_STR, AArch64::LSFP64_STR}, }; - unsigned NumClasses = llvm::array_lengthof(PossibleClasses); + const unsigned NumClasses = llvm::array_lengthof(PossibleClasses); emitFrameMemOps(/* isPrologue = */ true, MBB, MBBI, CSI, TRI, PossibleClasses, NumClasses); @@ -548,11 +549,11 @@ AArch64FrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (CSI.empty()) return false; - static LoadStoreMethod PossibleClasses[] = { + static const LoadStoreMethod PossibleClasses[] = { {&AArch64::GPR64RegClass, AArch64::LSPair64_LDR, AArch64::LS64_LDR}, {&AArch64::FPR64RegClass, AArch64::LSFPPair64_LDR, AArch64::LSFP64_LDR}, }; - unsigned NumClasses = llvm::array_lengthof(PossibleClasses); + const unsigned NumClasses = llvm::array_lengthof(PossibleClasses); emitFrameMemOps(/* isPrologue = */ false, MBB, MBBI, CSI, TRI, PossibleClasses, NumClasses); diff --git a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h index 45ea0ec..032dd90 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64FrameLowering.h @@ -90,7 +90,7 @@ public: MachineBasicBlock::iterator MI, const std::vector &CSI, const TargetRegisterInfo *TRI, - LoadStoreMethod PossibleClasses[], + const LoadStoreMethod PossibleClasses[], unsigned NumClasses) const; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp index 7e6aaf3..ef99541 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelDAGToDAG.cpp @@ -33,7 +33,6 @@ namespace { class AArch64DAGToDAGISel : public SelectionDAGISel { AArch64TargetMachine &TM; - const AArch64InstrInfo *TII; /// Keep a pointer to the AArch64Subtarget around so that we can /// make the right decision when generating code for different targets. @@ -43,7 +42,6 @@ public: explicit AArch64DAGToDAGISel(AArch64TargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), TM(tm), - TII(static_cast(TM.getInstrInfo())), Subtarget(&TM.getSubtarget()) { } @@ -72,10 +70,11 @@ public: /// Used for pre-lowered address-reference nodes, so we already know /// the fields match. This operand's job is simply to add an - /// appropriate shift operand (i.e. 0) to the MOVZ/MOVK instruction. + /// appropriate shift operand to the MOVZ/MOVK instruction. 
+ template bool SelectMOVWAddressRef(SDValue N, SDValue &Imm, SDValue &Shift) { Imm = N; - Shift = CurDAG->getTargetConstant(0, MVT::i32); + Shift = CurDAG->getTargetConstant(LogShift, MVT::i32); return true; } @@ -102,7 +101,7 @@ public: /// Put the given constant into a pool and return a DAG which will give its /// address. - SDValue getConstantPoolItemAddress(DebugLoc DL, const Constant *CV); + SDValue getConstantPoolItemAddress(SDLoc DL, const Constant *CV); SDNode *TrySelectToMoveImm(SDNode *N); SDNode *LowerToFPLitPool(SDNode *Node); @@ -110,6 +109,45 @@ public: SDNode* Select(SDNode*); private: + /// Get the opcode for table lookup instruction + unsigned getTBLOpc(bool IsExt, bool Is64Bit, unsigned NumOfVec); + + /// Select NEON table lookup intrinsics. NumVecs should be 1, 2, 3 or 4. + /// IsExt is to indicate if the result will be extended with an argument. + SDNode *SelectVTBL(SDNode *N, unsigned NumVecs, bool IsExt); + + /// Select NEON load intrinsics. NumVecs should be 1, 2, 3 or 4. + SDNode *SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, + const uint16_t *Opcode); + + /// Select NEON store intrinsics. NumVecs should be 1, 2, 3 or 4. + SDNode *SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, + const uint16_t *Opcodes); + + /// Form sequences of consecutive 64/128-bit registers for use in NEON + /// instructions making use of a vector-list (e.g. ldN, tbl). Vecs must have + /// between 1 and 4 elements. If it contains a single element that is returned + /// unchanged; otherwise a REG_SEQUENCE value is returned. + SDValue createDTuple(ArrayRef Vecs); + SDValue createQTuple(ArrayRef Vecs); + + /// Generic helper for the createDTuple/createQTuple + /// functions. Those should almost always be called instead. + SDValue createTuple(ArrayRef Vecs, unsigned RegClassIDs[], + unsigned SubRegs[]); + + /// Select NEON load-duplicate intrinsics. NumVecs should be 2, 3 or 4. + /// The opcode array specifies the instructions used for load. + SDNode *SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, + const uint16_t *Opcodes); + + /// Select NEON load/store lane intrinsics. NumVecs should be 2, 3 or 4. + /// The opcode arrays specify the instructions used for load/store. 
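The LogShift template parameter above, together with the corrected shift immediates (3, 2, 1, 0 instead of all zeroes) in the large-code-model getConstantPoolItemAddress that follows, reflects how a 64-bit literal is materialised 16 bits at a time with MOVZ/MOVK. A plain C++ sketch of that assembly-level arithmetic (not SelectionDAG code):

    #include <cassert>
    #include <cstdint>

    // MOVZ Xd, #Imm, LSL #(16 * Shift): zero the register, place one chunk.
    static uint64_t movz(uint16_t Imm, unsigned Shift) {
      return (uint64_t)Imm << (16 * Shift);
    }

    // MOVK Xd, #Imm, LSL #(16 * Shift): replace one chunk, keep the rest.
    static uint64_t movk(uint64_t Reg, uint16_t Imm, unsigned Shift) {
      uint64_t Mask = (uint64_t)0xffff << (16 * Shift);
      return (Reg & ~Mask) | ((uint64_t)Imm << (16 * Shift));
    }

    int main() {
      uint64_t X = movz(0x1234, 3);   // bits 63:48 (the G3 fragment)
      X = movk(X, 0x5678, 2);         // bits 47:32 (G2_NC)
      X = movk(X, 0x9abc, 1);         // bits 31:16 (G1_NC)
      X = movk(X, 0xdef0, 0);         // bits 15:0  (G0_NC)
      assert(X == 0x123456789abcdef0ULL);
    }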
+ SDNode *SelectVLDSTLane(SDNode *N, bool IsLoad, bool isUpdating, + unsigned NumVecs, const uint16_t *Opcodes); + + SDValue getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD, + SDValue Operand); }; } @@ -191,7 +229,7 @@ bool AArch64DAGToDAGISel::SelectLogicalImm(SDValue N, SDValue &Imm) { SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { SDNode *ResNode; - DebugLoc dl = Node->getDebugLoc(); + SDLoc dl(Node); EVT DestType = Node->getValueType(0); unsigned DestWidth = DestType.getSizeInBits(); @@ -241,14 +279,14 @@ SDNode *AArch64DAGToDAGISel::TrySelectToMoveImm(SDNode *Node) { } SDValue -AArch64DAGToDAGISel::getConstantPoolItemAddress(DebugLoc DL, +AArch64DAGToDAGISel::getConstantPoolItemAddress(SDLoc DL, const Constant *CV) { - EVT PtrVT = TLI.getPointerTy(); + EVT PtrVT = getTargetLowering()->getPointerTy(); - switch (TLI.getTargetMachine().getCodeModel()) { + switch (getTargetLowering()->getTargetMachine().getCodeModel()) { case CodeModel::Small: { unsigned Alignment = - TLI.getDataLayout()->getABITypeAlignment(CV->getType()); + getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType()); return CurDAG->getNode( AArch64ISD::WrapperSmall, DL, PtrVT, CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_NO_FLAG), @@ -260,15 +298,15 @@ AArch64DAGToDAGISel::getConstantPoolItemAddress(DebugLoc DL, LitAddr = CurDAG->getMachineNode( AArch64::MOVZxii, DL, PtrVT, CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G3), - CurDAG->getTargetConstant(0, MVT::i32)); + CurDAG->getTargetConstant(3, MVT::i32)); LitAddr = CurDAG->getMachineNode( AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G2_NC), - CurDAG->getTargetConstant(0, MVT::i32)); + CurDAG->getTargetConstant(2, MVT::i32)); LitAddr = CurDAG->getMachineNode( AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G1_NC), - CurDAG->getTargetConstant(0, MVT::i32)); + CurDAG->getTargetConstant(1, MVT::i32)); LitAddr = CurDAG->getMachineNode( AArch64::MOVKxii, DL, PtrVT, SDValue(LitAddr, 0), CurDAG->getTargetConstantPool(CV, PtrVT, 0, 0, AArch64II::MO_ABS_G0_NC), @@ -281,7 +319,7 @@ AArch64DAGToDAGISel::getConstantPoolItemAddress(DebugLoc DL, } SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { - DebugLoc DL = Node->getDebugLoc(); + SDLoc DL(Node); uint64_t UnsignedVal = cast(Node)->getZExtValue(); int64_t SignedVal = cast(Node)->getSExtValue(); EVT DestType = Node->getValueType(0); @@ -312,7 +350,8 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { MemType.getSizeInBits()), UnsignedVal); SDValue PoolAddr = getConstantPoolItemAddress(DL, CV); - unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(CV->getType()); + unsigned Alignment = + getTargetLowering()->getDataLayout()->getABITypeAlignment(CV->getType()); return CurDAG->getExtLoad(Extension, DL, DestType, CurDAG->getEntryNode(), PoolAddr, @@ -323,11 +362,12 @@ SDNode *AArch64DAGToDAGISel::SelectToLitPool(SDNode *Node) { } SDNode *AArch64DAGToDAGISel::LowerToFPLitPool(SDNode *Node) { - DebugLoc DL = Node->getDebugLoc(); + SDLoc DL(Node); const ConstantFP *FV = cast(Node)->getConstantFPValue(); EVT DestType = Node->getValueType(0); - unsigned Alignment = TLI.getDataLayout()->getABITypeAlignment(FV->getType()); + unsigned Alignment = + getTargetLowering()->getDataLayout()->getABITypeAlignment(FV->getType()); SDValue PoolAddr = getConstantPoolItemAddress(DL, FV); return 
CurDAG->getLoad(DestType, DL, CurDAG->getEntryNode(), PoolAddr, @@ -389,6 +429,600 @@ SDNode *AArch64DAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, &Ops[0], Ops.size()); } +SDValue AArch64DAGToDAGISel::createDTuple(ArrayRef Regs) { + static unsigned RegClassIDs[] = { AArch64::DPairRegClassID, + AArch64::DTripleRegClassID, + AArch64::DQuadRegClassID }; + static unsigned SubRegs[] = { AArch64::dsub_0, AArch64::dsub_1, + AArch64::dsub_2, AArch64::dsub_3 }; + + return createTuple(Regs, RegClassIDs, SubRegs); +} + +SDValue AArch64DAGToDAGISel::createQTuple(ArrayRef Regs) { + static unsigned RegClassIDs[] = { AArch64::QPairRegClassID, + AArch64::QTripleRegClassID, + AArch64::QQuadRegClassID }; + static unsigned SubRegs[] = { AArch64::qsub_0, AArch64::qsub_1, + AArch64::qsub_2, AArch64::qsub_3 }; + + return createTuple(Regs, RegClassIDs, SubRegs); +} + +SDValue AArch64DAGToDAGISel::createTuple(ArrayRef Regs, + unsigned RegClassIDs[], + unsigned SubRegs[]) { + // There's no special register-class for a vector-list of 1 element: it's just + // a vector. + if (Regs.size() == 1) + return Regs[0]; + + assert(Regs.size() >= 2 && Regs.size() <= 4); + + SDLoc DL(Regs[0].getNode()); + + SmallVector Ops; + + // First operand of REG_SEQUENCE is the desired RegClass. + Ops.push_back( + CurDAG->getTargetConstant(RegClassIDs[Regs.size() - 2], MVT::i32)); + + // Then we get pairs of source & subregister-position for the components. + for (unsigned i = 0; i < Regs.size(); ++i) { + Ops.push_back(Regs[i]); + Ops.push_back(CurDAG->getTargetConstant(SubRegs[i], MVT::i32)); + } + + SDNode *N = + CurDAG->getMachineNode(TargetOpcode::REG_SEQUENCE, DL, MVT::Untyped, Ops); + return SDValue(N, 0); +} + + +// Get the register stride update opcode of a VLD/VST instruction that +// is otherwise equivalent to the given fixed stride updating instruction. 
+static unsigned getVLDSTRegisterUpdateOpcode(unsigned Opc) { + switch (Opc) { + default: break; + case AArch64::LD1WB_8B_fixed: return AArch64::LD1WB_8B_register; + case AArch64::LD1WB_4H_fixed: return AArch64::LD1WB_4H_register; + case AArch64::LD1WB_2S_fixed: return AArch64::LD1WB_2S_register; + case AArch64::LD1WB_1D_fixed: return AArch64::LD1WB_1D_register; + case AArch64::LD1WB_16B_fixed: return AArch64::LD1WB_16B_register; + case AArch64::LD1WB_8H_fixed: return AArch64::LD1WB_8H_register; + case AArch64::LD1WB_4S_fixed: return AArch64::LD1WB_4S_register; + case AArch64::LD1WB_2D_fixed: return AArch64::LD1WB_2D_register; + + case AArch64::LD2WB_8B_fixed: return AArch64::LD2WB_8B_register; + case AArch64::LD2WB_4H_fixed: return AArch64::LD2WB_4H_register; + case AArch64::LD2WB_2S_fixed: return AArch64::LD2WB_2S_register; + case AArch64::LD2WB_16B_fixed: return AArch64::LD2WB_16B_register; + case AArch64::LD2WB_8H_fixed: return AArch64::LD2WB_8H_register; + case AArch64::LD2WB_4S_fixed: return AArch64::LD2WB_4S_register; + case AArch64::LD2WB_2D_fixed: return AArch64::LD2WB_2D_register; + + case AArch64::LD3WB_8B_fixed: return AArch64::LD3WB_8B_register; + case AArch64::LD3WB_4H_fixed: return AArch64::LD3WB_4H_register; + case AArch64::LD3WB_2S_fixed: return AArch64::LD3WB_2S_register; + case AArch64::LD3WB_16B_fixed: return AArch64::LD3WB_16B_register; + case AArch64::LD3WB_8H_fixed: return AArch64::LD3WB_8H_register; + case AArch64::LD3WB_4S_fixed: return AArch64::LD3WB_4S_register; + case AArch64::LD3WB_2D_fixed: return AArch64::LD3WB_2D_register; + + case AArch64::LD4WB_8B_fixed: return AArch64::LD4WB_8B_register; + case AArch64::LD4WB_4H_fixed: return AArch64::LD4WB_4H_register; + case AArch64::LD4WB_2S_fixed: return AArch64::LD4WB_2S_register; + case AArch64::LD4WB_16B_fixed: return AArch64::LD4WB_16B_register; + case AArch64::LD4WB_8H_fixed: return AArch64::LD4WB_8H_register; + case AArch64::LD4WB_4S_fixed: return AArch64::LD4WB_4S_register; + case AArch64::LD4WB_2D_fixed: return AArch64::LD4WB_2D_register; + + case AArch64::LD1x2WB_8B_fixed: return AArch64::LD1x2WB_8B_register; + case AArch64::LD1x2WB_4H_fixed: return AArch64::LD1x2WB_4H_register; + case AArch64::LD1x2WB_2S_fixed: return AArch64::LD1x2WB_2S_register; + case AArch64::LD1x2WB_1D_fixed: return AArch64::LD1x2WB_1D_register; + case AArch64::LD1x2WB_16B_fixed: return AArch64::LD1x2WB_16B_register; + case AArch64::LD1x2WB_8H_fixed: return AArch64::LD1x2WB_8H_register; + case AArch64::LD1x2WB_4S_fixed: return AArch64::LD1x2WB_4S_register; + case AArch64::LD1x2WB_2D_fixed: return AArch64::LD1x2WB_2D_register; + + case AArch64::LD1x3WB_8B_fixed: return AArch64::LD1x3WB_8B_register; + case AArch64::LD1x3WB_4H_fixed: return AArch64::LD1x3WB_4H_register; + case AArch64::LD1x3WB_2S_fixed: return AArch64::LD1x3WB_2S_register; + case AArch64::LD1x3WB_1D_fixed: return AArch64::LD1x3WB_1D_register; + case AArch64::LD1x3WB_16B_fixed: return AArch64::LD1x3WB_16B_register; + case AArch64::LD1x3WB_8H_fixed: return AArch64::LD1x3WB_8H_register; + case AArch64::LD1x3WB_4S_fixed: return AArch64::LD1x3WB_4S_register; + case AArch64::LD1x3WB_2D_fixed: return AArch64::LD1x3WB_2D_register; + + case AArch64::LD1x4WB_8B_fixed: return AArch64::LD1x4WB_8B_register; + case AArch64::LD1x4WB_4H_fixed: return AArch64::LD1x4WB_4H_register; + case AArch64::LD1x4WB_2S_fixed: return AArch64::LD1x4WB_2S_register; + case AArch64::LD1x4WB_1D_fixed: return AArch64::LD1x4WB_1D_register; + case AArch64::LD1x4WB_16B_fixed: return 
AArch64::LD1x4WB_16B_register; + case AArch64::LD1x4WB_8H_fixed: return AArch64::LD1x4WB_8H_register; + case AArch64::LD1x4WB_4S_fixed: return AArch64::LD1x4WB_4S_register; + case AArch64::LD1x4WB_2D_fixed: return AArch64::LD1x4WB_2D_register; + + case AArch64::ST1WB_8B_fixed: return AArch64::ST1WB_8B_register; + case AArch64::ST1WB_4H_fixed: return AArch64::ST1WB_4H_register; + case AArch64::ST1WB_2S_fixed: return AArch64::ST1WB_2S_register; + case AArch64::ST1WB_1D_fixed: return AArch64::ST1WB_1D_register; + case AArch64::ST1WB_16B_fixed: return AArch64::ST1WB_16B_register; + case AArch64::ST1WB_8H_fixed: return AArch64::ST1WB_8H_register; + case AArch64::ST1WB_4S_fixed: return AArch64::ST1WB_4S_register; + case AArch64::ST1WB_2D_fixed: return AArch64::ST1WB_2D_register; + + case AArch64::ST2WB_8B_fixed: return AArch64::ST2WB_8B_register; + case AArch64::ST2WB_4H_fixed: return AArch64::ST2WB_4H_register; + case AArch64::ST2WB_2S_fixed: return AArch64::ST2WB_2S_register; + case AArch64::ST2WB_16B_fixed: return AArch64::ST2WB_16B_register; + case AArch64::ST2WB_8H_fixed: return AArch64::ST2WB_8H_register; + case AArch64::ST2WB_4S_fixed: return AArch64::ST2WB_4S_register; + case AArch64::ST2WB_2D_fixed: return AArch64::ST2WB_2D_register; + + case AArch64::ST3WB_8B_fixed: return AArch64::ST3WB_8B_register; + case AArch64::ST3WB_4H_fixed: return AArch64::ST3WB_4H_register; + case AArch64::ST3WB_2S_fixed: return AArch64::ST3WB_2S_register; + case AArch64::ST3WB_16B_fixed: return AArch64::ST3WB_16B_register; + case AArch64::ST3WB_8H_fixed: return AArch64::ST3WB_8H_register; + case AArch64::ST3WB_4S_fixed: return AArch64::ST3WB_4S_register; + case AArch64::ST3WB_2D_fixed: return AArch64::ST3WB_2D_register; + + case AArch64::ST4WB_8B_fixed: return AArch64::ST4WB_8B_register; + case AArch64::ST4WB_4H_fixed: return AArch64::ST4WB_4H_register; + case AArch64::ST4WB_2S_fixed: return AArch64::ST4WB_2S_register; + case AArch64::ST4WB_16B_fixed: return AArch64::ST4WB_16B_register; + case AArch64::ST4WB_8H_fixed: return AArch64::ST4WB_8H_register; + case AArch64::ST4WB_4S_fixed: return AArch64::ST4WB_4S_register; + case AArch64::ST4WB_2D_fixed: return AArch64::ST4WB_2D_register; + + case AArch64::ST1x2WB_8B_fixed: return AArch64::ST1x2WB_8B_register; + case AArch64::ST1x2WB_4H_fixed: return AArch64::ST1x2WB_4H_register; + case AArch64::ST1x2WB_2S_fixed: return AArch64::ST1x2WB_2S_register; + case AArch64::ST1x2WB_1D_fixed: return AArch64::ST1x2WB_1D_register; + case AArch64::ST1x2WB_16B_fixed: return AArch64::ST1x2WB_16B_register; + case AArch64::ST1x2WB_8H_fixed: return AArch64::ST1x2WB_8H_register; + case AArch64::ST1x2WB_4S_fixed: return AArch64::ST1x2WB_4S_register; + case AArch64::ST1x2WB_2D_fixed: return AArch64::ST1x2WB_2D_register; + + case AArch64::ST1x3WB_8B_fixed: return AArch64::ST1x3WB_8B_register; + case AArch64::ST1x3WB_4H_fixed: return AArch64::ST1x3WB_4H_register; + case AArch64::ST1x3WB_2S_fixed: return AArch64::ST1x3WB_2S_register; + case AArch64::ST1x3WB_1D_fixed: return AArch64::ST1x3WB_1D_register; + case AArch64::ST1x3WB_16B_fixed: return AArch64::ST1x3WB_16B_register; + case AArch64::ST1x3WB_8H_fixed: return AArch64::ST1x3WB_8H_register; + case AArch64::ST1x3WB_4S_fixed: return AArch64::ST1x3WB_4S_register; + case AArch64::ST1x3WB_2D_fixed: return AArch64::ST1x3WB_2D_register; + + case AArch64::ST1x4WB_8B_fixed: return AArch64::ST1x4WB_8B_register; + case AArch64::ST1x4WB_4H_fixed: return AArch64::ST1x4WB_4H_register; + case AArch64::ST1x4WB_2S_fixed: return 
AArch64::ST1x4WB_2S_register; + case AArch64::ST1x4WB_1D_fixed: return AArch64::ST1x4WB_1D_register; + case AArch64::ST1x4WB_16B_fixed: return AArch64::ST1x4WB_16B_register; + case AArch64::ST1x4WB_8H_fixed: return AArch64::ST1x4WB_8H_register; + case AArch64::ST1x4WB_4S_fixed: return AArch64::ST1x4WB_4S_register; + case AArch64::ST1x4WB_2D_fixed: return AArch64::ST1x4WB_2D_register; + + // Post-index of duplicate loads + case AArch64::LD2R_WB_8B_fixed: return AArch64::LD2R_WB_8B_register; + case AArch64::LD2R_WB_4H_fixed: return AArch64::LD2R_WB_4H_register; + case AArch64::LD2R_WB_2S_fixed: return AArch64::LD2R_WB_2S_register; + case AArch64::LD2R_WB_1D_fixed: return AArch64::LD2R_WB_1D_register; + case AArch64::LD2R_WB_16B_fixed: return AArch64::LD2R_WB_16B_register; + case AArch64::LD2R_WB_8H_fixed: return AArch64::LD2R_WB_8H_register; + case AArch64::LD2R_WB_4S_fixed: return AArch64::LD2R_WB_4S_register; + case AArch64::LD2R_WB_2D_fixed: return AArch64::LD2R_WB_2D_register; + + case AArch64::LD3R_WB_8B_fixed: return AArch64::LD3R_WB_8B_register; + case AArch64::LD3R_WB_4H_fixed: return AArch64::LD3R_WB_4H_register; + case AArch64::LD3R_WB_2S_fixed: return AArch64::LD3R_WB_2S_register; + case AArch64::LD3R_WB_1D_fixed: return AArch64::LD3R_WB_1D_register; + case AArch64::LD3R_WB_16B_fixed: return AArch64::LD3R_WB_16B_register; + case AArch64::LD3R_WB_8H_fixed: return AArch64::LD3R_WB_8H_register; + case AArch64::LD3R_WB_4S_fixed: return AArch64::LD3R_WB_4S_register; + case AArch64::LD3R_WB_2D_fixed: return AArch64::LD3R_WB_2D_register; + + case AArch64::LD4R_WB_8B_fixed: return AArch64::LD4R_WB_8B_register; + case AArch64::LD4R_WB_4H_fixed: return AArch64::LD4R_WB_4H_register; + case AArch64::LD4R_WB_2S_fixed: return AArch64::LD4R_WB_2S_register; + case AArch64::LD4R_WB_1D_fixed: return AArch64::LD4R_WB_1D_register; + case AArch64::LD4R_WB_16B_fixed: return AArch64::LD4R_WB_16B_register; + case AArch64::LD4R_WB_8H_fixed: return AArch64::LD4R_WB_8H_register; + case AArch64::LD4R_WB_4S_fixed: return AArch64::LD4R_WB_4S_register; + case AArch64::LD4R_WB_2D_fixed: return AArch64::LD4R_WB_2D_register; + + // Post-index of lane loads + case AArch64::LD2LN_WB_B_fixed: return AArch64::LD2LN_WB_B_register; + case AArch64::LD2LN_WB_H_fixed: return AArch64::LD2LN_WB_H_register; + case AArch64::LD2LN_WB_S_fixed: return AArch64::LD2LN_WB_S_register; + case AArch64::LD2LN_WB_D_fixed: return AArch64::LD2LN_WB_D_register; + + case AArch64::LD3LN_WB_B_fixed: return AArch64::LD3LN_WB_B_register; + case AArch64::LD3LN_WB_H_fixed: return AArch64::LD3LN_WB_H_register; + case AArch64::LD3LN_WB_S_fixed: return AArch64::LD3LN_WB_S_register; + case AArch64::LD3LN_WB_D_fixed: return AArch64::LD3LN_WB_D_register; + + case AArch64::LD4LN_WB_B_fixed: return AArch64::LD4LN_WB_B_register; + case AArch64::LD4LN_WB_H_fixed: return AArch64::LD4LN_WB_H_register; + case AArch64::LD4LN_WB_S_fixed: return AArch64::LD4LN_WB_S_register; + case AArch64::LD4LN_WB_D_fixed: return AArch64::LD4LN_WB_D_register; + + // Post-index of lane stores + case AArch64::ST2LN_WB_B_fixed: return AArch64::ST2LN_WB_B_register; + case AArch64::ST2LN_WB_H_fixed: return AArch64::ST2LN_WB_H_register; + case AArch64::ST2LN_WB_S_fixed: return AArch64::ST2LN_WB_S_register; + case AArch64::ST2LN_WB_D_fixed: return AArch64::ST2LN_WB_D_register; + + case AArch64::ST3LN_WB_B_fixed: return AArch64::ST3LN_WB_B_register; + case AArch64::ST3LN_WB_H_fixed: return AArch64::ST3LN_WB_H_register; + case AArch64::ST3LN_WB_S_fixed: return 
AArch64::ST3LN_WB_S_register; + case AArch64::ST3LN_WB_D_fixed: return AArch64::ST3LN_WB_D_register; + + case AArch64::ST4LN_WB_B_fixed: return AArch64::ST4LN_WB_B_register; + case AArch64::ST4LN_WB_H_fixed: return AArch64::ST4LN_WB_H_register; + case AArch64::ST4LN_WB_S_fixed: return AArch64::ST4LN_WB_S_register; + case AArch64::ST4LN_WB_D_fixed: return AArch64::ST4LN_WB_D_register; + } + return Opc; // If not one we handle, return it unchanged. +} + +SDNode *AArch64DAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, + unsigned NumVecs, + const uint16_t *Opcodes) { + assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); + + EVT VT = N->getValueType(0); + unsigned OpcodeIndex; + bool is64BitVector = VT.is64BitVector(); + switch (VT.getScalarType().getSizeInBits()) { + case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; + case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; + case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; + case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; + default: llvm_unreachable("unhandled vector load type"); + } + unsigned Opc = Opcodes[OpcodeIndex]; + + SmallVector Ops; + unsigned AddrOpIdx = isUpdating ? 1 : 2; + Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address + + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + if (!isa(Inc.getNode())) // Increment in Register + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + } + + Ops.push_back(N->getOperand(0)); // Push back the Chain + + SmallVector ResTys; + // Push back the type of return super register + if (NumVecs == 1) + ResTys.push_back(VT); + else if (NumVecs == 3) + ResTys.push_back(MVT::Untyped); + else { + EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, + is64BitVector ? NumVecs : NumVecs * 2); + ResTys.push_back(ResTy); + } + + if (isUpdating) + ResTys.push_back(MVT::i64); // Type of the updated register + ResTys.push_back(MVT::Other); // Type of the Chain + SDLoc dl(N); + SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(VLd)->setMemRefs(MemOp, MemOp + 1); + + if (NumVecs == 1) + return VLd; + + // If NumVecs > 1, the return result is a super register containing 2-4 + // consecutive vector registers. + SDValue SuperReg = SDValue(VLd, 0); + + unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0; + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); + // Update users of the Chain + ReplaceUses(SDValue(N, NumVecs), SDValue(VLd, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLd, 2)); + + return NULL; +} + +SDNode *AArch64DAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, + unsigned NumVecs, + const uint16_t *Opcodes) { + assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); + SDLoc dl(N); + + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + + unsigned AddrOpIdx = isUpdating ? 1 : 2; + unsigned Vec0Idx = 3; + EVT VT = N->getOperand(Vec0Idx).getValueType(); + unsigned OpcodeIndex; + bool is64BitVector = VT.is64BitVector(); + switch (VT.getScalarType().getSizeInBits()) { + case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; + case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; + case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; + case 64: OpcodeIndex = is64BitVector ? 
3 : 7; break; + default: llvm_unreachable("unhandled vector store type"); + } + unsigned Opc = Opcodes[OpcodeIndex]; + + SmallVector ResTys; + if (isUpdating) + ResTys.push_back(MVT::i64); + ResTys.push_back(MVT::Other); // Type for the Chain + + SmallVector Ops; + Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address + + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + if (!isa(Inc.getNode())) // Increment in Register + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + } + + SmallVector Regs(N->op_begin() + Vec0Idx, + N->op_begin() + Vec0Idx + NumVecs); + SDValue SrcReg = is64BitVector ? createDTuple(Regs) : createQTuple(Regs); + Ops.push_back(SrcReg); + + // Push back the Chain + Ops.push_back(N->getOperand(0)); + + // Transfer memoperands. + SDNode *VSt = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + cast(VSt)->setMemRefs(MemOp, MemOp + 1); + + return VSt; +} + +SDValue +AArch64DAGToDAGISel::getTargetSubregToReg(int SRIdx, SDLoc DL, EVT VT, EVT VTD, + SDValue Operand) { + SDNode *Reg = CurDAG->getMachineNode(TargetOpcode::SUBREG_TO_REG, DL, + VT, VTD, MVT::Other, + CurDAG->getTargetConstant(0, MVT::i64), + Operand, + CurDAG->getTargetConstant(AArch64::sub_64, MVT::i32)); + return SDValue(Reg, 0); +} + +SDNode *AArch64DAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, + unsigned NumVecs, + const uint16_t *Opcodes) { + assert(NumVecs >=2 && NumVecs <= 4 && "Load Dup NumVecs out-of-range"); + SDLoc dl(N); + + EVT VT = N->getValueType(0); + unsigned OpcodeIndex; + bool is64BitVector = VT.is64BitVector(); + switch (VT.getScalarType().getSizeInBits()) { + case 8: OpcodeIndex = is64BitVector ? 0 : 4; break; + case 16: OpcodeIndex = is64BitVector ? 1 : 5; break; + case 32: OpcodeIndex = is64BitVector ? 2 : 6; break; + case 64: OpcodeIndex = is64BitVector ? 3 : 7; break; + default: llvm_unreachable("unhandled vector duplicate lane load type"); + } + unsigned Opc = Opcodes[OpcodeIndex]; + + SDValue SuperReg; + SmallVector Ops; + Ops.push_back(N->getOperand(1)); // Push back the Memory Address + if (isUpdating) { + SDValue Inc = N->getOperand(2); + if (!isa(Inc.getNode())) // Increment in Register + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + } + Ops.push_back(N->getOperand(0)); // Push back the Chain + + SmallVector ResTys; + // Push back the type of return super register + if (NumVecs == 3) + ResTys.push_back(MVT::Untyped); + else { + EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, + is64BitVector ? NumVecs : NumVecs * 2); + ResTys.push_back(ResTy); + } + if (isUpdating) + ResTys.push_back(MVT::i64); // Type of the updated register + ResTys.push_back(MVT::Other); // Type of the Chain + SDNode *VLdDup = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + + // Transfer memoperands. + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(VLdDup)->setMemRefs(MemOp, MemOp + 1); + + SuperReg = SDValue(VLdDup, 0); + unsigned Sub0 = is64BitVector ? AArch64::dsub_0 : AArch64::qsub_0; + // Update uses of each registers in super register + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) + ReplaceUses(SDValue(N, Vec), + CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg)); + // Update uses of the Chain + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdDup, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdDup, 2)); + return NULL; +} + +// We only have 128-bit vector type of load/store lane instructions. 
+// If it is 64-bit vector, we also select it to the 128-bit instructions. +// Just use SUBREG_TO_REG to adapt the input to 128-bit vector and +// EXTRACT_SUBREG to get the 64-bit vector from the 128-bit vector output. +SDNode *AArch64DAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, + bool isUpdating, unsigned NumVecs, + const uint16_t *Opcodes) { + assert(NumVecs >= 2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); + SDLoc dl(N); + unsigned AddrOpIdx = isUpdating ? 1 : 2; + unsigned Vec0Idx = 3; + + SDValue Chain = N->getOperand(0); + unsigned Lane = + cast(N->getOperand(Vec0Idx + NumVecs))->getZExtValue(); + EVT VT = N->getOperand(Vec0Idx).getValueType(); + bool is64BitVector = VT.is64BitVector(); + EVT VT64; // 64-bit Vector Type + + if (is64BitVector) { + VT64 = VT; + VT = EVT::getVectorVT(*CurDAG->getContext(), VT.getVectorElementType(), + VT.getVectorNumElements() * 2); + } + + unsigned OpcodeIndex; + switch (VT.getScalarType().getSizeInBits()) { + case 8: OpcodeIndex = 0; break; + case 16: OpcodeIndex = 1; break; + case 32: OpcodeIndex = 2; break; + case 64: OpcodeIndex = 3; break; + default: llvm_unreachable("unhandled vector lane load/store type"); + } + unsigned Opc = Opcodes[OpcodeIndex]; + + SmallVector ResTys; + if (IsLoad) { + // Push back the type of return super register + if (NumVecs == 3) + ResTys.push_back(MVT::Untyped); + else { + EVT ResTy = EVT::getVectorVT(*CurDAG->getContext(), MVT::i64, + is64BitVector ? NumVecs : NumVecs * 2); + ResTys.push_back(ResTy); + } + } + if (isUpdating) + ResTys.push_back(MVT::i64); // Type of the updated register + ResTys.push_back(MVT::Other); // Type of Chain + SmallVector Ops; + Ops.push_back(N->getOperand(AddrOpIdx)); // Push back the Memory Address + if (isUpdating) { + SDValue Inc = N->getOperand(AddrOpIdx + 1); + if (!isa(Inc.getNode())) // Increment in Register + Opc = getVLDSTRegisterUpdateOpcode(Opc); + Ops.push_back(Inc); + } + + SmallVector Regs(N->op_begin() + Vec0Idx, + N->op_begin() + Vec0Idx + NumVecs); + if (is64BitVector) + for (unsigned i = 0; i < Regs.size(); i++) + Regs[i] = getTargetSubregToReg(AArch64::sub_64, dl, VT, VT64, Regs[i]); + SDValue SuperReg = createQTuple(Regs); + + Ops.push_back(SuperReg); // Source Reg + SDValue LaneValue = CurDAG->getTargetConstant(Lane, MVT::i32); + Ops.push_back(LaneValue); + Ops.push_back(Chain); // Push back the Chain + + SDNode *VLdLn = CurDAG->getMachineNode(Opc, dl, ResTys, Ops); + MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); + MemOp[0] = cast(N)->getMemOperand(); + cast(VLdLn)->setMemRefs(MemOp, MemOp + 1); + if (!IsLoad) + return VLdLn; + + // Extract the subregisters. + SuperReg = SDValue(VLdLn, 0); + unsigned Sub0 = AArch64::qsub_0; + // Update uses of each registers in super register + for (unsigned Vec = 0; Vec < NumVecs; ++Vec) { + SDValue SUB0 = CurDAG->getTargetExtractSubreg(Sub0 + Vec, dl, VT, SuperReg); + if (is64BitVector) { + SUB0 = CurDAG->getTargetExtractSubreg(AArch64::sub_64, dl, VT64, SUB0); + } + ReplaceUses(SDValue(N, Vec), SUB0); + } + ReplaceUses(SDValue(N, NumVecs), SDValue(VLdLn, 1)); + if (isUpdating) + ReplaceUses(SDValue(N, NumVecs + 1), SDValue(VLdLn, 2)); + return NULL; +} + +unsigned AArch64DAGToDAGISel::getTBLOpc(bool IsExt, bool Is64Bit, + unsigned NumOfVec) { + assert(NumOfVec >= 1 && NumOfVec <= 4 && "VST NumVecs out-of-range"); + + unsigned Opc = 0; + switch (NumOfVec) { + default: + break; + case 1: + if (IsExt) + Opc = Is64Bit ? AArch64::TBX1_8b : AArch64::TBX1_16b; + else + Opc = Is64Bit ? 
AArch64::TBL1_8b : AArch64::TBL1_16b; + break; + case 2: + if (IsExt) + Opc = Is64Bit ? AArch64::TBX2_8b : AArch64::TBX2_16b; + else + Opc = Is64Bit ? AArch64::TBL2_8b : AArch64::TBL2_16b; + break; + case 3: + if (IsExt) + Opc = Is64Bit ? AArch64::TBX3_8b : AArch64::TBX3_16b; + else + Opc = Is64Bit ? AArch64::TBL3_8b : AArch64::TBL3_16b; + break; + case 4: + if (IsExt) + Opc = Is64Bit ? AArch64::TBX4_8b : AArch64::TBX4_16b; + else + Opc = Is64Bit ? AArch64::TBL4_8b : AArch64::TBL4_16b; + break; + } + + return Opc; +} + +SDNode *AArch64DAGToDAGISel::SelectVTBL(SDNode *N, unsigned NumVecs, + bool IsExt) { + assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); + SDLoc dl(N); + + // Check the element of look up table is 64-bit or not + unsigned Vec0Idx = IsExt ? 2 : 1; + assert(!N->getOperand(Vec0Idx + 0).getValueType().is64BitVector() && + "The element of lookup table for vtbl and vtbx must be 128-bit"); + + // Check the return value type is 64-bit or not + EVT ResVT = N->getValueType(0); + bool is64BitRes = ResVT.is64BitVector(); + + // Create new SDValue for vector list + SmallVector Regs(N->op_begin() + Vec0Idx, + N->op_begin() + Vec0Idx + NumVecs); + SDValue TblReg = createQTuple(Regs); + unsigned Opc = getTBLOpc(IsExt, is64BitRes, NumVecs); + + SmallVector Ops; + if (IsExt) + Ops.push_back(N->getOperand(1)); + Ops.push_back(TblReg); + Ops.push_back(N->getOperand(Vec0Idx + NumVecs)); + return CurDAG->getMachineNode(Opc, dl, ResVT, Ops); +} + SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { // Dump information about the Node being selected DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << "\n"); @@ -474,7 +1108,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { AArch64::ATOMIC_CMP_SWAP_I64); case ISD::FrameIndex: { int FI = cast(Node)->getIndex(); - EVT PtrTy = TLI.getPointerTy(); + EVT PtrTy = getTargetLowering()->getPointerTy(); SDValue TFI = CurDAG->getTargetFrameIndex(FI, PtrTy); return CurDAG->SelectNodeTo(Node, AArch64::ADDxxi_lsl0_s, PtrTy, TFI, CurDAG->getTargetConstant(0, PtrTy)); @@ -498,7 +1132,7 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { assert((Ty == MVT::i32 || Ty == MVT::i64) && "unexpected type"); uint16_t Register = Ty == MVT::i32 ? 
AArch64::WZR : AArch64::XZR; ResNode = CurDAG->getCopyFromReg(CurDAG->getEntryNode(), - Node->getDebugLoc(), + SDLoc(Node), Register, Ty).getNode(); } @@ -535,6 +1169,399 @@ SDNode *AArch64DAGToDAGISel::Select(SDNode *Node) { Node = ResNode; break; } + case AArch64ISD::NEON_LD1_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD1WB_8B_fixed, AArch64::LD1WB_4H_fixed, + AArch64::LD1WB_2S_fixed, AArch64::LD1WB_1D_fixed, + AArch64::LD1WB_16B_fixed, AArch64::LD1WB_8H_fixed, + AArch64::LD1WB_4S_fixed, AArch64::LD1WB_2D_fixed + }; + return SelectVLD(Node, true, 1, Opcodes); + } + case AArch64ISD::NEON_LD2_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD2WB_8B_fixed, AArch64::LD2WB_4H_fixed, + AArch64::LD2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed, + AArch64::LD2WB_16B_fixed, AArch64::LD2WB_8H_fixed, + AArch64::LD2WB_4S_fixed, AArch64::LD2WB_2D_fixed + }; + return SelectVLD(Node, true, 2, Opcodes); + } + case AArch64ISD::NEON_LD3_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD3WB_8B_fixed, AArch64::LD3WB_4H_fixed, + AArch64::LD3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed, + AArch64::LD3WB_16B_fixed, AArch64::LD3WB_8H_fixed, + AArch64::LD3WB_4S_fixed, AArch64::LD3WB_2D_fixed + }; + return SelectVLD(Node, true, 3, Opcodes); + } + case AArch64ISD::NEON_LD4_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD4WB_8B_fixed, AArch64::LD4WB_4H_fixed, + AArch64::LD4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed, + AArch64::LD4WB_16B_fixed, AArch64::LD4WB_8H_fixed, + AArch64::LD4WB_4S_fixed, AArch64::LD4WB_2D_fixed + }; + return SelectVLD(Node, true, 4, Opcodes); + } + case AArch64ISD::NEON_LD1x2_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD1x2WB_8B_fixed, AArch64::LD1x2WB_4H_fixed, + AArch64::LD1x2WB_2S_fixed, AArch64::LD1x2WB_1D_fixed, + AArch64::LD1x2WB_16B_fixed, AArch64::LD1x2WB_8H_fixed, + AArch64::LD1x2WB_4S_fixed, AArch64::LD1x2WB_2D_fixed + }; + return SelectVLD(Node, true, 2, Opcodes); + } + case AArch64ISD::NEON_LD1x3_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD1x3WB_8B_fixed, AArch64::LD1x3WB_4H_fixed, + AArch64::LD1x3WB_2S_fixed, AArch64::LD1x3WB_1D_fixed, + AArch64::LD1x3WB_16B_fixed, AArch64::LD1x3WB_8H_fixed, + AArch64::LD1x3WB_4S_fixed, AArch64::LD1x3WB_2D_fixed + }; + return SelectVLD(Node, true, 3, Opcodes); + } + case AArch64ISD::NEON_LD1x4_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD1x4WB_8B_fixed, AArch64::LD1x4WB_4H_fixed, + AArch64::LD1x4WB_2S_fixed, AArch64::LD1x4WB_1D_fixed, + AArch64::LD1x4WB_16B_fixed, AArch64::LD1x4WB_8H_fixed, + AArch64::LD1x4WB_4S_fixed, AArch64::LD1x4WB_2D_fixed + }; + return SelectVLD(Node, true, 4, Opcodes); + } + case AArch64ISD::NEON_ST1_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST1WB_8B_fixed, AArch64::ST1WB_4H_fixed, + AArch64::ST1WB_2S_fixed, AArch64::ST1WB_1D_fixed, + AArch64::ST1WB_16B_fixed, AArch64::ST1WB_8H_fixed, + AArch64::ST1WB_4S_fixed, AArch64::ST1WB_2D_fixed + }; + return SelectVST(Node, true, 1, Opcodes); + } + case AArch64ISD::NEON_ST2_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST2WB_8B_fixed, AArch64::ST2WB_4H_fixed, + AArch64::ST2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed, + AArch64::ST2WB_16B_fixed, AArch64::ST2WB_8H_fixed, + AArch64::ST2WB_4S_fixed, AArch64::ST2WB_2D_fixed + }; + return SelectVST(Node, true, 2, Opcodes); + } + case AArch64ISD::NEON_ST3_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST3WB_8B_fixed, AArch64::ST3WB_4H_fixed, + AArch64::ST3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed, + AArch64::ST3WB_16B_fixed, AArch64::ST3WB_8H_fixed, + 
AArch64::ST3WB_4S_fixed, AArch64::ST3WB_2D_fixed + }; + return SelectVST(Node, true, 3, Opcodes); + } + case AArch64ISD::NEON_ST4_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST4WB_8B_fixed, AArch64::ST4WB_4H_fixed, + AArch64::ST4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed, + AArch64::ST4WB_16B_fixed, AArch64::ST4WB_8H_fixed, + AArch64::ST4WB_4S_fixed, AArch64::ST4WB_2D_fixed + }; + return SelectVST(Node, true, 4, Opcodes); + } + case AArch64ISD::NEON_LD2DUP: { + static const uint16_t Opcodes[] = { + AArch64::LD2R_8B, AArch64::LD2R_4H, AArch64::LD2R_2S, + AArch64::LD2R_1D, AArch64::LD2R_16B, AArch64::LD2R_8H, + AArch64::LD2R_4S, AArch64::LD2R_2D + }; + return SelectVLDDup(Node, false, 2, Opcodes); + } + case AArch64ISD::NEON_LD3DUP: { + static const uint16_t Opcodes[] = { + AArch64::LD3R_8B, AArch64::LD3R_4H, AArch64::LD3R_2S, + AArch64::LD3R_1D, AArch64::LD3R_16B, AArch64::LD3R_8H, + AArch64::LD3R_4S, AArch64::LD3R_2D + }; + return SelectVLDDup(Node, false, 3, Opcodes); + } + case AArch64ISD::NEON_LD4DUP: { + static const uint16_t Opcodes[] = { + AArch64::LD4R_8B, AArch64::LD4R_4H, AArch64::LD4R_2S, + AArch64::LD4R_1D, AArch64::LD4R_16B, AArch64::LD4R_8H, + AArch64::LD4R_4S, AArch64::LD4R_2D + }; + return SelectVLDDup(Node, false, 4, Opcodes); + } + case AArch64ISD::NEON_LD2DUP_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD2R_WB_8B_fixed, AArch64::LD2R_WB_4H_fixed, + AArch64::LD2R_WB_2S_fixed, AArch64::LD2R_WB_1D_fixed, + AArch64::LD2R_WB_16B_fixed, AArch64::LD2R_WB_8H_fixed, + AArch64::LD2R_WB_4S_fixed, AArch64::LD2R_WB_2D_fixed + }; + return SelectVLDDup(Node, true, 2, Opcodes); + } + case AArch64ISD::NEON_LD3DUP_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD3R_WB_8B_fixed, AArch64::LD3R_WB_4H_fixed, + AArch64::LD3R_WB_2S_fixed, AArch64::LD3R_WB_1D_fixed, + AArch64::LD3R_WB_16B_fixed, AArch64::LD3R_WB_8H_fixed, + AArch64::LD3R_WB_4S_fixed, AArch64::LD3R_WB_2D_fixed + }; + return SelectVLDDup(Node, true, 3, Opcodes); + } + case AArch64ISD::NEON_LD4DUP_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD4R_WB_8B_fixed, AArch64::LD4R_WB_4H_fixed, + AArch64::LD4R_WB_2S_fixed, AArch64::LD4R_WB_1D_fixed, + AArch64::LD4R_WB_16B_fixed, AArch64::LD4R_WB_8H_fixed, + AArch64::LD4R_WB_4S_fixed, AArch64::LD4R_WB_2D_fixed + }; + return SelectVLDDup(Node, true, 4, Opcodes); + } + case AArch64ISD::NEON_LD2LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD2LN_WB_B_fixed, AArch64::LD2LN_WB_H_fixed, + AArch64::LD2LN_WB_S_fixed, AArch64::LD2LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, true, true, 2, Opcodes); + } + case AArch64ISD::NEON_LD3LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD3LN_WB_B_fixed, AArch64::LD3LN_WB_H_fixed, + AArch64::LD3LN_WB_S_fixed, AArch64::LD3LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, true, true, 3, Opcodes); + } + case AArch64ISD::NEON_LD4LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::LD4LN_WB_B_fixed, AArch64::LD4LN_WB_H_fixed, + AArch64::LD4LN_WB_S_fixed, AArch64::LD4LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, true, true, 4, Opcodes); + } + case AArch64ISD::NEON_ST2LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST2LN_WB_B_fixed, AArch64::ST2LN_WB_H_fixed, + AArch64::ST2LN_WB_S_fixed, AArch64::ST2LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, false, true, 2, Opcodes); + } + case AArch64ISD::NEON_ST3LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST3LN_WB_B_fixed, AArch64::ST3LN_WB_H_fixed, + AArch64::ST3LN_WB_S_fixed, AArch64::ST3LN_WB_D_fixed + }; + return 
SelectVLDSTLane(Node, false, true, 3, Opcodes); + } + case AArch64ISD::NEON_ST4LN_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST4LN_WB_B_fixed, AArch64::ST4LN_WB_H_fixed, + AArch64::ST4LN_WB_S_fixed, AArch64::ST4LN_WB_D_fixed + }; + return SelectVLDSTLane(Node, false, true, 4, Opcodes); + } + case AArch64ISD::NEON_ST1x2_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST1x2WB_8B_fixed, AArch64::ST1x2WB_4H_fixed, + AArch64::ST1x2WB_2S_fixed, AArch64::ST1x2WB_1D_fixed, + AArch64::ST1x2WB_16B_fixed, AArch64::ST1x2WB_8H_fixed, + AArch64::ST1x2WB_4S_fixed, AArch64::ST1x2WB_2D_fixed + }; + return SelectVST(Node, true, 2, Opcodes); + } + case AArch64ISD::NEON_ST1x3_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST1x3WB_8B_fixed, AArch64::ST1x3WB_4H_fixed, + AArch64::ST1x3WB_2S_fixed, AArch64::ST1x3WB_1D_fixed, + AArch64::ST1x3WB_16B_fixed, AArch64::ST1x3WB_8H_fixed, + AArch64::ST1x3WB_4S_fixed, AArch64::ST1x3WB_2D_fixed + }; + return SelectVST(Node, true, 3, Opcodes); + } + case AArch64ISD::NEON_ST1x4_UPD: { + static const uint16_t Opcodes[] = { + AArch64::ST1x4WB_8B_fixed, AArch64::ST1x4WB_4H_fixed, + AArch64::ST1x4WB_2S_fixed, AArch64::ST1x4WB_1D_fixed, + AArch64::ST1x4WB_16B_fixed, AArch64::ST1x4WB_8H_fixed, + AArch64::ST1x4WB_4S_fixed, AArch64::ST1x4WB_2D_fixed + }; + return SelectVST(Node, true, 4, Opcodes); + } + case ISD::INTRINSIC_WO_CHAIN: { + unsigned IntNo = cast(Node->getOperand(0))->getZExtValue(); + bool IsExt = false; + switch (IntNo) { + default: + break; + case Intrinsic::aarch64_neon_vtbx1: + IsExt = true; + case Intrinsic::aarch64_neon_vtbl1: + return SelectVTBL(Node, 1, IsExt); + case Intrinsic::aarch64_neon_vtbx2: + IsExt = true; + case Intrinsic::aarch64_neon_vtbl2: + return SelectVTBL(Node, 2, IsExt); + case Intrinsic::aarch64_neon_vtbx3: + IsExt = true; + case Intrinsic::aarch64_neon_vtbl3: + return SelectVTBL(Node, 3, IsExt); + case Intrinsic::aarch64_neon_vtbx4: + IsExt = true; + case Intrinsic::aarch64_neon_vtbl4: + return SelectVTBL(Node, 4, IsExt); + } + break; + } + case ISD::INTRINSIC_VOID: + case ISD::INTRINSIC_W_CHAIN: { + unsigned IntNo = cast(Node->getOperand(1))->getZExtValue(); + switch (IntNo) { + default: + break; + case Intrinsic::arm_neon_vld1: { + static const uint16_t Opcodes[] = { + AArch64::LD1_8B, AArch64::LD1_4H, AArch64::LD1_2S, AArch64::LD1_1D, + AArch64::LD1_16B, AArch64::LD1_8H, AArch64::LD1_4S, AArch64::LD1_2D + }; + return SelectVLD(Node, false, 1, Opcodes); + } + case Intrinsic::arm_neon_vld2: { + static const uint16_t Opcodes[] = { + AArch64::LD2_8B, AArch64::LD2_4H, AArch64::LD2_2S, AArch64::LD1x2_1D, + AArch64::LD2_16B, AArch64::LD2_8H, AArch64::LD2_4S, AArch64::LD2_2D + }; + return SelectVLD(Node, false, 2, Opcodes); + } + case Intrinsic::arm_neon_vld3: { + static const uint16_t Opcodes[] = { + AArch64::LD3_8B, AArch64::LD3_4H, AArch64::LD3_2S, AArch64::LD1x3_1D, + AArch64::LD3_16B, AArch64::LD3_8H, AArch64::LD3_4S, AArch64::LD3_2D + }; + return SelectVLD(Node, false, 3, Opcodes); + } + case Intrinsic::arm_neon_vld4: { + static const uint16_t Opcodes[] = { + AArch64::LD4_8B, AArch64::LD4_4H, AArch64::LD4_2S, AArch64::LD1x4_1D, + AArch64::LD4_16B, AArch64::LD4_8H, AArch64::LD4_4S, AArch64::LD4_2D + }; + return SelectVLD(Node, false, 4, Opcodes); + } + case Intrinsic::aarch64_neon_vld1x2: { + static const uint16_t Opcodes[] = { + AArch64::LD1x2_8B, AArch64::LD1x2_4H, AArch64::LD1x2_2S, + AArch64::LD1x2_1D, AArch64::LD1x2_16B, AArch64::LD1x2_8H, + AArch64::LD1x2_4S, AArch64::LD1x2_2D + }; + return SelectVLD(Node, 
false, 2, Opcodes); + } + case Intrinsic::aarch64_neon_vld1x3: { + static const uint16_t Opcodes[] = { + AArch64::LD1x3_8B, AArch64::LD1x3_4H, AArch64::LD1x3_2S, + AArch64::LD1x3_1D, AArch64::LD1x3_16B, AArch64::LD1x3_8H, + AArch64::LD1x3_4S, AArch64::LD1x3_2D + }; + return SelectVLD(Node, false, 3, Opcodes); + } + case Intrinsic::aarch64_neon_vld1x4: { + static const uint16_t Opcodes[] = { + AArch64::LD1x4_8B, AArch64::LD1x4_4H, AArch64::LD1x4_2S, + AArch64::LD1x4_1D, AArch64::LD1x4_16B, AArch64::LD1x4_8H, + AArch64::LD1x4_4S, AArch64::LD1x4_2D + }; + return SelectVLD(Node, false, 4, Opcodes); + } + case Intrinsic::arm_neon_vst1: { + static const uint16_t Opcodes[] = { + AArch64::ST1_8B, AArch64::ST1_4H, AArch64::ST1_2S, AArch64::ST1_1D, + AArch64::ST1_16B, AArch64::ST1_8H, AArch64::ST1_4S, AArch64::ST1_2D + }; + return SelectVST(Node, false, 1, Opcodes); + } + case Intrinsic::arm_neon_vst2: { + static const uint16_t Opcodes[] = { + AArch64::ST2_8B, AArch64::ST2_4H, AArch64::ST2_2S, AArch64::ST1x2_1D, + AArch64::ST2_16B, AArch64::ST2_8H, AArch64::ST2_4S, AArch64::ST2_2D + }; + return SelectVST(Node, false, 2, Opcodes); + } + case Intrinsic::arm_neon_vst3: { + static const uint16_t Opcodes[] = { + AArch64::ST3_8B, AArch64::ST3_4H, AArch64::ST3_2S, AArch64::ST1x3_1D, + AArch64::ST3_16B, AArch64::ST3_8H, AArch64::ST3_4S, AArch64::ST3_2D + }; + return SelectVST(Node, false, 3, Opcodes); + } + case Intrinsic::arm_neon_vst4: { + static const uint16_t Opcodes[] = { + AArch64::ST4_8B, AArch64::ST4_4H, AArch64::ST4_2S, AArch64::ST1x4_1D, + AArch64::ST4_16B, AArch64::ST4_8H, AArch64::ST4_4S, AArch64::ST4_2D + }; + return SelectVST(Node, false, 4, Opcodes); + } + case Intrinsic::aarch64_neon_vst1x2: { + static const uint16_t Opcodes[] = { + AArch64::ST1x2_8B, AArch64::ST1x2_4H, AArch64::ST1x2_2S, + AArch64::ST1x2_1D, AArch64::ST1x2_16B, AArch64::ST1x2_8H, + AArch64::ST1x2_4S, AArch64::ST1x2_2D + }; + return SelectVST(Node, false, 2, Opcodes); + } + case Intrinsic::aarch64_neon_vst1x3: { + static const uint16_t Opcodes[] = { + AArch64::ST1x3_8B, AArch64::ST1x3_4H, AArch64::ST1x3_2S, + AArch64::ST1x3_1D, AArch64::ST1x3_16B, AArch64::ST1x3_8H, + AArch64::ST1x3_4S, AArch64::ST1x3_2D + }; + return SelectVST(Node, false, 3, Opcodes); + } + case Intrinsic::aarch64_neon_vst1x4: { + static const uint16_t Opcodes[] = { + AArch64::ST1x4_8B, AArch64::ST1x4_4H, AArch64::ST1x4_2S, + AArch64::ST1x4_1D, AArch64::ST1x4_16B, AArch64::ST1x4_8H, + AArch64::ST1x4_4S, AArch64::ST1x4_2D + }; + return SelectVST(Node, false, 4, Opcodes); + } + case Intrinsic::arm_neon_vld2lane: { + static const uint16_t Opcodes[] = { + AArch64::LD2LN_B, AArch64::LD2LN_H, AArch64::LD2LN_S, AArch64::LD2LN_D + }; + return SelectVLDSTLane(Node, true, false, 2, Opcodes); + } + case Intrinsic::arm_neon_vld3lane: { + static const uint16_t Opcodes[] = { + AArch64::LD3LN_B, AArch64::LD3LN_H, AArch64::LD3LN_S, AArch64::LD3LN_D + }; + return SelectVLDSTLane(Node, true, false, 3, Opcodes); + } + case Intrinsic::arm_neon_vld4lane: { + static const uint16_t Opcodes[] = { + AArch64::LD4LN_B, AArch64::LD4LN_H, AArch64::LD4LN_S, AArch64::LD4LN_D + }; + return SelectVLDSTLane(Node, true, false, 4, Opcodes); + } + case Intrinsic::arm_neon_vst2lane: { + static const uint16_t Opcodes[] = { + AArch64::ST2LN_B, AArch64::ST2LN_H, AArch64::ST2LN_S, AArch64::ST2LN_D + }; + return SelectVLDSTLane(Node, false, false, 2, Opcodes); + } + case Intrinsic::arm_neon_vst3lane: { + static const uint16_t Opcodes[] = { + AArch64::ST3LN_B, AArch64::ST3LN_H, AArch64::ST3LN_S, 
AArch64::ST3LN_D + }; + return SelectVLDSTLane(Node, false, false, 3, Opcodes); + } + case Intrinsic::arm_neon_vst4lane: { + static const uint16_t Opcodes[] = { + AArch64::ST4LN_B, AArch64::ST4LN_H, AArch64::ST4LN_S, AArch64::ST4LN_D + }; + return SelectVLDSTLane(Node, false, false, 4, Opcodes); + } + } // End of switch IntNo + break; + } // End of case ISD::INTRINSIC_VOID and :ISD::INTRINSIC_W_CHAIN default: break; // Let generic code handle it } diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp index 56f6751..4fdb667 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -39,12 +39,10 @@ static TargetLoweringObjectFile *createTLOF(AArch64TargetMachine &TM) { llvm_unreachable("unknown subtarget type"); } - AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) - : TargetLowering(TM, createTLOF(TM)), - Subtarget(&TM.getSubtarget()), - RegInfo(TM.getRegisterInfo()), - Itins(TM.getInstrItineraryData()) { + : TargetLowering(TM, createTLOF(TM)), Itins(TM.getInstrItineraryData()) { + + const AArch64Subtarget *Subtarget = &TM.getSubtarget(); // SIMD compares set the entire lane's bits to 1 setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); @@ -52,10 +50,34 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) // Scalar register <-> type mapping addRegisterClass(MVT::i32, &AArch64::GPR32RegClass); addRegisterClass(MVT::i64, &AArch64::GPR64RegClass); - addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); - addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); - addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); - addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); + + if (Subtarget->hasFPARMv8()) { + addRegisterClass(MVT::f16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::f64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::f128, &AArch64::FPR128RegClass); + } + + if (Subtarget->hasNEON()) { + // And the vectors + addRegisterClass(MVT::v1i8, &AArch64::FPR8RegClass); + addRegisterClass(MVT::v1i16, &AArch64::FPR16RegClass); + addRegisterClass(MVT::v1i32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v1f32, &AArch64::FPR32RegClass); + addRegisterClass(MVT::v1f64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v8i8, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v4i16, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v2i32, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v1i64, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v2f32, &AArch64::FPR64RegClass); + addRegisterClass(MVT::v16i8, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v8i16, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v4i32, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v2i64, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v4f32, &AArch64::FPR128RegClass); + addRegisterClass(MVT::v2f64, &AArch64::FPR128RegClass); + } computeRegisterProperties(); @@ -64,6 +86,12 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setTargetDAGCombine(ISD::AND); setTargetDAGCombine(ISD::SRA); + setTargetDAGCombine(ISD::SRL); + setTargetDAGCombine(ISD::SHL); + + setTargetDAGCombine(ISD::INTRINSIC_WO_CHAIN); + setTargetDAGCombine(ISD::INTRINSIC_VOID); + setTargetDAGCombine(ISD::INTRINSIC_W_CHAIN); // AArch64 does not have i1 loads, or much of anything for i1 really. 
setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote); @@ -253,14 +281,97 @@ AArch64TargetLowering::AArch64TargetLowering(AArch64TargetMachine &TM) setTruncStoreAction(MVT::f64, MVT::f16, Expand); setTruncStoreAction(MVT::f32, MVT::f16, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i64, Expand); - setOperationAction(ISD::EHSELECTION, MVT::i64, Expand); - setExceptionPointerRegister(AArch64::X0); setExceptionSelectorRegister(AArch64::X1); + + if (Subtarget->hasNEON()) { + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i8, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i8, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v16i8, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v8i16, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4i32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1i64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2i64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v4f32, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v1f64, Custom); + setOperationAction(ISD::BUILD_VECTOR, MVT::v2f64, Custom); + + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v16i8, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i16, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1i64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2i64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4f32, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v1f64, Custom); + setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v2f64, Custom); + + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i8, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v8i16, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4i32, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2i64, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v4f32, Legal); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v2f64, Legal); + + setOperationAction(ISD::SETCC, MVT::v8i8, Custom); + setOperationAction(ISD::SETCC, MVT::v16i8, Custom); + setOperationAction(ISD::SETCC, MVT::v4i16, Custom); + setOperationAction(ISD::SETCC, MVT::v8i16, Custom); + setOperationAction(ISD::SETCC, MVT::v2i32, Custom); + setOperationAction(ISD::SETCC, MVT::v4i32, Custom); + setOperationAction(ISD::SETCC, MVT::v1i64, Custom); + setOperationAction(ISD::SETCC, MVT::v2i64, Custom); + setOperationAction(ISD::SETCC, MVT::v1f32, Custom); + setOperationAction(ISD::SETCC, MVT::v2f32, Custom); + setOperationAction(ISD::SETCC, MVT::v4f32, Custom); + setOperationAction(ISD::SETCC, MVT::v1f64, Custom); + setOperationAction(ISD::SETCC, MVT::v2f64, Custom); + + setOperationAction(ISD::FFLOOR, MVT::v2f32, Legal); + 
setOperationAction(ISD::FFLOOR, MVT::v4f32, Legal); + setOperationAction(ISD::FFLOOR, MVT::v1f64, Legal); + setOperationAction(ISD::FFLOOR, MVT::v2f64, Legal); + + setOperationAction(ISD::FCEIL, MVT::v2f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v4f32, Legal); + setOperationAction(ISD::FCEIL, MVT::v1f64, Legal); + setOperationAction(ISD::FCEIL, MVT::v2f64, Legal); + + setOperationAction(ISD::FTRUNC, MVT::v2f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v4f32, Legal); + setOperationAction(ISD::FTRUNC, MVT::v1f64, Legal); + setOperationAction(ISD::FTRUNC, MVT::v2f64, Legal); + + setOperationAction(ISD::FRINT, MVT::v2f32, Legal); + setOperationAction(ISD::FRINT, MVT::v4f32, Legal); + setOperationAction(ISD::FRINT, MVT::v1f64, Legal); + setOperationAction(ISD::FRINT, MVT::v2f64, Legal); + + setOperationAction(ISD::FNEARBYINT, MVT::v2f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v4f32, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v1f64, Legal); + setOperationAction(ISD::FNEARBYINT, MVT::v2f64, Legal); + + setOperationAction(ISD::FROUND, MVT::v2f32, Legal); + setOperationAction(ISD::FROUND, MVT::v4f32, Legal); + setOperationAction(ISD::FROUND, MVT::v1f64, Legal); + setOperationAction(ISD::FROUND, MVT::v2f64, Legal); + } } -EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const { +EVT AArch64TargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const { // It's reasonably important that this value matches the "natural" legal // promotion from i1 for scalar types. Otherwise LegalizeTypes can get itself // in a twist (e.g. inserting an any_extend which then becomes i64 -> i64). @@ -271,16 +382,16 @@ EVT AArch64TargetLowering::getSetCCResultType(EVT VT) const { static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, unsigned &LdrOpc, unsigned &StrOpc) { - static unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword, - AArch64::LDXR_word, AArch64::LDXR_dword}; - static unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword, - AArch64::LDAXR_word, AArch64::LDAXR_dword}; - static unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword, - AArch64::STXR_word, AArch64::STXR_dword}; - static unsigned StoreRels[] = {AArch64::STLXR_byte, AArch64::STLXR_hword, - AArch64::STLXR_word, AArch64::STLXR_dword}; - - unsigned *LoadOps, *StoreOps; + static const unsigned LoadBares[] = {AArch64::LDXR_byte, AArch64::LDXR_hword, + AArch64::LDXR_word, AArch64::LDXR_dword}; + static const unsigned LoadAcqs[] = {AArch64::LDAXR_byte, AArch64::LDAXR_hword, + AArch64::LDAXR_word, AArch64::LDAXR_dword}; + static const unsigned StoreBares[] = {AArch64::STXR_byte, AArch64::STXR_hword, + AArch64::STXR_word, AArch64::STXR_dword}; + static const unsigned StoreRels[] = {AArch64::STLXR_byte,AArch64::STLXR_hword, + AArch64::STLXR_word, AArch64::STLXR_dword}; + + const unsigned *LoadOps, *StoreOps; if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent) LoadOps = LoadAcqs; else @@ -298,6 +409,29 @@ static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord, StrOpc = StoreOps[Log2_32(Size)]; } +// FIXME: AArch64::DTripleRegClass and AArch64::QTripleRegClass don't really +// have value type mapped, and they are both being defined as MVT::untyped. +// Without knowing the MVT type, MachineLICM::getRegisterClassIDAndCost +// would fail to figure out the register pressure correctly. 
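+// The override below therefore reports a representative class and a cost by
+// hand. Its effect, with values taken from the switch that follows, is
+// (illustrative only):
+//   findRepresentativeClass(MVT::v4i64) -> { &AArch64::QPairRegClass, 2 }
+//   findRepresentativeClass(MVT::v8i64) -> { &AArch64::QQuadRegClass, 4 }
+// Any other type defers to TargetLowering::findRepresentativeClass.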
+std::pair +AArch64TargetLowering::findRepresentativeClass(MVT VT) const{ + const TargetRegisterClass *RRC = 0; + uint8_t Cost = 1; + switch (VT.SimpleTy) { + default: + return TargetLowering::findRepresentativeClass(VT); + case MVT::v4i64: + RRC = &AArch64::QPairRegClass; + Cost = 2; + break; + case MVT::v8i64: + RRC = &AArch64::QQuadRegClass; + Cost = 4; + break; + } + return std::make_pair(RRC, Cost); +} + MachineBasicBlock * AArch64TargetLowering::emitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned Size, @@ -623,6 +757,12 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, MBB->addSuccessor(TrueBB); MBB->addSuccessor(EndBB); + if (!NZCVKilled) { + // NZCV is live-through TrueBB. + TrueBB->addLiveIn(AArch64::NZCV); + EndBB->addLiveIn(AArch64::NZCV); + } + // IfTrue: // str qIFTRUE, [sp] BuildMI(TrueBB, DL, TII->get(AArch64::LSFP128_STR)) @@ -637,8 +777,6 @@ AArch64TargetLowering::EmitF128CSEL(MachineInstr *MI, // Done: // ldr qDEST, [sp] // [... rest of incoming MBB ...] - if (!NZCVKilled) - EndBB->addLiveIn(AArch64::NZCV); MachineInstr *StartOfEnd = EndBB->begin(); BuildMI(*EndBB, StartOfEnd, DL, TII->get(AArch64::LSFP128_LDR), DestReg) .addFrameIndex(ScratchFI) @@ -784,7 +922,102 @@ const char *AArch64TargetLowering::getTargetNodeName(unsigned Opcode) const { case AArch64ISD::WrapperLarge: return "AArch64ISD::WrapperLarge"; case AArch64ISD::WrapperSmall: return "AArch64ISD::WrapperSmall"; - default: return NULL; + case AArch64ISD::NEON_BSL: + return "AArch64ISD::NEON_BSL"; + case AArch64ISD::NEON_MOVIMM: + return "AArch64ISD::NEON_MOVIMM"; + case AArch64ISD::NEON_MVNIMM: + return "AArch64ISD::NEON_MVNIMM"; + case AArch64ISD::NEON_FMOVIMM: + return "AArch64ISD::NEON_FMOVIMM"; + case AArch64ISD::NEON_CMP: + return "AArch64ISD::NEON_CMP"; + case AArch64ISD::NEON_CMPZ: + return "AArch64ISD::NEON_CMPZ"; + case AArch64ISD::NEON_TST: + return "AArch64ISD::NEON_TST"; + case AArch64ISD::NEON_QSHLs: + return "AArch64ISD::NEON_QSHLs"; + case AArch64ISD::NEON_QSHLu: + return "AArch64ISD::NEON_QSHLu"; + case AArch64ISD::NEON_VDUP: + return "AArch64ISD::NEON_VDUP"; + case AArch64ISD::NEON_VDUPLANE: + return "AArch64ISD::NEON_VDUPLANE"; + case AArch64ISD::NEON_REV16: + return "AArch64ISD::NEON_REV16"; + case AArch64ISD::NEON_REV32: + return "AArch64ISD::NEON_REV32"; + case AArch64ISD::NEON_REV64: + return "AArch64ISD::NEON_REV64"; + case AArch64ISD::NEON_UZP1: + return "AArch64ISD::NEON_UZP1"; + case AArch64ISD::NEON_UZP2: + return "AArch64ISD::NEON_UZP2"; + case AArch64ISD::NEON_ZIP1: + return "AArch64ISD::NEON_ZIP1"; + case AArch64ISD::NEON_ZIP2: + return "AArch64ISD::NEON_ZIP2"; + case AArch64ISD::NEON_TRN1: + return "AArch64ISD::NEON_TRN1"; + case AArch64ISD::NEON_TRN2: + return "AArch64ISD::NEON_TRN2"; + case AArch64ISD::NEON_LD1_UPD: + return "AArch64ISD::NEON_LD1_UPD"; + case AArch64ISD::NEON_LD2_UPD: + return "AArch64ISD::NEON_LD2_UPD"; + case AArch64ISD::NEON_LD3_UPD: + return "AArch64ISD::NEON_LD3_UPD"; + case AArch64ISD::NEON_LD4_UPD: + return "AArch64ISD::NEON_LD4_UPD"; + case AArch64ISD::NEON_ST1_UPD: + return "AArch64ISD::NEON_ST1_UPD"; + case AArch64ISD::NEON_ST2_UPD: + return "AArch64ISD::NEON_ST2_UPD"; + case AArch64ISD::NEON_ST3_UPD: + return "AArch64ISD::NEON_ST3_UPD"; + case AArch64ISD::NEON_ST4_UPD: + return "AArch64ISD::NEON_ST4_UPD"; + case AArch64ISD::NEON_LD1x2_UPD: + return "AArch64ISD::NEON_LD1x2_UPD"; + case AArch64ISD::NEON_LD1x3_UPD: + return "AArch64ISD::NEON_LD1x3_UPD"; + case AArch64ISD::NEON_LD1x4_UPD: + return "AArch64ISD::NEON_LD1x4_UPD"; + 
case AArch64ISD::NEON_ST1x2_UPD: + return "AArch64ISD::NEON_ST1x2_UPD"; + case AArch64ISD::NEON_ST1x3_UPD: + return "AArch64ISD::NEON_ST1x3_UPD"; + case AArch64ISD::NEON_ST1x4_UPD: + return "AArch64ISD::NEON_ST1x4_UPD"; + case AArch64ISD::NEON_LD2DUP: + return "AArch64ISD::NEON_LD2DUP"; + case AArch64ISD::NEON_LD3DUP: + return "AArch64ISD::NEON_LD3DUP"; + case AArch64ISD::NEON_LD4DUP: + return "AArch64ISD::NEON_LD4DUP"; + case AArch64ISD::NEON_LD2DUP_UPD: + return "AArch64ISD::NEON_LD2DUP_UPD"; + case AArch64ISD::NEON_LD3DUP_UPD: + return "AArch64ISD::NEON_LD3DUP_UPD"; + case AArch64ISD::NEON_LD4DUP_UPD: + return "AArch64ISD::NEON_LD4DUP_UPD"; + case AArch64ISD::NEON_LD2LN_UPD: + return "AArch64ISD::NEON_LD2LN_UPD"; + case AArch64ISD::NEON_LD3LN_UPD: + return "AArch64ISD::NEON_LD3LN_UPD"; + case AArch64ISD::NEON_LD4LN_UPD: + return "AArch64ISD::NEON_LD4LN_UPD"; + case AArch64ISD::NEON_ST2LN_UPD: + return "AArch64ISD::NEON_ST2LN_UPD"; + case AArch64ISD::NEON_ST3LN_UPD: + return "AArch64ISD::NEON_ST3LN_UPD"; + case AArch64ISD::NEON_ST4LN_UPD: + return "AArch64ISD::NEON_ST4LN_UPD"; + case AArch64ISD::NEON_VEXTRACT: + return "AArch64ISD::NEON_VEXTRACT"; + default: + return NULL; } } @@ -826,7 +1059,7 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForNode(CallingConv::ID CC) const { void AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc DL, SDValue &Chain) const { + SDLoc DL, SDValue &Chain) const { MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); AArch64MachineFunctionInfo *FuncInfo @@ -858,24 +1091,31 @@ AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, } } + if (getSubtarget()->hasFPARMv8()) { unsigned FPRSaveSize = 16 * (NumFPRArgRegs - FirstVariadicFPR); int FPRIdx = 0; - if (FPRSaveSize != 0) { - FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); - - SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); - - for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { - unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], - &AArch64::FPR128RegClass); - SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); - SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, - MachinePointerInfo::getStack(i * 16), - false, false, 0); - MemOps.push_back(Store); - FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, - DAG.getConstant(16, getPointerTy())); + // According to the AArch64 Procedure Call Standard, section B.1/B.3, we + // can omit a register save area if we know we'll never use registers of + // that class. 
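+    // Worked example (illustrative): in a variadic callee whose named
+    // arguments occupy v0 and v1, FirstVariadicFPR == 2. AAPCS64 passes the
+    // first eight FP/SIMD arguments in v0-v7, each spilled here as a 16-byte
+    // q register, so:
+    //   FPRSaveSize = 16 * (8 - 2) = 96 bytes.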
+ if (FPRSaveSize != 0) { + FPRIdx = MFI->CreateStackObject(FPRSaveSize, 16, false); + + SDValue FIN = DAG.getFrameIndex(FPRIdx, getPointerTy()); + + for (unsigned i = FirstVariadicFPR; i < NumFPRArgRegs; ++i) { + unsigned VReg = MF.addLiveIn(AArch64FPRArgRegs[i], + &AArch64::FPR128RegClass); + SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::f128); + SDValue Store = DAG.getStore(Val.getValue(1), DL, Val, FIN, + MachinePointerInfo::getStack(i * 16), + false, false, 0); + MemOps.push_back(Store); + FIN = DAG.getNode(ISD::ADD, DL, getPointerTy(), FIN, + DAG.getConstant(16, getPointerTy())); + } } + FuncInfo->setVariadicFPRIdx(FPRIdx); + FuncInfo->setVariadicFPRSize(FPRSaveSize); } int StackIdx = MFI->CreateFixedObject(8, CCInfo.getNextStackOffset(), true); @@ -883,8 +1123,6 @@ AArch64TargetLowering::SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, FuncInfo->setVariadicStackIdx(StackIdx); FuncInfo->setVariadicGPRIdx(GPRIdx); FuncInfo->setVariadicGPRSize(GPRSaveSize); - FuncInfo->setVariadicFPRIdx(FPRIdx); - FuncInfo->setVariadicFPRSize(FPRSaveSize); if (!MemOps.empty()) { Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, &MemOps[0], @@ -897,7 +1135,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); AArch64MachineFunctionInfo *FuncInfo @@ -1012,7 +1250,7 @@ AArch64TargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { + SDLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector RVLocs; @@ -1085,10 +1323,10 @@ SDValue AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &IsTailCall = CLI.IsTailCall; @@ -1151,7 +1389,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, } if (!IsSibCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + dl); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, AArch64::XSP, getPointerTy()); @@ -1282,7 +1521,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, // in the correct location. 
if (IsTailCall && !IsSibCall) { Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); + DAG.getIntPtrConstant(0, true), InFlag, dl); InFlag = Chain.getValue(1); } @@ -1336,7 +1575,7 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), DAG.getIntPtrConstant(CalleePopBytes, true), - InFlag); + InFlag, dl); InFlag = Chain.getValue(1); } @@ -1348,7 +1587,7 @@ SDValue AArch64TargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { // Assign locations to each value returned by this call. SmallVector RVLocs; @@ -1537,7 +1776,7 @@ SDValue AArch64TargetLowering::addTokenForArgument(SDValue Chain, } // Build a tokenfactor for all the chains. - return DAG.getNode(ISD::TokenFactor, Chain.getDebugLoc(), MVT::Other, + return DAG.getNode(ISD::TokenFactor, SDLoc(Chain), MVT::Other, &ArgChains[0], ArgChains.size()); } @@ -1570,7 +1809,7 @@ bool AArch64TargetLowering::isLegalICmpImmediate(int64_t Val) const { SDValue AArch64TargetLowering::getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &A64cc, - SelectionDAG &DAG, DebugLoc &dl) const { + SelectionDAG &DAG, SDLoc &dl) const { if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { int64_t C = 0; EVT VT = RHSC->getValueType(0); @@ -1663,7 +1902,7 @@ static A64CC::CondCodes FPCCToA64CC(ISD::CondCode CC, SDValue AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); EVT PtrVT = getPointerTy(); const BlockAddress *BA = cast(Op)->getBlockAddress(); @@ -1693,7 +1932,7 @@ AArch64TargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) const { // (BRCOND chain, val, dest) SDValue AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue Chain = Op.getOperand(0); SDValue TheBit = Op.getOperand(1); SDValue DestBB = Op.getOperand(2); @@ -1716,7 +1955,7 @@ AArch64TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) const { // (BR_CC chain, condcode, lhs, rhs, dest) SDValue AArch64TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue Chain = Op.getOperand(0); ISD::CondCode CC = cast(Op.getOperand(1))->get(); SDValue LHS = Op.getOperand(2); @@ -1802,7 +2041,7 @@ AArch64TargetLowering::LowerF128ToCall(SDValue Op, SelectionDAG &DAG, CallLoweringInfo CLI(InChain, RetTy, false, false, false, false, 0, getLibcallCallingConv(Call), isTailCall, /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, Op->getDebugLoc()); + Callee, Args, DAG, SDLoc(Op)); std::pair CallInfo = LowerCallTo(CLI); if (!CallInfo.second.getNode()) @@ -1824,7 +2063,7 @@ AArch64TargetLowering::LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const { SDValue SrcVal = Op.getOperand(0); return makeLibCall(DAG, LC, Op.getValueType(), &SrcVal, 1, - /*isSigned*/ false, Op.getDebugLoc()); + /*isSigned*/ false, SDLoc(Op)).first; } SDValue @@ -1854,6 +2093,45 @@ AArch64TargetLowering::LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, return LowerF128ToCall(Op, DAG, LC); } +SDValue AArch64TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ + MachineFunction &MF = DAG.getMachineFunction(); + MachineFrameInfo *MFI = 
MF.getFrameInfo(); + MFI->setReturnAddressIsTaken(true); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + if (Depth) { + SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); + SDValue Offset = DAG.getConstant(8, MVT::i64); + return DAG.getLoad(VT, dl, DAG.getEntryNode(), + DAG.getNode(ISD::ADD, dl, VT, FrameAddr, Offset), + MachinePointerInfo(), false, false, false, 0); + } + + // Return X30, which contains the return address. Mark it an implicit live-in. + unsigned Reg = MF.addLiveIn(AArch64::X30, getRegClassFor(MVT::i64)); + return DAG.getCopyFromReg(DAG.getEntryNode(), dl, Reg, MVT::i64); +} + + +SDValue AArch64TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) + const { + MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo(); + MFI->setFrameAddressIsTaken(true); + + EVT VT = Op.getValueType(); + SDLoc dl(Op); + unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); + unsigned FrameReg = AArch64::X29; + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl, FrameReg, VT); + while (Depth--) + FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, + MachinePointerInfo(), + false, false, false, 0); + return FrameAddr; +} + SDValue AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const { @@ -1861,7 +2139,7 @@ AArch64TargetLowering::LowerGlobalAddressELFLarge(SDValue Op, assert(getTargetMachine().getRelocationModel() == Reloc::Static); EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); const GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); @@ -1885,7 +2163,7 @@ AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op, assert(getTargetMachine().getCodeModel() == CodeModel::Small); EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); const GlobalAddressSDNode *GN = cast(Op); const GlobalValue *GV = GN->getGlobal(); unsigned Alignment = GV->getAlignment(); @@ -1927,7 +2205,7 @@ AArch64TargetLowering::LowerGlobalAddressELFSmall(SDValue Op, } unsigned char HiFixup, LoFixup; - bool UseGOT = Subtarget->GVIsIndirectSymbol(GV, RelocM); + bool UseGOT = getSubtarget()->GVIsIndirectSymbol(GV, RelocM); if (UseGOT) { HiFixup = AArch64II::MO_GOT; @@ -1978,7 +2256,7 @@ AArch64TargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, - DebugLoc DL, + SDLoc DL, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); @@ -2023,7 +2301,7 @@ SDValue AArch64TargetLowering::LowerTLSDescCall(SDValue SymAddr, SDValue AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { - assert(Subtarget->isTargetELF() && + assert(getSubtarget()->isTargetELF() && "TLS not implemented for non-ELF targets"); assert(getTargetMachine().getCodeModel() == CodeModel::Small && "TLS only supported in small memory model"); @@ -2033,7 +2311,7 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, SDValue TPOff; EVT PtrVT = getPointerTy(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); const GlobalValue *GV = GA->getGlobal(); SDValue ThreadBase = DAG.getNode(AArch64ISD::THREAD_POINTER, DL, PtrVT); @@ -2054,7 +2332,7 @@ AArch64TargetLowering::LowerGlobalTLSAddress(SDValue Op, AArch64II::MO_TPREL_G0_NC); TPOff = SDValue(DAG.getMachineNode(AArch64::MOVZxii, DL, PtrVT, HiVar, - DAG.getTargetConstant(0, MVT::i32)), 0); + DAG.getTargetConstant(1, MVT::i32)), 0); TPOff = SDValue(DAG.getMachineNode(AArch64::MOVKxii, DL, PtrVT, TPOff, 
LoVar, DAG.getTargetConstant(0, MVT::i32)), 0); @@ -2134,7 +2412,7 @@ AArch64TargetLowering::LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, SDValue AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { JumpTableSDNode *JT = cast(Op); - DebugLoc dl = JT->getDebugLoc(); + SDLoc dl(JT); EVT PtrVT = getPointerTy(); // When compiling PIC, jump tables get put in the code section so a static @@ -2161,7 +2439,7 @@ AArch64TargetLowering::LowerJumpTable(SDValue Op, SelectionDAG &DAG) const { // (SELECT_CC lhs, rhs, iftrue, iffalse, condcode) SDValue AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); SDValue IfTrue = Op.getOperand(2); @@ -2217,7 +2495,7 @@ AArch64TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { // (SELECT testbit, iftrue, iffalse) SDValue AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue TheBit = Op.getOperand(0); SDValue IfTrue = Op.getOperand(1); SDValue IfFalse = Op.getOperand(2); @@ -2236,15 +2514,225 @@ AArch64TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { DAG.getConstant(A64CC::NE, MVT::i32)); } +static SDValue LowerVectorSETCC(SDValue Op, SelectionDAG &DAG) { + SDLoc DL(Op); + SDValue LHS = Op.getOperand(0); + SDValue RHS = Op.getOperand(1); + ISD::CondCode CC = cast(Op.getOperand(2))->get(); + EVT VT = Op.getValueType(); + bool Invert = false; + SDValue Op0, Op1; + unsigned Opcode; + + if (LHS.getValueType().isInteger()) { + + // Attempt to use Vector Integer Compare Mask Test instruction. + // TST = icmp ne (and (op0, op1), zero). + if (CC == ISD::SETNE) { + if (((LHS.getOpcode() == ISD::AND) && + ISD::isBuildVectorAllZeros(RHS.getNode())) || + ((RHS.getOpcode() == ISD::AND) && + ISD::isBuildVectorAllZeros(LHS.getNode()))) { + + SDValue AndOp = (LHS.getOpcode() == ISD::AND) ? LHS : RHS; + SDValue NewLHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(0)); + SDValue NewRHS = DAG.getNode(ISD::BITCAST, DL, VT, AndOp.getOperand(1)); + return DAG.getNode(AArch64ISD::NEON_TST, DL, VT, NewLHS, NewRHS); + } + } + + // Attempt to use Vector Integer Compare Mask against Zero instr (Signed). + // Note: Compare against Zero does not support unsigned predicates. + if ((ISD::isBuildVectorAllZeros(RHS.getNode()) || + ISD::isBuildVectorAllZeros(LHS.getNode())) && + !isUnsignedIntSetCC(CC)) { + + // If LHS is the zero value, swap operands and CondCode. + if (ISD::isBuildVectorAllZeros(LHS.getNode())) { + CC = getSetCCSwappedOperands(CC); + Op0 = RHS; + } else + Op0 = LHS; + + // Ensure valid CondCode for Compare Mask against Zero instruction: + // EQ, GE, GT, LE, LT. + if (ISD::SETNE == CC) { + Invert = true; + CC = ISD::SETEQ; + } + + // Using constant type to differentiate integer and FP compares with zero. + Op1 = DAG.getConstant(0, MVT::i32); + Opcode = AArch64ISD::NEON_CMPZ; + + } else { + // Attempt to use Vector Integer Compare Mask instr (Signed/Unsigned). + // Ensure valid CondCode for Compare Mask instr: EQ, GE, GT, UGE, UGT. 
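+    // For example, the canonicalization below yields (shown for <4 x i32>
+    // operands; the mappings are illustrative):
+    //   setlt  a, b  -->  swap operands, setgt  b, a   (CMGT b.4s, a.4s)
+    //   setule a, b  -->  swap operands, setuge b, a   (CMHS b.4s, a.4s)
+    //   setne  a, b  -->  seteq a, b, then invert      (CMEQ + NOT)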
+ bool Swap = false; + switch (CC) { + default: + llvm_unreachable("Illegal integer comparison."); + case ISD::SETEQ: + case ISD::SETGT: + case ISD::SETGE: + case ISD::SETUGT: + case ISD::SETUGE: + break; + case ISD::SETNE: + Invert = true; + CC = ISD::SETEQ; + break; + case ISD::SETULT: + case ISD::SETULE: + case ISD::SETLT: + case ISD::SETLE: + Swap = true; + CC = getSetCCSwappedOperands(CC); + } + + if (Swap) + std::swap(LHS, RHS); + + Opcode = AArch64ISD::NEON_CMP; + Op0 = LHS; + Op1 = RHS; + } + + // Generate Compare Mask instr or Compare Mask against Zero instr. + SDValue NeonCmp = + DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC)); + + if (Invert) + NeonCmp = DAG.getNOT(DL, NeonCmp, VT); + + return NeonCmp; + } + + // Now handle Floating Point cases. + // Attempt to use Vector Floating Point Compare Mask against Zero instruction. + if (ISD::isBuildVectorAllZeros(RHS.getNode()) || + ISD::isBuildVectorAllZeros(LHS.getNode())) { + + // If LHS is the zero value, swap operands and CondCode. + if (ISD::isBuildVectorAllZeros(LHS.getNode())) { + CC = getSetCCSwappedOperands(CC); + Op0 = RHS; + } else + Op0 = LHS; + + // Using constant type to differentiate integer and FP compares with zero. + Op1 = DAG.getConstantFP(0, MVT::f32); + Opcode = AArch64ISD::NEON_CMPZ; + } else { + // Attempt to use Vector Floating Point Compare Mask instruction. + Op0 = LHS; + Op1 = RHS; + Opcode = AArch64ISD::NEON_CMP; + } + + SDValue NeonCmpAlt; + // Some register compares have to be implemented with swapped CC and operands, + // e.g.: OLT implemented as OGT with swapped operands. + bool SwapIfRegArgs = false; + + // Ensure valid CondCode for FP Compare Mask against Zero instruction: + // EQ, GE, GT, LE, LT. + // And ensure valid CondCode for FP Compare Mask instruction: EQ, GE, GT. + switch (CC) { + default: + llvm_unreachable("Illegal FP comparison"); + case ISD::SETUNE: + case ISD::SETNE: + Invert = true; // Fallthrough + case ISD::SETOEQ: + case ISD::SETEQ: + CC = ISD::SETEQ; + break; + case ISD::SETOLT: + case ISD::SETLT: + CC = ISD::SETLT; + SwapIfRegArgs = true; + break; + case ISD::SETOGT: + case ISD::SETGT: + CC = ISD::SETGT; + break; + case ISD::SETOLE: + case ISD::SETLE: + CC = ISD::SETLE; + SwapIfRegArgs = true; + break; + case ISD::SETOGE: + case ISD::SETGE: + CC = ISD::SETGE; + break; + case ISD::SETUGE: + Invert = true; + CC = ISD::SETLT; + SwapIfRegArgs = true; + break; + case ISD::SETULE: + Invert = true; + CC = ISD::SETGT; + break; + case ISD::SETUGT: + Invert = true; + CC = ISD::SETLE; + SwapIfRegArgs = true; + break; + case ISD::SETULT: + Invert = true; + CC = ISD::SETGE; + break; + case ISD::SETUEQ: + Invert = true; // Fallthrough + case ISD::SETONE: + // Expand this to (OGT |OLT). + NeonCmpAlt = + DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGT)); + CC = ISD::SETLT; + SwapIfRegArgs = true; + break; + case ISD::SETUO: + Invert = true; // Fallthrough + case ISD::SETO: + // Expand this to (OGE | OLT). 
+ NeonCmpAlt = + DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(ISD::SETGE)); + CC = ISD::SETLT; + SwapIfRegArgs = true; + break; + } + + if (Opcode == AArch64ISD::NEON_CMP && SwapIfRegArgs) { + CC = getSetCCSwappedOperands(CC); + std::swap(Op0, Op1); + } + + // Generate FP Compare Mask instr or FP Compare Mask against Zero instr + SDValue NeonCmp = DAG.getNode(Opcode, DL, VT, Op0, Op1, DAG.getCondCode(CC)); + + if (NeonCmpAlt.getNode()) + NeonCmp = DAG.getNode(ISD::OR, DL, VT, NeonCmp, NeonCmpAlt); + + if (Invert) + NeonCmp = DAG.getNOT(DL, NeonCmp, VT); + + return NeonCmp; +} + // (SETCC lhs, rhs, condcode) SDValue AArch64TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue LHS = Op.getOperand(0); SDValue RHS = Op.getOperand(1); ISD::CondCode CC = cast(Op.getOperand(2))->get(); EVT VT = Op.getValueType(); + if (VT.isVector()) + return LowerVectorSETCC(Op, DAG); + if (LHS.getValueType() == MVT::f128) { // f128 comparisons will be lowered to libcalls giving a valid LHS and RHS // for the rest of the function (some i32 or i64 values). @@ -2298,7 +2786,7 @@ AArch64TargetLowering::LowerVACOPY(SDValue Op, SelectionDAG &DAG) const { // We have to make sure we copy the entire structure: 8+8+8+4+4 = 32 bytes // rather than just 8. - return DAG.getMemcpy(Op.getOperand(0), Op.getDebugLoc(), + return DAG.getMemcpy(Op.getOperand(0), SDLoc(Op), Op.getOperand(1), Op.getOperand(2), DAG.getConstant(32, MVT::i32), 8, false, false, MachinePointerInfo(DestSV), MachinePointerInfo(SrcSV)); @@ -2311,7 +2799,7 @@ AArch64TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { MachineFunction &MF = DAG.getMachineFunction(); AArch64MachineFunctionInfo *FuncInfo = MF.getInfo(); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Chain = Op.getOperand(0); SDValue VAList = Op.getOperand(1); @@ -2389,6 +2877,8 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG, false); case ISD::FP_ROUND: return LowerFP_ROUND(Op, DAG); case ISD::FP_EXTEND: return LowerFP_EXTEND(Op, DAG); + case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG); + case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG); case ISD::BlockAddress: return LowerBlockAddress(Op, DAG); case ISD::BRCOND: return LowerBRCOND(Op, DAG); @@ -2401,16 +2891,161 @@ AArch64TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SETCC: return LowerSETCC(Op, DAG); case ISD::VACOPY: return LowerVACOPY(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG); + case ISD::BUILD_VECTOR: + return LowerBUILD_VECTOR(Op, DAG, getSubtarget()); + case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG); } return SDValue(); } +/// Check if the specified splat value corresponds to a valid vector constant +/// for a Neon instruction with a "modified immediate" operand (e.g., MOVI). If +/// so, return the encoded 8-bit immediate and the OpCmode instruction fields +/// values. +static bool isNeonModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, + unsigned SplatBitSize, SelectionDAG &DAG, + bool is128Bits, NeonModImmType type, EVT &VT, + unsigned &Imm, unsigned &OpCmode) { + switch (SplatBitSize) { + default: + llvm_unreachable("unexpected size for isNeonModifiedImm"); + case 8: { + if (type != Neon_Mov_Imm) + return false; + assert((SplatBits & ~0xff) == 0 && "one byte splat value is too big"); + // Neon movi per byte: Op=0, Cmode=1110. 
+ OpCmode = 0xe; + Imm = SplatBits; + VT = is128Bits ? MVT::v16i8 : MVT::v8i8; + break; + } + case 16: { + // Neon move inst per halfword + VT = is128Bits ? MVT::v8i16 : MVT::v4i16; + if ((SplatBits & ~0xff) == 0) { + // Value = 0x00nn is 0x00nn LSL 0 + // movi: Op=0, Cmode=1000; mvni: Op=1, Cmode=1000 + // bic: Op=1, Cmode=1001; orr: Op=0, Cmode=1001 + // Op=x, Cmode=100y + Imm = SplatBits; + OpCmode = 0x8; + break; + } + if ((SplatBits & ~0xff00) == 0) { + // Value = 0xnn00 is 0x00nn LSL 8 + // movi: Op=0, Cmode=1010; mvni: Op=1, Cmode=1010 + // bic: Op=1, Cmode=1011; orr: Op=0, Cmode=1011 + // Op=x, Cmode=101x + Imm = SplatBits >> 8; + OpCmode = 0xa; + break; + } + // can't handle any other + return false; + } + + case 32: { + // First the LSL variants (MSL is unusable by some interested instructions). + + // Neon move instr per word, shift zeros + VT = is128Bits ? MVT::v4i32 : MVT::v2i32; + if ((SplatBits & ~0xff) == 0) { + // Value = 0x000000nn is 0x000000nn LSL 0 + // movi: Op=0, Cmode= 0000; mvni: Op=1, Cmode= 0000 + // bic: Op=1, Cmode= 0001; orr: Op=0, Cmode= 0001 + // Op=x, Cmode=000x + Imm = SplatBits; + OpCmode = 0; + break; + } + if ((SplatBits & ~0xff00) == 0) { + // Value = 0x0000nn00 is 0x000000nn LSL 8 + // movi: Op=0, Cmode= 0010; mvni: Op=1, Cmode= 0010 + // bic: Op=1, Cmode= 0011; orr : Op=0, Cmode= 0011 + // Op=x, Cmode=001x + Imm = SplatBits >> 8; + OpCmode = 0x2; + break; + } + if ((SplatBits & ~0xff0000) == 0) { + // Value = 0x00nn0000 is 0x000000nn LSL 16 + // movi: Op=0, Cmode= 0100; mvni: Op=1, Cmode= 0100 + // bic: Op=1, Cmode= 0101; orr: Op=0, Cmode= 0101 + // Op=x, Cmode=010x + Imm = SplatBits >> 16; + OpCmode = 0x4; + break; + } + if ((SplatBits & ~0xff000000) == 0) { + // Value = 0xnn000000 is 0x000000nn LSL 24 + // movi: Op=0, Cmode= 0110; mvni: Op=1, Cmode= 0110 + // bic: Op=1, Cmode= 0111; orr: Op=0, Cmode= 0111 + // Op=x, Cmode=011x + Imm = SplatBits >> 24; + OpCmode = 0x6; + break; + } + + // Now the MSL immediates. + + // Neon move instr per word, shift ones + if ((SplatBits & ~0xffff) == 0 && + ((SplatBits | SplatUndef) & 0xff) == 0xff) { + // Value = 0x0000nnff is 0x000000nn MSL 8 + // movi: Op=0, Cmode= 1100; mvni: Op=1, Cmode= 1100 + // Op=x, Cmode=1100 + Imm = SplatBits >> 8; + OpCmode = 0xc; + break; + } + if ((SplatBits & ~0xffffff) == 0 && + ((SplatBits | SplatUndef) & 0xffff) == 0xffff) { + // Value = 0x00nnffff is 0x000000nn MSL 16 + // movi: Op=1, Cmode= 1101; mvni: Op=1, Cmode= 1101 + // Op=x, Cmode=1101 + Imm = SplatBits >> 16; + OpCmode = 0xd; + break; + } + // can't handle any other + return false; + } + + case 64: { + if (type != Neon_Mov_Imm) + return false; + // Neon move instr bytemask, where each byte is either 0x00 or 0xff. + // movi Op=1, Cmode=1110. + OpCmode = 0x1e; + uint64_t BitMask = 0xff; + uint64_t Val = 0; + unsigned ImmMask = 1; + Imm = 0; + for (int ByteNum = 0; ByteNum < 8; ++ByteNum) { + if (((SplatBits | SplatUndef) & BitMask) == BitMask) { + Val |= BitMask; + Imm |= ImmMask; + } else if ((SplatBits & BitMask) != 0) { + return false; + } + BitMask <<= 8; + ImmMask <<= 1; + } + SplatBits = Val; + VT = is128Bits ? MVT::v2i64 : MVT::v1i64; + break; + } + } + + return true; +} + static SDValue PerformANDCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT = N->getValueType(0); // We're looking for an SRA/SHL pair which form an SBFX. 
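To make the modified-immediate rules in isNeonModifiedImm above concrete, here is a small standalone sketch (not part of the patch) that reproduces the 32-bit "LSL 8" case by hand, following the comments in the code:

    #include <cassert>
    #include <cstdint>

    int main() {
      // Splat value 0x0000ab00: only bits 15:8 are set, so it matches the
      // "Value = 0x0000nn00 is 0x000000nn LSL 8" form of the 32-bit case.
      uint64_t SplatBits = 0x0000ab00;
      assert((SplatBits & ~0xff00ull) == 0); // the guard the code applies
      unsigned Imm = unsigned(SplatBits >> 8); // encoded immediate: 0xab
      unsigned OpCmode = 0x2;                  // movi: Op=0, Cmode=0010
      assert(Imm == 0xab && OpCmode == 0x2);
      return 0;
    }

The same shape covers the other LSL variants: each widening of the shift bumps OpCmode by two and takes the payload byte from the next position (0x4 for LSL 16, 0x6 for LSL 24).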
@@ -2448,7 +3083,7 @@ static SDValue PerformANDCombine(SDNode *N, /// a compatible SHL operation (unless they're already low). This function /// checks that condition and returns the least-significant bit that's /// intended. If the operation not a field preparation, -1 is returned. -static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT, +static int32_t getLSBForBFI(SelectionDAG &DAG, SDLoc DL, EVT VT, SDValue &MaskedVal, uint64_t Mask) { if (!isShiftedMask_64(Mask)) return -1; @@ -2464,7 +3099,7 @@ static int32_t getLSBForBFI(SelectionDAG &DAG, DebugLoc DL, EVT VT, // cases (e.g. bitfield to bitfield copy) may still need a real shift before // the BFI. - uint64_t LSB = CountTrailingZeros_64(Mask); + uint64_t LSB = countTrailingZeros(Mask); int64_t ShiftRightRequired = LSB; if (MaskedVal.getOpcode() == ISD::SHL && isa(MaskedVal.getOperand(1))) { @@ -2524,7 +3159,7 @@ static SDValue tryCombineToBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT = N->getValueType(0); assert(N->getOpcode() == ISD::OR && "Unexpected root"); @@ -2605,7 +3240,7 @@ static SDValue tryCombineToLargerBFI(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const AArch64Subtarget *Subtarget) { SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT = N->getValueType(0); // First job is to hunt for a MaskedBFI on either the left or right. Swap @@ -2687,7 +3322,7 @@ static bool findEXTRHalf(SDValue N, SDValue &Src, uint32_t &ShiftAmount, static SDValue tryCombineToEXTR(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT = N->getValueType(0); assert(N->getOpcode() == ISD::OR && "Unexpected root"); @@ -2731,6 +3366,7 @@ static SDValue PerformORCombine(SDNode *N, const AArch64Subtarget *Subtarget) { SelectionDAG &DAG = DCI.DAG; + SDLoc DL(N); EVT VT = N->getValueType(0); if(!DAG.getTargetLoweringInfo().isTypeLegal(VT)) @@ -2751,6 +3387,44 @@ static SDValue PerformORCombine(SDNode *N, if (Res.getNode()) return Res; + if (!Subtarget->hasNEON()) + return SDValue(); + + // Attempt to use vector immediate-form BSL + // (or (and B, A), (and C, ~A)) => (VBSL A, B, C) when A is a constant. + + SDValue N0 = N->getOperand(0); + if (N0.getOpcode() != ISD::AND) + return SDValue(); + + SDValue N1 = N->getOperand(1); + if (N1.getOpcode() != ISD::AND) + return SDValue(); + + if (VT.isVector() && DAG.getTargetLoweringInfo().isTypeLegal(VT)) { + APInt SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); + APInt SplatBits0; + if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, + HasAnyUndefs) && + !HasAnyUndefs) { + BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); + APInt SplatBits1; + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && + !HasAnyUndefs && SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection simpler. + EVT CanonicalVT = VT.is128BitVector() ? 
MVT::v16i8 : MVT::v8i8; + SDValue Result = DAG.getNode(AArch64ISD::NEON_BSL, DL, CanonicalVT, + N0->getOperand(1), N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, DL, VT, Result); + } + } + } + return SDValue(); } @@ -2759,7 +3433,7 @@ static SDValue PerformSRACombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT = N->getValueType(0); // We're looking for an SRA/SHL pair which form an SBFX. @@ -2791,6 +3465,336 @@ static SDValue PerformSRACombine(SDNode *N, DAG.getConstant(LSB + Width - 1, MVT::i64)); } +/// Check if this is a valid build_vector for the immediate operand of +/// a vector shift operation, where all the elements of the build_vector +/// must have the same constant integer value. +static bool getVShiftImm(SDValue Op, unsigned ElementBits, int64_t &Cnt) { + // Ignore bit_converts. + while (Op.getOpcode() == ISD::BITCAST) + Op = Op.getOperand(0); + BuildVectorSDNode *BVN = dyn_cast(Op.getNode()); + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + if (!BVN || !BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, + HasAnyUndefs, ElementBits) || + SplatBitSize > ElementBits) + return false; + Cnt = SplatBits.getSExtValue(); + return true; +} + +/// Check if this is a valid build_vector for the immediate operand of +/// a vector shift left operation. That value must be in the range: +/// 0 <= Value < ElementBits +static bool isVShiftLImm(SDValue Op, EVT VT, int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (!getVShiftImm(Op, ElementBits, Cnt)) + return false; + return (Cnt >= 0 && Cnt < ElementBits); +} + +/// Check if this is a valid build_vector for the immediate operand of a +/// vector shift right operation. The value must be in the range: +/// 1 <= Value <= ElementBits +static bool isVShiftRImm(SDValue Op, EVT VT, int64_t &Cnt) { + assert(VT.isVector() && "vector shift count is not a vector type"); + unsigned ElementBits = VT.getVectorElementType().getSizeInBits(); + if (!getVShiftImm(Op, ElementBits, Cnt)) + return false; + return (Cnt >= 1 && Cnt <= ElementBits); +} + +/// Checks for immediate versions of vector shifts and lowers them. +static SDValue PerformShiftCombine(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI, + const AArch64Subtarget *ST) { + SelectionDAG &DAG = DCI.DAG; + EVT VT = N->getValueType(0); + if (N->getOpcode() == ISD::SRA && (VT == MVT::i32 || VT == MVT::i64)) + return PerformSRACombine(N, DCI); + + // Nothing to be done for scalar shifts. 
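The OR-of-ANDs shape matched by the BSL combine above is exactly the NEON bitwise-select operation, shown here per 32-bit word. A minimal scalar sketch (editorial, not part of the patch; the name bitwiseSelect is hypothetical):

#include <cassert>
#include <cstdint>

// Each result bit comes from B where Mask is 1 and from C where Mask is 0,
// which is what (or (and B, Mask), (and C, ~Mask)) computes.
uint32_t bitwiseSelect(uint32_t Mask, uint32_t B, uint32_t C) {
  return (B & Mask) | (C & ~Mask);
}

int main() {
  assert(bitwiseSelect(0xff00ff00u, 0x12345678u, 0x9abcdef0u) == 0x12bc56f0u);
  return 0;
}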
+ const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!VT.isVector() || !TLI.isTypeLegal(VT)) + return SDValue(); + + assert(ST->hasNEON() && "unexpected vector shift"); + int64_t Cnt; + + switch (N->getOpcode()) { + default: + llvm_unreachable("unexpected shift opcode"); + + case ISD::SHL: + if (isVShiftLImm(N->getOperand(1), VT, Cnt)) { + SDValue RHS = + DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT, + DAG.getConstant(Cnt, MVT::i32)); + return DAG.getNode(ISD::SHL, SDLoc(N), VT, N->getOperand(0), RHS); + } + break; + + case ISD::SRA: + case ISD::SRL: + if (isVShiftRImm(N->getOperand(1), VT, Cnt)) { + SDValue RHS = + DAG.getNode(AArch64ISD::NEON_VDUP, SDLoc(N->getOperand(1)), VT, + DAG.getConstant(Cnt, MVT::i32)); + return DAG.getNode(N->getOpcode(), SDLoc(N), VT, N->getOperand(0), RHS); + } + break; + } + + return SDValue(); +} + +/// ARM-specific DAG combining for intrinsics. +static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { + unsigned IntNo = cast(N->getOperand(0))->getZExtValue(); + + switch (IntNo) { + default: + // Don't do anything for most intrinsics. + break; + + case Intrinsic::arm_neon_vqshifts: + case Intrinsic::arm_neon_vqshiftu: + EVT VT = N->getOperand(1).getValueType(); + int64_t Cnt; + if (!isVShiftLImm(N->getOperand(2), VT, Cnt)) + break; + unsigned VShiftOpc = (IntNo == Intrinsic::arm_neon_vqshifts) + ? AArch64ISD::NEON_QSHLs + : AArch64ISD::NEON_QSHLu; + return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), + N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); + } + + return SDValue(); +} + +/// Target-specific DAG combine function for NEON load/store intrinsics +/// to merge base address updates. +static SDValue CombineBaseUpdate(SDNode *N, + TargetLowering::DAGCombinerInfo &DCI) { + if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer()) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + bool isIntrinsic = (N->getOpcode() == ISD::INTRINSIC_VOID || + N->getOpcode() == ISD::INTRINSIC_W_CHAIN); + unsigned AddrOpIdx = (isIntrinsic ? 2 : 1); + SDValue Addr = N->getOperand(AddrOpIdx); + + // Search for a use of the address operand that is an increment. + for (SDNode::use_iterator UI = Addr.getNode()->use_begin(), + UE = Addr.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User->getOpcode() != ISD::ADD || + UI.getUse().getResNo() != Addr.getResNo()) + continue; + + // Check that the add is independent of the load/store. Otherwise, folding + // it would create a cycle. + if (User->isPredecessorOf(N) || N->isPredecessorOf(User)) + continue; + + // Find the new opcode for the updating load/store. 
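The isVShiftLImm/isVShiftRImm ranges used by PerformShiftCombine above are deliberately asymmetric, mirroring the immediate-shift encodings. A quick standalone sketch (editorial, not part of the patch; helper names are hypothetical):

#include <cassert>
#include <cstdint>

// Immediate left shifts encode 0..ElementBits-1; immediate right shifts
// encode 1..ElementBits (shifting right by the full element width is still
// meaningful for arithmetic shifts, which replicate the sign bit).
bool isLegalVShiftLImm(int64_t Cnt, unsigned ElementBits) {
  return Cnt >= 0 && Cnt < (int64_t)ElementBits;
}

bool isLegalVShiftRImm(int64_t Cnt, unsigned ElementBits) {
  return Cnt >= 1 && Cnt <= (int64_t)ElementBits;
}

int main() {
  assert(isLegalVShiftLImm(0, 8) && !isLegalVShiftLImm(8, 8));
  assert(!isLegalVShiftRImm(0, 8) && isLegalVShiftRImm(8, 8));
  return 0;
}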
+ bool isLoad = true; + bool isLaneOp = false; + unsigned NewOpc = 0; + unsigned NumVecs = 0; + if (isIntrinsic) { + unsigned IntNo = cast(N->getOperand(1))->getZExtValue(); + switch (IntNo) { + default: llvm_unreachable("unexpected intrinsic for Neon base update"); + case Intrinsic::arm_neon_vld1: NewOpc = AArch64ISD::NEON_LD1_UPD; + NumVecs = 1; break; + case Intrinsic::arm_neon_vld2: NewOpc = AArch64ISD::NEON_LD2_UPD; + NumVecs = 2; break; + case Intrinsic::arm_neon_vld3: NewOpc = AArch64ISD::NEON_LD3_UPD; + NumVecs = 3; break; + case Intrinsic::arm_neon_vld4: NewOpc = AArch64ISD::NEON_LD4_UPD; + NumVecs = 4; break; + case Intrinsic::arm_neon_vst1: NewOpc = AArch64ISD::NEON_ST1_UPD; + NumVecs = 1; isLoad = false; break; + case Intrinsic::arm_neon_vst2: NewOpc = AArch64ISD::NEON_ST2_UPD; + NumVecs = 2; isLoad = false; break; + case Intrinsic::arm_neon_vst3: NewOpc = AArch64ISD::NEON_ST3_UPD; + NumVecs = 3; isLoad = false; break; + case Intrinsic::arm_neon_vst4: NewOpc = AArch64ISD::NEON_ST4_UPD; + NumVecs = 4; isLoad = false; break; + case Intrinsic::aarch64_neon_vld1x2: NewOpc = AArch64ISD::NEON_LD1x2_UPD; + NumVecs = 2; break; + case Intrinsic::aarch64_neon_vld1x3: NewOpc = AArch64ISD::NEON_LD1x3_UPD; + NumVecs = 3; break; + case Intrinsic::aarch64_neon_vld1x4: NewOpc = AArch64ISD::NEON_LD1x4_UPD; + NumVecs = 4; break; + case Intrinsic::aarch64_neon_vst1x2: NewOpc = AArch64ISD::NEON_ST1x2_UPD; + NumVecs = 2; isLoad = false; break; + case Intrinsic::aarch64_neon_vst1x3: NewOpc = AArch64ISD::NEON_ST1x3_UPD; + NumVecs = 3; isLoad = false; break; + case Intrinsic::aarch64_neon_vst1x4: NewOpc = AArch64ISD::NEON_ST1x4_UPD; + NumVecs = 4; isLoad = false; break; + case Intrinsic::arm_neon_vld2lane: NewOpc = AArch64ISD::NEON_LD2LN_UPD; + NumVecs = 2; isLaneOp = true; break; + case Intrinsic::arm_neon_vld3lane: NewOpc = AArch64ISD::NEON_LD3LN_UPD; + NumVecs = 3; isLaneOp = true; break; + case Intrinsic::arm_neon_vld4lane: NewOpc = AArch64ISD::NEON_LD4LN_UPD; + NumVecs = 4; isLaneOp = true; break; + case Intrinsic::arm_neon_vst2lane: NewOpc = AArch64ISD::NEON_ST2LN_UPD; + NumVecs = 2; isLoad = false; isLaneOp = true; break; + case Intrinsic::arm_neon_vst3lane: NewOpc = AArch64ISD::NEON_ST3LN_UPD; + NumVecs = 3; isLoad = false; isLaneOp = true; break; + case Intrinsic::arm_neon_vst4lane: NewOpc = AArch64ISD::NEON_ST4LN_UPD; + NumVecs = 4; isLoad = false; isLaneOp = true; break; + } + } else { + isLaneOp = true; + switch (N->getOpcode()) { + default: llvm_unreachable("unexpected opcode for Neon base update"); + case AArch64ISD::NEON_LD2DUP: NewOpc = AArch64ISD::NEON_LD2DUP_UPD; + NumVecs = 2; break; + case AArch64ISD::NEON_LD3DUP: NewOpc = AArch64ISD::NEON_LD3DUP_UPD; + NumVecs = 3; break; + case AArch64ISD::NEON_LD4DUP: NewOpc = AArch64ISD::NEON_LD4DUP_UPD; + NumVecs = 4; break; + } + } + + // Find the size of memory referenced by the load/store. + EVT VecTy; + if (isLoad) + VecTy = N->getValueType(0); + else + VecTy = N->getOperand(AddrOpIdx + 1).getValueType(); + unsigned NumBytes = NumVecs * VecTy.getSizeInBits() / 8; + if (isLaneOp) + NumBytes /= VecTy.getVectorNumElements(); + + // If the increment is a constant, it must match the memory ref size. + SDValue Inc = User->getOperand(User->getOperand(0) == Addr ? 1 : 0); + if (ConstantSDNode *CInc = dyn_cast(Inc.getNode())) { + uint32_t IncVal = CInc->getZExtValue(); + if (IncVal != NumBytes) + continue; + Inc = DAG.getTargetConstant(IncVal, MVT::i32); + } + + // Create the new updating load/store node. 
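The increment-matching arithmetic above can be restated in isolation: a constant pointer increment is only foldable into the post-indexed form when it equals the memory footprint of the access. An editorial sketch (not part of the patch; the helper name is hypothetical):

#include <cassert>

// A whole-structure vldN/vstN advances the pointer by NumVecs full
// registers; the single-lane form advances by one element per register.
unsigned expectedIncrement(unsigned NumVecs, unsigned VecBits,
                           unsigned NumElts, bool IsLaneOp) {
  unsigned NumBytes = NumVecs * VecBits / 8;
  return IsLaneOp ? NumBytes / NumElts : NumBytes;
}

int main() {
  // ld2 of two 4x16-bit vectors moves 16 bytes; its lane form moves 4.
  assert(expectedIncrement(2, 64, 4, /*IsLaneOp=*/false) == 16);
  assert(expectedIncrement(2, 64, 4, /*IsLaneOp=*/true) == 4);
  return 0;
}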
+    EVT Tys[6];
+    unsigned NumResultVecs = (isLoad ? NumVecs : 0);
+    unsigned n;
+    for (n = 0; n < NumResultVecs; ++n)
+      Tys[n] = VecTy;
+    Tys[n++] = MVT::i64;
+    Tys[n] = MVT::Other;
+    SDVTList SDTys = DAG.getVTList(Tys, NumResultVecs + 2);
+    SmallVector<SDValue, 8> Ops;
+    Ops.push_back(N->getOperand(0)); // incoming chain
+    Ops.push_back(N->getOperand(AddrOpIdx));
+    Ops.push_back(Inc);
+    for (unsigned i = AddrOpIdx + 1; i < N->getNumOperands(); ++i) {
+      Ops.push_back(N->getOperand(i));
+    }
+    MemIntrinsicSDNode *MemInt = cast<MemIntrinsicSDNode>(N);
+    SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys,
+                                           Ops.data(), Ops.size(),
+                                           MemInt->getMemoryVT(),
+                                           MemInt->getMemOperand());
+
+    // Update the uses.
+    std::vector<SDValue> NewResults;
+    for (unsigned i = 0; i < NumResultVecs; ++i) {
+      NewResults.push_back(SDValue(UpdN.getNode(), i));
+    }
+    NewResults.push_back(SDValue(UpdN.getNode(), NumResultVecs + 1)); // chain
+    DCI.CombineTo(N, NewResults);
+    DCI.CombineTo(User, SDValue(UpdN.getNode(), NumResultVecs));
+
+    break;
+  }
+  return SDValue();
+}
+
+/// For a VDUPLANE node N, check if its source operand is a vldN-lane (N > 1)
+/// intrinsic, and if all the other uses of that intrinsic are also VDUPLANEs.
+/// If so, combine them to a vldN-dup operation and return the combined node.
+static SDValue CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) {
+  SelectionDAG &DAG = DCI.DAG;
+  EVT VT = N->getValueType(0);
+
+  // Check if the VDUPLANE operand is a vldN-lane intrinsic.
+  SDNode *VLD = N->getOperand(0).getNode();
+  if (VLD->getOpcode() != ISD::INTRINSIC_W_CHAIN)
+    return SDValue();
+  unsigned NumVecs = 0;
+  unsigned NewOpc = 0;
+  unsigned IntNo = cast<ConstantSDNode>(VLD->getOperand(1))->getZExtValue();
+  if (IntNo == Intrinsic::arm_neon_vld2lane) {
+    NumVecs = 2;
+    NewOpc = AArch64ISD::NEON_LD2DUP;
+  } else if (IntNo == Intrinsic::arm_neon_vld3lane) {
+    NumVecs = 3;
+    NewOpc = AArch64ISD::NEON_LD3DUP;
+  } else if (IntNo == Intrinsic::arm_neon_vld4lane) {
+    NumVecs = 4;
+    NewOpc = AArch64ISD::NEON_LD4DUP;
+  } else {
+    return SDValue();
+  }
+
+  // First check that all the vldN-lane uses are VDUPLANEs and that the lane
+  // numbers match the load.
+  unsigned VLDLaneNo =
+      cast<ConstantSDNode>(VLD->getOperand(NumVecs + 3))->getZExtValue();
+  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+       UI != UE; ++UI) {
+    // Ignore uses of the chain result.
+    if (UI.getUse().getResNo() == NumVecs)
+      continue;
+    SDNode *User = *UI;
+    if (User->getOpcode() != AArch64ISD::NEON_VDUPLANE ||
+        VLDLaneNo != cast<ConstantSDNode>(User->getOperand(1))->getZExtValue())
+      return SDValue();
+  }
+
+  // Create the vldN-dup node.
+  EVT Tys[5];
+  unsigned n;
+  for (n = 0; n < NumVecs; ++n)
+    Tys[n] = VT;
+  Tys[n] = MVT::Other;
+  SDVTList SDTys = DAG.getVTList(Tys, NumVecs + 1);
+  SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) };
+  MemIntrinsicSDNode *VLDMemInt = cast<MemIntrinsicSDNode>(VLD);
+  SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, 2,
+                                           VLDMemInt->getMemoryVT(),
+                                           VLDMemInt->getMemOperand());
+
+  // Update the uses.
+  for (SDNode::use_iterator UI = VLD->use_begin(), UE = VLD->use_end();
+       UI != UE; ++UI) {
+    unsigned ResNo = UI.getUse().getResNo();
+    // Ignore uses of the chain result.
+    if (ResNo == NumVecs)
+      continue;
+    SDNode *User = *UI;
+    DCI.CombineTo(User, SDValue(VLDDup.getNode(), ResNo));
+  }
+
+  // Now the vldN-lane intrinsic is dead except for its chain result.
+  // Update uses of the chain.
+  std::vector<SDValue> VLDDupResults;
+  for (unsigned n = 0; n < NumVecs; ++n)
+    VLDDupResults.push_back(SDValue(VLDDup.getNode(), n));
+  VLDDupResults.push_back(SDValue(VLDDup.getNode(), NumVecs));
+  DCI.CombineTo(VLD, VLDDupResults);
+
+  return SDValue(N, 0);
+}
 
 SDValue
 AArch64TargetLowering::PerformDAGCombine(SDNode *N,
@@ -2798,12 +3802,578 @@ AArch64TargetLowering::PerformDAGCombine(SDNode *N,
   switch (N->getOpcode()) {
   default: break;
   case ISD::AND: return PerformANDCombine(N, DCI);
-  case ISD::OR: return PerformORCombine(N, DCI, Subtarget);
-  case ISD::SRA: return PerformSRACombine(N, DCI);
+  case ISD::OR: return PerformORCombine(N, DCI, getSubtarget());
+  case ISD::SHL:
+  case ISD::SRA:
+  case ISD::SRL:
+    return PerformShiftCombine(N, DCI, getSubtarget());
+  case ISD::INTRINSIC_WO_CHAIN:
+    return PerformIntrinsicCombine(N, DCI.DAG);
+  case AArch64ISD::NEON_VDUPLANE:
+    return CombineVLDDUP(N, DCI);
+  case AArch64ISD::NEON_LD2DUP:
+  case AArch64ISD::NEON_LD3DUP:
+  case AArch64ISD::NEON_LD4DUP:
+    return CombineBaseUpdate(N, DCI);
+  case ISD::INTRINSIC_VOID:
+  case ISD::INTRINSIC_W_CHAIN:
+    switch (cast<ConstantSDNode>(N->getOperand(1))->getZExtValue()) {
+    case Intrinsic::arm_neon_vld1:
+    case Intrinsic::arm_neon_vld2:
+    case Intrinsic::arm_neon_vld3:
+    case Intrinsic::arm_neon_vld4:
+    case Intrinsic::arm_neon_vst1:
+    case Intrinsic::arm_neon_vst2:
+    case Intrinsic::arm_neon_vst3:
+    case Intrinsic::arm_neon_vst4:
+    case Intrinsic::arm_neon_vld2lane:
+    case Intrinsic::arm_neon_vld3lane:
+    case Intrinsic::arm_neon_vld4lane:
+    case Intrinsic::aarch64_neon_vld1x2:
+    case Intrinsic::aarch64_neon_vld1x3:
+    case Intrinsic::aarch64_neon_vld1x4:
+    case Intrinsic::aarch64_neon_vst1x2:
+    case Intrinsic::aarch64_neon_vst1x3:
+    case Intrinsic::aarch64_neon_vst1x4:
+    case Intrinsic::arm_neon_vst2lane:
+    case Intrinsic::arm_neon_vst3lane:
+    case Intrinsic::arm_neon_vst4lane:
+      return CombineBaseUpdate(N, DCI);
+    default:
+      break;
+    }
   }
 
   return SDValue();
 }
 
+bool
+AArch64TargetLowering::isFMAFasterThanFMulAndFAdd(EVT VT) const {
+  VT = VT.getScalarType();
+
+  if (!VT.isSimple())
+    return false;
+
+  switch (VT.getSimpleVT().SimpleTy) {
+  case MVT::f16:
+  case MVT::f32:
+  case MVT::f64:
+    return true;
+  case MVT::f128:
+    return false;
+  default:
+    break;
+  }
+
+  return false;
+}
+
+// Check whether a build_vector can be represented as a shuffle_vector.
+// If so, try to call LowerVECTOR_SHUFFLE to lower it.
+bool AArch64TargetLowering::isKnownShuffleVector(SDValue Op, SelectionDAG &DAG,
+                                                 SDValue &Res) const {
+  SDLoc DL(Op);
+  EVT VT = Op.getValueType();
+  unsigned NumElts = VT.getVectorNumElements();
+  unsigned V0NumElts = 0;
+  int Mask[16];
+  SDValue V0, V1;
+
+  // Check that all elements are extracted from at most two vectors.
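The isFMAFasterThanFMulAndFAdd hook added above is not just a scheduling knob: the fused form rounds once, the separate form twice, so fmuladd may only be expanded to FMAs when the target reports them as at least as fast. A small demonstration of the difference (editorial sketch, assuming IEEE-754 doubles and no implicit FP contraction; the volatile forces a separately rounded multiply):

#include <cassert>
#include <cmath>

int main() {
  double a = 1.0 + 0x1p-27, b = 1.0 - 0x1p-27; // a*b == 1 - 2^-54 exactly
  volatile double p = a * b;            // separately rounded: p == 1.0
  double separate = p - 1.0;            // two roundings lose the term: 0.0
  double fused = std::fma(a, b, -1.0);  // one rounding keeps it: -0x1p-54
  assert(separate == 0.0 && fused == -0x1p-54);
  return 0;
}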
+ for (unsigned i = 0; i < NumElts; ++i) { + SDValue Elt = Op.getOperand(i); + if (Elt.getOpcode() != ISD::EXTRACT_VECTOR_ELT) + return false; + + if (V0.getNode() == 0) { + V0 = Elt.getOperand(0); + V0NumElts = V0.getValueType().getVectorNumElements(); + } + if (Elt.getOperand(0) == V0) { + Mask[i] = (cast(Elt->getOperand(1))->getZExtValue()); + continue; + } else if (V1.getNode() == 0) { + V1 = Elt.getOperand(0); + } + if (Elt.getOperand(0) == V1) { + unsigned Lane = cast(Elt->getOperand(1))->getZExtValue(); + Mask[i] = (Lane + V0NumElts); + continue; + } else { + return false; + } + } + + if (!V1.getNode() && V0NumElts == NumElts * 2) { + V1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, + DAG.getConstant(NumElts, MVT::i64)); + V0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, VT, V0, + DAG.getConstant(0, MVT::i64)); + V0NumElts = V0.getValueType().getVectorNumElements(); + } + + if (V1.getNode() && NumElts == V0NumElts && + V0NumElts == V1.getValueType().getVectorNumElements()) { + SDValue Shuffle = DAG.getVectorShuffle(VT, DL, V0, V1, Mask); + Res = LowerVECTOR_SHUFFLE(Shuffle, DAG); + return true; + } else + return false; +} + +// If this is a case we can't handle, return null and let the default +// expansion code take care of it. +SDValue +AArch64TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + const AArch64Subtarget *ST) const { + + BuildVectorSDNode *BVN = cast(Op.getNode()); + SDLoc DL(Op); + EVT VT = Op.getValueType(); + + APInt SplatBits, SplatUndef; + unsigned SplatBitSize; + bool HasAnyUndefs; + + unsigned UseNeonMov = VT.getSizeInBits() >= 64; + + // Note we favor lowering MOVI over MVNI. + // This has implications on the definition of patterns in TableGen to select + // BIC immediate instructions but not ORR immediate instructions. + // If this lowering order is changed, TableGen patterns for BIC immediate and + // ORR immediate instructions have to be updated. + if (UseNeonMov && + BVN->isConstantSplat(SplatBits, SplatUndef, SplatBitSize, HasAnyUndefs)) { + if (SplatBitSize <= 64) { + // First attempt to use vector immediate-form MOVI + EVT NeonMovVT; + unsigned Imm = 0; + unsigned OpCmode = 0; + + if (isNeonModifiedImm(SplatBits.getZExtValue(), SplatUndef.getZExtValue(), + SplatBitSize, DAG, VT.is128BitVector(), + Neon_Mov_Imm, NeonMovVT, Imm, OpCmode)) { + SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32); + SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32); + + if (ImmVal.getNode() && OpCmodeVal.getNode()) { + SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MOVIMM, DL, NeonMovVT, + ImmVal, OpCmodeVal); + return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov); + } + } + + // Then attempt to use vector immediate-form MVNI + uint64_t NegatedImm = (~SplatBits).getZExtValue(); + if (isNeonModifiedImm(NegatedImm, SplatUndef.getZExtValue(), SplatBitSize, + DAG, VT.is128BitVector(), Neon_Mvn_Imm, NeonMovVT, + Imm, OpCmode)) { + SDValue ImmVal = DAG.getTargetConstant(Imm, MVT::i32); + SDValue OpCmodeVal = DAG.getConstant(OpCmode, MVT::i32); + if (ImmVal.getNode() && OpCmodeVal.getNode()) { + SDValue NeonMov = DAG.getNode(AArch64ISD::NEON_MVNIMM, DL, NeonMovVT, + ImmVal, OpCmodeVal); + return DAG.getNode(ISD::BITCAST, DL, VT, NeonMov); + } + } + + // Attempt to use vector immediate-form FMOV + if (((VT == MVT::v2f32 || VT == MVT::v4f32) && SplatBitSize == 32) || + (VT == MVT::v2f64 && SplatBitSize == 64)) { + APFloat RealVal( + SplatBitSize == 32 ? 
APFloat::IEEEsingle : APFloat::IEEEdouble, + SplatBits); + uint32_t ImmVal; + if (A64Imms::isFPImm(RealVal, ImmVal)) { + SDValue Val = DAG.getTargetConstant(ImmVal, MVT::i32); + return DAG.getNode(AArch64ISD::NEON_FMOVIMM, DL, VT, Val); + } + } + } + } + + unsigned NumElts = VT.getVectorNumElements(); + bool isOnlyLowElement = true; + bool usesOnlyOneValue = true; + bool hasDominantValue = false; + bool isConstant = true; + + // Map of the number of times a particular SDValue appears in the + // element list. + DenseMap ValueCounts; + SDValue Value; + for (unsigned i = 0; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (i > 0) + isOnlyLowElement = false; + if (!isa(V) && !isa(V)) + isConstant = false; + + ValueCounts.insert(std::make_pair(V, 0)); + unsigned &Count = ValueCounts[V]; + + // Is this value dominant? (takes up more than half of the lanes) + if (++Count > (NumElts / 2)) { + hasDominantValue = true; + Value = V; + } + } + if (ValueCounts.size() != 1) + usesOnlyOneValue = false; + if (!Value.getNode() && ValueCounts.size() > 0) + Value = ValueCounts.begin()->first; + + if (ValueCounts.size() == 0) + return DAG.getUNDEF(VT); + + // Loads are better lowered with insert_vector_elt. + // Keep going if we are hitting this case. + if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) + return DAG.getNode(ISD::SCALAR_TO_VECTOR, DL, VT, Value); + + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (hasDominantValue && EltSize <= 64) { + // Use VDUP for non-constant splats. + if (!isConstant) { + SDValue N; + + // If we are DUPing a value that comes directly from a vector, we could + // just use DUPLANE. We can only do this if the lane being extracted + // is at a constant index, as the DUP from lane instructions only have + // constant-index forms. + if (Value->getOpcode() == ISD::EXTRACT_VECTOR_ELT && + isa(Value->getOperand(1))) { + N = DAG.getNode(AArch64ISD::NEON_VDUPLANE, DL, VT, + Value->getOperand(0), Value->getOperand(1)); + } else + N = DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value); + + if (!usesOnlyOneValue) { + // The dominant value was splatted as 'N', but we now have to insert + // all differing elements. + for (unsigned I = 0; I < NumElts; ++I) { + if (Op.getOperand(I) == Value) + continue; + SmallVector Ops; + Ops.push_back(N); + Ops.push_back(Op.getOperand(I)); + Ops.push_back(DAG.getConstant(I, MVT::i64)); + N = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, &Ops[0], 3); + } + } + return N; + } + if (usesOnlyOneValue && isConstant) { + return DAG.getNode(AArch64ISD::NEON_VDUP, DL, VT, Value); + } + } + // If all elements are constants and the case above didn't get hit, fall back + // to the default expansion, which will generate a load from the constant + // pool. + if (isConstant) + return SDValue(); + + // Try to lower this in lowering ShuffleVector way. + SDValue Shuf; + if (isKnownShuffleVector(Op, DAG, Shuf)) + return Shuf; + + // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we + // know the default expansion would otherwise fall back on something even + // worse. For a vector with one or two non-undef values, that's + // scalar_to_vector for the elements followed by a shuffle (provided the + // shuffle is valid for the target) and materialization element by element + // on the stack followed by a load for everything else. 
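The dominant-value scan above can be restated over plain integers (editorial sketch, not part of the patch; findDominant is a hypothetical helper):

#include <cassert>
#include <map>
#include <vector>

// A lane value is "dominant" once it fills more than half the lanes; the
// lowering above then splats it and patches only the minority lanes with
// INSERT_VECTOR_ELT.
int findDominant(const std::vector<int> &Lanes, bool &HasDominant) {
  std::map<int, unsigned> Counts;
  int Dominant = 0;
  HasDominant = false;
  for (int V : Lanes)
    if (++Counts[V] > Lanes.size() / 2) {
      HasDominant = true;
      Dominant = V;
    }
  return Dominant;
}

int main() {
  bool HasDominant;
  std::vector<int> Lanes = {7, 7, 3, 7}; // 7 fills 3 of 4 lanes
  assert(findDominant(Lanes, HasDominant) == 7 && HasDominant);
  return 0;
}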
+ if (!isConstant && !usesOnlyOneValue) { + SDValue Vec = DAG.getUNDEF(VT); + for (unsigned i = 0 ; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + SDValue LaneIdx = DAG.getConstant(i, MVT::i64); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, DL, VT, Vec, V, LaneIdx); + } + return Vec; + } + return SDValue(); +} + +/// isREVMask - Check if a vector shuffle corresponds to a REV +/// instruction with the specified blocksize. (The order of the elements +/// within each block of the vector is reversed.) +static bool isREVMask(ArrayRef M, EVT VT, unsigned BlockSize) { + assert((BlockSize == 16 || BlockSize == 32 || BlockSize == 64) && + "Only possible block sizes for REV are: 16, 32, 64"); + + unsigned EltSz = VT.getVectorElementType().getSizeInBits(); + if (EltSz == 64) + return false; + + unsigned NumElts = VT.getVectorNumElements(); + unsigned BlockElts = M[0] + 1; + // If the first shuffle index is UNDEF, be optimistic. + if (M[0] < 0) + BlockElts = BlockSize / EltSz; + + if (BlockSize <= EltSz || BlockSize != BlockElts * EltSz) + return false; + + for (unsigned i = 0; i < NumElts; ++i) { + if (M[i] < 0) + continue; // ignore UNDEF indices + if ((unsigned)M[i] != (i - i % BlockElts) + (BlockElts - 1 - i % BlockElts)) + return false; + } + + return true; +} + +// isPermuteMask - Check whether the vector shuffle matches to UZP, ZIP and +// TRN instruction. +static unsigned isPermuteMask(ArrayRef M, EVT VT) { + unsigned NumElts = VT.getVectorNumElements(); + if (NumElts < 4) + return 0; + + bool ismatch = true; + + // Check UZP1 + for (unsigned i = 0; i < NumElts; ++i) { + if ((unsigned)M[i] != i * 2) { + ismatch = false; + break; + } + } + if (ismatch) + return AArch64ISD::NEON_UZP1; + + // Check UZP2 + ismatch = true; + for (unsigned i = 0; i < NumElts; ++i) { + if ((unsigned)M[i] != i * 2 + 1) { + ismatch = false; + break; + } + } + if (ismatch) + return AArch64ISD::NEON_UZP2; + + // Check ZIP1 + ismatch = true; + for (unsigned i = 0; i < NumElts; ++i) { + if ((unsigned)M[i] != i / 2 + NumElts * (i % 2)) { + ismatch = false; + break; + } + } + if (ismatch) + return AArch64ISD::NEON_ZIP1; + + // Check ZIP2 + ismatch = true; + for (unsigned i = 0; i < NumElts; ++i) { + if ((unsigned)M[i] != (NumElts + i) / 2 + NumElts * (i % 2)) { + ismatch = false; + break; + } + } + if (ismatch) + return AArch64ISD::NEON_ZIP2; + + // Check TRN1 + ismatch = true; + for (unsigned i = 0; i < NumElts; ++i) { + if ((unsigned)M[i] != i + (NumElts - 1) * (i % 2)) { + ismatch = false; + break; + } + } + if (ismatch) + return AArch64ISD::NEON_TRN1; + + // Check TRN2 + ismatch = true; + for (unsigned i = 0; i < NumElts; ++i) { + if ((unsigned)M[i] != 1 + i + (NumElts - 1) * (i % 2)) { + ismatch = false; + break; + } + } + if (ismatch) + return AArch64ISD::NEON_TRN2; + + return 0; +} + +SDValue +AArch64TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, + SelectionDAG &DAG) const { + SDValue V1 = Op.getOperand(0); + SDValue V2 = Op.getOperand(1); + SDLoc dl(Op); + EVT VT = Op.getValueType(); + ShuffleVectorSDNode *SVN = cast(Op.getNode()); + + // Convert shuffles that are directly supported on NEON to target-specific + // DAG nodes, instead of keeping them as shuffles and matching them again + // during code selection. This is more efficient and avoids the possibility + // of inconsistencies between legalization and selection. 
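The closed-form masks that isPermuteMask above compares against can be tabulated directly (editorial sketch for a 4-element type; indices 0..3 select from the first source vector, 4..7 from the second):

#include <cstdio>

int main() {
  const unsigned N = 4; // element count per source vector
  std::printf("lane  uzp1  zip1  trn1\n");
  for (unsigned i = 0; i < N; ++i)
    std::printf("%4u  %4u  %4u  %4u\n", i,
                i * 2,                  // UZP1: keep even-numbered elements
                i / 2 + N * (i % 2),    // ZIP1: interleave the low halves
                i + (N - 1) * (i % 2)); // TRN1: transpose even lanes
  // Prints the masks {0,2,4,6}, {0,4,1,5} and {0,4,2,6}; the UZP2/ZIP2/TRN2
  // variants are the same formulas shifted by one element or half a vector.
  return 0;
}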
+ ArrayRef ShuffleMask = SVN->getMask(); + + unsigned EltSize = VT.getVectorElementType().getSizeInBits(); + if (EltSize > 64) + return SDValue(); + + if (isREVMask(ShuffleMask, VT, 64)) + return DAG.getNode(AArch64ISD::NEON_REV64, dl, VT, V1); + if (isREVMask(ShuffleMask, VT, 32)) + return DAG.getNode(AArch64ISD::NEON_REV32, dl, VT, V1); + if (isREVMask(ShuffleMask, VT, 16)) + return DAG.getNode(AArch64ISD::NEON_REV16, dl, VT, V1); + + unsigned ISDNo = isPermuteMask(ShuffleMask, VT); + if (ISDNo) + return DAG.getNode(ISDNo, dl, VT, V1, V2); + + // If the element of shuffle mask are all the same constant, we can + // transform it into either NEON_VDUP or NEON_VDUPLANE + if (ShuffleVectorSDNode::isSplatMask(&ShuffleMask[0], VT)) { + int Lane = SVN->getSplatIndex(); + // If this is undef splat, generate it via "just" vdup, if possible. + if (Lane == -1) Lane = 0; + + // Test if V1 is a SCALAR_TO_VECTOR. + if (V1.getOpcode() == ISD::SCALAR_TO_VECTOR) { + return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, V1.getOperand(0)); + } + // Test if V1 is a BUILD_VECTOR which is equivalent to a SCALAR_TO_VECTOR. + if (V1.getOpcode() == ISD::BUILD_VECTOR) { + bool IsScalarToVector = true; + for (unsigned i = 0, e = V1.getNumOperands(); i != e; ++i) + if (V1.getOperand(i).getOpcode() != ISD::UNDEF && + i != (unsigned)Lane) { + IsScalarToVector = false; + break; + } + if (IsScalarToVector) + return DAG.getNode(AArch64ISD::NEON_VDUP, dl, VT, + V1.getOperand(Lane)); + } + + // Test if V1 is a EXTRACT_SUBVECTOR. + if (V1.getOpcode() == ISD::EXTRACT_SUBVECTOR) { + int ExtLane = cast(V1.getOperand(1))->getZExtValue(); + return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1.getOperand(0), + DAG.getConstant(Lane + ExtLane, MVT::i64)); + } + // Test if V1 is a CONCAT_VECTORS. + if (V1.getOpcode() == ISD::CONCAT_VECTORS && + V1.getOperand(1).getOpcode() == ISD::UNDEF) { + SDValue Op0 = V1.getOperand(0); + assert((unsigned)Lane < Op0.getValueType().getVectorNumElements() && + "Invalid vector lane access"); + return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, Op0, + DAG.getConstant(Lane, MVT::i64)); + } + + return DAG.getNode(AArch64ISD::NEON_VDUPLANE, dl, VT, V1, + DAG.getConstant(Lane, MVT::i64)); + } + + int Length = ShuffleMask.size(); + int V1EltNum = V1.getValueType().getVectorNumElements(); + + // If the number of v1 elements is the same as the number of shuffle mask + // element and the shuffle masks are sequential values, we can transform + // it into NEON_VEXTRACT. + if (V1EltNum == Length) { + // Check if the shuffle mask is sequential. + bool IsSequential = true; + int CurMask = ShuffleMask[0]; + for (int I = 0; I < Length; ++I) { + if (ShuffleMask[I] != CurMask) { + IsSequential = false; + break; + } + CurMask++; + } + if (IsSequential) { + assert((EltSize % 8 == 0) && "Bitsize of vector element is incorrect"); + unsigned VecSize = EltSize * V1EltNum; + unsigned Index = (EltSize/8) * ShuffleMask[0]; + if (VecSize == 64 || VecSize == 128) + return DAG.getNode(AArch64ISD::NEON_VEXTRACT, dl, VT, V1, V2, + DAG.getConstant(Index, MVT::i64)); + } + } + + // For shuffle mask like "0, 1, 2, 3, 4, 5, 13, 7", try to generate insert + // by element from V2 to V1 . + // If shuffle mask is like "0, 1, 10, 11, 12, 13, 14, 15", V2 would be a + // better choice to be inserted than V1 as less insert needed, so we count + // element to be inserted for both V1 and V2, and select less one as insert + // target. + + // Collect elements need to be inserted and their index. 
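The sequential-mask test above, in isolation (editorial sketch, not part of the patch; isSequentialMask is a hypothetical helper):

#include <cassert>
#include <vector>

// A shuffle whose mask is consecutive indices starting at Mask[0] is a
// rotation of the concatenated sources, which EXT encodes as the byte
// offset Mask[0] * element-size.
bool isSequentialMask(const std::vector<int> &Mask, unsigned EltBytes,
                      unsigned &ByteIndex) {
  for (unsigned I = 1; I < Mask.size(); ++I)
    if (Mask[I] != Mask[0] + (int)I)
      return false;
  ByteIndex = EltBytes * (unsigned)Mask[0];
  return true;
}

int main() {
  unsigned ByteIndex;
  // v4i32 shuffle <1,2,3,4> -> EXT with byte offset 1 * 4 = 4.
  assert(isSequentialMask({1, 2, 3, 4}, 4, ByteIndex) && ByteIndex == 4);
  return 0;
}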
+ SmallVector NV1Elt; + SmallVector N1Index; + SmallVector NV2Elt; + SmallVector N2Index; + for (int I = 0; I != Length; ++I) { + if (ShuffleMask[I] != I) { + NV1Elt.push_back(ShuffleMask[I]); + N1Index.push_back(I); + } + } + for (int I = 0; I != Length; ++I) { + if (ShuffleMask[I] != (I + V1EltNum)) { + NV2Elt.push_back(ShuffleMask[I]); + N2Index.push_back(I); + } + } + + // Decide which to be inserted. If all lanes mismatch, neither V1 nor V2 + // will be inserted. + SDValue InsV = V1; + SmallVector InsMasks = NV1Elt; + SmallVector InsIndex = N1Index; + if ((int)NV1Elt.size() != Length || (int)NV2Elt.size() != Length) { + if (NV1Elt.size() > NV2Elt.size()) { + InsV = V2; + InsMasks = NV2Elt; + InsIndex = N2Index; + } + } else { + InsV = DAG.getNode(ISD::UNDEF, dl, VT); + } + + for (int I = 0, E = InsMasks.size(); I != E; ++I) { + SDValue ExtV = V1; + int Mask = InsMasks[I]; + if (Mask >= V1EltNum) { + ExtV = V2; + Mask -= V1EltNum; + } + // Any value type smaller than i32 is illegal in AArch64, and this lower + // function is called after legalize pass, so we need to legalize + // the result here. + EVT EltVT; + if (VT.getVectorElementType().isFloatingPoint()) + EltVT = (EltSize == 64) ? MVT::f64 : MVT::f32; + else + EltVT = (EltSize == 64) ? MVT::i64 : MVT::i32; + + if (Mask >= 0) { + ExtV = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, EltVT, ExtV, + DAG.getConstant(Mask, MVT::i64)); + InsV = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, InsV, ExtV, + DAG.getConstant(InsIndex[I], MVT::i64)); + } + } + return InsV; +} + AArch64TargetLowering::ConstraintType AArch64TargetLowering::getConstraintType(const std::string &Constraint) const { if (Constraint.size() == 1) { @@ -2899,7 +4469,7 @@ AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, case 'S': { // An absolute symbolic address or label reference. if (const GlobalAddressSDNode *GA = dyn_cast(Op)) { - Result = DAG.getTargetGlobalAddress(GA->getGlobal(), Op.getDebugLoc(), + Result = DAG.getTargetGlobalAddress(GA->getGlobal(), SDLoc(Op), GA->getValueType(0)); } else if (const BlockAddressSDNode *BA = dyn_cast(Op)) { @@ -2935,7 +4505,7 @@ AArch64TargetLowering::LowerAsmOperandForConstraint(SDValue Op, std::pair AArch64TargetLowering::getRegForInlineAsmConstraint( const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': @@ -2949,14 +4519,10 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( return std::make_pair(0U, &AArch64::FPR16RegClass); else if (VT == MVT::f32) return std::make_pair(0U, &AArch64::FPR32RegClass); - else if (VT == MVT::f64) - return std::make_pair(0U, &AArch64::FPR64RegClass); else if (VT.getSizeInBits() == 64) - return std::make_pair(0U, &AArch64::VPR64RegClass); - else if (VT == MVT::f128) - return std::make_pair(0U, &AArch64::FPR128RegClass); + return std::make_pair(0U, &AArch64::FPR64RegClass); else if (VT.getSizeInBits() == 128) - return std::make_pair(0U, &AArch64::VPR128RegClass); + return std::make_pair(0U, &AArch64::FPR128RegClass); break; } } @@ -2965,3 +4531,69 @@ AArch64TargetLowering::getRegForInlineAsmConstraint( // constraint into a member of a register class. return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT); } + +/// Represent NEON load and store intrinsics as MemIntrinsicNodes. +/// The associated MachineMemOperands record the alignment specified +/// in the intrinsic calls. 
+bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, + const CallInst &I, + unsigned Intrinsic) const { + switch (Intrinsic) { + case Intrinsic::arm_neon_vld1: + case Intrinsic::arm_neon_vld2: + case Intrinsic::arm_neon_vld3: + case Intrinsic::arm_neon_vld4: + case Intrinsic::aarch64_neon_vld1x2: + case Intrinsic::aarch64_neon_vld1x3: + case Intrinsic::aarch64_neon_vld1x4: + case Intrinsic::arm_neon_vld2lane: + case Intrinsic::arm_neon_vld3lane: + case Intrinsic::arm_neon_vld4lane: { + Info.opc = ISD::INTRINSIC_W_CHAIN; + // Conservatively set memVT to the entire set of vectors loaded. + uint64_t NumElts = getDataLayout()->getTypeAllocSize(I.getType()) / 8; + Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); + Info.align = cast(AlignArg)->getZExtValue(); + Info.vol = false; // volatile loads with NEON intrinsics not supported + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::arm_neon_vst1: + case Intrinsic::arm_neon_vst2: + case Intrinsic::arm_neon_vst3: + case Intrinsic::arm_neon_vst4: + case Intrinsic::aarch64_neon_vst1x2: + case Intrinsic::aarch64_neon_vst1x3: + case Intrinsic::aarch64_neon_vst1x4: + case Intrinsic::arm_neon_vst2lane: + case Intrinsic::arm_neon_vst3lane: + case Intrinsic::arm_neon_vst4lane: { + Info.opc = ISD::INTRINSIC_VOID; + // Conservatively set memVT to the entire set of vectors stored. + unsigned NumElts = 0; + for (unsigned ArgI = 1, ArgE = I.getNumArgOperands(); ArgI < ArgE; ++ArgI) { + Type *ArgTy = I.getArgOperand(ArgI)->getType(); + if (!ArgTy->isVectorTy()) + break; + NumElts += getDataLayout()->getTypeAllocSize(ArgTy) / 8; + } + Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Value *AlignArg = I.getArgOperand(I.getNumArgOperands() - 1); + Info.align = cast(AlignArg)->getZExtValue(); + Info.vol = false; // volatile stores with NEON intrinsics not supported + Info.readMem = false; + Info.writeMem = true; + return true; + } + default: + break; + } + + return false; +} diff --git a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h index d49b3ee..8ad5a79 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64ISelLowering.h @@ -19,7 +19,7 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Target/TargetLowering.h" - +#include "llvm/IR/Intrinsics.h" namespace llvm { namespace AArch64ISD { @@ -111,7 +111,92 @@ namespace AArch64ISD { // created using the small memory model style: i.e. adrp/add or // adrp/mem-op. This exists to prevent bare TargetAddresses which may never // get selected. 
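The conservative footprint computation in getTgtMemIntrinsic above, in isolation (editorial sketch; the helper name is hypothetical):

#include <cassert>

// A vldN/vstN touches NumVecs whole vector registers, so its footprint in
// units of i64 lanes is simply total-bytes / 8; e.g. a vld3 of three
// 128-bit vectors covers 48 bytes = 6 lanes of i64.
unsigned memVTLanesOfI64(unsigned TotalBytes) {
  return TotalBytes / 8;
}

int main() {
  assert(memVTLanesOfI64(3 * 16) == 6);
  return 0;
}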
- WrapperSmall + WrapperSmall, + + // Vector bitwise select + NEON_BSL, + + // Vector move immediate + NEON_MOVIMM, + + // Vector Move Inverted Immediate + NEON_MVNIMM, + + // Vector FP move immediate + NEON_FMOVIMM, + + // Vector permute + NEON_UZP1, + NEON_UZP2, + NEON_ZIP1, + NEON_ZIP2, + NEON_TRN1, + NEON_TRN2, + + // Vector Element reverse + NEON_REV64, + NEON_REV32, + NEON_REV16, + + // Vector compare + NEON_CMP, + + // Vector compare zero + NEON_CMPZ, + + // Vector compare bitwise test + NEON_TST, + + // Vector saturating shift + NEON_QSHLs, + NEON_QSHLu, + + // Vector dup + NEON_VDUP, + + // Vector dup by lane + NEON_VDUPLANE, + + // Vector extract + NEON_VEXTRACT, + + // NEON duplicate lane loads + NEON_LD2DUP = ISD::FIRST_TARGET_MEMORY_OPCODE, + NEON_LD3DUP, + NEON_LD4DUP, + + // NEON loads with post-increment base updates: + NEON_LD1_UPD, + NEON_LD2_UPD, + NEON_LD3_UPD, + NEON_LD4_UPD, + NEON_LD1x2_UPD, + NEON_LD1x3_UPD, + NEON_LD1x4_UPD, + + // NEON stores with post-increment base updates: + NEON_ST1_UPD, + NEON_ST2_UPD, + NEON_ST3_UPD, + NEON_ST4_UPD, + NEON_ST1x2_UPD, + NEON_ST1x3_UPD, + NEON_ST1x4_UPD, + + // NEON duplicate lane loads with post-increment base updates: + NEON_LD2DUP_UPD, + NEON_LD3DUP_UPD, + NEON_LD4DUP_UPD, + + // NEON lane loads with post-increment base updates: + NEON_LD2LN_UPD, + NEON_LD3LN_UPD, + NEON_LD4LN_UPD, + + // NEON lane store with post-increment base updates: + NEON_ST2LN_UPD, + NEON_ST3LN_UPD, + NEON_ST4LN_UPD }; } @@ -130,14 +215,14 @@ public: SDValue LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; SDValue LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const; SDValue LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const; @@ -145,12 +230,18 @@ public: SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool IsVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; - void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc DL, SDValue &Chain) const; + bool isKnownShuffleVector(SDValue Op, SelectionDAG &DAG, SDValue &Res) const; + + SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, + const AArch64Subtarget *ST) const; + + SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) const; + void SaveVarArgRegisters(CCState &CCInfo, SelectionDAG &DAG, SDLoc DL, + SDValue &Chain) const; /// IsEligibleForTailCallOptimization - Check whether the call is eligible /// for tail call optimization. 
Targets which want to do tail call @@ -171,7 +262,7 @@ public: SDValue addTokenForArgument(SDValue Chain, SelectionDAG &DAG, MachineFrameInfo *MFI, int ClobberedFI) const; - EVT getSetCCResultType(EVT VT) const; + EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; bool DoesCalleeRestoreStack(CallingConv::ID CallCC, bool TailCallOpt) const; @@ -181,7 +272,7 @@ public: bool isLegalICmpImmediate(int64_t Val) const; SDValue getSelectableIntSetCC(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &A64cc, SelectionDAG &DAG, DebugLoc &dl) const; + SDValue &A64cc, SelectionDAG &DAG, SDLoc &dl) const; virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *MBB) const; @@ -211,12 +302,14 @@ public: SDValue LowerFP_EXTEND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_ROUND(SDValue Op, SelectionDAG &DAG) const; SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; + SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELFSmall(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELFLarge(SDValue Op, SelectionDAG &DAG) const; SDValue LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const; - SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, DebugLoc DL, + SDValue LowerTLSDescCall(SDValue SymAddr, SDValue DescAddr, SDLoc DL, SelectionDAG &DAG) const; SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const; SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG, bool IsSigned) const; @@ -229,11 +322,11 @@ public: virtual SDValue PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const; - /// isFMAFasterThanMulAndAdd - Return true if an FMA operation is faster than - /// a pair of mul and add instructions. fmuladd intrinsics will be expanded to - /// FMAs when this method returns true (and FMAs are legal), otherwise fmuladd - /// is expanded to mul + add. - virtual bool isFMAFasterThanMulAndAdd(EVT) const { return true; } + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster + /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be + /// expanded to FMAs when this method returns true, otherwise fmuladd is + /// expanded to fmul + fadd. 
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const; ConstraintType getConstraintType(const std::string &Constraint) const; @@ -245,12 +338,30 @@ public: SelectionDAG &DAG) const; std::pair - getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const; + getRegForInlineAsmConstraint(const std::string &Constraint, MVT VT) const; + + virtual bool getTgtMemIntrinsic(IntrinsicInfo &Info, const CallInst &I, + unsigned Intrinsic) const LLVM_OVERRIDE; + +protected: + std::pair + findRepresentativeClass(MVT VT) const; + private: - const AArch64Subtarget *Subtarget; - const TargetRegisterInfo *RegInfo; const InstrItineraryData *Itins; + + const AArch64Subtarget *getSubtarget() const { + return &getTargetMachine().getSubtarget(); + } }; +enum NeonModImmType { + Neon_Mov_Imm, + Neon_Mvn_Imm +}; + +extern SDValue ScanBUILD_VECTOR(SDValue Op, bool &isOnlyLowElement, + bool &usesOnlyOneValue, bool &hasDominantValue, + bool &isConstant, bool &isUNDEF); } // namespace llvm #endif // LLVM_TARGET_AARCH64_ISELLOWERING_H diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td index 9dd122f..34f917c 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrFormats.td @@ -120,6 +120,14 @@ class A64InstRdnm patterns, InstrItinClass itin> + : A64InstRtn { + bits<5> Rm; + + let Inst{20-16} = Rm; +} + //===----------------------------------------------------------------------===// // // Actual A64 Instruction Formats @@ -383,6 +391,8 @@ class A64I_extract op, bit n, // Inherits Rd in 4-0 } +let Predicates = [HasFPARMv8] in { + // Format for floating-point compare instructions. class A64I_fpcmp type, bits<2> op, bits<5> opcode2, dag outs, dag ins, string asmstr, @@ -562,6 +572,8 @@ class A64I_fpimm type, bits<5> imm5, // Inherit Rd in 4-0 } +} + // Format for load-register (literal) instructions. class A64I_LDRlit opc, bit v, dag outs, dag ins, string asmstr, @@ -959,3 +971,519 @@ class A64I_Breg opc, bits<5> op2, bits<6> op3, bits<5> op4, let Inst{4-0} = op4; } + +//===----------------------------------------------------------------------===// +// +// Neon Instruction Format Definitions. 
+// + +let Predicates = [HasNEON] in { + +class NeonInstAlias + : InstAlias { +} + +// Format AdvSIMD bitwise extract +class NeonI_BitExtract op2, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29-24} = 0b101110; + let Inst{23-22} = op2; + let Inst{21} = 0b0; + // Inherit Rm in 20-16 + let Inst{15} = 0b0; + // imm4 in 14-11 + let Inst{10} = 0b0; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD perm +class NeonI_Perm size, bits<3> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29-24} = 0b001110; + let Inst{23-22} = size; + let Inst{21} = 0b0; + // Inherit Rm in 20-16 + let Inst{15} = 0b0; + let Inst{14-12} = opcode; + let Inst{11-10} = 0b10; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD table lookup +class NeonI_TBL op2, bits<2> len, bit op, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29-24} = 0b001110; + let Inst{23-22} = op2; + let Inst{21} = 0b0; + // Inherit Rm in 20-16 + let Inst{15} = 0b0; + let Inst{14-13} = len; + let Inst{12} = op; + let Inst{11-10} = 0b00; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD 3 vector registers with same vector type +class NeonI_3VSame size, bits<5> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = u; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-11} = opcode; + let Inst{10} = 0b1; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD 3 vector registers with different vector type +class NeonI_3VDiff size, bits<4> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = u; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-12} = opcode; + let Inst{11} = 0b0; + let Inst{10} = 0b0; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD two registers and an element +class NeonI_2VElem size, bits<4> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = u; + let Inst{28-24} = 0b01111; + let Inst{23-22} = size; + // l in Inst{21} + // m in Inst{20} + // Inherit Rm in 19-16 + let Inst{15-12} = opcode; + // h in Inst{11} + let Inst{10} = 0b0; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD 1 vector register with modified immediate +class NeonI_1VModImm patterns, InstrItinClass itin> + : A64InstRd { + bits<8> Imm; + bits<4> cmode; + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = op; + let Inst{28-19} = 0b0111100000; + let Inst{15-12} = cmode; + let Inst{11} = 0b0; // o2 + let Inst{10} = 1; + // Inherit Rd in 4-0 + let Inst{18-16} = Imm{7-5}; // imm a:b:c + let Inst{9-5} = Imm{4-0}; // imm d:e:f:g:h +} + +// Format AdvSIMD 3 scalar registers with same type + +class NeonI_Scalar3Same size, bits<5> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31} = 0b0; + let Inst{30} = 0b1; + let Inst{29} = u; + let Inst{28-24} = 0b11110; + let 
Inst{23-22} = size; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-11} = opcode; + let Inst{10} = 0b1; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + + +// Format AdvSIMD 2 vector registers miscellaneous +class NeonI_2VMisc size, bits<5> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn { + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = u; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD 2 vector 1 immediate shift +class NeonI_2VShiftImm opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn { + bits<7> Imm; + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = u; + let Inst{28-23} = 0b011110; + let Inst{22-16} = Imm; + let Inst{15-11} = opcode; + let Inst{10} = 0b1; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD duplicate and insert +class NeonI_copy imm4, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn { + bits<5> Imm5; + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = op; + let Inst{28-21} = 0b01110000; + let Inst{20-16} = Imm5; + let Inst{15} = 0b0; + let Inst{14-11} = imm4; + let Inst{10} = 0b1; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} +// Format AdvSIMD insert from element to vector +class NeonI_insert patterns, InstrItinClass itin> + : A64InstRdn { + bits<5> Imm5; + bits<4> Imm4; + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = op; + let Inst{28-21} = 0b01110000; + let Inst{20-16} = Imm5; + let Inst{15} = 0b0; + let Inst{14-11} = Imm4; + let Inst{10} = 0b1; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD scalar pairwise +class NeonI_ScalarPair size, bits<5> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn { + let Inst{31} = 0b0; + let Inst{30} = 0b1; + let Inst{29} = u; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD 2 vector across lanes +class NeonI_2VAcross size, bits<5> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn +{ + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29} = u; + let Inst{28-24} = 0b01110; + let Inst{23-22} = size; + let Inst{21-17} = 0b11000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD scalar two registers miscellaneous +class NeonI_Scalar2SameMisc size, bits<5> opcode, dag outs, dag ins, + string asmstr, list patterns, InstrItinClass itin> + : A64InstRdn { + let Inst{31} = 0b0; + let Inst{30} = 0b1; + let Inst{29} = u; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10000; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD vector load/store multiple N-element structure +class NeonI_LdStMult opcode, bits<2> size, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtn +{ + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29-23} = 0b0011000; + let Inst{22} = l; + let Inst{21-16} = 0b000000; + let Inst{15-12} = opcode; + let Inst{11-10} = size; + + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + 
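As a cross-check of the NeonI_1VModImm layout defined above, the 8-bit modified immediate (a:b:c:d:e:f:g:h) is scattered into Inst{18-16} and Inst{9-5} around the cmode field. An editorial C++ sketch (not part of the patch; encodeModImm is hypothetical):

#include <cassert>
#include <cstdint>

uint32_t encodeModImm(unsigned Q, unsigned Op, unsigned Cmode, uint8_t Imm,
                      unsigned Rd) {
  uint32_t Inst = 0x0f000400;           // Inst{28-19}=0b0111100000, Inst{10}=1
  Inst |= (uint32_t)Q << 30;
  Inst |= (uint32_t)Op << 29;
  Inst |= (uint32_t)(Cmode & 0xf) << 12;
  Inst |= (uint32_t)(Imm >> 5) << 16;   // imm a:b:c   -> Inst{18-16}
  Inst |= (uint32_t)(Imm & 0x1f) << 5;  // imm d:e:f:g:h -> Inst{9-5}
  Inst |= Rd & 0x1f;                    // Rd -> Inst{4-0}
  return Inst;
}

int main() {
  // movi v0.4s, #0x55 : Q=1, op=0, cmode=0000 (32-bit, LSL #0), imm8=0x55.
  assert(encodeModImm(1, 0, 0x0, 0x55, 0) == 0x4f0206a0);
  return 0;
}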
+// Format AdvSIMD vector load/store multiple N-element structure (post-index) +class NeonI_LdStMult_Post opcode, bits<2> size, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtnm +{ + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29-23} = 0b0011001; + let Inst{22} = l; + let Inst{21} = 0b0; + // Inherit Rm in 20-16 + let Inst{15-12} = opcode; + let Inst{11-10} = size; + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format AdvSIMD vector load Single N-element structure to all lanes +class NeonI_LdOne_Dup opcode, bits<2> size, dag outs, + dag ins, string asmstr, list patterns, + InstrItinClass itin> + : A64InstRtn +{ + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29-23} = 0b0011010; + let Inst{22} = 0b1; + let Inst{21} = r; + let Inst{20-16} = 0b00000; + let Inst{15-13} = opcode; + let Inst{12} = 0b0; + let Inst{11-10} = size; + + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format AdvSIMD vector load/store Single N-element structure to/from one lane +class NeonI_LdStOne_Lane op2_1, bit op0, dag outs, + dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtn +{ + bits<4> lane; + let Inst{31} = 0b0; + let Inst{29-23} = 0b0011010; + let Inst{22} = l; + let Inst{21} = r; + let Inst{20-16} = 0b00000; + let Inst{15-14} = op2_1; + let Inst{13} = op0; + + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format AdvSIMD post-index vector load Single N-element structure to all lanes +class NeonI_LdOne_Dup_Post opcode, bits<2> size, dag outs, + dag ins, string asmstr, list patterns, + InstrItinClass itin> + : A64InstRtnm +{ + let Inst{31} = 0b0; + let Inst{30} = q; + let Inst{29-23} = 0b0011011; + let Inst{22} = 0b1; + let Inst{21} = r; + // Inherit Rm in 20-16 + let Inst{15-13} = opcode; + let Inst{12} = 0b0; + let Inst{11-10} = size; + + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format AdvSIMD post-index vector load/store Single N-element structure +// to/from one lane +class NeonI_LdStOne_Lane_Post op2_1, bit op0, dag outs, + dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRtnm +{ + bits<4> lane; + let Inst{31} = 0b0; + let Inst{29-23} = 0b0011011; + let Inst{22} = l; + let Inst{21} = r; + // Inherit Rm in 20-16 + let Inst{15-14} = op2_1; + let Inst{13} = op0; + + // Inherit Rn in 9-5 + // Inherit Rt in 4-0 +} + +// Format AdvSIMD 3 scalar registers with different type + +class NeonI_Scalar3Diff size, bits<4> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31-30} = 0b01; + let Inst{29} = u; + let Inst{28-24} = 0b11110; + let Inst{23-22} = size; + let Inst{21} = 0b1; + // Inherit Rm in 20-16 + let Inst{15-12} = opcode; + let Inst{11-10} = 0b00; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD scalar shift by immediate + +class NeonI_ScalarShiftImm opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn { + bits<4> Imm4; + bits<3> Imm3; + let Inst{31-30} = 0b01; + let Inst{29} = u; + let Inst{28-23} = 0b111110; + let Inst{22-19} = Imm4; + let Inst{18-16} = Imm3; + let Inst{15-11} = opcode; + let Inst{10} = 0b1; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD crypto AES +class NeonI_Crypto_AES size, bits<5> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn { + let Inst{31-24} = 0b01001110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10100; + let Inst{16-12} = opcode; + let Inst{11-10} 
= 0b10; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD crypto SHA +class NeonI_Crypto_SHA size, bits<5> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdn { + let Inst{31-24} = 0b01011110; + let Inst{23-22} = size; + let Inst{21-17} = 0b10100; + let Inst{16-12} = opcode; + let Inst{11-10} = 0b10; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD crypto 3V SHA +class NeonI_Crypto_3VSHA size, bits<3> opcode, + dag outs, dag ins, string asmstr, + list patterns, InstrItinClass itin> + : A64InstRdnm { + let Inst{31-24} = 0b01011110; + let Inst{23-22} = size; + let Inst{21} = 0b0; + // Inherit Rm in 20-16 + let Inst{15} = 0b0; + let Inst{14-12} = opcode; + let Inst{11-10} = 0b00; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} + +// Format AdvSIMD scalar x indexed element +class NeonI_ScalarXIndexedElem opcode, dag outs, dag ins, + string asmstr, list patterns, + InstrItinClass itin> + : A64InstRdnm +{ + let Inst{31} = 0b0; + let Inst{30} = 0b1; + let Inst{29} = u; + let Inst{28-24} = 0b11111; + let Inst{23} = szhi; + let Inst{22} = szlo; + // l in Inst{21} + // m in Instr{20} + // Inherit Rm in 19-16 + let Inst{15-12} = opcode; + // h in Inst{11} + let Inst{10} = 0b0; + // Inherit Rn in 9-5 + // Inherit Rd in 4-0 +} +// Format AdvSIMD scalar copy - insert from element to scalar +class NeonI_ScalarCopy patterns, InstrItinClass itin> + : NeonI_copy<0b1, 0b0, 0b0000, outs, ins, asmstr, patterns, itin> { + let Inst{28} = 0b1; +} +} + diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp index cf3a2c3..180110a 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.cpp @@ -29,14 +29,14 @@ #include -#define GET_INSTRINFO_CTOR +#define GET_INSTRINFO_CTOR_DTOR #include "AArch64GenInstrInfo.inc" using namespace llvm; AArch64InstrInfo::AArch64InstrInfo(const AArch64Subtarget &STI) : AArch64GenInstrInfo(AArch64::ADJCALLSTACKDOWN, AArch64::ADJCALLSTACKUP), - RI(*this, STI), Subtarget(STI) {} + Subtarget(STI) {} void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, DebugLoc DL, @@ -68,43 +68,71 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, BuildMI(MBB, I, DL, get(AArch64::MRSxi), DestReg) .addImm(A64SysReg::NZCV); } else if (AArch64::GPR64RegClass.contains(DestReg)) { - assert(AArch64::GPR64RegClass.contains(SrcReg)); - Opc = AArch64::ORRxxx_lsl; - ZeroReg = AArch64::XZR; + if(AArch64::GPR64RegClass.contains(SrcReg)){ + Opc = AArch64::ORRxxx_lsl; + ZeroReg = AArch64::XZR; + } else{ + assert(AArch64::FPR64RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVxd), DestReg) + .addReg(SrcReg); + return; + } } else if (AArch64::GPR32RegClass.contains(DestReg)) { - assert(AArch64::GPR32RegClass.contains(SrcReg)); - Opc = AArch64::ORRwww_lsl; - ZeroReg = AArch64::WZR; + if(AArch64::GPR32RegClass.contains(SrcReg)){ + Opc = AArch64::ORRwww_lsl; + ZeroReg = AArch64::WZR; + } else{ + assert(AArch64::FPR32RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVws), DestReg) + .addReg(SrcReg); + return; + } } else if (AArch64::FPR32RegClass.contains(DestReg)) { - assert(AArch64::FPR32RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) - .addReg(SrcReg); - return; + if(AArch64::FPR32RegClass.contains(SrcReg)){ + BuildMI(MBB, I, DL, get(AArch64::FMOVss), DestReg) + .addReg(SrcReg); + return; + } + 
else { + assert(AArch64::GPR32RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVsw), DestReg) + .addReg(SrcReg); + return; + } } else if (AArch64::FPR64RegClass.contains(DestReg)) { - assert(AArch64::FPR64RegClass.contains(SrcReg)); - BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) - .addReg(SrcReg); - return; + if(AArch64::FPR64RegClass.contains(SrcReg)){ + BuildMI(MBB, I, DL, get(AArch64::FMOVdd), DestReg) + .addReg(SrcReg); + return; + } + else { + assert(AArch64::GPR64RegClass.contains(SrcReg)); + BuildMI(MBB, I, DL, get(AArch64::FMOVdx), DestReg) + .addReg(SrcReg); + return; + } } else if (AArch64::FPR128RegClass.contains(DestReg)) { assert(AArch64::FPR128RegClass.contains(SrcReg)); - // FIXME: there's no good way to do this, at least without NEON: - // + There's no single move instruction for q-registers - // + We can't create a spill slot and use normal STR/LDR because stack - // allocation has already happened - // + We can't go via X-registers with FMOV because register allocation has - // already happened. - // This may not be efficient, but at least it works. - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) - .addReg(SrcReg) - .addReg(AArch64::XSP) - .addImm(0x1ff & -16); - - BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) - .addReg(AArch64::XSP, RegState::Define) - .addReg(AArch64::XSP) - .addImm(16); - return; + // If NEON is enable, we use ORR to implement this copy. + // If NEON isn't available, emit STR and LDR to handle this. + if(getSubTarget().hasNEON()) { + BuildMI(MBB, I, DL, get(AArch64::ORRvvv_16B), DestReg) + .addReg(SrcReg) + .addReg(SrcReg); + return; + } else { + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PreInd_STR), AArch64::XSP) + .addReg(SrcReg) + .addReg(AArch64::XSP) + .addImm(0x1ff & -16); + + BuildMI(MBB, I, DL, get(AArch64::LSFP128_PostInd_LDR), DestReg) + .addReg(AArch64::XSP, RegState::Define) + .addReg(AArch64::XSP) + .addImm(16); + return; + } } else { llvm_unreachable("Unknown register class in copyPhysReg"); } @@ -116,17 +144,6 @@ void AArch64InstrInfo::copyPhysReg(MachineBasicBlock &MBB, .addImm(0); } -MachineInstr * -AArch64InstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, - uint64_t Offset, const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(AArch64::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0) - .addImm(Offset) - .addMetadata(MDPtr); - return &*MIB; -} - /// Does the Opcode represent a conditional branch that we can remove and re-add /// at the end of a basic block? 
static bool isCondBranch(unsigned Opc) { diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h index 22a2ab4..620ecc9 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.h @@ -43,10 +43,6 @@ public: unsigned DestReg, unsigned SrcReg, bool KillSrc) const; - MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, int FrameIx, - uint64_t Offset, const MDNode *MDPtr, - DebugLoc DL) const; - void storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned SrcReg, bool isKill, int FrameIndex, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td index d2cfc7d..23d81fc 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrInfo.td @@ -11,6 +11,19 @@ // //===----------------------------------------------------------------------===// +//===----------------------------------------------------------------------===// +// ARM Instruction Predicate Definitions. +// +def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, + AssemblerPredicate<"FeatureFPARMv8", "fp-armv8">; +def HasNEON : Predicate<"Subtarget->hasNEON()">, + AssemblerPredicate<"FeatureNEON", "neon">; +def HasCrypto : Predicate<"Subtarget->hasCrypto()">, + AssemblerPredicate<"FeatureCrypto","crypto">; + +// Use fused MAC if more precision in FP computation is allowed. +def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast)">; include "AArch64InstrFormats.td" //===----------------------------------------------------------------------===// @@ -114,6 +127,8 @@ def A64Sbfx : SDNode<"AArch64ISD::SBFX", SDTA64BFX>; def A64Ubfx : SDNode<"AArch64ISD::UBFX", SDTA64BFX>; +class BinOpFrag : PatFrag<(ops node:$LHS, node:$RHS), res>; + //===----------------------------------------------------------------------===// // Call sequence pseudo-instructions //===----------------------------------------------------------------------===// @@ -1263,7 +1278,7 @@ def : Pat<(i64 (sext_inreg (anyext i32:$Rn), i1)), // UBFX makes sense as an implementation of a 64-bit zero-extension too. Could // use either 64-bit or 32-bit variant, but 32-bit might be more efficient. -def : Pat<(zext i32:$Rn), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31), +def : Pat<(i64 (zext i32:$Rn)), (SUBREG_TO_REG (i64 0), (UBFXwwii $Rn, 0, 31), sub_32)>; //===------------------------------- @@ -1967,6 +1982,13 @@ def fpz64 : Operand, let DecoderMethod = "DecodeFPZeroOperand"; } +def fpz64movi : Operand, + ComplexPattern { + let ParserMatchClass = fpzero_asmoperand; + let PrintMethod = "printFPZeroOperand"; + let DecoderMethod = "DecodeFPZeroOperand"; +} + multiclass A64I_fpcmpSignal type, bit imm, dag ins, dag pattern> { def _quiet : A64I_fpcmp<0b0, 0b0, type, 0b00, {0b0, imm, 0b0, 0b0, 0b0}, (outs), ins, "fcmp\t$Rn, $Rm", [pattern], @@ -2173,6 +2195,29 @@ def FMSUBdddd : A64I_fpdp3Impl<"fmsub", FPR64, f64, 0b01, 0b0, 0b1, fmsub>; def FNMADDdddd : A64I_fpdp3Impl<"fnmadd", FPR64, f64, 0b01, 0b1, 0b0, fnmadd>; def FNMSUBdddd : A64I_fpdp3Impl<"fnmsub", FPR64, f64, 0b01, 0b1, 0b1, fnmsub>; +// Extra patterns for when we're allowed to optimise separate multiplication and +// addition. 
+let Predicates = [HasFPARMv8, UseFusedMAC] in { +def : Pat<(f32 (fadd FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), + (FMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; +def : Pat<(f32 (fsub FPR32:$Ra, (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), + (FMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; +def : Pat<(f32 (fsub (f32 (fmul FPR32:$Rn, FPR32:$Rm)), FPR32:$Ra)), + (FNMADDssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; +def : Pat<(f32 (fsub (f32 (fneg FPR32:$Ra)), (f32 (fmul FPR32:$Rn, FPR32:$Rm)))), + (FNMSUBssss FPR32:$Rn, FPR32:$Rm, FPR32:$Ra)>; + +def : Pat<(f64 (fadd FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), + (FMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +def : Pat<(f64 (fsub FPR64:$Ra, (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), + (FMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +def : Pat<(f64 (fsub (f64 (fmul FPR64:$Rn, FPR64:$Rm)), FPR64:$Ra)), + (FNMADDdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +def : Pat<(f64 (fsub (f64 (fneg FPR64:$Ra)), (f64 (fmul FPR64:$Rn, FPR64:$Rm)))), + (FNMSUBdddd FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; +} + + //===----------------------------------------------------------------------===// // Floating-point <-> fixed-point conversion instructions //===----------------------------------------------------------------------===// @@ -2308,6 +2353,7 @@ defm FCVTM : A64I_fptointRM<0b10, 0b0, "fcvtm">; defm FCVTZ : A64I_fptointRM<0b11, 0b0, "fcvtz">; defm FCVTA : A64I_fptointRM<0b00, 0b1, "fcvta">; +let Predicates = [HasFPARMv8] in { def : Pat<(i32 (fp_to_sint f32:$Rn)), (FCVTZSws $Rn)>; def : Pat<(i64 (fp_to_sint f32:$Rn)), (FCVTZSxs $Rn)>; def : Pat<(i32 (fp_to_uint f32:$Rn)), (FCVTZUws $Rn)>; @@ -2316,6 +2362,7 @@ def : Pat<(i32 (fp_to_sint f64:$Rn)), (FCVTZSwd $Rn)>; def : Pat<(i64 (fp_to_sint f64:$Rn)), (FCVTZSxd $Rn)>; def : Pat<(i32 (fp_to_uint f64:$Rn)), (FCVTZUwd $Rn)>; def : Pat<(i64 (fp_to_uint f64:$Rn)), (FCVTZUxd $Rn)>; +} multiclass A64I_inttofp { def CVTFsw : A64I_fpintI<0b0, 0b00, 0b00, {0, 1, o0}, FPR32, GPR32, asmop>; @@ -2327,6 +2374,7 @@ multiclass A64I_inttofp { defm S : A64I_inttofp<0b0, "scvtf">; defm U : A64I_inttofp<0b1, "ucvtf">; +let Predicates = [HasFPARMv8] in { def : Pat<(f32 (sint_to_fp i32:$Rn)), (SCVTFsw $Rn)>; def : Pat<(f32 (sint_to_fp i64:$Rn)), (SCVTFsx $Rn)>; def : Pat<(f64 (sint_to_fp i32:$Rn)), (SCVTFdw $Rn)>; @@ -2335,16 +2383,19 @@ def : Pat<(f32 (uint_to_fp i32:$Rn)), (UCVTFsw $Rn)>; def : Pat<(f32 (uint_to_fp i64:$Rn)), (UCVTFsx $Rn)>; def : Pat<(f64 (uint_to_fp i32:$Rn)), (UCVTFdw $Rn)>; def : Pat<(f64 (uint_to_fp i64:$Rn)), (UCVTFdx $Rn)>; +} def FMOVws : A64I_fpintI<0b0, 0b00, 0b00, 0b110, GPR32, FPR32, "fmov">; def FMOVsw : A64I_fpintI<0b0, 0b00, 0b00, 0b111, FPR32, GPR32, "fmov">; def FMOVxd : A64I_fpintI<0b1, 0b01, 0b00, 0b110, GPR64, FPR64, "fmov">; def FMOVdx : A64I_fpintI<0b1, 0b01, 0b00, 0b111, FPR64, GPR64, "fmov">; +let Predicates = [HasFPARMv8] in { def : Pat<(i32 (bitconvert f32:$Rn)), (FMOVws $Rn)>; def : Pat<(f32 (bitconvert i32:$Rn)), (FMOVsw $Rn)>; def : Pat<(i64 (bitconvert f64:$Rn)), (FMOVxd $Rn)>; def : Pat<(f64 (bitconvert i64:$Rn)), (FMOVdx $Rn)>; +} def lane1_asmoperand : AsmOperandClass { let Name = "Lane1"; @@ -2367,11 +2418,13 @@ let DecoderMethod = "DecodeFMOVLaneInstruction" in { "fmov\t$Rd.d[$Lane], $Rn", [], NoItinerary>; } +let Predicates = [HasFPARMv8] in { def : InstAlias<"fmov $Rd, $Rn.2d[$Lane]", (FMOVxv GPR64:$Rd, VPR128:$Rn, lane1:$Lane), 0b0>; def : InstAlias<"fmov $Rd.2d[$Lane], $Rn", (FMOVvx VPR128:$Rd, GPR64:$Rn, lane1:$Lane), 0b0>; +} 
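The FMOVws/FMOVsw and FMOVxd/FMOVdx patterns above treat bitconvert as a raw register-to-register move between a GPR and an FPR. A minimal C++ sketch of that value-level semantics (illustrative only, not part of the patch; helper names are invented):

    #include <cstdint>
    #include <cstring>

    // What the i32<->f32 bitconvert patterns promise: the 32 bits move
    // unchanged, with no numeric conversion.
    static uint32_t bits_of_float(float f) {
      uint32_t u;
      std::memcpy(&u, &f, sizeof u);  // corresponds to "fmov w0, s0"
      return u;
    }
    static float float_of_bits(uint32_t u) {
      float f;
      std::memcpy(&f, &u, sizeof f);  // corresponds to "fmov s0, w0"
      return f;
    }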
//===----------------------------------------------------------------------===// // Floating-point immediate instructions @@ -2465,11 +2518,15 @@ let mayLoad = 1 in { def LDRx_lit : A64I_LDRlitSimple<0b01, 0b0, GPR64>; } +let Predicates = [HasFPARMv8] in { def LDRs_lit : A64I_LDRlitSimple<0b00, 0b1, FPR32>; def LDRd_lit : A64I_LDRlitSimple<0b01, 0b1, FPR64>; +} let mayLoad = 1 in { + let Predicates = [HasFPARMv8] in { def LDRq_lit : A64I_LDRlitSimple<0b10, 0b1, FPR128>; + } def LDRSWx_lit : A64I_LDRlit<0b10, 0b0, @@ -3063,6 +3120,7 @@ defm LS32 defm LS64 : A64I_LDRSTR_unsigned<"LS64", 0b11, 0b0, 0b0, "", GPR64, dword_addrparams>; +let Predicates = [HasFPARMv8] in { // STR/LDR to/from a B register defm LSFP8 : A64I_LDRSTR_unsigned<"LSFP8", 0b00, 0b1, 0b0, "", FPR8, byte_addrparams>; @@ -3081,6 +3139,7 @@ defm LSFP64 defm LSFP128 : A64I_LDRSTR_unsigned<"LSFP128", 0b00, 0b1, 0b1, "", FPR128, qword_addrparams>; +} //===------------------------------ // 2.3 Signed loads @@ -3536,10 +3595,13 @@ multiclass A64I_LSPsimple opc, bit v, RegisterClass SomeReg, defm LSPair32 : A64I_LSPsimple<0b00, 0b0, GPR32, word_simm7, "LSPair32">; defm LSPair64 : A64I_LSPsimple<0b10, 0b0, GPR64, dword_simm7, "LSPair64">; + +let Predicates = [HasFPARMv8] in { defm LSFPPair32 : A64I_LSPsimple<0b00, 0b1, FPR32, word_simm7, "LSFPPair32">; defm LSFPPair64 : A64I_LSPsimple<0b01, 0b1, FPR64, dword_simm7, "LSFPPair64">; defm LSFPPair128 : A64I_LSPsimple<0b10, 0b1, FPR128, qword_simm7, "LSFPPair128">; +} def LDPSWx : A64I_LSPoffset<0b01, 0b0, 0b1, @@ -3974,14 +4036,17 @@ def : movalias; def : movalias; def : movalias; -def movw_addressref : ComplexPattern; +def movw_addressref_g0 : ComplexPattern">; +def movw_addressref_g1 : ComplexPattern">; +def movw_addressref_g2 : ComplexPattern">; +def movw_addressref_g3 : ComplexPattern">; -def : Pat<(A64WrapperLarge movw_addressref:$G3, movw_addressref:$G2, - movw_addressref:$G1, movw_addressref:$G0), - (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref:$G3), - movw_addressref:$G2), - movw_addressref:$G1), - movw_addressref:$G0)>; +def : Pat<(A64WrapperLarge movw_addressref_g3:$G3, movw_addressref_g2:$G2, + movw_addressref_g1:$G1, movw_addressref_g0:$G0), + (MOVKxii (MOVKxii (MOVKxii (MOVZxii movw_addressref_g3:$G3), + movw_addressref_g2:$G2), + movw_addressref_g1:$G1), + movw_addressref_g0:$G0)>; //===----------------------------------------------------------------------===// // PC-relative addressing instructions @@ -5120,3 +5185,9 @@ defm : regoff_pats<"Xm", (add i64:$Rn, i64:$Rm), defm : regoff_pats<"Xm", (add i64:$Rn, (shl i64:$Rm, SHIFT)), (i64 i64:$Rn), (i64 i64:$Rm), (i64 3)>; + +//===----------------------------------------------------------------------===// +// Advanced SIMD (NEON) Support +// + +include "AArch64InstrNEON.td" diff --git a/contrib/llvm/lib/Target/AArch64/AArch64InstrNEON.td b/contrib/llvm/lib/Target/AArch64/AArch64InstrNEON.td new file mode 100644 index 0000000..d71749d --- /dev/null +++ b/contrib/llvm/lib/Target/AArch64/AArch64InstrNEON.td @@ -0,0 +1,8671 @@ +//===-- AArch64InstrNEON.td - NEON support for AArch64 -----*- tablegen -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file describes the AArch64 NEON instruction set. 
+// +//===----------------------------------------------------------------------===// + +//===----------------------------------------------------------------------===// +// NEON-specific DAG Nodes. +//===----------------------------------------------------------------------===// +def Neon_bsl : SDNode<"AArch64ISD::NEON_BSL", SDTypeProfile<1, 3, + [SDTCisVec<0>, SDTCisSameAs<0, 1>, SDTCisSameAs<0, 2>, + SDTCisSameAs<0, 3>]>>; + +// (outs Result), (ins Imm, OpCmode) +def SDT_Neon_movi : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisVT<1, i32>]>; + +def Neon_movi : SDNode<"AArch64ISD::NEON_MOVIMM", SDT_Neon_movi>; + +def Neon_mvni : SDNode<"AArch64ISD::NEON_MVNIMM", SDT_Neon_movi>; + +// (outs Result), (ins Imm) +def Neon_fmovi : SDNode<"AArch64ISD::NEON_FMOVIMM", SDTypeProfile<1, 1, + [SDTCisVec<0>, SDTCisVT<1, i32>]>>; + +// (outs Result), (ins LHS, RHS, CondCode) +def Neon_cmp : SDNode<"AArch64ISD::NEON_CMP", SDTypeProfile<1, 3, + [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>; + +// (outs Result), (ins LHS, 0/0.0 constant, CondCode) +def Neon_cmpz : SDNode<"AArch64ISD::NEON_CMPZ", SDTypeProfile<1, 3, + [SDTCisVec<0>, SDTCisVec<1>]>>; + +// (outs Result), (ins LHS, RHS) +def Neon_tst : SDNode<"AArch64ISD::NEON_TST", SDTypeProfile<1, 2, + [SDTCisVec<0>, SDTCisSameAs<1, 2>]>>; + +def SDTARMVSH : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisVT<2, i32>]>; +def Neon_sqrshlImm : SDNode<"AArch64ISD::NEON_QSHLs", SDTARMVSH>; +def Neon_uqrshlImm : SDNode<"AArch64ISD::NEON_QSHLu", SDTARMVSH>; + +def SDTPERMUTE : SDTypeProfile<1, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>]>; +def Neon_uzp1 : SDNode<"AArch64ISD::NEON_UZP1", SDTPERMUTE>; +def Neon_uzp2 : SDNode<"AArch64ISD::NEON_UZP2", SDTPERMUTE>; +def Neon_zip1 : SDNode<"AArch64ISD::NEON_ZIP1", SDTPERMUTE>; +def Neon_zip2 : SDNode<"AArch64ISD::NEON_ZIP2", SDTPERMUTE>; +def Neon_trn1 : SDNode<"AArch64ISD::NEON_TRN1", SDTPERMUTE>; +def Neon_trn2 : SDNode<"AArch64ISD::NEON_TRN2", SDTPERMUTE>; + +def SDTVSHUF : SDTypeProfile<1, 1, [SDTCisVec<0>, SDTCisSameAs<0, 1>]>; +def Neon_rev64 : SDNode<"AArch64ISD::NEON_REV64", SDTVSHUF>; +def Neon_rev32 : SDNode<"AArch64ISD::NEON_REV32", SDTVSHUF>; +def Neon_rev16 : SDNode<"AArch64ISD::NEON_REV16", SDTVSHUF>; +def Neon_vdup : SDNode<"AArch64ISD::NEON_VDUP", SDTypeProfile<1, 1, + [SDTCisVec<0>]>>; +def Neon_vduplane : SDNode<"AArch64ISD::NEON_VDUPLANE", SDTypeProfile<1, 2, + [SDTCisVec<0>, SDTCisVec<1>, SDTCisVT<2, i64>]>>; +def Neon_vextract : SDNode<"AArch64ISD::NEON_VEXTRACT", SDTypeProfile<1, 3, + [SDTCisVec<0>, SDTCisSameAs<0, 1>, + SDTCisSameAs<0, 2>, SDTCisVT<3, i64>]>>; + +def SDT_assertext : SDTypeProfile<1, 1, + [SDTCisInt<0>, SDTCisInt<1>, SDTCisSameAs<1, 0>]>; +def assertsext : SDNode<"ISD::AssertSext", SDT_assertext>; +def assertzext : SDNode<"ISD::AssertZext", SDT_assertext>; + +//===----------------------------------------------------------------------===// +// Multiclasses +//===----------------------------------------------------------------------===// + +multiclass NeonI_3VSame_B_sizes size, bits<5> opcode, + string asmop, SDPatternOperator opnode8B, + SDPatternOperator opnode16B, + bit Commutable = 0> { + let isCommutable = Commutable in { + def _8B : NeonI_3VSame<0b0, u, size, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), + asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", + [(set (v8i8 VPR64:$Rd), + (v8i8 (opnode8B (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], + NoItinerary>; + + def _16B : NeonI_3VSame<0b1, u, size, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), + asmop # 
"\t$Rd.16b, $Rn.16b, $Rm.16b", + [(set (v16i8 VPR128:$Rd), + (v16i8 (opnode16B (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], + NoItinerary>; + } + +} + +multiclass NeonI_3VSame_HS_sizes opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> { + let isCommutable = Commutable in { + def _4H : NeonI_3VSame<0b0, u, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), + asmop # "\t$Rd.4h, $Rn.4h, $Rm.4h", + [(set (v4i16 VPR64:$Rd), + (v4i16 (opnode (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))))], + NoItinerary>; + + def _8H : NeonI_3VSame<0b1, u, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd.8h, $Rn.8h, $Rm.8h", + [(set (v8i16 VPR128:$Rd), + (v8i16 (opnode (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))))], + NoItinerary>; + + def _2S : NeonI_3VSame<0b0, u, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), + asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", + [(set (v2i32 VPR64:$Rd), + (v2i32 (opnode (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))))], + NoItinerary>; + + def _4S : NeonI_3VSame<0b1, u, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", + [(set (v4i32 VPR128:$Rd), + (v4i32 (opnode (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))))], + NoItinerary>; + } +} +multiclass NeonI_3VSame_BHS_sizes opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> + : NeonI_3VSame_HS_sizes { + let isCommutable = Commutable in { + def _8B : NeonI_3VSame<0b0, u, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), + asmop # "\t$Rd.8b, $Rn.8b, $Rm.8b", + [(set (v8i8 VPR64:$Rd), + (v8i8 (opnode (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))))], + NoItinerary>; + + def _16B : NeonI_3VSame<0b1, u, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd.16b, $Rn.16b, $Rm.16b", + [(set (v16i8 VPR128:$Rd), + (v16i8 (opnode (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))))], + NoItinerary>; + } +} + +multiclass NeonI_3VSame_BHSD_sizes opcode, + string asmop, SDPatternOperator opnode, + bit Commutable = 0> + : NeonI_3VSame_BHS_sizes { + let isCommutable = Commutable in { + def _2D : NeonI_3VSame<0b1, u, 0b11, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", + [(set (v2i64 VPR128:$Rd), + (v2i64 (opnode (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))))], + NoItinerary>; + } +} + +// Multiclass NeonI_3VSame_SD_sizes: Operand types are floating point types, +// but Result types can be integer or floating point types. 
+multiclass NeonI_3VSame_SD_sizes<bit u, bit size, bits<5> opcode, + string asmop, SDPatternOperator opnode2S, + SDPatternOperator opnode4S, + SDPatternOperator opnode2D, + ValueType ResTy2S, ValueType ResTy4S, + ValueType ResTy2D, bit Commutable = 0> { + let isCommutable = Commutable in { + def _2S : NeonI_3VSame<0b0, u, {size, 0b0}, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn, VPR64:$Rm), + asmop # "\t$Rd.2s, $Rn.2s, $Rm.2s", + [(set (ResTy2S VPR64:$Rd), + (ResTy2S (opnode2S (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))))], + NoItinerary>; + + def _4S : NeonI_3VSame<0b1, u, {size, 0b0}, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd.4s, $Rn.4s, $Rm.4s", + [(set (ResTy4S VPR128:$Rd), + (ResTy4S (opnode4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))))], + NoItinerary>; + + def _2D : NeonI_3VSame<0b1, u, {size, 0b1}, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm), + asmop # "\t$Rd.2d, $Rn.2d, $Rm.2d", + [(set (ResTy2D VPR128:$Rd), + (ResTy2D (opnode2D (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))))], + NoItinerary>; + } +} + +//===----------------------------------------------------------------------===// +// Instruction Definitions +//===----------------------------------------------------------------------===// + +// Vector Arithmetic Instructions + +// Vector Add (Integer and Floating-Point) + +defm ADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b10000, "add", add, 1>; +defm FADDvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11010, "fadd", fadd, fadd, fadd, + v2f32, v4f32, v2f64, 1>; + +// Vector Sub (Integer and Floating-Point) + +defm SUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10000, "sub", sub, 0>; +defm FSUBvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11010, "fsub", fsub, fsub, fsub, + v2f32, v4f32, v2f64, 0>; + +// Vector Multiply (Integer and Floating-Point) + +defm MULvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10011, "mul", mul, 1>; +defm FMULvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11011, "fmul", fmul, fmul, fmul, + v2f32, v4f32, v2f64, 1>; + +// Vector Multiply (Polynomial) + +defm PMULvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b10011, "pmul", + int_arm_neon_vmulp, int_arm_neon_vmulp, 1>; + +// Vector Multiply-accumulate and Multiply-subtract (Integer) + +// class NeonI_3VSame_Constraint_impl: NeonI_3VSame with no data type and +// a tied operand constraint ("$src = $Rd").
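What that tied operand buys is an in-place accumulator. A scalar C++ model of the MLA semantics implemented by the class and PatFrags below (the helper is invented for illustration):

    #include <cstdint>
    #include <cstddef>

    // mla: Rd = Rd + Rn * Rm, lane by lane; the "$src = $Rd" constraint
    // is what ties the accumulator input to the destination register.
    static void mla_8b(uint8_t rd[8], const uint8_t rn[8], const uint8_t rm[8]) {
      for (size_t i = 0; i < 8; ++i)
        rd[i] = static_cast<uint8_t>(rd[i] + rn[i] * rm[i]);
    }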
+class NeonI_3VSame_Constraint_impl size, + bits<5> opcode, SDPatternOperator opnode> + : NeonI_3VSame { + let Constraints = "$src = $Rd"; +} + +def Neon_mla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), + (add node:$Ra, (mul node:$Rn, node:$Rm))>; + +def Neon_mls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), + (sub node:$Ra, (mul node:$Rn, node:$Rm))>; + + +def MLAvvv_8B: NeonI_3VSame_Constraint_impl<"mla", ".8b", VPR64, v8i8, + 0b0, 0b0, 0b00, 0b10010, Neon_mla>; +def MLAvvv_16B: NeonI_3VSame_Constraint_impl<"mla", ".16b", VPR128, v16i8, + 0b1, 0b0, 0b00, 0b10010, Neon_mla>; +def MLAvvv_4H: NeonI_3VSame_Constraint_impl<"mla", ".4h", VPR64, v4i16, + 0b0, 0b0, 0b01, 0b10010, Neon_mla>; +def MLAvvv_8H: NeonI_3VSame_Constraint_impl<"mla", ".8h", VPR128, v8i16, + 0b1, 0b0, 0b01, 0b10010, Neon_mla>; +def MLAvvv_2S: NeonI_3VSame_Constraint_impl<"mla", ".2s", VPR64, v2i32, + 0b0, 0b0, 0b10, 0b10010, Neon_mla>; +def MLAvvv_4S: NeonI_3VSame_Constraint_impl<"mla", ".4s", VPR128, v4i32, + 0b1, 0b0, 0b10, 0b10010, Neon_mla>; + +def MLSvvv_8B: NeonI_3VSame_Constraint_impl<"mls", ".8b", VPR64, v8i8, + 0b0, 0b1, 0b00, 0b10010, Neon_mls>; +def MLSvvv_16B: NeonI_3VSame_Constraint_impl<"mls", ".16b", VPR128, v16i8, + 0b1, 0b1, 0b00, 0b10010, Neon_mls>; +def MLSvvv_4H: NeonI_3VSame_Constraint_impl<"mls", ".4h", VPR64, v4i16, + 0b0, 0b1, 0b01, 0b10010, Neon_mls>; +def MLSvvv_8H: NeonI_3VSame_Constraint_impl<"mls", ".8h", VPR128, v8i16, + 0b1, 0b1, 0b01, 0b10010, Neon_mls>; +def MLSvvv_2S: NeonI_3VSame_Constraint_impl<"mls", ".2s", VPR64, v2i32, + 0b0, 0b1, 0b10, 0b10010, Neon_mls>; +def MLSvvv_4S: NeonI_3VSame_Constraint_impl<"mls", ".4s", VPR128, v4i32, + 0b1, 0b1, 0b10, 0b10010, Neon_mls>; + +// Vector Multiply-accumulate and Multiply-subtract (Floating Point) + +def Neon_fmla : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), + (fadd node:$Ra, (fmul node:$Rn, node:$Rm))>; + +def Neon_fmls : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), + (fsub node:$Ra, (fmul node:$Rn, node:$Rm))>; + +let Predicates = [HasNEON, UseFusedMAC] in { +def FMLAvvv_2S: NeonI_3VSame_Constraint_impl<"fmla", ".2s", VPR64, v2f32, + 0b0, 0b0, 0b00, 0b11001, Neon_fmla>; +def FMLAvvv_4S: NeonI_3VSame_Constraint_impl<"fmla", ".4s", VPR128, v4f32, + 0b1, 0b0, 0b00, 0b11001, Neon_fmla>; +def FMLAvvv_2D: NeonI_3VSame_Constraint_impl<"fmla", ".2d", VPR128, v2f64, + 0b1, 0b0, 0b01, 0b11001, Neon_fmla>; + +def FMLSvvv_2S: NeonI_3VSame_Constraint_impl<"fmls", ".2s", VPR64, v2f32, + 0b0, 0b0, 0b10, 0b11001, Neon_fmls>; +def FMLSvvv_4S: NeonI_3VSame_Constraint_impl<"fmls", ".4s", VPR128, v4f32, + 0b1, 0b0, 0b10, 0b11001, Neon_fmls>; +def FMLSvvv_2D: NeonI_3VSame_Constraint_impl<"fmls", ".2d", VPR128, v2f64, + 0b1, 0b0, 0b11, 0b11001, Neon_fmls>; +} + +// We're also allowed to match the fma instruction regardless of compile +// options. 
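The distinction, as a hedged C++ illustration using only the standard library: an explicit fma call is a single correctly rounded operation, while a*b+c is two, so only the former may always be mapped to FMLA:

    #include <cmath>

    static float fused(float a, float b, float c) {
      return std::fmaf(a, b, c);  // one rounding step: always legal to select FMLA
    }
    static float separate(float a, float b, float c) {
      return a * b + c;           // two rounding steps: fusing this changes results,
    }                             // hence the UseFusedMAC guard above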
+def : Pat<(v2f32 (fma VPR64:$Rn, VPR64:$Rm, VPR64:$Ra)), + (FMLAvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>; +def : Pat<(v4f32 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)), + (FMLAvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; +def : Pat<(v2f64 (fma VPR128:$Rn, VPR128:$Rm, VPR128:$Ra)), + (FMLAvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; + +def : Pat<(v2f32 (fma (fneg VPR64:$Rn), VPR64:$Rm, VPR64:$Ra)), + (FMLSvvv_2S VPR64:$Ra, VPR64:$Rn, VPR64:$Rm)>; +def : Pat<(v4f32 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), + (FMLSvvv_4S VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; +def : Pat<(v2f64 (fma (fneg VPR128:$Rn), VPR128:$Rm, VPR128:$Ra)), + (FMLSvvv_2D VPR128:$Ra, VPR128:$Rn, VPR128:$Rm)>; + +// Vector Divide (Floating-Point) + +defm FDIVvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11111, "fdiv", fdiv, fdiv, fdiv, + v2f32, v4f32, v2f64, 0>; + +// Vector Bitwise Operations + +// Vector Bitwise AND + +defm ANDvvv : NeonI_3VSame_B_sizes<0b0, 0b00, 0b00011, "and", and, and, 1>; + +// Vector Bitwise Exclusive OR + +defm EORvvv : NeonI_3VSame_B_sizes<0b1, 0b00, 0b00011, "eor", xor, xor, 1>; + +// Vector Bitwise OR + +defm ORRvvv : NeonI_3VSame_B_sizes<0b0, 0b10, 0b00011, "orr", or, or, 1>; + +// ORR disassembled as MOV if Vn==Vm + +// Vector Move - register +// Alias for ORR if Vn=Vm. +// FIXME: This is actually the preferred syntax but TableGen can't deal with +// custom printing of aliases. +def : NeonInstAlias<"mov $Rd.8b, $Rn.8b", + (ORRvvv_8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rn), 0>; +def : NeonInstAlias<"mov $Rd.16b, $Rn.16b", + (ORRvvv_16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rn), 0>; + +// The MOVI instruction takes two immediate operands. The first is the +// immediate encoding, while the second is the cmode. A cmode of 14, or +// 0b1110, produces a MOVI operation, rather than a MVNI, ORR, or BIC. 
+def Neon_AllZero : PatFrag<(ops), (Neon_movi (i32 0), (i32 14))>; +def Neon_AllOne : PatFrag<(ops), (Neon_movi (i32 255), (i32 14))>; + +def Neon_not8B : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v8i8 Neon_AllOne)))>; +def Neon_not16B : PatFrag<(ops node:$in), + (xor node:$in, (bitconvert (v16i8 Neon_AllOne)))>; + +def Neon_orn8B : PatFrag<(ops node:$Rn, node:$Rm), + (or node:$Rn, (Neon_not8B node:$Rm))>; + +def Neon_orn16B : PatFrag<(ops node:$Rn, node:$Rm), + (or node:$Rn, (Neon_not16B node:$Rm))>; + +def Neon_bic8B : PatFrag<(ops node:$Rn, node:$Rm), + (and node:$Rn, (Neon_not8B node:$Rm))>; + +def Neon_bic16B : PatFrag<(ops node:$Rn, node:$Rm), + (and node:$Rn, (Neon_not16B node:$Rm))>; + + +// Vector Bitwise OR NOT - register + +defm ORNvvv : NeonI_3VSame_B_sizes<0b0, 0b11, 0b00011, "orn", + Neon_orn8B, Neon_orn16B, 0>; + +// Vector Bitwise Bit Clear (AND NOT) - register + +defm BICvvv : NeonI_3VSame_B_sizes<0b0, 0b01, 0b00011, "bic", + Neon_bic8B, Neon_bic16B, 0>; + +multiclass Neon_bitwise2V_patterns { + def : Pat<(v2i32 (opnode8B VPR64:$Rn, VPR64:$Rm)), + (INST8B VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v4i16 (opnode8B VPR64:$Rn, VPR64:$Rm)), + (INST8B VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v1i64 (opnode8B VPR64:$Rn, VPR64:$Rm)), + (INST8B VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v4i32 (opnode16B VPR128:$Rn, VPR128:$Rm)), + (INST16B VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v8i16 (opnode16B VPR128:$Rn, VPR128:$Rm)), + (INST16B VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v2i64 (opnode16B VPR128:$Rn, VPR128:$Rm)), + (INST16B VPR128:$Rn, VPR128:$Rm)>; +} + +// Additional patterns for bitwise instructions AND, EOR, ORR, BIC, ORN +defm : Neon_bitwise2V_patterns; +defm : Neon_bitwise2V_patterns; +defm : Neon_bitwise2V_patterns; +defm : Neon_bitwise2V_patterns; +defm : Neon_bitwise2V_patterns; + +// Vector Bitwise Select +def BSLvvv_8B : NeonI_3VSame_Constraint_impl<"bsl", ".8b", VPR64, v8i8, + 0b0, 0b1, 0b01, 0b00011, Neon_bsl>; + +def BSLvvv_16B : NeonI_3VSame_Constraint_impl<"bsl", ".16b", VPR128, v16i8, + 0b1, 0b1, 0b01, 0b00011, Neon_bsl>; + +multiclass Neon_bitwise3V_patterns { + // Disassociate type from instruction definition + def : Pat<(v2i32 (opnode VPR64:$src,VPR64:$Rn, VPR64:$Rm)), + (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v4i16 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)), + (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v1i64 (opnode VPR64:$src, VPR64:$Rn, VPR64:$Rm)), + (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v4i32 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)), + (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v8i16 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)), + (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v2i64 (opnode VPR128:$src, VPR128:$Rn, VPR128:$Rm)), + (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; + + // Allow to match BSL instruction pattern with non-constant operand + def : Pat<(v8i8 (or (and VPR64:$Rn, VPR64:$Rd), + (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), + (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v4i16 (or (and VPR64:$Rn, VPR64:$Rd), + (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), + (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v2i32 (or (and VPR64:$Rn, VPR64:$Rd), + (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), + (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v1i64 (or (and VPR64:$Rn, VPR64:$Rd), + (and VPR64:$Rm, (Neon_not8B VPR64:$Rd)))), + (INST8B VPR64:$Rd, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v16i8 (or (and VPR128:$Rn, VPR128:$Rd), + (and VPR128:$Rm, 
(Neon_not16B VPR128:$Rd)))), + (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v8i16 (or (and VPR128:$Rn, VPR128:$Rd), + (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), + (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v4i32 (or (and VPR128:$Rn, VPR128:$Rd), + (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), + (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v2i64 (or (and VPR128:$Rn, VPR128:$Rd), + (and VPR128:$Rm, (Neon_not16B VPR128:$Rd)))), + (INST16B VPR128:$Rd, VPR128:$Rn, VPR128:$Rm)>; + + // Allow to match llvm.arm.* intrinsics. + def : Pat<(v8i8 (int_arm_neon_vbsl (v8i8 VPR64:$src), + (v8i8 VPR64:$Rn), (v8i8 VPR64:$Rm))), + (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v4i16 (int_arm_neon_vbsl (v4i16 VPR64:$src), + (v4i16 VPR64:$Rn), (v4i16 VPR64:$Rm))), + (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v2i32 (int_arm_neon_vbsl (v2i32 VPR64:$src), + (v2i32 VPR64:$Rn), (v2i32 VPR64:$Rm))), + (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v1i64 (int_arm_neon_vbsl (v1i64 VPR64:$src), + (v1i64 VPR64:$Rn), (v1i64 VPR64:$Rm))), + (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v2f32 (int_arm_neon_vbsl (v2f32 VPR64:$src), + (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))), + (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v1f64 (int_arm_neon_vbsl (v1f64 VPR64:$src), + (v1f64 VPR64:$Rn), (v1f64 VPR64:$Rm))), + (INST8B VPR64:$src, VPR64:$Rn, VPR64:$Rm)>; + def : Pat<(v16i8 (int_arm_neon_vbsl (v16i8 VPR128:$src), + (v16i8 VPR128:$Rn), (v16i8 VPR128:$Rm))), + (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v8i16 (int_arm_neon_vbsl (v8i16 VPR128:$src), + (v8i16 VPR128:$Rn), (v8i16 VPR128:$Rm))), + (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v4i32 (int_arm_neon_vbsl (v4i32 VPR128:$src), + (v4i32 VPR128:$Rn), (v4i32 VPR128:$Rm))), + (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v2i64 (int_arm_neon_vbsl (v2i64 VPR128:$src), + (v2i64 VPR128:$Rn), (v2i64 VPR128:$Rm))), + (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v4f32 (int_arm_neon_vbsl (v4f32 VPR128:$src), + (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))), + (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; + def : Pat<(v2f64 (int_arm_neon_vbsl (v2f64 VPR128:$src), + (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))), + (INST16B VPR128:$src, VPR128:$Rn, VPR128:$Rm)>; +} + +// Additional patterns for bitwise instruction BSL +defm: Neon_bitwise3V_patterns; + +def Neon_NoBSLop : PatFrag<(ops node:$src, node:$Rn, node:$Rm), + (Neon_bsl node:$src, node:$Rn, node:$Rm), + [{ (void)N; return false; }]>; + +// Vector Bitwise Insert if True + +def BITvvv_8B : NeonI_3VSame_Constraint_impl<"bit", ".8b", VPR64, v8i8, + 0b0, 0b1, 0b10, 0b00011, Neon_NoBSLop>; +def BITvvv_16B : NeonI_3VSame_Constraint_impl<"bit", ".16b", VPR128, v16i8, + 0b1, 0b1, 0b10, 0b00011, Neon_NoBSLop>; + +// Vector Bitwise Insert if False + +def BIFvvv_8B : NeonI_3VSame_Constraint_impl<"bif", ".8b", VPR64, v8i8, + 0b0, 0b1, 0b11, 0b00011, Neon_NoBSLop>; +def BIFvvv_16B : NeonI_3VSame_Constraint_impl<"bif", ".16b", VPR128, v16i8, + 0b1, 0b1, 0b11, 0b00011, Neon_NoBSLop>; + +// Vector Absolute Difference and Accumulate (Signed, Unsigned) + +def Neon_uaba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), + (add node:$Ra, (int_arm_neon_vabdu node:$Rn, node:$Rm))>; +def Neon_saba : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), + (add node:$Ra, (int_arm_neon_vabds node:$Rn, node:$Rm))>; + +// Vector Absolute Difference and Accumulate (Unsigned) +def UABAvvv_8B : 
NeonI_3VSame_Constraint_impl<"uaba", ".8b", VPR64, v8i8, + 0b0, 0b1, 0b00, 0b01111, Neon_uaba>; +def UABAvvv_16B : NeonI_3VSame_Constraint_impl<"uaba", ".16b", VPR128, v16i8, + 0b1, 0b1, 0b00, 0b01111, Neon_uaba>; +def UABAvvv_4H : NeonI_3VSame_Constraint_impl<"uaba", ".4h", VPR64, v4i16, + 0b0, 0b1, 0b01, 0b01111, Neon_uaba>; +def UABAvvv_8H : NeonI_3VSame_Constraint_impl<"uaba", ".8h", VPR128, v8i16, + 0b1, 0b1, 0b01, 0b01111, Neon_uaba>; +def UABAvvv_2S : NeonI_3VSame_Constraint_impl<"uaba", ".2s", VPR64, v2i32, + 0b0, 0b1, 0b10, 0b01111, Neon_uaba>; +def UABAvvv_4S : NeonI_3VSame_Constraint_impl<"uaba", ".4s", VPR128, v4i32, + 0b1, 0b1, 0b10, 0b01111, Neon_uaba>; + +// Vector Absolute Difference and Accumulate (Signed) +def SABAvvv_8B : NeonI_3VSame_Constraint_impl<"saba", ".8b", VPR64, v8i8, + 0b0, 0b0, 0b00, 0b01111, Neon_saba>; +def SABAvvv_16B : NeonI_3VSame_Constraint_impl<"saba", ".16b", VPR128, v16i8, + 0b1, 0b0, 0b00, 0b01111, Neon_saba>; +def SABAvvv_4H : NeonI_3VSame_Constraint_impl<"saba", ".4h", VPR64, v4i16, + 0b0, 0b0, 0b01, 0b01111, Neon_saba>; +def SABAvvv_8H : NeonI_3VSame_Constraint_impl<"saba", ".8h", VPR128, v8i16, + 0b1, 0b0, 0b01, 0b01111, Neon_saba>; +def SABAvvv_2S : NeonI_3VSame_Constraint_impl<"saba", ".2s", VPR64, v2i32, + 0b0, 0b0, 0b10, 0b01111, Neon_saba>; +def SABAvvv_4S : NeonI_3VSame_Constraint_impl<"saba", ".4s", VPR128, v4i32, + 0b1, 0b0, 0b10, 0b01111, Neon_saba>; + + +// Vector Absolute Difference (Signed, Unsigned) +defm UABDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01110, "uabd", int_arm_neon_vabdu, 0>; +defm SABDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01110, "sabd", int_arm_neon_vabds, 0>; + +// Vector Absolute Difference (Floating Point) +defm FABDvvv: NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11010, "fabd", + int_arm_neon_vabds, int_arm_neon_vabds, + int_arm_neon_vabds, v2f32, v4f32, v2f64, 0>; + +// Vector Reciprocal Step (Floating Point) +defm FRECPSvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11111, "frecps", + int_arm_neon_vrecps, int_arm_neon_vrecps, + int_arm_neon_vrecps, + v2f32, v4f32, v2f64, 0>; + +// Vector Reciprocal Square Root Step (Floating Point) +defm FRSQRTSvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", + int_arm_neon_vrsqrts, + int_arm_neon_vrsqrts, + int_arm_neon_vrsqrts, + v2f32, v4f32, v2f64, 0>; + +// Vector Comparisons + +def Neon_cmeq : PatFrag<(ops node:$lhs, node:$rhs), + (Neon_cmp node:$lhs, node:$rhs, SETEQ)>; +def Neon_cmphs : PatFrag<(ops node:$lhs, node:$rhs), + (Neon_cmp node:$lhs, node:$rhs, SETUGE)>; +def Neon_cmge : PatFrag<(ops node:$lhs, node:$rhs), + (Neon_cmp node:$lhs, node:$rhs, SETGE)>; +def Neon_cmhi : PatFrag<(ops node:$lhs, node:$rhs), + (Neon_cmp node:$lhs, node:$rhs, SETUGT)>; +def Neon_cmgt : PatFrag<(ops node:$lhs, node:$rhs), + (Neon_cmp node:$lhs, node:$rhs, SETGT)>; + +// NeonI_compare_aliases class: swaps register operands to implement +// comparison aliases, e.g., CMLE is alias for CMGE with operands reversed. 
+class NeonI_compare_aliases + : NeonInstAlias; + +// Vector Comparisons (Integer) + +// Vector Compare Mask Equal (Integer) +let isCommutable =1 in { +defm CMEQvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b10001, "cmeq", Neon_cmeq, 0>; +} + +// Vector Compare Mask Higher or Same (Unsigned Integer) +defm CMHSvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00111, "cmhs", Neon_cmphs, 0>; + +// Vector Compare Mask Greater Than or Equal (Integer) +defm CMGEvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00111, "cmge", Neon_cmge, 0>; + +// Vector Compare Mask Higher (Unsigned Integer) +defm CMHIvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00110, "cmhi", Neon_cmhi, 0>; + +// Vector Compare Mask Greater Than (Integer) +defm CMGTvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00110, "cmgt", Neon_cmgt, 0>; + +// Vector Compare Mask Bitwise Test (Integer) +defm CMTSTvvv: NeonI_3VSame_BHSD_sizes<0b0, 0b10001, "cmtst", Neon_tst, 0>; + +// Vector Compare Mask Less or Same (Unsigned Integer) +// CMLS is alias for CMHS with operands reversed. +def CMLSvvv_8B : NeonI_compare_aliases<"cmls", ".8b", CMHSvvv_8B, VPR64>; +def CMLSvvv_16B : NeonI_compare_aliases<"cmls", ".16b", CMHSvvv_16B, VPR128>; +def CMLSvvv_4H : NeonI_compare_aliases<"cmls", ".4h", CMHSvvv_4H, VPR64>; +def CMLSvvv_8H : NeonI_compare_aliases<"cmls", ".8h", CMHSvvv_8H, VPR128>; +def CMLSvvv_2S : NeonI_compare_aliases<"cmls", ".2s", CMHSvvv_2S, VPR64>; +def CMLSvvv_4S : NeonI_compare_aliases<"cmls", ".4s", CMHSvvv_4S, VPR128>; +def CMLSvvv_2D : NeonI_compare_aliases<"cmls", ".2d", CMHSvvv_2D, VPR128>; + +// Vector Compare Mask Less Than or Equal (Integer) +// CMLE is alias for CMGE with operands reversed. +def CMLEvvv_8B : NeonI_compare_aliases<"cmle", ".8b", CMGEvvv_8B, VPR64>; +def CMLEvvv_16B : NeonI_compare_aliases<"cmle", ".16b", CMGEvvv_16B, VPR128>; +def CMLEvvv_4H : NeonI_compare_aliases<"cmle", ".4h", CMGEvvv_4H, VPR64>; +def CMLEvvv_8H : NeonI_compare_aliases<"cmle", ".8h", CMGEvvv_8H, VPR128>; +def CMLEvvv_2S : NeonI_compare_aliases<"cmle", ".2s", CMGEvvv_2S, VPR64>; +def CMLEvvv_4S : NeonI_compare_aliases<"cmle", ".4s", CMGEvvv_4S, VPR128>; +def CMLEvvv_2D : NeonI_compare_aliases<"cmle", ".2d", CMGEvvv_2D, VPR128>; + +// Vector Compare Mask Lower (Unsigned Integer) +// CMLO is alias for CMHI with operands reversed. +def CMLOvvv_8B : NeonI_compare_aliases<"cmlo", ".8b", CMHIvvv_8B, VPR64>; +def CMLOvvv_16B : NeonI_compare_aliases<"cmlo", ".16b", CMHIvvv_16B, VPR128>; +def CMLOvvv_4H : NeonI_compare_aliases<"cmlo", ".4h", CMHIvvv_4H, VPR64>; +def CMLOvvv_8H : NeonI_compare_aliases<"cmlo", ".8h", CMHIvvv_8H, VPR128>; +def CMLOvvv_2S : NeonI_compare_aliases<"cmlo", ".2s", CMHIvvv_2S, VPR64>; +def CMLOvvv_4S : NeonI_compare_aliases<"cmlo", ".4s", CMHIvvv_4S, VPR128>; +def CMLOvvv_2D : NeonI_compare_aliases<"cmlo", ".2d", CMHIvvv_2D, VPR128>; + +// Vector Compare Mask Less Than (Integer) +// CMLT is alias for CMGT with operands reversed. 
+def CMLTvvv_8B : NeonI_compare_aliases<"cmlt", ".8b", CMGTvvv_8B, VPR64>; +def CMLTvvv_16B : NeonI_compare_aliases<"cmlt", ".16b", CMGTvvv_16B, VPR128>; +def CMLTvvv_4H : NeonI_compare_aliases<"cmlt", ".4h", CMGTvvv_4H, VPR64>; +def CMLTvvv_8H : NeonI_compare_aliases<"cmlt", ".8h", CMGTvvv_8H, VPR128>; +def CMLTvvv_2S : NeonI_compare_aliases<"cmlt", ".2s", CMGTvvv_2S, VPR64>; +def CMLTvvv_4S : NeonI_compare_aliases<"cmlt", ".4s", CMGTvvv_4S, VPR128>; +def CMLTvvv_2D : NeonI_compare_aliases<"cmlt", ".2d", CMGTvvv_2D, VPR128>; + + +def neon_uimm0_asmoperand : AsmOperandClass +{ + let Name = "UImm0"; + let PredicateMethod = "isUImm<0>"; + let RenderMethod = "addImmOperands"; +} + +def neon_uimm0 : Operand, ImmLeaf { + let ParserMatchClass = neon_uimm0_asmoperand; + let PrintMethod = "printNeonUImm0Operand"; + +} + +multiclass NeonI_cmpz_sizes opcode, string asmop, CondCode CC> +{ + def _8B : NeonI_2VMisc<0b0, u, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), + asmop # "\t$Rd.8b, $Rn.8b, $Imm", + [(set (v8i8 VPR64:$Rd), + (v8i8 (Neon_cmpz (v8i8 VPR64:$Rn), (i32 imm:$Imm), CC)))], + NoItinerary>; + + def _16B : NeonI_2VMisc<0b1, u, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), + asmop # "\t$Rd.16b, $Rn.16b, $Imm", + [(set (v16i8 VPR128:$Rd), + (v16i8 (Neon_cmpz (v16i8 VPR128:$Rn), (i32 imm:$Imm), CC)))], + NoItinerary>; + + def _4H : NeonI_2VMisc<0b0, u, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), + asmop # "\t$Rd.4h, $Rn.4h, $Imm", + [(set (v4i16 VPR64:$Rd), + (v4i16 (Neon_cmpz (v4i16 VPR64:$Rn), (i32 imm:$Imm), CC)))], + NoItinerary>; + + def _8H : NeonI_2VMisc<0b1, u, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), + asmop # "\t$Rd.8h, $Rn.8h, $Imm", + [(set (v8i16 VPR128:$Rd), + (v8i16 (Neon_cmpz (v8i16 VPR128:$Rn), (i32 imm:$Imm), CC)))], + NoItinerary>; + + def _2S : NeonI_2VMisc<0b0, u, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn, neon_uimm0:$Imm), + asmop # "\t$Rd.2s, $Rn.2s, $Imm", + [(set (v2i32 VPR64:$Rd), + (v2i32 (Neon_cmpz (v2i32 VPR64:$Rn), (i32 imm:$Imm), CC)))], + NoItinerary>; + + def _4S : NeonI_2VMisc<0b1, u, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), + asmop # "\t$Rd.4s, $Rn.4s, $Imm", + [(set (v4i32 VPR128:$Rd), + (v4i32 (Neon_cmpz (v4i32 VPR128:$Rn), (i32 imm:$Imm), CC)))], + NoItinerary>; + + def _2D : NeonI_2VMisc<0b1, u, 0b11, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, neon_uimm0:$Imm), + asmop # "\t$Rd.2d, $Rn.2d, $Imm", + [(set (v2i64 VPR128:$Rd), + (v2i64 (Neon_cmpz (v2i64 VPR128:$Rn), (i32 imm:$Imm), CC)))], + NoItinerary>; +} + +// Vector Compare Mask Equal to Zero (Integer) +defm CMEQvvi : NeonI_cmpz_sizes<0b0, 0b01001, "cmeq", SETEQ>; + +// Vector Compare Mask Greater Than or Equal to Zero (Signed Integer) +defm CMGEvvi : NeonI_cmpz_sizes<0b1, 0b01000, "cmge", SETGE>; + +// Vector Compare Mask Greater Than Zero (Signed Integer) +defm CMGTvvi : NeonI_cmpz_sizes<0b0, 0b01000, "cmgt", SETGT>; + +// Vector Compare Mask Less Than or Equal To Zero (Signed Integer) +defm CMLEvvi : NeonI_cmpz_sizes<0b1, 0b01001, "cmle", SETLE>; + +// Vector Compare Mask Less Than Zero (Signed Integer) +defm CMLTvvi : NeonI_cmpz_sizes<0b0, 0b01010, "cmlt", SETLT>; + +// Vector Comparisons (Floating Point) + +// Vector Compare Mask Equal (Floating Point) +let isCommutable =1 in { +defm FCMEQvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11100, "fcmeq", Neon_cmeq, + Neon_cmeq, Neon_cmeq, + v2i32, v4i32, v2i64, 0>; +} + +// Vector Compare Mask Greater Than Or Equal 
(Floating Point) +defm FCMGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11100, "fcmge", Neon_cmge, + Neon_cmge, Neon_cmge, + v2i32, v4i32, v2i64, 0>; + +// Vector Compare Mask Greater Than (Floating Point) +defm FCMGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11100, "fcmgt", Neon_cmgt, + Neon_cmgt, Neon_cmgt, + v2i32, v4i32, v2i64, 0>; + +// Vector Compare Mask Less Than Or Equal (Floating Point) +// FCMLE is alias for FCMGE with operands reversed. +def FCMLEvvv_2S : NeonI_compare_aliases<"fcmle", ".2s", FCMGEvvv_2S, VPR64>; +def FCMLEvvv_4S : NeonI_compare_aliases<"fcmle", ".4s", FCMGEvvv_4S, VPR128>; +def FCMLEvvv_2D : NeonI_compare_aliases<"fcmle", ".2d", FCMGEvvv_2D, VPR128>; + +// Vector Compare Mask Less Than (Floating Point) +// FCMLT is alias for FCMGT with operands reversed. +def FCMLTvvv_2S : NeonI_compare_aliases<"fcmlt", ".2s", FCMGTvvv_2S, VPR64>; +def FCMLTvvv_4S : NeonI_compare_aliases<"fcmlt", ".4s", FCMGTvvv_4S, VPR128>; +def FCMLTvvv_2D : NeonI_compare_aliases<"fcmlt", ".2d", FCMGTvvv_2D, VPR128>; + + +multiclass NeonI_fpcmpz_sizes opcode, + string asmop, CondCode CC> +{ + def _2S : NeonI_2VMisc<0b0, u, {size, 0b0}, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn, fpz32:$FPImm), + asmop # "\t$Rd.2s, $Rn.2s, $FPImm", + [(set (v2i32 VPR64:$Rd), + (v2i32 (Neon_cmpz (v2f32 VPR64:$Rn), (f32 fpimm:$FPImm), CC)))], + NoItinerary>; + + def _4S : NeonI_2VMisc<0b1, u, {size, 0b0}, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm), + asmop # "\t$Rd.4s, $Rn.4s, $FPImm", + [(set (v4i32 VPR128:$Rd), + (v4i32 (Neon_cmpz (v4f32 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))], + NoItinerary>; + + def _2D : NeonI_2VMisc<0b1, u, {size, 0b1}, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn, fpz32:$FPImm), + asmop # "\t$Rd.2d, $Rn.2d, $FPImm", + [(set (v2i64 VPR128:$Rd), + (v2i64 (Neon_cmpz (v2f64 VPR128:$Rn), (f32 fpimm:$FPImm), CC)))], + NoItinerary>; +} + +// Vector Compare Mask Equal to Zero (Floating Point) +defm FCMEQvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01101, "fcmeq", SETEQ>; + +// Vector Compare Mask Greater Than or Equal to Zero (Floating Point) +defm FCMGEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01100, "fcmge", SETGE>; + +// Vector Compare Mask Greater Than Zero (Floating Point) +defm FCMGTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01100, "fcmgt", SETGT>; + +// Vector Compare Mask Less Than or Equal To Zero (Floating Point) +defm FCMLEvvi : NeonI_fpcmpz_sizes<0b1, 0b1, 0b01101, "fcmle", SETLE>; + +// Vector Compare Mask Less Than Zero (Floating Point) +defm FCMLTvvi : NeonI_fpcmpz_sizes<0b0, 0b1, 0b01110, "fcmlt", SETLT>; + +// Vector Absolute Comparisons (Floating Point) + +// Vector Absolute Compare Mask Greater Than Or Equal (Floating Point) +defm FACGEvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11101, "facge", + int_arm_neon_vacged, int_arm_neon_vacgeq, + int_aarch64_neon_vacgeq, + v2i32, v4i32, v2i64, 0>; + +// Vector Absolute Compare Mask Greater Than (Floating Point) +defm FACGTvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11101, "facgt", + int_arm_neon_vacgtd, int_arm_neon_vacgtq, + int_aarch64_neon_vacgtq, + v2i32, v4i32, v2i64, 0>; + +// Vector Absolute Compare Mask Less Than Or Equal (Floating Point) +// FACLE is alias for FACGE with operands reversed. +def FACLEvvv_2S : NeonI_compare_aliases<"facle", ".2s", FACGEvvv_2S, VPR64>; +def FACLEvvv_4S : NeonI_compare_aliases<"facle", ".4s", FACGEvvv_4S, VPR128>; +def FACLEvvv_2D : NeonI_compare_aliases<"facle", ".2d", FACGEvvv_2D, VPR128>; + +// Vector Absolute Compare Mask Less Than (Floating Point) +// FACLT is alias for FACGT with operands reversed. 
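One lane of the absolute comparisons above, modeled in C++ with the all-ones/all-zeros mask convention used throughout this file (a sketch, not LLVM code):

    #include <cmath>
    #include <cstdint>

    // facge: |a| >= |b|, reported as an integer mask lane.
    static uint32_t facge_lane(float a, float b) {
      return (std::fabs(a) >= std::fabs(b)) ? 0xFFFFFFFFu : 0u;
    }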
+def FACLTvvv_2S : NeonI_compare_aliases<"faclt", ".2s", FACGTvvv_2S, VPR64>; +def FACLTvvv_4S : NeonI_compare_aliases<"faclt", ".4s", FACGTvvv_4S, VPR128>; +def FACLTvvv_2D : NeonI_compare_aliases<"faclt", ".2d", FACGTvvv_2D, VPR128>; + +// Vector halving add (Integer Signed, Unsigned) +defm SHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00000, "shadd", + int_arm_neon_vhadds, 1>; +defm UHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00000, "uhadd", + int_arm_neon_vhaddu, 1>; + +// Vector halving sub (Integer Signed, Unsigned) +defm SHSUBvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00100, "shsub", + int_arm_neon_vhsubs, 0>; +defm UHSUBvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00100, "uhsub", + int_arm_neon_vhsubu, 0>; + +// Vector rounding halving add (Integer Signed, Unsigned) +defm SRHADDvvv : NeonI_3VSame_BHS_sizes<0b0, 0b00010, "srhadd", + int_arm_neon_vrhadds, 1>; +defm URHADDvvv : NeonI_3VSame_BHS_sizes<0b1, 0b00010, "urhadd", + int_arm_neon_vrhaddu, 1>; + +// Vector Saturating add (Integer Signed, Unsigned) +defm SQADDvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00001, "sqadd", + int_arm_neon_vqadds, 1>; +defm UQADDvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00001, "uqadd", + int_arm_neon_vqaddu, 1>; + +// Vector Saturating sub (Integer Signed, Unsigned) +defm SQSUBvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b00101, "sqsub", + int_arm_neon_vqsubs, 1>; +defm UQSUBvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b00101, "uqsub", + int_arm_neon_vqsubu, 1>; + +// Vector Shift Left (Signed and Unsigned Integer) +defm SSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01000, "sshl", + int_arm_neon_vshifts, 1>; +defm USHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01000, "ushl", + int_arm_neon_vshiftu, 1>; + +// Vector Saturating Shift Left (Signed and Unsigned Integer) +defm SQSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01001, "sqshl", + int_arm_neon_vqshifts, 1>; +defm UQSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01001, "uqshl", + int_arm_neon_vqshiftu, 1>; + +// Vector Rounding Shift Left (Signed and Unsigned Integer) +defm SRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01010, "srshl", + int_arm_neon_vrshifts, 1>; +defm URSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01010, "urshl", + int_arm_neon_vrshiftu, 1>; + +// Vector Saturating Rounding Shift Left (Signed and Unsigned Integer) +defm SQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b0, 0b01011, "sqrshl", + int_arm_neon_vqrshifts, 1>; +defm UQRSHLvvv : NeonI_3VSame_BHSD_sizes<0b1, 0b01011, "uqrshl", + int_arm_neon_vqrshiftu, 1>; + +// Vector Maximum (Signed and Unsigned Integer) +defm SMAXvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01100, "smax", int_arm_neon_vmaxs, 1>; +defm UMAXvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01100, "umax", int_arm_neon_vmaxu, 1>; + +// Vector Minimum (Signed and Unsigned Integer) +defm SMINvvv : NeonI_3VSame_BHS_sizes<0b0, 0b01101, "smin", int_arm_neon_vmins, 1>; +defm UMINvvv : NeonI_3VSame_BHS_sizes<0b1, 0b01101, "umin", int_arm_neon_vminu, 1>; + +// Vector Maximum (Floating Point) +defm FMAXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11110, "fmax", + int_arm_neon_vmaxs, int_arm_neon_vmaxs, + int_arm_neon_vmaxs, v2f32, v4f32, v2f64, 1>; + +// Vector Minimum (Floating Point) +defm FMINvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11110, "fmin", + int_arm_neon_vmins, int_arm_neon_vmins, + int_arm_neon_vmins, v2f32, v4f32, v2f64, 1>; + +// Vector maxNum (Floating Point) - prefer a number over a quiet NaN +defm FMAXNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11000, "fmaxnm", + int_aarch64_neon_vmaxnm, + int_aarch64_neon_vmaxnm, + int_aarch64_neon_vmaxnm, + v2f32, v4f32, v2f64, 1>; + +// Vector minNum (Floating Point) - prefer a number over
a quiet NaN +defm FMINNMvvv : NeonI_3VSame_SD_sizes<0b0, 0b1, 0b11000, "fminnm", + int_aarch64_neon_vminnm, + int_aarch64_neon_vminnm, + int_aarch64_neon_vminnm, + v2f32, v4f32, v2f64, 1>; + +// Vector Maximum Pairwise (Signed and Unsigned Integer) +defm SMAXPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10100, "smaxp", int_arm_neon_vpmaxs, 1>; +defm UMAXPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10100, "umaxp", int_arm_neon_vpmaxu, 1>; + +// Vector Minimum Pairwise (Signed and Unsigned Integer) +defm SMINPvvv : NeonI_3VSame_BHS_sizes<0b0, 0b10101, "sminp", int_arm_neon_vpmins, 1>; +defm UMINPvvv : NeonI_3VSame_BHS_sizes<0b1, 0b10101, "uminp", int_arm_neon_vpminu, 1>; + +// Vector Maximum Pairwise (Floating Point) +defm FMAXPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11110, "fmaxp", + int_arm_neon_vpmaxs, int_arm_neon_vpmaxs, + int_arm_neon_vpmaxs, v2f32, v4f32, v2f64, 1>; + +// Vector Minimum Pairwise (Floating Point) +defm FMINPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11110, "fminp", + int_arm_neon_vpmins, int_arm_neon_vpmins, + int_arm_neon_vpmins, v2f32, v4f32, v2f64, 1>; + +// Vector maxNum Pairwise (Floating Point) - prefer a number over a quiet NaN +defm FMAXNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11000, "fmaxnmp", + int_aarch64_neon_vpmaxnm, + int_aarch64_neon_vpmaxnm, + int_aarch64_neon_vpmaxnm, + v2f32, v4f32, v2f64, 1>; + +// Vector minNum Pairwise (Floating Point) - prefer a number over a quiet NaN +defm FMINNMPvvv : NeonI_3VSame_SD_sizes<0b1, 0b1, 0b11000, "fminnmp", + int_aarch64_neon_vpminnm, + int_aarch64_neon_vpminnm, + int_aarch64_neon_vpminnm, + v2f32, v4f32, v2f64, 1>; + +// Vector Addition Pairwise (Integer) +defm ADDP : NeonI_3VSame_BHSD_sizes<0b0, 0b10111, "addp", int_arm_neon_vpadd, 1>; + +// Vector Addition Pairwise (Floating Point) +defm FADDP : NeonI_3VSame_SD_sizes<0b1, 0b0, 0b11010, "faddp", + int_arm_neon_vpadd, + int_arm_neon_vpadd, + int_arm_neon_vpadd, + v2f32, v4f32, v2f64, 1>; + +// Vector Saturating Doubling Multiply High +defm SQDMULHvvv : NeonI_3VSame_HS_sizes<0b0, 0b10110, "sqdmulh", + int_arm_neon_vqdmulh, 1>; + +// Vector Saturating Rounding Doubling Multiply High +defm SQRDMULHvvv : NeonI_3VSame_HS_sizes<0b1, 0b10110, "sqrdmulh", + int_arm_neon_vqrdmulh, 1>; + +// Vector Multiply Extended (Floating Point) +defm FMULXvvv : NeonI_3VSame_SD_sizes<0b0, 0b0, 0b11011, "fmulx", + int_aarch64_neon_vmulx, + int_aarch64_neon_vmulx, + int_aarch64_neon_vmulx, + v2f32, v4f32, v2f64, 1>; + +// Vector Immediate Instructions + +multiclass neon_mov_imm_shift_asmoperands<string PREFIX> +{ + def _asmoperand : AsmOperandClass + { + let Name = "NeonMovImmShift" # PREFIX; + let RenderMethod = "addNeonMovImmShift" # PREFIX # "Operands"; + let PredicateMethod = "isNeonMovImmShift" # PREFIX; + } +} + +// Definition of vector immediate shift operands + +// The selectable use-cases extract the shift operation +// information from the OpCmode fields encoded in the immediate. +def neon_mod_shift_imm_XFORM : SDNodeXForm<imm, [{ + unsigned OpCmode = N->getZExtValue(); + unsigned ShiftImm; + unsigned ShiftOnesIn; + unsigned HasShift = + A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn); + if (!HasShift) return SDValue(); + return CurDAG->getTargetConstant(ShiftImm, MVT::i32); +}]>; + +// Vector immediate shift operands, which accept LSL and MSL +// shift operators with shift values of 0, 8, 16 or 24 (LSL), +// 0 or 8 (LSLH), or 8 or 16 (MSL).
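The operational difference between the two shift forms, sketched in C++ for one 32-bit lane (helpers invented; the MSL case corresponds to the ShiftOnesIn flag tested by the operand predicates below):

    #include <cstdint>

    static uint32_t expand_lsl(uint8_t imm8, unsigned amount) {  // amount: 0, 8, 16, 24
      return static_cast<uint32_t>(imm8) << amount;              // zeros shifted in
    }
    static uint32_t expand_msl(uint8_t imm8, unsigned amount) {  // amount: 8 or 16
      return (static_cast<uint32_t>(imm8) << amount)
             | ((1u << amount) - 1);                             // ones shifted in
    }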
+defm neon_mov_imm_LSL : neon_mov_imm_shift_asmoperands<"LSL">; +defm neon_mov_imm_MSL : neon_mov_imm_shift_asmoperands<"MSL">; +// LSLH restricts shift amount to 0, 8 out of 0, 8, 16, 24 +defm neon_mov_imm_LSLH : neon_mov_imm_shift_asmoperands<"LSLH">; + +multiclass neon_mov_imm_shift_operands +{ + def _operand : Operand, ImmLeaf + { + let PrintMethod = + "printNeonMovImmShiftOperand"; + let DecoderMethod = + "DecodeNeonMovImmShiftOperand"; + let ParserMatchClass = + !cast("neon_mov_imm_" # PREFIX # HALF # "_asmoperand"); + } +} + +defm neon_mov_imm_LSL : neon_mov_imm_shift_operands<"LSL", "", "false", [{ + unsigned ShiftImm; + unsigned ShiftOnesIn; + unsigned HasShift = + A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); + return (HasShift && !ShiftOnesIn); +}]>; + +defm neon_mov_imm_MSL : neon_mov_imm_shift_operands<"MSL", "", "false", [{ + unsigned ShiftImm; + unsigned ShiftOnesIn; + unsigned HasShift = + A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); + return (HasShift && ShiftOnesIn); +}]>; + +defm neon_mov_imm_LSLH : neon_mov_imm_shift_operands<"LSL", "H", "true", [{ + unsigned ShiftImm; + unsigned ShiftOnesIn; + unsigned HasShift = + A64Imms::decodeNeonModShiftImm(Imm, ShiftImm, ShiftOnesIn); + return (HasShift && !ShiftOnesIn); +}]>; + +def neon_uimm1_asmoperand : AsmOperandClass +{ + let Name = "UImm1"; + let PredicateMethod = "isUImm<1>"; + let RenderMethod = "addImmOperands"; +} + +def neon_uimm2_asmoperand : AsmOperandClass +{ + let Name = "UImm2"; + let PredicateMethod = "isUImm<2>"; + let RenderMethod = "addImmOperands"; +} + +def neon_uimm8_asmoperand : AsmOperandClass +{ + let Name = "UImm8"; + let PredicateMethod = "isUImm<8>"; + let RenderMethod = "addImmOperands"; +} + +def neon_uimm8 : Operand, ImmLeaf { + let ParserMatchClass = neon_uimm8_asmoperand; + let PrintMethod = "printUImmHexOperand"; +} + +def neon_uimm64_mask_asmoperand : AsmOperandClass +{ + let Name = "NeonUImm64Mask"; + let PredicateMethod = "isNeonUImm64Mask"; + let RenderMethod = "addNeonUImm64MaskOperands"; +} + +// MCOperand for 64-bit bytemask with each byte having only the +// value 0x00 and 0xff is encoded as an unsigned 8-bit value +def neon_uimm64_mask : Operand, ImmLeaf { + let ParserMatchClass = neon_uimm64_mask_asmoperand; + let PrintMethod = "printNeonUImm64MaskOperand"; +} + +multiclass NeonI_mov_imm_lsl_sizes +{ + // shift zeros, per word + def _2S : NeonI_1VModImm<0b0, op, + (outs VPR64:$Rd), + (ins neon_uimm8:$Imm, + neon_mov_imm_LSL_operand:$Simm), + !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), + [(set (v2i32 VPR64:$Rd), + (v2i32 (opnode (timm:$Imm), + (neon_mov_imm_LSL_operand:$Simm))))], + NoItinerary> { + bits<2> Simm; + let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; + } + + def _4S : NeonI_1VModImm<0b1, op, + (outs VPR128:$Rd), + (ins neon_uimm8:$Imm, + neon_mov_imm_LSL_operand:$Simm), + !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), + [(set (v4i32 VPR128:$Rd), + (v4i32 (opnode (timm:$Imm), + (neon_mov_imm_LSL_operand:$Simm))))], + NoItinerary> { + bits<2> Simm; + let cmode = {0b0, Simm{1}, Simm{0}, 0b0}; + } + + // shift zeros, per halfword + def _4H : NeonI_1VModImm<0b0, op, + (outs VPR64:$Rd), + (ins neon_uimm8:$Imm, + neon_mov_imm_LSLH_operand:$Simm), + !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"), + [(set (v4i16 VPR64:$Rd), + (v4i16 (opnode (timm:$Imm), + (neon_mov_imm_LSLH_operand:$Simm))))], + NoItinerary> { + bit Simm; + let cmode = {0b1, 0b0, Simm, 0b0}; + } + + def _8H : NeonI_1VModImm<0b1, op, + (outs VPR128:$Rd), + (ins neon_uimm8:$Imm, + 
neon_mov_imm_LSLH_operand:$Simm), + !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"), + [(set (v8i16 VPR128:$Rd), + (v8i16 (opnode (timm:$Imm), + (neon_mov_imm_LSLH_operand:$Simm))))], + NoItinerary> { + bit Simm; + let cmode = {0b1, 0b0, Simm, 0b0}; + } +} + +multiclass NeonI_mov_imm_with_constraint_lsl_sizes<string asmop, bit op, + SDPatternOperator opnode, + SDPatternOperator neonopnode> +{ + let Constraints = "$src = $Rd" in { + // shift zeros, per word + def _2S : NeonI_1VModImm<0b0, op, + (outs VPR64:$Rd), + (ins VPR64:$src, neon_uimm8:$Imm, + neon_mov_imm_LSL_operand:$Simm), + !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), + [(set (v2i32 VPR64:$Rd), + (v2i32 (opnode (v2i32 VPR64:$src), + (v2i32 (bitconvert (v2i32 (neonopnode timm:$Imm, + neon_mov_imm_LSL_operand:$Simm)))))))], + NoItinerary> { + bits<2> Simm; + let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; + } + + def _4S : NeonI_1VModImm<0b1, op, + (outs VPR128:$Rd), + (ins VPR128:$src, neon_uimm8:$Imm, + neon_mov_imm_LSL_operand:$Simm), + !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), + [(set (v4i32 VPR128:$Rd), + (v4i32 (opnode (v4i32 VPR128:$src), + (v4i32 (bitconvert (v4i32 (neonopnode timm:$Imm, + neon_mov_imm_LSL_operand:$Simm)))))))], + NoItinerary> { + bits<2> Simm; + let cmode = {0b0, Simm{1}, Simm{0}, 0b1}; + } + + // shift zeros, per halfword + def _4H : NeonI_1VModImm<0b0, op, + (outs VPR64:$Rd), + (ins VPR64:$src, neon_uimm8:$Imm, + neon_mov_imm_LSLH_operand:$Simm), + !strconcat(asmop, "\t$Rd.4h, $Imm$Simm"), + [(set (v4i16 VPR64:$Rd), + (v4i16 (opnode (v4i16 VPR64:$src), + (v4i16 (bitconvert (v4i16 (neonopnode timm:$Imm, + neon_mov_imm_LSL_operand:$Simm)))))))], + NoItinerary> { + bit Simm; + let cmode = {0b1, 0b0, Simm, 0b1}; + } + + def _8H : NeonI_1VModImm<0b1, op, + (outs VPR128:$Rd), + (ins VPR128:$src, neon_uimm8:$Imm, + neon_mov_imm_LSLH_operand:$Simm), + !strconcat(asmop, "\t$Rd.8h, $Imm$Simm"), + [(set (v8i16 VPR128:$Rd), + (v8i16 (opnode (v8i16 VPR128:$src), + (v8i16 (bitconvert (v8i16 (neonopnode timm:$Imm, + neon_mov_imm_LSL_operand:$Simm)))))))], + NoItinerary> { + bit Simm; + let cmode = {0b1, 0b0, Simm, 0b1}; + } + } +} + +multiclass NeonI_mov_imm_msl_sizes<string asmop, bit op, + SDPatternOperator opnode> +{ + // shift ones, per word + def _2S : NeonI_1VModImm<0b0, op, + (outs VPR64:$Rd), + (ins neon_uimm8:$Imm, + neon_mov_imm_MSL_operand:$Simm), + !strconcat(asmop, "\t$Rd.2s, $Imm$Simm"), + [(set (v2i32 VPR64:$Rd), + (v2i32 (opnode (timm:$Imm), + (neon_mov_imm_MSL_operand:$Simm))))], + NoItinerary> { + bit Simm; + let cmode = {0b1, 0b1, 0b0, Simm}; + } + + def _4S : NeonI_1VModImm<0b1, op, + (outs VPR128:$Rd), + (ins neon_uimm8:$Imm, + neon_mov_imm_MSL_operand:$Simm), + !strconcat(asmop, "\t$Rd.4s, $Imm$Simm"), + [(set (v4i32 VPR128:$Rd), + (v4i32 (opnode (timm:$Imm), + (neon_mov_imm_MSL_operand:$Simm))))], + NoItinerary> { + bit Simm; + let cmode = {0b1, 0b1, 0b0, Simm}; + } +} + +// Vector Move Immediate Shifted +let isReMaterializable = 1 in { +defm MOVIvi_lsl : NeonI_mov_imm_lsl_sizes<"movi", 0b0, Neon_movi>; +} + +// Vector Move Inverted Immediate Shifted +let isReMaterializable = 1 in { +defm MVNIvi_lsl : NeonI_mov_imm_lsl_sizes<"mvni", 0b1, Neon_mvni>; +} + +// Vector Bitwise Bit Clear (AND NOT) - immediate +let isReMaterializable = 1 in { +defm BICvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"bic", 0b1, + and, Neon_mvni>; +} + +// Vector Bitwise OR - immediate + +let isReMaterializable = 1 in { +defm ORRvi_lsl : NeonI_mov_imm_with_constraint_lsl_sizes<"orr", 0b0, + or, Neon_movi>; +} + +// Additional patterns for Vector Bitwise Bit Clear (AND NOT) - immediate +// LowerBUILD_VECTOR favors lowering MOVI over MVNI.
+// BIC immediate instructions selection requires additional patterns to +// transform Neon_movi operands into BIC immediate operands + +def neon_mov_imm_LSLH_transform_XFORM : SDNodeXFormgetZExtValue(); + unsigned ShiftImm; + unsigned ShiftOnesIn; + (void)A64Imms::decodeNeonModShiftImm(OpCmode, ShiftImm, ShiftOnesIn); + // LSLH restricts shift amount to 0, 8 which are encoded as 0 and 1 + // Transform encoded shift amount 0 to 1 and 1 to 0. + return CurDAG->getTargetConstant(!ShiftImm, MVT::i32); +}]>; + +def neon_mov_imm_LSLH_transform_operand + : ImmLeaf; + +// Transform (and A, (4h Neon_movi 0xff)) -> BIC 4h (A, 0x00, LSL 8) +// Transform (and A, (4h Neon_movi 0xff LSL #8)) -> BIC 4h (A, 0x00) +def : Pat<(v4i16 (and VPR64:$src, + (v4i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))), + (BICvi_lsl_4H VPR64:$src, 0, + neon_mov_imm_LSLH_transform_operand:$Simm)>; + +// Transform (and A, (8h Neon_movi 8h 0xff)) -> BIC 8h (A, 0x00, LSL 8) +// Transform (and A, (8h Neon_movi 0xff LSL #8)) -> BIC 8h (A, 0x00) +def : Pat<(v8i16 (and VPR128:$src, + (v8i16 (Neon_movi 255, neon_mov_imm_LSLH_transform_operand:$Simm)))), + (BICvi_lsl_8H VPR128:$src, 0, + neon_mov_imm_LSLH_transform_operand:$Simm)>; + + +multiclass Neon_bitwiseVi_patterns { + def : Pat<(v8i8 (opnode VPR64:$src, + (bitconvert(v4i16 (neonopnode timm:$Imm, + neon_mov_imm_LSLH_operand:$Simm))))), + (INST4H VPR64:$src, neon_uimm8:$Imm, + neon_mov_imm_LSLH_operand:$Simm)>; + def : Pat<(v1i64 (opnode VPR64:$src, + (bitconvert(v4i16 (neonopnode timm:$Imm, + neon_mov_imm_LSLH_operand:$Simm))))), + (INST4H VPR64:$src, neon_uimm8:$Imm, + neon_mov_imm_LSLH_operand:$Simm)>; + + def : Pat<(v16i8 (opnode VPR128:$src, + (bitconvert(v8i16 (neonopnode timm:$Imm, + neon_mov_imm_LSLH_operand:$Simm))))), + (INST8H VPR128:$src, neon_uimm8:$Imm, + neon_mov_imm_LSLH_operand:$Simm)>; + def : Pat<(v4i32 (opnode VPR128:$src, + (bitconvert(v8i16 (neonopnode timm:$Imm, + neon_mov_imm_LSLH_operand:$Simm))))), + (INST8H VPR128:$src, neon_uimm8:$Imm, + neon_mov_imm_LSLH_operand:$Simm)>; + def : Pat<(v2i64 (opnode VPR128:$src, + (bitconvert(v8i16 (neonopnode timm:$Imm, + neon_mov_imm_LSLH_operand:$Simm))))), + (INST8H VPR128:$src, neon_uimm8:$Imm, + neon_mov_imm_LSLH_operand:$Simm)>; +} + +// Additional patterns for Vector Vector Bitwise Bit Clear (AND NOT) - immediate +defm : Neon_bitwiseVi_patterns; + +// Additional patterns for Vector Bitwise OR - immedidate +defm : Neon_bitwiseVi_patterns; + + +// Vector Move Immediate Masked +let isReMaterializable = 1 in { +defm MOVIvi_msl : NeonI_mov_imm_msl_sizes<"movi", 0b0, Neon_movi>; +} + +// Vector Move Inverted Immediate Masked +let isReMaterializable = 1 in { +defm MVNIvi_msl : NeonI_mov_imm_msl_sizes<"mvni", 0b1, Neon_mvni>; +} + +class NeonI_mov_imm_lsl_aliases + : NeonInstAlias; + +// Aliases for Vector Move Immediate Shifted +def : NeonI_mov_imm_lsl_aliases<"movi", ".2s", MOVIvi_lsl_2S, VPR64>; +def : NeonI_mov_imm_lsl_aliases<"movi", ".4s", MOVIvi_lsl_4S, VPR128>; +def : NeonI_mov_imm_lsl_aliases<"movi", ".4h", MOVIvi_lsl_4H, VPR64>; +def : NeonI_mov_imm_lsl_aliases<"movi", ".8h", MOVIvi_lsl_8H, VPR128>; + +// Aliases for Vector Move Inverted Immediate Shifted +def : NeonI_mov_imm_lsl_aliases<"mvni", ".2s", MVNIvi_lsl_2S, VPR64>; +def : NeonI_mov_imm_lsl_aliases<"mvni", ".4s", MVNIvi_lsl_4S, VPR128>; +def : NeonI_mov_imm_lsl_aliases<"mvni", ".4h", MVNIvi_lsl_4H, VPR64>; +def : NeonI_mov_imm_lsl_aliases<"mvni", ".8h", MVNIvi_lsl_8H, VPR128>; + +// Aliases for Vector Bitwise Bit Clear (AND NOT) - 
immediate
+def : NeonI_mov_imm_lsl_aliases<"bic", ".2s", BICvi_lsl_2S, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"bic", ".4s", BICvi_lsl_4S, VPR128>;
+def : NeonI_mov_imm_lsl_aliases<"bic", ".4h", BICvi_lsl_4H, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"bic", ".8h", BICvi_lsl_8H, VPR128>;
+
+// Aliases for Vector Bitwise OR - immediate
+def : NeonI_mov_imm_lsl_aliases<"orr", ".2s", ORRvi_lsl_2S, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"orr", ".4s", ORRvi_lsl_4S, VPR128>;
+def : NeonI_mov_imm_lsl_aliases<"orr", ".4h", ORRvi_lsl_4H, VPR64>;
+def : NeonI_mov_imm_lsl_aliases<"orr", ".8h", ORRvi_lsl_8H, VPR128>;
+
+// Vector Move Immediate - per byte
+let isReMaterializable = 1 in {
+def MOVIvi_8B : NeonI_1VModImm<0b0, 0b0,
+                               (outs VPR64:$Rd), (ins neon_uimm8:$Imm),
+                               "movi\t$Rd.8b, $Imm",
+                               [(set (v8i8 VPR64:$Rd),
+                                  (v8i8 (Neon_movi (timm:$Imm), (i32 imm))))],
+                               NoItinerary> {
+  let cmode = 0b1110;
+}
+
+def MOVIvi_16B : NeonI_1VModImm<0b1, 0b0,
+                                (outs VPR128:$Rd), (ins neon_uimm8:$Imm),
+                                "movi\t$Rd.16b, $Imm",
+                                [(set (v16i8 VPR128:$Rd),
+                                   (v16i8 (Neon_movi (timm:$Imm), (i32 imm))))],
+                                NoItinerary> {
+  let cmode = 0b1110;
+}
+}
+
+// Vector Move Immediate - bytemask, per doubleword
+let isReMaterializable = 1 in {
+def MOVIvi_2D : NeonI_1VModImm<0b1, 0b1,
+                               (outs VPR128:$Rd), (ins neon_uimm64_mask:$Imm),
+                               "movi\t$Rd.2d, $Imm",
+                               [(set (v2i64 VPR128:$Rd),
+                                  (v2i64 (Neon_movi (timm:$Imm), (i32 imm))))],
+                               NoItinerary> {
+  let cmode = 0b1110;
+}
+}
+
+// Vector Move Immediate - bytemask, one doubleword
+
+let isReMaterializable = 1 in {
+def MOVIdi : NeonI_1VModImm<0b0, 0b1,
+                            (outs FPR64:$Rd), (ins neon_uimm64_mask:$Imm),
+                            "movi\t$Rd, $Imm",
+                            [(set (v1i64 FPR64:$Rd),
+                               (v1i64 (Neon_movi (timm:$Imm), (i32 imm))))],
+                            NoItinerary> {
+  let cmode = 0b1110;
+}
+}
+
+// Vector Floating Point Move Immediate
+
+class NeonI_FMOV_impl<string asmlane, RegisterOperand VPRC, ValueType OpTy,
+                      Operand immOpType, bit q, bit op>
+  : NeonI_1VModImm<q, op,
+                   (outs VPRC:$Rd), (ins immOpType:$Imm),
+                   "fmov\t$Rd" # asmlane # ", $Imm",
+                   [(set (OpTy VPRC:$Rd),
+                      (OpTy (Neon_fmovi (timm:$Imm))))],
+                   NoItinerary> {
+  let cmode = 0b1111;
+}
+
+let isReMaterializable = 1 in {
+def FMOVvi_2S : NeonI_FMOV_impl<".2s", VPR64,  v2f32, fmov32_operand, 0b0, 0b0>;
+def FMOVvi_4S : NeonI_FMOV_impl<".4s", VPR128, v4f32, fmov32_operand, 0b1, 0b0>;
+def FMOVvi_2D : NeonI_FMOV_impl<".2d", VPR128, v2f64, fmov64_operand, 0b1, 0b1>;
+}
+
+// Vector Shift (Immediate)
+// Immediate in [0, 63]
+def imm0_63 : Operand<i32> {
+  let ParserMatchClass = uimm6_asmoperand;
+}
+
+// Shift Right/Left Immediate - The immh:immb field of these shifts is encoded
+// as follows:
+//
+//    Offset    Encoding
+//     8        immh:immb<6:3> = '0001xxx', <imm> is encoded in immh:immb<2:0>
+//     16       immh:immb<6:4> = '001xxxx', <imm> is encoded in immh:immb<3:0>
+//     32       immh:immb<6:5> = '01xxxxx', <imm> is encoded in immh:immb<4:0>
+//     64       immh:immb<6>   = '1xxxxxx', <imm> is encoded in immh:immb<5:0>
+//
+// The shift right immediate amount, in the range 1 to element bits, is
+// computed as 2 * Offset - UInt(immh:immb).  The shift left immediate amount,
+// in the range 0 to element bits - 1, is computed as UInt(immh:immb) - Offset.
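
As a sanity check on the table above, here is a minimal standalone C++ sketch
of the left-shift half of the immh:immb scheme. It is illustrative only: the
helper names are invented and are not the getShiftLeftImm*/DecodeShiftLeftImm*
methods referenced by the operand classes below.

  #include <cassert>
  #include <cstdio>
  #include <initializer_list>

  // Encode a left-shift amount (0 .. EltBits-1) into the 7-bit immh:immb
  // field: the element width supplies the leading marker bit from the table
  // above, and the shift amount occupies the low bits.
  static unsigned encodeShiftLeftImm(unsigned Shift, unsigned EltBits) {
    assert(Shift < EltBits && "shift-left amount out of range");
    return EltBits + Shift; // e.g. 8 + x  ==>  '0001xxx' for 8-bit lanes
  }

  // Decode by subtracting the marker: UInt(immh:immb) - Offset.
  static unsigned decodeShiftLeftImm(unsigned ImmHB, unsigned EltBits) {
    return ImmHB - EltBits;
  }

  int main() {
    for (unsigned Elt : {8u, 16u, 32u, 64u})
      for (unsigned S = 0; S != Elt; ++S)
        assert(decodeShiftLeftImm(encodeShiftLeftImm(S, Elt), Elt) == S);
    // shl #3 on .8b lanes: immh:immb = 0b0001011.
    printf("0x%x\n", encodeShiftLeftImm(3, 8)); // prints 0xb
    return 0;
  }

The instruction definitions below only hardwire the marker bits (for example
Inst{22-19} = 0b0001 for byte lanes); the variable low bits come from the
ImmTy operand's EncoderMethod.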
+
+class shr_imm_asmoperands<string OFFSET> : AsmOperandClass {
+  let Name = "ShrImm" # OFFSET;
+  let RenderMethod = "addImmOperands";
+  let DiagnosticType = "ShrImm" # OFFSET;
+}
+
+class shr_imm<string OFFSET> : Operand<i32> {
+  let EncoderMethod = "getShiftRightImm" # OFFSET;
+  let DecoderMethod = "DecodeShiftRightImm" # OFFSET;
+  let ParserMatchClass =
+    !cast<AsmOperandClass>("shr_imm" # OFFSET # "_asmoperand");
+}
+
+def shr_imm8_asmoperand : shr_imm_asmoperands<"8">;
+def shr_imm16_asmoperand : shr_imm_asmoperands<"16">;
+def shr_imm32_asmoperand : shr_imm_asmoperands<"32">;
+def shr_imm64_asmoperand : shr_imm_asmoperands<"64">;
+
+def shr_imm8 : shr_imm<"8">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 8;}]>;
+def shr_imm16 : shr_imm<"16">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 16;}]>;
+def shr_imm32 : shr_imm<"32">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 32;}]>;
+def shr_imm64 : shr_imm<"64">, ImmLeaf<i32, [{return Imm > 0 && Imm <= 64;}]>;
+
+class shl_imm_asmoperands<string OFFSET> : AsmOperandClass {
+  let Name = "ShlImm" # OFFSET;
+  let RenderMethod = "addImmOperands";
+  let DiagnosticType = "ShlImm" # OFFSET;
+}
+
+class shl_imm<string OFFSET> : Operand<i32> {
+  let EncoderMethod = "getShiftLeftImm" # OFFSET;
+  let DecoderMethod = "DecodeShiftLeftImm" # OFFSET;
+  let ParserMatchClass =
+    !cast<AsmOperandClass>("shl_imm" # OFFSET # "_asmoperand");
+}
+
+def shl_imm8_asmoperand : shl_imm_asmoperands<"8">;
+def shl_imm16_asmoperand : shl_imm_asmoperands<"16">;
+def shl_imm32_asmoperand : shl_imm_asmoperands<"32">;
+def shl_imm64_asmoperand : shl_imm_asmoperands<"64">;
+
+def shl_imm8 : shl_imm<"8">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 8;}]>;
+def shl_imm16 : shl_imm<"16">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 16;}]>;
+def shl_imm32 : shl_imm<"32">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 32;}]>;
+def shl_imm64 : shl_imm<"64">, ImmLeaf<i32, [{return Imm >= 0 && Imm < 64;}]>;
+
+class N2VShift<bit q, bit u, bits<5> opcode, string asmop, string T,
+               RegisterOperand VPRC, ValueType Ty, Operand ImmTy, SDNode OpNode>
+  : NeonI_2VShiftImm<q, u, opcode,
+                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+                     [(set (Ty VPRC:$Rd),
+                        (Ty (OpNode (Ty VPRC:$Rn),
+                          (Ty (Neon_vdup (i32 ImmTy:$Imm))))))],
+                     NoItinerary>;
+
+multiclass NeonI_N2VShL<bit u, bits<5> opcode, string asmop> {
+  // 64-bit vector types.
+  def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, shl> {
+    let Inst{22-19} = 0b0001;  // immh:immb = 0001xxx
+  }
+
+  def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, shl> {
+    let Inst{22-20} = 0b001;   // immh:immb = 001xxxx
+  }
+
+  def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, shl> {
+    let Inst{22-21} = 0b01;    // immh:immb = 01xxxxx
+  }
+
+  // 128-bit vector types.
+ def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, shl> { + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, shl> { + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, shl> { + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, shl> { + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + } +} + +multiclass NeonI_N2VShR opcode, string asmop, SDNode OpNode> { + def _8B : N2VShift<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShift<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Shift left +defm SHLvvi : NeonI_N2VShL<0b0, 0b01010, "shl">; + +// Shift right +defm SSHRvvi : NeonI_N2VShR<0b0, 0b00000, "sshr", sra>; +defm USHRvvi : NeonI_N2VShR<0b1, 0b00000, "ushr", srl>; + +def Neon_High16B : PatFrag<(ops node:$in), + (extract_subvector (v16i8 node:$in), (iPTR 8))>; +def Neon_High8H : PatFrag<(ops node:$in), + (extract_subvector (v8i16 node:$in), (iPTR 4))>; +def Neon_High4S : PatFrag<(ops node:$in), + (extract_subvector (v4i32 node:$in), (iPTR 2))>; +def Neon_High2D : PatFrag<(ops node:$in), + (extract_subvector (v2i64 node:$in), (iPTR 1))>; +def Neon_High4float : PatFrag<(ops node:$in), + (extract_subvector (v4f32 node:$in), (iPTR 2))>; +def Neon_High2double : PatFrag<(ops node:$in), + (extract_subvector (v2f64 node:$in), (iPTR 1))>; + +def Neon_Low16B : PatFrag<(ops node:$in), + (v8i8 (extract_subvector (v16i8 node:$in), + (iPTR 0)))>; +def Neon_Low8H : PatFrag<(ops node:$in), + (v4i16 (extract_subvector (v8i16 node:$in), + (iPTR 0)))>; +def Neon_Low4S : PatFrag<(ops node:$in), + (v2i32 (extract_subvector (v4i32 node:$in), + (iPTR 0)))>; +def Neon_Low2D : PatFrag<(ops node:$in), + (v1i64 (extract_subvector (v2i64 node:$in), + (iPTR 0)))>; +def Neon_Low4float : PatFrag<(ops node:$in), + (v2f32 (extract_subvector (v4f32 node:$in), + (iPTR 0)))>; +def Neon_Low2double : PatFrag<(ops node:$in), + (v1f64 (extract_subvector (v2f64 node:$in), + (iPTR 0)))>; + +class N2VShiftLong opcode, string asmop, string DestT, + string SrcT, ValueType DestTy, ValueType SrcTy, + Operand ImmTy, SDPatternOperator ExtOp> + : NeonI_2VShiftImm; + +class N2VShiftLongHigh opcode, string asmop, string DestT, + string SrcT, ValueType DestTy, ValueType SrcTy, + int StartIndex, Operand ImmTy, + SDPatternOperator ExtOp, PatFrag getTop> + : NeonI_2VShiftImm; + +multiclass NeonI_N2VShLL opcode, string asmop, + SDNode ExtOp> { + // 64-bit vector types. 
+ def _8B : N2VShiftLong<0b0, u, opcode, asmop, "8h", "8b", v8i16, v8i8, + shl_imm8, ExtOp> { + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _4H : N2VShiftLong<0b0, u, opcode, asmop, "4s", "4h", v4i32, v4i16, + shl_imm16, ExtOp> { + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _2S : N2VShiftLong<0b0, u, opcode, asmop, "2d", "2s", v2i64, v2i32, + shl_imm32, ExtOp> { + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + // 128-bit vector types + def _16B : N2VShiftLongHigh<0b1, u, opcode, asmop, "8h", "16b", v8i16, v8i8, + 8, shl_imm8, ExtOp, Neon_High16B> { + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + } + + def _8H : N2VShiftLongHigh<0b1, u, opcode, asmop, "4s", "8h", v4i32, v4i16, + 4, shl_imm16, ExtOp, Neon_High8H> { + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + } + + def _4S : N2VShiftLongHigh<0b1, u, opcode, asmop, "2d", "4s", v2i64, v2i32, + 2, shl_imm32, ExtOp, Neon_High4S> { + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + } + + // Use other patterns to match when the immediate is 0. + def : Pat<(v8i16 (ExtOp (v8i8 VPR64:$Rn))), + (!cast(prefix # "_8B") VPR64:$Rn, 0)>; + + def : Pat<(v4i32 (ExtOp (v4i16 VPR64:$Rn))), + (!cast(prefix # "_4H") VPR64:$Rn, 0)>; + + def : Pat<(v2i64 (ExtOp (v2i32 VPR64:$Rn))), + (!cast(prefix # "_2S") VPR64:$Rn, 0)>; + + def : Pat<(v8i16 (ExtOp (v8i8 (Neon_High16B VPR128:$Rn)))), + (!cast(prefix # "_16B") VPR128:$Rn, 0)>; + + def : Pat<(v4i32 (ExtOp (v4i16 (Neon_High8H VPR128:$Rn)))), + (!cast(prefix # "_8H") VPR128:$Rn, 0)>; + + def : Pat<(v2i64 (ExtOp (v2i32 (Neon_High4S VPR128:$Rn)))), + (!cast(prefix # "_4S") VPR128:$Rn, 0)>; +} + +// Shift left long +defm SSHLLvvi : NeonI_N2VShLL<"SSHLLvvi", 0b0, 0b10100, "sshll", sext>; +defm USHLLvvi : NeonI_N2VShLL<"USHLLvvi", 0b1, 0b10100, "ushll", zext>; + +// Rounding/Saturating shift +class N2VShift_RQ opcode, string asmop, string T, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm; + +// shift right (vector by immediate) +multiclass NeonI_N2VShR_RQ opcode, string asmop, + SDPatternOperator OpNode> { + def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +multiclass NeonI_N2VShL_Q opcode, string asmop, + SDPatternOperator OpNode> { + // 64-bit vector types. + def _8B : N2VShift_RQ<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShift_RQ<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShift_RQ<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types. 
+ def _16B : N2VShift_RQ<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShift_RQ<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShift_RQ<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShift_RQ<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Rounding shift right +defm SRSHRvvi : NeonI_N2VShR_RQ<0b0, 0b00100, "srshr", + int_aarch64_neon_vsrshr>; +defm URSHRvvi : NeonI_N2VShR_RQ<0b1, 0b00100, "urshr", + int_aarch64_neon_vurshr>; + +// Saturating shift left unsigned +defm SQSHLUvvi : NeonI_N2VShL_Q<0b1, 0b01100, "sqshlu", int_aarch64_neon_vsqshlu>; + +// Saturating shift left +defm SQSHLvvi : NeonI_N2VShL_Q<0b0, 0b01110, "sqshl", Neon_sqrshlImm>; +defm UQSHLvvi : NeonI_N2VShL_Q<0b1, 0b01110, "uqshl", Neon_uqrshlImm>; + +class N2VShiftAdd opcode, string asmop, string T, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, + SDNode OpNode> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +// Shift Right accumulate +multiclass NeonI_N2VShRAdd opcode, string asmop, SDNode OpNode> { + def _8B : N2VShiftAdd<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftAdd<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftAdd<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShiftAdd<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftAdd<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftAdd<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftAdd<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Shift right and accumulate +defm SSRAvvi : NeonI_N2VShRAdd<0, 0b00010, "ssra", sra>; +defm USRAvvi : NeonI_N2VShRAdd<1, 0b00010, "usra", srl>; + +// Rounding shift accumulate +class N2VShiftAdd_R opcode, string asmop, string T, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_N2VShRAdd_R opcode, string asmop, + SDPatternOperator OpNode> { + def _8B : N2VShiftAdd_R<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftAdd_R<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftAdd_R<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _16B : N2VShiftAdd_R<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + OpNode> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftAdd_R<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + OpNode> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftAdd_R<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + OpNode> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftAdd_R<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + OpNode> { + let Inst{22} = 0b1; + } +} + +// Rounding shift right and accumulate +defm SRSRAvvi : NeonI_N2VShRAdd_R<0, 0b00110, "srsra", 
int_aarch64_neon_vsrshr>; +defm URSRAvvi : NeonI_N2VShRAdd_R<1, 0b00110, "ursra", int_aarch64_neon_vurshr>; + +// Shift insert by immediate +class N2VShiftIns opcode, string asmop, string T, + RegisterOperand VPRC, ValueType Ty, Operand ImmTy, + SDPatternOperator OpNode> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +// shift left insert (vector by immediate) +multiclass NeonI_N2VShLIns opcode, string asmop> { + def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shl_imm8, + int_aarch64_neon_vsli> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shl_imm16, + int_aarch64_neon_vsli> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shl_imm32, + int_aarch64_neon_vsli> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types + def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shl_imm8, + int_aarch64_neon_vsli> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shl_imm16, + int_aarch64_neon_vsli> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shl_imm32, + int_aarch64_neon_vsli> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shl_imm64, + int_aarch64_neon_vsli> { + let Inst{22} = 0b1; + } +} + +// shift right insert (vector by immediate) +multiclass NeonI_N2VShRIns opcode, string asmop> { + // 64-bit vector types. + def _8B : N2VShiftIns<0b0, u, opcode, asmop, "8b", VPR64, v8i8, shr_imm8, + int_aarch64_neon_vsri> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShiftIns<0b0, u, opcode, asmop, "4h", VPR64, v4i16, shr_imm16, + int_aarch64_neon_vsri> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShiftIns<0b0, u, opcode, asmop, "2s", VPR64, v2i32, shr_imm32, + int_aarch64_neon_vsri> { + let Inst{22-21} = 0b01; + } + + // 128-bit vector types + def _16B : N2VShiftIns<0b1, u, opcode, asmop, "16b", VPR128, v16i8, shr_imm8, + int_aarch64_neon_vsri> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShiftIns<0b1, u, opcode, asmop, "8h", VPR128, v8i16, shr_imm16, + int_aarch64_neon_vsri> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShiftIns<0b1, u, opcode, asmop, "4s", VPR128, v4i32, shr_imm32, + int_aarch64_neon_vsri> { + let Inst{22-21} = 0b01; + } + + def _2D : N2VShiftIns<0b1, u, opcode, asmop, "2d", VPR128, v2i64, shr_imm64, + int_aarch64_neon_vsri> { + let Inst{22} = 0b1; + } +} + +// Shift left and insert +defm SLIvvi : NeonI_N2VShLIns<0b1, 0b01010, "sli">; + +// Shift right and insert +defm SRIvvi : NeonI_N2VShRIns<0b1, 0b01000, "sri">; + +class N2VShR_Narrow opcode, string asmop, string DestT, + string SrcT, Operand ImmTy> + : NeonI_2VShiftImm; + +class N2VShR_Narrow_Hi opcode, string asmop, string DestT, + string SrcT, Operand ImmTy> + : NeonI_2VShiftImm { + let Constraints = "$src = $Rd"; +} + +// left long shift by immediate +multiclass NeonI_N2VShR_Narrow opcode, string asmop> { + def _8B : N2VShR_Narrow<0b0, u, opcode, asmop, "8b", "8h", shr_imm8> { + let Inst{22-19} = 0b0001; + } + + def _4H : N2VShR_Narrow<0b0, u, opcode, asmop, "4h", "4s", shr_imm16> { + let Inst{22-20} = 0b001; + } + + def _2S : N2VShR_Narrow<0b0, u, opcode, asmop, "2s", "2d", shr_imm32> { + let Inst{22-21} = 0b01; + } + + // Shift Narrow High + def _16B : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "16b", "8h", + shr_imm8> { + let Inst{22-19} = 0b0001; + } + + def _8H : N2VShR_Narrow_Hi<0b1, u, 
opcode, asmop # "2", "8h", "4s", + shr_imm16> { + let Inst{22-20} = 0b001; + } + + def _4S : N2VShR_Narrow_Hi<0b1, u, opcode, asmop # "2", "4s", "2d", + shr_imm32> { + let Inst{22-21} = 0b01; + } +} + +// Shift right narrow +defm SHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10000, "shrn">; + +// Shift right narrow (prefix Q is saturating, prefix R is rounding) +defm QSHRUNvvi :NeonI_N2VShR_Narrow<0b1, 0b10000, "sqshrun">; +defm RSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10001, "rshrn">; +defm QRSHRUNvvi : NeonI_N2VShR_Narrow<0b1, 0b10001, "sqrshrun">; +defm SQSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10010, "sqshrn">; +defm UQSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10010, "uqshrn">; +defm SQRSHRNvvi : NeonI_N2VShR_Narrow<0b0, 0b10011, "sqrshrn">; +defm UQRSHRNvvi : NeonI_N2VShR_Narrow<0b1, 0b10011, "uqrshrn">; + +def Neon_combine_2D : PatFrag<(ops node:$Rm, node:$Rn), + (v2i64 (concat_vectors (v1i64 node:$Rm), + (v1i64 node:$Rn)))>; +def Neon_combine_8H : PatFrag<(ops node:$Rm, node:$Rn), + (v8i16 (concat_vectors (v4i16 node:$Rm), + (v4i16 node:$Rn)))>; +def Neon_combine_4S : PatFrag<(ops node:$Rm, node:$Rn), + (v4i32 (concat_vectors (v2i32 node:$Rm), + (v2i32 node:$Rn)))>; +def Neon_combine_4f : PatFrag<(ops node:$Rm, node:$Rn), + (v4f32 (concat_vectors (v2f32 node:$Rm), + (v2f32 node:$Rn)))>; +def Neon_combine_2d : PatFrag<(ops node:$Rm, node:$Rn), + (v2f64 (concat_vectors (v1f64 node:$Rm), + (v1f64 node:$Rn)))>; + +def Neon_lshrImm8H : PatFrag<(ops node:$lhs, node:$rhs), + (v8i16 (srl (v8i16 node:$lhs), + (v8i16 (Neon_vdup (i32 node:$rhs)))))>; +def Neon_lshrImm4S : PatFrag<(ops node:$lhs, node:$rhs), + (v4i32 (srl (v4i32 node:$lhs), + (v4i32 (Neon_vdup (i32 node:$rhs)))))>; +def Neon_lshrImm2D : PatFrag<(ops node:$lhs, node:$rhs), + (v2i64 (srl (v2i64 node:$lhs), + (v2i64 (Neon_vdup (i32 node:$rhs)))))>; +def Neon_ashrImm8H : PatFrag<(ops node:$lhs, node:$rhs), + (v8i16 (sra (v8i16 node:$lhs), + (v8i16 (Neon_vdup (i32 node:$rhs)))))>; +def Neon_ashrImm4S : PatFrag<(ops node:$lhs, node:$rhs), + (v4i32 (sra (v4i32 node:$lhs), + (v4i32 (Neon_vdup (i32 node:$rhs)))))>; +def Neon_ashrImm2D : PatFrag<(ops node:$lhs, node:$rhs), + (v2i64 (sra (v2i64 node:$lhs), + (v2i64 (Neon_vdup (i32 node:$rhs)))))>; + +// Normal shift right narrow is matched by IR (srl/sra, trunc, concat_vectors) +multiclass Neon_shiftNarrow_patterns { + def : Pat<(v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") VPR128:$Rn, + (i32 shr_imm8:$Imm)))), + (SHRNvvi_8B VPR128:$Rn, imm:$Imm)>; + def : Pat<(v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") VPR128:$Rn, + (i32 shr_imm16:$Imm)))), + (SHRNvvi_4H VPR128:$Rn, imm:$Imm)>; + def : Pat<(v2i32 (trunc (!cast("Neon_" # shr # "Imm2D") VPR128:$Rn, + (i32 shr_imm32:$Imm)))), + (SHRNvvi_2S VPR128:$Rn, imm:$Imm)>; + + def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert + (v8i8 (trunc (!cast("Neon_" # shr # "Imm8H") + VPR128:$Rn, (i32 shr_imm8:$Imm))))))), + (SHRNvvi_16B (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), + VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert + (v4i16 (trunc (!cast("Neon_" # shr # "Imm4S") + VPR128:$Rn, (i32 shr_imm16:$Imm))))))), + (SHRNvvi_8H (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; + def : Pat<(Neon_combine_2D (v1i64 VPR64:$src), (v1i64 (bitconvert + (v2i32 (trunc (!cast("Neon_" # shr # "Imm2D") + VPR128:$Rn, (i32 shr_imm32:$Imm))))))), + (SHRNvvi_4S (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, imm:$Imm)>; +} + +multiclass Neon_shiftNarrow_QR_patterns { + def : Pat<(v8i8 (op 
+                       (v8i16 VPR128:$Rn), shr_imm8:$Imm)),
+            (!cast<Instruction>(prefix # "_8B") VPR128:$Rn, imm:$Imm)>;
+  def : Pat<(v4i16 (op (v4i32 VPR128:$Rn), shr_imm16:$Imm)),
+            (!cast<Instruction>(prefix # "_4H") VPR128:$Rn, imm:$Imm)>;
+  def : Pat<(v2i32 (op (v2i64 VPR128:$Rn), shr_imm32:$Imm)),
+            (!cast<Instruction>(prefix # "_2S") VPR128:$Rn, imm:$Imm)>;
+
+  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+                (v1i64 (bitconvert (v8i8
+                    (op (v8i16 VPR128:$Rn), shr_imm8:$Imm))))),
+            (!cast<Instruction>(prefix # "_16B")
+                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+                VPR128:$Rn, imm:$Imm)>;
+  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+                (v1i64 (bitconvert (v4i16
+                    (op (v4i32 VPR128:$Rn), shr_imm16:$Imm))))),
+            (!cast<Instruction>(prefix # "_8H")
+                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+                VPR128:$Rn, imm:$Imm)>;
+  def : Pat<(Neon_combine_2D (v1i64 VPR64:$src),
+                (v1i64 (bitconvert (v2i32
+                    (op (v2i64 VPR128:$Rn), shr_imm32:$Imm))))),
+            (!cast<Instruction>(prefix # "_4S")
+                (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64),
+                VPR128:$Rn, imm:$Imm)>;
+}
+
+defm : Neon_shiftNarrow_patterns<"lshr">;
+defm : Neon_shiftNarrow_patterns<"ashr">;
+
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrun, "QSHRUNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vrshrn, "RSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrun, "QRSHRUNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqshrn, "SQSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqshrn, "UQSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vsqrshrn, "SQRSHRNvvi">;
+defm : Neon_shiftNarrow_QR_patterns<int_aarch64_neon_vuqrshrn, "UQRSHRNvvi">;
+
+// Convert fixed-point and floating-point
+class N2VCvt_Fx<bit q, bit u, bits<5> opcode, string asmop, string T,
+                RegisterOperand VPRC, ValueType DestTy, ValueType SrcTy,
+                Operand ImmTy, SDPatternOperator IntOp>
+  : NeonI_2VShiftImm<q, u, opcode,
+                     (outs VPRC:$Rd), (ins VPRC:$Rn, ImmTy:$Imm),
+                     asmop # "\t$Rd." # T # ", $Rn." # T # ", $Imm",
+                     [(set (DestTy VPRC:$Rd), (DestTy (IntOp (SrcTy VPRC:$Rn),
+                       (i32 ImmTy:$Imm))))],
+                     NoItinerary>;
+
+multiclass NeonI_N2VCvt_Fx2fp<bit u, bits<5> opcode, string asmop,
+                              SDPatternOperator IntOp> {
+  def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2f32, v2i32,
+                      shr_imm32, IntOp> {
+    let Inst{22-21} = 0b01;
+  }
+
+  def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4f32, v4i32,
+                      shr_imm32, IntOp> {
+    let Inst{22-21} = 0b01;
+  }
+
+  def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2f64, v2i64,
+                      shr_imm64, IntOp> {
+    let Inst{22} = 0b1;
+  }
+}
+
+multiclass NeonI_N2VCvt_Fp2fx<bit u, bits<5> opcode, string asmop,
+                              SDPatternOperator IntOp> {
+  def _2S : N2VCvt_Fx<0, u, opcode, asmop, "2s", VPR64, v2i32, v2f32,
+                      shr_imm32, IntOp> {
+    let Inst{22-21} = 0b01;
+  }
+
+  def _4S : N2VCvt_Fx<1, u, opcode, asmop, "4s", VPR128, v4i32, v4f32,
+                      shr_imm32, IntOp> {
+    let Inst{22-21} = 0b01;
+  }
+
+  def _2D : N2VCvt_Fx<1, u, opcode, asmop, "2d", VPR128, v2i64, v2f64,
+                      shr_imm64, IntOp> {
+    let Inst{22} = 0b1;
+  }
+}
+
+// Convert fixed-point to floating-point
+defm VCVTxs2f : NeonI_N2VCvt_Fx2fp<0, 0b11100, "scvtf",
+                                   int_arm_neon_vcvtfxs2fp>;
+defm VCVTxu2f : NeonI_N2VCvt_Fx2fp<1, 0b11100, "ucvtf",
+                                   int_arm_neon_vcvtfxu2fp>;
+
+// Convert floating-point to fixed-point
+defm VCVTf2xs : NeonI_N2VCvt_Fp2fx<0, 0b11111, "fcvtzs",
+                                   int_arm_neon_vcvtfp2fxs>;
+defm VCVTf2xu : NeonI_N2VCvt_Fp2fx<1, 0b11111, "fcvtzu",
+                                   int_arm_neon_vcvtfp2fxu>;
+
+multiclass Neon_sshll2_0<SDNode ext> {
+  def _v8i8 : PatFrag<(ops node:$Rn),
+                      (v8i16 (ext (v8i8 (Neon_High16B node:$Rn))))>;
+  def _v4i16 : PatFrag<(ops node:$Rn),
+                       (v4i32 (ext (v4i16 (Neon_High8H node:$Rn))))>;
+  def _v2i32 : PatFrag<(ops node:$Rn),
+                       (v2i64 (ext (v2i32 (Neon_High4S node:$Rn))))>;
+}
+
+defm NI_sext_high : Neon_sshll2_0<sext>;
+defm NI_zext_high : Neon_sshll2_0<zext>;
+
+
+//===----------------------------------------------------------------------===//
+// Multiclasses for NeonI_Across
+//===----------------------------------------------------------------------===//
+
+// Variant 1
+
+multiclass NeonI_2VAcross_1<bit u, bits<5> opcode,
+                            string asmop, SDPatternOperator
opnode> +{ + def _1h8b: NeonI_2VAcross<0b0, u, 0b00, opcode, + (outs FPR16:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd, $Rn.8b", + [(set (v1i16 FPR16:$Rd), + (v1i16 (opnode (v8i8 VPR64:$Rn))))], + NoItinerary>; + + def _1h16b: NeonI_2VAcross<0b1, u, 0b00, opcode, + (outs FPR16:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd, $Rn.16b", + [(set (v1i16 FPR16:$Rd), + (v1i16 (opnode (v16i8 VPR128:$Rn))))], + NoItinerary>; + + def _1s4h: NeonI_2VAcross<0b0, u, 0b01, opcode, + (outs FPR32:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd, $Rn.4h", + [(set (v1i32 FPR32:$Rd), + (v1i32 (opnode (v4i16 VPR64:$Rn))))], + NoItinerary>; + + def _1s8h: NeonI_2VAcross<0b1, u, 0b01, opcode, + (outs FPR32:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd, $Rn.8h", + [(set (v1i32 FPR32:$Rd), + (v1i32 (opnode (v8i16 VPR128:$Rn))))], + NoItinerary>; + + // _1d2s doesn't exist! + + def _1d4s: NeonI_2VAcross<0b1, u, 0b10, opcode, + (outs FPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd, $Rn.4s", + [(set (v1i64 FPR64:$Rd), + (v1i64 (opnode (v4i32 VPR128:$Rn))))], + NoItinerary>; +} + +defm SADDLV : NeonI_2VAcross_1<0b0, 0b00011, "saddlv", int_aarch64_neon_saddlv>; +defm UADDLV : NeonI_2VAcross_1<0b1, 0b00011, "uaddlv", int_aarch64_neon_uaddlv>; + +// Variant 2 + +multiclass NeonI_2VAcross_2 opcode, + string asmop, SDPatternOperator opnode> +{ + def _1b8b: NeonI_2VAcross<0b0, u, 0b00, opcode, + (outs FPR8:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd, $Rn.8b", + [(set (v1i8 FPR8:$Rd), + (v1i8 (opnode (v8i8 VPR64:$Rn))))], + NoItinerary>; + + def _1b16b: NeonI_2VAcross<0b1, u, 0b00, opcode, + (outs FPR8:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd, $Rn.16b", + [(set (v1i8 FPR8:$Rd), + (v1i8 (opnode (v16i8 VPR128:$Rn))))], + NoItinerary>; + + def _1h4h: NeonI_2VAcross<0b0, u, 0b01, opcode, + (outs FPR16:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd, $Rn.4h", + [(set (v1i16 FPR16:$Rd), + (v1i16 (opnode (v4i16 VPR64:$Rn))))], + NoItinerary>; + + def _1h8h: NeonI_2VAcross<0b1, u, 0b01, opcode, + (outs FPR16:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd, $Rn.8h", + [(set (v1i16 FPR16:$Rd), + (v1i16 (opnode (v8i16 VPR128:$Rn))))], + NoItinerary>; + + // _1s2s doesn't exist! 
+ + def _1s4s: NeonI_2VAcross<0b1, u, 0b10, opcode, + (outs FPR32:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd, $Rn.4s", + [(set (v1i32 FPR32:$Rd), + (v1i32 (opnode (v4i32 VPR128:$Rn))))], + NoItinerary>; +} + +defm SMAXV : NeonI_2VAcross_2<0b0, 0b01010, "smaxv", int_aarch64_neon_smaxv>; +defm UMAXV : NeonI_2VAcross_2<0b1, 0b01010, "umaxv", int_aarch64_neon_umaxv>; + +defm SMINV : NeonI_2VAcross_2<0b0, 0b11010, "sminv", int_aarch64_neon_sminv>; +defm UMINV : NeonI_2VAcross_2<0b1, 0b11010, "uminv", int_aarch64_neon_uminv>; + +defm ADDV : NeonI_2VAcross_2<0b0, 0b11011, "addv", int_aarch64_neon_vaddv>; + +// Variant 3 + +multiclass NeonI_2VAcross_3 opcode, bits<2> size, + string asmop, SDPatternOperator opnode> { + def _1s4s: NeonI_2VAcross<0b1, u, size, opcode, + (outs FPR32:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd, $Rn.4s", + [(set (v1f32 FPR32:$Rd), + (v1f32 (opnode (v4f32 VPR128:$Rn))))], + NoItinerary>; +} + +defm FMAXNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b00, "fmaxnmv", + int_aarch64_neon_vmaxnmv>; +defm FMINNMV : NeonI_2VAcross_3<0b1, 0b01100, 0b10, "fminnmv", + int_aarch64_neon_vminnmv>; + +defm FMAXV : NeonI_2VAcross_3<0b1, 0b01111, 0b00, "fmaxv", + int_aarch64_neon_vmaxv>; +defm FMINV : NeonI_2VAcross_3<0b1, 0b01111, 0b10, "fminv", + int_aarch64_neon_vminv>; + +// The followings are for instruction class (Perm) + +class NeonI_Permute size, bits<3> opcode, + string asmop, RegisterOperand OpVPR, string OpS, + SDPatternOperator opnode, ValueType Ty> + : NeonI_Perm; + +multiclass NeonI_Perm_pat opcode, string asmop, + SDPatternOperator opnode> { + def _8b : NeonI_Permute<0b0, 0b00, opcode, asmop, + VPR64, "8b", opnode, v8i8>; + def _16b : NeonI_Permute<0b1, 0b00, opcode, asmop, + VPR128, "16b",opnode, v16i8>; + def _4h : NeonI_Permute<0b0, 0b01, opcode, asmop, + VPR64, "4h", opnode, v4i16>; + def _8h : NeonI_Permute<0b1, 0b01, opcode, asmop, + VPR128, "8h", opnode, v8i16>; + def _2s : NeonI_Permute<0b0, 0b10, opcode, asmop, + VPR64, "2s", opnode, v2i32>; + def _4s : NeonI_Permute<0b1, 0b10, opcode, asmop, + VPR128, "4s", opnode, v4i32>; + def _2d : NeonI_Permute<0b1, 0b11, opcode, asmop, + VPR128, "2d", opnode, v2i64>; +} + +defm UZP1vvv : NeonI_Perm_pat<0b001, "uzp1", Neon_uzp1>; +defm TRN1vvv : NeonI_Perm_pat<0b010, "trn1", Neon_trn1>; +defm ZIP1vvv : NeonI_Perm_pat<0b011, "zip1", Neon_zip1>; +defm UZP2vvv : NeonI_Perm_pat<0b101, "uzp2", Neon_uzp2>; +defm TRN2vvv : NeonI_Perm_pat<0b110, "trn2", Neon_trn2>; +defm ZIP2vvv : NeonI_Perm_pat<0b111, "zip2", Neon_zip2>; + +multiclass NeonI_Perm_float_pat { + def : Pat<(v2f32 (opnode (v2f32 VPR64:$Rn), (v2f32 VPR64:$Rm))), + (!cast(INS # "_2s") VPR64:$Rn, VPR64:$Rm)>; + + def : Pat<(v4f32 (opnode (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rm))), + (!cast(INS # "_4s") VPR128:$Rn, VPR128:$Rm)>; + + def : Pat<(v2f64 (opnode (v2f64 VPR128:$Rn), (v2f64 VPR128:$Rm))), + (!cast(INS # "_2d") VPR128:$Rn, VPR128:$Rm)>; +} + +defm : NeonI_Perm_float_pat<"UZP1vvv", Neon_uzp1>; +defm : NeonI_Perm_float_pat<"UZP2vvv", Neon_uzp2>; +defm : NeonI_Perm_float_pat<"ZIP1vvv", Neon_zip1>; +defm : NeonI_Perm_float_pat<"ZIP2vvv", Neon_zip2>; +defm : NeonI_Perm_float_pat<"TRN1vvv", Neon_trn1>; +defm : NeonI_Perm_float_pat<"TRN2vvv", Neon_trn2>; + +// The followings are for instruction class (3V Diff) + +// normal long/long2 pattern +class NeonI_3VDL size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator ext, + RegisterOperand OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff; + +multiclass NeonI_3VDL_s opcode, + 
string asmop, SDPatternOperator opnode, + bit Commutable = 0> { + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, sext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, sext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, sext, VPR64, v2i64, v2i32>; + } +} + +multiclass NeonI_3VDL2_s opcode, string asmop, + SDPatternOperator opnode, bit Commutable = 0> { + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_sext_high_v2i32, VPR128, v2i64, v4i32>; + } +} + +multiclass NeonI_3VDL_u opcode, string asmop, + SDPatternOperator opnode, bit Commutable = 0> { + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, zext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDL<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, zext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDL<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, zext, VPR64, v2i64, v2i32>; + } +} + +multiclass NeonI_3VDL2_u opcode, string asmop, + SDPatternOperator opnode, bit Commutable = 0> { + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDL<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; + } +} + +defm SADDLvvv : NeonI_3VDL_s<0b0, 0b0000, "saddl", add, 1>; +defm UADDLvvv : NeonI_3VDL_u<0b1, 0b0000, "uaddl", add, 1>; + +defm SADDL2vvv : NeonI_3VDL2_s<0b0, 0b0000, "saddl2", add, 1>; +defm UADDL2vvv : NeonI_3VDL2_u<0b1, 0b0000, "uaddl2", add, 1>; + +defm SSUBLvvv : NeonI_3VDL_s<0b0, 0b0010, "ssubl", sub, 0>; +defm USUBLvvv : NeonI_3VDL_u<0b1, 0b0010, "usubl", sub, 0>; + +defm SSUBL2vvv : NeonI_3VDL2_s<0b0, 0b0010, "ssubl2", sub, 0>; +defm USUBL2vvv : NeonI_3VDL2_u<0b1, 0b0010, "usubl2", sub, 0>; + +// normal wide/wide2 pattern +class NeonI_3VDW size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator ext, + RegisterOperand OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff; + +multiclass NeonI_3VDW_s opcode, string asmop, + SDPatternOperator opnode> { + def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, sext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, sext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, sext, VPR64, v2i64, v2i32>; +} + +defm SADDWvvv : NeonI_3VDW_s<0b0, 0b0001, "saddw", add>; +defm SSUBWvvv : NeonI_3VDW_s<0b0, 0b0011, "ssubw", sub>; + +multiclass NeonI_3VDW2_s opcode, string asmop, + SDPatternOperator opnode> { + def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_sext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_sext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_sext_high_v2i32, 
VPR128, v2i64, v4i32>; +} + +defm SADDW2vvv : NeonI_3VDW2_s<0b0, 0b0001, "saddw2", add>; +defm SSUBW2vvv : NeonI_3VDW2_s<0b0, 0b0011, "ssubw2", sub>; + +multiclass NeonI_3VDW_u opcode, string asmop, + SDPatternOperator opnode> { + def _8h8b : NeonI_3VDW<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, zext, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VDW<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, zext, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDW<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, zext, VPR64, v2i64, v2i32>; +} + +defm UADDWvvv : NeonI_3VDW_u<0b1, 0b0001, "uaddw", add>; +defm USUBWvvv : NeonI_3VDW_u<0b1, 0b0011, "usubw", sub>; + +multiclass NeonI_3VDW2_u opcode, string asmop, + SDPatternOperator opnode> { + def _8h16b : NeonI_3VDW<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, NI_zext_high_v8i8, VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDW<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, NI_zext_high_v4i16, VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDW<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, NI_zext_high_v2i32, VPR128, v2i64, v4i32>; +} + +defm UADDW2vvv : NeonI_3VDW2_u<0b1, 0b0001, "uaddw2", add>; +defm USUBW2vvv : NeonI_3VDW2_u<0b1, 0b0011, "usubw2", sub>; + +// Get the high half part of the vector element. +multiclass NeonI_get_high { + def _8h : PatFrag<(ops node:$Rn), + (v8i8 (trunc (v8i16 (srl (v8i16 node:$Rn), + (v8i16 (Neon_vdup (i32 8)))))))>; + def _4s : PatFrag<(ops node:$Rn), + (v4i16 (trunc (v4i32 (srl (v4i32 node:$Rn), + (v4i32 (Neon_vdup (i32 16)))))))>; + def _2d : PatFrag<(ops node:$Rn), + (v2i32 (trunc (v2i64 (srl (v2i64 node:$Rn), + (v2i64 (Neon_vdup (i32 32)))))))>; +} + +defm NI_get_hi : NeonI_get_high; + +// pattern for addhn/subhn with 2 operands +class NeonI_3VDN_addhn_2Op size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator get_hi, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff; + +multiclass NeonI_3VDN_addhn_2Op opcode, string asmop, + SDPatternOperator opnode, bit Commutable = 0> { + let isCommutable = Commutable in { + def _8b8h : NeonI_3VDN_addhn_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", + opnode, NI_get_hi_8h, v8i8, v8i16>; + def _4h4s : NeonI_3VDN_addhn_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", + opnode, NI_get_hi_4s, v4i16, v4i32>; + def _2s2d : NeonI_3VDN_addhn_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", + opnode, NI_get_hi_2d, v2i32, v2i64>; + } +} + +defm ADDHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0100, "addhn", add, 1>; +defm SUBHNvvv : NeonI_3VDN_addhn_2Op<0b0, 0b0110, "subhn", sub, 0>; + +// pattern for operation with 2 operands +class NeonI_3VD_2Op size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + RegisterOperand ResVPR, RegisterOperand OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff; + +// normal narrow pattern +multiclass NeonI_3VDN_2Op opcode, string asmop, + SDPatternOperator opnode, bit Commutable = 0> { + let isCommutable = Commutable in { + def _8b8h : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8b", "8h", + opnode, VPR64, VPR128, v8i8, v8i16>; + def _4h4s : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4h", "4s", + opnode, VPR64, VPR128, v4i16, v4i32>; + def _2s2d : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2s", "2d", + opnode, VPR64, VPR128, v2i32, v2i64>; + } +} + +defm RADDHNvvv : NeonI_3VDN_2Op<0b1, 0b0100, "raddhn", int_arm_neon_vraddhn, 1>; +defm RSUBHNvvv : NeonI_3VDN_2Op<0b1, 0b0110, "rsubhn", int_arm_neon_vrsubhn, 0>; + +// pattern for acle intrinsic with 3 operands 
+class NeonI_3VDN_3Op size, bits<4> opcode, + string asmop, string ResS, string OpS> + : NeonI_3VDiff { + let Constraints = "$src = $Rd"; + let neverHasSideEffects = 1; +} + +multiclass NeonI_3VDN_3Op_v1 opcode, string asmop> { + def _16b8h : NeonI_3VDN_3Op<0b1, u, 0b00, opcode, asmop, "16b", "8h">; + def _8h4s : NeonI_3VDN_3Op<0b1, u, 0b01, opcode, asmop, "8h", "4s">; + def _4s2d : NeonI_3VDN_3Op<0b1, u, 0b10, opcode, asmop, "4s", "2d">; +} + +defm ADDHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0100, "addhn2">; +defm SUBHN2vvv : NeonI_3VDN_3Op_v1<0b0, 0b0110, "subhn2">; + +defm RADDHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0100, "raddhn2">; +defm RSUBHN2vvv : NeonI_3VDN_3Op_v1<0b1, 0b0110, "rsubhn2">; + +// Patterns have to be separate because there's a SUBREG_TO_REG in the output +// part. +class NarrowHighHalfPat + : Pat<(Neon_combine_2D (v1i64 VPR64:$src), + (v1i64 (bitconvert (DstTy (coreop (SrcTy VPR128:$Rn), + (SrcTy VPR128:$Rm)))))), + (INST (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + VPR128:$Rn, VPR128:$Rm)>; + +// addhn2 patterns +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; + +// subhn2 patterns +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; +def : NarrowHighHalfPat>; + +// raddhn2 patterns +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; + +// rsubhn2 patterns +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; +def : NarrowHighHalfPat; + +// pattern that need to extend result +class NeonI_3VDL_Ext size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + RegisterOperand OpVPR, + ValueType ResTy, ValueType OpTy, ValueType OpSTy> + : NeonI_3VDiff; + +multiclass NeonI_3VDL_zext opcode, string asmop, + SDPatternOperator opnode, bit Commutable = 0> { + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL_Ext<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, VPR64, v8i16, v8i8, v8i8>; + def _4s4h : NeonI_3VDL_Ext<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, VPR64, v4i32, v4i16, v4i16>; + def _2d2s : NeonI_3VDL_Ext<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, VPR64, v2i64, v2i32, v2i32>; + } +} + +defm SABDLvvv : NeonI_3VDL_zext<0b0, 0b0111, "sabdl", int_arm_neon_vabds, 1>; +defm UABDLvvv : NeonI_3VDL_zext<0b1, 0b0111, "uabdl", int_arm_neon_vabdu, 1>; + +multiclass NeonI_Op_High { + def _16B : PatFrag<(ops node:$Rn, node:$Rm), + (op (v8i8 (Neon_High16B node:$Rn)), + (v8i8 (Neon_High16B node:$Rm)))>; + def _8H : PatFrag<(ops node:$Rn, node:$Rm), + (op (v4i16 (Neon_High8H node:$Rn)), + (v4i16 (Neon_High8H node:$Rm)))>; + def _4S : PatFrag<(ops node:$Rn, node:$Rm), + (op (v2i32 (Neon_High4S node:$Rn)), + (v2i32 (Neon_High4S node:$Rm)))>; +} + +defm NI_sabdl_hi : NeonI_Op_High; +defm NI_uabdl_hi : NeonI_Op_High; +defm NI_smull_hi : NeonI_Op_High; +defm NI_umull_hi : NeonI_Op_High; +defm NI_qdmull_hi : NeonI_Op_High; +defm NI_pmull_hi : NeonI_Op_High; + +multiclass NeonI_3VDL_Abd_u opcode, string asmop, string opnode, + bit Commutable = 0> { + let isCommutable = Commutable in { + def _8h8b : NeonI_3VDL_Ext<0b1, u, 0b00, opcode, asmop, "8h", "16b", + !cast(opnode # "_16B"), + VPR128, v8i16, v16i8, v8i8>; + def _4s4h : NeonI_3VDL_Ext<0b1, u, 0b01, opcode, asmop, "4s", "8h", + !cast(opnode # "_8H"), + VPR128, v4i32, v8i16, v4i16>; + def _2d2s : NeonI_3VDL_Ext<0b1, u, 0b10, opcode, asmop, "2d", "4s", + !cast(opnode # "_4S"), + VPR128, v2i64, v4i32, v2i32>; + } +} + +defm SABDL2vvv : NeonI_3VDL_Abd_u<0b0, 0b0111, "sabdl2", "NI_sabdl_hi", 1>; +defm UABDL2vvv : NeonI_3VDL_Abd_u<0b1, 
0b0111, "uabdl2", "NI_uabdl_hi", 1>; + +// For pattern that need two operators being chained. +class NeonI_3VDL_Aba size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, SDPatternOperator subop, + RegisterOperand OpVPR, + ValueType ResTy, ValueType OpTy, ValueType OpSTy> + : NeonI_3VDiff { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDL_Aba_v1 opcode, string asmop, + SDPatternOperator opnode, SDPatternOperator subop>{ + def _8h8b : NeonI_3VDL_Aba<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, subop, VPR64, v8i16, v8i8, v8i8>; + def _4s4h : NeonI_3VDL_Aba<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, subop, VPR64, v4i32, v4i16, v4i16>; + def _2d2s : NeonI_3VDL_Aba<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, subop, VPR64, v2i64, v2i32, v2i32>; +} + +defm SABALvvv : NeonI_3VDL_Aba_v1<0b0, 0b0101, "sabal", + add, int_arm_neon_vabds>; +defm UABALvvv : NeonI_3VDL_Aba_v1<0b1, 0b0101, "uabal", + add, int_arm_neon_vabdu>; + +multiclass NeonI_3VDL2_Aba_v1 opcode, string asmop, + SDPatternOperator opnode, string subop> { + def _8h8b : NeonI_3VDL_Aba<0b1, u, 0b00, opcode, asmop, "8h", "16b", + opnode, !cast(subop # "_16B"), + VPR128, v8i16, v16i8, v8i8>; + def _4s4h : NeonI_3VDL_Aba<0b1, u, 0b01, opcode, asmop, "4s", "8h", + opnode, !cast(subop # "_8H"), + VPR128, v4i32, v8i16, v4i16>; + def _2d2s : NeonI_3VDL_Aba<0b1, u, 0b10, opcode, asmop, "2d", "4s", + opnode, !cast(subop # "_4S"), + VPR128, v2i64, v4i32, v2i32>; +} + +defm SABAL2vvv : NeonI_3VDL2_Aba_v1<0b0, 0b0101, "sabal2", add, + "NI_sabdl_hi">; +defm UABAL2vvv : NeonI_3VDL2_Aba_v1<0b1, 0b0101, "uabal2", add, + "NI_uabdl_hi">; + +// Long pattern with 2 operands +multiclass NeonI_3VDL_2Op opcode, string asmop, + SDPatternOperator opnode, bit Commutable = 0> { + let isCommutable = Commutable in { + def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b", + opnode, VPR128, VPR64, v8i16, v8i8>; + def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, VPR128, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, VPR128, VPR64, v2i64, v2i32>; + } +} + +defm SMULLvvv : NeonI_3VDL_2Op<0b0, 0b1100, "smull", int_arm_neon_vmulls, 1>; +defm UMULLvvv : NeonI_3VDL_2Op<0b1, 0b1100, "umull", int_arm_neon_vmullu, 1>; + +class NeonI_3VDL2_2Op_mull size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff; + +multiclass NeonI_3VDL2_2Op_mull_v1 opcode, string asmop, + string opnode, bit Commutable = 0> { + let isCommutable = Commutable in { + def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b", + !cast(opnode # "_16B"), + v8i16, v16i8>; + def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", + !cast(opnode # "_8H"), + v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", + !cast(opnode # "_4S"), + v2i64, v4i32>; + } +} + +defm SMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b0, 0b1100, "smull2", + "NI_smull_hi", 1>; +defm UMULL2vvv : NeonI_3VDL2_2Op_mull_v1<0b1, 0b1100, "umull2", + "NI_umull_hi", 1>; + +// Long pattern with 3 operands +class NeonI_3VDL_3Op size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator opnode, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDL_3Op_v1 opcode, string asmop, + SDPatternOperator opnode> { + def _8h8b : NeonI_3VDL_3Op<0b0, u, 0b00, opcode, 
asmop, "8h", "8b", + opnode, v8i16, v8i8>; + def _4s4h : NeonI_3VDL_3Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, v4i32, v4i16>; + def _2d2s : NeonI_3VDL_3Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, v2i64, v2i32>; +} + +def Neon_smlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), + (add node:$Rd, + (int_arm_neon_vmulls node:$Rn, node:$Rm))>; + +def Neon_umlal : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), + (add node:$Rd, + (int_arm_neon_vmullu node:$Rn, node:$Rm))>; + +def Neon_smlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), + (sub node:$Rd, + (int_arm_neon_vmulls node:$Rn, node:$Rm))>; + +def Neon_umlsl : PatFrag<(ops node:$Rd, node:$Rn, node:$Rm), + (sub node:$Rd, + (int_arm_neon_vmullu node:$Rn, node:$Rm))>; + +defm SMLALvvv : NeonI_3VDL_3Op_v1<0b0, 0b1000, "smlal", Neon_smlal>; +defm UMLALvvv : NeonI_3VDL_3Op_v1<0b1, 0b1000, "umlal", Neon_umlal>; + +defm SMLSLvvv : NeonI_3VDL_3Op_v1<0b0, 0b1010, "smlsl", Neon_smlsl>; +defm UMLSLvvv : NeonI_3VDL_3Op_v1<0b1, 0b1010, "umlsl", Neon_umlsl>; + +class NeonI_3VDL2_3Op_mlas size, bits<4> opcode, + string asmop, string ResS, string OpS, + SDPatternOperator subop, SDPatternOperator opnode, + RegisterOperand OpVPR, + ValueType ResTy, ValueType OpTy> + : NeonI_3VDiff { + let Constraints = "$src = $Rd"; +} + +multiclass NeonI_3VDL2_3Op_mlas_v1 opcode, string asmop, + SDPatternOperator subop, string opnode> { + def _8h16b : NeonI_3VDL2_3Op_mlas<0b1, u, 0b00, opcode, asmop, "8h", "16b", + subop, !cast(opnode # "_16B"), + VPR128, v8i16, v16i8>; + def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h", + subop, !cast(opnode # "_8H"), + VPR128, v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s", + subop, !cast(opnode # "_4S"), + VPR128, v2i64, v4i32>; +} + +defm SMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1000, "smlal2", + add, "NI_smull_hi">; +defm UMLAL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1000, "umlal2", + add, "NI_umull_hi">; + +defm SMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b0, 0b1010, "smlsl2", + sub, "NI_smull_hi">; +defm UMLSL2vvv : NeonI_3VDL2_3Op_mlas_v1<0b1, 0b1010, "umlsl2", + sub, "NI_umull_hi">; + +multiclass NeonI_3VDL_qdmlal_3Op_v2 opcode, string asmop, + SDPatternOperator opnode> { + def _4s4h : NeonI_3VDL2_3Op_mlas<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, int_arm_neon_vqdmull, + VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VDL2_3Op_mlas<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, int_arm_neon_vqdmull, + VPR64, v2i64, v2i32>; +} + +defm SQDMLALvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1001, "sqdmlal", + int_arm_neon_vqadds>; +defm SQDMLSLvvv : NeonI_3VDL_qdmlal_3Op_v2<0b0, 0b1011, "sqdmlsl", + int_arm_neon_vqsubs>; + +multiclass NeonI_3VDL_v2 opcode, string asmop, + SDPatternOperator opnode, bit Commutable = 0> { + let isCommutable = Commutable in { + def _4s4h : NeonI_3VD_2Op<0b0, u, 0b01, opcode, asmop, "4s", "4h", + opnode, VPR128, VPR64, v4i32, v4i16>; + def _2d2s : NeonI_3VD_2Op<0b0, u, 0b10, opcode, asmop, "2d", "2s", + opnode, VPR128, VPR64, v2i64, v2i32>; + } +} + +defm SQDMULLvvv : NeonI_3VDL_v2<0b0, 0b1101, "sqdmull", + int_arm_neon_vqdmull, 1>; + +multiclass NeonI_3VDL2_2Op_mull_v2 opcode, string asmop, + string opnode, bit Commutable = 0> { + let isCommutable = Commutable in { + def _4s8h : NeonI_3VDL2_2Op_mull<0b1, u, 0b01, opcode, asmop, "4s", "8h", + !cast(opnode # "_8H"), + v4i32, v8i16>; + def _2d4s : NeonI_3VDL2_2Op_mull<0b1, u, 0b10, opcode, asmop, "2d", "4s", + !cast(opnode # "_4S"), + v2i64, v4i32>; + } +} + +defm SQDMULL2vvv : 
NeonI_3VDL2_2Op_mull_v2<0b0, 0b1101, "sqdmull2",
+                                         "NI_qdmull_hi", 1>;
+
+multiclass NeonI_3VDL2_3Op_qdmlal_v2<bit u, bits<4> opcode, string asmop,
+                                     SDPatternOperator opnode> {
+  def _4s8h : NeonI_3VDL2_3Op_mlas<0b1, u, 0b01, opcode, asmop, "4s", "8h",
+                                   opnode, NI_qdmull_hi_8H,
+                                   VPR128, v4i32, v8i16>;
+  def _2d4s : NeonI_3VDL2_3Op_mlas<0b1, u, 0b10, opcode, asmop, "2d", "4s",
+                                   opnode, NI_qdmull_hi_4S,
+                                   VPR128, v2i64, v4i32>;
+}
+
+defm SQDMLAL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1001, "sqdmlal2",
+                                             int_arm_neon_vqadds>;
+defm SQDMLSL2vvv : NeonI_3VDL2_3Op_qdmlal_v2<0b0, 0b1011, "sqdmlsl2",
+                                             int_arm_neon_vqsubs>;
+
+multiclass NeonI_3VDL_v3<bit u, bits<4> opcode, string asmop,
+                         SDPatternOperator opnode, bit Commutable = 0> {
+  let isCommutable = Commutable in {
+    def _8h8b : NeonI_3VD_2Op<0b0, u, 0b00, opcode, asmop, "8h", "8b",
+                              opnode, VPR128, VPR64, v8i16, v8i8>;
+
+    def _1q1d : NeonI_3VDiff<0b0, u, 0b11, opcode,
+                             (outs VPR128:$Rd), (ins VPR64:$Rn, VPR64:$Rm),
+                             asmop # "\t$Rd.1q, $Rn.1d, $Rm.1d",
+                             [], NoItinerary>;
+  }
+}
+
+defm PMULLvvv : NeonI_3VDL_v3<0b0, 0b1110, "pmull", int_arm_neon_vmullp, 1>;
+
+multiclass NeonI_3VDL2_2Op_mull_v3<bit u, bits<4> opcode, string asmop,
+                                   string opnode, bit Commutable = 0> {
+  let isCommutable = Commutable in {
+    def _8h16b : NeonI_3VDL2_2Op_mull<0b1, u, 0b00, opcode, asmop, "8h", "16b",
+                                      !cast<PatFrag>(opnode # "_16B"),
+                                      v8i16, v16i8>;
+
+    def _1q2d : NeonI_3VDiff<0b1, u, 0b11, opcode,
+                             (outs VPR128:$Rd), (ins VPR128:$Rn, VPR128:$Rm),
+                             asmop # "\t$Rd.1q, $Rn.2d, $Rm.2d",
+                             [], NoItinerary>;
+  }
+}
+
+defm PMULL2vvv : NeonI_3VDL2_2Op_mull_v3<0b0, 0b1110, "pmull2", "NI_pmull_hi",
+                                         1>;
+
+// End of implementation for instruction class (3V Diff)
+
+// The following are the vector load/store multiple N-element structure
+// instructions (class SIMD lselem).
+
+// ld1: load multiple 1-element structure to 1/2/3/4 registers.
+// ld2/ld3/ld4: load multiple N-element structure to N registers (N = 2, 3, 4).
+//   The structure consists of a sequence of sets of N values.
+//   The first element of the structure is placed in the first lane
+//   of the first vector, the second element in the first lane
+//   of the second vector, and so on.
+// E.g. LD1_3V_2S will load 32-bit elements {A, B, C, D, E, F} sequentially into
+// the three 64-bit vectors list {BA, DC, FE}.
+// E.g. LD3_2S will load 32-bit elements {A, B, C, D, E, F} into the three
+// 64-bit vectors list {DA, EB, FC}.
+// Store instructions store multiple structures from N registers in the same
+// way.
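
The de-interleaving in the ld2/ld3/ld4 comment above is easy to misread, so
here is a minimal C++ model of the LD3_2S example. It is purely illustrative
and has no connection to the actual instruction definitions or codegen.

  #include <cstdio>

  int main() {
    // Memory holds the structure sequence {A, B, C, D, E, F}: two sets of
    // three 32-bit elements.
    unsigned mem[6] = {0xA, 0xB, 0xC, 0xD, 0xE, 0xF};
    unsigned v[3][2]; // three .2s destination "registers", two lanes each

    // ld3 {v0.2s, v1.2s, v2.2s}, [mem]: element i of set s lands in lane s
    // of vector i, de-interleaving the structures.
    for (unsigned s = 0; s != 2; ++s)
      for (unsigned i = 0; i != 3; ++i)
        v[i][s] = mem[3 * s + i];

    // Prints v0 = {0xA, 0xD}, v1 = {0xB, 0xE}, v2 = {0xC, 0xF}, i.e. the
    // {DA, EB, FC} lists from the comment (high lane written first there).
    for (unsigned i = 0; i != 3; ++i)
      printf("v%u = {0x%X, 0x%X}\n", i, v[i][0], v[i][1]);
    return 0;
  }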
+ + +class NeonI_LDVList opcode, bits<2> size, + RegisterOperand VecList, string asmop> + : NeonI_LdStMult { + let mayLoad = 1; + let neverHasSideEffects = 1; +} + +multiclass LDVList_BHSD opcode, string List, string asmop> { + def _8B : NeonI_LDVList<0, opcode, 0b00, + !cast(List # "8B_operand"), asmop>; + + def _4H : NeonI_LDVList<0, opcode, 0b01, + !cast(List # "4H_operand"), asmop>; + + def _2S : NeonI_LDVList<0, opcode, 0b10, + !cast(List # "2S_operand"), asmop>; + + def _16B : NeonI_LDVList<1, opcode, 0b00, + !cast(List # "16B_operand"), asmop>; + + def _8H : NeonI_LDVList<1, opcode, 0b01, + !cast(List # "8H_operand"), asmop>; + + def _4S : NeonI_LDVList<1, opcode, 0b10, + !cast(List # "4S_operand"), asmop>; + + def _2D : NeonI_LDVList<1, opcode, 0b11, + !cast(List # "2D_operand"), asmop>; +} + +// Load multiple N-element structure to N consecutive registers (N = 1,2,3,4) +defm LD1 : LDVList_BHSD<0b0111, "VOne", "ld1">; +def LD1_1D : NeonI_LDVList<0, 0b0111, 0b11, VOne1D_operand, "ld1">; + +defm LD2 : LDVList_BHSD<0b1000, "VPair", "ld2">; + +defm LD3 : LDVList_BHSD<0b0100, "VTriple", "ld3">; + +defm LD4 : LDVList_BHSD<0b0000, "VQuad", "ld4">; + +// Load multiple 1-element structure to N consecutive registers (N = 2,3,4) +defm LD1x2 : LDVList_BHSD<0b1010, "VPair", "ld1">; +def LD1x2_1D : NeonI_LDVList<0, 0b1010, 0b11, VPair1D_operand, "ld1">; + +defm LD1x3 : LDVList_BHSD<0b0110, "VTriple", "ld1">; +def LD1x3_1D : NeonI_LDVList<0, 0b0110, 0b11, VTriple1D_operand, "ld1">; + +defm LD1x4 : LDVList_BHSD<0b0010, "VQuad", "ld1">; +def LD1x4_1D : NeonI_LDVList<0, 0b0010, 0b11, VQuad1D_operand, "ld1">; + +class NeonI_STVList opcode, bits<2> size, + RegisterOperand VecList, string asmop> + : NeonI_LdStMult { + let mayStore = 1; + let neverHasSideEffects = 1; +} + +multiclass STVList_BHSD opcode, string List, string asmop> { + def _8B : NeonI_STVList<0, opcode, 0b00, + !cast(List # "8B_operand"), asmop>; + + def _4H : NeonI_STVList<0, opcode, 0b01, + !cast(List # "4H_operand"), asmop>; + + def _2S : NeonI_STVList<0, opcode, 0b10, + !cast(List # "2S_operand"), asmop>; + + def _16B : NeonI_STVList<1, opcode, 0b00, + !cast(List # "16B_operand"), asmop>; + + def _8H : NeonI_STVList<1, opcode, 0b01, + !cast(List # "8H_operand"), asmop>; + + def _4S : NeonI_STVList<1, opcode, 0b10, + !cast(List # "4S_operand"), asmop>; + + def _2D : NeonI_STVList<1, opcode, 0b11, + !cast(List # "2D_operand"), asmop>; +} + +// Store multiple N-element structures from N registers (N = 1,2,3,4) +defm ST1 : STVList_BHSD<0b0111, "VOne", "st1">; +def ST1_1D : NeonI_STVList<0, 0b0111, 0b11, VOne1D_operand, "st1">; + +defm ST2 : STVList_BHSD<0b1000, "VPair", "st2">; + +defm ST3 : STVList_BHSD<0b0100, "VTriple", "st3">; + +defm ST4 : STVList_BHSD<0b0000, "VQuad", "st4">; + +// Store multiple 1-element structures from N consecutive registers (N = 2,3,4) +defm ST1x2 : STVList_BHSD<0b1010, "VPair", "st1">; +def ST1x2_1D : NeonI_STVList<0, 0b1010, 0b11, VPair1D_operand, "st1">; + +defm ST1x3 : STVList_BHSD<0b0110, "VTriple", "st1">; +def ST1x3_1D : NeonI_STVList<0, 0b0110, 0b11, VTriple1D_operand, "st1">; + +defm ST1x4 : STVList_BHSD<0b0010, "VQuad", "st1">; +def ST1x4_1D : NeonI_STVList<0, 0b0010, 0b11, VQuad1D_operand, "st1">; + +def : Pat<(v2f64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>; +def : Pat<(v2i64 (load GPR64xsp:$addr)), (LD1_2D GPR64xsp:$addr)>; + +def : Pat<(v4f32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>; +def : Pat<(v4i32 (load GPR64xsp:$addr)), (LD1_4S GPR64xsp:$addr)>; + +def : Pat<(v8i16 (load 
GPR64xsp:$addr)), (LD1_8H GPR64xsp:$addr)>;
+def : Pat<(v16i8 (load GPR64xsp:$addr)), (LD1_16B GPR64xsp:$addr)>;
+
+def : Pat<(v1f64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
+def : Pat<(v1i64 (load GPR64xsp:$addr)), (LD1_1D GPR64xsp:$addr)>;
+
+def : Pat<(v2f32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
+def : Pat<(v2i32 (load GPR64xsp:$addr)), (LD1_2S GPR64xsp:$addr)>;
+
+def : Pat<(v4i16 (load GPR64xsp:$addr)), (LD1_4H GPR64xsp:$addr)>;
+def : Pat<(v8i8 (load GPR64xsp:$addr)), (LD1_8B GPR64xsp:$addr)>;
+
+def : Pat<(store (v2i64 VPR128:$value), GPR64xsp:$addr),
+          (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v2f64 VPR128:$value), GPR64xsp:$addr),
+          (ST1_2D GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v4i32 VPR128:$value), GPR64xsp:$addr),
+          (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v4f32 VPR128:$value), GPR64xsp:$addr),
+          (ST1_4S GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v8i16 VPR128:$value), GPR64xsp:$addr),
+          (ST1_8H GPR64xsp:$addr, VPR128:$value)>;
+def : Pat<(store (v16i8 VPR128:$value), GPR64xsp:$addr),
+          (ST1_16B GPR64xsp:$addr, VPR128:$value)>;
+
+def : Pat<(store (v1i64 VPR64:$value), GPR64xsp:$addr),
+          (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v1f64 VPR64:$value), GPR64xsp:$addr),
+          (ST1_1D GPR64xsp:$addr, VPR64:$value)>;
+
+def : Pat<(store (v2i32 VPR64:$value), GPR64xsp:$addr),
+          (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v2f32 VPR64:$value), GPR64xsp:$addr),
+          (ST1_2S GPR64xsp:$addr, VPR64:$value)>;
+
+def : Pat<(store (v4i16 VPR64:$value), GPR64xsp:$addr),
+          (ST1_4H GPR64xsp:$addr, VPR64:$value)>;
+def : Pat<(store (v8i8 VPR64:$value), GPR64xsp:$addr),
+          (ST1_8B GPR64xsp:$addr, VPR64:$value)>;
+
+// End of vector load/store multiple N-element structure (class SIMD lselem)
+
+// The following are the post-index vector load/store multiple N-element
+// structure instructions (class SIMD lselem-post).
+def exact1_asmoperand : AsmOperandClass {
+  let Name = "Exact1";
+  let PredicateMethod = "isExactImm<1>";
+  let RenderMethod = "addImmOperands";
+}
+def uimm_exact1 : Operand<i32>, ImmLeaf<i32, [{return Imm == 1;}]> {
+  let ParserMatchClass = exact1_asmoperand;
+}
+
+def exact2_asmoperand : AsmOperandClass {
+  let Name = "Exact2";
+  let PredicateMethod = "isExactImm<2>";
+  let RenderMethod = "addImmOperands";
+}
+def uimm_exact2 : Operand<i32>, ImmLeaf<i32, [{return Imm == 2;}]> {
+  let ParserMatchClass = exact2_asmoperand;
+}
+
+def exact3_asmoperand : AsmOperandClass {
+  let Name = "Exact3";
+  let PredicateMethod = "isExactImm<3>";
+  let RenderMethod = "addImmOperands";
+}
+def uimm_exact3 : Operand<i32>, ImmLeaf<i32, [{return Imm == 3;}]> {
+  let ParserMatchClass = exact3_asmoperand;
+}
+
+def exact4_asmoperand : AsmOperandClass {
+  let Name = "Exact4";
+  let PredicateMethod = "isExactImm<4>";
+  let RenderMethod = "addImmOperands";
+}
+def uimm_exact4 : Operand<i32>, ImmLeaf<i32, [{return Imm == 4;}]> {
+  let ParserMatchClass = exact4_asmoperand;
+}
+
+def exact6_asmoperand : AsmOperandClass {
+  let Name = "Exact6";
+  let PredicateMethod = "isExactImm<6>";
+  let RenderMethod = "addImmOperands";
+}
+def uimm_exact6 : Operand<i32>, ImmLeaf<i32, [{return Imm == 6;}]> {
+  let ParserMatchClass = exact6_asmoperand;
+}
+
+def exact8_asmoperand : AsmOperandClass {
+  let Name = "Exact8";
+  let PredicateMethod = "isExactImm<8>";
+  let RenderMethod = "addImmOperands";
+}
+def uimm_exact8 : Operand<i32>, ImmLeaf<i32, [{return Imm == 8;}]> {
+  let ParserMatchClass = exact8_asmoperand;
+}
+
+def exact12_asmoperand : AsmOperandClass {
+  let Name = "Exact12";
+  let PredicateMethod = "isExactImm<12>";
+  let RenderMethod = "addImmOperands";
+}
+def uimm_exact12 : Operand<i32>, ImmLeaf<i32, [{return Imm == 12;}]> {
+  let 
ParserMatchClass = exact12_asmoperand; +} + +def exact16_asmoperand : AsmOperandClass { + let Name = "Exact16"; + let PredicateMethod = "isExactImm<16>"; + let RenderMethod = "addImmOperands"; +} +def uimm_exact16 : Operand, ImmLeaf { + let ParserMatchClass = exact16_asmoperand; +} + +def exact24_asmoperand : AsmOperandClass { + let Name = "Exact24"; + let PredicateMethod = "isExactImm<24>"; + let RenderMethod = "addImmOperands"; +} +def uimm_exact24 : Operand, ImmLeaf { + let ParserMatchClass = exact24_asmoperand; +} + +def exact32_asmoperand : AsmOperandClass { + let Name = "Exact32"; + let PredicateMethod = "isExactImm<32>"; + let RenderMethod = "addImmOperands"; +} +def uimm_exact32 : Operand, ImmLeaf { + let ParserMatchClass = exact32_asmoperand; +} + +def exact48_asmoperand : AsmOperandClass { + let Name = "Exact48"; + let PredicateMethod = "isExactImm<48>"; + let RenderMethod = "addImmOperands"; +} +def uimm_exact48 : Operand, ImmLeaf { + let ParserMatchClass = exact48_asmoperand; +} + +def exact64_asmoperand : AsmOperandClass { + let Name = "Exact64"; + let PredicateMethod = "isExactImm<64>"; + let RenderMethod = "addImmOperands"; +} +def uimm_exact64 : Operand, ImmLeaf { + let ParserMatchClass = exact64_asmoperand; +} + +multiclass NeonI_LDWB_VList opcode, bits<2> size, + RegisterOperand VecList, Operand ImmTy, + string asmop> { + let Constraints = "$Rn = $wb", mayLoad = 1, neverHasSideEffects = 1, + DecoderMethod = "DecodeVLDSTPostInstruction" in { + def _fixed : NeonI_LdStMult_Post { + let Rm = 0b11111; + } + + def _register : NeonI_LdStMult_Post; + } +} + +multiclass LDWB_VList_BHSD opcode, string List, Operand ImmTy, + Operand ImmTy2, string asmop> { + defm _8B : NeonI_LDWB_VList<0, opcode, 0b00, + !cast(List # "8B_operand"), + ImmTy, asmop>; + + defm _4H : NeonI_LDWB_VList<0, opcode, 0b01, + !cast(List # "4H_operand"), + ImmTy, asmop>; + + defm _2S : NeonI_LDWB_VList<0, opcode, 0b10, + !cast(List # "2S_operand"), + ImmTy, asmop>; + + defm _16B : NeonI_LDWB_VList<1, opcode, 0b00, + !cast(List # "16B_operand"), + ImmTy2, asmop>; + + defm _8H : NeonI_LDWB_VList<1, opcode, 0b01, + !cast(List # "8H_operand"), + ImmTy2, asmop>; + + defm _4S : NeonI_LDWB_VList<1, opcode, 0b10, + !cast(List # "4S_operand"), + ImmTy2, asmop>; + + defm _2D : NeonI_LDWB_VList<1, opcode, 0b11, + !cast(List # "2D_operand"), + ImmTy2, asmop>; +} + +// Post-index load multiple N-element structures from N registers (N = 1,2,3,4) +defm LD1WB : LDWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "ld1">; +defm LD1WB_1D : NeonI_LDWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8, + "ld1">; + +defm LD2WB : LDWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32, "ld2">; + +defm LD3WB : LDWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48, + "ld3">; + +defm LD4WB : LDWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64, "ld4">; + +// Post-index load multiple 1-element structures from N consecutive registers +// (N = 2,3,4) +defm LD1x2WB : LDWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32, + "ld1">; +defm LD1x2WB_1D : NeonI_LDWB_VList<0, 0b1010, 0b11, VPair1D_operand, + uimm_exact16, "ld1">; + +defm LD1x3WB : LDWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48, + "ld1">; +defm LD1x3WB_1D : NeonI_LDWB_VList<0, 0b0110, 0b11, VTriple1D_operand, + uimm_exact24, "ld1">; + +defm LD1x4WB : LDWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64, + "ld1">; +defm LD1x4WB_1D : NeonI_LDWB_VList<0, 0b0010, 0b11, VQuad1D_operand, + uimm_exact32, "ld1">; + +multiclass 
NeonI_STWB_VList<bit q, bits<4> opcode, bits<2> size,
+                            RegisterOperand VecList, Operand ImmTy,
+                            string asmop> {
+  let Constraints = "$Rn = $wb", mayStore = 1, neverHasSideEffects = 1,
+      DecoderMethod = "DecodeVLDSTPostInstruction" in {
+    def _fixed : NeonI_LdStMult_Post {
+      let Rm = 0b11111;
+    }
+
+    def _register : NeonI_LdStMult_Post;
+  }
+}
+
+multiclass STWB_VList_BHSD<bits<4> opcode, string List, Operand ImmTy,
+                           Operand ImmTy2, string asmop> {
+  defm _8B : NeonI_STWB_VList<0, opcode, 0b00,
+                              !cast<RegisterOperand>(List # "8B_operand"),
+                              ImmTy, asmop>;
+
+  defm _4H : NeonI_STWB_VList<0, opcode, 0b01,
+                              !cast<RegisterOperand>(List # "4H_operand"),
+                              ImmTy, asmop>;
+
+  defm _2S : NeonI_STWB_VList<0, opcode, 0b10,
+                              !cast<RegisterOperand>(List # "2S_operand"),
+                              ImmTy, asmop>;
+
+  defm _16B : NeonI_STWB_VList<1, opcode, 0b00,
+                               !cast<RegisterOperand>(List # "16B_operand"),
+                               ImmTy2, asmop>;
+
+  defm _8H : NeonI_STWB_VList<1, opcode, 0b01,
+                              !cast<RegisterOperand>(List # "8H_operand"),
+                              ImmTy2, asmop>;
+
+  defm _4S : NeonI_STWB_VList<1, opcode, 0b10,
+                              !cast<RegisterOperand>(List # "4S_operand"),
+                              ImmTy2, asmop>;
+
+  defm _2D : NeonI_STWB_VList<1, opcode, 0b11,
+                              !cast<RegisterOperand>(List # "2D_operand"),
+                              ImmTy2, asmop>;
+}
+
+// Post-index store multiple N-element structures from N registers
+// (N = 1,2,3,4)
+defm ST1WB : STWB_VList_BHSD<0b0111, "VOne", uimm_exact8, uimm_exact16, "st1">;
+defm ST1WB_1D : NeonI_STWB_VList<0, 0b0111, 0b11, VOne1D_operand, uimm_exact8,
+                                 "st1">;
+
+defm ST2WB : STWB_VList_BHSD<0b1000, "VPair", uimm_exact16, uimm_exact32,
+                             "st2">;
+
+defm ST3WB : STWB_VList_BHSD<0b0100, "VTriple", uimm_exact24, uimm_exact48,
+                             "st3">;
+
+defm ST4WB : STWB_VList_BHSD<0b0000, "VQuad", uimm_exact32, uimm_exact64,
+                             "st4">;
+
+// Post-index store multiple 1-element structures from N consecutive
+// registers (N = 2,3,4)
+defm ST1x2WB : STWB_VList_BHSD<0b1010, "VPair", uimm_exact16, uimm_exact32,
+                               "st1">;
+defm ST1x2WB_1D : NeonI_STWB_VList<0, 0b1010, 0b11, VPair1D_operand,
+                                   uimm_exact16, "st1">;
+
+defm ST1x3WB : STWB_VList_BHSD<0b0110, "VTriple", uimm_exact24, uimm_exact48,
+                               "st1">;
+defm ST1x3WB_1D : NeonI_STWB_VList<0, 0b0110, 0b11, VTriple1D_operand,
+                                   uimm_exact24, "st1">;
+
+defm ST1x4WB : STWB_VList_BHSD<0b0010, "VQuad", uimm_exact32, uimm_exact64,
+                               "st1">;
+defm ST1x4WB_1D : NeonI_STWB_VList<0, 0b0010, 0b11, VQuad1D_operand,
+                                   uimm_exact32, "st1">;
+
+// End of post-index vector load/store multiple N-element structure
+// (class SIMD lselem-post)
+
+// The following are the vector load/store single N-element structure
+// instructions (class SIMD lsone).
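All of the writeback variants above follow one encoding convention: each post-index instruction is defined twice, a _fixed form whose increment is implied by the transfer size (the Rm field forced to all-ones) and a _register form that takes the increment in Rm, while the "$Rn = $wb" constraint ties the written-back base to the incoming base register. A reduced sketch of that idiom (hypothetical names, not from this patch):

    // Sketch: one base record shape, two post-index variants. The let
    // inside the _fixed def freezes the Rm field; Constraints ties the
    // written-back base register to the incoming one.
    class PostIdxBase {
      bits<5> Rm;                           // offset register field
      string Constraints = "$Rn = $wb";     // base update is an output
    }
    def LDX_fixed : PostIdxBase {
      let Rm = 0b11111;   // all-ones Rm selects the immediate form
    }
    def LDX_register : PostIdxBase;  // Rm stays a real register operand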
+def neon_uimm0_bare : Operand, + ImmLeaf { + let ParserMatchClass = neon_uimm0_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm1_bare : Operand, + ImmLeaf { + let ParserMatchClass = neon_uimm1_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm2_bare : Operand, + ImmLeaf { + let ParserMatchClass = neon_uimm2_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm3_bare : Operand, + ImmLeaf { + let ParserMatchClass = uimm3_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +def neon_uimm4_bare : Operand, + ImmLeaf { + let ParserMatchClass = uimm4_asmoperand; + let PrintMethod = "printUImmBareOperand"; +} + +class NeonI_LDN_Dup opcode, bits<2> size, + RegisterOperand VecList, string asmop> + : NeonI_LdOne_Dup { + let mayLoad = 1; + let neverHasSideEffects = 1; +} + +multiclass LDN_Dup_BHSD opcode, string List, string asmop> { + def _8B : NeonI_LDN_Dup<0, r, opcode, 0b00, + !cast(List # "8B_operand"), asmop>; + + def _4H : NeonI_LDN_Dup<0, r, opcode, 0b01, + !cast(List # "4H_operand"), asmop>; + + def _2S : NeonI_LDN_Dup<0, r, opcode, 0b10, + !cast(List # "2S_operand"), asmop>; + + def _1D : NeonI_LDN_Dup<0, r, opcode, 0b11, + !cast(List # "1D_operand"), asmop>; + + def _16B : NeonI_LDN_Dup<1, r, opcode, 0b00, + !cast(List # "16B_operand"), asmop>; + + def _8H : NeonI_LDN_Dup<1, r, opcode, 0b01, + !cast(List # "8H_operand"), asmop>; + + def _4S : NeonI_LDN_Dup<1, r, opcode, 0b10, + !cast(List # "4S_operand"), asmop>; + + def _2D : NeonI_LDN_Dup<1, r, opcode, 0b11, + !cast(List # "2D_operand"), asmop>; +} + +// Load single 1-element structure to all lanes of 1 register +defm LD1R : LDN_Dup_BHSD<0b0, 0b110, "VOne", "ld1r">; + +// Load single N-element structure to all lanes of N consecutive +// registers (N = 2,3,4) +defm LD2R : LDN_Dup_BHSD<0b1, 0b110, "VPair", "ld2r">; +defm LD3R : LDN_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r">; +defm LD4R : LDN_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r">; + + +class LD1R_pattern + : Pat<(VTy (Neon_vdup (DTy (LoadOp GPR64xsp:$Rn)))), + (VTy (INST GPR64xsp:$Rn))>; + +// Match all LD1R instructions +def : LD1R_pattern; + +def : LD1R_pattern; + +def : LD1R_pattern; + +def : LD1R_pattern; + +def : LD1R_pattern; +def : LD1R_pattern; + +def : LD1R_pattern; +def : LD1R_pattern; + +def : LD1R_pattern; +def : LD1R_pattern; + +def : LD1R_pattern; +def : LD1R_pattern; + + +multiclass VectorList_Bare_BHSD { + defm B : VectorList_operands; + defm H : VectorList_operands; + defm S : VectorList_operands; + defm D : VectorList_operands; +} + +// Special vector list operand of 128-bit vectors with bare layout. +// i.e. 
only show ".b", ".h", ".s", ".d" +defm VOne : VectorList_Bare_BHSD<"VOne", 1, FPR128>; +defm VPair : VectorList_Bare_BHSD<"VPair", 2, QPair>; +defm VTriple : VectorList_Bare_BHSD<"VTriple", 3, QTriple>; +defm VQuad : VectorList_Bare_BHSD<"VQuad", 4, QQuad>; + +class NeonI_LDN_Lane op2_1, bit op0, RegisterOperand VList, + Operand ImmOp, string asmop> + : NeonI_LdStOne_Lane<1, r, op2_1, op0, + (outs VList:$Rt), + (ins GPR64xsp:$Rn, VList:$src, ImmOp:$lane), + asmop # "\t$Rt[$lane], [$Rn]", + [], + NoItinerary> { + let mayLoad = 1; + let neverHasSideEffects = 1; + let hasExtraDefRegAllocReq = 1; + let Constraints = "$src = $Rt"; +} + +multiclass LDN_Lane_BHSD { + def _B : NeonI_LDN_Lane(List # "B_operand"), + neon_uimm4_bare, asmop> { + let Inst{12-10} = lane{2-0}; + let Inst{30} = lane{3}; + } + + def _H : NeonI_LDN_Lane(List # "H_operand"), + neon_uimm3_bare, asmop> { + let Inst{12-10} = {lane{1}, lane{0}, 0b0}; + let Inst{30} = lane{2}; + } + + def _S : NeonI_LDN_Lane(List # "S_operand"), + neon_uimm2_bare, asmop> { + let Inst{12-10} = {lane{0}, 0b0, 0b0}; + let Inst{30} = lane{1}; + } + + def _D : NeonI_LDN_Lane(List # "D_operand"), + neon_uimm1_bare, asmop> { + let Inst{12-10} = 0b001; + let Inst{30} = lane{0}; + } +} + +// Load single 1-element structure to one lane of 1 register. +defm LD1LN : LDN_Lane_BHSD<0b0, 0b0, "VOne", "ld1">; + +// Load single N-element structure to one lane of N consecutive registers +// (N = 2,3,4) +defm LD2LN : LDN_Lane_BHSD<0b1, 0b0, "VPair", "ld2">; +defm LD3LN : LDN_Lane_BHSD<0b0, 0b1, "VTriple", "ld3">; +defm LD4LN : LDN_Lane_BHSD<0b1, 0b1, "VQuad", "ld4">; + +multiclass LD1LN_patterns { + def : Pat<(VTy (vector_insert (VTy VPR64:$src), + (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp:$lane))), + (VTy (EXTRACT_SUBREG + (INST GPR64xsp:$Rn, + (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64), + ImmOp:$lane), + sub_64))>; + + def : Pat<(VTy2 (vector_insert (VTy2 VPR128:$src), + (DTy (LoadOp GPR64xsp:$Rn)), (ImmOp2:$lane))), + (VTy2 (INST GPR64xsp:$Rn, VPR128:$src, ImmOp2:$lane))>; +} + +// Match all LD1LN instructions +defm : LD1LN_patterns; + +defm : LD1LN_patterns; + +defm : LD1LN_patterns; +defm : LD1LN_patterns; + +defm : LD1LN_patterns; +defm : LD1LN_patterns; + +class NeonI_STN_Lane op2_1, bit op0, RegisterOperand VList, + Operand ImmOp, string asmop> + : NeonI_LdStOne_Lane<0, r, op2_1, op0, + (outs), (ins GPR64xsp:$Rn, VList:$Rt, ImmOp:$lane), + asmop # "\t$Rt[$lane], [$Rn]", + [], + NoItinerary> { + let mayStore = 1; + let neverHasSideEffects = 1; + let hasExtraDefRegAllocReq = 1; +} + +multiclass STN_Lane_BHSD { + def _B : NeonI_STN_Lane(List # "B_operand"), + neon_uimm4_bare, asmop> { + let Inst{12-10} = lane{2-0}; + let Inst{30} = lane{3}; + } + + def _H : NeonI_STN_Lane(List # "H_operand"), + neon_uimm3_bare, asmop> { + let Inst{12-10} = {lane{1}, lane{0}, 0b0}; + let Inst{30} = lane{2}; + } + + def _S : NeonI_STN_Lane(List # "S_operand"), + neon_uimm2_bare, asmop> { + let Inst{12-10} = {lane{0}, 0b0, 0b0}; + let Inst{30} = lane{1}; + } + + def _D : NeonI_STN_Lane(List # "D_operand"), + neon_uimm1_bare, asmop>{ + let Inst{12-10} = 0b001; + let Inst{30} = lane{0}; + } +} + +// Store single 1-element structure from one lane of 1 register. 
+defm ST1LN : STN_Lane_BHSD<0b0, 0b0, "VOne", "st1">; + +// Store single N-element structure from one lane of N consecutive registers +// (N = 2,3,4) +defm ST2LN : STN_Lane_BHSD<0b1, 0b0, "VPair", "st2">; +defm ST3LN : STN_Lane_BHSD<0b0, 0b1, "VTriple", "st3">; +defm ST4LN : STN_Lane_BHSD<0b1, 0b1, "VQuad", "st4">; + +multiclass ST1LN_patterns { + def : Pat<(StoreOp (DTy (vector_extract (VTy VPR64:$Rt), ImmOp:$lane)), + GPR64xsp:$Rn), + (INST GPR64xsp:$Rn, + (SUBREG_TO_REG (i64 0), VPR64:$Rt, sub_64), + ImmOp:$lane)>; + + def : Pat<(StoreOp (DTy (vector_extract (VTy2 VPR128:$Rt), ImmOp2:$lane)), + GPR64xsp:$Rn), + (INST GPR64xsp:$Rn, VPR128:$Rt, ImmOp2:$lane)>; +} + +// Match all ST1LN instructions +defm : ST1LN_patterns; + +defm : ST1LN_patterns; + +defm : ST1LN_patterns; +defm : ST1LN_patterns; + +defm : ST1LN_patterns; +defm : ST1LN_patterns; + +// End of vector load/store single N-element structure (class SIMD lsone). + + +// The following are post-index load/store single N-element instructions +// (class SIMD lsone-post) + +multiclass NeonI_LDN_WB_Dup opcode, bits<2> size, + RegisterOperand VecList, Operand ImmTy, + string asmop> { + let mayLoad = 1, neverHasSideEffects = 1, Constraints = "$wb = $Rn", + DecoderMethod = "DecodeVLDSTLanePostInstruction" in { + def _fixed : NeonI_LdOne_Dup_Post { + let Rm = 0b11111; + } + + def _register : NeonI_LdOne_Dup_Post; + } +} + +multiclass LDWB_Dup_BHSD opcode, string List, string asmop, + Operand uimm_b, Operand uimm_h, + Operand uimm_s, Operand uimm_d> { + defm _8B : NeonI_LDN_WB_Dup<0, r, opcode, 0b00, + !cast(List # "8B_operand"), + uimm_b, asmop>; + + defm _4H : NeonI_LDN_WB_Dup<0, r, opcode, 0b01, + !cast(List # "4H_operand"), + uimm_h, asmop>; + + defm _2S : NeonI_LDN_WB_Dup<0, r, opcode, 0b10, + !cast(List # "2S_operand"), + uimm_s, asmop>; + + defm _1D : NeonI_LDN_WB_Dup<0, r, opcode, 0b11, + !cast(List # "1D_operand"), + uimm_d, asmop>; + + defm _16B : NeonI_LDN_WB_Dup<1, r, opcode, 0b00, + !cast(List # "16B_operand"), + uimm_b, asmop>; + + defm _8H : NeonI_LDN_WB_Dup<1, r, opcode, 0b01, + !cast(List # "8H_operand"), + uimm_h, asmop>; + + defm _4S : NeonI_LDN_WB_Dup<1, r, opcode, 0b10, + !cast(List # "4S_operand"), + uimm_s, asmop>; + + defm _2D : NeonI_LDN_WB_Dup<1, r, opcode, 0b11, + !cast(List # "2D_operand"), + uimm_d, asmop>; +} + +// Post-index load single 1-element structure to all lanes of 1 register +defm LD1R_WB : LDWB_Dup_BHSD<0b0, 0b110, "VOne", "ld1r", uimm_exact1, + uimm_exact2, uimm_exact4, uimm_exact8>; + +// Post-index load single N-element structure to all lanes of N consecutive +// registers (N = 2,3,4) +defm LD2R_WB : LDWB_Dup_BHSD<0b1, 0b110, "VPair", "ld2r", uimm_exact2, + uimm_exact4, uimm_exact8, uimm_exact16>; +defm LD3R_WB : LDWB_Dup_BHSD<0b0, 0b111, "VTriple", "ld3r", uimm_exact3, + uimm_exact6, uimm_exact12, uimm_exact24>; +defm LD4R_WB : LDWB_Dup_BHSD<0b1, 0b111, "VQuad", "ld4r", uimm_exact4, + uimm_exact8, uimm_exact16, uimm_exact32>; + +let mayLoad = 1, neverHasSideEffects = 1, hasExtraDefRegAllocReq = 1, + Constraints = "$Rn = $wb, $Rt = $src", + DecoderMethod = "DecodeVLDSTLanePostInstruction" in { + class LDN_WBFx_Lane op2_1, bit op0, RegisterOperand VList, + Operand ImmTy, Operand ImmOp, string asmop> + : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0, + (outs VList:$Rt, GPR64xsp:$wb), + (ins GPR64xsp:$Rn, ImmTy:$amt, + VList:$src, ImmOp:$lane), + asmop # "\t$Rt[$lane], [$Rn], $amt", + [], + NoItinerary> { + let Rm = 0b11111; + } + + class LDN_WBReg_Lane op2_1, bit op0, RegisterOperand VList, + Operand 
ImmTy, Operand ImmOp, string asmop> + : NeonI_LdStOne_Lane_Post<1, r, op2_1, op0, + (outs VList:$Rt, GPR64xsp:$wb), + (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, + VList:$src, ImmOp:$lane), + asmop # "\t$Rt[$lane], [$Rn], $Rm", + [], + NoItinerary>; +} + +multiclass LD_Lane_WB_BHSD { + def _B_fixed : LDN_WBFx_Lane(List # "B_operand"), + uimm_b, neon_uimm4_bare, asmop> { + let Inst{12-10} = lane{2-0}; + let Inst{30} = lane{3}; + } + + def _B_register : LDN_WBReg_Lane(List # "B_operand"), + uimm_b, neon_uimm4_bare, asmop> { + let Inst{12-10} = lane{2-0}; + let Inst{30} = lane{3}; + } + + def _H_fixed : LDN_WBFx_Lane(List # "H_operand"), + uimm_h, neon_uimm3_bare, asmop> { + let Inst{12-10} = {lane{1}, lane{0}, 0b0}; + let Inst{30} = lane{2}; + } + + def _H_register : LDN_WBReg_Lane(List # "H_operand"), + uimm_h, neon_uimm3_bare, asmop> { + let Inst{12-10} = {lane{1}, lane{0}, 0b0}; + let Inst{30} = lane{2}; + } + + def _S_fixed : LDN_WBFx_Lane(List # "S_operand"), + uimm_s, neon_uimm2_bare, asmop> { + let Inst{12-10} = {lane{0}, 0b0, 0b0}; + let Inst{30} = lane{1}; + } + + def _S_register : LDN_WBReg_Lane(List # "S_operand"), + uimm_s, neon_uimm2_bare, asmop> { + let Inst{12-10} = {lane{0}, 0b0, 0b0}; + let Inst{30} = lane{1}; + } + + def _D_fixed : LDN_WBFx_Lane(List # "D_operand"), + uimm_d, neon_uimm1_bare, asmop> { + let Inst{12-10} = 0b001; + let Inst{30} = lane{0}; + } + + def _D_register : LDN_WBReg_Lane(List # "D_operand"), + uimm_d, neon_uimm1_bare, asmop> { + let Inst{12-10} = 0b001; + let Inst{30} = lane{0}; + } +} + +// Post-index load single 1-element structure to one lane of 1 register. +defm LD1LN_WB : LD_Lane_WB_BHSD<0b0, 0b0, "VOne", "ld1", uimm_exact1, + uimm_exact2, uimm_exact4, uimm_exact8>; + +// Post-index load single N-element structure to one lane of N consecutive +// registers +// (N = 2,3,4) +defm LD2LN_WB : LD_Lane_WB_BHSD<0b1, 0b0, "VPair", "ld2", uimm_exact2, + uimm_exact4, uimm_exact8, uimm_exact16>; +defm LD3LN_WB : LD_Lane_WB_BHSD<0b0, 0b1, "VTriple", "ld3", uimm_exact3, + uimm_exact6, uimm_exact12, uimm_exact24>; +defm LD4LN_WB : LD_Lane_WB_BHSD<0b1, 0b1, "VQuad", "ld4", uimm_exact4, + uimm_exact8, uimm_exact16, uimm_exact32>; + +let mayStore = 1, neverHasSideEffects = 1, + hasExtraDefRegAllocReq = 1, Constraints = "$Rn = $wb", + DecoderMethod = "DecodeVLDSTLanePostInstruction" in { + class STN_WBFx_Lane op2_1, bit op0, RegisterOperand VList, + Operand ImmTy, Operand ImmOp, string asmop> + : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0, + (outs GPR64xsp:$wb), + (ins GPR64xsp:$Rn, ImmTy:$amt, + VList:$Rt, ImmOp:$lane), + asmop # "\t$Rt[$lane], [$Rn], $amt", + [], + NoItinerary> { + let Rm = 0b11111; + } + + class STN_WBReg_Lane op2_1, bit op0, RegisterOperand VList, + Operand ImmTy, Operand ImmOp, string asmop> + : NeonI_LdStOne_Lane_Post<0, r, op2_1, op0, + (outs GPR64xsp:$wb), + (ins GPR64xsp:$Rn, GPR64noxzr:$Rm, VList:$Rt, + ImmOp:$lane), + asmop # "\t$Rt[$lane], [$Rn], $Rm", + [], + NoItinerary>; +} + +multiclass ST_Lane_WB_BHSD { + def _B_fixed : STN_WBFx_Lane(List # "B_operand"), + uimm_b, neon_uimm4_bare, asmop> { + let Inst{12-10} = lane{2-0}; + let Inst{30} = lane{3}; + } + + def _B_register : STN_WBReg_Lane(List # "B_operand"), + uimm_b, neon_uimm4_bare, asmop> { + let Inst{12-10} = lane{2-0}; + let Inst{30} = lane{3}; + } + + def _H_fixed : STN_WBFx_Lane(List # "H_operand"), + uimm_h, neon_uimm3_bare, asmop> { + let Inst{12-10} = {lane{1}, lane{0}, 0b0}; + let Inst{30} = lane{2}; + } + + def _H_register : STN_WBReg_Lane(List # "H_operand"), + uimm_h, 
neon_uimm3_bare, asmop> { + let Inst{12-10} = {lane{1}, lane{0}, 0b0}; + let Inst{30} = lane{2}; + } + + def _S_fixed : STN_WBFx_Lane(List # "S_operand"), + uimm_s, neon_uimm2_bare, asmop> { + let Inst{12-10} = {lane{0}, 0b0, 0b0}; + let Inst{30} = lane{1}; + } + + def _S_register : STN_WBReg_Lane(List # "S_operand"), + uimm_s, neon_uimm2_bare, asmop> { + let Inst{12-10} = {lane{0}, 0b0, 0b0}; + let Inst{30} = lane{1}; + } + + def _D_fixed : STN_WBFx_Lane(List # "D_operand"), + uimm_d, neon_uimm1_bare, asmop> { + let Inst{12-10} = 0b001; + let Inst{30} = lane{0}; + } + + def _D_register : STN_WBReg_Lane(List # "D_operand"), + uimm_d, neon_uimm1_bare, asmop> { + let Inst{12-10} = 0b001; + let Inst{30} = lane{0}; + } +} + +// Post-index store single 1-element structure from one lane of 1 register. +defm ST1LN_WB : ST_Lane_WB_BHSD<0b0, 0b0, "VOne", "st1", uimm_exact1, + uimm_exact2, uimm_exact4, uimm_exact8>; + +// Post-index store single N-element structure from one lane of N consecutive +// registers (N = 2,3,4) +defm ST2LN_WB : ST_Lane_WB_BHSD<0b1, 0b0, "VPair", "st2", uimm_exact2, + uimm_exact4, uimm_exact8, uimm_exact16>; +defm ST3LN_WB : ST_Lane_WB_BHSD<0b0, 0b1, "VTriple", "st3", uimm_exact3, + uimm_exact6, uimm_exact12, uimm_exact24>; +defm ST4LN_WB : ST_Lane_WB_BHSD<0b1, 0b1, "VQuad", "st4", uimm_exact4, + uimm_exact8, uimm_exact16, uimm_exact32>; + +// End of post-index load/store single N-element instructions +// (class SIMD lsone-post) + +// Neon Scalar instructions implementation +// Scalar Three Same + +class NeonI_Scalar3Same_size size, bits<5> opcode, string asmop, + RegisterClass FPRC> + : NeonI_Scalar3Same; + +class NeonI_Scalar3Same_D_size opcode, string asmop> + : NeonI_Scalar3Same_size; + +multiclass NeonI_Scalar3Same_HS_sizes opcode, string asmop, + bit Commutable = 0> { + let isCommutable = Commutable in { + def hhh : NeonI_Scalar3Same_size; + def sss : NeonI_Scalar3Same_size; + } +} + +multiclass NeonI_Scalar3Same_SD_sizes opcode, + string asmop, bit Commutable = 0> { + let isCommutable = Commutable in { + def sss : NeonI_Scalar3Same_size; + def ddd : NeonI_Scalar3Same_size; + } +} + +multiclass NeonI_Scalar3Same_BHSD_sizes opcode, + string asmop, bit Commutable = 0> { + let isCommutable = Commutable in { + def bbb : NeonI_Scalar3Same_size; + def hhh : NeonI_Scalar3Same_size; + def sss : NeonI_Scalar3Same_size; + def ddd : NeonI_Scalar3Same_size; + } +} + +multiclass Neon_Scalar3Same_D_size_patterns { + def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (INSTD FPR64:$Rn, FPR64:$Rm)>; +} + +multiclass Neon_Scalar3Same_BHSD_size_patterns + : Neon_Scalar3Same_D_size_patterns { + def: Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (v1i8 FPR8:$Rm))), + (INSTB FPR8:$Rn, FPR8:$Rm)>; + + def: Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), + (INSTH FPR16:$Rn, FPR16:$Rm)>; + + def: Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), + (INSTS FPR32:$Rn, FPR32:$Rm)>; +} + +class Neon_Scalar3Same_cmp_D_size_patterns + : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm))), + (INSTD FPR64:$Rn, FPR64:$Rm)>; + +multiclass Neon_Scalar3Same_HS_size_patterns { + def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), + (INSTH FPR16:$Rn, FPR16:$Rm)>; + def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), + (INSTS FPR32:$Rn, FPR32:$Rm)>; +} + +multiclass Neon_Scalar3Same_SD_size_patterns { + def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))), + (INSTS FPR32:$Rn, FPR32:$Rm)>; + def : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 
FPR64:$Rm))), + (INSTD FPR64:$Rn, FPR64:$Rm)>; +} + +multiclass Neon_Scalar3Same_cmp_SD_size_patterns { + def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), (v1f32 FPR32:$Rm))), + (INSTS FPR32:$Rn, FPR32:$Rm)>; + def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (INSTD FPR64:$Rn, FPR64:$Rm)>; +} + +class Neon_Scalar3Same_cmp_V1_D_size_patterns + : Pat<(v1i64 (Neon_cmp (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), CC)), + (INSTD FPR64:$Rn, FPR64:$Rm)>; + +// Scalar Three Different + +class NeonI_Scalar3Diff_size size, bits<4> opcode, string asmop, + RegisterClass FPRCD, RegisterClass FPRCS> + : NeonI_Scalar3Diff; + +multiclass NeonI_Scalar3Diff_HS_size opcode, string asmop> { + def shh : NeonI_Scalar3Diff_size; + def dss : NeonI_Scalar3Diff_size; +} + +multiclass NeonI_Scalar3Diff_ml_HS_size opcode, string asmop> { + let Constraints = "$Src = $Rd" in { + def shh : NeonI_Scalar3Diff; + def dss : NeonI_Scalar3Diff; + } +} + +multiclass Neon_Scalar3Diff_HS_size_patterns { + def : Pat<(v1i32 (opnode (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), + (INSTH FPR16:$Rn, FPR16:$Rm)>; + def : Pat<(v1i64 (opnode (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), + (INSTS FPR32:$Rn, FPR32:$Rm)>; +} + +multiclass Neon_Scalar3Diff_ml_HS_size_patterns { + def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i16 FPR16:$Rn), (v1i16 FPR16:$Rm))), + (INSTH FPR32:$Src, FPR16:$Rn, FPR16:$Rm)>; + def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i32 FPR32:$Rn), (v1i32 FPR32:$Rm))), + (INSTS FPR64:$Src, FPR32:$Rn, FPR32:$Rm)>; +} + +// Scalar Two Registers Miscellaneous + +class NeonI_Scalar2SameMisc_size size, bits<5> opcode, string asmop, + RegisterClass FPRCD, RegisterClass FPRCS> + : NeonI_Scalar2SameMisc; + +multiclass NeonI_Scalar2SameMisc_SD_size opcode, + string asmop> { + def ss : NeonI_Scalar2SameMisc_size; + def dd : NeonI_Scalar2SameMisc_size; +} + +multiclass NeonI_Scalar2SameMisc_D_size opcode, string asmop> { + def dd : NeonI_Scalar2SameMisc_size; +} + +multiclass NeonI_Scalar2SameMisc_BHSD_size opcode, string asmop> + : NeonI_Scalar2SameMisc_D_size { + def bb : NeonI_Scalar2SameMisc_size; + def hh : NeonI_Scalar2SameMisc_size; + def ss : NeonI_Scalar2SameMisc_size; +} + +class NeonI_Scalar2SameMisc_fcvtxn_D_size opcode, string asmop> + : NeonI_Scalar2SameMisc_size; + +multiclass NeonI_Scalar2SameMisc_narrow_HSD_size opcode, + string asmop> { + def bh : NeonI_Scalar2SameMisc_size; + def hs : NeonI_Scalar2SameMisc_size; + def sd : NeonI_Scalar2SameMisc_size; +} + +class NeonI_Scalar2SameMisc_accum_size size, bits<5> opcode, + string asmop, RegisterClass FPRC> + : NeonI_Scalar2SameMisc; + +multiclass NeonI_Scalar2SameMisc_accum_BHSD_size opcode, + string asmop> { + + let Constraints = "$Src = $Rd" in { + def bb : NeonI_Scalar2SameMisc_accum_size; + def hh : NeonI_Scalar2SameMisc_accum_size; + def ss : NeonI_Scalar2SameMisc_accum_size; + def dd : NeonI_Scalar2SameMisc_accum_size; + } +} + +class Neon_Scalar2SameMisc_fcvtxn_D_size_patterns + : Pat<(v1f32 (opnode (v1f64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; + +multiclass Neon_Scalar2SameMisc_fcvt_SD_size_patterns { + def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn))), + (INSTS FPR32:$Rn)>; + def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; +} + +multiclass Neon_Scalar2SameMisc_cvt_SD_size_patterns { + def : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn))), + (INSTS FPR32:$Rn)>; + def : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; +} + +multiclass Neon_Scalar2SameMisc_SD_size_patterns { + def : Pat<(v1f32 (opnode (v1f32 FPR32:$Rn))), + (INSTS FPR32:$Rn)>; + def 
: Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; +} + +class NeonI_Scalar2SameMisc_cmpz_D_size opcode, string asmop> + : NeonI_Scalar2SameMisc; + +multiclass NeonI_Scalar2SameMisc_cmpz_SD_size opcode, + string asmop> { + def ssi : NeonI_Scalar2SameMisc; + def ddi : NeonI_Scalar2SameMisc; +} + +class Neon_Scalar2SameMisc_cmpz_D_size_patterns + : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), + (v1i64 (bitconvert (v8i8 Neon_AllZero))))), + (INSTD FPR64:$Rn, 0)>; + +class Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns + : Pat<(v1i64 (Neon_cmpz (v1i64 FPR64:$Rn), + (i32 neon_uimm0:$Imm), CC)), + (INSTD FPR64:$Rn, neon_uimm0:$Imm)>; + +multiclass Neon_Scalar2SameMisc_cmpz_SD_size_patterns { + def : Pat<(v1i32 (opnode (v1f32 FPR32:$Rn), + (v1f32 (scalar_to_vector (f32 fpz32:$FPImm))))), + (INSTS FPR32:$Rn, fpz32:$FPImm)>; + def : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), + (v1f32 (scalar_to_vector (f32 fpz32:$FPImm))))), + (INSTD FPR64:$Rn, fpz32:$FPImm)>; +} + +multiclass Neon_Scalar2SameMisc_D_size_patterns { + def : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; +} + +multiclass Neon_Scalar2SameMisc_BHSD_size_patterns + : Neon_Scalar2SameMisc_D_size_patterns { + def : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn))), + (INSTB FPR8:$Rn)>; + def : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn))), + (INSTH FPR16:$Rn)>; + def : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn))), + (INSTS FPR32:$Rn)>; +} + +multiclass Neon_Scalar2SameMisc_narrow_HSD_size_patterns< + SDPatternOperator opnode, + Instruction INSTH, + Instruction INSTS, + Instruction INSTD> { + def : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn))), + (INSTH FPR16:$Rn)>; + def : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn))), + (INSTS FPR32:$Rn)>; + def : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn))), + (INSTD FPR64:$Rn)>; + +} + +multiclass Neon_Scalar2SameMisc_accum_BHSD_size_patterns< + SDPatternOperator opnode, + Instruction INSTB, + Instruction INSTH, + Instruction INSTS, + Instruction INSTD> { + def : Pat<(v1i8 (opnode (v1i8 FPR8:$Src), (v1i8 FPR8:$Rn))), + (INSTB FPR8:$Src, FPR8:$Rn)>; + def : Pat<(v1i16 (opnode (v1i16 FPR16:$Src), (v1i16 FPR16:$Rn))), + (INSTH FPR16:$Src, FPR16:$Rn)>; + def : Pat<(v1i32 (opnode (v1i32 FPR32:$Src), (v1i32 FPR32:$Rn))), + (INSTS FPR32:$Src, FPR32:$Rn)>; + def : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn))), + (INSTD FPR64:$Src, FPR64:$Rn)>; +} + +// Scalar Shift By Immediate + +class NeonI_ScalarShiftImm_size opcode, string asmop, + RegisterClass FPRC, Operand ImmTy> + : NeonI_ScalarShiftImm; + +multiclass NeonI_ScalarShiftRightImm_D_size opcode, + string asmop> { + def ddi : NeonI_ScalarShiftImm_size { + bits<6> Imm; + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + let Inst{21-16} = Imm; + } +} + +multiclass NeonI_ScalarShiftRightImm_BHSD_size opcode, + string asmop> + : NeonI_ScalarShiftRightImm_D_size { + def bbi : NeonI_ScalarShiftImm_size { + bits<3> Imm; + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + let Inst{18-16} = Imm; + } + def hhi : NeonI_ScalarShiftImm_size { + bits<4> Imm; + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + let Inst{19-16} = Imm; + } + def ssi : NeonI_ScalarShiftImm_size { + bits<5> Imm; + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + let Inst{20-16} = Imm; + } +} + +multiclass NeonI_ScalarShiftLeftImm_D_size opcode, + string asmop> { + def ddi : NeonI_ScalarShiftImm_size { + bits<6> Imm; + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + let Inst{21-16} = Imm; + } +} + +multiclass NeonI_ScalarShiftLeftImm_BHSD_size opcode, + string asmop> + : NeonI_ScalarShiftLeftImm_D_size { + def bbi : 
NeonI_ScalarShiftImm_size { + bits<3> Imm; + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + let Inst{18-16} = Imm; + } + def hhi : NeonI_ScalarShiftImm_size { + bits<4> Imm; + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + let Inst{19-16} = Imm; + } + def ssi : NeonI_ScalarShiftImm_size { + bits<5> Imm; + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + let Inst{20-16} = Imm; + } +} + +class NeonI_ScalarShiftRightImm_accum_D_size opcode, string asmop> + : NeonI_ScalarShiftImm { + bits<6> Imm; + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + let Inst{21-16} = Imm; + let Constraints = "$Src = $Rd"; +} + +class NeonI_ScalarShiftLeftImm_accum_D_size opcode, string asmop> + : NeonI_ScalarShiftImm { + bits<6> Imm; + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + let Inst{21-16} = Imm; + let Constraints = "$Src = $Rd"; +} + +class NeonI_ScalarShiftImm_narrow_size opcode, string asmop, + RegisterClass FPRCD, RegisterClass FPRCS, + Operand ImmTy> + : NeonI_ScalarShiftImm; + +multiclass NeonI_ScalarShiftImm_narrow_HSD_size opcode, + string asmop> { + def bhi : NeonI_ScalarShiftImm_narrow_size { + bits<3> Imm; + let Inst{22-19} = 0b0001; // immh:immb = 0001xxx + let Inst{18-16} = Imm; + } + def hsi : NeonI_ScalarShiftImm_narrow_size { + bits<4> Imm; + let Inst{22-20} = 0b001; // immh:immb = 001xxxx + let Inst{19-16} = Imm; + } + def sdi : NeonI_ScalarShiftImm_narrow_size { + bits<5> Imm; + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + let Inst{20-16} = Imm; + } +} + +multiclass NeonI_ScalarShiftImm_cvt_SD_size opcode, string asmop> { + def ssi : NeonI_ScalarShiftImm_size { + bits<5> Imm; + let Inst{22-21} = 0b01; // immh:immb = 01xxxxx + let Inst{20-16} = Imm; + } + def ddi : NeonI_ScalarShiftImm_size { + bits<6> Imm; + let Inst{22} = 0b1; // immh:immb = 1xxxxxx + let Inst{21-16} = Imm; + } +} + +multiclass Neon_ScalarShiftRImm_D_size_patterns { + def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), + (INSTD FPR64:$Rn, imm:$Imm)>; +} + +multiclass Neon_ScalarShiftLImm_D_size_patterns { + def ddi : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (i32 shl_imm64:$Imm))), + (INSTD FPR64:$Rn, imm:$Imm)>; +} + +class Neon_ScalarShiftImm_arm_D_size_patterns + : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), + (v1i64 (Neon_vdup (i32 shr_imm64:$Imm))))), + (INSTD FPR64:$Rn, imm:$Imm)>; + +multiclass Neon_ScalarShiftLImm_BHSD_size_patterns + : Neon_ScalarShiftLImm_D_size_patterns { + def bbi : Pat<(v1i8 (opnode (v1i8 FPR8:$Rn), (i32 shl_imm8:$Imm))), + (INSTB FPR8:$Rn, imm:$Imm)>; + def hhi : Pat<(v1i16 (opnode (v1i16 FPR16:$Rn), (i32 shl_imm16:$Imm))), + (INSTH FPR16:$Rn, imm:$Imm)>; + def ssi : Pat<(v1i32 (opnode (v1i32 FPR32:$Rn), (i32 shl_imm32:$Imm))), + (INSTS FPR32:$Rn, imm:$Imm)>; +} + +class Neon_ScalarShiftLImm_accum_D_size_patterns + : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), + (i32 shl_imm64:$Imm))), + (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; + +class Neon_ScalarShiftRImm_accum_D_size_patterns + : Pat<(v1i64 (opnode (v1i64 FPR64:$Src), (v1i64 FPR64:$Rn), + (i32 shr_imm64:$Imm))), + (INSTD FPR64:$Src, FPR64:$Rn, imm:$Imm)>; + +multiclass Neon_ScalarShiftImm_narrow_HSD_size_patterns< + SDPatternOperator opnode, + Instruction INSTH, + Instruction INSTS, + Instruction INSTD> { + def bhi : Pat<(v1i8 (opnode (v1i16 FPR16:$Rn), (i32 shr_imm16:$Imm))), + (INSTH FPR16:$Rn, imm:$Imm)>; + def hsi : Pat<(v1i16 (opnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))), + (INSTS FPR32:$Rn, imm:$Imm)>; + def sdi : Pat<(v1i32 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), + (INSTD FPR64:$Rn, 
imm:$Imm)>;
+}
+
+multiclass Neon_ScalarShiftImm_scvtf_SD_size_patterns<SDPatternOperator Sopnode,
+                                                      SDPatternOperator Dopnode,
+                                                      Instruction INSTS,
+                                                      Instruction INSTD> {
+  def ssi : Pat<(f32 (Sopnode (v1i32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
+                (INSTS FPR32:$Rn, imm:$Imm)>;
+  def ddi : Pat<(f64 (Dopnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+                (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+multiclass Neon_ScalarShiftImm_fcvts_SD_size_patterns<SDPatternOperator Sopnode,
+                                                      SDPatternOperator Dopnode,
+                                                      Instruction INSTS,
+                                                      Instruction INSTD> {
+  def ssi : Pat<(v1i32 (Sopnode (v1f32 FPR32:$Rn), (i32 shr_imm32:$Imm))),
+                (INSTS FPR32:$Rn, imm:$Imm)>;
+  def ddi : Pat<(v1i64 (Dopnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))),
+                (INSTD FPR64:$Rn, imm:$Imm)>;
+}
+
+// Scalar Signed Shift Right (Immediate)
+defm SSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00000, "sshr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns;
+
+// Scalar Unsigned Shift Right (Immediate)
+defm USHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00000, "ushr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns;
+
+// Scalar Signed Rounding Shift Right (Immediate)
+defm SRSHR : NeonI_ScalarShiftRightImm_D_size<0b0, 0b00100, "srshr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns;
+
+// Scalar Unsigned Rounding Shift Right (Immediate)
+defm URSHR : NeonI_ScalarShiftRightImm_D_size<0b1, 0b00100, "urshr">;
+defm : Neon_ScalarShiftRImm_D_size_patterns;
+
+// Scalar Signed Shift Right and Accumulate (Immediate)
+def SSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00010, "ssra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns;
+
+// Scalar Unsigned Shift Right and Accumulate (Immediate)
+def USRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00010, "usra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns;
+
+// Scalar Signed Rounding Shift Right and Accumulate (Immediate)
+def SRSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b0, 0b00110, "srsra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns;
+
+// Scalar Unsigned Rounding Shift Right and Accumulate (Immediate)
+def URSRA : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b00110, "ursra">;
+def : Neon_ScalarShiftRImm_accum_D_size_patterns;
+
+// Scalar Shift Left (Immediate)
+defm SHL : NeonI_ScalarShiftLeftImm_D_size<0b0, 0b01010, "shl">;
+defm : Neon_ScalarShiftLImm_D_size_patterns;
+// Pattern to match llvm.arm.* intrinsic.
+def : Neon_ScalarShiftImm_arm_D_size_patterns;
+
+// Signed Saturating Shift Left (Immediate)
+defm SQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b0, 0b01110, "sqshl">;
+defm : Neon_ScalarShiftLImm_BHSD_size_patterns;
+// Pattern to match llvm.arm.* intrinsic.
+defm : Neon_ScalarShiftLImm_D_size_patterns;
+
+// Unsigned Saturating Shift Left (Immediate)
+defm UQSHL : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01110, "uqshl">;
+defm : Neon_ScalarShiftLImm_BHSD_size_patterns;
+// Pattern to match llvm.arm.* intrinsic.
+defm : Neon_ScalarShiftLImm_D_size_patterns; + +// Signed Saturating Shift Left Unsigned (Immediate) +defm SQSHLU : NeonI_ScalarShiftLeftImm_BHSD_size<0b1, 0b01100, "sqshlu">; +defm : Neon_ScalarShiftLImm_BHSD_size_patterns; + +// Shift Right And Insert (Immediate) +def SRI : NeonI_ScalarShiftRightImm_accum_D_size<0b1, 0b01000, "sri">; +def : Neon_ScalarShiftRImm_accum_D_size_patterns + ; + +// Shift Left And Insert (Immediate) +def SLI : NeonI_ScalarShiftLeftImm_accum_D_size<0b1, 0b01010, "sli">; +def : Neon_ScalarShiftLImm_accum_D_size_patterns + ; + +// Signed Saturating Shift Right Narrow (Immediate) +defm SQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10010, "sqshrn">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Unsigned Saturating Shift Right Narrow (Immediate) +defm UQSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10010, "uqshrn">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Signed Saturating Rounded Shift Right Narrow (Immediate) +defm SQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b0, 0b10011, "sqrshrn">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Unsigned Saturating Rounded Shift Right Narrow (Immediate) +defm UQRSHRN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10011, "uqrshrn">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Signed Saturating Shift Right Unsigned Narrow (Immediate) +defm SQSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10000, "sqshrun">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Signed Saturating Rounded Shift Right Unsigned Narrow (Immediate) +defm SQRSHRUN : NeonI_ScalarShiftImm_narrow_HSD_size<0b1, 0b10001, "sqrshrun">; +defm : Neon_ScalarShiftImm_narrow_HSD_size_patterns; + +// Scalar Signed Fixed-point Convert To Floating-Point (Immediate) +defm SCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11100, "scvtf">; +defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns; + +// Scalar Unsigned Fixed-point Convert To Floating-Point (Immediate) +defm UCVTF_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11100, "ucvtf">; +defm : Neon_ScalarShiftImm_scvtf_SD_size_patterns; + +// Scalar Floating-point Convert To Signed Fixed-point (Immediate) +defm FCVTZS_N : NeonI_ScalarShiftImm_cvt_SD_size<0b0, 0b11111, "fcvtzs">; +defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns; + +// Scalar Floating-point Convert To Unsigned Fixed-point (Immediate) +defm FCVTZU_N : NeonI_ScalarShiftImm_cvt_SD_size<0b1, 0b11111, "fcvtzu">; +defm : Neon_ScalarShiftImm_fcvts_SD_size_patterns; + +// Patterns For Convert Instructions Between v1f64 and v1i64 +class Neon_ScalarShiftImm_cvtf_v1f64_pattern + : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn), (i32 shr_imm64:$Imm))), + (INST FPR64:$Rn, imm:$Imm)>; + +class Neon_ScalarShiftImm_fcvt_v1f64_pattern + : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn), (i32 shr_imm64:$Imm))), + (INST FPR64:$Rn, imm:$Imm)>; + +def : Neon_ScalarShiftImm_cvtf_v1f64_pattern; + +def : Neon_ScalarShiftImm_cvtf_v1f64_pattern; + +def : Neon_ScalarShiftImm_fcvt_v1f64_pattern; + +def : Neon_ScalarShiftImm_fcvt_v1f64_pattern; + +// Scalar Integer Add +let isCommutable = 1 in { +def ADDddd : NeonI_Scalar3Same_D_size<0b0, 0b10000, "add">; +} + +// Scalar Integer Sub +def SUBddd : NeonI_Scalar3Same_D_size<0b1, 0b10000, "sub">; + +// Pattern for Scalar Integer Add and Sub with D register only +defm : Neon_Scalar3Same_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; + +// Patterns to match llvm.aarch64.* intrinsic for Scalar Add, Sub +defm : Neon_Scalar3Same_D_size_patterns; +defm : 
Neon_Scalar3Same_D_size_patterns;
+defm : Neon_Scalar3Same_D_size_patterns;
+defm : Neon_Scalar3Same_D_size_patterns;
+
+// Scalar Integer Saturating Add (Signed, Unsigned)
+defm SQADD : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00001, "sqadd", 1>;
+defm UQADD : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00001, "uqadd", 1>;
+
+// Scalar Integer Saturating Sub (Signed, Unsigned)
+defm SQSUB : NeonI_Scalar3Same_BHSD_sizes<0b0, 0b00101, "sqsub", 0>;
+defm UQSUB : NeonI_Scalar3Same_BHSD_sizes<0b1, 0b00101, "uqsub", 0>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Add, Sub (Signed, Unsigned)
+defm : Neon_Scalar3Same_BHSD_size_patterns;
+defm : Neon_Scalar3Same_BHSD_size_patterns;
+defm : Neon_Scalar3Same_BHSD_size_patterns;
+defm : Neon_Scalar3Same_BHSD_size_patterns;
+
+// Scalar Integer Saturating Doubling Multiply Half High
+defm SQDMULH : NeonI_Scalar3Same_HS_sizes<0b0, 0b10110, "sqdmulh", 1>;
+
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+defm SQRDMULH : NeonI_Scalar3Same_HS_sizes<0b1, 0b10110, "sqrdmulh", 1>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Doubling Multiply Half High and
+// Scalar Integer Saturating Rounding Doubling Multiply Half High
+defm : Neon_Scalar3Same_HS_size_patterns;
+defm : Neon_Scalar3Same_HS_size_patterns;
+
+// Scalar Floating-point Multiply Extended
+defm FMULX : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11011, "fmulx", 1>;
+
+// Scalar Floating-point Reciprocal Step
+defm FRECPS : NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11111, "frecps", 0>;
+
+// Scalar Floating-point Reciprocal Square Root Step
+defm FRSQRTS : NeonI_Scalar3Same_SD_sizes<0b0, 0b1, 0b11111, "frsqrts", 0>;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Floating-point Reciprocal Step and
+// Scalar Floating-point Reciprocal Square Root Step
+defm : Neon_Scalar3Same_SD_size_patterns;
+defm : Neon_Scalar3Same_SD_size_patterns;
+
+def : Pat<(v1f64 (fsqrt (v1f64 FPR64:$Rn))), (FSQRTdd FPR64:$Rn)>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Floating-point Multiply Extended
+multiclass Neon_Scalar3Same_MULX_SD_size_patterns<SDPatternOperator opnode,
+                                                  Instruction INSTS,
+                                                  Instruction INSTD> {
+  def : Pat<(f32 (opnode (f32 FPR32:$Rn), (f32 FPR32:$Rm))),
+            (INSTS FPR32:$Rn, FPR32:$Rm)>;
+  def : Pat<(f64 (opnode (f64 FPR64:$Rn), (f64 FPR64:$Rm))),
+            (INSTD FPR64:$Rn, FPR64:$Rm)>;
+}
+
+defm : Neon_Scalar3Same_MULX_SD_size_patterns;
+
+// Scalar Integer Shift Left (Signed, Unsigned)
+def SSHLddd : NeonI_Scalar3Same_D_size<0b0, 0b01000, "sshl">;
+def USHLddd : NeonI_Scalar3Same_D_size<0b1, 0b01000, "ushl">;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns;
+defm : Neon_Scalar3Same_D_size_patterns;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns;
+defm : Neon_Scalar3Same_D_size_patterns;
+
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm SQSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01001, "sqshl", 0>;
+defm UQSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01001, "uqshl", 0>;
+
+// Patterns to match llvm.aarch64.* intrinsic for
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_BHSD_size_patterns;
+defm : Neon_Scalar3Same_BHSD_size_patterns;
+
+// Patterns to match llvm.arm.* intrinsic for
+// Scalar Integer Saturating Shift Left (Signed, Unsigned)
+defm : Neon_Scalar3Same_D_size_patterns;
+defm : Neon_Scalar3Same_D_size_patterns;
+
+// 
Scalar Integer Rounding Shift Left (Signed, Unsigned) +def SRSHLddd: NeonI_Scalar3Same_D_size<0b0, 0b01010, "srshl">; +def URSHLddd: NeonI_Scalar3Same_D_size<0b1, 0b01010, "urshl">; + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar3Same_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar3Same_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; + +// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) +defm SQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b0, 0b01011, "sqrshl", 0>; +defm UQRSHL: NeonI_Scalar3Same_BHSD_sizes<0b1, 0b01011, "uqrshl", 0>; + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar3Same_BHSD_size_patterns; +defm : Neon_Scalar3Same_BHSD_size_patterns; + +// Patterns to match llvm.arm.* intrinsic for +// Scalar Integer Saturating Rounding Shift Left (Signed, Unsigned) +defm : Neon_Scalar3Same_D_size_patterns; +defm : Neon_Scalar3Same_D_size_patterns; + +// Signed Saturating Doubling Multiply-Add Long +defm SQDMLAL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1001, "sqdmlal">; +defm : Neon_Scalar3Diff_ml_HS_size_patterns; + +// Signed Saturating Doubling Multiply-Subtract Long +defm SQDMLSL : NeonI_Scalar3Diff_ml_HS_size<0b0, 0b1011, "sqdmlsl">; +defm : Neon_Scalar3Diff_ml_HS_size_patterns; + +// Signed Saturating Doubling Multiply Long +defm SQDMULL : NeonI_Scalar3Diff_HS_size<0b0, 0b1101, "sqdmull">; +defm : Neon_Scalar3Diff_HS_size_patterns; + +// Scalar Signed Integer Convert To Floating-point +defm SCVTF : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11101, "scvtf">; +defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns; + +// Scalar Unsigned Integer Convert To Floating-point +defm UCVTF : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11101, "ucvtf">; +defm : Neon_Scalar2SameMisc_cvt_SD_size_patterns; + +// Scalar Floating-point Converts +def FCVTXN : NeonI_Scalar2SameMisc_fcvtxn_D_size<0b1, 0b10110, "fcvtxn">; +def : Neon_Scalar2SameMisc_fcvtxn_D_size_patterns; + +defm FCVTNS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11010, "fcvtns">; +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; + +defm FCVTNU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11010, "fcvtnu">; +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; + +defm FCVTMS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11011, "fcvtms">; +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; + +defm FCVTMU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11011, "fcvtmu">; +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; + +defm FCVTAS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b0, 0b11100, "fcvtas">; +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; + +defm FCVTAU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b0, 0b11100, "fcvtau">; +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; + +defm FCVTPS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11010, "fcvtps">; +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; + +defm FCVTPU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11010, "fcvtpu">; +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; + +defm FCVTZS : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11011, "fcvtzs">; +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; + +defm FCVTZU : NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11011, "fcvtzu">; +defm : Neon_Scalar2SameMisc_fcvt_SD_size_patterns; + +// Patterns For Convert Instructions Between 
v1f64 and v1i64
+class Neon_Scalar2SameMisc_cvtf_v1f64_pattern<SDPatternOperator opnode,
+                                              Instruction INST>
+  : Pat<(v1f64 (opnode (v1i64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+class Neon_Scalar2SameMisc_fcvt_v1f64_pattern<SDPatternOperator opnode,
+                                              Instruction INST>
+  : Pat<(v1i64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern;
+def : Neon_Scalar2SameMisc_cvtf_v1f64_pattern;
+
+def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern;
+def : Neon_Scalar2SameMisc_fcvt_v1f64_pattern;
+
+// Scalar Floating-point Reciprocal Estimate
+defm FRECPE : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11101, "frecpe">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns;
+
+// Scalar Floating-point Reciprocal Exponent
+defm FRECPX : NeonI_Scalar2SameMisc_SD_size<0b0, 0b1, 0b11111, "frecpx">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns;
+
+// Scalar Floating-point Reciprocal Square Root Estimate
+defm FRSQRTE: NeonI_Scalar2SameMisc_SD_size<0b1, 0b1, 0b11101, "frsqrte">;
+defm : Neon_Scalar2SameMisc_SD_size_patterns;
+
+// Scalar Floating-point Round
+class Neon_ScalarFloatRound_pattern<SDPatternOperator opnode, Instruction INST>
+  : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>;
+
+def : Neon_ScalarFloatRound_pattern;
+def : Neon_ScalarFloatRound_pattern;
+def : Neon_ScalarFloatRound_pattern;
+def : Neon_ScalarFloatRound_pattern;
+def : Neon_ScalarFloatRound_pattern;
+def : Neon_ScalarFloatRound_pattern;
+def : Neon_ScalarFloatRound_pattern;
+
+// Scalar Integer Compare
+
+// Scalar Compare Bitwise Equal
+def CMEQddd: NeonI_Scalar3Same_D_size<0b1, 0b10001, "cmeq">;
+def : Neon_Scalar3Same_cmp_D_size_patterns;
+
+class Neon_Scalar3Same_cmp_D_size_v1_patterns<SDPatternOperator opnode,
+                                              Instruction INSTD,
+                                              CondCode CC>
+  : Pat<(v1i64 (opnode (v1i64 FPR64:$Rn), (v1i64 FPR64:$Rm), CC)),
+        (INSTD FPR64:$Rn, FPR64:$Rm)>;
+
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns;
+
+// Scalar Compare Signed Greater Than Or Equal
+def CMGEddd: NeonI_Scalar3Same_D_size<0b0, 0b00111, "cmge">;
+def : Neon_Scalar3Same_cmp_D_size_patterns;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns;
+
+// Scalar Compare Unsigned Higher Or Same
+def CMHSddd: NeonI_Scalar3Same_D_size<0b1, 0b00111, "cmhs">;
+def : Neon_Scalar3Same_cmp_D_size_patterns;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns;
+
+// Scalar Compare Unsigned Higher
+def CMHIddd: NeonI_Scalar3Same_D_size<0b1, 0b00110, "cmhi">;
+def : Neon_Scalar3Same_cmp_D_size_patterns;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns;
+
+// Scalar Compare Signed Greater Than
+def CMGTddd: NeonI_Scalar3Same_D_size<0b0, 0b00110, "cmgt">;
+def : Neon_Scalar3Same_cmp_D_size_patterns;
+def : Neon_Scalar3Same_cmp_D_size_v1_patterns;
+
+// Scalar Compare Bitwise Test Bits
+def CMTSTddd: NeonI_Scalar3Same_D_size<0b0, 0b10001, "cmtst">;
+def : Neon_Scalar3Same_cmp_D_size_patterns;
+def : Neon_Scalar3Same_cmp_D_size_patterns;
+
+// Scalar Compare Bitwise Equal To Zero
+def CMEQddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01001, "cmeq">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns;
+
+// Scalar Compare Signed Greater Than Or Equal To Zero
+def CMGEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01000, "cmge">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns;
+
+// Scalar Compare Signed Greater Than Zero
+def CMGTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01000, "cmgt">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns;
+
+// Scalar Compare Signed Less Than Or Equal To Zero
+def CMLEddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b1, 0b01001, "cmle">;
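Note how each compare above couples one instruction def with several anonymous "def :" instantiations of a pattern class; the class takes the condition code as a template argument, so the same pattern shape is re-specialized per condition without repeating it. A stripped-down sketch of that mechanism (hypothetical names, not from this patch):

    // Sketch: "def :" creates an anonymous record; the compare patterns
    // above use this purely for the side effect of registering one
    // specialization of the class per condition code.
    class CmpZeroPat<string cc, string inst> {
      string CondCode = cc;    // e.g. SETEQ, SETGE
      string Selected = inst;  // instruction chosen for that condition
    }
    def : CmpZeroPat<"SETEQ", "CMEQddi">;
    def : CmpZeroPat<"SETGE", "CMGEddi">;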
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns;
+
+// Scalar Compare Less Than Zero
+def CMLTddi: NeonI_Scalar2SameMisc_cmpz_D_size<0b0, 0b01010, "cmlt">;
+def : Neon_Scalar2SameMisc_cmpz_D_size_patterns;
+def : Neon_Scalar2SameMisc_cmpz_D_V1_size_patterns;
+
+// Scalar Floating-point Compare
+
+// Scalar Floating-point Compare Mask Equal
+defm FCMEQ: NeonI_Scalar3Same_SD_sizes<0b0, 0b0, 0b11100, "fcmeq">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns;
+
+// Scalar Floating-point Compare Mask Equal To Zero
+defm FCMEQZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01101, "fcmeq">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns;
+def : Pat<(v1i64 (Neon_cmpz (v1f64 FPR64:$Rn), (f32 fpz32:$FPImm), SETEQ)),
+          (FCMEQZddi FPR64:$Rn, fpz32:$FPImm)>;
+
+// Scalar Floating-point Compare Mask Greater Than Or Equal
+defm FCMGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11100, "fcmge">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns;
+
+// Scalar Floating-point Compare Mask Greater Than Or Equal To Zero
+defm FCMGEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01100, "fcmge">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns;
+
+// Scalar Floating-point Compare Mask Greater Than
+defm FCMGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11100, "fcmgt">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns;
+def : Neon_Scalar3Same_cmp_V1_D_size_patterns;
+
+// Scalar Floating-point Compare Mask Greater Than Zero
+defm FCMGTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01100, "fcmgt">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns;
+
+// Scalar Floating-point Compare Mask Less Than Or Equal To Zero
+defm FCMLEZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b1, 0b01101, "fcmle">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns;
+
+// Scalar Floating-point Compare Mask Less Than Zero
+defm FCMLTZ: NeonI_Scalar2SameMisc_cmpz_SD_size<0b0, 0b01110, "fcmlt">;
+defm : Neon_Scalar2SameMisc_cmpz_SD_size_patterns;
+
+// Scalar Floating-point Absolute Compare Mask Greater Than Or Equal
+defm FACGE: NeonI_Scalar3Same_SD_sizes<0b1, 0b0, 0b11101, "facge">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns;
+
+// Scalar Floating-point Absolute Compare Mask Greater Than
+defm FACGT: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11101, "facgt">;
+defm : Neon_Scalar3Same_cmp_SD_size_patterns;
+
+// Scalar Floating-point Absolute Difference
+defm FABD: NeonI_Scalar3Same_SD_sizes<0b1, 0b1, 0b11010, "fabd">;
+defm : Neon_Scalar3Same_SD_size_patterns;
+
+// Scalar Absolute Value
+defm ABS : NeonI_Scalar2SameMisc_D_size<0b0, 0b01011, "abs">;
+defm : Neon_Scalar2SameMisc_D_size_patterns;
+
+// Scalar Signed Saturating Absolute Value
+defm SQABS : NeonI_Scalar2SameMisc_BHSD_size<0b0, 0b00111, "sqabs">;
+defm : Neon_Scalar2SameMisc_BHSD_size_patterns;
+
+// Scalar Negate
+defm NEG : NeonI_Scalar2SameMisc_D_size<0b1, 0b01011, "neg">;
+defm : Neon_Scalar2SameMisc_D_size_patterns;
+
+// Scalar Signed Saturating Negate
+defm SQNEG : NeonI_Scalar2SameMisc_BHSD_size<0b1, 0b00111, "sqneg">;
+defm : Neon_Scalar2SameMisc_BHSD_size_patterns;
+
+// Scalar Signed Saturating Accumulated of Unsigned Value
+defm SUQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b0, 0b00011, "suqadd">;
+defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns;
+
+// Scalar Unsigned Saturating Accumulated of Signed Value
+defm USQADD : NeonI_Scalar2SameMisc_accum_BHSD_size<0b1, 0b00011, "usqadd">;
+defm : Neon_Scalar2SameMisc_accum_BHSD_size_patterns;
+
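The BHSD pattern multiclasses used above are layered by multiclass inheritance: the derived multiclass inherits the D-sized patterns and only adds the narrower sizes. A self-contained sketch of that layering (hypothetical names, not from this patch):

    // Sketch: multiclass inheritance. "defm ABSX : AllSizes;" yields
    // ABSX_d from the base multiclass plus ABSX_b, ABSX_h and ABSX_s
    // from the derived one.
    class R {}
    multiclass DSize {
      def _d : R;
    }
    multiclass AllSizes : DSize {
      def _b : R;
      def _h : R;
      def _s : R;
    }
    defm ABSX : AllSizes;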
+def : Pat<(v1i64 (int_aarch64_neon_suqadd (v1i64 FPR64:$Src), + (v1i64 FPR64:$Rn))), + (SUQADDdd FPR64:$Src, FPR64:$Rn)>; + +def : Pat<(v1i64 (int_aarch64_neon_usqadd (v1i64 FPR64:$Src), + (v1i64 FPR64:$Rn))), + (USQADDdd FPR64:$Src, FPR64:$Rn)>; + +def : Pat<(v1i64 (int_arm_neon_vabs (v1i64 FPR64:$Rn))), + (ABSdd FPR64:$Rn)>; + +def : Pat<(v1i64 (int_arm_neon_vqabs (v1i64 FPR64:$Rn))), + (SQABSdd FPR64:$Rn)>; + +def : Pat<(v1i64 (int_arm_neon_vqneg (v1i64 FPR64:$Rn))), + (SQNEGdd FPR64:$Rn)>; + +def : Pat<(v1i64 (sub (v1i64 (bitconvert (v8i8 Neon_AllZero))), + (v1i64 FPR64:$Rn))), + (NEGdd FPR64:$Rn)>; + +// Scalar Signed Saturating Extract Unsigned Narrow +defm SQXTUN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10010, "sqxtun">; +defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns; + +// Scalar Signed Saturating Extract Narrow +defm SQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b0, 0b10100, "sqxtn">; +defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns; + +// Scalar Unsigned Saturating Extract Narrow +defm UQXTN : NeonI_Scalar2SameMisc_narrow_HSD_size<0b1, 0b10100, "uqxtn">; +defm : Neon_Scalar2SameMisc_narrow_HSD_size_patterns; + +// Scalar Reduce Pairwise + +multiclass NeonI_ScalarPair_D_sizes opcode, + string asmop, bit Commutable = 0> { + let isCommutable = Commutable in { + def _D_2D : NeonI_ScalarPair; + } +} + +multiclass NeonI_ScalarPair_SD_sizes opcode, + string asmop, bit Commutable = 0> + : NeonI_ScalarPair_D_sizes { + let isCommutable = Commutable in { + def _S_2S : NeonI_ScalarPair; + } +} + +// Scalar Reduce Addition Pairwise (Integer) with +// Pattern to match llvm.arm.* intrinsic +defm ADDPvv : NeonI_ScalarPair_D_sizes<0b0, 0b1, 0b11011, "addp", 0>; + +// Pattern to match llvm.aarch64.* intrinsic for +// Scalar Reduce Addition Pairwise (Integer) +def : Pat<(v1i64 (int_aarch64_neon_vpadd (v2i64 VPR128:$Rn))), + (ADDPvv_D_2D VPR128:$Rn)>; +def : Pat<(v1i64 (int_aarch64_neon_vaddv (v2i64 VPR128:$Rn))), + (ADDPvv_D_2D VPR128:$Rn)>; + +// Scalar Reduce Addition Pairwise (Floating Point) +defm FADDPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01101, "faddp", 0>; + +// Scalar Reduce Maximum Pairwise (Floating Point) +defm FMAXPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01111, "fmaxp", 0>; + +// Scalar Reduce Minimum Pairwise (Floating Point) +defm FMINPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01111, "fminp", 0>; + +// Scalar Reduce maxNum Pairwise (Floating Point) +defm FMAXNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b0, 0b01100, "fmaxnmp", 0>; + +// Scalar Reduce minNum Pairwise (Floating Point) +defm FMINNMPvv : NeonI_ScalarPair_SD_sizes<0b1, 0b1, 0b01100, "fminnmp", 0>; + +multiclass Neon_ScalarPair_SD_size_patterns { + def : Pat<(v1f32 (opnodeS (v2f32 VPR64:$Rn))), + (INSTS VPR64:$Rn)>; + def : Pat<(v1f64 (opnodeD (v2f64 VPR128:$Rn))), + (INSTD VPR128:$Rn)>; +} + +// Patterns to match llvm.aarch64.* intrinsic for +// Scalar Reduce Add, Max, Min, MaxiNum, MinNum Pairwise (Floating Point) +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +def : Pat<(v1f32 (int_aarch64_neon_vaddv (v4f32 VPR128:$Rn))), + (FADDPvv_S_2S (v2f32 + (EXTRACT_SUBREG + (v4f32 (FADDP_4S (v4f32 VPR128:$Rn), (v4f32 VPR128:$Rn))), + sub_64)))>; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : Neon_ScalarPair_SD_size_patterns; + +defm : 
Neon_ScalarPair_SD_size_patterns; + +// Scalar by element Arithmetic + +class NeonI_ScalarXIndexedElemArith opcode, + string rmlane, bit u, bit szhi, bit szlo, + RegisterClass ResFPR, RegisterClass OpFPR, + RegisterOperand OpVPR, Operand OpImm> + : NeonI_ScalarXIndexedElem { + bits<3> Imm; + bits<5> MRm; +} + +class NeonI_ScalarXIndexedElemArith_Constraint_Impl opcode, + string rmlane, + bit u, bit szhi, bit szlo, + RegisterClass ResFPR, + RegisterClass OpFPR, + RegisterOperand OpVPR, + Operand OpImm> + : NeonI_ScalarXIndexedElem { + let Constraints = "$src = $Rd"; + bits<3> Imm; + bits<5> MRm; +} + +// Scalar Floating Point multiply (scalar, by element) +def FMULssv_4S : NeonI_ScalarXIndexedElemArith<"fmul", + 0b1001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMULddv_2D : NeonI_ScalarXIndexedElemArith<"fmul", + 0b1001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +// Scalar Floating Point multiply extended (scalar, by element) +def FMULXssv_4S : NeonI_ScalarXIndexedElemArith<"fmulx", + 0b1001, ".s", 0b1, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMULXddv_2D : NeonI_ScalarXIndexedElemArith<"fmulx", + 0b1001, ".d", 0b1, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +multiclass Neon_ScalarXIndexedElem_MUL_MULX_Patterns< + SDPatternOperator opnode, + Instruction INST, + ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm, + ValueType OpNTy, ValueType ExTy, Operand OpNImm> { + + def : Pat<(ResTy (opnode (ResTy FPRC:$Rn), + (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)))), + (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; + + def : Pat<(ResTy (opnode (ResTy FPRC:$Rn), + (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)))), + (ResTy (INST (ResTy FPRC:$Rn), + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), + OpNImm:$Imm))>; + + // swapped operands + def : Pat<(ResTy (opnode + (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)), + (ResTy FPRC:$Rn))), + (ResTy (INST (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; + + def : Pat<(ResTy (opnode + (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)), + (ResTy FPRC:$Rn))), + (ResTy (INST (ResTy FPRC:$Rn), + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), + OpNImm:$Imm))>; +} + +// Patterns for Scalar Floating Point multiply (scalar, by element) +defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns; + +// Patterns for Scalar Floating Point multiply extended (scalar, by element) +defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_MULX_Patterns; + + +// Scalar Floating Point fused multiply-add (scalar, by element) +def FMLAssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", + 0b0001, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMLAddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmla", + 0b0001, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} + +// Scalar Floating Point fused 
multiply-subtract (scalar, by element) +def FMLSssv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", + 0b0101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def FMLSddv_2D : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"fmls", + 0b0101, ".d", 0b0, 0b1, 0b1, FPR64, FPR64, VPR128, neon_uimm1_bare> { + let Inst{11} = Imm{0}; // h + let Inst{21} = 0b0; // l + let Inst{20-16} = MRm; +} +// We are allowed to match the fma instruction regardless of compile options. +multiclass Neon_ScalarXIndexedElem_FMA_Patterns< + Instruction FMLAI, Instruction FMLSI, + ValueType ResTy, RegisterClass FPRC, ValueType OpTy, Operand OpImm, + ValueType OpNTy, ValueType ExTy, Operand OpNImm> { + // fmla + def : Pat<(ResTy (fma (ResTy FPRC:$Rn), + (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)), + (ResTy FPRC:$Ra))), + (ResTy (FMLAI (ResTy FPRC:$Ra), + (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; + + def : Pat<(ResTy (fma (ResTy FPRC:$Rn), + (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)), + (ResTy FPRC:$Ra))), + (ResTy (FMLAI (ResTy FPRC:$Ra), + (ResTy FPRC:$Rn), + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), + OpNImm:$Imm))>; + + // swapped fmla operands + def : Pat<(ResTy (fma + (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm)), + (ResTy FPRC:$Rn), + (ResTy FPRC:$Ra))), + (ResTy (FMLAI (ResTy FPRC:$Ra), + (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; + + def : Pat<(ResTy (fma + (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm)), + (ResTy FPRC:$Rn), + (ResTy FPRC:$Ra))), + (ResTy (FMLAI (ResTy FPRC:$Ra), + (ResTy FPRC:$Rn), + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), + OpNImm:$Imm))>; + + // fmls + def : Pat<(ResTy (fma (ResTy FPRC:$Rn), + (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))), + (ResTy FPRC:$Ra))), + (ResTy (FMLSI (ResTy FPRC:$Ra), + (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; + + def : Pat<(ResTy (fma (ResTy FPRC:$Rn), + (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))), + (ResTy FPRC:$Ra))), + (ResTy (FMLSI (ResTy FPRC:$Ra), + (ResTy FPRC:$Rn), + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), + OpNImm:$Imm))>; + + // swapped fmls operands + def : Pat<(ResTy (fma + (fneg (ResTy (vector_extract (OpTy VPR128:$MRm), OpImm:$Imm))), + (ResTy FPRC:$Rn), + (ResTy FPRC:$Ra))), + (ResTy (FMLSI (ResTy FPRC:$Ra), + (ResTy FPRC:$Rn), (OpTy VPR128:$MRm), OpImm:$Imm))>; + + def : Pat<(ResTy (fma + (fneg (ResTy (vector_extract (OpNTy VPR64:$MRm), OpNImm:$Imm))), + (ResTy FPRC:$Rn), + (ResTy FPRC:$Ra))), + (ResTy (FMLSI (ResTy FPRC:$Ra), + (ResTy FPRC:$Rn), + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$MRm, sub_64)), + OpNImm:$Imm))>; +} + +// Scalar Floating Point fused multiply-add and +// multiply-subtract (scalar, by element) +defm : Neon_ScalarXIndexedElem_FMA_Patterns; +defm : Neon_ScalarXIndexedElem_FMA_Patterns; +defm : Neon_ScalarXIndexedElem_FMA_Patterns; + +// Scalar Signed saturating doubling multiply long (scalar, by element) +def SQDMULLshv_4H : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULLshv_8H : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m 
+ let Inst{19-16} = MRm{3-0}; +} +def SQDMULLdsv_2S : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMULLdsv_4S : NeonI_ScalarXIndexedElemArith<"sqdmull", + 0b1011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +multiclass Neon_ScalarXIndexedElem_MUL_Patterns< + SDPatternOperator opnode, + Instruction INST, + ValueType ResTy, RegisterClass FPRC, + ValueType OpVTy, ValueType OpTy, + ValueType VecOpTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> { + + def : Pat<(ResTy (opnode (OpVTy FPRC:$Rn), + (OpVTy (scalar_to_vector + (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))))), + (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; + + //swapped operands + def : Pat<(ResTy (opnode + (OpVTy (scalar_to_vector + (ExTy (vector_extract (VecOpTy VPRC:$MRm), OpImm:$Imm)))), + (OpVTy FPRC:$Rn))), + (ResTy (INST (OpVTy FPRC:$Rn), (VecOpTy VPRC:$MRm), OpImm:$Imm))>; +} + + +// Patterns for Scalar Signed saturating doubling +// multiply long (scalar, by element) +defm : Neon_ScalarXIndexedElem_MUL_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_Patterns; + +// Scalar Signed saturating doubling multiply-add long (scalar, by element) +def SQDMLALshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLALshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLALdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMLALdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlal", + 0b0011, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +// Scalar Signed saturating doubling +// multiply-subtract long (scalar, by element) +def SQDMLSLshv_4H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLSLshv_8H : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".h", 0b0, 0b0, 0b1, FPR32, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMLSLdsv_2S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMLSLdsv_4S : NeonI_ScalarXIndexedElemArith_Constraint_Impl<"sqdmlsl", + 0b0111, ".s", 0b0, 0b1, 0b0, FPR64, FPR32, VPR128, neon_uimm2_bare> { 
+ let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +multiclass Neon_ScalarXIndexedElem_MLAL_Patterns< + SDPatternOperator opnode, + SDPatternOperator coreopnode, + Instruction INST, + ValueType ResTy, RegisterClass ResFPRC, RegisterClass FPRC, + ValueType OpTy, + ValueType OpVTy, ValueType ExTy, RegisterOperand VPRC, Operand OpImm> { + + def : Pat<(ResTy (opnode + (ResTy ResFPRC:$Ra), + (ResTy (coreopnode (OpTy FPRC:$Rn), + (OpTy (scalar_to_vector + (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))))))), + (ResTy (INST (ResTy ResFPRC:$Ra), + (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; + + // swapped operands + def : Pat<(ResTy (opnode + (ResTy ResFPRC:$Ra), + (ResTy (coreopnode + (OpTy (scalar_to_vector + (ExTy (vector_extract (OpVTy VPRC:$MRm), OpImm:$Imm)))), + (OpTy FPRC:$Rn))))), + (ResTy (INST (ResTy ResFPRC:$Ra), + (OpTy FPRC:$Rn), (OpVTy VPRC:$MRm), OpImm:$Imm))>; +} + +// Patterns for Scalar Signed saturating +// doubling multiply-add long (scalar, by element) +defm : Neon_ScalarXIndexedElem_MLAL_Patterns; +defm : Neon_ScalarXIndexedElem_MLAL_Patterns; +defm : Neon_ScalarXIndexedElem_MLAL_Patterns; +defm : Neon_ScalarXIndexedElem_MLAL_Patterns; + +// Patterns for Scalar Signed saturating +// doubling multiply-sub long (scalar, by element) +defm : Neon_ScalarXIndexedElem_MLAL_Patterns; +defm : Neon_ScalarXIndexedElem_MLAL_Patterns; +defm : Neon_ScalarXIndexedElem_MLAL_Patterns; +defm : Neon_ScalarXIndexedElem_MLAL_Patterns; + +// Scalar general arithmetic operation +class Neon_Scalar_GeneralMath2D_pattern + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn))), (INST FPR64:$Rn)>; + +class Neon_Scalar_GeneralMath3D_pattern + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), + (INST FPR64:$Rn, FPR64:$Rm)>; + +class Neon_Scalar_GeneralMath4D_pattern + : Pat<(v1f64 (opnode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm), + (v1f64 FPR64:$Ra))), + (INST FPR64:$Rn, FPR64:$Rm, FPR64:$Ra)>; + +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; +def : Neon_Scalar_GeneralMath3D_pattern; + +def : Neon_Scalar_GeneralMath2D_pattern; +def : Neon_Scalar_GeneralMath2D_pattern; + +def : Neon_Scalar_GeneralMath4D_pattern; +def : Neon_Scalar_GeneralMath4D_pattern; + +// Scalar Signed saturating doubling multiply returning +// high half (scalar, by element) +def SQDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqdmulh", + 0b1100, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = 
MRm; +} + +// Patterns for Scalar Signed saturating doubling multiply returning +// high half (scalar, by element) +defm : Neon_ScalarXIndexedElem_MUL_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_Patterns; + +// Scalar Signed saturating rounding doubling multiply +// returning high half (scalar, by element) +def SQRDMULHhhv_4H : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR64Lo, neon_uimm2_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQRDMULHhhv_8H : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".h", 0b0, 0b0, 0b1, FPR16, FPR16, VPR128Lo, neon_uimm3_bare> { + let Inst{11} = Imm{2}; // h + let Inst{21} = Imm{1}; // l + let Inst{20} = Imm{0}; // m + let Inst{19-16} = MRm{3-0}; +} +def SQRDMULHssv_2S : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR64, neon_uimm1_bare> { + let Inst{11} = 0b0; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} +def SQRDMULHssv_4S : NeonI_ScalarXIndexedElemArith<"sqrdmulh", + 0b1101, ".s", 0b0, 0b1, 0b0, FPR32, FPR32, VPR128, neon_uimm2_bare> { + let Inst{11} = Imm{1}; // h + let Inst{21} = Imm{0}; // l + let Inst{20-16} = MRm; +} + +defm : Neon_ScalarXIndexedElem_MUL_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_Patterns; +defm : Neon_ScalarXIndexedElem_MUL_Patterns; + +// Scalar Copy - DUP element to scalar +class NeonI_Scalar_DUP + : NeonI_ScalarCopy<(outs ResRC:$Rd), (ins VPRC:$Rn, OpImm:$Imm), + asmop # "\t$Rd, $Rn." # asmlane # "[$Imm]", + [], + NoItinerary> { + bits<4> Imm; +} + +def DUPbv_B : NeonI_Scalar_DUP<"dup", "b", FPR8, VPR128, neon_uimm4_bare> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def DUPhv_H : NeonI_Scalar_DUP<"dup", "h", FPR16, VPR128, neon_uimm3_bare> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def DUPsv_S : NeonI_Scalar_DUP<"dup", "s", FPR32, VPR128, neon_uimm2_bare> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} +def DUPdv_D : NeonI_Scalar_DUP<"dup", "d", FPR64, VPR128, neon_uimm1_bare> { + let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; +} + +multiclass NeonI_Scalar_DUP_Elt_pattern { + def : Pat<(ResTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)), + (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; + + def : Pat<(ResTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} + +// Patterns for vector extract of FP data using scalar DUP instructions +defm : NeonI_Scalar_DUP_Elt_pattern; +defm : NeonI_Scalar_DUP_Elt_pattern; + +multiclass NeonI_Scalar_DUP_Ext_Vec_pattern { + + def : Pat<(ResTy (extract_subvector (OpTy VPR128:$Rn), OpLImm:$Imm)), + (ResTy (DUPI VPR128:$Rn, OpLImm:$Imm))>; + + def : Pat<(ResTy (extract_subvector (NOpTy VPR64:$Rn), OpNImm:$Imm)), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} + +// Patterns for extract subvectors of v1ix data using scalar DUP instructions. 
+defm : NeonI_Scalar_DUP_Ext_Vec_pattern; +defm : NeonI_Scalar_DUP_Ext_Vec_pattern; +defm : NeonI_Scalar_DUP_Ext_Vec_pattern; + +multiclass NeonI_Scalar_DUP_Copy_pattern1 { + + def : Pat<(ResTy (vector_insert (ResTy undef), + (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)), + (neon_uimm0_bare:$Imm))), + (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; + + def : Pat<(ResTy (vector_insert (ResTy undef), + (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)), + (OpNImm:$Imm))), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} + +multiclass NeonI_Scalar_DUP_Copy_pattern2 { + + def : Pat<(ResTy (scalar_to_vector + (ElemTy (vector_extract (OpTy VPR128:$Rn), OpImm:$Imm)))), + (ResTy (DUPI (OpTy VPR128:$Rn), OpImm:$Imm))>; + + def : Pat<(ResTy (scalar_to_vector + (ElemTy (vector_extract (OpNTy VPR64:$Rn), OpNImm:$Imm)))), + (ResTy (DUPI + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + OpNImm:$Imm))>; +} + +// Patterns for vector copy to v1ix and v1fx vectors using scalar DUP +// instructions. +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern1; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; +defm : NeonI_Scalar_DUP_Copy_pattern2; + +multiclass NeonI_Scalar_DUP_alias { + def : NeonInstAlias; +} + +// Aliases for Scalar copy - DUP element (scalar) +// FIXME: This is actually the preferred syntax but TableGen can't deal with +// custom printing of aliases. +defm : NeonI_Scalar_DUP_alias<"mov", ".b", DUPbv_B, neon_uimm4_bare, FPR8>; +defm : NeonI_Scalar_DUP_alias<"mov", ".h", DUPhv_H, neon_uimm3_bare, FPR16>; +defm : NeonI_Scalar_DUP_alias<"mov", ".s", DUPsv_S, neon_uimm2_bare, FPR32>; +defm : NeonI_Scalar_DUP_alias<"mov", ".d", DUPdv_D, neon_uimm1_bare, FPR64>; + +multiclass NeonI_SDUP { + def : Pat<(ResTy (GetLow VPR128:$Rn)), + (ResTy (EXTRACT_SUBREG (OpTy VPR128:$Rn), sub_64))>; + def : Pat<(ResTy (GetHigh VPR128:$Rn)), + (ResTy (DUPdv_D (OpTy VPR128:$Rn), 1))>; +} + +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; +defm : NeonI_SDUP; + +//===----------------------------------------------------------------------===// +// Non-Instruction Patterns +//===----------------------------------------------------------------------===// + +// 64-bit vector bitcasts... 
+ +def : Pat<(v1i64 (bitconvert (v8i8 VPR64:$src))), (v1i64 VPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v8i8 VPR64:$src))), (v2f32 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v8i8 VPR64:$src))), (v2i32 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v8i8 VPR64:$src))), (v4i16 VPR64:$src)>; + +def : Pat<(v1i64 (bitconvert (v4i16 VPR64:$src))), (v1i64 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v4i16 VPR64:$src))), (v2i32 VPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v4i16 VPR64:$src))), (v2f32 VPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v4i16 VPR64:$src))), (v8i8 VPR64:$src)>; + +def : Pat<(v1i64 (bitconvert (v2i32 VPR64:$src))), (v1i64 VPR64:$src)>; +def : Pat<(v2f32 (bitconvert (v2i32 VPR64:$src))), (v2f32 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2i32 VPR64:$src))), (v4i16 VPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v2i32 VPR64:$src))), (v8i8 VPR64:$src)>; + +def : Pat<(v1i64 (bitconvert (v2f32 VPR64:$src))), (v1i64 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v2f32 VPR64:$src))), (v2i32 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v2f32 VPR64:$src))), (v4i16 VPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v2f32 VPR64:$src))), (v8i8 VPR64:$src)>; + +def : Pat<(v2f32 (bitconvert (v1i64 VPR64:$src))), (v2f32 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; +def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; + +// ..and 128-bit vector bitcasts... + +def : Pat<(v2f64 (bitconvert (v16i8 VPR128:$src))), (v2f64 VPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v16i8 VPR128:$src))), (v2i64 VPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v16i8 VPR128:$src))), (v4f32 VPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v16i8 VPR128:$src))), (v4i32 VPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v16i8 VPR128:$src))), (v8i16 VPR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v8i16 VPR128:$src))), (v2f64 VPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v8i16 VPR128:$src))), (v2i64 VPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v8i16 VPR128:$src))), (v4i32 VPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v8i16 VPR128:$src))), (v4f32 VPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v8i16 VPR128:$src))), (v16i8 VPR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v4i32 VPR128:$src))), (v2f64 VPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4i32 VPR128:$src))), (v2i64 VPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v4i32 VPR128:$src))), (v4f32 VPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4i32 VPR128:$src))), (v8i16 VPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4i32 VPR128:$src))), (v16i8 VPR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v4f32 VPR128:$src))), (v2f64 VPR128:$src)>; +def : Pat<(v2i64 (bitconvert (v4f32 VPR128:$src))), (v2i64 VPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v4f32 VPR128:$src))), (v4i32 VPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v4f32 VPR128:$src))), (v8i16 VPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v4f32 VPR128:$src))), (v16i8 VPR128:$src)>; + +def : Pat<(v2f64 (bitconvert (v2i64 VPR128:$src))), (v2f64 VPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2i64 VPR128:$src))), (v4f32 VPR128:$src)>; +def : Pat<(v4i32 (bitconvert (v2i64 VPR128:$src))), (v4i32 VPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2i64 VPR128:$src))), (v8i16 VPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2i64 VPR128:$src))), (v16i8 VPR128:$src)>; + +def : Pat<(v2i64 (bitconvert (v2f64 VPR128:$src))), (v2i64 VPR128:$src)>; +def : Pat<(v4f32 (bitconvert (v2f64 VPR128:$src))), (v4f32 VPR128:$src)>; +def : Pat<(v4i32 
(bitconvert (v2f64 VPR128:$src))), (v4i32 VPR128:$src)>; +def : Pat<(v8i16 (bitconvert (v2f64 VPR128:$src))), (v8i16 VPR128:$src)>; +def : Pat<(v16i8 (bitconvert (v2f64 VPR128:$src))), (v16i8 VPR128:$src)>; + +// ...and scalar bitcasts... +def : Pat<(f16 (bitconvert (v1i16 FPR16:$src))), (f16 FPR16:$src)>; +def : Pat<(f32 (bitconvert (v1i32 FPR32:$src))), (f32 FPR32:$src)>; +def : Pat<(f64 (bitconvert (v1i64 FPR64:$src))), (f64 FPR64:$src)>; +def : Pat<(f32 (bitconvert (v1f32 FPR32:$src))), (f32 FPR32:$src)>; +def : Pat<(f64 (bitconvert (v1f64 FPR64:$src))), (f64 FPR64:$src)>; + +def : Pat<(i64 (bitconvert (v1i64 FPR64:$src))), (FMOVxd $src)>; +def : Pat<(i64 (bitconvert (v1f64 FPR64:$src))), (FMOVxd $src)>; +def : Pat<(i64 (bitconvert (v2i32 FPR64:$src))), (FMOVxd $src)>; +def : Pat<(i64 (bitconvert (v2f32 FPR64:$src))), (FMOVxd $src)>; +def : Pat<(i64 (bitconvert (v4i16 FPR64:$src))), (FMOVxd $src)>; +def : Pat<(i64 (bitconvert (v8i8 FPR64:$src))), (FMOVxd $src)>; + +def : Pat<(i32 (bitconvert (v1i32 FPR32:$src))), (FMOVws $src)>; + +def : Pat<(v8i8 (bitconvert (v1i64 VPR64:$src))), (v8i8 VPR64:$src)>; +def : Pat<(v4i16 (bitconvert (v1i64 VPR64:$src))), (v4i16 VPR64:$src)>; +def : Pat<(v2i32 (bitconvert (v1i64 VPR64:$src))), (v2i32 VPR64:$src)>; + +def : Pat<(f64 (bitconvert (v8i8 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v4i16 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v2i32 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v2f32 VPR64:$src))), (f64 VPR64:$src)>; +def : Pat<(f64 (bitconvert (v1i64 VPR64:$src))), (f64 VPR64:$src)>; + +def : Pat<(f128 (bitconvert (v16i8 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v8i16 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v4i32 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v2i64 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v4f32 VPR128:$src))), (f128 VPR128:$src)>; +def : Pat<(f128 (bitconvert (v2f64 VPR128:$src))), (f128 VPR128:$src)>; + +def : Pat<(v1i16 (bitconvert (f16 FPR16:$src))), (v1i16 FPR16:$src)>; +def : Pat<(v1i32 (bitconvert (f32 FPR32:$src))), (v1i32 FPR32:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; +def : Pat<(v1f32 (bitconvert (f32 FPR32:$src))), (v1f32 FPR32:$src)>; +def : Pat<(v1f64 (bitconvert (f64 FPR64:$src))), (v1f64 FPR64:$src)>; + +def : Pat<(v1i64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; +def : Pat<(v1f64 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; +def : Pat<(v2i32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; +def : Pat<(v2f32 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; +def : Pat<(v4i16 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; +def : Pat<(v8i8 (bitconvert (i64 GPR64:$src))), (FMOVdx $src)>; + +def : Pat<(v1i32 (bitconvert (i32 GPR32:$src))), (FMOVsw $src)>; + +def : Pat<(v8i8 (bitconvert (f64 FPR64:$src))), (v8i8 FPR64:$src)>; +def : Pat<(v4i16 (bitconvert (f64 FPR64:$src))), (v4i16 FPR64:$src)>; +def : Pat<(v2i32 (bitconvert (f64 FPR64:$src))), (v2i32 FPR64:$src)>; +def : Pat<(v2f32 (bitconvert (f64 FPR64:$src))), (v2f32 FPR64:$src)>; +def : Pat<(v1i64 (bitconvert (f64 FPR64:$src))), (v1i64 FPR64:$src)>; + +def : Pat<(v16i8 (bitconvert (f128 FPR128:$src))), (v16i8 FPR128:$src)>; +def : Pat<(v8i16 (bitconvert (f128 FPR128:$src))), (v8i16 FPR128:$src)>; +def : Pat<(v4i32 (bitconvert (f128 FPR128:$src))), (v4i32 FPR128:$src)>; +def : Pat<(v2i64 (bitconvert (f128 FPR128:$src))), (v2i64 FPR128:$src)>; +def : Pat<(v4f32 
(bitconvert (f128 FPR128:$src))), (v4f32 FPR128:$src)>;
+def : Pat<(v2f64 (bitconvert (f128 FPR128:$src))), (v2f64 FPR128:$src)>;
+
+// Scalar Three Same
+
+def neon_uimm3 : Operand,
+                 ImmLeaf {
+  let ParserMatchClass = uimm3_asmoperand;
+  let PrintMethod = "printUImmHexOperand";
+}
+
+def neon_uimm4 : Operand,
+                 ImmLeaf {
+  let ParserMatchClass = uimm4_asmoperand;
+  let PrintMethod = "printUImmHexOperand";
+}
+
+// Bitwise Extract
+class NeonI_Extract op2, string asmop,
+                    string OpS, RegisterOperand OpVPR, Operand OpImm>
+  : NeonI_BitExtract{
+  bits<4> Index;
+}
+
+def EXTvvvi_8b : NeonI_Extract<0b0, 0b00, "ext", "8b",
+                               VPR64, neon_uimm3> {
+  let Inst{14-11} = {0b0, Index{2}, Index{1}, Index{0}};
+}
+
+def EXTvvvi_16b: NeonI_Extract<0b1, 0b00, "ext", "16b",
+                               VPR128, neon_uimm4> {
+  let Inst{14-11} = Index;
+}
+
+class NI_Extract
+  : Pat<(OpTy (Neon_vextract (OpTy OpVPR:$Rn), (OpTy OpVPR:$Rm),
+                             (i64 OpImm:$Imm))),
+        (INST OpVPR:$Rn, OpVPR:$Rm, OpImm:$Imm)>;
+
+def : NI_Extract;
+def : NI_Extract;
+def : NI_Extract;
+def : NI_Extract;
+def : NI_Extract;
+def : NI_Extract;
+def : NI_Extract;
+def : NI_Extract;
+def : NI_Extract;
+def : NI_Extract;
+def : NI_Extract;
+def : NI_Extract;
+
+// Table lookup
+class NI_TBL op2, bits<2> len, bit op,
+             string asmop, string OpS, RegisterOperand OpVPR,
+             RegisterOperand VecList>
+  : NeonI_TBL;
+
+// The vectors in the look-up table are always 16b
+multiclass NI_TBL_pat len, bit op, string asmop, string List> {
+  def _8b : NI_TBL<0, 0b00, len, op, asmop, "8b", VPR64,
+                   !cast(List # "16B_operand")>;
+
+  def _16b : NI_TBL<1, 0b00, len, op, asmop, "16b", VPR128,
+                    !cast(List # "16B_operand")>;
+}
+
+defm TBL1 : NI_TBL_pat<0b00, 0b0, "tbl", "VOne">;
+defm TBL2 : NI_TBL_pat<0b01, 0b0, "tbl", "VPair">;
+defm TBL3 : NI_TBL_pat<0b10, 0b0, "tbl", "VTriple">;
+defm TBL4 : NI_TBL_pat<0b11, 0b0, "tbl", "VQuad">;
+
+// Table lookup extension
+class NI_TBX op2, bits<2> len, bit op,
+             string asmop, string OpS, RegisterOperand OpVPR,
+             RegisterOperand VecList>
+  : NeonI_TBL {
+  let Constraints = "$src = $Rd";
+}
+
+// The vectors in the look-up table are always 16b
+multiclass NI_TBX_pat len, bit op, string asmop, string List> {
+  def _8b : NI_TBX<0, 0b00, len, op, asmop, "8b", VPR64,
+                   !cast(List # "16B_operand")>;
+
+  def _16b : NI_TBX<1, 0b00, len, op, asmop, "16b", VPR128,
+                    !cast(List # "16B_operand")>;
+}
+
+defm TBX1 : NI_TBX_pat<0b00, 0b1, "tbx", "VOne">;
+defm TBX2 : NI_TBX_pat<0b01, 0b1, "tbx", "VPair">;
+defm TBX3 : NI_TBX_pat<0b10, 0b1, "tbx", "VTriple">;
+defm TBX4 : NI_TBX_pat<0b11, 0b1, "tbx", "VQuad">;
+
+class NeonI_INS_main
+  : NeonI_copy<0b1, 0b0, 0b0011,
+               (outs VPR128:$Rd), (ins VPR128:$src, OpGPR:$Rn, OpImm:$Imm),
+               asmop # "\t$Rd."
# Res # "[$Imm], $Rn", + [(set (ResTy VPR128:$Rd), + (ResTy (vector_insert + (ResTy VPR128:$src), + (OpTy OpGPR:$Rn), + (OpImm:$Imm))))], + NoItinerary> { + bits<4> Imm; + let Constraints = "$src = $Rd"; +} + +//Insert element (vector, from main) +def INSbw : NeonI_INS_main<"ins", "b", v16i8, GPR32, i32, + neon_uimm4_bare> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def INShw : NeonI_INS_main<"ins", "h", v8i16, GPR32, i32, + neon_uimm3_bare> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def INSsw : NeonI_INS_main<"ins", "s", v4i32, GPR32, i32, + neon_uimm2_bare> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} +def INSdx : NeonI_INS_main<"ins", "d", v2i64, GPR64, i64, + neon_uimm1_bare> { + let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; +} + +def : NeonInstAlias<"mov $Rd.b[$Imm], $Rn", + (INSbw VPR128:$Rd, GPR32:$Rn, neon_uimm4_bare:$Imm), 0>; +def : NeonInstAlias<"mov $Rd.h[$Imm], $Rn", + (INShw VPR128:$Rd, GPR32:$Rn, neon_uimm3_bare:$Imm), 0>; +def : NeonInstAlias<"mov $Rd.s[$Imm], $Rn", + (INSsw VPR128:$Rd, GPR32:$Rn, neon_uimm2_bare:$Imm), 0>; +def : NeonInstAlias<"mov $Rd.d[$Imm], $Rn", + (INSdx VPR128:$Rd, GPR64:$Rn, neon_uimm1_bare:$Imm), 0>; + +class Neon_INS_main_pattern + : Pat<(ResTy (vector_insert + (ResTy VPR64:$src), + (OpTy OpGPR:$Rn), + (OpImm:$Imm))), + (ResTy (EXTRACT_SUBREG + (ExtResTy (INS (ExtResTy (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), + OpGPR:$Rn, OpImm:$Imm)), sub_64))>; + +def INSbw_pattern : Neon_INS_main_pattern; +def INShw_pattern : Neon_INS_main_pattern; +def INSsw_pattern : Neon_INS_main_pattern; +def INSdx_pattern : Neon_INS_main_pattern; + +class NeonI_INS_element + : NeonI_insert<0b1, 0b1, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn, + ResImm:$Immd, ResImm:$Immn), + asmop # "\t$Rd." # Res # "[$Immd], $Rn." # Res # "[$Immn]", + [], + NoItinerary> { + let Constraints = "$src = $Rd"; + bits<4> Immd; + bits<4> Immn; +} + +//Insert element (vector, from element) +def INSELb : NeonI_INS_element<"ins", "b", neon_uimm4_bare> { + let Inst{20-16} = {Immd{3}, Immd{2}, Immd{1}, Immd{0}, 0b1}; + let Inst{14-11} = {Immn{3}, Immn{2}, Immn{1}, Immn{0}}; +} +def INSELh : NeonI_INS_element<"ins", "h", neon_uimm3_bare> { + let Inst{20-16} = {Immd{2}, Immd{1}, Immd{0}, 0b1, 0b0}; + let Inst{14-11} = {Immn{2}, Immn{1}, Immn{0}, 0b0}; + // bit 11 is unspecified, but should be set to zero. +} +def INSELs : NeonI_INS_element<"ins", "s", neon_uimm2_bare> { + let Inst{20-16} = {Immd{1}, Immd{0}, 0b1, 0b0, 0b0}; + let Inst{14-11} = {Immn{1}, Immn{0}, 0b0, 0b0}; + // bits 11-12 are unspecified, but should be set to zero. +} +def INSELd : NeonI_INS_element<"ins", "d", neon_uimm1_bare> { + let Inst{20-16} = {Immd, 0b1, 0b0, 0b0, 0b0}; + let Inst{14-11} = {Immn{0}, 0b0, 0b0, 0b0}; + // bits 11-13 are unspecified, but should be set to zero. 
+} + +def : NeonInstAlias<"mov $Rd.b[$Immd], $Rn.b[$Immn]", + (INSELb VPR128:$Rd, VPR128:$Rn, + neon_uimm4_bare:$Immd, neon_uimm4_bare:$Immn), 0>; +def : NeonInstAlias<"mov $Rd.h[$Immd], $Rn.h[$Immn]", + (INSELh VPR128:$Rd, VPR128:$Rn, + neon_uimm3_bare:$Immd, neon_uimm3_bare:$Immn), 0>; +def : NeonInstAlias<"mov $Rd.s[$Immd], $Rn.s[$Immn]", + (INSELs VPR128:$Rd, VPR128:$Rn, + neon_uimm2_bare:$Immd, neon_uimm2_bare:$Immn), 0>; +def : NeonInstAlias<"mov $Rd.d[$Immd], $Rn.d[$Immn]", + (INSELd VPR128:$Rd, VPR128:$Rn, + neon_uimm1_bare:$Immd, neon_uimm1_bare:$Immn), 0>; + +multiclass Neon_INS_elt_pattern { +def : Pat<(ResTy (vector_insert + (ResTy VPR128:$src), + (MidTy (vector_extract + (ResTy VPR128:$Rn), + (StImm:$Immn))), + (StImm:$Immd))), + (INS (ResTy VPR128:$src), (ResTy VPR128:$Rn), + StImm:$Immd, StImm:$Immn)>; + +def : Pat <(ResTy (vector_insert + (ResTy VPR128:$src), + (MidTy (vector_extract + (NaTy VPR64:$Rn), + (NaImm:$Immn))), + (StImm:$Immd))), + (INS (ResTy VPR128:$src), + (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)), + StImm:$Immd, NaImm:$Immn)>; + +def : Pat <(NaTy (vector_insert + (NaTy VPR64:$src), + (MidTy (vector_extract + (ResTy VPR128:$Rn), + (StImm:$Immn))), + (NaImm:$Immd))), + (NaTy (EXTRACT_SUBREG + (ResTy (INS + (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), + (ResTy VPR128:$Rn), + NaImm:$Immd, StImm:$Immn)), + sub_64))>; + +def : Pat <(NaTy (vector_insert + (NaTy VPR64:$src), + (MidTy (vector_extract + (NaTy VPR64:$Rn), + (NaImm:$Immn))), + (NaImm:$Immd))), + (NaTy (EXTRACT_SUBREG + (ResTy (INS + (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), + (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$Rn), sub_64)), + NaImm:$Immd, NaImm:$Immn)), + sub_64))>; +} + +defm : Neon_INS_elt_pattern; +defm : Neon_INS_elt_pattern; +defm : Neon_INS_elt_pattern; +defm : Neon_INS_elt_pattern; +defm : Neon_INS_elt_pattern; +defm : Neon_INS_elt_pattern; + +multiclass Neon_INS_elt_float_pattern { +def : Pat <(ResTy (vector_insert + (ResTy VPR128:$src), + (MidTy OpFPR:$Rn), + (ResImm:$Imm))), + (INS (ResTy VPR128:$src), + (ResTy (SUBREG_TO_REG (i64 0), OpFPR:$Rn, SubIndex)), + ResImm:$Imm, + (i64 0))>; + +def : Pat <(NaTy (vector_insert + (NaTy VPR64:$src), + (MidTy OpFPR:$Rn), + (ResImm:$Imm))), + (NaTy (EXTRACT_SUBREG + (ResTy (INS + (ResTy (SUBREG_TO_REG (i64 0), (NaTy VPR64:$src), sub_64)), + (ResTy (SUBREG_TO_REG (i64 0), (MidTy OpFPR:$Rn), SubIndex)), + ResImm:$Imm, + (i64 0))), + sub_64))>; +} + +defm : Neon_INS_elt_float_pattern; +defm : Neon_INS_elt_float_pattern; + +class NeonI_SMOV + : NeonI_copy { + bits<4> Imm; +} + +//Signed integer move (main, from element) +def SMOVwb : NeonI_SMOV<"smov", "b", 0b0, v16i8, i8, neon_uimm4_bare, + GPR32, i32> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def SMOVwh : NeonI_SMOV<"smov", "h", 0b0, v8i16, i16, neon_uimm3_bare, + GPR32, i32> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def SMOVxb : NeonI_SMOV<"smov", "b", 0b1, v16i8, i8, neon_uimm4_bare, + GPR64, i64> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def SMOVxh : NeonI_SMOV<"smov", "h", 0b1, v8i16, i16, neon_uimm3_bare, + GPR64, i64> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def SMOVxs : NeonI_SMOV<"smov", "s", 0b1, v4i32, i32, neon_uimm2_bare, + GPR64, i64> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} + +multiclass Neon_SMOVx_pattern { + def : Pat<(i64 (sext_inreg + (i64 (anyext + (i32 (vector_extract + (StTy VPR128:$Rn), (StImm:$Imm))))), + eleTy)), + (SMOVI 
VPR128:$Rn, StImm:$Imm)>; + + def : Pat<(i64 (sext + (i32 (vector_extract + (StTy VPR128:$Rn), (StImm:$Imm))))), + (SMOVI VPR128:$Rn, StImm:$Imm)>; + + def : Pat<(i64 (sext_inreg + (i64 (vector_extract + (NaTy VPR64:$Rn), (NaImm:$Imm))), + eleTy)), + (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Imm)>; + + def : Pat<(i64 (sext_inreg + (i64 (anyext + (i32 (vector_extract + (NaTy VPR64:$Rn), (NaImm:$Imm))))), + eleTy)), + (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Imm)>; + + def : Pat<(i64 (sext + (i32 (vector_extract + (NaTy VPR64:$Rn), (NaImm:$Imm))))), + (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Imm)>; +} + +defm : Neon_SMOVx_pattern; +defm : Neon_SMOVx_pattern; +defm : Neon_SMOVx_pattern; + +class Neon_SMOVw_pattern + : Pat<(i32 (sext_inreg + (i32 (vector_extract + (NaTy VPR64:$Rn), (NaImm:$Imm))), + eleTy)), + (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Imm)>; + +def : Neon_SMOVw_pattern; +def : Neon_SMOVw_pattern; + +class NeonI_UMOV + : NeonI_copy { + bits<4> Imm; +} + +//Unsigned integer move (main, from element) +def UMOVwb : NeonI_UMOV<"umov", "b", 0b0, v16i8, neon_uimm4_bare, + GPR32, i32> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} +def UMOVwh : NeonI_UMOV<"umov", "h", 0b0, v8i16, neon_uimm3_bare, + GPR32, i32> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} +def UMOVws : NeonI_UMOV<"umov", "s", 0b0, v4i32, neon_uimm2_bare, + GPR32, i32> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} +def UMOVxd : NeonI_UMOV<"umov", "d", 0b1, v2i64, neon_uimm1_bare, + GPR64, i64> { + let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; +} + +def : NeonInstAlias<"mov $Rd, $Rn.s[$Imm]", + (UMOVws GPR32:$Rd, VPR128:$Rn, neon_uimm2_bare:$Imm), 0>; +def : NeonInstAlias<"mov $Rd, $Rn.d[$Imm]", + (UMOVxd GPR64:$Rd, VPR128:$Rn, neon_uimm1_bare:$Imm), 0>; + +class Neon_UMOV_pattern + : Pat<(ResTy (vector_extract + (NaTy VPR64:$Rn), NaImm:$Imm)), + (SMOVI (StTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + NaImm:$Imm)>; + +def : Neon_UMOV_pattern; +def : Neon_UMOV_pattern; +def : Neon_UMOV_pattern; + +def : Pat<(i32 (and + (i32 (vector_extract + (v16i8 VPR128:$Rn), (neon_uimm4_bare:$Imm))), + 255)), + (UMOVwb VPR128:$Rn, neon_uimm4_bare:$Imm)>; + +def : Pat<(i32 (and + (i32 (vector_extract + (v8i16 VPR128:$Rn), (neon_uimm3_bare:$Imm))), + 65535)), + (UMOVwh VPR128:$Rn, neon_uimm3_bare:$Imm)>; + +def : Pat<(i64 (zext + (i32 (vector_extract + (v2i64 VPR128:$Rn), (neon_uimm1_bare:$Imm))))), + (UMOVxd VPR128:$Rn, neon_uimm1_bare:$Imm)>; + +def : Pat<(i32 (and + (i32 (vector_extract + (v8i8 VPR64:$Rn), (neon_uimm3_bare:$Imm))), + 255)), + (UMOVwb (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), + neon_uimm3_bare:$Imm)>; + +def : Pat<(i32 (and + (i32 (vector_extract + (v4i16 VPR64:$Rn), (neon_uimm2_bare:$Imm))), + 65535)), + (UMOVwh (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), + neon_uimm2_bare:$Imm)>; + +def : Pat<(i64 (zext + (i32 (vector_extract + (v1i64 VPR64:$Rn), (neon_uimm0_bare:$Imm))))), + (UMOVxd (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64), + neon_uimm0_bare:$Imm)>; + +// Additional copy patterns for scalar types +def : Pat<(i32 (vector_extract (v1i8 FPR8:$Rn), (i64 0))), + (UMOVwb (v16i8 + (SUBREG_TO_REG (i64 0), FPR8:$Rn, sub_8)), (i64 0))>; + +def : Pat<(i32 (vector_extract (v1i16 FPR16:$Rn), (i64 0))), + (UMOVwh (v8i16 + (SUBREG_TO_REG (i64 0), FPR16:$Rn, sub_16)), (i64 0))>; + +def : Pat<(i32 (vector_extract (v1i32 FPR32:$Rn), (i64 0))), + (FMOVws FPR32:$Rn)>; + +def : 
Pat<(i64 (vector_extract (v1i64 FPR64:$Rn), (i64 0))), + (FMOVxd FPR64:$Rn)>; + +def : Pat<(f64 (vector_extract (v1f64 FPR64:$Rn), (i64 0))), + (f64 FPR64:$Rn)>; + +def : Pat<(f32 (vector_extract (v1f32 FPR32:$Rn), (i64 0))), + (f32 FPR32:$Rn)>; + +def : Pat<(v1i8 (scalar_to_vector GPR32:$Rn)), + (v1i8 (EXTRACT_SUBREG (v16i8 + (INSbw (v16i8 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_8))>; + +def : Pat<(v1i16 (scalar_to_vector GPR32:$Rn)), + (v1i16 (EXTRACT_SUBREG (v8i16 + (INShw (v8i16 (IMPLICIT_DEF)), $Rn, (i64 0))), + sub_16))>; + +def : Pat<(v1i32 (scalar_to_vector GPR32:$src)), + (FMOVsw $src)>; + +def : Pat<(v1i64 (scalar_to_vector GPR64:$src)), + (FMOVdx $src)>; + +def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$Rn))), + (v1f32 FPR32:$Rn)>; +def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$Rn))), + (v1f64 FPR64:$Rn)>; + +def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), + (FMOVdd $src)>; + +def : Pat<(v2f64 (scalar_to_vector (f64 FPR64:$src))), + (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), + (f64 FPR64:$src), sub_64)>; + +class NeonI_DUP_Elt + : NeonI_copy { + bits<4> Imm; +} + +def DUPELT16b : NeonI_DUP_Elt<0b1, "dup", ".16b", ".b", VPR128, + neon_uimm4_bare> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} + +def DUPELT8h : NeonI_DUP_Elt<0b1, "dup", ".8h", ".h", VPR128, + neon_uimm3_bare> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} + +def DUPELT4s : NeonI_DUP_Elt<0b1, "dup", ".4s", ".s", VPR128, + neon_uimm2_bare> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} + +def DUPELT2d : NeonI_DUP_Elt<0b1, "dup", ".2d", ".d", VPR128, + neon_uimm1_bare> { + let Inst{20-16} = {Imm, 0b1, 0b0, 0b0, 0b0}; +} + +def DUPELT8b : NeonI_DUP_Elt<0b0, "dup", ".8b", ".b", VPR64, + neon_uimm4_bare> { + let Inst{20-16} = {Imm{3}, Imm{2}, Imm{1}, Imm{0}, 0b1}; +} + +def DUPELT4h : NeonI_DUP_Elt<0b0, "dup", ".4h", ".h", VPR64, + neon_uimm3_bare> { + let Inst{20-16} = {Imm{2}, Imm{1}, Imm{0}, 0b1, 0b0}; +} + +def DUPELT2s : NeonI_DUP_Elt<0b0, "dup", ".2s", ".s", VPR64, + neon_uimm2_bare> { + let Inst{20-16} = {Imm{1}, Imm{0}, 0b1, 0b0, 0b0}; +} + +multiclass NeonI_DUP_Elt_pattern { +def : Pat<(ResTy (Neon_vduplane (OpTy VPR128:$Rn), OpLImm:$Imm)), + (ResTy (DUPELT (OpTy VPR128:$Rn), OpLImm:$Imm))>; + +def : Pat<(ResTy (Neon_vduplane + (NaTy VPR64:$Rn), OpNImm:$Imm)), + (ResTy (DUPELT + (ExTy (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), OpNImm:$Imm))>; +} +defm : NeonI_DUP_Elt_pattern; +defm : NeonI_DUP_Elt_pattern; +defm : NeonI_DUP_Elt_pattern; +defm : NeonI_DUP_Elt_pattern; +defm : NeonI_DUP_Elt_pattern; +defm : NeonI_DUP_Elt_pattern; +defm : NeonI_DUP_Elt_pattern; +defm : NeonI_DUP_Elt_pattern; +defm : NeonI_DUP_Elt_pattern; +defm : NeonI_DUP_Elt_pattern; + +def : Pat<(v2f32 (Neon_vdup (f32 FPR32:$Rn))), + (v2f32 (DUPELT2s + (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), + (i64 0)))>; +def : Pat<(v4f32 (Neon_vdup (f32 FPR32:$Rn))), + (v4f32 (DUPELT4s + (SUBREG_TO_REG (i64 0), FPR32:$Rn, sub_32), + (i64 0)))>; +def : Pat<(v2f64 (Neon_vdup (f64 FPR64:$Rn))), + (v2f64 (DUPELT2d + (SUBREG_TO_REG (i64 0), FPR64:$Rn, sub_64), + (i64 0)))>; + +class NeonI_DUP + : NeonI_copy; + +def DUP16b : NeonI_DUP<0b1, "dup", ".16b", VPR128, v16i8, GPR32, i32> { + let Inst{20-16} = 0b00001; + // bits 17-20 are unspecified, but should be set to zero. +} + +def DUP8h : NeonI_DUP<0b1, "dup", ".8h", VPR128, v8i16, GPR32, i32> { + let Inst{20-16} = 0b00010; + // bits 18-20 are unspecified, but should be set to zero. 
+}
+
+def DUP4s : NeonI_DUP<0b1, "dup", ".4s", VPR128, v4i32, GPR32, i32> {
+  let Inst{20-16} = 0b00100;
+  // bits 19-20 are unspecified, but should be set to zero.
+}
+
+def DUP2d : NeonI_DUP<0b1, "dup", ".2d", VPR128, v2i64, GPR64, i64> {
+  let Inst{20-16} = 0b01000;
+  // bit 20 is unspecified, but should be set to zero.
+}
+
+def DUP8b : NeonI_DUP<0b0, "dup", ".8b", VPR64, v8i8, GPR32, i32> {
+  let Inst{20-16} = 0b00001;
+  // bits 17-20 are unspecified, but should be set to zero.
+}
+
+def DUP4h : NeonI_DUP<0b0, "dup", ".4h", VPR64, v4i16, GPR32, i32> {
+  let Inst{20-16} = 0b00010;
+  // bits 18-20 are unspecified, but should be set to zero.
+}
+
+def DUP2s : NeonI_DUP<0b0, "dup", ".2s", VPR64, v2i32, GPR32, i32> {
+  let Inst{20-16} = 0b00100;
+  // bits 19-20 are unspecified, but should be set to zero.
+}
+
+// Patterns for CONCAT_VECTORS
+multiclass Concat_Vector_Pattern {
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), undef)),
+          (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)>;
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rm))),
+          (INSELd
+            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rm, sub_64)),
+            (i64 1),
+            (i64 0))>;
+def : Pat<(ResTy (concat_vectors (OpTy VPR64:$Rn), (OpTy VPR64:$Rn))),
+          (DUPELT2d
+            (v2i64 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)),
+            (i64 0))>;
+}
+
+defm : Concat_Vector_Pattern;
+defm : Concat_Vector_Pattern;
+defm : Concat_Vector_Pattern;
+defm : Concat_Vector_Pattern;
+defm : Concat_Vector_Pattern;
+defm : Concat_Vector_Pattern;
+
+// Patterns for EXTRACT_SUBVECTOR
+def : Pat<(v8i8 (extract_subvector (v16i8 VPR128:$Rn), (i64 0))),
+          (v8i8 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v4i16 (extract_subvector (v8i16 VPR128:$Rn), (i64 0))),
+          (v4i16 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v2i32 (extract_subvector (v4i32 VPR128:$Rn), (i64 0))),
+          (v2i32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v1i64 (extract_subvector (v2i64 VPR128:$Rn), (i64 0))),
+          (v1i64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v2f32 (extract_subvector (v4f32 VPR128:$Rn), (i64 0))),
+          (v2f32 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+def : Pat<(v1f64 (extract_subvector (v2f64 VPR128:$Rn), (i64 0))),
+          (v1f64 (EXTRACT_SUBREG VPR128:$Rn, sub_64))>;
+
+// The following are for instruction class (3V Elem)
+
+// Variant 1
+
+class NI_2VE size, bits<4> opcode,
+             string asmop, string ResS, string OpS, string EleOpS,
+             Operand OpImm, RegisterOperand ResVPR,
+             RegisterOperand OpVPR, RegisterOperand EleOpVPR>
+  : NeonI_2VElem {
+  bits<3> Index;
+  bits<5> Re;
+
+  let Constraints = "$src = $Rd";
+}
+
+multiclass NI_2VE_v1 opcode, string asmop> {
+  // vector register class for element is always 128-bit to cover the max index
+  def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
+                     neon_uimm2_bare, VPR64, VPR64, VPR128> {
+    let Inst{11} = {Index{1}};
+    let Inst{21} = {Index{0}};
+    let Inst{20-16} = Re;
+  }
+
+  def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
+                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
+    let Inst{11} = {Index{1}};
+    let Inst{21} = {Index{0}};
+    let Inst{20-16} = Re;
+  }
+
+  // Index operations on 16-bit(H) elements are restricted to using v0-v15.
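+  // (Only four bits, Inst{19-16}, encode the element register in the H forms
+  // below, so elements of v16-v31 cannot be addressed.)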
+ def _4h8h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", + neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } + + def _8h8h : NI_2VE<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", + neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } +} + +defm MLAvve : NI_2VE_v1<0b1, 0b0000, "mla">; +defm MLSvve : NI_2VE_v1<0b1, 0b0100, "mls">; + +// Pattern for lane in 128-bit vector +class NI_2VE_laneq + : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), + (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST ResVPR:$src, OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; + +// Pattern for lane in 64-bit vector +class NI_2VE_lane + : Pat<(ResTy (op (ResTy ResVPR:$src), (OpTy OpVPR:$Rn), + (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST ResVPR:$src, OpVPR:$Rn, + (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; + +multiclass NI_2VE_v1_pat +{ + def : NI_2VE_laneq(subop # "_2s4s"), neon_uimm2_bare, + op, VPR64, VPR64, VPR128, v2i32, v2i32, v4i32>; + + def : NI_2VE_laneq(subop # "_4s4s"), neon_uimm2_bare, + op, VPR128, VPR128, VPR128, v4i32, v4i32, v4i32>; + + def : NI_2VE_laneq(subop # "_4h8h"), neon_uimm3_bare, + op, VPR64, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; + + def : NI_2VE_laneq(subop # "_8h8h"), neon_uimm3_bare, + op, VPR128, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_lane(subop # "_2s4s"), neon_uimm1_bare, + op, VPR64, VPR64, VPR64, v2i32, v2i32, v2i32>; + + def : NI_2VE_lane(subop # "_4h8h"), neon_uimm2_bare, + op, VPR64, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; +} + +defm MLA_lane_v1 : NI_2VE_v1_pat<"MLAvve", Neon_mla>; +defm MLS_lane_v1 : NI_2VE_v1_pat<"MLSvve", Neon_mls>; + +class NI_2VE_2op size, bits<4> opcode, + string asmop, string ResS, string OpS, string EleOpS, + Operand OpImm, RegisterOperand ResVPR, + RegisterOperand OpVPR, RegisterOperand EleOpVPR> + : NeonI_2VElem { + bits<3> Index; + bits<5> Re; +} + +multiclass NI_2VE_v1_2op opcode, string asmop> { + // vector register class for element is always 128-bit to cover the max index + def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", + neon_uimm2_bare, VPR64, VPR64, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", + neon_uimm2_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
+ def _4h8h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4h", "4h", "h", + neon_uimm3_bare, VPR64, VPR64, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } + + def _8h8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop, "8h", "8h", "h", + neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } +} + +defm MULve : NI_2VE_v1_2op<0b0, 0b1000, "mul">; +defm SQDMULHve : NI_2VE_v1_2op<0b0, 0b1100, "sqdmulh">; +defm SQRDMULHve : NI_2VE_v1_2op<0b0, 0b1101, "sqrdmulh">; + +// Pattern for lane in 128-bit vector +class NI_2VE_mul_laneq + : Pat<(ResTy (op (OpTy OpVPR:$Rn), + (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST OpVPR:$Rn, EleOpVPR:$Re, OpImm:$Index)>; + +// Pattern for lane in 64-bit vector +class NI_2VE_mul_lane + : Pat<(ResTy (op (OpTy OpVPR:$Rn), + (OpTy (Neon_vduplane (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST OpVPR:$Rn, + (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; + +multiclass NI_2VE_mul_v1_pat { + def : NI_2VE_mul_laneq(subop # "_2s4s"), neon_uimm2_bare, + op, VPR64, VPR128, v2i32, v2i32, v4i32>; + + def : NI_2VE_mul_laneq(subop # "_4s4s"), neon_uimm2_bare, + op, VPR128, VPR128, v4i32, v4i32, v4i32>; + + def : NI_2VE_mul_laneq(subop # "_4h8h"), neon_uimm3_bare, + op, VPR64, VPR128Lo, v4i16, v4i16, v8i16>; + + def : NI_2VE_mul_laneq(subop # "_8h8h"), neon_uimm3_bare, + op, VPR128, VPR128Lo, v8i16, v8i16, v8i16>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_mul_lane(subop # "_2s4s"), neon_uimm1_bare, + op, VPR64, VPR64, v2i32, v2i32, v2i32>; + + def : NI_2VE_mul_lane(subop # "_4h8h"), neon_uimm2_bare, + op, VPR64, VPR64Lo, v4i16, v4i16, v4i16>; +} + +defm MUL_lane_v1 : NI_2VE_mul_v1_pat<"MULve", mul>; +defm SQDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQDMULHve", int_arm_neon_vqdmulh>; +defm SQRDMULH_lane_v1 : NI_2VE_mul_v1_pat<"SQRDMULHve", int_arm_neon_vqrdmulh>; + +// Variant 2 + +multiclass NI_2VE_v2_2op opcode, string asmop> { + // vector register class for element is always 128-bit to cover the max index + def _2s4s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s", + neon_uimm2_bare, VPR64, VPR64, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + def _4s4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s", + neon_uimm2_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + // _1d2d doesn't exist! 
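+  // (A 1d-by-element multiply is effectively a scalar operation; presumably
+  // it is already covered by the scalar FMULddv_2D/FMULXddv_2D forms defined
+  // earlier, so no 1d variant is defined here.)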
+
+  def _2d2d : NI_2VE_2op<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d",
+                         neon_uimm1_bare, VPR128, VPR128, VPR128> {
+    let Inst{11} = {Index{0}};
+    let Inst{21} = 0b0;
+    let Inst{20-16} = Re;
+  }
+}
+
+defm FMULve : NI_2VE_v2_2op<0b0, 0b1001, "fmul">;
+defm FMULXve : NI_2VE_v2_2op<0b1, 0b1001, "fmulx">;
+
+class NI_2VE_mul_lane_2d
+  : Pat<(ResTy (op (OpTy OpVPR:$Rn),
+                   (OpTy (coreop (EleOpTy EleOpVPR:$Re), (EleOpTy EleOpVPR:$Re))))),
+        (INST OpVPR:$Rn,
+              (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), 0)>;
+
+multiclass NI_2VE_mul_v2_pat {
+  def : NI_2VE_mul_laneq(subop # "_2s4s"), neon_uimm2_bare,
+                         op, VPR64, VPR128, v2f32, v2f32, v4f32>;
+
+  def : NI_2VE_mul_laneq(subop # "_4s4s"), neon_uimm2_bare,
+                         op, VPR128, VPR128, v4f32, v4f32, v4f32>;
+
+  def : NI_2VE_mul_laneq(subop # "_2d2d"), neon_uimm1_bare,
+                         op, VPR128, VPR128, v2f64, v2f64, v2f64>;
+
+  // Index can only be half of the max value for lane in 64-bit vector
+
+  def : NI_2VE_mul_lane(subop # "_2s4s"), neon_uimm1_bare,
+                        op, VPR64, VPR64, v2f32, v2f32, v2f32>;
+
+  def : NI_2VE_mul_lane_2d(subop # "_2d2d"), neon_uimm1_bare,
+                           op, VPR128, VPR64, v2f64, v2f64, v1f64,
+                           BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>;
+}
+
+defm FMUL_lane_v2 : NI_2VE_mul_v2_pat<"FMULve", fmul>;
+defm FMULX_lane_v2 : NI_2VE_mul_v2_pat<"FMULXve", int_aarch64_neon_vmulx>;
+
+def : Pat<(v2f32 (fmul (v2f32 (Neon_vdup (f32 FPR32:$Re))),
+                       (v2f32 VPR64:$Rn))),
+          (FMULve_2s4s VPR64:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
+
+def : Pat<(v4f32 (fmul (v4f32 (Neon_vdup (f32 FPR32:$Re))),
+                       (v4f32 VPR128:$Rn))),
+          (FMULve_4s4s VPR128:$Rn, (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>;
+
+def : Pat<(v2f64 (fmul (v2f64 (Neon_vdup (f64 FPR64:$Re))),
+                       (v2f64 VPR128:$Rn))),
+          (FMULve_2d2d VPR128:$Rn, (SUBREG_TO_REG (i64 0), $Re, sub_64), 0)>;
+
+// The following are patterns using fma;
+// -ffp-contract=fast generates fma
+
+multiclass NI_2VE_v2 opcode, string asmop> {
+  // vector register class for element is always 128-bit to cover the max index
+  def _2s4s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2s", "2s", "s",
+                     neon_uimm2_bare, VPR64, VPR64, VPR128> {
+    let Inst{11} = {Index{1}};
+    let Inst{21} = {Index{0}};
+    let Inst{20-16} = Re;
+  }
+
+  def _4s4s : NI_2VE<0b1, u, 0b10, opcode, asmop, "4s", "4s", "s",
+                     neon_uimm2_bare, VPR128, VPR128, VPR128> {
+    let Inst{11} = {Index{1}};
+    let Inst{21} = {Index{0}};
+    let Inst{20-16} = Re;
+  }
+
+  // _1d2d doesn't exist!
+ + def _2d2d : NI_2VE<0b1, u, 0b11, opcode, asmop, "2d", "2d", "d", + neon_uimm1_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{0}}; + let Inst{21} = 0b0; + let Inst{20-16} = Re; + } +} + +defm FMLAvve : NI_2VE_v2<0b0, 0b0001, "fmla">; +defm FMLSvve : NI_2VE_v2<0b0, 0b0101, "fmls">; + +// Pattern for lane in 128-bit vector +class NI_2VEswap_laneq + : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), + (ResTy ResVPR:$src), (ResTy ResVPR:$Rn))), + (INST ResVPR:$src, ResVPR:$Rn, OpVPR:$Re, OpImm:$Index)>; + +// Pattern for lane 0 +class NI_2VEfma_lane0 + : Pat<(ResTy (op (ResTy ResVPR:$Rn), + (ResTy (Neon_vdup (f32 FPR32:$Re))), + (ResTy ResVPR:$src))), + (INST ResVPR:$src, ResVPR:$Rn, + (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; + +// Pattern for lane in 64-bit vector +class NI_2VEswap_lane + : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (i64 OpImm:$Index))), + (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), + (INST ResVPR:$src, ResVPR:$Rn, + (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), OpImm:$Index)>; + +// Pattern for lane in 64-bit vector +class NI_2VEswap_lane_2d2d + : Pat<(ResTy (op (ResTy (coreop (OpTy OpVPR:$Re), (OpTy OpVPR:$Re))), + (ResTy ResVPR:$Rn), (ResTy ResVPR:$src))), + (INST ResVPR:$src, ResVPR:$Rn, + (SUBREG_TO_REG (i64 0), OpVPR:$Re, sub_64), 0)>; + + +multiclass NI_2VE_fma_v2_pat { + def : NI_2VEswap_laneq(subop # "_2s4s"), + neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, + BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; + + def : NI_2VEfma_lane0(subop # "_2s4s"), + op, VPR64, v2f32>; + + def : NI_2VEswap_laneq(subop # "_4s4s"), + neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, + BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; + + def : NI_2VEfma_lane0(subop # "_4s4s"), + op, VPR128, v4f32>; + + def : NI_2VEswap_laneq(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, + BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VEswap_lane(subop # "_2s4s"), + neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, + BinOpFrag<(Neon_vduplane node:$LHS, node:$RHS)>>; + + def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, + BinOpFrag<(Neon_combine_2d node:$LHS, node:$RHS)>>; +} + +defm FMLA_lane_v2_s : NI_2VE_fma_v2_pat<"FMLAvve", fma>; + +// Pattern for lane 0 +class NI_2VEfms_lane0 + : Pat<(ResTy (op (ResTy (fneg ResVPR:$Rn)), + (ResTy (Neon_vdup (f32 FPR32:$Re))), + (ResTy ResVPR:$src))), + (INST ResVPR:$src, ResVPR:$Rn, + (SUBREG_TO_REG (i32 0), $Re, sub_32), 0)>; + +multiclass NI_2VE_fms_v2_pat +{ + def : NI_2VEswap_laneq(subop # "_2s4s"), + neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, + BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; + + def : NI_2VEswap_laneq(subop # "_2s4s"), + neon_uimm2_bare, op, VPR64, VPR128, v2f32, v4f32, + BinOpFrag<(Neon_vduplane + (fneg node:$LHS), node:$RHS)>>; + + def : NI_2VEfms_lane0(subop # "_2s4s"), + op, VPR64, v2f32>; + + def : NI_2VEswap_laneq(subop # "_4s4s"), + neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, + BinOpFrag<(fneg (Neon_vduplane + node:$LHS, node:$RHS))>>; + + def : NI_2VEswap_laneq(subop # "_4s4s"), + neon_uimm2_bare, op, VPR128, VPR128, v4f32, v4f32, + BinOpFrag<(Neon_vduplane + (fneg node:$LHS), node:$RHS)>>; + + def : NI_2VEfms_lane0(subop # "_4s4s"), + op, VPR128, v4f32>; + + def : NI_2VEswap_laneq(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, + BinOpFrag<(fneg (Neon_vduplane + node:$LHS, 
node:$RHS))>>; + + def : NI_2VEswap_laneq(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR128, v2f64, v2f64, + BinOpFrag<(Neon_vduplane + (fneg node:$LHS), node:$RHS)>>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VEswap_lane(subop # "_2s4s"), + neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, + BinOpFrag<(fneg (Neon_vduplane + node:$LHS, node:$RHS))>>; + + def : NI_2VEswap_lane(subop # "_2s4s"), + neon_uimm1_bare, op, VPR64, VPR64, v2f32, v2f32, + BinOpFrag<(Neon_vduplane + (fneg node:$LHS), node:$RHS)>>; + + def : NI_2VEswap_lane(subop # "_4s4s"), + neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, + BinOpFrag<(fneg (Neon_vduplane node:$LHS, node:$RHS))>>; + + def : NI_2VEswap_lane(subop # "_4s4s"), + neon_uimm1_bare, op, VPR128, VPR64, v4f32, v2f32, + BinOpFrag<(Neon_vduplane (fneg node:$LHS), node:$RHS)>>; + + def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, + BinOpFrag<(fneg (Neon_combine_2d + node:$LHS, node:$RHS))>>; + + def : NI_2VEswap_lane_2d2d(subop # "_2d2d"), + neon_uimm1_bare, op, VPR128, VPR64, v2f64, v1f64, + BinOpFrag<(Neon_combine_2d + (fneg node:$LHS), (fneg node:$RHS))>>; +} + +defm FMLS_lane_v2_s : NI_2VE_fms_v2_pat<"FMLSvve", fma>; + +// Variant 3: Long type +// E.g. SMLAL : 4S/4H/H (v0-v15), 2D/2S/S +// SMLAL2: 4S/8H/H (v0-v15), 2D/4S/S + +multiclass NI_2VE_v3 opcode, string asmop> { + // vector register class for element is always 128-bit to cover the max index + def _2d2s : NI_2VE<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", + neon_uimm2_bare, VPR128, VPR64, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + def _2d4s : NI_2VE<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", + neon_uimm2_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + // Index operations on 16-bit(H) elements are restricted to using v0-v15. + def _4s8h : NI_2VE<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", + neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } + + def _4s4h : NI_2VE<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", + neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } +} + +defm SMLALvve : NI_2VE_v3<0b0, 0b0010, "smlal">; +defm UMLALvve : NI_2VE_v3<0b1, 0b0010, "umlal">; +defm SMLSLvve : NI_2VE_v3<0b0, 0b0110, "smlsl">; +defm UMLSLvve : NI_2VE_v3<0b1, 0b0110, "umlsl">; +defm SQDMLALvve : NI_2VE_v3<0b0, 0b0011, "sqdmlal">; +defm SQDMLSLvve : NI_2VE_v3<0b0, 0b0111, "sqdmlsl">; + +multiclass NI_2VE_v3_2op opcode, string asmop> { + // vector register class for element is always 128-bit to cover the max index + def _2d2s : NI_2VE_2op<0b0, u, 0b10, opcode, asmop, "2d", "2s", "s", + neon_uimm2_bare, VPR128, VPR64, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + def _2d4s : NI_2VE_2op<0b1, u, 0b10, opcode, asmop # "2", "2d", "4s", "s", + neon_uimm2_bare, VPR128, VPR128, VPR128> { + let Inst{11} = {Index{1}}; + let Inst{21} = {Index{0}}; + let Inst{20-16} = Re; + } + + // Index operations on 16-bit(H) elements are restricted to using v0-v15. 
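+  // With "h" elements the lane index needs three bits, and those occupy
+  // Inst{11}, Inst{21} and Inst{20}; only the four bits Inst{19-16} remain
+  // to encode Re, and four bits can only name v0-v15, hence VPR128Lo below.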
+ def _4s8h : NI_2VE_2op<0b1, u, 0b01, opcode, asmop # "2", "4s", "8h", "h", + neon_uimm3_bare, VPR128, VPR128, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } + + def _4s4h : NI_2VE_2op<0b0, u, 0b01, opcode, asmop, "4s", "4h", "h", + neon_uimm3_bare, VPR128, VPR64, VPR128Lo> { + let Inst{11} = {Index{2}}; + let Inst{21} = {Index{1}}; + let Inst{20} = {Index{0}}; + let Inst{19-16} = Re{3-0}; + } +} + +defm SMULLve : NI_2VE_v3_2op<0b0, 0b1010, "smull">; +defm UMULLve : NI_2VE_v3_2op<0b1, 0b1010, "umull">; +defm SQDMULLve : NI_2VE_v3_2op<0b0, 0b1011, "sqdmull">; + +def : Pat<(v1f64 (scalar_to_vector (f64 FPR64:$src))), + (FMOVdd $src)>; +def : Pat<(v1f32 (scalar_to_vector (f32 FPR32:$src))), + (FMOVss $src)>; + +// Pattern for lane in 128-bit vector +class NI_2VEL2_laneq + : Pat<(ResTy (op (ResTy VPR128:$src), + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vduplane + (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST VPR128:$src, VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; + +// Pattern for lane in 64-bit vector +class NI_2VEL2_lane + : Pat<(ResTy (op (ResTy VPR128:$src), + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vduplane + (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST VPR128:$src, VPR128:$Rn, + (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, sub_64), OpImm:$Index)>; + +class NI_2VEL2_lane0 + : Pat<(ResTy (op (ResTy VPR128:$src), + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), + (INST VPR128:$src, VPR128:$Rn, (DupInst $Re), 0)>; + +multiclass NI_2VEL_v3_pat { + def : NI_2VE_laneq(subop # "_4s4h"), neon_uimm3_bare, + op, VPR128, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; + + def : NI_2VE_laneq(subop # "_2d2s"), neon_uimm2_bare, + op, VPR128, VPR64, VPR128, v2i64, v2i32, v4i32>; + + def : NI_2VEL2_laneq(subop # "_4s8h"), neon_uimm3_bare, + op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_laneq(subop # "_2d4s"), neon_uimm2_bare, + op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; + + def : NI_2VEL2_lane0(subop # "_4s8h"), + op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; + + def : NI_2VEL2_lane0(subop # "_2d4s"), + op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_lane(subop # "_4s4h"), neon_uimm2_bare, + op, VPR128, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; + + def : NI_2VE_lane(subop # "_2d2s"), neon_uimm1_bare, + op, VPR128, VPR64, VPR64, v2i64, v2i32, v2i32>; + + def : NI_2VEL2_lane(subop # "_4s8h"), neon_uimm2_bare, + op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_lane(subop # "_2d4s"), neon_uimm1_bare, + op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; +} + +defm SMLAL_lane_v3 : NI_2VEL_v3_pat<"SMLALvve", Neon_smlal>; +defm UMLAL_lane_v3 : NI_2VEL_v3_pat<"UMLALvve", Neon_umlal>; +defm SMLSL_lane_v3 : NI_2VEL_v3_pat<"SMLSLvve", Neon_smlsl>; +defm UMLSL_lane_v3 : NI_2VEL_v3_pat<"UMLSLvve", Neon_umlsl>; + +// Pattern for lane in 128-bit vector +class NI_2VEL2_mul_laneq + : Pat<(ResTy (op + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vduplane + (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST VPR128:$Rn, EleOpVPR:$Re, OpImm:$Index)>; + +// Pattern for lane in 64-bit vector +class NI_2VEL2_mul_lane + : Pat<(ResTy (op + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vduplane + (EleOpTy EleOpVPR:$Re), (i64 OpImm:$Index))))), + (INST VPR128:$Rn, + (SUBREG_TO_REG (i64 0), EleOpVPR:$Re, 
sub_64), OpImm:$Index)>; + +// Pattern for fixed lane 0 +class NI_2VEL2_mul_lane0 + : Pat<(ResTy (op + (HalfOpTy (hiop (OpTy VPR128:$Rn))), + (HalfOpTy (Neon_vdup (i32 GPR32:$Re))))), + (INST VPR128:$Rn, (DupInst $Re), 0)>; + +multiclass NI_2VEL_mul_v3_pat { + def : NI_2VE_mul_laneq(subop # "_4s4h"), neon_uimm3_bare, + op, VPR64, VPR128Lo, v4i32, v4i16, v8i16>; + + def : NI_2VE_mul_laneq(subop # "_2d2s"), neon_uimm2_bare, + op, VPR64, VPR128, v2i64, v2i32, v4i32>; + + def : NI_2VEL2_mul_laneq(subop # "_4s8h"), neon_uimm3_bare, + op, VPR128Lo, v4i32, v8i16, v8i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_mul_laneq(subop # "_2d4s"), neon_uimm2_bare, + op, VPR128, v2i64, v4i32, v4i32, v2i32, Neon_High4S>; + + def : NI_2VEL2_mul_lane0(subop # "_4s8h"), + op, v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; + + def : NI_2VEL2_mul_lane0(subop # "_2d4s"), + op, v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_mul_lane(subop # "_4s4h"), neon_uimm2_bare, + op, VPR64, VPR64Lo, v4i32, v4i16, v4i16>; + + def : NI_2VE_mul_lane(subop # "_2d2s"), neon_uimm1_bare, + op, VPR64, VPR64, v2i64, v2i32, v2i32>; + + def : NI_2VEL2_mul_lane(subop # "_4s8h"), neon_uimm2_bare, + op, VPR64Lo, v4i32, v8i16, v4i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_mul_lane(subop # "_2d4s"), neon_uimm1_bare, + op, VPR64, v2i64, v4i32, v2i32, v2i32, Neon_High4S>; +} + +defm SMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SMULLve", int_arm_neon_vmulls>; +defm UMULL_lane_v3 : NI_2VEL_mul_v3_pat<"UMULLve", int_arm_neon_vmullu>; +defm SQDMULL_lane_v3 : NI_2VEL_mul_v3_pat<"SQDMULLve", int_arm_neon_vqdmull>; + +multiclass NI_qdma { + def _4s : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), + (op node:$Ra, + (v4i32 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; + + def _2d : PatFrag<(ops node:$Ra, node:$Rn, node:$Rm), + (op node:$Ra, + (v2i64 (int_arm_neon_vqdmull node:$Rn, node:$Rm)))>; +} + +defm Neon_qdmlal : NI_qdma; +defm Neon_qdmlsl : NI_qdma; + +multiclass NI_2VEL_v3_qdma_pat { + def : NI_2VE_laneq(subop # "_4s4h"), neon_uimm3_bare, + !cast(op # "_4s"), VPR128, VPR64, VPR128Lo, + v4i32, v4i16, v8i16>; + + def : NI_2VE_laneq(subop # "_2d2s"), neon_uimm2_bare, + !cast(op # "_2d"), VPR128, VPR64, VPR128, + v2i64, v2i32, v4i32>; + + def : NI_2VEL2_laneq(subop # "_4s8h"), neon_uimm3_bare, + !cast(op # "_4s"), VPR128Lo, + v4i32, v8i16, v8i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_laneq(subop # "_2d4s"), neon_uimm2_bare, + !cast(op # "_2d"), VPR128, + v2i64, v4i32, v4i32, v2i32, Neon_High4S>; + + def : NI_2VEL2_lane0(subop # "_4s8h"), + !cast(op # "_4s"), + v4i32, v8i16, v4i16, Neon_High8H, DUP8h>; + + def : NI_2VEL2_lane0(subop # "_2d4s"), + !cast(op # "_2d"), + v2i64, v4i32, v2i32, Neon_High4S, DUP4s>; + + // Index can only be half of the max value for lane in 64-bit vector + + def : NI_2VE_lane(subop # "_4s4h"), neon_uimm2_bare, + !cast(op # "_4s"), VPR128, VPR64, VPR64Lo, + v4i32, v4i16, v4i16>; + + def : NI_2VE_lane(subop # "_2d2s"), neon_uimm1_bare, + !cast(op # "_2d"), VPR128, VPR64, VPR64, + v2i64, v2i32, v2i32>; + + def : NI_2VEL2_lane(subop # "_4s8h"), neon_uimm2_bare, + !cast(op # "_4s"), VPR64Lo, + v4i32, v8i16, v4i16, v4i16, Neon_High8H>; + + def : NI_2VEL2_lane(subop # "_2d4s"), neon_uimm1_bare, + !cast(op # "_2d"), VPR64, + v2i64, v4i32, v2i32, v2i32, Neon_High4S>; +} + +defm SQDMLAL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLALvve", "Neon_qdmlal">; +defm SQDMLSL_lane_v3 : NI_2VEL_v3_qdma_pat<"SQDMLSLvve", "Neon_qdmlsl">; + +// End of implementation for instruction 
class (3V Elem) + +class NeonI_REV size, bit Q, bit U, + bits<5> opcode, RegisterOperand ResVPR, ValueType ResTy, + SDPatternOperator Neon_Rev> + : NeonI_2VMisc ; + +def REV64_16b : NeonI_REV<"rev64", "16b", 0b00, 0b1, 0b0, 0b00000, VPR128, + v16i8, Neon_rev64>; +def REV64_8h : NeonI_REV<"rev64", "8h", 0b01, 0b1, 0b0, 0b00000, VPR128, + v8i16, Neon_rev64>; +def REV64_4s : NeonI_REV<"rev64", "4s", 0b10, 0b1, 0b0, 0b00000, VPR128, + v4i32, Neon_rev64>; +def REV64_8b : NeonI_REV<"rev64", "8b", 0b00, 0b0, 0b0, 0b00000, VPR64, + v8i8, Neon_rev64>; +def REV64_4h : NeonI_REV<"rev64", "4h", 0b01, 0b0, 0b0, 0b00000, VPR64, + v4i16, Neon_rev64>; +def REV64_2s : NeonI_REV<"rev64", "2s", 0b10, 0b0, 0b0, 0b00000, VPR64, + v2i32, Neon_rev64>; + +def : Pat<(v4f32 (Neon_rev64 (v4f32 VPR128:$Rn))), (REV64_4s VPR128:$Rn)>; +def : Pat<(v2f32 (Neon_rev64 (v2f32 VPR64:$Rn))), (REV64_2s VPR64:$Rn)>; + +def REV32_16b : NeonI_REV<"rev32", "16b", 0b00, 0b1, 0b1, 0b00000, VPR128, + v16i8, Neon_rev32>; +def REV32_8h : NeonI_REV<"rev32", "8h", 0b01, 0b1, 0b1, 0b00000, VPR128, + v8i16, Neon_rev32>; +def REV32_8b : NeonI_REV<"rev32", "8b", 0b00, 0b0, 0b1, 0b00000, VPR64, + v8i8, Neon_rev32>; +def REV32_4h : NeonI_REV<"rev32", "4h", 0b01, 0b0, 0b1, 0b00000, VPR64, + v4i16, Neon_rev32>; + +def REV16_16b : NeonI_REV<"rev16", "16b", 0b00, 0b1, 0b0, 0b00001, VPR128, + v16i8, Neon_rev16>; +def REV16_8b : NeonI_REV<"rev16", "8b", 0b00, 0b0, 0b0, 0b00001, VPR64, + v8i8, Neon_rev16>; + +multiclass NeonI_PairwiseAdd opcode, + SDPatternOperator Neon_Padd> { + def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.8h, $Rn.16b", + [(set (v8i16 VPR128:$Rd), + (v8i16 (Neon_Padd (v16i8 VPR128:$Rn))))], + NoItinerary>; + + def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.4h, $Rn.8b", + [(set (v4i16 VPR64:$Rd), + (v4i16 (Neon_Padd (v8i8 VPR64:$Rn))))], + NoItinerary>; + + def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.8h", + [(set (v4i32 VPR128:$Rd), + (v4i32 (Neon_Padd (v8i16 VPR128:$Rn))))], + NoItinerary>; + + def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.4h", + [(set (v2i32 VPR64:$Rd), + (v2i32 (Neon_Padd (v4i16 VPR64:$Rn))))], + NoItinerary>; + + def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.4s", + [(set (v2i64 VPR128:$Rd), + (v2i64 (Neon_Padd (v4i32 VPR128:$Rn))))], + NoItinerary>; + + def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.1d, $Rn.2s", + [(set (v1i64 VPR64:$Rd), + (v1i64 (Neon_Padd (v2i32 VPR64:$Rn))))], + NoItinerary>; +} + +defm SADDLP : NeonI_PairwiseAdd<"saddlp", 0b0, 0b00010, + int_arm_neon_vpaddls>; +defm UADDLP : NeonI_PairwiseAdd<"uaddlp", 0b1, 0b00010, + int_arm_neon_vpaddlu>; + +multiclass NeonI_PairwiseAddAcc opcode, + SDPatternOperator Neon_Padd> { + let Constraints = "$src = $Rd" in { + def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.8h, $Rn.16b", + [(set (v8i16 VPR128:$Rd), + (v8i16 (Neon_Padd + (v8i16 VPR128:$src), (v16i8 VPR128:$Rn))))], + NoItinerary>; + + def 8b4h : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.4h, $Rn.8b", + [(set (v4i16 VPR64:$Rd), + (v4i16 (Neon_Padd + (v4i16 VPR64:$src), (v8i8 VPR64:$Rn))))], + NoItinerary>; + + def 8h4s : 
NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.8h", + [(set (v4i32 VPR128:$Rd), + (v4i32 (Neon_Padd + (v4i32 VPR128:$src), (v8i16 VPR128:$Rn))))], + NoItinerary>; + + def 4h2s : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.4h", + [(set (v2i32 VPR64:$Rd), + (v2i32 (Neon_Padd + (v2i32 VPR64:$src), (v4i16 VPR64:$Rn))))], + NoItinerary>; + + def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.4s", + [(set (v2i64 VPR128:$Rd), + (v2i64 (Neon_Padd + (v2i64 VPR128:$src), (v4i32 VPR128:$Rn))))], + NoItinerary>; + + def 2s1d : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.1d, $Rn.2s", + [(set (v1i64 VPR64:$Rd), + (v1i64 (Neon_Padd + (v1i64 VPR64:$src), (v2i32 VPR64:$Rn))))], + NoItinerary>; + } +} + +defm SADALP : NeonI_PairwiseAddAcc<"sadalp", 0b0, 0b00110, + int_arm_neon_vpadals>; +defm UADALP : NeonI_PairwiseAddAcc<"uadalp", 0b1, 0b00110, + int_arm_neon_vpadalu>; + +multiclass NeonI_2VMisc_BHSDsize_1Arg opcode> { + def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.16b, $Rn.16b", + [], NoItinerary>; + + def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.8h, $Rn.8h", + [], NoItinerary>; + + def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [], NoItinerary>; + + def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.2d", + [], NoItinerary>; + + def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.8b, $Rn.8b", + [], NoItinerary>; + + def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.4h, $Rn.4h", + [], NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [], NoItinerary>; +} + +defm SQABS : NeonI_2VMisc_BHSDsize_1Arg<"sqabs", 0b0, 0b00111>; +defm SQNEG : NeonI_2VMisc_BHSDsize_1Arg<"sqneg", 0b1, 0b00111>; +defm ABS : NeonI_2VMisc_BHSDsize_1Arg<"abs", 0b0, 0b01011>; +defm NEG : NeonI_2VMisc_BHSDsize_1Arg<"neg", 0b1, 0b01011>; + +multiclass NeonI_2VMisc_BHSD_1Arg_Pattern { + def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$Rn))), + (v16i8 (!cast(Prefix # 16b) (v16i8 VPR128:$Rn)))>; + + def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$Rn))), + (v8i16 (!cast(Prefix # 8h) (v8i16 VPR128:$Rn)))>; + + def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$Rn))), + (v4i32 (!cast(Prefix # 4s) (v4i32 VPR128:$Rn)))>; + + def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$Rn))), + (v2i64 (!cast(Prefix # 2d) (v2i64 VPR128:$Rn)))>; + + def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$Rn))), + (v8i8 (!cast(Prefix # 8b) (v8i8 VPR64:$Rn)))>; + + def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$Rn))), + (v4i16 (!cast(Prefix # 4h) (v4i16 VPR64:$Rn)))>; + + def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$Rn))), + (v2i32 (!cast(Prefix # 2s) (v2i32 VPR64:$Rn)))>; +} + +defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQABS", int_arm_neon_vqabs>; +defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"SQNEG", int_arm_neon_vqneg>; +defm : NeonI_2VMisc_BHSD_1Arg_Pattern<"ABS", int_arm_neon_vabs>; + +def : Pat<(v16i8 (sub + (v16i8 Neon_AllZero), + (v16i8 VPR128:$Rn))), + (v16i8 (NEG16b (v16i8 VPR128:$Rn)))>; +def : Pat<(v8i8 (sub + (v8i8 Neon_AllZero), + (v8i8 VPR64:$Rn))), + (v8i8 (NEG8b 
(v8i8 VPR64:$Rn)))>; +def : Pat<(v8i16 (sub + (v8i16 (bitconvert (v16i8 Neon_AllZero))), + (v8i16 VPR128:$Rn))), + (v8i16 (NEG8h (v8i16 VPR128:$Rn)))>; +def : Pat<(v4i16 (sub + (v4i16 (bitconvert (v8i8 Neon_AllZero))), + (v4i16 VPR64:$Rn))), + (v4i16 (NEG4h (v4i16 VPR64:$Rn)))>; +def : Pat<(v4i32 (sub + (v4i32 (bitconvert (v16i8 Neon_AllZero))), + (v4i32 VPR128:$Rn))), + (v4i32 (NEG4s (v4i32 VPR128:$Rn)))>; +def : Pat<(v2i32 (sub + (v2i32 (bitconvert (v8i8 Neon_AllZero))), + (v2i32 VPR64:$Rn))), + (v2i32 (NEG2s (v2i32 VPR64:$Rn)))>; +def : Pat<(v2i64 (sub + (v2i64 (bitconvert (v16i8 Neon_AllZero))), + (v2i64 VPR128:$Rn))), + (v2i64 (NEG2d (v2i64 VPR128:$Rn)))>; + +multiclass NeonI_2VMisc_BHSDsize_2Args opcode> { + let Constraints = "$src = $Rd" in { + def 16b : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.16b, $Rn.16b", + [], NoItinerary>; + + def 8h : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.8h, $Rn.8h", + [], NoItinerary>; + + def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [], NoItinerary>; + + def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.2d", + [], NoItinerary>; + + def 8b : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.8b, $Rn.8b", + [], NoItinerary>; + + def 4h : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.4h, $Rn.4h", + [], NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$src, VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [], NoItinerary>; + } +} + +defm SUQADD : NeonI_2VMisc_BHSDsize_2Args<"suqadd", 0b0, 0b00011>; +defm USQADD : NeonI_2VMisc_BHSDsize_2Args<"usqadd", 0b1, 0b00011>; + +multiclass NeonI_2VMisc_BHSD_2Args_Pattern { + def : Pat<(v16i8 (Neon_Op (v16i8 VPR128:$src), (v16i8 VPR128:$Rn))), + (v16i8 (!cast(Prefix # 16b) + (v16i8 VPR128:$src), (v16i8 VPR128:$Rn)))>; + + def : Pat<(v8i16 (Neon_Op (v8i16 VPR128:$src), (v8i16 VPR128:$Rn))), + (v8i16 (!cast(Prefix # 8h) + (v8i16 VPR128:$src), (v8i16 VPR128:$Rn)))>; + + def : Pat<(v4i32 (Neon_Op (v4i32 VPR128:$src), (v4i32 VPR128:$Rn))), + (v4i32 (!cast(Prefix # 4s) + (v4i32 VPR128:$src), (v4i32 VPR128:$Rn)))>; + + def : Pat<(v2i64 (Neon_Op (v2i64 VPR128:$src), (v2i64 VPR128:$Rn))), + (v2i64 (!cast(Prefix # 2d) + (v2i64 VPR128:$src), (v2i64 VPR128:$Rn)))>; + + def : Pat<(v8i8 (Neon_Op (v8i8 VPR64:$src), (v8i8 VPR64:$Rn))), + (v8i8 (!cast(Prefix # 8b) + (v8i8 VPR64:$src), (v8i8 VPR64:$Rn)))>; + + def : Pat<(v4i16 (Neon_Op (v4i16 VPR64:$src), (v4i16 VPR64:$Rn))), + (v4i16 (!cast(Prefix # 4h) + (v4i16 VPR64:$src), (v4i16 VPR64:$Rn)))>; + + def : Pat<(v2i32 (Neon_Op (v2i32 VPR64:$src), (v2i32 VPR64:$Rn))), + (v2i32 (!cast(Prefix # 2s) + (v2i32 VPR64:$src), (v2i32 VPR64:$Rn)))>; +} + +defm : NeonI_2VMisc_BHSD_2Args_Pattern<"SUQADD", int_aarch64_neon_suqadd>; +defm : NeonI_2VMisc_BHSD_2Args_Pattern<"USQADD", int_aarch64_neon_usqadd>; + +multiclass NeonI_2VMisc_BHSsizes { + def 16b : NeonI_2VMisc<0b1, U, 0b00, 0b00100, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.16b, $Rn.16b", + [(set (v16i8 VPR128:$Rd), + (v16i8 (Neon_Op (v16i8 VPR128:$Rn))))], + NoItinerary>; + + def 8h : NeonI_2VMisc<0b1, U, 0b01, 0b00100, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.8h, $Rn.8h", + [(set (v8i16 
VPR128:$Rd), + (v8i16 (Neon_Op (v8i16 VPR128:$Rn))))], + NoItinerary>; + + def 4s : NeonI_2VMisc<0b1, U, 0b10, 0b00100, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [(set (v4i32 VPR128:$Rd), + (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], + NoItinerary>; + + def 8b : NeonI_2VMisc<0b0, U, 0b00, 0b00100, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.8b, $Rn.8b", + [(set (v8i8 VPR64:$Rd), + (v8i8 (Neon_Op (v8i8 VPR64:$Rn))))], + NoItinerary>; + + def 4h : NeonI_2VMisc<0b0, U, 0b01, 0b00100, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.4h, $Rn.4h", + [(set (v4i16 VPR64:$Rd), + (v4i16 (Neon_Op (v4i16 VPR64:$Rn))))], + NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, 0b10, 0b00100, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [(set (v2i32 VPR64:$Rd), + (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], + NoItinerary>; +} + +defm CLS : NeonI_2VMisc_BHSsizes<"cls", 0b0, int_arm_neon_vcls>; +defm CLZ : NeonI_2VMisc_BHSsizes<"clz", 0b1, ctlz>; + +multiclass NeonI_2VMisc_Bsize size, + bits<5> Opcode> { + def 16b : NeonI_2VMisc<0b1, U, size, Opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.16b, $Rn.16b", + [], NoItinerary>; + + def 8b : NeonI_2VMisc<0b0, U, size, Opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.8b, $Rn.8b", + [], NoItinerary>; +} + +defm CNT : NeonI_2VMisc_Bsize<"cnt", 0b0, 0b00, 0b00101>; +defm NOT : NeonI_2VMisc_Bsize<"not", 0b1, 0b00, 0b00101>; +defm RBIT : NeonI_2VMisc_Bsize<"rbit", 0b1, 0b01, 0b00101>; + +def : NeonInstAlias<"mvn $Rd.16b, $Rn.16b", + (NOT16b VPR128:$Rd, VPR128:$Rn), 0>; +def : NeonInstAlias<"mvn $Rd.8b, $Rn.8b", + (NOT8b VPR64:$Rd, VPR64:$Rn), 0>; + +def : Pat<(v16i8 (ctpop (v16i8 VPR128:$Rn))), + (v16i8 (CNT16b (v16i8 VPR128:$Rn)))>; +def : Pat<(v8i8 (ctpop (v8i8 VPR64:$Rn))), + (v8i8 (CNT8b (v8i8 VPR64:$Rn)))>; + +def : Pat<(v16i8 (xor + (v16i8 VPR128:$Rn), + (v16i8 Neon_AllOne))), + (v16i8 (NOT16b (v16i8 VPR128:$Rn)))>; +def : Pat<(v8i8 (xor + (v8i8 VPR64:$Rn), + (v8i8 Neon_AllOne))), + (v8i8 (NOT8b (v8i8 VPR64:$Rn)))>; +def : Pat<(v8i16 (xor + (v8i16 VPR128:$Rn), + (v8i16 (bitconvert (v16i8 Neon_AllOne))))), + (NOT16b VPR128:$Rn)>; +def : Pat<(v4i16 (xor + (v4i16 VPR64:$Rn), + (v4i16 (bitconvert (v8i8 Neon_AllOne))))), + (NOT8b VPR64:$Rn)>; +def : Pat<(v4i32 (xor + (v4i32 VPR128:$Rn), + (v4i32 (bitconvert (v16i8 Neon_AllOne))))), + (NOT16b VPR128:$Rn)>; +def : Pat<(v2i32 (xor + (v2i32 VPR64:$Rn), + (v2i32 (bitconvert (v8i8 Neon_AllOne))))), + (NOT8b VPR64:$Rn)>; +def : Pat<(v2i64 (xor + (v2i64 VPR128:$Rn), + (v2i64 (bitconvert (v16i8 Neon_AllOne))))), + (NOT16b VPR128:$Rn)>; + +def : Pat<(v16i8 (int_aarch64_neon_rbit (v16i8 VPR128:$Rn))), + (v16i8 (RBIT16b (v16i8 VPR128:$Rn)))>; +def : Pat<(v8i8 (int_aarch64_neon_rbit (v8i8 VPR64:$Rn))), + (v8i8 (RBIT8b (v8i8 VPR64:$Rn)))>; + +multiclass NeonI_2VMisc_SDsizes opcode, + SDPatternOperator Neon_Op> { + def 4s : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [(set (v4f32 VPR128:$Rd), + (v4f32 (Neon_Op (v4f32 VPR128:$Rn))))], + NoItinerary>; + + def 2d : NeonI_2VMisc<0b1, U, 0b11, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.2d", + [(set (v2f64 VPR128:$Rd), + (v2f64 (Neon_Op (v2f64 VPR128:$Rn))))], + NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [(set (v2f32 VPR64:$Rd), + (v2f32 (Neon_Op (v2f32 VPR64:$Rn))))], + NoItinerary>; +} + +defm FABS : NeonI_2VMisc_SDsizes<"fabs", 
0b0, 0b01111, fabs>; +defm FNEG : NeonI_2VMisc_SDsizes<"fneg", 0b1, 0b01111, fneg>; + +multiclass NeonI_2VMisc_HSD_Narrow opcode> { + def 8h8b : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.8b, $Rn.8h", + [], NoItinerary>; + + def 4s4h : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4h, $Rn.4s", + [], NoItinerary>; + + def 2d2s : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2s, $Rn.2d", + [], NoItinerary>; + + let Constraints = "$Rd = $src" in { + def 8h16b : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.16b, $Rn.8h", + [], NoItinerary>; + + def 4s8h : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.8h, $Rn.4s", + [], NoItinerary>; + + def 2d4s : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.4s, $Rn.2d", + [], NoItinerary>; + } +} + +defm XTN : NeonI_2VMisc_HSD_Narrow<"xtn", 0b0, 0b10010>; +defm SQXTUN : NeonI_2VMisc_HSD_Narrow<"sqxtun", 0b1, 0b10010>; +defm SQXTN : NeonI_2VMisc_HSD_Narrow<"sqxtn", 0b0, 0b10100>; +defm UQXTN : NeonI_2VMisc_HSD_Narrow<"uqxtn", 0b1, 0b10100>; + +multiclass NeonI_2VMisc_Narrow_Patterns { + def : Pat<(v8i8 (Neon_Op (v8i16 VPR128:$Rn))), + (v8i8 (!cast(Prefix # 8h8b) (v8i16 VPR128:$Rn)))>; + + def : Pat<(v4i16 (Neon_Op (v4i32 VPR128:$Rn))), + (v4i16 (!cast(Prefix # 4s4h) (v4i32 VPR128:$Rn)))>; + + def : Pat<(v2i32 (Neon_Op (v2i64 VPR128:$Rn))), + (v2i32 (!cast(Prefix # 2d2s) (v2i64 VPR128:$Rn)))>; + + def : Pat<(v16i8 (concat_vectors + (v8i8 VPR64:$src), + (v8i8 (Neon_Op (v8i16 VPR128:$Rn))))), + (!cast(Prefix # 8h16b) + (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), + VPR128:$Rn)>; + + def : Pat<(v8i16 (concat_vectors + (v4i16 VPR64:$src), + (v4i16 (Neon_Op (v4i32 VPR128:$Rn))))), + (!cast(Prefix # 4s8h) + (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), + VPR128:$Rn)>; + + def : Pat<(v4i32 (concat_vectors + (v2i32 VPR64:$src), + (v2i32 (Neon_Op (v2i64 VPR128:$Rn))))), + (!cast(Prefix # 2d4s) + (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64), + VPR128:$Rn)>; +} + +defm : NeonI_2VMisc_Narrow_Patterns<"XTN", trunc>; +defm : NeonI_2VMisc_Narrow_Patterns<"SQXTUN", int_arm_neon_vqmovnsu>; +defm : NeonI_2VMisc_Narrow_Patterns<"SQXTN", int_arm_neon_vqmovns>; +defm : NeonI_2VMisc_Narrow_Patterns<"UQXTN", int_arm_neon_vqmovnu>; + +multiclass NeonI_2VMisc_SHIFT opcode> { + let DecoderMethod = "DecodeSHLLInstruction" in { + def 8b8h : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR128:$Rd), + (ins VPR64:$Rn, uimm_exact8:$Imm), + asmop # "\t$Rd.8h, $Rn.8b, $Imm", + [], NoItinerary>; + + def 4h4s : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR128:$Rd), + (ins VPR64:$Rn, uimm_exact16:$Imm), + asmop # "\t$Rd.4s, $Rn.4h, $Imm", + [], NoItinerary>; + + def 2s2d : NeonI_2VMisc<0b0, U, 0b10, opcode, + (outs VPR128:$Rd), + (ins VPR64:$Rn, uimm_exact32:$Imm), + asmop # "\t$Rd.2d, $Rn.2s, $Imm", + [], NoItinerary>; + + def 16b8h : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), + (ins VPR128:$Rn, uimm_exact8:$Imm), + asmop # "2\t$Rd.8h, $Rn.16b, $Imm", + [], NoItinerary>; + + def 8h4s : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), + (ins VPR128:$Rn, uimm_exact16:$Imm), + asmop # "2\t$Rd.4s, $Rn.8h, $Imm", + [], NoItinerary>; + + def 4s2d : NeonI_2VMisc<0b1, U, 0b10, opcode, + (outs VPR128:$Rd), + (ins VPR128:$Rn, uimm_exact32:$Imm), + asmop # "2\t$Rd.2d, $Rn.4s, $Imm", 
+ [], NoItinerary>; + } +} + +defm SHLL : NeonI_2VMisc_SHIFT<"shll", 0b1, 0b10011>; + +class NeonI_SHLL_Patterns + : Pat<(DesTy (shl + (DesTy (ExtOp (OpTy VPR64:$Rn))), + (DesTy (Neon_vdup + (i32 Neon_Imm:$Imm))))), + (!cast("SHLL" # suffix) VPR64:$Rn, Neon_Imm:$Imm)>; + +class NeonI_SHLL_High_Patterns + : Pat<(DesTy (shl + (DesTy (ExtOp + (OpTy (GetHigh VPR128:$Rn)))), + (DesTy (Neon_vdup + (i32 Neon_Imm:$Imm))))), + (!cast("SHLL" # suffix) VPR128:$Rn, Neon_Imm:$Imm)>; + +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_Patterns; +def : NeonI_SHLL_High_Patterns; +def : NeonI_SHLL_High_Patterns; +def : NeonI_SHLL_High_Patterns; +def : NeonI_SHLL_High_Patterns; +def : NeonI_SHLL_High_Patterns; +def : NeonI_SHLL_High_Patterns; + +multiclass NeonI_2VMisc_SD_Narrow opcode> { + def 4s4h : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4h, $Rn.4s", + [], NoItinerary>; + + def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2s, $Rn.2d", + [], NoItinerary>; + + let Constraints = "$src = $Rd" in { + def 4s8h : NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.8h, $Rn.4s", + [], NoItinerary>; + + def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.4s, $Rn.2d", + [], NoItinerary>; + } +} + +defm FCVTN : NeonI_2VMisc_SD_Narrow<"fcvtn", 0b0, 0b10110>; + +multiclass NeonI_2VMisc_Narrow_Pattern { + + def : Pat<(v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))), + (!cast(prefix # "4s4h") (v4f32 VPR128:$Rn))>; + + def : Pat<(v8i16 (concat_vectors + (v4i16 VPR64:$src), + (v4i16 (f32_to_f16_Op (v4f32 VPR128:$Rn))))), + (!cast(prefix # "4s8h") + (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), + (v4f32 VPR128:$Rn))>; + + def : Pat<(v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))), + (!cast(prefix # "2d2s") (v2f64 VPR128:$Rn))>; + + def : Pat<(v4f32 (concat_vectors + (v2f32 VPR64:$src), + (v2f32 (f64_to_f32_Op (v2f64 VPR128:$Rn))))), + (!cast(prefix # "2d4s") + (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), + (v2f64 VPR128:$Rn))>; +} + +defm : NeonI_2VMisc_Narrow_Pattern<"FCVTN", int_arm_neon_vcvtfp2hf, fround>; + +multiclass NeonI_2VMisc_D_Narrow opcode> { + def 2d2s : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR64:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2s, $Rn.2d", + [], NoItinerary>; + + def 2d4s : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$src, VPR128:$Rn), + asmop # "2\t$Rd.4s, $Rn.2d", + [], NoItinerary> { + let Constraints = "$src = $Rd"; + } + + def : Pat<(v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))), + (!cast(prefix # "2d2s") VPR128:$Rn)>; + + def : Pat<(v4f32 (concat_vectors + (v2f32 VPR64:$src), + (v2f32 (int_aarch64_neon_fcvtxn (v2f64 VPR128:$Rn))))), + (!cast(prefix # "2d4s") + (v4f32 (SUBREG_TO_REG (i32 0), VPR64:$src, sub_64)), + VPR128:$Rn)>; +} + +defm FCVTXN : NeonI_2VMisc_D_Narrow<"fcvtxn","FCVTXN", 0b1, 0b10110>; + +def Neon_High4Float : PatFrag<(ops node:$in), + (extract_subvector (v4f32 node:$in), (iPTR 2))>; + +multiclass NeonI_2VMisc_HS_Extend opcode> { + def 4h4s : NeonI_2VMisc<0b0, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.4s, $Rn.4h", + [], NoItinerary>; + + def 2s2d : NeonI_2VMisc<0b0, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2d, $Rn.2s", + [], NoItinerary>; + + def 8h4s : 
NeonI_2VMisc<0b1, U, 0b00, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "2\t$Rd.4s, $Rn.8h", + [], NoItinerary>; + + def 4s2d : NeonI_2VMisc<0b1, U, 0b01, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "2\t$Rd.2d, $Rn.4s", + [], NoItinerary>; +} + +defm FCVTL : NeonI_2VMisc_HS_Extend<"fcvtl", 0b0, 0b10111>; + +multiclass NeonI_2VMisc_Extend_Pattern { + def : Pat<(v4f32 (int_arm_neon_vcvthf2fp (v4i16 VPR64:$Rn))), + (!cast(prefix # "4h4s") VPR64:$Rn)>; + + def : Pat<(v4f32 (int_arm_neon_vcvthf2fp + (v4i16 (Neon_High8H + (v8i16 VPR128:$Rn))))), + (!cast(prefix # "8h4s") VPR128:$Rn)>; + + def : Pat<(v2f64 (fextend (v2f32 VPR64:$Rn))), + (!cast(prefix # "2s2d") VPR64:$Rn)>; + + def : Pat<(v2f64 (fextend + (v2f32 (Neon_High4Float + (v4f32 VPR128:$Rn))))), + (!cast(prefix # "4s2d") VPR128:$Rn)>; +} + +defm : NeonI_2VMisc_Extend_Pattern<"FCVTL">; + +multiclass NeonI_2VMisc_SD_Conv opcode, + ValueType ResTy4s, ValueType OpTy4s, + ValueType ResTy2d, ValueType OpTy2d, + ValueType ResTy2s, ValueType OpTy2s, + SDPatternOperator Neon_Op> { + + def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [(set (ResTy4s VPR128:$Rd), + (ResTy4s (Neon_Op (OpTy4s VPR128:$Rn))))], + NoItinerary>; + + def 2d : NeonI_2VMisc<0b1, U, {Size, 0b1}, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.2d, $Rn.2d", + [(set (ResTy2d VPR128:$Rd), + (ResTy2d (Neon_Op (OpTy2d VPR128:$Rn))))], + NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [(set (ResTy2s VPR64:$Rd), + (ResTy2s (Neon_Op (OpTy2s VPR64:$Rn))))], + NoItinerary>; +} + +multiclass NeonI_2VMisc_fp_to_int opcode, SDPatternOperator Neon_Op> { + defm _ : NeonI_2VMisc_SD_Conv; +} + +defm FCVTNS : NeonI_2VMisc_fp_to_int<"fcvtns", 0b0, 0b0, 0b11010, + int_aarch64_neon_fcvtns>; +defm FCVTNU : NeonI_2VMisc_fp_to_int<"fcvtnu", 0b0, 0b1, 0b11010, + int_aarch64_neon_fcvtnu>; +defm FCVTPS : NeonI_2VMisc_fp_to_int<"fcvtps", 0b1, 0b0, 0b11010, + int_aarch64_neon_fcvtps>; +defm FCVTPU : NeonI_2VMisc_fp_to_int<"fcvtpu", 0b1, 0b1, 0b11010, + int_aarch64_neon_fcvtpu>; +defm FCVTMS : NeonI_2VMisc_fp_to_int<"fcvtms", 0b0, 0b0, 0b11011, + int_aarch64_neon_fcvtms>; +defm FCVTMU : NeonI_2VMisc_fp_to_int<"fcvtmu", 0b0, 0b1, 0b11011, + int_aarch64_neon_fcvtmu>; +defm FCVTZS : NeonI_2VMisc_fp_to_int<"fcvtzs", 0b1, 0b0, 0b11011, fp_to_sint>; +defm FCVTZU : NeonI_2VMisc_fp_to_int<"fcvtzu", 0b1, 0b1, 0b11011, fp_to_uint>; +defm FCVTAS : NeonI_2VMisc_fp_to_int<"fcvtas", 0b0, 0b0, 0b11100, + int_aarch64_neon_fcvtas>; +defm FCVTAU : NeonI_2VMisc_fp_to_int<"fcvtau", 0b0, 0b1, 0b11100, + int_aarch64_neon_fcvtau>; + +multiclass NeonI_2VMisc_int_to_fp opcode, SDPatternOperator Neon_Op> { + defm _ : NeonI_2VMisc_SD_Conv; +} + +defm SCVTF : NeonI_2VMisc_int_to_fp<"scvtf", 0b0, 0b0, 0b11101, sint_to_fp>; +defm UCVTF : NeonI_2VMisc_int_to_fp<"ucvtf", 0b0, 0b1, 0b11101, uint_to_fp>; + +multiclass NeonI_2VMisc_fp_to_fp opcode, SDPatternOperator Neon_Op> { + defm _ : NeonI_2VMisc_SD_Conv; +} + +defm FRINTN : NeonI_2VMisc_fp_to_fp<"frintn", 0b0, 0b0, 0b11000, + int_aarch64_neon_frintn>; +defm FRINTA : NeonI_2VMisc_fp_to_fp<"frinta", 0b0, 0b1, 0b11000, frnd>; +defm FRINTP : NeonI_2VMisc_fp_to_fp<"frintp", 0b1, 0b0, 0b11000, fceil>; +defm FRINTM : NeonI_2VMisc_fp_to_fp<"frintm", 0b0, 0b0, 0b11001, ffloor>; +defm FRINTX : NeonI_2VMisc_fp_to_fp<"frintx", 0b0, 0b1, 0b11001, frint>; +defm FRINTZ : 
NeonI_2VMisc_fp_to_fp<"frintz", 0b1, 0b0, 0b11001, ftrunc>; +defm FRINTI : NeonI_2VMisc_fp_to_fp<"frinti", 0b1, 0b1, 0b11001, fnearbyint>; +defm FRECPE : NeonI_2VMisc_fp_to_fp<"frecpe", 0b1, 0b0, 0b11101, + int_arm_neon_vrecpe>; +defm FRSQRTE : NeonI_2VMisc_fp_to_fp<"frsqrte", 0b1, 0b1, 0b11101, + int_arm_neon_vrsqrte>; +defm FSQRT : NeonI_2VMisc_fp_to_fp<"fsqrt", 0b1, 0b1, 0b11111, fsqrt>; + +multiclass NeonI_2VMisc_S_Conv opcode, SDPatternOperator Neon_Op> { + def 4s : NeonI_2VMisc<0b1, U, {Size, 0b0}, opcode, + (outs VPR128:$Rd), (ins VPR128:$Rn), + asmop # "\t$Rd.4s, $Rn.4s", + [(set (v4i32 VPR128:$Rd), + (v4i32 (Neon_Op (v4i32 VPR128:$Rn))))], + NoItinerary>; + + def 2s : NeonI_2VMisc<0b0, U, {Size, 0b0}, opcode, + (outs VPR64:$Rd), (ins VPR64:$Rn), + asmop # "\t$Rd.2s, $Rn.2s", + [(set (v2i32 VPR64:$Rd), + (v2i32 (Neon_Op (v2i32 VPR64:$Rn))))], + NoItinerary>; +} + +defm URECPE : NeonI_2VMisc_S_Conv<"urecpe", 0b1, 0b0, 0b11100, + int_arm_neon_vrecpe>; +defm URSQRTE : NeonI_2VMisc_S_Conv<"ursqrte", 0b1, 0b1, 0b11100, + int_arm_neon_vrsqrte>; + +// Crypto Class +class NeonI_Cryptoaes_2v size, bits<5> opcode, + string asmop, SDPatternOperator opnode> + : NeonI_Crypto_AES{ + let Constraints = "$src = $Rd"; + let Predicates = [HasNEON, HasCrypto]; +} + +def AESE : NeonI_Cryptoaes_2v<0b00, 0b00100, "aese", int_arm_neon_aese>; +def AESD : NeonI_Cryptoaes_2v<0b00, 0b00101, "aesd", int_arm_neon_aesd>; + +class NeonI_Cryptoaes size, bits<5> opcode, + string asmop, SDPatternOperator opnode> + : NeonI_Crypto_AES; + +def AESMC : NeonI_Cryptoaes<0b00, 0b00110, "aesmc", int_arm_neon_aesmc>; +def AESIMC : NeonI_Cryptoaes<0b00, 0b00111, "aesimc", int_arm_neon_aesimc>; + +class NeonI_Cryptosha_vv size, bits<5> opcode, + string asmop, SDPatternOperator opnode> + : NeonI_Crypto_SHA { + let Constraints = "$src = $Rd"; + let Predicates = [HasNEON, HasCrypto]; +} + +def SHA1SU1 : NeonI_Cryptosha_vv<0b00, 0b00001, "sha1su1", + int_arm_neon_sha1su1>; +def SHA256SU0 : NeonI_Cryptosha_vv<0b00, 0b00010, "sha256su0", + int_arm_neon_sha256su0>; + +class NeonI_Cryptosha_ss size, bits<5> opcode, + string asmop, SDPatternOperator opnode> + : NeonI_Crypto_SHA { + let Predicates = [HasNEON, HasCrypto]; +} + +def SHA1H : NeonI_Cryptosha_ss<0b00, 0b00000, "sha1h", int_arm_neon_sha1h>; + +class NeonI_Cryptosha3_vvv size, bits<3> opcode, string asmop, + SDPatternOperator opnode> + : NeonI_Crypto_3VSHA { + let Constraints = "$src = $Rd"; + let Predicates = [HasNEON, HasCrypto]; +} + +def SHA1SU0 : NeonI_Cryptosha3_vvv<0b00, 0b011, "sha1su0", + int_arm_neon_sha1su0>; +def SHA256SU1 : NeonI_Cryptosha3_vvv<0b00, 0b110, "sha256su1", + int_arm_neon_sha256su1>; + +class NeonI_Cryptosha3_qqv size, bits<3> opcode, string asmop, + SDPatternOperator opnode> + : NeonI_Crypto_3VSHA { + let Constraints = "$src = $Rd"; + let Predicates = [HasNEON, HasCrypto]; +} + +def SHA256H : NeonI_Cryptosha3_qqv<0b00, 0b100, "sha256h", + int_arm_neon_sha256h>; +def SHA256H2 : NeonI_Cryptosha3_qqv<0b00, 0b101, "sha256h2", + int_arm_neon_sha256h2>; + +class NeonI_Cryptosha3_qsv size, bits<3> opcode, string asmop, + SDPatternOperator opnode> + : NeonI_Crypto_3VSHA { + let Constraints = "$src = $Rd"; + let Predicates = [HasNEON, HasCrypto]; +} + +def SHA1C : NeonI_Cryptosha3_qsv<0b00, 0b000, "sha1c", int_aarch64_neon_sha1c>; +def SHA1P : NeonI_Cryptosha3_qsv<0b00, 0b001, "sha1p", int_aarch64_neon_sha1p>; +def SHA1M : NeonI_Cryptosha3_qsv<0b00, 0b010, "sha1m", int_aarch64_neon_sha1m>; + +// +// Patterns for handling half-precision values +// + +// 
Convert f16 value coming in as i16 value to f32 +def : Pat<(f32 (f16_to_f32 (i32 (and (i32 GPR32:$Rn), 65535)))), + (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>; +def : Pat<(f32 (f16_to_f32 (i32 (assertzext GPR32:$Rn)))), + (FCVTsh (EXTRACT_SUBREG (FMOVsw GPR32:$Rn), sub_16))>; + +def : Pat<(f32 (f16_to_f32 (i32 (assertzext (i32 ( + f32_to_f16 (f32 FPR32:$Rn))))))), + (f32 FPR32:$Rn)>; + +// Patterns for vector extract of half-precision FP value in i16 storage type +def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract + (v4i16 VPR64:$Rn), neon_uimm2_bare:$Imm)), 65535)))), + (FCVTsh (f16 (DUPhv_H + (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + neon_uimm2_bare:$Imm)))>; + +def : Pat<(f32 (f16_to_f32 ( i32 (and (i32 (vector_extract + (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)), 65535)))), + (FCVTsh (f16 (DUPhv_H (v8i16 VPR128:$Rn), neon_uimm3_bare:$Imm)))>; + +// Patterns for vector insert of half-precision FP value 0 in i16 storage type +def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), + (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))), + (neon_uimm3_bare:$Imm))), + (v8i16 (INSELh (v8i16 VPR128:$Rn), + (v8i16 (SUBREG_TO_REG (i64 0), + (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)), + sub_16)), + neon_uimm3_bare:$Imm, 0))>; + +def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), + (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 0))))))), + (neon_uimm2_bare:$Imm))), + (v4i16 (EXTRACT_SUBREG + (v8i16 (INSELh + (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + (v8i16 (SUBREG_TO_REG (i64 0), + (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 WZR))), sub_16)), + sub_16)), + neon_uimm2_bare:$Imm, 0)), + sub_64))>; + +// Patterns for vector insert of half-precision FP value in i16 storage type +def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), + (i32 (assertsext (i32 (fp_to_sint + (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))), + (neon_uimm3_bare:$Imm))), + (v8i16 (INSELh (v8i16 VPR128:$Rn), + (v8i16 (SUBREG_TO_REG (i64 0), + (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)), + sub_16)), + neon_uimm3_bare:$Imm, 0))>; + +def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), + (i32 (assertsext (i32 (fp_to_sint + (f32 (f16_to_f32 (i32 (and (i32 GPR32:$src), 65535)))))))), + (neon_uimm2_bare:$Imm))), + (v4i16 (EXTRACT_SUBREG + (v8i16 (INSELh + (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + (v8i16 (SUBREG_TO_REG (i64 0), + (f16 (EXTRACT_SUBREG (f32 (FMOVsw (i32 GPR32:$src))), sub_16)), + sub_16)), + neon_uimm2_bare:$Imm, 0)), + sub_64))>; + +def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), + (i32 (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)), + (neon_uimm3_bare:$Imm1))), + (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src), + neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>; + +// Patterns for vector copy of half-precision FP value in i16 storage type +def : Pat<(v8i16 (vector_insert (v8i16 VPR128:$Rn), + (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32 + (vector_extract (v8i16 VPR128:$src), neon_uimm3_bare:$Imm2)), + 65535)))))))), + (neon_uimm3_bare:$Imm1))), + (v8i16 (INSELh (v8i16 VPR128:$Rn), (v8i16 VPR128:$src), + neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2))>; + +def : Pat<(v4i16 (vector_insert (v4i16 VPR64:$Rn), + (i32 (assertsext (i32 (fp_to_sint(f32 (f16_to_f32 (i32 (and (i32 + (vector_extract (v4i16 VPR64:$src), neon_uimm3_bare:$Imm2)), + 65535)))))))), + (neon_uimm3_bare:$Imm1))), + (v4i16 (EXTRACT_SUBREG + (v8i16 (INSELh + (v8i16 (SUBREG_TO_REG (i64 0), VPR64:$Rn, sub_64)), + 
(v8i16 (SUBREG_TO_REG (i64 0), VPR64:$src, sub_64)), + neon_uimm3_bare:$Imm1, neon_uimm3_bare:$Imm2)), + sub_64))>; + + diff --git a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp index 3d22330..8cfb968 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64MCInstLower.cpp @@ -109,6 +109,11 @@ bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO, case MachineOperand::MO_Immediate: MCOp = MCOperand::CreateImm(MO.getImm()); break; + case MachineOperand::MO_FPImmediate: { + assert(MO.getFPImm()->isZero() && "Only fp imm 0.0 is supported"); + MCOp = MCOperand::CreateFPImm(0.0); + break; + } case MachineOperand::MO_BlockAddress: MCOp = lowerSymbolOperand(MO, GetBlockAddressSymbol(MO.getBlockAddress())); break; @@ -116,7 +121,7 @@ bool AArch64AsmPrinter::lowerOperand(const MachineOperand &MO, MCOp = lowerSymbolOperand(MO, GetExternalSymbolSymbol(MO.getSymbolName())); break; case MachineOperand::MO_GlobalAddress: - MCOp = lowerSymbolOperand(MO, Mang->getSymbol(MO.getGlobal())); + MCOp = lowerSymbolOperand(MO, getSymbol(MO.getGlobal())); break; case MachineOperand::MO_MachineBasicBlock: MCOp = MCOperand::CreateExpr(MCSymbolRefExpr::Create( diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp index 20b0dcf..75ec44f 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -29,9 +29,8 @@ using namespace llvm; -AArch64RegisterInfo::AArch64RegisterInfo(const AArch64InstrInfo &tii, - const AArch64Subtarget &sti) - : AArch64GenRegisterInfo(AArch64::X30), TII(tii) { +AArch64RegisterInfo::AArch64RegisterInfo() + : AArch64GenRegisterInfo(AArch64::X30) { } const uint16_t * @@ -122,6 +121,8 @@ AArch64RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MBBI, return; } + const AArch64InstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); int MinOffset, MaxOffset, OffsetScale; if (MI.getOpcode() == AArch64::ADDxxi_lsl0_s) { MinOffset = 0; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h index bb64fd5..4d67943 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.h @@ -25,12 +25,7 @@ class AArch64InstrInfo; class AArch64Subtarget; struct AArch64RegisterInfo : public AArch64GenRegisterInfo { -private: - const AArch64InstrInfo &TII; - -public: - AArch64RegisterInfo(const AArch64InstrInfo &tii, - const AArch64Subtarget &sti); + AArch64RegisterInfo(); const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; diff --git a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td index bd79546..4e2022c 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/contrib/llvm/lib/Target/AArch64/AArch64RegisterInfo.td @@ -12,15 +12,25 @@ //===----------------------------------------------------------------------===// let Namespace = "AArch64" in { -def sub_128 : SubRegIndex; -def sub_64 : SubRegIndex; -def sub_32 : SubRegIndex; -def sub_16 : SubRegIndex; -def sub_8 : SubRegIndex; - -// The VPR registers are handled as sub-registers of FPR equivalents, but -// they're really the same thing. We give this concept a special index. 
-def sub_alias : SubRegIndex; +def sub_128 : SubRegIndex<128>; +def sub_64 : SubRegIndex<64>; +def sub_32 : SubRegIndex<32>; +def sub_16 : SubRegIndex<16>; +def sub_8 : SubRegIndex<8>; + +// Note: Code depends on these having consecutive numbers. +def qqsub : SubRegIndex<256, 256>; + +def qsub_0 : SubRegIndex<128>; +def qsub_1 : SubRegIndex<128, 128>; +def qsub_2 : ComposedSubRegIndex; +def qsub_3 : ComposedSubRegIndex; + +def dsub_0 : SubRegIndex<64>; +def dsub_1 : SubRegIndex<64, 64>; +def dsub_2 : ComposedSubRegIndex; +def dsub_3 : ComposedSubRegIndex; +def dsub_4 : ComposedSubRegIndex; } // Registers are identified with 5-bit ID numbers. @@ -137,60 +147,51 @@ foreach Index = 0-31 in { } -def FPR8 : RegisterClass<"AArch64", [i8], 8, +def FPR8 : RegisterClass<"AArch64", [i8, v1i8], 8, (sequence "B%u", 0, 31)> { } -def FPR16 : RegisterClass<"AArch64", [f16], 16, +def FPR16 : RegisterClass<"AArch64", [f16, v1i16], 16, (sequence "H%u", 0, 31)> { } -def FPR32 : RegisterClass<"AArch64", [f32], 32, +def FPR32 : RegisterClass<"AArch64", [f32, v1i32, v1f32], 32, (sequence "S%u", 0, 31)> { } -def FPR64 : RegisterClass<"AArch64", [f64], 64, - (sequence "D%u", 0, 31)> { -} +def FPR64 : RegisterClass<"AArch64", + [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], + 64, (sequence "D%u", 0, 31)>; -def FPR128 : RegisterClass<"AArch64", [f128], 128, - (sequence "Q%u", 0, 31)> { -} +def FPR128 : RegisterClass<"AArch64", + [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], + 128, (sequence "Q%u", 0, 31)>; +def FPR64Lo : RegisterClass<"AArch64", + [f64, v2f32, v2i32, v4i16, v8i8, v1i64, v1f64], + 64, (sequence "D%u", 0, 15)>; + +def FPR128Lo : RegisterClass<"AArch64", + [f128, v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], + 128, (sequence "Q%u", 0, 15)>; //===----------------------------------------------------------------------===// // Vector registers: //===----------------------------------------------------------------------===// -// NEON registers simply specify the overall vector, and it's expected that -// Instructions will individually specify the acceptable data layout. In -// principle this leaves two approaches open: -// + An operand, giving a single ADDvvv instruction (for example). This turns -// out to be unworkable in the assembly parser (without every Instruction -// having a "cvt" function, at least) because the constraints can't be -// properly enforced. It also complicates specifying patterns since each -// instruction will accept many types. -// + A bare token (e.g. ".2d"). This means the AsmParser has to know specific -// details about NEON registers, but simplifies most other details. -// -// The second approach was taken. - -foreach Index = 0-31 in { - def V # Index : AArch64RegWithSubs("Q" # Index)], - [sub_alias]>, - DwarfRegNum<[!add(Index, 64)]>; +def VPR64AsmOperand : AsmOperandClass { + let Name = "VPR"; + let PredicateMethod = "isReg"; + let RenderMethod = "addRegOperands"; } -// These two classes contain the same registers, which should be reasonably -// sensible for MC and allocation purposes, but allows them to be treated -// separately for things like stack spilling. 
-def VPR64 : RegisterClass<"AArch64", [v2f32, v2i32, v4i16, v8i8], 64, - (sequence "V%u", 0, 31)>; +def VPR64 : RegisterOperand; + +def VPR128 : RegisterOperand; + +def VPR64Lo : RegisterOperand; -def VPR128 : RegisterClass<"AArch64", - [v2f64, v2i64, v4f32, v4i32, v8i16, v16i8], 128, - (sequence "V%u", 0, 31)>; +def VPR128Lo : RegisterOperand; // Flags register def NZCV : Register<"nzcv"> { @@ -201,3 +202,90 @@ def FlagClass : RegisterClass<"AArch64", [i32], 32, (add NZCV)> { let CopyCost = -1; let isAllocatable = 0; } + +//===----------------------------------------------------------------------===// +// Consecutive vector registers +//===----------------------------------------------------------------------===// +// 2 Consecutive 64-bit registers: D0_D1, D1_D2, ..., D30_D31 +def Tuples2D : RegisterTuples<[dsub_0, dsub_1], + [(rotl FPR64, 0), (rotl FPR64, 1)]>; + +// 3 Consecutive 64-bit registers: D0_D1_D2, ..., D31_D0_D1 +def Tuples3D : RegisterTuples<[dsub_0, dsub_1, dsub_2], + [(rotl FPR64, 0), (rotl FPR64, 1), + (rotl FPR64, 2)]>; + +// 4 Consecutive 64-bit registers: D0_D1_D2_D3, ..., D31_D0_D1_D2 +def Tuples4D : RegisterTuples<[dsub_0, dsub_1, dsub_2, dsub_3], + [(rotl FPR64, 0), (rotl FPR64, 1), + (rotl FPR64, 2), (rotl FPR64, 3)]>; + +// 2 Consecutive 128-bit registers: Q0_Q1, Q1_Q2, ..., Q30_Q31 +def Tuples2Q : RegisterTuples<[qsub_0, qsub_1], + [(rotl FPR128, 0), (rotl FPR128, 1)]>; + +// 3 Consecutive 128-bit registers: Q0_Q1_Q2, ..., Q31_Q0_Q1 +def Tuples3Q : RegisterTuples<[qsub_0, qsub_1, qsub_2], + [(rotl FPR128, 0), (rotl FPR128, 1), + (rotl FPR128, 2)]>; + +// 4 Consecutive 128-bit registers: Q0_Q1_Q2_Q3, ..., Q31_Q0_Q1_Q2 +def Tuples4Q : RegisterTuples<[qsub_0, qsub_1, qsub_2, qsub_3], + [(rotl FPR128, 0), (rotl FPR128, 1), + (rotl FPR128, 2), (rotl FPR128, 3)]>; + +// The followings are super register classes to model 2/3/4 consecutive +// 64-bit/128-bit registers. + +def DPair : RegisterClass<"AArch64", [v2i64], 64, (add Tuples2D)>; + +def DTriple : RegisterClass<"AArch64", [untyped], 64, (add Tuples3D)> { + let Size = 192; // 3 x 64 bits, we have no predefined type of that size. +} + +def DQuad : RegisterClass<"AArch64", [v4i64], 64, (add Tuples4D)>; + +def QPair : RegisterClass<"AArch64", [v4i64], 128, (add Tuples2Q)>; + +def QTriple : RegisterClass<"AArch64", [untyped], 128, (add Tuples3Q)> { + let Size = 384; // 3 x 128 bits, we have no predefined type of that size. 
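+  // (Size presumably has to be spelled out by hand here because, with
+  //  untyped contents, TableGen has no ValueType from which to derive the
+  //  spill size of this class.)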
+} + +def QQuad : RegisterClass<"AArch64", [v8i64], 128, (add Tuples4Q)>; + + +// The followings are vector list operands +multiclass VectorList_operands { + def _asmoperand : AsmOperandClass { + let Name = PREFIX # LAYOUT # Count; + let RenderMethod = "addVectorListOperands"; + let PredicateMethod = + "isVectorList"; + let ParserMethod = "ParseVectorList"; + } + + def _operand : RegisterOperand"> { + let ParserMatchClass = + !cast(PREFIX # LAYOUT # "_asmoperand"); + } +} + +multiclass VectorList_BHSD { + defm 8B : VectorList_operands; + defm 4H : VectorList_operands; + defm 2S : VectorList_operands; + defm 1D : VectorList_operands; + defm 16B : VectorList_operands; + defm 8H : VectorList_operands; + defm 4S : VectorList_operands; + defm 2D : VectorList_operands; +} + +// Vector list operand with 1/2/3/4 registers: VOne8B_operand,..., VQuad2D_operand +defm VOne : VectorList_BHSD<"VOne", 1, FPR64, FPR128>; +defm VPair : VectorList_BHSD<"VPair", 2, DPair, QPair>; +defm VTriple : VectorList_BHSD<"VTriple", 3, DTriple, QTriple>; +defm VQuad : VectorList_BHSD<"VQuad", 4, DQuad, QQuad>; \ No newline at end of file diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp index d17b738..5c693c1 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.cpp @@ -25,13 +25,31 @@ using namespace llvm; +// Pin the vtable to this file. +void AArch64Subtarget::anchor() {} + AArch64Subtarget::AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS) - : AArch64GenSubtargetInfo(TT, CPU, FS) - , HasNEON(true) - , HasCrypto(true) - , TargetTriple(TT) { + : AArch64GenSubtargetInfo(TT, CPU, FS), HasFPARMv8(false), HasNEON(false), + HasCrypto(false), TargetTriple(TT), CPUString(CPU) { + + initializeSubtargetFeatures(CPU, FS); +} + +void AArch64Subtarget::initializeSubtargetFeatures(StringRef CPU, + StringRef FS) { + if (CPU.empty()) + CPUString = "generic"; + + std::string FullFS = FS; + if (CPUString == "generic") { + // Enable FP by default. + if (FullFS.empty()) + FullFS = "+fp-armv8"; + else + FullFS = "+fp-armv8," + FullFS; + } - ParseSubtargetFeatures(CPU, FS); + ParseSubtargetFeatures(CPU, FullFS); } bool AArch64Subtarget::GVIsIndirectSymbol(const GlobalValue *GV, diff --git a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h index 2e9205f..bbfd3bc 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h +++ b/contrib/llvm/lib/Target/AArch64/AArch64Subtarget.h @@ -27,18 +27,31 @@ class StringRef; class GlobalValue; class AArch64Subtarget : public AArch64GenSubtargetInfo { + virtual void anchor(); protected: + bool HasFPARMv8; bool HasNEON; bool HasCrypto; /// TargetTriple - What processor and OS we're targeting. Triple TargetTriple; + + /// CPUString - String name of used CPU. + std::string CPUString; + +private: + void initializeSubtargetFeatures(StringRef CPU, StringRef FS); + public: /// This constructor initializes the data members to match that /// of the specified triple. /// AArch64Subtarget(StringRef TT, StringRef CPU, StringRef FS); + virtual bool enableMachineScheduler() const { + return true; + } + /// ParseSubtargetFeatures - Parses features string setting specified /// subtarget options. Definition of function is auto generated by tblgen. 
void ParseSubtargetFeatures(StringRef CPU, StringRef FS); @@ -46,8 +59,13 @@ public: bool GVIsIndirectSymbol(const GlobalValue *GV, Reloc::Model RelocM) const; bool isTargetELF() const { return TargetTriple.isOSBinFormatELF(); } - bool isTargetLinux() const { return TargetTriple.getOS() == Triple::Linux; } + bool isTargetLinux() const { return TargetTriple.isOSLinux(); } + + bool hasFPARMv8() const { return HasFPARMv8; } + bool hasNEON() const { return HasNEON; } + bool hasCrypto() const { return HasCrypto; } + const std::string & getCPUString() const { return CPUString; } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp index df599d5..f1695e2 100644 --- a/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp +++ b/contrib/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp @@ -38,6 +38,7 @@ AArch64TargetMachine::AArch64TargetMachine(const Target &T, StringRef TT, TLInfo(*this), TSInfo(*this), FrameLowering(Subtarget) { + initAsmInfo(); } namespace { diff --git a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index 69bb80a..fbbce11 100644 --- a/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/contrib/llvm/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -54,8 +54,9 @@ public: #include "AArch64GenAsmMatcher.inc" }; - AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser) - : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { + AArch64AsmParser(MCSubtargetInfo &_STI, MCAsmParser &_Parser, + const MCInstrInfo &MII) + : MCTargetAsmParser(), STI(_STI), Parser(_Parser) { MCAsmParserExtension::Initialize(_Parser); // Initialize the set of available features. @@ -126,6 +127,11 @@ public: OperandMatchResultTy ParseSysRegOperand(SmallVectorImpl &Operands); + bool TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc, StringRef &Layout, + SMLoc &LayoutLoc); + + OperandMatchResultTy ParseVectorList(SmallVectorImpl &); + bool validateInstruction(MCInst &Inst, const SmallVectorImpl &Operands); @@ -153,6 +159,7 @@ private: k_Immediate, // Including expressions referencing symbols k_Register, k_ShiftExtend, + k_VectorList, // A sequential list of 1 to 4 registers. k_SysReg, // The register operand of MRS and MSR instructions k_Token, // The mnemonic; other raw tokens the auto-generated k_WrappedRegister // Load/store exclusive permit a wrapped register. @@ -188,6 +195,13 @@ private: bool ImplicitAmount; }; + // A vector register list is a sequential list of 1 to 4 registers. 
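  // [Annotation, not part of the patch] For example, the assembly operand
  // "{v0.4s, v1.4s}" is captured as RegNum = the Q0_Q1 super-register,
  // Count = 2 and Layout = A64Layout::VL_4S.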
+ struct VectorListOp { + unsigned RegNum; + unsigned Count; + A64Layout::VectorLayout Layout; + }; + struct SysRegOp { const char *Data; unsigned Length; @@ -205,6 +219,7 @@ private: struct ImmOp Imm; struct RegOp Reg; struct ShiftExtendOp ShiftExtend; + struct VectorListOp VectorList; struct SysRegOp SysReg; struct TokOp Tok; }; @@ -454,7 +469,7 @@ public: } bool isMOVN32Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_SABS_G0, AArch64MCExpr::VK_AARCH64_SABS_G1, AArch64MCExpr::VK_AARCH64_DTPREL_G1, @@ -463,13 +478,13 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1, AArch64MCExpr::VK_AARCH64_TPREL_G0, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(32, PermittedModifiers, NumModifiers); } bool isMOVN64Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_SABS_G0, AArch64MCExpr::VK_AARCH64_SABS_G1, AArch64MCExpr::VK_AARCH64_SABS_G2, @@ -481,14 +496,14 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1, AArch64MCExpr::VK_AARCH64_TPREL_G0, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(64, PermittedModifiers, NumModifiers); } bool isMOVZ32Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_ABS_G0, AArch64MCExpr::VK_AARCH64_ABS_G1, AArch64MCExpr::VK_AARCH64_SABS_G0, @@ -499,13 +514,13 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1, AArch64MCExpr::VK_AARCH64_TPREL_G0, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(32, PermittedModifiers, NumModifiers); } bool isMOVZ64Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_ABS_G0, AArch64MCExpr::VK_AARCH64_ABS_G1, AArch64MCExpr::VK_AARCH64_ABS_G2, @@ -521,13 +536,13 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1, AArch64MCExpr::VK_AARCH64_TPREL_G0, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(64, PermittedModifiers, NumModifiers); } bool isMOVK32Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_ABS_G0_NC, AArch64MCExpr::VK_AARCH64_ABS_G1_NC, AArch64MCExpr::VK_AARCH64_DTPREL_G1_NC, @@ -536,13 +551,13 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(32, PermittedModifiers, NumModifiers); } bool isMOVK64Imm() const { - static AArch64MCExpr::VariantKind PermittedModifiers[] = { + static const AArch64MCExpr::VariantKind PermittedModifiers[] = { AArch64MCExpr::VK_AARCH64_ABS_G0_NC, AArch64MCExpr::VK_AARCH64_ABS_G1_NC, AArch64MCExpr::VK_AARCH64_ABS_G2_NC, @@ -553,13 +568,13 @@ public: AArch64MCExpr::VK_AARCH64_TPREL_G1_NC, 
AArch64MCExpr::VK_AARCH64_TPREL_G0_NC, }; - unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); + const unsigned NumModifiers = llvm::array_lengthof(PermittedModifiers); return isMoveWideImm(64, PermittedModifiers, NumModifiers); } bool isMoveWideImm(unsigned RegWidth, - AArch64MCExpr::VariantKind *PermittedModifiers, + const AArch64MCExpr::VariantKind *PermittedModifiers, unsigned NumModifiers) const { if (!isImmWithLSL()) return false; @@ -664,8 +679,86 @@ public: return !ShiftExtend.ImplicitAmount && ShiftExtend.Amount <= 4; } - template bool isSImm7Scaled() const { - if (!isImm()) return false; + // if 0 < value <= w, return true + bool isShrFixedWidth(int w) const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) + return false; + int64_t Value = CE->getValue(); + return Value > 0 && Value <= w; + } + + bool isShrImm8() const { return isShrFixedWidth(8); } + + bool isShrImm16() const { return isShrFixedWidth(16); } + + bool isShrImm32() const { return isShrFixedWidth(32); } + + bool isShrImm64() const { return isShrFixedWidth(64); } + + // if 0 <= value < w, return true + bool isShlFixedWidth(int w) const { + if (!isImm()) + return false; + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) + return false; + int64_t Value = CE->getValue(); + return Value >= 0 && Value < w; + } + + bool isShlImm8() const { return isShlFixedWidth(8); } + + bool isShlImm16() const { return isShlFixedWidth(16); } + + bool isShlImm32() const { return isShlFixedWidth(32); } + + bool isShlImm64() const { return isShlFixedWidth(64); } + + bool isNeonMovImmShiftLSL() const { + if (!isShiftOrExtend()) + return false; + + if (ShiftExtend.ShiftType != A64SE::LSL) + return false; + + // Valid shift amount is 0, 8, 16 and 24. + return ShiftExtend.Amount % 8 == 0 && ShiftExtend.Amount <= 24; + } + + bool isNeonMovImmShiftLSLH() const { + if (!isShiftOrExtend()) + return false; + + if (ShiftExtend.ShiftType != A64SE::LSL) + return false; + + // Valid shift amount is 0 and 8. + return ShiftExtend.Amount == 0 || ShiftExtend.Amount == 8; + } + + bool isNeonMovImmShiftMSL() const { + if (!isShiftOrExtend()) + return false; + + if (ShiftExtend.ShiftType != A64SE::MSL) + return false; + + // Valid shift amount is 8 and 16. + return ShiftExtend.Amount == 8 || ShiftExtend.Amount == 16; + } + + template + bool isVectorList() const { + return Kind == k_VectorList && VectorList.Layout == Layout && + VectorList.Count == Count; + } + + template bool isSImm7Scaled() const { + if (!isImm()) + return false; const MCConstantExpr *CE = dyn_cast(getImm()); if (!CE) return false; @@ -705,10 +798,38 @@ public: return isa(getImm()); } + bool isNeonUImm64Mask() const { + if (!isImm()) + return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) + return false; + + uint64_t Value = CE->getValue(); + + // i64 value with each byte being either 0x00 or 0xff. 
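  // [Annotation, not part of the patch] e.g. 0x00ff00ff00ff00ff is accepted,
  // while 0x00ff00ff00ff00fe is rejected because of its low byte.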
+ for (unsigned i = 0; i < 8; ++i, Value >>= 8) + if ((Value & 0xff) != 0 && (Value & 0xff) != 0xff) + return false; + return true; + } + + // if value == N, return true + template + bool isExactImm() const { + if (!isImm()) return false; + + const MCConstantExpr *CE = dyn_cast(getImm()); + if (!CE) return false; + + return CE->getValue() == N; + } + static AArch64Operand *CreateImmWithLSL(const MCExpr *Val, unsigned ShiftAmount, bool ImplicitAmount, - SMLoc S, SMLoc E) { + SMLoc S,SMLoc E) { AArch64Operand *Op = new AArch64Operand(k_ImmWithLSL, S, E); Op->ImmWithLSL.Val = Val; Op->ImmWithLSL.ShiftAmount = ShiftAmount; @@ -766,6 +887,18 @@ public: return Op; } + static AArch64Operand *CreateVectorList(unsigned RegNum, unsigned Count, + A64Layout::VectorLayout Layout, + SMLoc S, SMLoc E) { + AArch64Operand *Op = new AArch64Operand(k_VectorList, S, E); + Op->VectorList.RegNum = RegNum; + Op->VectorList.Count = Count; + Op->VectorList.Layout = Layout; + Op->StartLoc = S; + Op->EndLoc = E; + return Op; + } + static AArch64Operand *CreateToken(StringRef Str, SMLoc S) { AArch64Operand *Op = new AArch64Operand(k_Token, S, S); Op->Tok.Data = Str.data(); @@ -1026,6 +1159,40 @@ public: Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); } + // For Vector Immediates shifted imm operands. + void addNeonMovImmShiftLSLOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + if (ShiftExtend.Amount % 8 != 0 || ShiftExtend.Amount > 24) + llvm_unreachable("Invalid shift amount for vector immediate inst."); + + // Encode LSL shift amount 0, 8, 16, 24 as 0, 1, 2, 3. + int64_t Imm = ShiftExtend.Amount / 8; + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + + void addNeonMovImmShiftLSLHOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + if (ShiftExtend.Amount != 0 && ShiftExtend.Amount != 8) + llvm_unreachable("Invalid shift amount for vector immediate inst."); + + // Encode LSLH shift amount 0, 8 as 0, 1. + int64_t Imm = ShiftExtend.Amount / 8; + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + + void addNeonMovImmShiftMSLOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + if (ShiftExtend.Amount != 8 && ShiftExtend.Amount != 16) + llvm_unreachable("Invalid shift amount for vector immediate inst."); + + // Encode MSL shift amount 8, 16 as 0, 1. + int64_t Imm = ShiftExtend.Amount / 8 - 1; + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + // For the extend in load-store (register offset) instructions. template void addAddrRegExtendOperands(MCInst &Inst, unsigned N) const { @@ -1065,6 +1232,25 @@ public: Inst.addOperand(MCOperand::CreateImm(ShiftExtend.Amount)); } + + void addNeonUImm64MaskOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + + // A bit from each byte in the constant forms the encoded immediate + const MCConstantExpr *CE = dyn_cast(getImm()); + uint64_t Value = CE->getValue(); + + unsigned Imm = 0; + for (unsigned i = 0; i < 8; ++i, Value >>= 8) { + Imm |= (Value & 1) << i; + } + Inst.addOperand(MCOperand::CreateImm(Imm)); + } + + void addVectorListOperands(MCInst &Inst, unsigned N) const { + assert(N == 1 && "Invalid number of operands!"); + Inst.addOperand(MCOperand::CreateReg(VectorList.RegNum)); + } }; } // end anonymous namespace. @@ -1104,7 +1290,6 @@ AArch64AsmParser::ParseOperand(SmallVectorImpl &Operands, else return MatchOperand_Success; } - // ... 
or it might be a symbolish thing } // Fall through @@ -1148,7 +1333,7 @@ AArch64AsmParser::ParseOperand(SmallVectorImpl &Operands, return ParseOperand(Operands, Mnemonic); } // The following will likely be useful later, but not in very early cases - case AsmToken::LCurly: // Weird SIMD lists + case AsmToken::LCurly: // SIMD vector list is not parsed here llvm_unreachable("Don't know how to deal with '{' in operand"); return MatchOperand_ParseFail; } @@ -1306,7 +1491,7 @@ AArch64AsmParser::ParseImmWithLSLOperand( // The optional operand must be "lsl #N" where N is non-negative. if (Parser.getTok().is(AsmToken::Identifier) - && Parser.getTok().getIdentifier().lower() == "lsl") { + && Parser.getTok().getIdentifier().equals_lower("lsl")) { Parser.Lex(); if (Parser.getTok().is(AsmToken::Hash)) { @@ -1363,9 +1548,8 @@ AArch64AsmParser::ParseCRxOperand( return MatchOperand_ParseFail; } - std::string LowerTok = Parser.getTok().getIdentifier().lower(); - StringRef Tok(LowerTok); - if (Tok[0] != 'c') { + StringRef Tok = Parser.getTok().getIdentifier(); + if (Tok[0] != 'c' && Tok[0] != 'C') { Error(S, "Expected cN operand where 0 <= N <= 15"); return MatchOperand_ParseFail; } @@ -1437,22 +1621,11 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, std::string LowerReg = Tok.getString().lower(); size_t DotPos = LowerReg.find('.'); - RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); - if (RegNum == AArch64::NoRegister) { - RegNum = StringSwitch(LowerReg.substr(0, DotPos)) - .Case("ip0", AArch64::X16) - .Case("ip1", AArch64::X17) - .Case("fp", AArch64::X29) - .Case("lr", AArch64::X30) - .Default(AArch64::NoRegister); - } - if (RegNum == AArch64::NoRegister) - return false; - + bool IsVec128 = false; SMLoc S = Tok.getLoc(); RegEndLoc = SMLoc::getFromPointer(S.getPointer() + DotPos); - if (DotPos == StringRef::npos) { + if (DotPos == std::string::npos) { Layout = StringRef(); } else { // Everything afterwards needs to be a literal token, expected to be @@ -1462,20 +1635,78 @@ AArch64AsmParser::IdentifyRegister(unsigned &RegNum, SMLoc &RegEndLoc, // gives us a permanent string to use in the token (a pointer into LowerReg // would go out of scope when we return). LayoutLoc = SMLoc::getFromPointer(S.getPointer() + DotPos + 1); - std::string LayoutText = LowerReg.substr(DotPos, StringRef::npos); + StringRef LayoutText = StringRef(LowerReg).substr(DotPos); + + // See if it's a 128-bit layout first. Layout = StringSwitch(LayoutText) - .Case(".d", ".d").Case(".1d", ".1d").Case(".2d", ".2d") - .Case(".s", ".s").Case(".2s", ".2s").Case(".4s", ".4s") - .Case(".h", ".h").Case(".4h", ".4h").Case(".8h", ".8h") - .Case(".b", ".b").Case(".8b", ".8b").Case(".16b", ".16b") + .Case(".q", ".q").Case(".1q", ".1q") + .Case(".d", ".d").Case(".2d", ".2d") + .Case(".s", ".s").Case(".4s", ".4s") + .Case(".h", ".h").Case(".8h", ".8h") + .Case(".b", ".b").Case(".16b", ".16b") .Default(""); + if (Layout.size() != 0) + IsVec128 = true; + else { + Layout = StringSwitch(LayoutText) + .Case(".1d", ".1d") + .Case(".2s", ".2s") + .Case(".4h", ".4h") + .Case(".8b", ".8b") + .Default(""); + } + if (Layout.size() == 0) { - // Malformed register + // If we've still not pinned it down the register is malformed. return false; } } + RegNum = MatchRegisterName(LowerReg.substr(0, DotPos)); + if (RegNum == AArch64::NoRegister) { + RegNum = StringSwitch(LowerReg.substr(0, DotPos)) + .Case("ip0", AArch64::X16) + .Case("ip1", AArch64::X17) + .Case("fp", AArch64::X29) + .Case("lr", AArch64::X30) + .Case("v0", IsVec128 ? 
AArch64::Q0 : AArch64::D0) + .Case("v1", IsVec128 ? AArch64::Q1 : AArch64::D1) + .Case("v2", IsVec128 ? AArch64::Q2 : AArch64::D2) + .Case("v3", IsVec128 ? AArch64::Q3 : AArch64::D3) + .Case("v4", IsVec128 ? AArch64::Q4 : AArch64::D4) + .Case("v5", IsVec128 ? AArch64::Q5 : AArch64::D5) + .Case("v6", IsVec128 ? AArch64::Q6 : AArch64::D6) + .Case("v7", IsVec128 ? AArch64::Q7 : AArch64::D7) + .Case("v8", IsVec128 ? AArch64::Q8 : AArch64::D8) + .Case("v9", IsVec128 ? AArch64::Q9 : AArch64::D9) + .Case("v10", IsVec128 ? AArch64::Q10 : AArch64::D10) + .Case("v11", IsVec128 ? AArch64::Q11 : AArch64::D11) + .Case("v12", IsVec128 ? AArch64::Q12 : AArch64::D12) + .Case("v13", IsVec128 ? AArch64::Q13 : AArch64::D13) + .Case("v14", IsVec128 ? AArch64::Q14 : AArch64::D14) + .Case("v15", IsVec128 ? AArch64::Q15 : AArch64::D15) + .Case("v16", IsVec128 ? AArch64::Q16 : AArch64::D16) + .Case("v17", IsVec128 ? AArch64::Q17 : AArch64::D17) + .Case("v18", IsVec128 ? AArch64::Q18 : AArch64::D18) + .Case("v19", IsVec128 ? AArch64::Q19 : AArch64::D19) + .Case("v20", IsVec128 ? AArch64::Q20 : AArch64::D20) + .Case("v21", IsVec128 ? AArch64::Q21 : AArch64::D21) + .Case("v22", IsVec128 ? AArch64::Q22 : AArch64::D22) + .Case("v23", IsVec128 ? AArch64::Q23 : AArch64::D23) + .Case("v24", IsVec128 ? AArch64::Q24 : AArch64::D24) + .Case("v25", IsVec128 ? AArch64::Q25 : AArch64::D25) + .Case("v26", IsVec128 ? AArch64::Q26 : AArch64::D26) + .Case("v27", IsVec128 ? AArch64::Q27 : AArch64::D27) + .Case("v28", IsVec128 ? AArch64::Q28 : AArch64::D28) + .Case("v29", IsVec128 ? AArch64::Q29 : AArch64::D29) + .Case("v30", IsVec128 ? AArch64::Q30 : AArch64::D30) + .Case("v31", IsVec128 ? AArch64::Q31 : AArch64::D31) + .Default(AArch64::NoRegister); + } + if (RegNum == AArch64::NoRegister) + return false; + return true; } @@ -1507,6 +1738,7 @@ AArch64AsmParser::ParseRegister(SmallVectorImpl &Operands, case 'h': NumLanes = 8; break; case 's': NumLanes = 4; break; case 'd': NumLanes = 2; break; + case 'q': NumLanes = 1; break; } } @@ -1660,20 +1892,21 @@ AArch64AsmParser::ParseShiftExtend( std::string LowerID = IDVal.lower(); A64SE::ShiftExtSpecifiers Spec = - StringSwitch(LowerID) - .Case("lsl", A64SE::LSL) - .Case("lsr", A64SE::LSR) - .Case("asr", A64SE::ASR) - .Case("ror", A64SE::ROR) - .Case("uxtb", A64SE::UXTB) - .Case("uxth", A64SE::UXTH) - .Case("uxtw", A64SE::UXTW) - .Case("uxtx", A64SE::UXTX) - .Case("sxtb", A64SE::SXTB) - .Case("sxth", A64SE::SXTH) - .Case("sxtw", A64SE::SXTW) - .Case("sxtx", A64SE::SXTX) - .Default(A64SE::Invalid); + StringSwitch(LowerID) + .Case("lsl", A64SE::LSL) + .Case("msl", A64SE::MSL) + .Case("lsr", A64SE::LSR) + .Case("asr", A64SE::ASR) + .Case("ror", A64SE::ROR) + .Case("uxtb", A64SE::UXTB) + .Case("uxth", A64SE::UXTH) + .Case("uxtw", A64SE::UXTW) + .Case("uxtx", A64SE::UXTX) + .Case("sxtb", A64SE::SXTB) + .Case("sxth", A64SE::SXTH) + .Case("sxtw", A64SE::SXTW) + .Case("sxtx", A64SE::SXTX) + .Default(A64SE::Invalid); if (Spec == A64SE::Invalid) return MatchOperand_NoMatch; @@ -1683,8 +1916,8 @@ AArch64AsmParser::ParseShiftExtend( S = Parser.getTok().getLoc(); Parser.Lex(); - if (Spec != A64SE::LSL && Spec != A64SE::LSR && - Spec != A64SE::ASR && Spec != A64SE::ROR) { + if (Spec != A64SE::LSL && Spec != A64SE::LSR && Spec != A64SE::ASR && + Spec != A64SE::ROR && Spec != A64SE::MSL) { // The shift amount can be omitted for the extending versions, but not real // shifts: // add x0, x0, x0, uxtb @@ -1724,6 +1957,148 @@ AArch64AsmParser::ParseShiftExtend( return MatchOperand_Success; } +/// Try to 
parse a vector register token. If it is a vector register,
+/// the token is eaten and true is returned; otherwise false is returned.
+bool AArch64AsmParser::TryParseVector(uint32_t &RegNum, SMLoc &RegEndLoc,
+                                      StringRef &Layout, SMLoc &LayoutLoc) {
+  bool IsVector = true;
+
+  if (!IdentifyRegister(RegNum, RegEndLoc, Layout, LayoutLoc))
+    IsVector = false;
+  else if (!AArch64MCRegisterClasses[AArch64::FPR64RegClassID]
+                .contains(RegNum) &&
+           !AArch64MCRegisterClasses[AArch64::FPR128RegClassID]
+                .contains(RegNum))
+    IsVector = false;
+  else if (Layout.size() == 0)
+    IsVector = false;
+
+  if (!IsVector)
+    Error(Parser.getTok().getLoc(), "expected vector type register");
+
+  Parser.Lex(); // Eat this token.
+  return IsVector;
+}
+
+
+// A vector list contains 1-4 consecutive registers.
+// There are two kinds of vector list when the number of vectors is > 1:
+//   (1) {Vn.layout, Vn+1.layout, ... , Vm.layout}
+//   (2) {Vn.layout - Vm.layout}
+// If the layout is like .b/.h/.s/.d, also parse the lane.
+AArch64AsmParser::OperandMatchResultTy AArch64AsmParser::ParseVectorList(
+    SmallVectorImpl<MCParsedAsmOperand *> &Operands) {
+  if (Parser.getTok().isNot(AsmToken::LCurly)) {
+    Error(Parser.getTok().getLoc(), "'{' expected");
+    return MatchOperand_ParseFail;
+  }
+  SMLoc SLoc = Parser.getTok().getLoc();
+  Parser.Lex(); // Eat '{' token.
+
+  unsigned Reg, Count = 1;
+  StringRef LayoutStr;
+  SMLoc RegEndLoc, LayoutLoc;
+  if (!TryParseVector(Reg, RegEndLoc, LayoutStr, LayoutLoc))
+    return MatchOperand_ParseFail;
+
+  if (Parser.getTok().is(AsmToken::Minus)) {
+    Parser.Lex(); // Eat the minus.
+
+    unsigned Reg2;
+    StringRef LayoutStr2;
+    SMLoc RegEndLoc2, LayoutLoc2;
+    SMLoc RegLoc2 = Parser.getTok().getLoc();
+
+    if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
+      return MatchOperand_ParseFail;
+    unsigned Space = (Reg < Reg2) ? (Reg2 - Reg) : (Reg2 + 32 - Reg);
+
+    if (LayoutStr != LayoutStr2) {
+      Error(LayoutLoc2, "expected the same vector layout");
+      return MatchOperand_ParseFail;
+    }
+    if (Space == 0 || Space > 3) {
+      Error(RegLoc2, "invalid number of vectors");
+      return MatchOperand_ParseFail;
+    }
+
+    Count += Space;
+  } else {
+    unsigned LastReg = Reg;
+    while (Parser.getTok().is(AsmToken::Comma)) {
+      Parser.Lex(); // Eat the comma.
+      unsigned Reg2;
+      StringRef LayoutStr2;
+      SMLoc RegEndLoc2, LayoutLoc2;
+      SMLoc RegLoc2 = Parser.getTok().getLoc();
+
+      if (!TryParseVector(Reg2, RegEndLoc2, LayoutStr2, LayoutLoc2))
+        return MatchOperand_ParseFail;
+      unsigned Space = (LastReg < Reg2) ? (Reg2 - LastReg)
+                                        : (Reg2 + 32 - LastReg);
+      Count++;
+
+      // Consecutive vectors must be exactly one register apart and share
+      // the same layout; the total count must not be greater than 4.
+      if (Space != 1) {
+        Error(RegLoc2, "invalid space between two vectors");
+        return MatchOperand_ParseFail;
+      }
+      if (LayoutStr != LayoutStr2) {
+        Error(LayoutLoc2, "expected the same vector layout");
+        return MatchOperand_ParseFail;
+      }
+      if (Count > 4) {
+        Error(RegLoc2, "invalid number of vectors");
+        return MatchOperand_ParseFail;
+      }
+
+      LastReg = Reg2;
+    }
+  }
+
+  if (Parser.getTok().isNot(AsmToken::RCurly)) {
+    Error(Parser.getTok().getLoc(), "'}' expected");
+    return MatchOperand_ParseFail;
+  }
+  SMLoc ELoc = Parser.getTok().getLoc();
+  Parser.Lex(); // Eat '}' token.
+
+  A64Layout::VectorLayout Layout = A64StringToVectorLayout(LayoutStr);
+  if (Count > 1) { // If count > 1, create vector list using super register.
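    // [Annotation, not part of the patch] e.g. "{v4.8b, v5.8b}" arrives here
    // as Reg = D4 with Count = 2 and a 64-bit layout, and is folded into the
    // D4_D5 pair register by getMatchingSuperReg below.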
+ bool IsVec64 = (Layout < A64Layout::VL_16B); + static unsigned SupRegIDs[3][2] = { + { AArch64::QPairRegClassID, AArch64::DPairRegClassID }, + { AArch64::QTripleRegClassID, AArch64::DTripleRegClassID }, + { AArch64::QQuadRegClassID, AArch64::DQuadRegClassID } + }; + unsigned SupRegID = SupRegIDs[Count - 2][static_cast(IsVec64)]; + unsigned Sub0 = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0; + const MCRegisterInfo *MRI = getContext().getRegisterInfo(); + Reg = MRI->getMatchingSuperReg(Reg, Sub0, + &AArch64MCRegisterClasses[SupRegID]); + } + Operands.push_back( + AArch64Operand::CreateVectorList(Reg, Count, Layout, SLoc, ELoc)); + + if (Parser.getTok().is(AsmToken::LBrac)) { + uint32_t NumLanes = 0; + switch(Layout) { + case A64Layout::VL_B : NumLanes = 16; break; + case A64Layout::VL_H : NumLanes = 8; break; + case A64Layout::VL_S : NumLanes = 4; break; + case A64Layout::VL_D : NumLanes = 2; break; + default: + SMLoc Loc = getLexer().getLoc(); + Error(Loc, "expected comma before next operand"); + return MatchOperand_ParseFail; + } + return ParseNEONLane(Operands, NumLanes); + } else { + return MatchOperand_Success; + } +} + // FIXME: We would really like to be able to tablegen'erate this. bool AArch64AsmParser:: validateInstruction(MCInst &Inst, @@ -1918,7 +2293,7 @@ bool AArch64AsmParser::ParseDirectiveWord(unsigned Size, SMLoc L) { if (getParser().parseExpression(Value)) return true; - getParser().getStreamer().EmitValue(Value, Size, 0/*addrspace*/); + getParser().getStreamer().EmitValue(Value, Size); if (getLexer().is(AsmToken::EndOfStatement)) break; @@ -2019,7 +2394,7 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, "expected compatible register or floating-point constant"); case Match_FPZero: return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), - "expected floating-point constant #0.0"); + "expected floating-point constant #0.0 or invalid register type"); case Match_Label: return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), "expected label or encodable integer pc offset"); @@ -2140,6 +2515,30 @@ bool AArch64AsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_Width64: return Error(((AArch64Operand*)Operands[ErrorInfo])->getStartLoc(), "expected integer in range [, 63]"); + case Match_ShrImm8: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 8]"); + case Match_ShrImm16: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 16]"); + case Match_ShrImm32: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 32]"); + case Match_ShrImm64: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [1, 64]"); + case Match_ShlImm8: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 7]"); + case Match_ShlImm16: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 15]"); + case Match_ShlImm32: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 31]"); + case Match_ShlImm64: + return Error(((AArch64Operand *)Operands[ErrorInfo])->getStartLoc(), + "expected integer in range [0, 63]"); } llvm_unreachable("Implement any new match types added!"); diff --git a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp 
b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp index 12c1b8f..be4d7f2 100644 --- a/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp +++ b/contrib/llvm/lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp @@ -38,7 +38,7 @@ typedef MCDisassembler::DecodeStatus DecodeStatus; namespace { /// AArch64 disassembler for all AArch64 platforms. class AArch64Disassembler : public MCDisassembler { - const MCRegisterInfo *RegInfo; + OwningPtr RegInfo; public: /// Initializes the disassembler. /// @@ -46,8 +46,7 @@ public: : MCDisassembler(STI), RegInfo(Info) { } - ~AArch64Disassembler() { - } + ~AArch64Disassembler() {} /// See MCDisassembler. DecodeStatus getInstruction(MCInst &instr, @@ -57,7 +56,7 @@ public: raw_ostream &vStream, raw_ostream &cStream) const; - const MCRegisterInfo *getRegInfo() const { return RegInfo; } + const MCRegisterInfo *getRegInfo() const { return RegInfo.get(); } }; } @@ -83,12 +82,38 @@ static DecodeStatus DecodeFPR32RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); static DecodeStatus DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder); static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder); -static DecodeStatus DecodeVPR128RegisterClass(llvm::MCInst &Inst, - unsigned RegNo, uint64_t Address, - const void *Decoder); +static DecodeStatus DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder); static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, unsigned OptionHiS, @@ -111,6 +136,30 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val, + uint64_t Address, const void *Decoder); +static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, 
unsigned Val, + uint64_t Address, + const void *Decoder); +static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); + template static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, unsigned FullImm, @@ -127,6 +176,10 @@ static DecodeStatus DecodeRegExtendOperand(llvm::MCInst &Inst, unsigned ShiftAmount, uint64_t Address, const void *Decoder); +template +static DecodeStatus +DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount, + uint64_t Address, const void *Decoder); static DecodeStatus Decode32BitShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount, @@ -177,6 +230,17 @@ static DecodeStatus DecodeSingleIndexedInstruction(llvm::MCInst &Inst, uint64_t Address, const void *Decoder); +static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); + +static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder); static bool Check(DecodeStatus &Out, DecodeStatus In); @@ -208,7 +272,7 @@ DecodeStatus AArch64Disassembler::getInstruction(MCInst &MI, uint64_t &Size, uint8_t bytes[4]; // We want to read exactly 4 bytes of data. - if (Region.readBytes(Address, 4, (uint8_t*)bytes, NULL) == -1) { + if (Region.readBytes(Address, 4, bytes) == -1) { Size = 0; return MCDisassembler::Fail; } @@ -325,6 +389,14 @@ DecodeFPR64RegisterClass(llvm::MCInst &Inst, unsigned RegNo, return MCDisassembler::Success; } +static DecodeStatus +DecodeFPR64LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 15) + return MCDisassembler::Fail; + + return DecodeFPR64RegisterClass(Inst, RegNo, Address, Decoder); +} static DecodeStatus DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, @@ -338,16 +410,79 @@ DecodeFPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, } static DecodeStatus -DecodeVPR128RegisterClass(llvm::MCInst &Inst, unsigned RegNo, - uint64_t Address, const void *Decoder) { +DecodeFPR128LoRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, const void *Decoder) { + if (RegNo > 15) + return MCDisassembler::Fail; + + return DecodeFPR128RegisterClass(Inst, RegNo, Address, Decoder); +} + +static DecodeStatus DecodeGPR64noxzrRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, + uint64_t Address, + const void *Decoder) { + if (RegNo > 30) + return MCDisassembler::Fail; + + uint16_t Register = getReg(Decoder, AArch64::GPR64noxzrRegClassID, RegNo); + Inst.addOperand(MCOperand::CreateReg(Register)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeRegisterClassByID(llvm::MCInst &Inst, unsigned RegNo, + unsigned RegID, + const void *Decoder) { if (RegNo > 31) return MCDisassembler::Fail; - uint16_t Register = getReg(Decoder, AArch64::VPR128RegClassID, RegNo); + uint16_t Register = getReg(Decoder, RegID, RegNo); Inst.addOperand(MCOperand::CreateReg(Register)); return MCDisassembler::Success; } +static DecodeStatus DecodeDPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClassByID(Inst, RegNo, AArch64::DPairRegClassID, + Decoder); +} + +static DecodeStatus DecodeQPairRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClassByID(Inst, RegNo, AArch64::QPairRegClassID, + 
Decoder); +} + +static DecodeStatus DecodeDTripleRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder) { + return DecodeRegisterClassByID(Inst, RegNo, AArch64::DTripleRegClassID, + Decoder); +} + +static DecodeStatus DecodeQTripleRegisterClass(llvm::MCInst &Inst, + unsigned RegNo, uint64_t Address, + const void *Decoder) { + return DecodeRegisterClassByID(Inst, RegNo, AArch64::QTripleRegClassID, + Decoder); +} + +static DecodeStatus DecodeDQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClassByID(Inst, RegNo, AArch64::DQuadRegClassID, + Decoder); +} + +static DecodeStatus DecodeQQuadRegisterClass(llvm::MCInst &Inst, unsigned RegNo, + uint64_t Address, + const void *Decoder) { + return DecodeRegisterClassByID(Inst, RegNo, AArch64::QQuadRegClassID, + Decoder); +} + static DecodeStatus DecodeAddrRegExtendOperand(llvm::MCInst &Inst, unsigned OptionHiS, uint64_t Address, @@ -396,7 +531,73 @@ static DecodeStatus DecodeFPZeroOperand(llvm::MCInst &Inst, return MCDisassembler::Success; } +static DecodeStatus DecodeShiftRightImm8(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(8 - Val)); + return MCDisassembler::Success; +} +static DecodeStatus DecodeShiftRightImm16(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(16 - Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftRightImm32(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(32 - Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftRightImm64(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + Inst.addOperand(MCOperand::CreateImm(64 - Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftLeftImm8(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + if (Val > 7) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftLeftImm16(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + if (Val > 15) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftLeftImm32(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + if (Val > 31) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} + +static DecodeStatus DecodeShiftLeftImm64(MCInst &Inst, unsigned Val, + uint64_t Address, + const void *Decoder) { + if (Val > 63) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(Val)); + return MCDisassembler::Success; +} template static DecodeStatus DecodeMoveWideImmOperand(llvm::MCInst &Inst, @@ -553,11 +754,11 @@ static DecodeStatus DecodeFMOVLaneInstruction(llvm::MCInst &Inst, unsigned Insn, unsigned IsToVec = fieldFromInstruction(Insn, 16, 1); if (IsToVec) { - DecodeVPR128RegisterClass(Inst, Rd, Address, Decoder); + DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder); DecodeGPR64RegisterClass(Inst, Rn, Address, Decoder); } else { DecodeGPR64RegisterClass(Inst, Rd, Address, Decoder); - DecodeVPR128RegisterClass(Inst, Rn, Address, Decoder); + DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder); } // Add the lane @@ -800,4 +1001,572 @@ 
extern "C" void LLVMInitializeAArch64Disassembler() { createAArch64Disassembler); } +template +static DecodeStatus +DecodeNeonMovImmShiftOperand(llvm::MCInst &Inst, unsigned ShiftAmount, + uint64_t Address, const void *Decoder) { + bool IsLSL = false; + if (Ext == A64SE::LSL) + IsLSL = true; + else if (Ext != A64SE::MSL) + return MCDisassembler::Fail; + + // MSL and LSLH accepts encoded shift amount 0 or 1. + if ((!IsLSL || (IsLSL && IsHalf)) && ShiftAmount != 0 && ShiftAmount != 1) + return MCDisassembler::Fail; + + // LSL accepts encoded shift amount 0, 1, 2 or 3. + if (IsLSL && ShiftAmount > 3) + return MCDisassembler::Fail; + + Inst.addOperand(MCOperand::CreateImm(ShiftAmount)); + return MCDisassembler::Success; +} + +// Decode post-index vector load/store instructions. +// This is necessary as we need to decode Rm: if Rm == 0b11111, the last +// operand is an immediate equal the the length of vector list in bytes, +// or Rm is decoded to a GPR64noxzr register. +static DecodeStatus DecodeVLDSTPostInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + unsigned Rt = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned Rm = fieldFromInstruction(Insn, 16, 5); + unsigned Opcode = fieldFromInstruction(Insn, 12, 4); + unsigned IsLoad = fieldFromInstruction(Insn, 22, 1); + // 0 for 64bit vector list, 1 for 128bit vector list + unsigned Is128BitVec = fieldFromInstruction(Insn, 30, 1); + + unsigned NumVecs; + switch (Opcode) { + case 0: // ld4/st4 + case 2: // ld1/st1 with 4 vectors + NumVecs = 4; break; + case 4: // ld3/st3 + case 6: // ld1/st1 with 3 vectors + NumVecs = 3; break; + case 7: // ld1/st1 with 1 vector + NumVecs = 1; break; + case 8: // ld2/st2 + case 10: // ld1/st1 with 2 vectors + NumVecs = 2; break; + default: + llvm_unreachable("Invalid opcode for post-index load/store instructions"); + } + + // Decode vector list of 1/2/3/4 vectors for load instructions. + if (IsLoad) { + switch (NumVecs) { + case 1: + Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder) + : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); + break; + case 2: + Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder) + : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder); + break; + case 3: + Is128BitVec ? DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder) + : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder); + break; + case 4: + Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder) + : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder); + break; + } + } + + // Decode write back register, which is equal to Rn. + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder); + + if (Rm == 31) // If Rm is 0x11111, add the vector list length in byte + Inst.addOperand(MCOperand::CreateImm(NumVecs * (Is128BitVec ? 16 : 8))); + else // Decode Rm + DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder); + + // Decode vector list of 1/2/3/4 vectors for load instructions. + if (!IsLoad) { + switch (NumVecs) { + case 1: + Is128BitVec ? DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder) + : DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder); + break; + case 2: + Is128BitVec ? DecodeQPairRegisterClass(Inst, Rt, Address, Decoder) + : DecodeDPairRegisterClass(Inst, Rt, Address, Decoder); + break; + case 3: + Is128BitVec ? 
DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder) + : DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder); + break; + case 4: + Is128BitVec ? DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder) + : DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder); + break; + } + } + + return MCDisassembler::Success; +} + +// Decode post-index vector load/store lane instructions. +// This is necessary as we need to decode Rm: if Rm == 0b11111, the last +// operand is an immediate equal the the length of the changed bytes, +// or Rm is decoded to a GPR64noxzr register. +static DecodeStatus DecodeVLDSTLanePostInstruction(MCInst &Inst, unsigned Insn, + uint64_t Address, + const void *Decoder) { + bool Is64bitVec = false; + bool IsLoadDup = false; + bool IsLoad = false; + // The total number of bytes transferred. + // TransferBytes = NumVecs * OneLaneBytes + unsigned TransferBytes = 0; + unsigned NumVecs = 0; + unsigned Opc = Inst.getOpcode(); + switch (Opc) { + case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register: + case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register: + case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register: + case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: { + switch (Opc) { + case AArch64::LD1R_WB_8B_fixed: case AArch64::LD1R_WB_8B_register: + TransferBytes = 1; break; + case AArch64::LD1R_WB_4H_fixed: case AArch64::LD1R_WB_4H_register: + TransferBytes = 2; break; + case AArch64::LD1R_WB_2S_fixed: case AArch64::LD1R_WB_2S_register: + TransferBytes = 4; break; + case AArch64::LD1R_WB_1D_fixed: case AArch64::LD1R_WB_1D_register: + TransferBytes = 8; break; + } + Is64bitVec = true; + IsLoadDup = true; + NumVecs = 1; + break; + } + + case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register: + case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register: + case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register: + case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: { + switch (Opc) { + case AArch64::LD1R_WB_16B_fixed: case AArch64::LD1R_WB_16B_register: + TransferBytes = 1; break; + case AArch64::LD1R_WB_8H_fixed: case AArch64::LD1R_WB_8H_register: + TransferBytes = 2; break; + case AArch64::LD1R_WB_4S_fixed: case AArch64::LD1R_WB_4S_register: + TransferBytes = 4; break; + case AArch64::LD1R_WB_2D_fixed: case AArch64::LD1R_WB_2D_register: + TransferBytes = 8; break; + } + IsLoadDup = true; + NumVecs = 1; + break; + } + + case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register: + case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register: + case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register: + case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: { + switch (Opc) { + case AArch64::LD2R_WB_8B_fixed: case AArch64::LD2R_WB_8B_register: + TransferBytes = 2; break; + case AArch64::LD2R_WB_4H_fixed: case AArch64::LD2R_WB_4H_register: + TransferBytes = 4; break; + case AArch64::LD2R_WB_2S_fixed: case AArch64::LD2R_WB_2S_register: + TransferBytes = 8; break; + case AArch64::LD2R_WB_1D_fixed: case AArch64::LD2R_WB_1D_register: + TransferBytes = 16; break; + } + Is64bitVec = true; + IsLoadDup = true; + NumVecs = 2; + break; + } + + case AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register: + case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register: + case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register: + case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: { + switch (Opc) { + case 
AArch64::LD2R_WB_16B_fixed: case AArch64::LD2R_WB_16B_register: + TransferBytes = 2; break; + case AArch64::LD2R_WB_8H_fixed: case AArch64::LD2R_WB_8H_register: + TransferBytes = 4; break; + case AArch64::LD2R_WB_4S_fixed: case AArch64::LD2R_WB_4S_register: + TransferBytes = 8; break; + case AArch64::LD2R_WB_2D_fixed: case AArch64::LD2R_WB_2D_register: + TransferBytes = 16; break; + } + IsLoadDup = true; + NumVecs = 2; + break; + } + + case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register: + case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register: + case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register: + case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: { + switch (Opc) { + case AArch64::LD3R_WB_8B_fixed: case AArch64::LD3R_WB_8B_register: + TransferBytes = 3; break; + case AArch64::LD3R_WB_4H_fixed: case AArch64::LD3R_WB_4H_register: + TransferBytes = 6; break; + case AArch64::LD3R_WB_2S_fixed: case AArch64::LD3R_WB_2S_register: + TransferBytes = 12; break; + case AArch64::LD3R_WB_1D_fixed: case AArch64::LD3R_WB_1D_register: + TransferBytes = 24; break; + } + Is64bitVec = true; + IsLoadDup = true; + NumVecs = 3; + break; + } + + case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register: + case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_8H_register: + case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_4S_register: + case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: { + switch (Opc) { + case AArch64::LD3R_WB_16B_fixed: case AArch64::LD3R_WB_16B_register: + TransferBytes = 3; break; + case AArch64::LD3R_WB_8H_fixed: case AArch64::LD3R_WB_8H_register: + TransferBytes = 6; break; + case AArch64::LD3R_WB_4S_fixed: case AArch64::LD3R_WB_4S_register: + TransferBytes = 12; break; + case AArch64::LD3R_WB_2D_fixed: case AArch64::LD3R_WB_2D_register: + TransferBytes = 24; break; + } + IsLoadDup = true; + NumVecs = 3; + break; + } + + case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register: + case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register: + case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register: + case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: { + switch (Opc) { + case AArch64::LD4R_WB_8B_fixed: case AArch64::LD4R_WB_8B_register: + TransferBytes = 4; break; + case AArch64::LD4R_WB_4H_fixed: case AArch64::LD4R_WB_4H_register: + TransferBytes = 8; break; + case AArch64::LD4R_WB_2S_fixed: case AArch64::LD4R_WB_2S_register: + TransferBytes = 16; break; + case AArch64::LD4R_WB_1D_fixed: case AArch64::LD4R_WB_1D_register: + TransferBytes = 32; break; + } + Is64bitVec = true; + IsLoadDup = true; + NumVecs = 4; + break; + } + + case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register: + case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_8H_register: + case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_4S_register: + case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: { + switch (Opc) { + case AArch64::LD4R_WB_16B_fixed: case AArch64::LD4R_WB_16B_register: + TransferBytes = 4; break; + case AArch64::LD4R_WB_8H_fixed: case AArch64::LD4R_WB_8H_register: + TransferBytes = 8; break; + case AArch64::LD4R_WB_4S_fixed: case AArch64::LD4R_WB_4S_register: + TransferBytes = 16; break; + case AArch64::LD4R_WB_2D_fixed: case AArch64::LD4R_WB_2D_register: + TransferBytes = 32; break; + } + IsLoadDup = true; + NumVecs = 4; + break; + } + + case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register: + case AArch64::LD1LN_WB_H_fixed: case 
AArch64::LD1LN_WB_H_register: + case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register: + case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: { + switch (Opc) { + case AArch64::LD1LN_WB_B_fixed: case AArch64::LD1LN_WB_B_register: + TransferBytes = 1; break; + case AArch64::LD1LN_WB_H_fixed: case AArch64::LD1LN_WB_H_register: + TransferBytes = 2; break; + case AArch64::LD1LN_WB_S_fixed: case AArch64::LD1LN_WB_S_register: + TransferBytes = 4; break; + case AArch64::LD1LN_WB_D_fixed: case AArch64::LD1LN_WB_D_register: + TransferBytes = 8; break; + } + IsLoad = true; + NumVecs = 1; + break; + } + + case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register: + case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register: + case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register: + case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: { + switch (Opc) { + case AArch64::LD2LN_WB_B_fixed: case AArch64::LD2LN_WB_B_register: + TransferBytes = 2; break; + case AArch64::LD2LN_WB_H_fixed: case AArch64::LD2LN_WB_H_register: + TransferBytes = 4; break; + case AArch64::LD2LN_WB_S_fixed: case AArch64::LD2LN_WB_S_register: + TransferBytes = 8; break; + case AArch64::LD2LN_WB_D_fixed: case AArch64::LD2LN_WB_D_register: + TransferBytes = 16; break; + } + IsLoad = true; + NumVecs = 2; + break; + } + + case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register: + case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register: + case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register: + case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: { + switch (Opc) { + case AArch64::LD3LN_WB_B_fixed: case AArch64::LD3LN_WB_B_register: + TransferBytes = 3; break; + case AArch64::LD3LN_WB_H_fixed: case AArch64::LD3LN_WB_H_register: + TransferBytes = 6; break; + case AArch64::LD3LN_WB_S_fixed: case AArch64::LD3LN_WB_S_register: + TransferBytes = 12; break; + case AArch64::LD3LN_WB_D_fixed: case AArch64::LD3LN_WB_D_register: + TransferBytes = 24; break; + } + IsLoad = true; + NumVecs = 3; + break; + } + + case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register: + case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register: + case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register: + case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: { + switch (Opc) { + case AArch64::LD4LN_WB_B_fixed: case AArch64::LD4LN_WB_B_register: + TransferBytes = 4; break; + case AArch64::LD4LN_WB_H_fixed: case AArch64::LD4LN_WB_H_register: + TransferBytes = 8; break; + case AArch64::LD4LN_WB_S_fixed: case AArch64::LD4LN_WB_S_register: + TransferBytes = 16; break; + case AArch64::LD4LN_WB_D_fixed: case AArch64::LD4LN_WB_D_register: + TransferBytes = 32; break; + } + IsLoad = true; + NumVecs = 4; + break; + } + + case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register: + case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register: + case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register: + case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: { + switch (Opc) { + case AArch64::ST1LN_WB_B_fixed: case AArch64::ST1LN_WB_B_register: + TransferBytes = 1; break; + case AArch64::ST1LN_WB_H_fixed: case AArch64::ST1LN_WB_H_register: + TransferBytes = 2; break; + case AArch64::ST1LN_WB_S_fixed: case AArch64::ST1LN_WB_S_register: + TransferBytes = 4; break; + case AArch64::ST1LN_WB_D_fixed: case AArch64::ST1LN_WB_D_register: + TransferBytes = 8; break; + } + NumVecs = 1; + break; + } + + 
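  // [Annotation, not part of the patch] The ST*LN store cases (ST1LN above,
  // ST2LN/ST3LN/ST4LN below) set neither IsLoad nor IsLoadDup; they only
  // record TransferBytes (NumVecs * lane size), and their source vector list
  // is decoded after the write-back operands further down.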
case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register: + case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register: + case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register: + case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: { + switch (Opc) { + case AArch64::ST2LN_WB_B_fixed: case AArch64::ST2LN_WB_B_register: + TransferBytes = 2; break; + case AArch64::ST2LN_WB_H_fixed: case AArch64::ST2LN_WB_H_register: + TransferBytes = 4; break; + case AArch64::ST2LN_WB_S_fixed: case AArch64::ST2LN_WB_S_register: + TransferBytes = 8; break; + case AArch64::ST2LN_WB_D_fixed: case AArch64::ST2LN_WB_D_register: + TransferBytes = 16; break; + } + NumVecs = 2; + break; + } + + case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register: + case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register: + case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register: + case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: { + switch (Opc) { + case AArch64::ST3LN_WB_B_fixed: case AArch64::ST3LN_WB_B_register: + TransferBytes = 3; break; + case AArch64::ST3LN_WB_H_fixed: case AArch64::ST3LN_WB_H_register: + TransferBytes = 6; break; + case AArch64::ST3LN_WB_S_fixed: case AArch64::ST3LN_WB_S_register: + TransferBytes = 12; break; + case AArch64::ST3LN_WB_D_fixed: case AArch64::ST3LN_WB_D_register: + TransferBytes = 24; break; + } + NumVecs = 3; + break; + } + + case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register: + case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register: + case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register: + case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: { + switch (Opc) { + case AArch64::ST4LN_WB_B_fixed: case AArch64::ST4LN_WB_B_register: + TransferBytes = 4; break; + case AArch64::ST4LN_WB_H_fixed: case AArch64::ST4LN_WB_H_register: + TransferBytes = 8; break; + case AArch64::ST4LN_WB_S_fixed: case AArch64::ST4LN_WB_S_register: + TransferBytes = 16; break; + case AArch64::ST4LN_WB_D_fixed: case AArch64::ST4LN_WB_D_register: + TransferBytes = 32; break; + } + NumVecs = 4; + break; + } + + default: + return MCDisassembler::Fail; + } // End of switch (Opc) + + unsigned Rt = fieldFromInstruction(Insn, 0, 5); + unsigned Rn = fieldFromInstruction(Insn, 5, 5); + unsigned Rm = fieldFromInstruction(Insn, 16, 5); + + // Decode post-index of load duplicate lane + if (IsLoadDup) { + switch (NumVecs) { + case 1: + Is64bitVec ? DecodeFPR64RegisterClass(Inst, Rt, Address, Decoder) + : DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder); + break; + case 2: + Is64bitVec ? DecodeDPairRegisterClass(Inst, Rt, Address, Decoder) + : DecodeQPairRegisterClass(Inst, Rt, Address, Decoder); + break; + case 3: + Is64bitVec ? DecodeDTripleRegisterClass(Inst, Rt, Address, Decoder) + : DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder); + break; + case 4: + Is64bitVec ? DecodeDQuadRegisterClass(Inst, Rt, Address, Decoder) + : DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder); + } + + // Decode write back register, which is equal to Rn. 
+    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+    DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+    if (Rm == 31) // If Rm is 0b11111, add the number of transferred bytes
+      Inst.addOperand(MCOperand::CreateImm(TransferBytes));
+    else // Decode Rm
+      DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
+
+    return MCDisassembler::Success;
+  }
+
+  // Decode the post-index forms of the load/store lane instructions.
+  // Loads have a vector list as output.
+  if (IsLoad) {
+    switch (NumVecs) {
+    case 1:
+      DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 2:
+      DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 3:
+      DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
+      break;
+    case 4:
+      DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
+    }
+  }
+
+  // Decode the write back register, which is equal to Rn.
+  DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+  DecodeGPR64xspRegisterClass(Inst, Rn, Address, Decoder);
+
+  if (Rm == 31) // If Rm is 0b11111, add the number of transferred bytes
+    Inst.addOperand(MCOperand::CreateImm(TransferBytes));
+  else // Decode Rm
+    DecodeGPR64noxzrRegisterClass(Inst, Rm, Address, Decoder);
+
+  // Decode the source vector list.
+  switch (NumVecs) {
+  case 1:
+    DecodeFPR128RegisterClass(Inst, Rt, Address, Decoder);
+    break;
+  case 2:
+    DecodeQPairRegisterClass(Inst, Rt, Address, Decoder);
+    break;
+  case 3:
+    DecodeQTripleRegisterClass(Inst, Rt, Address, Decoder);
+    break;
+  case 4:
+    DecodeQQuadRegisterClass(Inst, Rt, Address, Decoder);
+  }
+
+  // Decode the lane.
+  unsigned Q = fieldFromInstruction(Insn, 30, 1);
+  unsigned S = fieldFromInstruction(Insn, 10, 3);
+  unsigned lane = 0;
+  // Calculate the number of lanes from the number of vectors and the
+  // transferred bytes. NumLanes = 16 bytes / bytes of each lane
+  unsigned NumLanes = 16 / (TransferBytes / NumVecs);
+  switch (NumLanes) {
+  case 16: // A vector has 16 lanes, each lane is 1 byte.
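  // [Annotation, not part of the patch] Q and S concatenate into the lane
  // index: e.g. Q = 1, S = 0b010 selects lane 10 of a 16-lane (.b) vector.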
+    lane = (Q << 3) | S;
+    break;
+  case 8:
+    lane = (Q << 2) | (S >> 1);
+    break;
+  case 4:
+    lane = (Q << 1) | (S >> 2);
+    break;
+  case 2:
+    lane = Q;
+    break;
+  }
+  Inst.addOperand(MCOperand::CreateImm(lane));
+
+  return MCDisassembler::Success;
+}
+
+static DecodeStatus DecodeSHLLInstruction(MCInst &Inst, unsigned Insn,
+                                          uint64_t Address,
+                                          const void *Decoder) {
+  unsigned Rd = fieldFromInstruction(Insn, 0, 5);
+  unsigned Rn = fieldFromInstruction(Insn, 5, 5);
+  unsigned size = fieldFromInstruction(Insn, 22, 2);
+  unsigned Q = fieldFromInstruction(Insn, 30, 1);
+
+  DecodeFPR128RegisterClass(Inst, Rd, Address, Decoder);
+
+  if (Q)
+    DecodeFPR128RegisterClass(Inst, Rn, Address, Decoder);
+  else
+    DecodeFPR64RegisterClass(Inst, Rn, Address, Decoder);
+
+  switch (size) {
+  case 0:
+    Inst.addOperand(MCOperand::CreateImm(8));
+    break;
+  case 1:
+    Inst.addOperand(MCOperand::CreateImm(16));
+    break;
+  case 2:
+    Inst.addOperand(MCOperand::CreateImm(32));
+    break;
+  default:
+    return MCDisassembler::Fail;
+  }
+  return MCDisassembler::Success;
+}
diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
index 82ce80c..0438de3 100644
--- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
+++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp
@@ -368,6 +368,14 @@ AArch64InstPrinter::printSImm7ScaledOperand(const MCInst *MI, unsigned OpNum,
   O << "#" << (Imm * MemScale);
 }
 
+void AArch64InstPrinter::printVPRRegister(const MCInst *MI, unsigned OpNo,
+                                          raw_ostream &O) {
+  unsigned Reg = MI->getOperand(OpNo).getReg();
+  std::string Name = getRegisterName(Reg);
+  Name[0] = 'v';
+  O << Name;
+}
+
 void AArch64InstPrinter::printOperand(const MCInst *MI, unsigned OpNo,
                                       raw_ostream &O) {
   const MCOperand &Op = MI->getOperand(OpNo);
@@ -406,3 +414,126 @@ void AArch64InstPrinter::printInst(const MCInst *MI, raw_ostream &O,
   printAnnotation(O, Annot);
 }
+
+template <A64SE::ShiftExtSpecifiers Ext, bool isHalf>
+void AArch64InstPrinter::printNeonMovImmShiftOperand(const MCInst *MI,
+                                                     unsigned OpNum,
+                                                     raw_ostream &O) {
+  const MCOperand &MO = MI->getOperand(OpNum);
+
+  assert(MO.isImm() &&
+         "Immediate operand required for Neon vector immediate inst.");
+
+  bool IsLSL = false;
+  if (Ext == A64SE::LSL)
+    IsLSL = true;
+  else if (Ext != A64SE::MSL)
+    llvm_unreachable("Invalid shift specifier in movi instruction");
+
+  int64_t Imm = MO.getImm();
+
+  // MSL and LSLH accept an encoded shift amount of 0 or 1.
+  if ((!IsLSL || (IsLSL && isHalf)) && Imm != 0 && Imm != 1)
+    llvm_unreachable("Invalid shift amount in movi instruction");
+
+  // LSL accepts an encoded shift amount of 0, 1, 2 or 3.
+  if (IsLSL && (Imm < 0 || Imm > 3))
+    llvm_unreachable("Invalid shift amount in movi instruction");
+
+  // Print the shift amount as a multiple of 8, with MSL encoded shift
+  // amounts 0 and 1 printed as 8 and 16.
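  // [Annotation, not part of the patch] e.g. MSL with encoded amount 1
  // prints ", msl #16", and LSL with encoded amount 2 prints ", lsl #16".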
+  if (!IsLSL)
+    Imm++;
+  Imm *= 8;
+
+  // LSL #0 is not printed
+  if (IsLSL) {
+    if (Imm == 0)
+      return;
+    O << ", lsl";
+  } else
+    O << ", msl";
+
+  O << " #" << Imm;
+}
+
+void AArch64InstPrinter::printNeonUImm0Operand(const MCInst *MI, unsigned OpNum,
+                                               raw_ostream &o) {
+  o << "#0x0";
+}
+
+void AArch64InstPrinter::printUImmHexOperand(const MCInst *MI, unsigned OpNum,
+                                             raw_ostream &O) {
+  const MCOperand &MOUImm = MI->getOperand(OpNum);
+
+  assert(MOUImm.isImm() &&
+         "Immediate operand required for Neon vector immediate inst.");
+
+  unsigned Imm = MOUImm.getImm();
+
+  O << "#0x";
+  O.write_hex(Imm);
+}
+
+void AArch64InstPrinter::printUImmBareOperand(const MCInst *MI,
+                                              unsigned OpNum,
+                                              raw_ostream &O) {
+  const MCOperand &MOUImm = MI->getOperand(OpNum);
+
+  assert(MOUImm.isImm()
+         && "Immediate operand required for Neon vector immediate inst.");
+
+  unsigned Imm = MOUImm.getImm();
+  O << Imm;
+}
+
+void AArch64InstPrinter::printNeonUImm64MaskOperand(const MCInst *MI,
+                                                    unsigned OpNum,
+                                                    raw_ostream &O) {
+  const MCOperand &MOUImm8 = MI->getOperand(OpNum);
+
+  assert(MOUImm8.isImm() &&
+         "Immediate operand required for Neon vector immediate bytemask inst.");
+
+  uint32_t UImm8 = MOUImm8.getImm();
+  uint64_t Mask = 0;
+
+  // Replicates 0x00 or 0xff byte in a 64-bit vector
+  for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) {
+    if ((UImm8 >> ByteNum) & 1)
+      Mask |= (uint64_t)0xff << (8 * ByteNum);
+  }
+
+  O << "#0x";
+  O.write_hex(Mask);
+}
+
+// If Count > 1, there are two valid kinds of vector list:
+// (1) {Vn.layout, Vn+1.layout, ... , Vm.layout}
+// (2) {Vn.layout - Vm.layout}
+// We choose the first kind as output.
+template <A64Layout::VectorLayout Layout, unsigned Count>
+void AArch64InstPrinter::printVectorList(const MCInst *MI, unsigned OpNum,
+                                         raw_ostream &O) {
+  assert(Count >= 1 && Count <= 4 && "Invalid Number of Vectors");
+
+  unsigned Reg = MI->getOperand(OpNum).getReg();
+  std::string LayoutStr = A64VectorLayoutToString(Layout);
+  O << "{";
+  if (Count > 1) { // Print sub registers separately
+    bool IsVec64 = (Layout < A64Layout::VL_16B);
+    unsigned SubRegIdx = IsVec64 ? AArch64::dsub_0 : AArch64::qsub_0;
+    for (unsigned I = 0; I < Count; I++) {
+      std::string Name = getRegisterName(MRI.getSubReg(Reg, SubRegIdx++));
+      Name[0] = 'v';
+      O << Name << LayoutStr;
+      if (I != Count - 1)
+        O << ", ";
+    }
+  } else { // Print the register directly when NumVecs is 1.
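// [Editor's note -- worked example, not part of the upstream patch: with
// Count == 1, Reg == Q7 and Layout == VL_16B this branch prints "{v7.16b}";
// with Count == 2 the loop above would print "{v7.16b, v8.16b}", assuming
// the pair register maps to consecutive q sub-registers.]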
+ std::string Name = getRegisterName(Reg); + Name[0] = 'v'; + O << Name << LayoutStr; + } + O << "}"; +} diff --git a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h index 639fa86..37b7273 100644 --- a/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h +++ b/contrib/llvm/lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h @@ -157,6 +157,7 @@ public: void printRegExtendOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O, A64SE::ShiftExtSpecifiers Ext); + void printVPRRegister(const MCInst *MI, unsigned OpNo, raw_ostream &O); void printOperand(const MCInst *MI, unsigned OpNo, raw_ostream &O); virtual void printInst(const MCInst *MI, raw_ostream &O, StringRef Annot); @@ -164,9 +165,18 @@ public: return RegNo == AArch64::XSP || RegNo == AArch64::WSP; } - + template + void printNeonMovImmShiftOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + void printNeonUImm0Operand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printUImmHexOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printUImmBareOperand(const MCInst *MI, unsigned OpNum, raw_ostream &O); + void printNeonUImm64MaskOperand(const MCInst *MI, unsigned OpNum, + raw_ostream &O); + + template + void printVectorList(const MCInst *MI, unsigned OpNum, raw_ostream &O); }; - } #endif diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp index a3373b1..8a9077c 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp @@ -578,8 +578,8 @@ static uint64_t adjustFixupValue(unsigned Kind, uint64_t Value) { } MCAsmBackend * -llvm::createAArch64AsmBackend(const Target &T, StringRef TT, StringRef CPU) { +llvm::createAArch64AsmBackend(const Target &T, const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU) { Triple TheTriple(TT); - return new ELFAArch64AsmBackend(T, TT, TheTriple.getOS()); } diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp index 3b811df..a64c463 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp @@ -55,11 +55,10 @@ namespace { /// by MachO. Beware! class AArch64ELFStreamer : public MCELFStreamer { public: - AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, - raw_ostream &OS, MCCodeEmitter *Emitter) - : MCELFStreamer(Context, TAB, OS, Emitter), - MappingSymbolCounter(0), LastEMS(EMS_None) { - } + AArch64ELFStreamer(MCContext &Context, MCAsmBackend &TAB, raw_ostream &OS, + MCCodeEmitter *Emitter) + : MCELFStreamer(Context, 0, TAB, OS, Emitter), MappingSymbolCounter(0), + LastEMS(EMS_None) {} ~AArch64ELFStreamer() {} @@ -85,18 +84,17 @@ public: /// This is one of the functions used to emit data into an ELF section, so the /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. - virtual void EmitBytes(StringRef Data, unsigned AddrSpace) { + virtual void EmitBytes(StringRef Data) { EmitDataMappingSymbol(); - MCELFStreamer::EmitBytes(Data, AddrSpace); + MCELFStreamer::EmitBytes(Data); } /// This is one of the functions used to emit data into an ELF section, so the /// AArch64 streamer overrides it to add the appropriate mapping symbol ($d) /// if necessary. 
- virtual void EmitValueImpl(const MCExpr *Value, unsigned Size, - unsigned AddrSpace) { + virtual void EmitValueImpl(const MCExpr *Value, unsigned Size) { EmitDataMappingSymbol(); - MCELFStreamer::EmitValueImpl(Value, Size, AddrSpace); + MCELFStreamer::EmitValueImpl(Value, Size); } private: @@ -130,7 +128,7 @@ private: MCELF::SetType(SD, ELF::STT_NOTYPE); MCELF::SetBinding(SD, ELF::STB_LOCAL); SD.setExternal(false); - Symbol->setSection(*getCurrentSection().first); + AssignSection(Symbol, getCurrentSection().first); const MCExpr *Value = MCSymbolRefExpr::Create(Start, getContext()); Symbol->setVariableValue(Value); diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp index 8ec8cbf..add874c 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp @@ -31,11 +31,12 @@ AArch64ELFMCAsmInfo::AArch64ELFMCAsmInfo() { UseDataRegionDirectives = true; - WeakRefDirective = "\t.weak\t"; - HasLEB128 = true; SupportsDebugInformation = true; // Exceptions handling ExceptionsType = ExceptionHandling::DwarfCFI; } + +// Pin the vtable to this file. +void AArch64ELFMCAsmInfo::anchor() {} diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h index a20bc47..d1dd285 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h @@ -14,13 +14,15 @@ #ifndef LLVM_AARCH64TARGETASMINFO_H #define LLVM_AARCH64TARGETASMINFO_H -#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCAsmInfoELF.h" namespace llvm { - struct AArch64ELFMCAsmInfo : public MCAsmInfo { - explicit AArch64ELFMCAsmInfo(); - }; +struct AArch64ELFMCAsmInfo : public MCAsmInfoELF { + explicit AArch64ELFMCAsmInfo(); +private: + virtual void anchor(); +}; } // namespace llvm diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp index a5c591e..b41c566 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp @@ -59,6 +59,23 @@ public: unsigned getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, SmallVectorImpl &Fixups) const; + unsigned getShiftRightImm8(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftRightImm16(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftRightImm32(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftRightImm64(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + + unsigned getShiftLeftImm8(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftLeftImm16(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftLeftImm32(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; + unsigned getShiftLeftImm64(const MCInst &MI, unsigned Op, + SmallVectorImpl &Fixups) const; // Labels are handled mostly the same way: a symbol is needed, and // just gets some fixup attached. 
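[Editor's note, not part of the upstream patch: the getShiftRightImm*/
getShiftLeftImm* hooks declared above all map the operand's printed immediate
to its encoded field by offsetting it against the element width. A minimal
stand-alone C++ sketch of that mapping, with encodeShiftRightImm and
encodeShiftLeftImm as hypothetical names:

  // Right-shift immediates encode as EltBits - Imm, so #3 on a 16-bit
  // element encodes as 13; left-shift immediates encode as Imm - EltBits.
  // This mirrors the definitions added later in this patch.
  static unsigned encodeShiftRightImm(unsigned EltBits, unsigned Imm) {
    return EltBits - Imm;
  }
  static unsigned encodeShiftLeftImm(unsigned EltBits, unsigned Imm) {
    return Imm - EltBits;
  }

For example, encodeShiftRightImm(16, 3) == 13.]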
@@ -152,10 +169,10 @@ getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, switch (Expr->getKind()) { default: llvm_unreachable("Unexpected operand modifier"); case AArch64MCExpr::VK_AARCH64_LO12: { - unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, - AArch64::fixup_a64_ldst16_lo12, - AArch64::fixup_a64_ldst32_lo12, - AArch64::fixup_a64_ldst64_lo12, + static const unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_lo12, + AArch64::fixup_a64_ldst16_lo12, + AArch64::fixup_a64_ldst32_lo12, + AArch64::fixup_a64_ldst64_lo12, AArch64::fixup_a64_ldst128_lo12 }; assert(MemSize <= 16 && "Invalid fixup for operation"); FixupKind = FixupsBySize[Log2_32(MemSize)]; @@ -166,19 +183,23 @@ getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, FixupKind = AArch64::fixup_a64_ld64_got_lo12_nc; break; case AArch64MCExpr::VK_AARCH64_DTPREL_LO12: { - unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12, - AArch64::fixup_a64_ldst16_dtprel_lo12, - AArch64::fixup_a64_ldst32_dtprel_lo12, - AArch64::fixup_a64_ldst64_dtprel_lo12 }; + static const unsigned FixupsBySize[] = { + AArch64::fixup_a64_ldst8_dtprel_lo12, + AArch64::fixup_a64_ldst16_dtprel_lo12, + AArch64::fixup_a64_ldst32_dtprel_lo12, + AArch64::fixup_a64_ldst64_dtprel_lo12 + }; assert(MemSize <= 8 && "Invalid fixup for operation"); FixupKind = FixupsBySize[Log2_32(MemSize)]; break; } case AArch64MCExpr::VK_AARCH64_DTPREL_LO12_NC: { - unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_dtprel_lo12_nc, - AArch64::fixup_a64_ldst16_dtprel_lo12_nc, - AArch64::fixup_a64_ldst32_dtprel_lo12_nc, - AArch64::fixup_a64_ldst64_dtprel_lo12_nc }; + static const unsigned FixupsBySize[] = { + AArch64::fixup_a64_ldst8_dtprel_lo12_nc, + AArch64::fixup_a64_ldst16_dtprel_lo12_nc, + AArch64::fixup_a64_ldst32_dtprel_lo12_nc, + AArch64::fixup_a64_ldst64_dtprel_lo12_nc + }; assert(MemSize <= 8 && "Invalid fixup for operation"); FixupKind = FixupsBySize[Log2_32(MemSize)]; break; @@ -188,19 +209,23 @@ getOffsetUImm12OpValue(const MCInst &MI, unsigned OpIdx, FixupKind = AArch64::fixup_a64_ld64_gottprel_lo12_nc; break; case AArch64MCExpr::VK_AARCH64_TPREL_LO12:{ - unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12, - AArch64::fixup_a64_ldst16_tprel_lo12, - AArch64::fixup_a64_ldst32_tprel_lo12, - AArch64::fixup_a64_ldst64_tprel_lo12 }; + static const unsigned FixupsBySize[] = { + AArch64::fixup_a64_ldst8_tprel_lo12, + AArch64::fixup_a64_ldst16_tprel_lo12, + AArch64::fixup_a64_ldst32_tprel_lo12, + AArch64::fixup_a64_ldst64_tprel_lo12 + }; assert(MemSize <= 8 && "Invalid fixup for operation"); FixupKind = FixupsBySize[Log2_32(MemSize)]; break; } case AArch64MCExpr::VK_AARCH64_TPREL_LO12_NC: { - unsigned FixupsBySize[] = { AArch64::fixup_a64_ldst8_tprel_lo12_nc, - AArch64::fixup_a64_ldst16_tprel_lo12_nc, - AArch64::fixup_a64_ldst32_tprel_lo12_nc, - AArch64::fixup_a64_ldst64_tprel_lo12_nc }; + static const unsigned FixupsBySize[] = { + AArch64::fixup_a64_ldst8_tprel_lo12_nc, + AArch64::fixup_a64_ldst16_tprel_lo12_nc, + AArch64::fixup_a64_ldst32_tprel_lo12_nc, + AArch64::fixup_a64_ldst64_tprel_lo12_nc + }; assert(MemSize <= 8 && "Invalid fixup for operation"); FixupKind = FixupsBySize[Log2_32(MemSize)]; break; @@ -302,6 +327,45 @@ AArch64MCCodeEmitter::getBitfield64LSLOpValue(const MCInst &MI, unsigned OpIdx, return ((64 - MO.getImm()) & 0x3f) | (63 - MO.getImm()) << 6; } +unsigned AArch64MCCodeEmitter::getShiftRightImm8( + const MCInst &MI, unsigned Op, SmallVectorImpl &Fixups) const { + return 8 - MI.getOperand(Op).getImm(); +} + +unsigned 
AArch64MCCodeEmitter::getShiftRightImm16(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return 16 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm32(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return 32 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftRightImm64(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return 64 - MI.getOperand(Op).getImm();
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm8(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return MI.getOperand(Op).getImm() - 8;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm16(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return MI.getOperand(Op).getImm() - 16;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm32(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return MI.getOperand(Op).getImm() - 32;
+}
+
+unsigned AArch64MCCodeEmitter::getShiftLeftImm64(
+    const MCInst &MI, unsigned Op, SmallVectorImpl<MCFixup> &Fixups) const {
+  return MI.getOperand(Op).getImm() - 64;
+}
 
 template<AArch64::Fixups fixupDesired> unsigned
 AArch64MCCodeEmitter::getLabelOpValue(const MCInst &MI,
@@ -346,7 +410,7 @@ AArch64MCCodeEmitter::getMachineOpValue(const MCInst &MI,
                                         const MCOperand &MO,
                                         SmallVectorImpl<MCFixup> &Fixups) const {
   if (MO.isReg()) {
-    return Ctx.getRegisterInfo().getEncodingValue(MO.getReg());
+    return Ctx.getRegisterInfo()->getEncodingValue(MO.getReg());
   } else if (MO.isImm()) {
     return static_cast<unsigned>(MO.getImm());
   }
diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
index 819eead..58fc95c 100644
--- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
+++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp
@@ -40,7 +40,7 @@ MCSubtargetInfo *AArch64_MC::createAArch64MCSubtargetInfo(StringRef TT,
                                                           StringRef CPU,
                                                           StringRef FS) {
   MCSubtargetInfo *X = new MCSubtargetInfo();
-  InitAArch64MCSubtargetInfo(X, TT, CPU, "");
+  InitAArch64MCSubtargetInfo(X, TT, CPU, FS);
   return X;
 }
 
@@ -57,13 +57,14 @@ static MCRegisterInfo *createAArch64MCRegisterInfo(StringRef Triple) {
   return X;
 }
 
-static MCAsmInfo *createAArch64MCAsmInfo(const Target &T, StringRef TT) {
+static MCAsmInfo *createAArch64MCAsmInfo(const MCRegisterInfo &MRI,
+                                         StringRef TT) {
   Triple TheTriple(TT);
 
   MCAsmInfo *MAI = new AArch64ELFMCAsmInfo();
-  MachineLocation Dst(MachineLocation::VirtualFP);
-  MachineLocation Src(AArch64::XSP, 0);
-  MAI->addInitialFrameState(0, Dst, Src);
+  unsigned Reg = MRI.getDwarfRegNum(AArch64::XSP, true);
+  MCCFIInstruction Inst = MCCFIInstruction::createDefCfa(0, Reg, 0);
+  MAI->addInitialFrameState(Inst);
 
   return MAI;
 }
@@ -135,17 +136,17 @@ public:
     return MCInstrAnalysis::isConditionalBranch(Inst);
   }
 
-  uint64_t evaluateBranch(const MCInst &Inst, uint64_t Addr,
-                          uint64_t Size) const {
+  bool evaluateBranch(const MCInst &Inst, uint64_t Addr,
+                      uint64_t Size, uint64_t &Target) const {
     unsigned LblOperand = Inst.getOpcode() == AArch64::Bcc ? 1 : 0;
     // FIXME: We only handle PCRel branches for now.
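// [Editor's note -- illustrative sketch, not part of the upstream patch:
// with the new bool-returning signature above, a caller checks success
// instead of comparing against -1ULL, e.g.
//   uint64_t Target;
//   if (MIA->evaluateBranch(Inst, Addr, Size, Target))
//     ; // Target now holds Addr + Imm
// where MIA is a hypothetical MCInstrAnalysis pointer.]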
if (Info->get(Inst.getOpcode()).OpInfo[LblOperand].OperandType != MCOI::OPERAND_PCREL) - return -1ULL; + return false; int64_t Imm = Inst.getOperand(LblOperand).getImm(); - - return Addr + Imm; + Target = Addr + Imm; + return true; } }; diff --git a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h index 3849fe3..670e657 100644 --- a/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h +++ b/contrib/llvm/lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h @@ -43,8 +43,9 @@ MCCodeEmitter *createAArch64MCCodeEmitter(const MCInstrInfo &MCII, MCObjectWriter *createAArch64ELFObjectWriter(raw_ostream &OS, uint8_t OSABI); -MCAsmBackend *createAArch64AsmBackend(const Target &T, StringRef TT, - StringRef CPU); +MCAsmBackend *createAArch64AsmBackend(const Target &T, + const MCRegisterInfo &MRI, + StringRef TT, StringRef CPU); } // End llvm namespace diff --git a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp index fc706a4..377b533 100644 --- a/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp @@ -20,5 +20,5 @@ Target llvm::TheAArch64Target; extern "C" void LLVMInitializeAArch64TargetInfo() { RegisterTarget - X(TheAArch64Target, "aarch64", "AArch64"); + X(TheAArch64Target, "aarch64", "AArch64 (ARM 64-bit target)"); } diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp index bedccb5..2a97cd6 100644 --- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.cpp @@ -972,7 +972,7 @@ bool A64Imms::isLogicalImm(unsigned RegWidth, uint64_t Imm, uint32_t &Bits) { // Now we have to work out the amount of rotation needed. The first part of // this calculation is actually independent of RepeatWidth, but the complex // case will depend on it. - Rotation = CountTrailingZeros_64(Imm); + Rotation = countTrailingZeros(Imm); if (Rotation == 0) { // There were no leading zeros, which means it's either in place or there // are 1s at each end (e.g. 0x8003 needs rotating). @@ -1105,3 +1105,69 @@ bool A64Imms::isOnlyMOVNImm(int RegWidth, uint64_t Value, return isMOVNImm(RegWidth, Value, UImm16, Shift); } + +// decodeNeonModShiftImm - Decode a Neon OpCmode value into the +// the shift amount and the shift type (shift zeros or ones in) and +// returns whether the OpCmode value implies a shift operation. +bool A64Imms::decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm, + unsigned &ShiftOnesIn) { + ShiftImm = 0; + ShiftOnesIn = false; + bool HasShift = true; + + if (OpCmode == 0xe) { + // movi byte + HasShift = false; + } else if (OpCmode == 0x1e) { + // movi 64-bit bytemask + HasShift = false; + } else if ((OpCmode & 0xc) == 0x8) { + // shift zeros, per halfword + ShiftImm = ((OpCmode & 0x2) >> 1); + } else if ((OpCmode & 0x8) == 0) { + // shift zeros, per word + ShiftImm = ((OpCmode & 0x6) >> 1); + } else if ((OpCmode & 0xe) == 0xc) { + // shift ones, per word + ShiftOnesIn = true; + ShiftImm = (OpCmode & 0x1); + } else { + // per byte, per bytemask + llvm_unreachable("Unsupported Neon modified immediate"); + } + + return HasShift; +} + +// decodeNeonModImm - Decode a NEON modified immediate and OpCmode values +// into the element value and the element size in bits. 
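// [Editor's note -- worked example, not part of the upstream patch: for
// OpCmode == 0x1e (the "movi 64-bit bytemask" case) and Val == 0x05, bits 0
// and 2 of Val select bytes 0 and 2, so the function below returns
// 0x0000000000ff00ff with EltBits == 64.]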
+uint64_t A64Imms::decodeNeonModImm(unsigned Val, unsigned OpCmode, + unsigned &EltBits) { + uint64_t DecodedVal = Val; + EltBits = 0; + + if (OpCmode == 0xe) { + // movi byte + EltBits = 8; + } else if (OpCmode == 0x1e) { + // movi 64-bit bytemask + DecodedVal = 0; + for (unsigned ByteNum = 0; ByteNum < 8; ++ByteNum) { + if ((Val >> ByteNum) & 1) + DecodedVal |= (uint64_t)0xff << (8 * ByteNum); + } + EltBits = 64; + } else if ((OpCmode & 0xc) == 0x8) { + // shift zeros, per halfword + EltBits = 16; + } else if ((OpCmode & 0x8) == 0) { + // shift zeros, per word + EltBits = 32; + } else if ((OpCmode & 0xe) == 0xc) { + // shift ones, per word + EltBits = 32; + } else { + llvm_unreachable("Unsupported Neon modified immediate"); + } + return DecodedVal; +} diff --git a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h index 9a1ca61..ce970b0 100644 --- a/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h +++ b/contrib/llvm/lib/Target/AArch64/Utils/AArch64BaseInfo.h @@ -289,6 +289,7 @@ namespace A64SE { enum ShiftExtSpecifiers { Invalid = -1, LSL, + MSL, LSR, ASR, ROR, @@ -305,6 +306,65 @@ namespace A64SE { }; } +namespace A64Layout { + enum VectorLayout { + Invalid = -1, + VL_8B, + VL_4H, + VL_2S, + VL_1D, + + VL_16B, + VL_8H, + VL_4S, + VL_2D, + + // Bare layout for the 128-bit vector + // (only show ".b", ".h", ".s", ".d" without vector number) + VL_B, + VL_H, + VL_S, + VL_D + }; +} + +inline static const char * +A64VectorLayoutToString(A64Layout::VectorLayout Layout) { + switch (Layout) { + case A64Layout::VL_8B: return ".8b"; + case A64Layout::VL_4H: return ".4h"; + case A64Layout::VL_2S: return ".2s"; + case A64Layout::VL_1D: return ".1d"; + case A64Layout::VL_16B: return ".16b"; + case A64Layout::VL_8H: return ".8h"; + case A64Layout::VL_4S: return ".4s"; + case A64Layout::VL_2D: return ".2d"; + case A64Layout::VL_B: return ".b"; + case A64Layout::VL_H: return ".h"; + case A64Layout::VL_S: return ".s"; + case A64Layout::VL_D: return ".d"; + default: llvm_unreachable("Unknown Vector Layout"); + } +} + +inline static A64Layout::VectorLayout +A64StringToVectorLayout(StringRef LayoutStr) { + return StringSwitch(LayoutStr) + .Case(".8b", A64Layout::VL_8B) + .Case(".4h", A64Layout::VL_4H) + .Case(".2s", A64Layout::VL_2S) + .Case(".1d", A64Layout::VL_1D) + .Case(".16b", A64Layout::VL_16B) + .Case(".8h", A64Layout::VL_8H) + .Case(".4s", A64Layout::VL_4S) + .Case(".2d", A64Layout::VL_2D) + .Case(".b", A64Layout::VL_B) + .Case(".h", A64Layout::VL_H) + .Case(".s", A64Layout::VL_S) + .Case(".d", A64Layout::VL_D) + .Default(A64Layout::Invalid); +} + namespace A64SysReg { enum SysRegROValues { MDCCSR_EL0 = 0x9808, // 10 011 0000 0001 000 @@ -1068,7 +1128,10 @@ namespace A64Imms { // MOVN but *not* with a MOVZ (because that would take priority). 
bool isOnlyMOVNImm(int RegWidth, uint64_t Value, int &UImm16, int &Shift); -} + uint64_t decodeNeonModImm(unsigned Val, unsigned OpCmode, unsigned &EltBits); + bool decodeNeonModShiftImm(unsigned OpCmode, unsigned &ShiftImm, + unsigned &ShiftOnesIn); + } } // end namespace llvm; diff --git a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp index f0d4dbe..ff585b4 100644 --- a/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp +++ b/contrib/llvm/lib/Target/ARM/A15SDOptimizer.cpp @@ -615,7 +615,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { SmallVector Defs = getReadDPRs(MI); bool Modified = false; - for (SmallVector::iterator I = Defs.begin(), E = Defs.end(); + for (SmallVectorImpl::iterator I = Defs.begin(), E = Defs.end(); I != E; ++I) { // Follow the def-use chain for this DPR through COPYs, and also through // PHIs (which are essentially multi-way COPYs). It is because of PHIs that @@ -630,7 +630,7 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { elideCopiesAndPHIs(Def, DefSrcs); - for (SmallVector::iterator II = DefSrcs.begin(), + for (SmallVectorImpl::iterator II = DefSrcs.begin(), EE = DefSrcs.end(); II != EE; ++II) { MachineInstr *MI = *II; @@ -655,8 +655,15 @@ bool A15SDOptimizer::runOnInstruction(MachineInstr *MI) { if (NewReg != 0) { Modified = true; - for (SmallVector::const_iterator I = Uses.begin(), + for (SmallVectorImpl::const_iterator I = Uses.begin(), E = Uses.end(); I != E; ++I) { + // Make sure to constrain the register class of the new register to + // match what we're replacing. Otherwise we can optimize a DPR_VFP2 + // reference into a plain DPR, and that will end poorly. NewReg is + // always virtual here, so there will always be a matching subclass + // to find. 
+ MRI->constrainRegClass(NewReg, MRI->getRegClass((*I)->getReg())); + DEBUG(dbgs() << "Replacing operand " << **I << " with " << PrintReg(NewReg) << "\n"); diff --git a/contrib/llvm/lib/Target/ARM/ARM.td b/contrib/llvm/lib/Target/ARM/ARM.td index 2d747091..36e5680 100644 --- a/contrib/llvm/lib/Target/ARM/ARM.td +++ b/contrib/llvm/lib/Target/ARM/ARM.td @@ -38,12 +38,16 @@ def FeatureNEON : SubtargetFeature<"neon", "HasNEON", "true", def FeatureThumb2 : SubtargetFeature<"thumb2", "HasThumb2", "true", "Enable Thumb2 instructions">; def FeatureNoARM : SubtargetFeature<"noarm", "NoARM", "true", - "Does not support ARM mode execution">; + "Does not support ARM mode execution", + [ModeThumb]>; def FeatureFP16 : SubtargetFeature<"fp16", "HasFP16", "true", "Enable half-precision floating point">; def FeatureVFP4 : SubtargetFeature<"vfp4", "HasVFPv4", "true", "Enable VFP4 instructions", [FeatureVFP3, FeatureFP16]>; +def FeatureFPARMv8 : SubtargetFeature<"fp-armv8", "HasFPARMv8", + "true", "Enable ARMv8 FP", + [FeatureVFP4]>; def FeatureD16 : SubtargetFeature<"d16", "HasD16", "true", "Restrict VFP3 to 16 double registers">; def FeatureHWDiv : SubtargetFeature<"hwdiv", "HasHardwareDivide", "true", @@ -59,8 +63,15 @@ def FeatureSlowFPBrcc : SubtargetFeature<"slow-fp-brcc", "SlowFPBrcc", "true", "FP compare + branch is slow">; def FeatureVFPOnlySP : SubtargetFeature<"fp-only-sp", "FPOnlySP", "true", "Floating point unit supports single precision only">; +def FeaturePerfMon : SubtargetFeature<"perfmon", "HasPerfMon", "true", + "Enable support for Performance Monitor extensions">; def FeatureTrustZone : SubtargetFeature<"trustzone", "HasTrustZone", "true", "Enable support for TrustZone security extensions">; +def FeatureCrypto : SubtargetFeature<"crypto", "HasCrypto", "true", + "Enable support for Cryptography extensions", + [FeatureNEON]>; +def FeatureCRC : SubtargetFeature<"crc", "HasCRC", "true", + "Enable support for CRC instructions">; // Some processors have FP multiply-accumulate instructions that don't // play nicely with other VFP / NEON instructions, and it's generally better @@ -108,10 +119,24 @@ def FeatureDSPThumb2 : SubtargetFeature<"t2dsp", "Thumb2DSP", "true", def FeatureMP : SubtargetFeature<"mp", "HasMPExtension", "true", "Supports Multiprocessing extension">; -// M-series ISA? -def FeatureMClass : SubtargetFeature<"mclass", "IsMClass", "true", +// Virtualization extension - requires HW divide (ARMv7-AR ARMARM - 4.4.8). +def FeatureVirtualization : SubtargetFeature<"virtualization", + "HasVirtualization", "true", + "Supports Virtualization extension", + [FeatureHWDiv, FeatureHWDivARM]>; + +// M-series ISA +def FeatureMClass : SubtargetFeature<"mclass", "ARMProcClass", "MClass", "Is microcontroller profile ('M' series)">; +// R-series ISA +def FeatureRClass : SubtargetFeature<"rclass", "ARMProcClass", "RClass", + "Is realtime profile ('R' series)">; + +// A-series ISA +def FeatureAClass : SubtargetFeature<"aclass", "ARMProcClass", "AClass", + "Is application profile ('A' series)">; + // Special TRAP encoding for NaCl, which looks like a TRAP in Thumb too. // See ARMInstrInfo.td for details. 
def FeatureNaClTrap : SubtargetFeature<"nacl-trap", "UseNaClTrap", "true", @@ -129,12 +154,19 @@ def HasV5TEOps : SubtargetFeature<"v5te", "HasV5TEOps", "true", def HasV6Ops : SubtargetFeature<"v6", "HasV6Ops", "true", "Support ARM v6 instructions", [HasV5TEOps]>; +def HasV6MOps : SubtargetFeature<"v6m", "HasV6MOps", "true", + "Support ARM v6M instructions", + [HasV6Ops]>; def HasV6T2Ops : SubtargetFeature<"v6t2", "HasV6T2Ops", "true", "Support ARM v6t2 instructions", - [HasV6Ops, FeatureThumb2]>; + [HasV6MOps, FeatureThumb2]>; def HasV7Ops : SubtargetFeature<"v7", "HasV7Ops", "true", "Support ARM v7 instructions", - [HasV6T2Ops]>; + [HasV6T2Ops, FeaturePerfMon]>; +def HasV8Ops : SubtargetFeature<"v8", "HasV8Ops", "true", + "Support ARM v8 instructions", + [HasV7Ops, FeatureVirtualization, + FeatureMP]>; //===----------------------------------------------------------------------===// // ARM Processors supported. @@ -170,12 +202,27 @@ def ProcSwift : SubtargetFeature<"swift", "ARMProcFamily", "Swift", // FIXME: It has not been determined if A15 has these features. def ProcA15 : SubtargetFeature<"a15", "ARMProcFamily", "CortexA15", "Cortex-A15 ARM processors", - [FeatureT2XtPk, FeatureFP16, + [FeatureT2XtPk, FeatureVFP4, + FeatureMP, FeatureHWDiv, FeatureHWDivARM, FeatureAvoidPartialCPSR, - FeatureTrustZone]>; + FeatureTrustZone, FeatureVirtualization]>; + +def ProcA53 : SubtargetFeature<"a53", "ARMProcFamily", "CortexA53", + "Cortex-A53 ARM processors", + [FeatureHWDiv, FeatureHWDivARM, + FeatureTrustZone, FeatureT2XtPk, + FeatureCrypto, FeatureCRC]>; + +def ProcA57 : SubtargetFeature<"a57", "ARMProcFamily", "CortexA57", + "Cortex-A57 ARM processors", + [FeatureHWDiv, FeatureHWDivARM, + FeatureTrustZone, FeatureT2XtPk, + FeatureCrypto, FeatureCRC]>; + def ProcR5 : SubtargetFeature<"r5", "ARMProcFamily", "CortexR5", "Cortex-R5 ARM processors", - [FeatureSlowFPBrcc, FeatureHWDivARM, + [FeatureSlowFPBrcc, + FeatureHWDiv, FeatureHWDivARM, FeatureHasSlowFPVMLx, FeatureAvoidPartialCPSR, FeatureT2XtPk]>; @@ -233,7 +280,7 @@ def : Processor<"mpcore", ARMV6Itineraries, [HasV6Ops, FeatureVFP2, FeatureHasSlowFPVMLx]>; // V6M Processors. -def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6Ops, FeatureNoARM, +def : Processor<"cortex-m0", ARMV6Itineraries, [HasV6MOps, FeatureNoARM, FeatureDB, FeatureMClass]>; // V6T2 Processors. @@ -248,26 +295,30 @@ def : Processor<"arm1156t2f-s", ARMV6Itineraries, [HasV6T2Ops, FeatureVFP2, def : ProcessorModel<"cortex-a5", CortexA8Model, [ProcA5, HasV7Ops, FeatureNEON, FeatureDB, FeatureVFP4, FeatureDSPThumb2, - FeatureHasRAS]>; + FeatureHasRAS, FeatureAClass]>; def : ProcessorModel<"cortex-a8", CortexA8Model, [ProcA8, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS]>; + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; def : ProcessorModel<"cortex-a9", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS]>; + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; def : ProcessorModel<"cortex-a9-mp", CortexA9Model, [ProcA9, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, FeatureMP, - FeatureHasRAS]>; + FeatureHasRAS, FeatureAClass]>; // FIXME: A15 has currently the same ProcessorModel as A9. def : ProcessorModel<"cortex-a15", CortexA9Model, [ProcA15, HasV7Ops, FeatureNEON, FeatureDB, - FeatureDSPThumb2, FeatureHasRAS]>; + FeatureDSPThumb2, FeatureHasRAS, + FeatureAClass]>; // FIXME: R5 has currently the same ProcessorModel as A8. 
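// [Editor's note, not part of the upstream patch: as an illustration of the
// implied-features lists above, ProcA53 lists FeatureCrypto, which in turn
// implies FeatureNEON, so selecting -mcpu=cortex-a53 would enable NEON even
// if the processor definition did not also name it explicitly -- assuming
// the usual transitive handling of SubtargetFeature implications.]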
def : ProcessorModel<"cortex-r5", CortexA8Model, [ProcR5, HasV7Ops, FeatureDB, FeatureVFP3, FeatureDSPThumb2, - FeatureHasRAS]>; + FeatureHasRAS, FeatureVFPOnlySP, + FeatureD16, FeatureRClass]>; // V7M Processors. def : ProcNoItin<"cortex-m3", [HasV7Ops, @@ -279,13 +330,22 @@ def : ProcNoItin<"cortex-m4", [HasV7Ops, FeatureThumb2, FeatureNoARM, FeatureDB, FeatureHWDiv, FeatureDSPThumb2, FeatureT2XtPk, FeatureVFP4, - FeatureVFPOnlySP, FeatureMClass]>; + FeatureVFPOnlySP, FeatureD16, + FeatureMClass]>; // Swift uArch Processors. def : ProcessorModel<"swift", SwiftModel, [ProcSwift, HasV7Ops, FeatureNEON, FeatureDB, FeatureDSPThumb2, - FeatureHasRAS]>; + FeatureHasRAS, FeatureAClass]>; + +// V8 Processors +def : ProcNoItin<"cortex-a53", [ProcA53, HasV8Ops, FeatureAClass, + FeatureDB, FeatureFPARMv8, + FeatureNEON, FeatureDSPThumb2]>; +def : ProcNoItin<"cortex-a57", [ProcA57, HasV8Ops, FeatureAClass, + FeatureDB, FeatureFPARMv8, + FeatureNEON, FeatureDSPThumb2]>; //===----------------------------------------------------------------------===// // Register File Description diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp index 13ec208..e79f88d 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.cpp @@ -17,6 +17,7 @@ #include "ARM.h" #include "ARMBuildAttrs.h" #include "ARMConstantPoolValue.h" +#include "ARMFPUName.h" #include "ARMMachineFunctionInfo.h" #include "ARMTargetMachine.h" #include "ARMTargetObjectFile.h" @@ -55,235 +56,67 @@ #include using namespace llvm; -namespace { - - // Per section and per symbol attributes are not supported. - // To implement them we would need the ability to delay this emission - // until the assembly file is fully parsed/generated as only then do we - // know the symbol and section numbers. 
- class AttributeEmitter { - public: - virtual void MaybeSwitchVendor(StringRef Vendor) = 0; - virtual void EmitAttribute(unsigned Attribute, unsigned Value) = 0; - virtual void EmitTextAttribute(unsigned Attribute, StringRef String) = 0; - virtual void Finish() = 0; - virtual ~AttributeEmitter() {} - }; - - class AsmAttributeEmitter : public AttributeEmitter { - MCStreamer &Streamer; - - public: - AsmAttributeEmitter(MCStreamer &Streamer_) : Streamer(Streamer_) {} - void MaybeSwitchVendor(StringRef Vendor) { } - - void EmitAttribute(unsigned Attribute, unsigned Value) { - Streamer.EmitRawText("\t.eabi_attribute " + - Twine(Attribute) + ", " + Twine(Value)); - } - - void EmitTextAttribute(unsigned Attribute, StringRef String) { - switch (Attribute) { - default: llvm_unreachable("Unsupported Text attribute in ASM Mode"); - case ARMBuildAttrs::CPU_name: - Streamer.EmitRawText(StringRef("\t.cpu ") + String.lower()); - break; - /* GAS requires .fpu to be emitted regardless of EABI attribute */ - case ARMBuildAttrs::Advanced_SIMD_arch: - case ARMBuildAttrs::VFP_arch: - Streamer.EmitRawText(StringRef("\t.fpu ") + String.lower()); - break; - } - } - void Finish() { } - }; - - class ObjectAttributeEmitter : public AttributeEmitter { - // This structure holds all attributes, accounting for - // their string/numeric value, so we can later emmit them - // in declaration order, keeping all in the same vector - struct AttributeItemType { - enum { - HiddenAttribute = 0, - NumericAttribute, - TextAttribute - } Type; - unsigned Tag; - unsigned IntValue; - StringRef StringValue; - } AttributeItem; - - MCObjectStreamer &Streamer; - StringRef CurrentVendor; - SmallVector Contents; - - // Account for the ULEB/String size of each item, - // not just the number of items - size_t ContentsSize; - // FIXME: this should be in a more generic place, but - // getULEBSize() is in MCAsmInfo and will be moved to MCDwarf - size_t getULEBSize(int Value) { - size_t Size = 0; - do { - Value >>= 7; - Size += sizeof(int8_t); // Is this really necessary? 
- } while (Value); - return Size; - } - - public: - ObjectAttributeEmitter(MCObjectStreamer &Streamer_) : - Streamer(Streamer_), CurrentVendor(""), ContentsSize(0) { } - - void MaybeSwitchVendor(StringRef Vendor) { - assert(!Vendor.empty() && "Vendor cannot be empty."); - - if (CurrentVendor.empty()) - CurrentVendor = Vendor; - else if (CurrentVendor == Vendor) - return; - else - Finish(); - - CurrentVendor = Vendor; - - assert(Contents.size() == 0); - } - - void EmitAttribute(unsigned Attribute, unsigned Value) { - AttributeItemType attr = { - AttributeItemType::NumericAttribute, - Attribute, - Value, - StringRef("") - }; - ContentsSize += getULEBSize(Attribute); - ContentsSize += getULEBSize(Value); - Contents.push_back(attr); - } - - void EmitTextAttribute(unsigned Attribute, StringRef String) { - AttributeItemType attr = { - AttributeItemType::TextAttribute, - Attribute, - 0, - String - }; - ContentsSize += getULEBSize(Attribute); - // String + \0 - ContentsSize += String.size()+1; - - Contents.push_back(attr); - } - - void Finish() { - // Vendor size + Vendor name + '\0' - const size_t VendorHeaderSize = 4 + CurrentVendor.size() + 1; - - // Tag + Tag Size - const size_t TagHeaderSize = 1 + 4; - - Streamer.EmitIntValue(VendorHeaderSize + TagHeaderSize + ContentsSize, 4); - Streamer.EmitBytes(CurrentVendor); - Streamer.EmitIntValue(0, 1); // '\0' - - Streamer.EmitIntValue(ARMBuildAttrs::File, 1); - Streamer.EmitIntValue(TagHeaderSize + ContentsSize, 4); - - // Size should have been accounted for already, now - // emit each field as its type (ULEB or String) - for (unsigned int i=0; igetNumOperands() == 4 && "Invalid no. of machine operands!"); - // Frame address. Currently handles register +- offset only. - if (MI->getOperand(0).isReg() && MI->getOperand(1).isImm()) - Location.set(MI->getOperand(0).getReg(), MI->getOperand(1).getImm()); - else { - DEBUG(dbgs() << "DBG_VALUE instruction ignored! " << *MI << "\n"); - } - return Location; -} - /// EmitDwarfRegOp - Emit dwarf register operation. -void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc) const { +void ARMAsmPrinter::EmitDwarfRegOp(const MachineLocation &MLoc, + bool Indirect) const { const TargetRegisterInfo *RI = TM.getRegisterInfo(); - if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) - AsmPrinter::EmitDwarfRegOp(MLoc); - else { - unsigned Reg = MLoc.getReg(); - if (Reg >= ARM::S0 && Reg <= ARM::S31) { - assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); - // S registers are described as bit-pieces of a register - // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) - // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) - - unsigned SReg = Reg - ARM::S0; - bool odd = SReg & 0x1; - unsigned Rx = 256 + (SReg >> 1); - - OutStreamer.AddComment("DW_OP_regx for S register"); - EmitInt8(dwarf::DW_OP_regx); - - OutStreamer.AddComment(Twine(SReg)); - EmitULEB128(Rx); - - if (odd) { - OutStreamer.AddComment("DW_OP_bit_piece 32 32"); - EmitInt8(dwarf::DW_OP_bit_piece); - EmitULEB128(32); - EmitULEB128(32); - } else { - OutStreamer.AddComment("DW_OP_bit_piece 32 0"); - EmitInt8(dwarf::DW_OP_bit_piece); - EmitULEB128(32); - EmitULEB128(0); - } - } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { - assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); - // Q registers Q0-Q15 are described by composing two D registers together. 
- // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) - // DW_OP_piece(8) - - unsigned QReg = Reg - ARM::Q0; - unsigned D1 = 256 + 2 * QReg; - unsigned D2 = D1 + 1; - - OutStreamer.AddComment("DW_OP_regx for Q register: D1"); - EmitInt8(dwarf::DW_OP_regx); - EmitULEB128(D1); - OutStreamer.AddComment("DW_OP_piece 8"); - EmitInt8(dwarf::DW_OP_piece); - EmitULEB128(8); - - OutStreamer.AddComment("DW_OP_regx for Q register: D2"); - EmitInt8(dwarf::DW_OP_regx); - EmitULEB128(D2); - OutStreamer.AddComment("DW_OP_piece 8"); - EmitInt8(dwarf::DW_OP_piece); - EmitULEB128(8); + if (RI->getDwarfRegNum(MLoc.getReg(), false) != -1) { + AsmPrinter::EmitDwarfRegOp(MLoc, Indirect); + return; + } + assert(MLoc.isReg() && !Indirect && + "This doesn't support offset/indirection - implement it if needed"); + unsigned Reg = MLoc.getReg(); + if (Reg >= ARM::S0 && Reg <= ARM::S31) { + assert(ARM::S0 + 31 == ARM::S31 && "Unexpected ARM S register numbering"); + // S registers are described as bit-pieces of a register + // S[2x] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 0) + // S[2x+1] = DW_OP_regx(256 + (x>>1)) DW_OP_bit_piece(32, 32) + + unsigned SReg = Reg - ARM::S0; + bool odd = SReg & 0x1; + unsigned Rx = 256 + (SReg >> 1); + + OutStreamer.AddComment("DW_OP_regx for S register"); + EmitInt8(dwarf::DW_OP_regx); + + OutStreamer.AddComment(Twine(SReg)); + EmitULEB128(Rx); + + if (odd) { + OutStreamer.AddComment("DW_OP_bit_piece 32 32"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(32); + } else { + OutStreamer.AddComment("DW_OP_bit_piece 32 0"); + EmitInt8(dwarf::DW_OP_bit_piece); + EmitULEB128(32); + EmitULEB128(0); } + } else if (Reg >= ARM::Q0 && Reg <= ARM::Q15) { + assert(ARM::Q0 + 15 == ARM::Q15 && "Unexpected ARM Q register numbering"); + // Q registers Q0-Q15 are described by composing two D registers together. + // Qx = DW_OP_regx(256+2x) DW_OP_piece(8) DW_OP_regx(256+2x+1) + // DW_OP_piece(8) + + unsigned QReg = Reg - ARM::Q0; + unsigned D1 = 256 + 2 * QReg; + unsigned D2 = D1 + 1; + + OutStreamer.AddComment("DW_OP_regx for Q register: D1"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D1); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); + + OutStreamer.AddComment("DW_OP_regx for Q register: D2"); + EmitInt8(dwarf::DW_OP_regx); + EmitULEB128(D2); + OutStreamer.AddComment("DW_OP_piece 8"); + EmitInt8(dwarf::DW_OP_piece); + EmitULEB128(8); } } @@ -312,7 +145,7 @@ void ARMAsmPrinter::EmitXXStructor(const Constant *CV) { const GlobalValue *GV = dyn_cast(CV->stripPointerCasts()); assert(GV && "C++ constructor pointer was not a GlobalValue!"); - const MCExpr *E = MCSymbolRefExpr::Create(Mang->getSymbol(GV), + const MCExpr *E = MCSymbolRefExpr::Create(getSymbol(GV), (Subtarget->isTargetDarwin() ? MCSymbolRefExpr::VK_None : MCSymbolRefExpr::VK_ARM_TARGET1), @@ -373,7 +206,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum, else if ((Modifier && strcmp(Modifier, "hi16") == 0) || (TF & ARMII::MO_HI16)) O << ":upper16:"; - O << *Mang->getSymbol(GV); + O << *getSymbol(GV); printOffset(MO.getOffset(), O); if (TF == ARMII::MO_PLT) @@ -474,8 +307,14 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, // This takes advantage of the 2 operand-ness of ldm/stm and that we've // already got the operands in registers that are operands to the // inline asm statement. 
- - O << "{" << ARMInstPrinter::getRegisterName(RegBegin); + O << "{"; + if (ARM::GPRPairRegClass.contains(RegBegin)) { + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + unsigned Reg0 = TRI->getSubReg(RegBegin, ARM::gsub_0); + O << ARMInstPrinter::getRegisterName(Reg0) << ", ";; + RegBegin = TRI->getSubReg(RegBegin, ARM::gsub_1); + } + O << ARMInstPrinter::getRegisterName(RegBegin); // FIXME: The register allocator not only may not have given us the // registers in sequence, but may not be in ascending registers. This @@ -500,7 +339,38 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum, if (!FlagsOP.isImm()) return true; unsigned Flags = FlagsOP.getImm(); + + // This operand may not be the one that actually provides the register. If + // it's tied to a previous one then we should refer instead to that one + // for registers and their classes. + unsigned TiedIdx; + if (InlineAsm::isUseOperandTiedToDef(Flags, TiedIdx)) { + for (OpNum = InlineAsm::MIOp_FirstOperand; TiedIdx; --TiedIdx) { + unsigned OpFlags = MI->getOperand(OpNum).getImm(); + OpNum += InlineAsm::getNumOperandRegisters(OpFlags) + 1; + } + Flags = MI->getOperand(OpNum).getImm(); + + // Later code expects OpNum to be pointing at the register rather than + // the flags. + OpNum += 1; + } + unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); + unsigned RC; + InlineAsm::hasRegClassConstraint(Flags, RC); + if (RC == ARM::GPRPairRegClassID) { + if (NumVals != 1) + return true; + const MachineOperand &MO = MI->getOperand(OpNum); + if (!MO.isReg()) + return true; + const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo(); + unsigned Reg = TRI->getSubReg(MO.getReg(), ExtraCode[0] == 'Q' ? + ARM::gsub_0 : ARM::gsub_1); + O << ARMInstPrinter::getRegisterName(Reg); + return false; + } if (NumVals != 2) return true; unsigned RegOp = ExtraCode[0] == 'Q' ? OpNum : OpNum + 1; @@ -704,11 +574,6 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer.EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } - // FIXME: This should eventually end up somewhere else where more - // intelligent flag decisions can be made. For now we are just maintaining - // the status quo for ARM and setting EF_ARM_EABI_VER5 as the default. - if (MCELFStreamer *MES = dyn_cast(&OutStreamer)) - MES->getAssembler().setELFHeaderEFlags(ELF::EF_ARM_EABI_VER5); } //===----------------------------------------------------------------------===// @@ -718,145 +583,150 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // to appear in the .ARM.attributes section in ELF. // Instead of subclassing the MCELFStreamer, we do the work here. 
-void ARMAsmPrinter::emitAttributes() { - - emitARMAttributeSection(); - - /* GAS expect .fpu to be emitted, regardless of VFP build attribute */ - bool emitFPU = false; - AttributeEmitter *AttrEmitter; - if (OutStreamer.hasRawTextSupport()) { - AttrEmitter = new AsmAttributeEmitter(OutStreamer); - emitFPU = true; - } else { - MCObjectStreamer &O = static_cast(OutStreamer); - AttrEmitter = new ObjectAttributeEmitter(O); - } - - AttrEmitter->MaybeSwitchVendor("aeabi"); - - std::string CPUString = Subtarget->getCPUString(); - - if (CPUString == "cortex-a8" || - Subtarget->isCortexA8()) { - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::CPU_name, "cortex-a8"); - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::ApplicationProfile); - AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, - ARMBuildAttrs::Allowed); - AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumb32); - // Fixme: figure out when this is emitted. - //AttrEmitter->EmitAttribute(ARMBuildAttrs::WMMX_arch, - // ARMBuildAttrs::AllowWMMXv1); - // - - /// ADD additional Else-cases here! - } else if (CPUString == "xscale") { - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TEJ); - AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, - ARMBuildAttrs::Allowed); - AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::Allowed); - } else if (CPUString == "generic") { - // For a generic CPU, we assume a standard v7a architecture in Subtarget. - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch_profile, - ARMBuildAttrs::ApplicationProfile); - AttrEmitter->EmitAttribute(ARMBuildAttrs::ARM_ISA_use, - ARMBuildAttrs::Allowed); - AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumb32); - } else if (Subtarget->hasV7Ops()) { - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v7); - AttrEmitter->EmitAttribute(ARMBuildAttrs::THUMB_ISA_use, - ARMBuildAttrs::AllowThumb32); +static ARMBuildAttrs::CPUArch getArchForCPU(StringRef CPU, + const ARMSubtarget *Subtarget) { + if (CPU == "xscale") + return ARMBuildAttrs::v5TEJ; + + if (Subtarget->hasV8Ops()) + return ARMBuildAttrs::v8; + else if (Subtarget->hasV7Ops()) { + if (Subtarget->isMClass() && Subtarget->hasThumb2DSP()) + return ARMBuildAttrs::v7E_M; + return ARMBuildAttrs::v7; } else if (Subtarget->hasV6T2Ops()) - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6T2); + return ARMBuildAttrs::v6T2; + else if (Subtarget->hasV6MOps()) + return ARMBuildAttrs::v6S_M; else if (Subtarget->hasV6Ops()) - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v6); + return ARMBuildAttrs::v6; else if (Subtarget->hasV5TEOps()) - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5TE); + return ARMBuildAttrs::v5TE; else if (Subtarget->hasV5TOps()) - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v5T); + return ARMBuildAttrs::v5T; else if (Subtarget->hasV4TOps()) - AttrEmitter->EmitAttribute(ARMBuildAttrs::CPU_arch, ARMBuildAttrs::v4T); + return ARMBuildAttrs::v4T; + else + return ARMBuildAttrs::v4; +} - if (Subtarget->hasNEON() && emitFPU) { - /* NEON is not exactly a VFP architecture, but GAS emit one of - * neon/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ - if (Subtarget->hasVFP4()) - 
AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - "neon-vfpv4"); - else - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::Advanced_SIMD_arch, "neon"); - /* If emitted for NEON, omit from VFP below, since you can have both - * NEON and VFP in build attributes but only one .fpu */ - emitFPU = false; +void ARMAsmPrinter::emitAttributes() { + MCTargetStreamer &TS = OutStreamer.getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast(TS); + + ATS.switchVendor("aeabi"); + + std::string CPUString = Subtarget->getCPUString(); + + if (CPUString != "generic") + ATS.emitTextAttribute(ARMBuildAttrs::CPU_name, CPUString); + + ATS.emitAttribute(ARMBuildAttrs::CPU_arch, + getArchForCPU(CPUString, Subtarget)); + + if (Subtarget->isAClass()) { + ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::ApplicationProfile); + } else if (Subtarget->isRClass()) { + ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::RealTimeProfile); + } else if (Subtarget->isMClass()){ + ATS.emitAttribute(ARMBuildAttrs::CPU_arch_profile, + ARMBuildAttrs::MicroControllerProfile); } - /* VFPv4 + .fpu */ - if (Subtarget->hasVFP4()) { - AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, - ARMBuildAttrs::AllowFPv4A); - if (emitFPU) - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv4"); - - /* VFPv3 + .fpu */ - } else if (Subtarget->hasVFP3()) { - AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, - ARMBuildAttrs::AllowFPv3A); - if (emitFPU) - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv3"); - - /* VFPv2 + .fpu */ - } else if (Subtarget->hasVFP2()) { - AttrEmitter->EmitAttribute(ARMBuildAttrs::VFP_arch, - ARMBuildAttrs::AllowFPv2); - if (emitFPU) - AttrEmitter->EmitTextAttribute(ARMBuildAttrs::VFP_arch, "vfpv2"); + ATS.emitAttribute(ARMBuildAttrs::ARM_ISA_use, Subtarget->hasARMOps() ? + ARMBuildAttrs::Allowed : ARMBuildAttrs::Not_Allowed); + if (Subtarget->isThumb1Only()) { + ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::Allowed); + } else if (Subtarget->hasThumb2()) { + ATS.emitAttribute(ARMBuildAttrs::THUMB_ISA_use, + ARMBuildAttrs::AllowThumb32); } - /* TODO: ARMBuildAttrs::Allowed is not completely accurate, - * since NEON can have 1 (allowed) or 2 (MAC operations) */ if (Subtarget->hasNEON()) { - AttrEmitter->EmitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, - ARMBuildAttrs::Allowed); + /* NEON is not exactly a VFP architecture, but GAS emit one of + * neon/neon-fp-armv8/neon-vfpv4/vfpv3/vfpv2 for .fpu parameters */ + if (Subtarget->hasFPARMv8()) { + if (Subtarget->hasCrypto()) + ATS.emitFPU(ARM::CRYPTO_NEON_FP_ARMV8); + else + ATS.emitFPU(ARM::NEON_FP_ARMV8); + } + else if (Subtarget->hasVFP4()) + ATS.emitFPU(ARM::NEON_VFPV4); + else + ATS.emitFPU(ARM::NEON); + // Emit Tag_Advanced_SIMD_arch for ARMv8 architecture + if (Subtarget->hasV8Ops()) + ATS.emitAttribute(ARMBuildAttrs::Advanced_SIMD_arch, + ARMBuildAttrs::AllowNeonARMv8); + } else { + if (Subtarget->hasFPARMv8()) + ATS.emitFPU(ARM::FP_ARMV8); + else if (Subtarget->hasVFP4()) + ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV4_D16 : ARM::VFPV4); + else if (Subtarget->hasVFP3()) + ATS.emitFPU(Subtarget->hasD16() ? ARM::VFPV3_D16 : ARM::VFPV3); + else if (Subtarget->hasVFP2()) + ATS.emitFPU(ARM::VFPV2); } // Signal various FP modes. 
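// [Editor's note -- worked example, not part of the upstream patch: on the
// new ARMTargetStreamer path above, a hypothetical cortex-a15 build (v7,
// A-class, NEON + VFPv4, no FPARMv8) would emit Tag_CPU_name "cortex-a15",
// Tag_CPU_arch v7, Tag_CPU_arch_profile ApplicationProfile, and the
// neon-vfpv4 FPU.]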
if (!TM.Options.UnsafeFPMath) { - AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_denormal, - ARMBuildAttrs::Allowed); - AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_exceptions, - ARMBuildAttrs::Allowed); + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_denormal, ARMBuildAttrs::Allowed); + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_exceptions, + ARMBuildAttrs::Allowed); } if (TM.Options.NoInfsFPMath && TM.Options.NoNaNsFPMath) - AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model, - ARMBuildAttrs::Allowed); + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, + ARMBuildAttrs::Allowed); else - AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_FP_number_model, - ARMBuildAttrs::AllowIEE754); + ATS.emitAttribute(ARMBuildAttrs::ABI_FP_number_model, + ARMBuildAttrs::AllowIEE754); // FIXME: add more flags to ARMBuildAttrs.h // 8-bytes alignment stuff. - AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_needed, 1); - AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1); + ATS.emitAttribute(ARMBuildAttrs::ABI_align8_needed, 1); + ATS.emitAttribute(ARMBuildAttrs::ABI_align8_preserved, 1); + + // ABI_HardFP_use attribute to indicate single precision FP. + if (Subtarget->isFPOnlySP()) + ATS.emitAttribute(ARMBuildAttrs::ABI_HardFP_use, + ARMBuildAttrs::HardFPSinglePrecision); // Hard float. Use both S and D registers and conform to AAPCS-VFP. - if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) { - AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_HardFP_use, 3); - AttrEmitter->EmitAttribute(ARMBuildAttrs::ABI_VFP_args, 1); - } + if (Subtarget->isAAPCS_ABI() && TM.Options.FloatABIType == FloatABI::Hard) + ATS.emitAttribute(ARMBuildAttrs::ABI_VFP_args, ARMBuildAttrs::HardFPAAPCS); + // FIXME: Should we signal R9 usage? - if (Subtarget->hasDivide()) - AttrEmitter->EmitAttribute(ARMBuildAttrs::DIV_use, 1); + if (Subtarget->hasFP16()) + ATS.emitAttribute(ARMBuildAttrs::FP_HP_extension, ARMBuildAttrs::AllowHPFP); + + if (Subtarget->hasMPExtension()) + ATS.emitAttribute(ARMBuildAttrs::MPextension_use, ARMBuildAttrs::AllowMP); + + if (Subtarget->hasDivide()) { + // Check if hardware divide is only available in thumb2 or ARM as well. + ATS.emitAttribute(ARMBuildAttrs::DIV_use, + Subtarget->hasDivideInARMMode() ? ARMBuildAttrs::AllowDIVExt : + ARMBuildAttrs::AllowDIVIfExists); + } - AttrEmitter->Finish(); - delete AttrEmitter; + if (Subtarget->hasTrustZone() && Subtarget->hasVirtualization()) + ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowTZVirtualization); + else if (Subtarget->hasTrustZone()) + ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowTZ); + else if (Subtarget->hasVirtualization()) + ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, + ARMBuildAttrs::AllowVirtualization); + + ATS.finishAttributeSection(); } void ARMAsmPrinter::emitARMAttributeSection() { @@ -908,7 +778,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) { bool isIndirect = Subtarget->isTargetDarwin() && Subtarget->GVIsIndirectSymbol(GV, TM.getRelocationModel()); if (!isIndirect) - return Mang->getSymbol(GV); + return getSymbol(GV); // FIXME: Remove this when Darwin transition to @GOT like syntax. 
MCSymbol *MCSym = GetSymbolWithGlobalValueBase(GV, "$non_lazy_ptr"); @@ -919,7 +789,7 @@ MCSymbol *ARMAsmPrinter::GetARMGVSymbol(const GlobalValue *GV) { MMIMachO.getGVStubEntry(MCSym); if (StubSym.getPointer() == 0) StubSym = MachineModuleInfoImpl:: - StubValueTy(Mang->getSymbol(GV), !GV->hasInternalLinkage()); + StubValueTy(getSymbol(GV), !GV->hasInternalLinkage()); return MCSym; } @@ -1092,27 +962,12 @@ void ARMAsmPrinter::EmitJump2Table(const MachineInstr *MI) { OutStreamer.EmitDataRegion(MCDR_DataRegionEnd); } -void ARMAsmPrinter::PrintDebugValueComment(const MachineInstr *MI, - raw_ostream &OS) { - unsigned NOps = MI->getNumOperands(); - assert(NOps==4); - OS << '\t' << MAI->getCommentString() << "DEBUG_VALUE: "; - // cast away const; DIetc do not take const operands for some reason. - DIVariable V(const_cast(MI->getOperand(NOps-1).getMetadata())); - OS << V.getName(); - OS << " <- "; - // Frame address. Currently handles register +- offset only. - assert(MI->getOperand(0).isReg() && MI->getOperand(1).isImm()); - OS << '['; printOperand(MI, 0, OS); OS << '+'; printOperand(MI, 1, OS); - OS << ']'; - OS << "+"; - printOperand(MI, NOps-2, OS); -} - void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { assert(MI->getFlag(MachineInstr::FrameSetup) && "Only instruction which are involved into frame setup code are allowed"); + MCTargetStreamer &TS = OutStreamer.getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast(TS); const MachineFunction &MF = *MI->getParent()->getParent(); const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); const ARMFunctionInfo &AFI = *MF.getInfo(); @@ -1175,7 +1030,7 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { RegList.push_back(SrcReg); break; } - OutStreamer.EmitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); + ATS.emitRegSave(RegList, Opc == ARM::VSTMDDB_UPD); } else { // Changes of stack / frame pointer. if (SrcReg == ARM::SP) { @@ -1223,11 +1078,11 @@ void ARMAsmPrinter::EmitUnwindingInstruction(const MachineInstr *MI) { if (DstReg == FramePtr && FramePtr != ARM::SP) // Set-up of the frame pointer. Positive values correspond to "add" // instruction. - OutStreamer.EmitSetFP(FramePtr, ARM::SP, -Offset); + ATS.emitSetFP(FramePtr, ARM::SP, -Offset); else if (DstReg == ARM::SP) { // Change of SP by an offset. Positive values correspond to "sub" // instruction. 
- OutStreamer.EmitPad(Offset); + ATS.emitPad(Offset); } else { MI->dump(); llvm_unreachable("Unsupported opcode for unwinding information"); @@ -1272,15 +1127,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { unsigned Opc = MI->getOpcode(); switch (Opc) { case ARM::t2MOVi32imm: llvm_unreachable("Should be lowered by thumb2it pass"); - case ARM::DBG_VALUE: { - if (isVerbose() && OutStreamer.hasRawTextSupport()) { - SmallString<128> TmpStr; - raw_svector_ostream OS(TmpStr); - PrintDebugValueComment(MI, OS); - OutStreamer.EmitRawText(StringRef(OS.str())); - } - return; - } + case ARM::DBG_VALUE: llvm_unreachable("Should be handled by generic printing"); case ARM::LEApcrel: case ARM::tLEApcrel: case ARM::t2LEApcrel: { @@ -1376,7 +1223,7 @@ void ARMAsmPrinter::EmitInstruction(const MachineInstr *MI) { .addReg(0)); const GlobalValue *GV = MI->getOperand(0).getGlobal(); - MCSymbol *GVSym = Mang->getSymbol(GV); + MCSymbol *GVSym = getSymbol(GV); const MCExpr *GVSymExpr = MCSymbolRefExpr::Create(GVSym, OutContext); OutStreamer.EmitInstruction(MCInstBuilder(ARM::Bcc) .addExpr(GVSymExpr) diff --git a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h index c945e4f..de72e06 100644 --- a/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h +++ b/contrib/llvm/lib/Target/ARM/ARMAsmPrinter.h @@ -97,13 +97,9 @@ private: const MachineInstr *MI); public: - void PrintDebugValueComment(const MachineInstr *MI, raw_ostream &OS); - - virtual MachineLocation - getDebugValueLocation(const MachineInstr *MI) const LLVM_OVERRIDE; - /// EmitDwarfRegOp - Emit dwarf register operation. - virtual void EmitDwarfRegOp(const MachineLocation &MLoc) const LLVM_OVERRIDE; + virtual void EmitDwarfRegOp(const MachineLocation &MLoc, bool Indirect) const + LLVM_OVERRIDE; virtual unsigned getISAEncoding() LLVM_OVERRIDE { // ARM/Darwin adds ISA to the DWARF info for each function. diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp index 6005054..f835a4e 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -11,10 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "ARMBaseInstrInfo.h" #include "ARM.h" +#include "ARMBaseInstrInfo.h" #include "ARMBaseRegisterInfo.h" #include "ARMConstantPoolValue.h" +#include "ARMFeatures.h" #include "ARMHazardRecognizer.h" #include "ARMMachineFunctionInfo.h" #include "MCTargetDesc/ARMAddressingModes.h" @@ -36,7 +37,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#define GET_INSTRINFO_CTOR +#define GET_INSTRINFO_CTOR_DTOR #include "ARMGenInstrInfo.inc" using namespace llvm; @@ -113,8 +114,7 @@ ScheduleHazardRecognizer *ARMBaseInstrInfo:: CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, const ScheduleDAG *DAG) const { if (Subtarget.isThumb2() || Subtarget.hasVFP2()) - return (ScheduleHazardRecognizer *) - new ARMHazardRecognizer(II, *this, getRegisterInfo(), Subtarget, DAG); + return (ScheduleHazardRecognizer *)new ARMHazardRecognizer(II, DAG); return TargetInstrInfo::CreateTargetPostRAHazardRecognizer(II, DAG); } @@ -273,104 +273,90 @@ ARMBaseInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,MachineBasicBlock *&TBB, MachineBasicBlock *&FBB, SmallVectorImpl &Cond, bool AllowModify) const { - // If the block has no terminators, it just falls into the block after it. 
+ TBB = 0; + FBB = 0; + MachineBasicBlock::iterator I = MBB.end(); if (I == MBB.begin()) - return false; + return false; // Empty blocks are easy. --I; - while (I->isDebugValue()) { - if (I == MBB.begin()) - return false; - --I; - } - - // Get the last instruction in the block. - MachineInstr *LastInst = I; - unsigned LastOpc = LastInst->getOpcode(); - // Check if it's an indirect branch first, this should return 'unanalyzable' - // even if it's predicated. - if (isIndirectBranchOpcode(LastOpc)) - return true; + // Walk backwards from the end of the basic block until the branch is + // analyzed or we give up. + while (isPredicated(I) || I->isTerminator()) { - if (!isUnpredicatedTerminator(I)) - return false; + // Flag to be raised on unanalyzeable instructions. This is useful in cases + // where we want to clean up on the end of the basic block before we bail + // out. + bool CantAnalyze = false; - // If there is only one terminator instruction, process it. - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - if (isUncondBranchOpcode(LastOpc)) { - TBB = LastInst->getOperand(0).getMBB(); - return false; + // Skip over DEBUG values and predicated nonterminators. + while (I->isDebugValue() || !I->isTerminator()) { + if (I == MBB.begin()) + return false; + --I; } - if (isCondBranchOpcode(LastOpc)) { - // Block ends with fall-through condbranch. - TBB = LastInst->getOperand(0).getMBB(); - Cond.push_back(LastInst->getOperand(1)); - Cond.push_back(LastInst->getOperand(2)); - return false; + + if (isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode())) { + // Indirect branches and jump tables can't be analyzed, but we still want + // to clean up any instructions at the tail of the basic block. + CantAnalyze = true; + } else if (isUncondBranchOpcode(I->getOpcode())) { + TBB = I->getOperand(0).getMBB(); + } else if (isCondBranchOpcode(I->getOpcode())) { + // Bail out if we encounter multiple conditional branches. + if (!Cond.empty()) + return true; + + assert(!FBB && "FBB should have been null."); + FBB = TBB; + TBB = I->getOperand(0).getMBB(); + Cond.push_back(I->getOperand(1)); + Cond.push_back(I->getOperand(2)); + } else if (I->isReturn()) { + // Returns can't be analyzed, but we should run cleanup. + CantAnalyze = !isPredicated(I); + } else { + // We encountered other unrecognized terminator. Bail out immediately. + return true; } - return true; // Can't handle indirect branch. - } - // Get the instruction before it if it is a terminator. - MachineInstr *SecondLastInst = I; - unsigned SecondLastOpc = SecondLastInst->getOpcode(); - - // If AllowModify is true and the block ends with two or more unconditional - // branches, delete all but the first unconditional branch. - if (AllowModify && isUncondBranchOpcode(LastOpc)) { - while (isUncondBranchOpcode(SecondLastOpc)) { - LastInst->eraseFromParent(); - LastInst = SecondLastInst; - LastOpc = LastInst->getOpcode(); - if (I == MBB.begin() || !isUnpredicatedTerminator(--I)) { - // Return now the only terminator is an unconditional branch. - TBB = LastInst->getOperand(0).getMBB(); - return false; - } else { - SecondLastInst = I; - SecondLastOpc = SecondLastInst->getOpcode(); + // Cleanup code - to be run for unpredicated unconditional branches and + // returns. + if (!isPredicated(I) && + (isUncondBranchOpcode(I->getOpcode()) || + isIndirectBranchOpcode(I->getOpcode()) || + isJumpTableBranchOpcode(I->getOpcode()) || + I->isReturn())) { + // Forget any previous condition branch information - it no longer applies. 
+ Cond.clear(); + FBB = 0; + + // If we can modify the function, delete everything below this + // unconditional branch. + if (AllowModify) { + MachineBasicBlock::iterator DI = llvm::next(I); + while (DI != MBB.end()) { + MachineInstr *InstToDelete = DI; + ++DI; + InstToDelete->eraseFromParent(); + } } } - } - // If there are three terminators, we don't know what sort of block this is. - if (SecondLastInst && I != MBB.begin() && isUnpredicatedTerminator(--I)) - return true; - - // If the block ends with a B and a Bcc, handle it. - if (isCondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - Cond.push_back(SecondLastInst->getOperand(1)); - Cond.push_back(SecondLastInst->getOperand(2)); - FBB = LastInst->getOperand(0).getMBB(); - return false; - } + if (CantAnalyze) + return true; - // If the block ends with two unconditional branches, handle it. The second - // one is not executed, so remove it. - if (isUncondBranchOpcode(SecondLastOpc) && isUncondBranchOpcode(LastOpc)) { - TBB = SecondLastInst->getOperand(0).getMBB(); - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return false; - } + if (I == MBB.begin()) + return false; - // ...likewise if it ends with a branch table followed by an unconditional - // branch. The branch folder can create these, and we must get rid of them for - // correctness of Thumb constant islands. - if ((isJumpTableBranchOpcode(SecondLastOpc) || - isIndirectBranchOpcode(SecondLastOpc)) && - isUncondBranchOpcode(LastOpc)) { - I = LastInst; - if (AllowModify) - I->eraseFromParent(); - return true; + --I; } - // Otherwise, can't handle this. - return true; + // We made it past the terminators without bailing out - we must have + // analyzed this branch successfully. + return false; } @@ -535,11 +521,17 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const { if (!MI->isPredicable()) return false; - if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) { - ARMFunctionInfo *AFI = - MI->getParent()->getParent()->getInfo(); - return AFI->isThumb2Function(); + ARMFunctionInfo *AFI = + MI->getParent()->getParent()->getInfo(); + + if (AFI->isThumb2Function()) { + if (getSubtarget().restrictIT()) + return isV8EligibleForIT(MI); + } else { // non-Thumb + if ((MI->getDesc().TSFlags & ARMII::DomainMask) == ARMII::DomainNEON) + return false; } + return true; } @@ -660,16 +652,16 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, unsigned DestReg, unsigned SrcReg, bool KillSrc) const { bool GPRDest = ARM::GPRRegClass.contains(DestReg); - bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); + bool GPRSrc = ARM::GPRRegClass.contains(SrcReg); if (GPRDest && GPRSrc) { AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr), DestReg) - .addReg(SrcReg, getKillRegState(KillSrc)))); + .addReg(SrcReg, getKillRegState(KillSrc)))); return; } bool SPRDest = ARM::SPRRegClass.contains(DestReg); - bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); + bool SPRSrc = ARM::SPRRegClass.contains(SrcReg); unsigned Opc = 0; if (SPRDest && SPRSrc) @@ -698,26 +690,47 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, int Spacing = 1; // Use VORRq when possible. 
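The rewritten AnalyzeBranch above walks backwards from the end of the block instead of pattern-matching only the last one or two instructions, which lets it skip predicated non-terminators, reject a second conditional branch, and, with AllowModify, delete dead code past an unconditional branch. A simplified standalone model of the walk, with toy instruction kinds in place of the MachineInstr API and LLVM's true-means-unanalyzable convention:

#include <cstddef>
#include <vector>

enum Kind { Ordinary, CondBranch, UncondBranch, IndirectBranch, Return };
struct Inst {
  Kind K;
  bool Predicated;
};

// Returns true when the terminators cannot be analyzed (LLVM convention).
bool analyzeBranch(std::vector<Inst> &Block, bool AllowModify) {
  bool SawCond = false;
  for (std::size_t i = Block.size(); i-- > 0;) {
    Inst &I = Block[i];
    if (I.K == Ordinary && !I.Predicated)
      return false; // walked past all terminators: analysis succeeded
    if (I.K == Ordinary)
      continue; // skip predicated non-terminators (and debug values)
    if (I.K == CondBranch) {
      if (SawCond)
        return true; // multiple conditional branches: give up
      SawCond = true;
    } else if (I.K == UncondBranch) {
      SawCond = false; // earlier condition no longer applies
      if (AllowModify) // everything below this branch is dead
        Block.erase(Block.begin() + i + 1, Block.end());
    } else {
      return true; // indirect branch or return: unanalyzable
    }
  }
  return false; // reached the top of the block
}

int main() {
  std::vector<Inst> B = {{Ordinary, false}, {CondBranch, false},
                         {UncondBranch, false}, {UncondBranch, false}};
  bool CantAnalyze = analyzeBranch(B, true); // also erases the dead tail
  return CantAnalyze ? 1 : 0; // analyzable: exits 0, B is now 3 entries
}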
- if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 2; - else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VORRq, BeginIdx = ARM::qsub_0, SubRegs = 4; + if (ARM::QQPRRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VORRq; + BeginIdx = ARM::qsub_0; + SubRegs = 2; + } else if (ARM::QQQQPRRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VORRq; + BeginIdx = ARM::qsub_0; + SubRegs = 4; // Fall back to VMOVD. - else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2; - else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3; - else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4; - else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) - Opc = ARM::MOVr, BeginIdx = ARM::gsub_0, SubRegs = 2; - - else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 2, Spacing = 2; - else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 3, Spacing = 2; - else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) - Opc = ARM::VMOVD, BeginIdx = ARM::dsub_0, SubRegs = 4, Spacing = 2; + } else if (ARM::DPairRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 2; + } else if (ARM::DTripleRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 3; + } else if (ARM::DQuadRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 4; + } else if (ARM::GPRPairRegClass.contains(DestReg, SrcReg)) { + Opc = Subtarget.isThumb2() ? ARM::tMOVr : ARM::MOVr; + BeginIdx = ARM::gsub_0; + SubRegs = 2; + } else if (ARM::DPairSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 2; + Spacing = 2; + } else if (ARM::DTripleSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 3; + Spacing = 2; + } else if (ARM::DQuadSpcRegClass.contains(DestReg, SrcReg)) { + Opc = ARM::VMOVD; + BeginIdx = ARM::dsub_0; + SubRegs = 4; + Spacing = 2; + } assert(Opc && "Impossible reg-to-reg copy"); @@ -726,26 +739,28 @@ void ARMBaseInstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Copy register tuples backward when the first Dest reg overlaps with SrcReg. if (TRI->regsOverlap(SrcReg, TRI->getSubReg(DestReg, BeginIdx))) { - BeginIdx = BeginIdx + ((SubRegs-1)*Spacing); + BeginIdx = BeginIdx + ((SubRegs - 1) * Spacing); Spacing = -Spacing; } #ifndef NDEBUG SmallSet DstRegs; #endif for (unsigned i = 0; i != SubRegs; ++i) { - unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i*Spacing); - unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i*Spacing); + unsigned Dst = TRI->getSubReg(DestReg, BeginIdx + i * Spacing); + unsigned Src = TRI->getSubReg(SrcReg, BeginIdx + i * Spacing); assert(Dst && Src && "Bad sub-register"); #ifndef NDEBUG assert(!DstRegs.count(Src) && "destructive vector copy"); DstRegs.insert(Dst); #endif - Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst) - .addReg(Src); + Mov = BuildMI(MBB, I, I->getDebugLoc(), get(Opc), Dst).addReg(Src); // VORR takes two source operands. if (Opc == ARM::VORRq) Mov.addReg(Src); Mov = AddDefaultPred(Mov); + // MOVr can set CC. + if (Opc == ARM::MOVr) + Mov = AddDefaultCC(Mov); } // Add implicit super-register defs and kills to the last instruction. 
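In the copyPhysReg hunk above (rewritten from comma-operator one-liners into plain blocks), tuple copies are expanded one subregister at a time, and the BeginIdx/Spacing flip reverses the iteration direction whenever the first destination subregister overlaps the source, so no value is overwritten before it has been read; this is the same consideration that forces memmove to pick a copy direction. A standalone illustration of the rule on a plain array:

#include <cassert>

// Copy N elements from SrcIdx to DstIdx within one backing array, choosing
// the direction so nothing is clobbered before it is read -- the same
// reasoning as the BeginIdx/Spacing flip in copyPhysReg above.
void copyTuple(int *Base, unsigned DstIdx, unsigned SrcIdx, unsigned N) {
  if (DstIdx > SrcIdx && DstIdx < SrcIdx + N) {
    for (unsigned i = N; i-- > 0;)        // overlap: copy backwards
      Base[DstIdx + i] = Base[SrcIdx + i];
  } else {
    for (unsigned i = 0; i != N; ++i)     // no harmful overlap: copy forwards
      Base[DstIdx + i] = Base[SrcIdx + i];
  }
}

int main() {
  int R[6] = {10, 11, 12, 13, 0, 0};
  copyTuple(R, 1, 0, 4); // overlapping "register tuple" style copy
  assert(R[1] == 10 && R[2] == 11 && R[3] == 12 && R[4] == 13);
}

With the forward loop alone, the overlapping case would smear Base[SrcIdx] across the whole destination range.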
Mov->addRegisterDefined(DestReg, TRI); @@ -1214,16 +1229,6 @@ bool ARMBaseInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const{ return true; } -MachineInstr* -ARMBaseInstrInfo::emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const { - MachineInstrBuilder MIB = BuildMI(MF, DL, get(ARM::DBG_VALUE)) - .addFrameIndex(FrameIx).addImm(0).addImm(Offset).addMetadata(MDPtr); - return &*MIB; -} - /// Create a copy of a const pool value. Update CPI to the new index and return /// the label UID. static unsigned duplicateCPV(MachineFunction &MF, unsigned &CPI) { @@ -1426,9 +1431,11 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: + case ARM::t2LDRBi8: case ARM::t2LDRDi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: + case ARM::t2LDRBi12: case ARM::t2LDRSHi12: break; } @@ -1445,8 +1452,10 @@ bool ARMBaseInstrInfo::areLoadsFromSameBasePtr(SDNode *Load1, SDNode *Load2, case ARM::VLDRD: case ARM::VLDRS: case ARM::t2LDRi8: + case ARM::t2LDRBi8: case ARM::t2LDRSHi8: case ARM::t2LDRi12: + case ARM::t2LDRBi12: case ARM::t2LDRSHi12: break; } @@ -1493,7 +1502,16 @@ bool ARMBaseInstrInfo::shouldScheduleLoadsNear(SDNode *Load1, SDNode *Load2, if ((Offset2 - Offset1) / 8 > 64) return false; - if (Load1->getMachineOpcode() != Load2->getMachineOpcode()) + // Check if the machine opcodes are different. If they are different + // then we consider them to not be of the same base address, + // EXCEPT in the case of Thumb2 byte loads where one is LDRBi8 and the other LDRBi12. + // In this case, they are considered to be the same because they are different + // encoding forms of the same basic instruction. + if ((Load1->getMachineOpcode() != Load2->getMachineOpcode()) && + !((Load1->getMachineOpcode() == ARM::t2LDRBi8 && + Load2->getMachineOpcode() == ARM::t2LDRBi12) || + (Load1->getMachineOpcode() == ARM::t2LDRBi12 && + Load2->getMachineOpcode() == ARM::t2LDRBi8))) return false; // FIXME: overly conservative? // Four loads in a row should be sufficient. @@ -1708,7 +1726,7 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, bool PreferFalse) const { assert((MI->getOpcode() == ARM::MOVCCr || MI->getOpcode() == ARM::t2MOVCCr) && "Unknown select instruction"); - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); MachineInstr *DefMI = canFoldIntoMOVCC(MI->getOperand(2).getReg(), MRI, this); bool Invert = !DefMI; if (!DefMI) @@ -1716,11 +1734,17 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, if (!DefMI) return 0; + // Find new register class to use. + MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); + unsigned DestReg = MI->getOperand(0).getReg(); + const TargetRegisterClass *PreviousClass = MRI.getRegClass(FalseReg.getReg()); + if (!MRI.constrainRegClass(DestReg, PreviousClass)) + return 0; + // Create a new predicated version of DefMI. // Rfalse is the first use. MachineInstrBuilder NewMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), - DefMI->getDesc(), - MI->getOperand(0).getReg()); + DefMI->getDesc(), DestReg); // Copy all the DefMI operands, excluding its (null) predicate. const MCInstrDesc &DefDesc = DefMI->getDesc(); @@ -1743,7 +1767,6 @@ MachineInstr *ARMBaseInstrInfo::optimizeSelect(MachineInstr *MI, // register operand tied to the first def. 
// The tie makes the register allocator ensure the FalseReg is allocated the // same register as operand 0. - MachineOperand FalseReg = MI->getOperand(Invert ? 2 : 1); FalseReg.setImplicit(); NewMI.addOperand(FalseReg); NewMI->tieOperands(0, NewMI->getNumOperands() - 1); @@ -1803,6 +1826,14 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, unsigned DestReg, unsigned BaseReg, int NumBytes, ARMCC::CondCodes Pred, unsigned PredReg, const ARMBaseInstrInfo &TII, unsigned MIFlags) { + if (NumBytes == 0 && DestReg != BaseReg) { + BuildMI(MBB, MBBI, dl, TII.get(ARM::MOVr), DestReg) + .addReg(BaseReg, RegState::Kill) + .addImm((unsigned)Pred).addReg(PredReg).addReg(0) + .setMIFlags(MIFlags); + return; + } + bool isSub = NumBytes < 0; if (isSub) NumBytes = -NumBytes; @@ -1826,6 +1857,115 @@ void llvm::emitARMRegPlusImmediate(MachineBasicBlock &MBB, } } +bool llvm::tryFoldSPUpdateIntoPushPop(MachineFunction &MF, + MachineInstr *MI, + unsigned NumBytes) { + // This optimisation potentially adds lots of load and store + // micro-operations, it's only really a great benefit to code-size. + if (!MF.getFunction()->hasFnAttribute(Attribute::MinSize)) + return false; + + // If only one register is pushed/popped, LLVM can use an LDR/STR + // instead. We can't modify those so make sure we're dealing with an + // instruction we understand. + bool IsPop = isPopOpcode(MI->getOpcode()); + bool IsPush = isPushOpcode(MI->getOpcode()); + if (!IsPush && !IsPop) + return false; + + bool IsVFPPushPop = MI->getOpcode() == ARM::VSTMDDB_UPD || + MI->getOpcode() == ARM::VLDMDIA_UPD; + bool IsT1PushPop = MI->getOpcode() == ARM::tPUSH || + MI->getOpcode() == ARM::tPOP || + MI->getOpcode() == ARM::tPOP_RET; + + assert((IsT1PushPop || (MI->getOperand(0).getReg() == ARM::SP && + MI->getOperand(1).getReg() == ARM::SP)) && + "trying to fold sp update into non-sp-updating push/pop"); + + // The VFP push & pop act on D-registers, so we can only fold an adjustment + // by a multiple of 8 bytes in correctly. Similarly rN is 4-bytes. Don't try + // if this is violated. + if (NumBytes % (IsVFPPushPop ? 8 : 4) != 0) + return false; + + // ARM and Thumb2 push/pop insts have explicit "sp, sp" operands (+ + // pred) so the list starts at 4. Thumb1 starts after the predicate. + int RegListIdx = IsT1PushPop ? 2 : 4; + + // Calculate the space we'll need in terms of registers. + unsigned FirstReg = MI->getOperand(RegListIdx).getReg(); + unsigned RD0Reg, RegsNeeded; + if (IsVFPPushPop) { + RD0Reg = ARM::D0; + RegsNeeded = NumBytes / 8; + } else { + RD0Reg = ARM::R0; + RegsNeeded = NumBytes / 4; + } + + // We're going to have to strip all list operands off before + // re-adding them since the order matters, so save the existing ones + // for later. + SmallVector RegList; + for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) + RegList.push_back(MI->getOperand(i)); + + MachineBasicBlock *MBB = MI->getParent(); + const TargetRegisterInfo *TRI = MF.getRegInfo().getTargetRegisterInfo(); + const MCPhysReg *CSRegs = TRI->getCalleeSavedRegs(&MF); + + // Now try to find enough space in the reglist to allocate NumBytes. + for (unsigned CurReg = FirstReg - 1; CurReg >= RD0Reg && RegsNeeded; + --CurReg) { + if (!IsPop) { + // Pushing any register is completely harmless, mark the + // register involved as undef since we don't care about it in + // the slightest. 
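Returning to the optimizeSelect hunk a little above: before predicating the folded definition, the select's destination register is now constrained to the class of the false operand, and the fold is abandoned when MRI.constrainRegClass fails. A toy standalone model of that constrain-or-bail step, with integer sets standing in for register classes:

#include <algorithm>
#include <cassert>
#include <vector>

// Toy register class: the set of physical registers a value may live in.
using ToyRegClass = std::vector<int>;

// Constrain VRegClass to its intersection with RC. Returns false if the
// intersection is empty, in which case the caller must give up on the fold
// (optimizeSelect above returns 0 in that case).
bool constrainRegClass(ToyRegClass &VRegClass, const ToyRegClass &RC) {
  ToyRegClass Common;
  for (int R : VRegClass)
    if (std::find(RC.begin(), RC.end(), R) != RC.end())
      Common.push_back(R);
  if (Common.empty())
    return false;
  VRegClass = Common;
  return true;
}

int main() {
  ToyRegClass Dest = {0, 1, 2, 3};           // e.g. any GPR
  assert(constrainRegClass(Dest, {0, 1}));   // fold OK, class narrowed
  assert(!constrainRegClass(Dest, {7, 8}));  // incompatible: bail out
}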
+ RegList.push_back(MachineOperand::CreateReg(CurReg, false, false, + false, false, true)); + --RegsNeeded; + continue; + } + + // However, we can only pop an extra register if it's not live. For + // registers live within the function we might clobber a return value + // register; the other way a register can be live here is if it's + // callee-saved. + if (isCalleeSavedRegister(CurReg, CSRegs) || + MBB->computeRegisterLiveness(TRI, CurReg, MI) != + MachineBasicBlock::LQR_Dead) { + // VFP pops don't allow holes in the register list, so any skip is fatal + // for our transformation. GPR pops do, so we should just keep looking. + if (IsVFPPushPop) + return false; + else + continue; + } + + // Mark the unimportant registers as in the POP. + RegList.push_back(MachineOperand::CreateReg(CurReg, true, false, false, + true)); + --RegsNeeded; + } + + if (RegsNeeded > 0) + return false; + + // Finally we know we can profitably perform the optimisation so go + // ahead: strip all existing registers off and add them back again + // in the right order. + for (int i = MI->getNumOperands() - 1; i >= RegListIdx; --i) + MI->RemoveOperand(i); + + // Add the complete list back in. + MachineInstrBuilder MIB(MF, &*MI); + for (int i = RegList.size() - 1; i >= 0; --i) + MIB.addOperand(RegList[i]); + + return true; +} + bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int &Offset, const ARMBaseInstrInfo &TII) { @@ -2232,8 +2372,32 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, isSafe = true; break; } - // Condition code is after the operand before CPSR. - ARMCC::CondCodes CC = (ARMCC::CondCodes)Instr.getOperand(IO-1).getImm(); + // Condition code is after the operand before CPSR except for VSELs. + ARMCC::CondCodes CC; + bool IsInstrVSel = true; + switch (Instr.getOpcode()) { + default: + IsInstrVSel = false; + CC = (ARMCC::CondCodes)Instr.getOperand(IO - 1).getImm(); + break; + case ARM::VSELEQD: + case ARM::VSELEQS: + CC = ARMCC::EQ; + break; + case ARM::VSELGTD: + case ARM::VSELGTS: + CC = ARMCC::GT; + break; + case ARM::VSELGED: + case ARM::VSELGES: + CC = ARMCC::GE; + break; + case ARM::VSELVSS: + case ARM::VSELVSD: + CC = ARMCC::VS; + break; + } + if (Sub) { ARMCC::CondCodes NewCC = getSwappedCondition(CC); if (NewCC == ARMCC::AL) @@ -2244,11 +2408,14 @@ optimizeCompareInstr(MachineInstr *CmpInstr, unsigned SrcReg, unsigned SrcReg2, // If it is safe to remove CmpInstr, the condition code of these // operands will be modified. if (SrcReg2 != 0 && Sub->getOperand(1).getReg() == SrcReg2 && - Sub->getOperand(2).getReg() == SrcReg) - OperandsToUpdate.push_back(std::make_pair(&((*I).getOperand(IO-1)), - NewCC)); - } - else + Sub->getOperand(2).getReg() == SrcReg) { + // VSel doesn't support condition code update. + if (IsInstrVSel) + return false; + OperandsToUpdate.push_back( + std::make_pair(&((*I).getOperand(IO - 1)), NewCC)); + } + } else switch (CC) { default: // CPSR can be used multiple times, we should continue. 
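tryFoldSPUpdateIntoPushPop, completed above, absorbs a stack adjustment into an existing push/pop register list: each dummy GPR accounts for 4 bytes and each D register for 8, so a 16-byte adjustment costs two extra D registers on a VFP pop but four scratch registers on an ordinary one, and any remainder makes the fold impossible. The core bookkeeping as a standalone sketch (the real routine additionally checks liveness and callee-saved status of every candidate register):

#include <cassert>

// Returns how many extra registers must be added to a push/pop reglist to
// absorb a stack adjustment of NumBytes, or -1 if the adjustment is not a
// multiple of the register size (4 bytes for GPRs, 8 for VFP D registers).
int extraRegsForAdjustment(unsigned NumBytes, bool IsVFPPushPop) {
  unsigned RegSize = IsVFPPushPop ? 8 : 4;
  if (NumBytes % RegSize != 0)
    return -1;                 // cannot fold; keep the separate sp update
  return NumBytes / RegSize;
}

int main() {
  assert(extraRegsForAdjustment(16, /*IsVFPPushPop=*/true) == 2);
  assert(extraRegsForAdjustment(16, /*IsVFPPushPop=*/false) == 4);
  assert(extraRegsForAdjustment(12, /*IsVFPPushPop=*/true) == -1);
}

Since every extra list entry adds a load or store micro-operation, the transformation is gated on the MinSize attribute: it trades memory traffic for code size.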
@@ -3604,6 +3771,24 @@ ARMBaseInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, return Latency; } +unsigned ARMBaseInstrInfo::getPredicationCost(const MachineInstr *MI) const { + if (MI->isCopyLike() || MI->isInsertSubreg() || + MI->isRegSequence() || MI->isImplicitDef()) + return 0; + + if (MI->isBundle()) + return 0; + + const MCInstrDesc &MCID = MI->getDesc(); + + if (MCID.isCall() || MCID.hasImplicitDefOfPhysReg(ARM::CPSR)) { + // When predicated, CPSR is an additional source operand for CPSR updating + // instructions, this apparently increases their latencies. + return 1; + } + return 0; +} + unsigned ARMBaseInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost) const { @@ -3685,8 +3870,7 @@ hasHighOperandLatency(const InstrItineraryData *ItinData, return true; // Hoist VFP / NEON instructions with 4 or higher latency. - int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx, - /*FindMin=*/false); + int Latency = computeOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); if (Latency < 0) Latency = getInstrLatency(ItinData, DefMI); if (Latency <= 3) @@ -4137,7 +4321,7 @@ breakPartialRegDependency(MachineBasicBlock::iterator MI, // FIXME: In some cases, VLDRS can be changed to a VLD1DUPd32 which defines // the full D-register by loading the same value to both lanes. The // instruction is micro-coded with 2 uops, so don't do this until we can - // properly schedule micro-coded instuctions. The dispatcher stalls cause + // properly schedule micro-coded instructions. The dispatcher stalls cause // too big regressions. // Insert the dependency-breaking FCONSTD before MI. @@ -4152,6 +4336,8 @@ bool ARMBaseInstrInfo::hasNOP() const { } bool ARMBaseInstrInfo::isSwiftFastImmShift(const MachineInstr *MI) const { + if (MI->getNumOperands() < 4) + return true; unsigned ShOpVal = MI->getOperand(3).getImm(); unsigned ShImm = ARM_AM::getSORegOffset(ShOpVal); // Swift supports faster shifts for: lsl 2, lsl 1, and lsr 1. 
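The new getPredicationCost hook above charges one cycle for predicating calls and CPSR-writing instructions, because inside an IT block CPSR becomes an additional source operand for them, and charges nothing for copy-like pseudos and bundles. Restated as a standalone toy model:

#include <cassert>

struct ToyInstr {
  bool IsCopyLike; // COPY, INSERT_SUBREG, REG_SEQUENCE, IMPLICIT_DEF
  bool IsBundle;
  bool IsCall;
  bool DefsCPSR;   // has an implicit def of the flags register
};

// Toy restatement of ARMBaseInstrInfo::getPredicationCost above.
unsigned predicationCost(const ToyInstr &MI) {
  if (MI.IsCopyLike || MI.IsBundle)
    return 0;      // pseudo-ops: predication is free
  if (MI.IsCall || MI.DefsCPSR)
    return 1;      // predicated CPSR writers also read CPSR: one extra cycle
  return 0;
}

int main() {
  ToyInstr Adds = {false, false, false, true};  // e.g. ADDS, writes flags
  ToyInstr Add = {false, false, false, false};  // plain ADD
  assert(predicationCost(Adds) == 1 && predicationCost(Add) == 0);
}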
diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h index 2ef659c..93e5964 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseInstrInfo.h @@ -46,7 +46,7 @@ public: MachineBasicBlock::iterator &MBBI, LiveVariables *LV) const; - virtual const ARMBaseRegisterInfo &getRegisterInfo() const =0; + virtual const ARMBaseRegisterInfo &getRegisterInfo() const = 0; const ARMSubtarget &getSubtarget() const { return Subtarget; } ScheduleHazardRecognizer * @@ -125,12 +125,6 @@ public: virtual bool expandPostRAPseudo(MachineBasicBlock::iterator MI) const; - virtual MachineInstr *emitFrameIndexDebugValue(MachineFunction &MF, - int FrameIx, - uint64_t Offset, - const MDNode *MDPtr, - DebugLoc DL) const; - virtual void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, unsigned DestReg, unsigned SubIdx, @@ -270,6 +264,8 @@ private: const MCInstrDesc &UseMCID, unsigned UseIdx, unsigned UseAlign) const; + unsigned getPredicationCost(const MachineInstr *MI) const; + unsigned getInstrLatency(const InstrItineraryData *ItinData, const MachineInstr *MI, unsigned *PredCost = 0) const; @@ -366,6 +362,17 @@ bool isIndirectBranchOpcode(int Opc) { return Opc == ARM::BX || Opc == ARM::MOVPCRX || Opc == ARM::tBRIND; } +static inline bool isPopOpcode(int Opc) { + return Opc == ARM::tPOP_RET || Opc == ARM::LDMIA_RET || + Opc == ARM::t2LDMIA_RET || Opc == ARM::tPOP || Opc == ARM::LDMIA_UPD || + Opc == ARM::t2LDMIA_UPD || Opc == ARM::VLDMDIA_UPD; +} + +static inline bool isPushOpcode(int Opc) { + return Opc == ARM::tPUSH || Opc == ARM::t2STMDB_UPD || + Opc == ARM::STMDB_UPD || Opc == ARM::VSTMDDB_UPD; +} + /// getInstrPredicate - If instruction is predicated, returns its predicate /// condition, otherwise returns AL. It also returns the condition code /// register by reference. @@ -405,6 +412,13 @@ void emitThumbRegPlusImmediate(MachineBasicBlock &MBB, const ARMBaseRegisterInfo& MRI, unsigned MIFlags = 0); +/// Tries to add registers to the reglist of a given base-updating +/// push/pop instruction to adjust the stack by an additional +/// NumBytes. This can save a few bytes per function in code-size, but +/// obviously generates more memory traffic. As such, it only takes +/// effect in functions being optimised for size. +bool tryFoldSPUpdateIntoPushPop(MachineFunction &MF, MachineInstr *MI, + unsigned NumBytes); /// rewriteARMFrameIndex / rewriteT2FrameIndex - /// Rewrite MI to access 'Offset' bytes from the FP. Return false if the diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp index b0d34a7..8717dc0 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -43,46 +43,73 @@ using namespace llvm; -ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, - const ARMSubtarget &sti) - : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), TII(tii), STI(sti), +ARMBaseRegisterInfo::ARMBaseRegisterInfo(const ARMSubtarget &sti) + : ARMGenRegisterInfo(ARM::LR, 0, 0, ARM::PC), STI(sti), FramePtr((STI.isTargetDarwin() || STI.isThumb()) ? ARM::R7 : ARM::R11), BasePtr(ARM::R6) { } const uint16_t* ARMBaseRegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { - bool ghcCall = false; - - if (MF) { - const Function *F = MF->getFunction(); - ghcCall = (F ? 
F->getCallingConv() == CallingConv::GHC : false); } - - if (ghcCall) { - return CSR_GHC_SaveList; - } - else { - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_SaveList : CSR_AAPCS_SaveList; + const uint16_t *RegList = (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_SaveList + : CSR_AAPCS_SaveList; + + if (!MF) return RegList; + + const Function *F = MF->getFunction(); + if (F->getCallingConv() == CallingConv::GHC) { + // GHC set of callee saved regs is empty as all those regs are + // used for passing STG regs around + return CSR_NoRegs_SaveList; + } else if (F->hasFnAttribute("interrupt")) { + if (STI.isMClass()) { + // M-class CPUs have hardware which saves the registers needed to allow a + // function conforming to the AAPCS to function as a handler. + return CSR_AAPCS_SaveList; + } else if (F->getFnAttribute("interrupt").getValueAsString() == "FIQ") { + // Fast interrupt mode gives the handler a private copy of R8-R14, so less + // need to be saved to restore user-mode state. + return CSR_FIQ_SaveList; + } else { + // Generally only R13-R14 (i.e. SP, LR) are automatically preserved by + // exception handling. + return CSR_GenericInt_SaveList; + } } + + return RegList; } const uint32_t* -ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID) const { +ARMBaseRegisterInfo::getCallPreservedMask(CallingConv::ID CC) const { + if (CC == CallingConv::GHC) + // This is academic because all GHC calls are (supposed to be) tail calls + return CSR_NoRegs_RegMask; return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) ? CSR_iOS_RegMask : CSR_AAPCS_RegMask; } const uint32_t* -ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID) const { - return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) - ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; +ARMBaseRegisterInfo::getNoPreservedMask() const { + return CSR_NoRegs_RegMask; } const uint32_t* -ARMBaseRegisterInfo::getNoPreservedMask() const { - return CSR_NoRegs_RegMask; +ARMBaseRegisterInfo::getThisReturnPreservedMask(CallingConv::ID CC) const { + // This should return a register mask that is the same as that returned by + // getCallPreservedMask but that additionally preserves the register used for + // the first i32 argument (which must also be the register used to return a + // single i32 return value) + // + // In case that the calling convention does not use the same register for + // both or otherwise does not want to enable this optimization, the function + // should return NULL + if (CC == CallingConv::GHC) + // This is academic because all GHC calls are (supposed to be) tail calls + return NULL; + return (STI.isTargetIOS() && !STI.isAAPCS_ABI()) + ? CSR_iOS_ThisReturn_RegMask : CSR_AAPCS_ThisReturn_RegMask; } BitVector ARMBaseRegisterInfo:: @@ -94,6 +121,7 @@ getReservedRegs(const MachineFunction &MF) const { Reserved.set(ARM::SP); Reserved.set(ARM::PC); Reserved.set(ARM::FPSCR); + Reserved.set(ARM::APSR_NZCV); if (TFI->hasFP(MF)) Reserved.set(FramePtr); if (hasBasePointer(MF)) @@ -309,7 +337,7 @@ bool ARMBaseRegisterInfo::canRealignStack(const MachineFunction &MF) const { // 1. Dynamic stack realignment is explicitly disabled, // 2. This is a Thumb1 function (it's not useful, so we don't bother), or // 3. There are VLAs in the function and the base pointer is disabled.
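getCalleeSavedRegs now derives the save list from function attributes: GHC code saves nothing because every register carries STG state, M-class interrupt handlers can keep the plain AAPCS list because the hardware stacks the caller-visible registers, FIQ handlers need less thanks to the banked R8-R14, and any other handler must preserve nearly everything itself. The decision tree restated as a standalone sketch (the iOS list variant is ignored here):

#include <cassert>
#include <string>

enum CSRList { AAPCS, NoRegs, FIQ, GenericInt };

// Mirrors ARMBaseRegisterInfo::getCalleeSavedRegs above. Interrupt holds the
// "interrupt" attribute value ("", "IRQ", "FIQ", ...); empty means the
// function is not a handler.
CSRList pickCalleeSavedList(bool IsGHC, const std::string &Interrupt,
                            bool IsMClass) {
  if (IsGHC)
    return NoRegs;       // all registers pass STG state around
  if (!Interrupt.empty()) {
    if (IsMClass)
      return AAPCS;      // hardware already saves what a handler may clobber
    if (Interrupt == "FIQ")
      return FIQ;        // banked R8-R14 reduce what must be saved
    return GenericInt;   // only SP/LR are preserved automatically
  }
  return AAPCS;
}

int main() {
  assert(pickCalleeSavedList(false, "FIQ", false) == FIQ);
  assert(pickCalleeSavedList(true, "", false) == NoRegs);
  assert(pickCalleeSavedList(false, "", false) == AAPCS);
}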
- if (!MF.getTarget().Options.RealignStack) + if (MF.getFunction()->hasFnAttribute("no-realign-stack")) return false; if (AFI->isThumb1OnlyFunction()) return false; @@ -357,14 +385,6 @@ ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const { return ARM::SP; } -unsigned ARMBaseRegisterInfo::getEHExceptionRegister() const { - llvm_unreachable("What is the exception register"); -} - -unsigned ARMBaseRegisterInfo::getEHHandlerRegister() const { - llvm_unreachable("What is the exception handler register"); -} - /// emitLoadConstPool - Emits a load from constpool to materialize the /// specified immediate. void ARMBaseRegisterInfo:: @@ -375,6 +395,7 @@ emitLoadConstPool(MachineBasicBlock &MBB, ARMCC::CondCodes Pred, unsigned PredReg, unsigned MIFlags) const { MachineFunction &MF = *MBB.getParent(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); MachineConstantPool *ConstantPool = MF.getConstantPool(); const Constant *C = ConstantInt::get(Type::getInt32Ty(MF.getFunction()->getContext()), Val); @@ -556,9 +577,10 @@ materializeFrameBaseRegister(MachineBasicBlock *MBB, if (Ins != MBB->end()) DL = Ins->getDebugLoc(); - const MCInstrDesc &MCID = TII.get(ADDriOpc); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); const MachineFunction &MF = *MBB->getParent(); + MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + const TargetInstrInfo &TII = *MF.getTarget().getInstrInfo(); + const MCInstrDesc &MCID = TII.get(ADDriOpc); MRI.constrainRegClass(BaseReg, TII.getRegClass(MCID, 0, this, MF)); MachineInstrBuilder MIB = AddDefaultPred(BuildMI(*MBB, Ins, DL, MCID, BaseReg) @@ -574,6 +596,8 @@ ARMBaseRegisterInfo::resolveFrameIndex(MachineBasicBlock::iterator I, MachineInstr &MI = *I; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const ARMBaseInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); ARMFunctionInfo *AFI = MF.getInfo(); int Off = Offset; // ARM doesn't need the general 64-bit offsets unsigned i = 0; @@ -671,6 +695,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, MachineInstr &MI = *II; MachineBasicBlock &MBB = *MI.getParent(); MachineFunction &MF = *MBB.getParent(); + const ARMBaseInstrInfo &TII = + *static_cast(MF.getTarget().getInstrInfo()); const ARMFrameLowering *TFI = static_cast(MF.getTarget().getFrameLowering()); ARMFunctionInfo *AFI = MF.getInfo(); @@ -696,12 +722,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II, } #endif // NDEBUG - // Special handling of dbg_value instructions. - if (MI.isDebugValue()) { - MI.getOperand(FIOperandNum). 
ChangeToRegister(FrameReg, false /*isDef*/); - MI.getOperand(FIOperandNum + 1).ChangeToImmediate(Offset); - return; - } + assert(!MI.isDebugValue() && "DBG_VALUEs should be handled in target-independent code"); // Modify MI as necessary to handle as much of 'Offset' as possible bool Done = false; diff --git a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h index 0679919..e28fff6 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h +++ b/contrib/llvm/lib/Target/ARM/ARMBaseRegisterInfo.h @@ -72,9 +72,16 @@ static inline bool isARMArea3Register(unsigned Reg, bool isIOS) { } } +static inline bool isCalleeSavedRegister(unsigned Reg, + const MCPhysReg *CSRegs) { + for (unsigned i = 0; CSRegs[i]; ++i) + if (Reg == CSRegs[i]) + return true; + return false; +} + class ARMBaseRegisterInfo : public ARMGenRegisterInfo { protected: - const ARMBaseInstrInfo &TII; const ARMSubtarget &STI; /// FramePtr - ARM physical register used as frame ptr. @@ -86,8 +93,7 @@ protected: unsigned BasePtr; // Can be only subclassed. - explicit ARMBaseRegisterInfo(const ARMBaseInstrInfo &tii, - const ARMSubtarget &STI); + explicit ARMBaseRegisterInfo(const ARMSubtarget &STI); // Return the opcode that implements 'Op', or 0 if no opcode unsigned getOpcode(int Op) const; @@ -96,9 +102,18 @@ public: /// Code Generation virtual methods... const uint16_t *getCalleeSavedRegs(const MachineFunction *MF = 0) const; const uint32_t *getCallPreservedMask(CallingConv::ID) const; - const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; const uint32_t *getNoPreservedMask() const; + /// getThisReturnPreservedMask - Returns a call preserved mask specific to the + /// case that 'returned' is on an i32 first argument if the calling convention + /// is one that can (partially) model this attribute with a preserved mask + /// (i.e. it is a calling convention that uses the same register for the first + /// i32 argument and an i32 return value) + /// + /// Should return NULL in the case that the calling convention does not have + /// this property + const uint32_t *getThisReturnPreservedMask(CallingConv::ID) const; + BitVector getReservedRegs(const MachineFunction &MF) const; const TargetRegisterClass* @@ -142,10 +157,6 @@ public: unsigned getFrameRegister(const MachineFunction &MF) const; unsigned getBaseRegister() const { return BasePtr; } - // Exception handling queries. - unsigned getEHExceptionRegister() const; - unsigned getEHHandlerRegister() const; - bool isLowRegister(unsigned Reg) const; diff --git a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h index 11bd6a4..b16d4ef 100644 --- a/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h +++ b/contrib/llvm/lib/Target/ARM/ARMBuildAttrs.h @@ -15,11 +15,13 @@ #ifndef __TARGET_ARMBUILDATTRS_H__ #define __TARGET_ARMBUILDATTRS_H__ +namespace llvm { namespace ARMBuildAttrs { + enum SpecialAttr { // This is for the .cpu asm attr. It translates into one or more // AttrType (below) entries in the .ARM.attributes section in the ELF. - SEL_CPU + SEL_CPU }; enum AttrType { @@ -57,7 +59,7 @@ namespace ARMBuildAttrs { ABI_FP_optimization_goals = 31, compatibility = 32, CPU_unaligned_access = 34, - VFP_HP_extension = 36, + FP_HP_extension = 36, ABI_FP_16bit_format = 38, MPextension_use = 42, // was 70, 2.08 ABI DIV_use = 44, @@ -89,10 +91,11 @@ namespace ARMBuildAttrs { v7 = 10, // e.g. Cortex A8, Cortex M3 v6_M = 11, // e.g. 
Cortex M1 v6S_M = 12, // v6_M with the System extensions - v7E_M = 13 // v7_M with DSP extensions + v7E_M = 13, // v7_M with DSP extensions + v8 = 14 // v8, AArch32 }; - enum CPUArchProfile { // (=7), uleb128 + enum CPUArchProfile { // (=7), uleb128 Not_Applicable = 0, // pre v7, or cross-profile code ApplicationProfile = (0x41), // 'A' (e.g. for Cortex A8) RealTimeProfile = (0x52), // 'R' (e.g. for Cortex R4) @@ -101,31 +104,67 @@ namespace ARMBuildAttrs { }; // The following have a lot of common use cases - enum { - //ARMISAUse (=8), uleb128 and THUMBISAUse (=9), uleb128 + enum { Not_Allowed = 0, Allowed = 1, - // FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10) + // Tag_ARM_ISA_use (=8), uleb128 + + // Tag_THUMB_ISA_use, (=9), uleb128 + AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions) + + // Tag_FP_arch (=10), uleb128 (formerly Tag_VFP_arch = 10) AllowFPv2 = 2, // v2 FP ISA permitted (implies use of the v1 FP ISA) AllowFPv3A = 3, // v3 FP ISA permitted (implies use of the v2 FP ISA) - AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 - AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) + AllowFPv3B = 4, // v3 FP ISA permitted, but only D0-D15, S0-S31 + AllowFPv4A = 5, // v4 FP ISA permitted (implies use of v3 FP ISA) AllowFPv4B = 6, // v4 FP ISA was permitted, but only D0-D15, S0-S31 + AllowFPARMv8A = 7, // Use of the ARM v8-A FP ISA was permitted + AllowFPARMv8B = 8, // Use of the ARM v8-A FP ISA was permitted, but only D0-D15, S0-S31 // Tag_WMMX_arch, (=11), uleb128 - AllowThumb32 = 2, // 32-bit Thumb (implies 16-bit instructions) - - // Tag_WMMX_arch, (=11), uleb128 - AllowWMMXv1 = 2, // The user permitted this entity to use WMMX v2 + AllowWMMXv1 = 1, // The user permitted this entity to use WMMX v1 + AllowWMMXv2 = 2, // The user permitted this entity to use WMMX v2 + + // Tag_Advanced_SIMD_arch, (=12), uleb128 + AllowNeon = 1, // SIMDv1 was permitted + AllowNeon2 = 2, // SIMDv2 was permitted (Half-precision FP, MAC operations) + AllowNeonARMv8 = 3, // ARM v8-A SIMD was permitted - // Tag_ABI_FP_denormal, (=20), uleb128 + // Tag_ABI_FP_denormal, (=20), uleb128 PreserveFPSign = 2, // sign when flushed-to-zero is preserved // Tag_ABI_FP_number_model, (=23), uleb128 AllowRTABI = 2, // numbers, infinities, and one quiet NaN (see [RTABI]) - AllowIEE754 = 3 // this code to use all the IEEE 754-defined FP encodings + AllowIEE754 = 3, // this code to use all the IEEE 754-defined FP encodings + + // Tag_ABI_HardFP_use, (=27), uleb128 + HardFPImplied = 0, // FP use should be implied by Tag_FP_arch + HardFPSinglePrecision = 1, // Single-precision only + + // Tag_ABI_VFP_args, (=28), uleb128 + BaseAAPCS = 0, + HardFPAAPCS = 1, + + // Tag_FP_HP_extension, (=36), uleb128 + AllowHPFP = 1, // Allow use of Half Precision FP + + // Tag_MPextension_use, (=42), uleb128 + AllowMP = 1, // Allow use of MP extensions + + // Tag_DIV_use, (=44), uleb128 + AllowDIVIfExists = 0, // Allow hardware divide if available in arch, or no info exists. + DisallowDIV = 1, // Hardware divide explicitly disallowed + AllowDIVExt = 2, // Allow hardware divide as optional architecture extension above + // the base arch specified by Tag_CPU_arch and Tag_CPU_arch_profile. 
+ + // Tag_Virtualization_use, (=68), uleb128 + AllowTZ = 1, + AllowVirtualization = 2, + AllowTZVirtualization = 3 }; -} + +} // namespace ARMBuildAttrs +} // namespace llvm #endif // __TARGET_ARMBUILDATTRS_H__ diff --git a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td index 8ff666e..9bea4b2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCallingConv.td +++ b/contrib/llvm/lib/Target/ARM/ARMCallingConv.td @@ -207,10 +207,24 @@ def CSR_AAPCS_ThisReturn : CalleeSavedRegs<(add LR, R11, R10, R9, R8, R7, R6, def CSR_iOS : CalleeSavedRegs<(add LR, R7, R6, R5, R4, (sub CSR_AAPCS, R9))>; def CSR_iOS_ThisReturn : CalleeSavedRegs<(add LR, R7, R6, R5, R4, - (sub CSR_AAPCS_ThisReturn, R9))>; + (sub CSR_AAPCS_ThisReturn, R9))>; + +// The "interrupt" attribute is used to generate code that is acceptable in +// exception-handlers of various kinds. It makes us use a different return +// instruction (handled elsewhere) and affects which registers we must return to +// our "caller" in the same state as we receive them. + +// For most interrupts, all registers except SP and LR are shared with +// user-space. We mark LR to be saved anyway, since this is what the ARM backend +// generally does rather than tracking its liveness as a normal register. +def CSR_GenericInt : CalleeSavedRegs<(add LR, (sequence "R%u", 12, 0))>; + +// The fast interrupt handlers have more private state and get their own copies +// of R8-R12, in addition to SP and LR. As before, mark LR for saving too. + +// FIXME: we mark R11 as callee-saved since it's often the frame-pointer, and +// current frame lowering expects to encounter it while processing callee-saved +// registers. +def CSR_FIQ : CalleeSavedRegs<(add LR, R11, (sequence "R%u", 7, 0))>; + -// GHC set of callee saved regs is empty as all those regs are -// used for passing STG regs around -// add is a workaround for not being able to compile empty list: -// def CSR_GHC : CalleeSavedRegs<()>; -def CSR_GHC : CalleeSavedRegs<(add)>; diff --git a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp index 5e8e173..568ca85 100644 --- a/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMCodeEmitter.cpp @@ -167,6 +167,8 @@ namespace { const { return 0; } unsigned NEONThumb2DupPostEncoder(const MachineInstr &MI,unsigned Val) const { return 0; } + unsigned NEONThumb2V8PostEncoder(const MachineInstr &MI,unsigned Val) + const { return 0; } unsigned VFPThumb2PostEncoder(const MachineInstr&MI, unsigned Val) const { return 0; } unsigned getAdrLabelOpValue(const MachineInstr &MI, unsigned Op) @@ -1044,8 +1046,8 @@ void ARMCodeEmitter::emitDataProcessingInstruction(const MachineInstr &MI, return; } else if ((MCID.Opcode == ARM::BFC) || (MCID.Opcode == ARM::BFI)) { uint32_t v = ~MI.getOperand(2).getImm(); - int32_t lsb = CountTrailingZeros_32(v); - int32_t msb = (32 - CountLeadingZeros_32(v)) - 1; + int32_t lsb = countTrailingZeros(v); + int32_t msb = (32 - countLeadingZeros(v)) - 1; // Instr{20-16} = msb, Instr{11-7} = lsb Binary |= (msb & 0x1F) << 16; Binary |= (lsb & 0x1F) << 7; diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp index 4891609..cff5ce2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantIslandPass.cpp @@ -128,7 +128,7 @@ namespace { // If the block size isn't a multiple of the known bits, assume the // worst case padding. 
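The CountTrailingZeros_32/CountLeadingZeros_32 renames above land in the BFC/BFI encoder, where the inverted immediate is a contiguous mask whose edges give the bitfield's lsb and msb, and the same countTrailingZeros helper computes the guaranteed block alignment in ARMConstantIslandPass just below. A worked standalone example (GCC/Clang builtins standing in for the llvm::MathExtras helpers):

#include <cassert>
#include <cstdint>

int main() {
  // BFI into bits 6..17: the (inverted) immediate is a contiguous mask.
  uint32_t v = 0x0003FFC0;                    // bits 6..17 set
  int32_t lsb = __builtin_ctz(v);             // countTrailingZeros -> 6
  int32_t msb = (32 - __builtin_clz(v)) - 1;  // -> 17
  assert(lsb == 6 && msb == 17);
  // Instr{20-16} = msb, Instr{11-7} = lsb, as in
  // emitDataProcessingInstruction above.
  uint32_t Binary = ((msb & 0x1F) << 16) | ((lsb & 0x1F) << 7);
  assert(Binary == ((17u << 16) | (6u << 7)));
}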
if (Size & ((1u << Bits) - 1)) - Bits = CountTrailingZeros_32(Size); + Bits = countTrailingZeros(Size); return Bits; } @@ -753,6 +753,7 @@ initializeFunctionInfo(const std::vector &CPEMIs) { Scale = 4; break; + case ARM::LDRBi12: case ARM::LDRi12: case ARM::LDRcp: case ARM::t2LDRpci: diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp index 4e703ec..7d41c69 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.cpp @@ -163,21 +163,7 @@ const BlockAddress *ARMConstantPoolConstant::getBlockAddress() const { int ARMConstantPoolConstant::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - unsigned AlignMask = Alignment - 1; - const std::vector Constants = CP->getConstants(); - for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - if (Constants[i].isMachineConstantPoolEntry() && - (Constants[i].getAlignment() & AlignMask) == 0) { - ARMConstantPoolValue *CPV = - (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; - ARMConstantPoolConstant *APC = dyn_cast(CPV); - if (!APC) continue; - if (APC->CVal == CVal && equals(APC)) - return i; - } - } - - return -1; + return getExistingMachineCPValueImpl(CP, Alignment); } bool ARMConstantPoolConstant::hasSameValue(ARMConstantPoolValue *ACPV) { @@ -216,22 +202,7 @@ ARMConstantPoolSymbol::Create(LLVMContext &C, const char *s, int ARMConstantPoolSymbol::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - unsigned AlignMask = Alignment - 1; - const std::vector Constants = CP->getConstants(); - for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - if (Constants[i].isMachineConstantPoolEntry() && - (Constants[i].getAlignment() & AlignMask) == 0) { - ARMConstantPoolValue *CPV = - (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; - ARMConstantPoolSymbol *APS = dyn_cast(CPV); - if (!APS) continue; - - if (APS->S == S && equals(APS)) - return i; - } - } - - return -1; + return getExistingMachineCPValueImpl(CP, Alignment); } bool ARMConstantPoolSymbol::hasSameValue(ARMConstantPoolValue *ACPV) { @@ -271,22 +242,7 @@ ARMConstantPoolMBB *ARMConstantPoolMBB::Create(LLVMContext &C, int ARMConstantPoolMBB::getExistingMachineCPValue(MachineConstantPool *CP, unsigned Alignment) { - unsigned AlignMask = Alignment - 1; - const std::vector Constants = CP->getConstants(); - for (unsigned i = 0, e = Constants.size(); i != e; ++i) { - if (Constants[i].isMachineConstantPoolEntry() && - (Constants[i].getAlignment() & AlignMask) == 0) { - ARMConstantPoolValue *CPV = - (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; - ARMConstantPoolMBB *APMBB = dyn_cast(CPV); - if (!APMBB) continue; - - if (APMBB->MBB == MBB && equals(APMBB)) - return i; - } - } - - return -1; + return getExistingMachineCPValueImpl(CP, Alignment); } bool ARMConstantPoolMBB::hasSameValue(ARMConstantPoolValue *ACPV) { diff --git a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h index 93812fe..7ae7bf4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h +++ b/contrib/llvm/lib/Target/ARM/ARMConstantPoolValue.h @@ -15,6 +15,7 @@ #define LLVM_TARGET_ARM_CONSTANTPOOLVALUE_H #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/Support/Casting.h" #include "llvm/Support/ErrorHandling.h" #include @@ -64,6 +65,26 @@ protected: ARMConstantPoolValue(LLVMContext &C, unsigned id, ARMCP::ARMCPKind Kind, unsigned char PCAdj, ARMCP::ARMCPModifier Modifier, 
bool AddCurrentAddress); + + template <typename Derived> + int getExistingMachineCPValueImpl(MachineConstantPool *CP, + unsigned Alignment) { + unsigned AlignMask = Alignment - 1; + const std::vector<MachineConstantPoolEntry> &Constants = CP->getConstants(); + for (unsigned i = 0, e = Constants.size(); i != e; ++i) { + if (Constants[i].isMachineConstantPoolEntry() && + (Constants[i].getAlignment() & AlignMask) == 0) { + ARMConstantPoolValue *CPV = + (ARMConstantPoolValue *)Constants[i].Val.MachineCPVal; + if (Derived *APC = dyn_cast<Derived>(CPV)) + if (cast<Derived>(this)->equals(APC)) + return i; + } + } + + return -1; + } + public: virtual ~ARMConstantPoolValue(); @@ -156,6 +177,10 @@ public: static bool classof(const ARMConstantPoolValue *APV) { return APV->isGlobalValue() || APV->isBlockAddress() || APV->isLSDA(); } + + bool equals(const ARMConstantPoolConstant *A) const { + return CVal == A->CVal && ARMConstantPoolValue::equals(A); + } }; /// ARMConstantPoolSymbol - ARM-specific constantpool values for external @@ -187,6 +212,10 @@ public: static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isExtSymbol(); } + + bool equals(const ARMConstantPoolSymbol *A) const { + return S == A->S && ARMConstantPoolValue::equals(A); + } }; /// ARMConstantPoolMBB - ARM-specific constantpool value of a machine basic @@ -219,6 +248,10 @@ public: static bool classof(const ARMConstantPoolValue *ACPV) { return ACPV->isMachineBasicBlock(); } + + bool equals(const ARMConstantPoolMBB *A) const { + return MBB == A->MBB && ARMConstantPoolValue::equals(A); + } }; } // End llvm namespace diff --git a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp index beb843c..e6f7f86 100644 --- a/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMExpandPseudoInsts.cpp @@ -692,10 +692,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned newOpc = Opcode == ARM::VMOVScc ? ARM::VMOVS : ARM::VMOVD; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(newOpc), MI.getOperand(1).getReg()) - .addReg(MI.getOperand(2).getReg(), - getKillRegState(MI.getOperand(2).isKill())) + .addOperand(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) // 'pred' - .addReg(MI.getOperand(4).getReg()); + .addOperand(MI.getOperand(4)); MI.eraseFromParent(); return true; @@ -705,10 +704,9 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, unsigned Opc = AFI->isThumbFunction() ?
ARM::t2MOVr : ARM::MOVr; BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) - .addReg(MI.getOperand(2).getReg(), - getKillRegState(MI.getOperand(2).isKill())) + .addOperand(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) // 'pred' - .addReg(MI.getOperand(4).getReg()) + .addOperand(MI.getOperand(4)) .addReg(0); // 's' bit MI.eraseFromParent(); @@ -717,39 +715,36 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVCCsi: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), (MI.getOperand(1).getReg())) - .addReg(MI.getOperand(2).getReg(), - getKillRegState(MI.getOperand(2).isKill())) + .addOperand(MI.getOperand(2)) .addImm(MI.getOperand(3).getImm()) .addImm(MI.getOperand(4).getImm()) // 'pred' - .addReg(MI.getOperand(5).getReg()) + .addOperand(MI.getOperand(5)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } - case ARM::MOVCCsr: { BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsr), (MI.getOperand(1).getReg())) - .addReg(MI.getOperand(2).getReg(), - getKillRegState(MI.getOperand(2).isKill())) - .addReg(MI.getOperand(3).getReg(), - getKillRegState(MI.getOperand(3).isKill())) + .addOperand(MI.getOperand(2)) + .addOperand(MI.getOperand(3)) .addImm(MI.getOperand(4).getImm()) .addImm(MI.getOperand(5).getImm()) // 'pred' - .addReg(MI.getOperand(6).getReg()) + .addOperand(MI.getOperand(6)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } + case ARM::t2MOVCCi16: case ARM::MOVCCi16: { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVi16), + unsigned NewOpc = AFI->isThumbFunction() ? ARM::t2MOVi16 : ARM::MOVi16; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' - .addReg(MI.getOperand(4).getReg()); - + .addOperand(MI.getOperand(4)); MI.eraseFromParent(); return true; } @@ -760,23 +755,47 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' - .addReg(MI.getOperand(4).getReg()) + .addOperand(MI.getOperand(4)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } + case ARM::t2MVNCCi: case ARM::MVNCCi: { - BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MVNi), + unsigned Opc = AFI->isThumbFunction() ? 
ARM::t2MVNi : ARM::MVNi; + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(Opc), MI.getOperand(1).getReg()) .addImm(MI.getOperand(2).getImm()) .addImm(MI.getOperand(3).getImm()) // 'pred' - .addReg(MI.getOperand(4).getReg()) + .addOperand(MI.getOperand(4)) .addReg(0); // 's' bit MI.eraseFromParent(); return true; } + case ARM::t2MOVCClsl: + case ARM::t2MOVCClsr: + case ARM::t2MOVCCasr: + case ARM::t2MOVCCror: { + unsigned NewOpc; + switch (Opcode) { + case ARM::t2MOVCClsl: NewOpc = ARM::t2LSLri; break; + case ARM::t2MOVCClsr: NewOpc = ARM::t2LSRri; break; + case ARM::t2MOVCCasr: NewOpc = ARM::t2ASRri; break; + case ARM::t2MOVCCror: NewOpc = ARM::t2RORri; break; + default: llvm_unreachable("unexpected conditional move"); + } + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(NewOpc), + MI.getOperand(1).getReg()) + .addOperand(MI.getOperand(2)) + .addImm(MI.getOperand(3).getImm()) + .addImm(MI.getOperand(4).getImm()) // 'pred' + .addOperand(MI.getOperand(5)) + .addReg(0); // 's' bit + MI.eraseFromParent(); + return true; + } case ARM::Int_eh_sjlj_dispatchsetup: { MachineFunction &MF = *MI.getParent()->getParent(); const ARMBaseInstrInfo *AII = @@ -823,7 +842,7 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, case ARM::MOVsrl_flag: case ARM::MOVsra_flag: { - // These are just fancy MOVs insructions. + // These are just fancy MOVs instructions. AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::MOVsi), MI.getOperand(0).getReg()) .addOperand(MI.getOperand(1)) @@ -938,6 +957,18 @@ bool ARMExpandPseudo::ExpandMI(MachineBasicBlock &MBB, ExpandMOV32BitImm(MBB, MBBI); return true; + case ARM::SUBS_PC_LR: { + MachineInstrBuilder MIB = + BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::SUBri), ARM::PC) + .addReg(ARM::LR) + .addOperand(MI.getOperand(0)) + .addOperand(MI.getOperand(1)) + .addOperand(MI.getOperand(2)) + .addReg(ARM::CPSR, RegState::Undef); + TransferImpOps(MI, MIB, MIB); + MI.eraseFromParent(); + return true; + } case ARM::VLDMQIA: { unsigned NewOpc = ARM::VLDMDIA; MachineInstrBuilder MIB = diff --git a/contrib/llvm/lib/Target/ARM/ARMFPUName.def b/contrib/llvm/lib/Target/ARM/ARMFPUName.def new file mode 100644 index 0000000..9a1bbe7 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMFPUName.def @@ -0,0 +1,32 @@ +//===-- ARMFPUName.def - List of the ARM FPU names --------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file contains the list of the supported ARM FPU names. +// +//===----------------------------------------------------------------------===// + +// NOTE: NO INCLUDE GUARD DESIRED!
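The new ARMFPUName.def whose body follows is an X-macro list: every includer defines ARM_FPU_NAME to the expansion it needs (ARMFPUName.h, further below, turns the list into the FPUKind enum) and the trailing #undef keeps the macro from leaking, which is exactly why the file wants no include guard. A generic standalone demonstration of the idiom, using a hypothetical two-entry list:

#include <cassert>
#include <cstring>

// Inline stand-in for a .def file (the real one is a separate #include):
#define TOY_FPU_LIST(X) X("vfp", VFP) X("neon", NEON)

// Expansion 1: an enum of kinds, as in ARMFPUName.h.
enum ToyFPUKind {
  INVALID_FPU = 0
#define TOY_NAME(NAME, ID) , ID
  TOY_FPU_LIST(TOY_NAME)
#undef TOY_NAME
};

// Expansion 2: a name -> kind lookup, another typical client of a .def list.
ToyFPUKind parseFPU(const char *Name) {
#define TOY_NAME(NAME, ID) if (!std::strcmp(Name, NAME)) return ID;
  TOY_FPU_LIST(TOY_NAME)
#undef TOY_NAME
  return INVALID_FPU;
}

int main() {
  assert(parseFPU("neon") == NEON);
  assert(parseFPU("bogus") == INVALID_FPU);
}

Because both expansions come from the same list, adding one ARM_FPU_NAME entry keeps the enum and every table derived from it in sync automatically.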
+ +#ifndef ARM_FPU_NAME +#error "You must define ARM_FPU_NAME(NAME, ID) before including ARMFPUName.h" +#endif + +ARM_FPU_NAME("vfp", VFP) +ARM_FPU_NAME("vfpv2", VFPV2) +ARM_FPU_NAME("vfpv3", VFPV3) +ARM_FPU_NAME("vfpv3-d16", VFPV3_D16) +ARM_FPU_NAME("vfpv4", VFPV4) +ARM_FPU_NAME("vfpv4-d16", VFPV4_D16) +ARM_FPU_NAME("fp-armv8", FP_ARMV8) +ARM_FPU_NAME("neon", NEON) +ARM_FPU_NAME("neon-vfpv4", NEON_VFPV4) +ARM_FPU_NAME("neon-fp-armv8", NEON_FP_ARMV8) +ARM_FPU_NAME("crypto-neon-fp-armv8", CRYPTO_NEON_FP_ARMV8) + +#undef ARM_FPU_NAME diff --git a/contrib/llvm/lib/Target/ARM/ARMFPUName.h b/contrib/llvm/lib/Target/ARM/ARMFPUName.h new file mode 100644 index 0000000..2a64cce --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMFPUName.h @@ -0,0 +1,26 @@ +//===-- ARMFPUName.h - List of the ARM FPU names ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef ARMFPUNAME_H +#define ARMFPUNAME_H + +namespace llvm { +namespace ARM { + +enum FPUKind { + INVALID_FPU = 0 + +#define ARM_FPU_NAME(NAME, ID) , ID +#include "ARMFPUName.def" +}; + +} // namespace ARM +} // namespace llvm + +#endif // ARMFPUNAME_H diff --git a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp index 5d45f64..a4004f3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFastISel.cpp @@ -20,6 +20,7 @@ #include "ARMSubtarget.h" #include "ARMTargetMachine.h" #include "MCTargetDesc/ARMAddressingModes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" @@ -175,6 +176,8 @@ class ARMFastISel : public FastISel { // Utility routines. private: + unsigned constrainOperandRegClass(const MCInstrDesc &II, unsigned OpNum, + unsigned Op); bool isTypeLegal(Type *Ty, MVT &VT); bool isLoadTypeLegal(Type *Ty, MVT &VT); bool ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, @@ -251,10 +254,10 @@ bool ARMFastISel::DefinesOptionalPredicate(MachineInstr *MI, bool *CPSR) { bool ARMFastISel::isARMNEONPred(const MachineInstr *MI) { const MCInstrDesc &MCID = MI->getDesc(); - // If we're a thumb2 or not NEON function we were handled via isPredicable. + // If we're a thumb2 or not NEON function we'll be handled via isPredicable. if ((MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainNEON || AFI->isThumb2Function()) - return false; + return MI->isPredicable(); for (unsigned i = 0, e = MCID.getNumOperands(); i != e; ++i) if (MCID.OpInfo[i].isPredicate()) @@ -275,7 +278,7 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { // Do we use a predicate? or... // Are we NEON in ARM mode and have a predicate operand? If so, I know // we're not predicable but add it anyways. - if (TII.isPredicable(MI) || isARMNEONPred(MI)) + if (isARMNEONPred(MI)) AddDefaultPred(MIB); // Do we optionally set a predicate? 
Preds is size > 0 iff the predicate @@ -290,6 +293,23 @@ ARMFastISel::AddOptionalDefs(const MachineInstrBuilder &MIB) { return MIB; } +unsigned ARMFastISel::constrainOperandRegClass(const MCInstrDesc &II, + unsigned Op, unsigned OpNum) { + if (TargetRegisterInfo::isVirtualRegister(Op)) { + const TargetRegisterClass *RegClass = + TII.getRegClass(II, OpNum, &TRI, *FuncInfo.MF); + if (!MRI.constrainRegClass(Op, RegClass)) { + // If it's not legal to COPY between the register classes, something + // has gone very wrong before we got here. + unsigned NewOp = createResultReg(RegClass); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, + TII.get(TargetOpcode::COPY), NewOp).addReg(Op)); + return NewOp; + } + } + return Op; +} + unsigned ARMFastISel::FastEmitInst_(unsigned MachineInstOpcode, const TargetRegisterClass* RC) { unsigned ResultReg = createResultReg(RC); @@ -305,6 +325,9 @@ unsigned ARMFastISel::FastEmitInst_r(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operand is sufficiently constrained to be legal + // for this instruction. + Op0 = constrainOperandRegClass(II, Op0, 1); if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill)); @@ -325,6 +348,11 @@ unsigned ARMFastISel::FastEmitInst_rr(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operands are sufficiently constrained to be legal + // for this instruction. + Op0 = constrainOperandRegClass(II, Op0, 1); + Op1 = constrainOperandRegClass(II, Op1, 2); + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -348,6 +376,12 @@ unsigned ARMFastISel::FastEmitInst_rrr(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operands are sufficiently constrained to be legal + // for this instruction. + Op0 = constrainOperandRegClass(II, Op0, 1); + Op1 = constrainOperandRegClass(II, Op1, 2); + Op2 = constrainOperandRegClass(II, Op1, 3); + if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -372,6 +406,9 @@ unsigned ARMFastISel::FastEmitInst_ri(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operand is sufficiently constrained to be legal + // for this instruction. + Op0 = constrainOperandRegClass(II, Op0, 1); if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -394,6 +431,9 @@ unsigned ARMFastISel::FastEmitInst_rf(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operand is sufficiently constrained to be legal + // for this instruction. 
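The FastEmitInst_* helpers above all funnel their register operands through FastISel's constrainOperandRegClass, which differs from the bail-out variant in optimizeSelect earlier in this patch: when an operand cannot be constrained to the class the instruction demands, it recovers by issuing a COPY into a fresh virtual register of the right class. A toy standalone model of that recovery path:

#include <cstdio>
#include <map>
#include <vector>

using ToyRC = std::vector<int>;   // allowed physical registers

std::map<int, ToyRC> VRegClasses; // virtual register -> its class
int NextVReg = 100;

// If the intersection of the operand's class and the required class is
// non-empty, constrain in place; otherwise emit a COPY into a fresh vreg of
// the required class, as FastISel::constrainOperandRegClass does above.
int constrainOrCopy(int VReg, const ToyRC &Required) {
  ToyRC Common;
  for (int R : VRegClasses[VReg])
    for (int Q : Required)
      if (R == Q)
        Common.push_back(R);
  if (!Common.empty()) {
    VRegClasses[VReg] = Common;   // constrained in place
    return VReg;
  }
  int NewVReg = NextVReg++;
  VRegClasses[NewVReg] = Required;
  std::printf("COPY vreg%d <- vreg%d\n", NewVReg, VReg);
  return NewVReg;
}

int main() {
  VRegClasses[1] = {0, 1, 2, 3};          // e.g. a GPR-class vreg
  int Op = constrainOrCopy(1, {12, 13});  // instruction wants another class
  std::printf("operand is now vreg%d\n", Op); // a fresh copy, vreg100
}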
+ Op0 = constrainOperandRegClass(II, Op0, 1); if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -417,6 +457,10 @@ unsigned ARMFastISel::FastEmitInst_rri(unsigned MachineInstOpcode, unsigned ResultReg = createResultReg(RC); const MCInstrDesc &II = TII.get(MachineInstOpcode); + // Make sure the input operands are sufficiently constrained to be legal + // for this instruction. + Op0 = constrainOperandRegClass(II, Op0, 1); + Op1 = constrainOperandRegClass(II, Op1, 2); if (II.getNumDefs() >= 1) { AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II, ResultReg) .addReg(Op0, Op0IsKill * RegState::Kill) @@ -609,6 +653,7 @@ unsigned ARMFastISel::ARMMaterializeInt(const Constant *C, MVT VT) { .addConstantPoolIndex(Idx)); else // The extra immediate is for addrmode2. + DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) @@ -628,6 +673,11 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { (const TargetRegisterClass*)&ARM::GPRRegClass; unsigned DestReg = createResultReg(RC); + // FastISel TLS support on non-Darwin is broken, punt to SelectionDAG. + const GlobalVariable *GVar = dyn_cast(GV); + bool IsThreadLocal = GVar && GVar->isThreadLocal(); + if (!Subtarget->isTargetDarwin() && IsThreadLocal) return 0; + // Use movw+movt when possible, it avoids constant pool entries. // Darwin targets don't support movt with Reloc::Static, see // ARMTargetLowering::LowerGlobalAddressDarwin. Other targets only support @@ -679,6 +729,7 @@ unsigned ARMFastISel::ARMMaterializeGV(const GlobalValue *GV, MVT VT) { AddOptionalDefs(MIB); } else { // The extra immediate is for addrmode2. + DestReg = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg, 0); MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg) .addConstantPoolIndex(Idx) @@ -814,22 +865,19 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { switch (Opcode) { default: break; - case Instruction::BitCast: { + case Instruction::BitCast: // Look through bitcasts. return ARMComputeAddress(U->getOperand(0), Addr); - } - case Instruction::IntToPtr: { + case Instruction::IntToPtr: // Look past no-op inttoptrs. if (TLI.getValueType(U->getOperand(0)->getType()) == TLI.getPointerTy()) return ARMComputeAddress(U->getOperand(0), Addr); break; - } - case Instruction::PtrToInt: { + case Instruction::PtrToInt: // Look past no-op ptrtoints. if (TLI.getValueType(U->getType()) == TLI.getPointerTy()) return ARMComputeAddress(U->getOperand(0), Addr); break; - } case Instruction::GetElementPtr: { Address SavedAddr = Addr; int TmpOffset = Addr.Offset; @@ -852,13 +900,8 @@ bool ARMFastISel::ARMComputeAddress(const Value *Obj, Address &Addr) { TmpOffset += CI->getSExtValue() * S; break; } - if (isa(Op) && - (!isa(Op) || - FuncInfo.MBBMap[cast(Op)->getParent()] - == FuncInfo.MBB) && - isa(cast(Op)->getOperand(1))) { - // An add (in the same block) with a constant operand. Fold the - // constant. + if (canFoldAddIntoGEP(U, Op)) { + // A compatible add with a constant operand. Fold the constant. ConstantInt *CI = cast(cast(Op)->getOperand(1)); TmpOffset += CI->getSExtValue() * S; @@ -1025,7 +1068,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, useAM3 = true; } } - RC = &ARM::GPRRegClass; + RC = isThumb2 ? 
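The canFoldAddIntoGEP change in the address-computation hunk above tightens the same underlying idea the old inline check expressed: while walking a GEP, constant parts of the index expression are folded into one running byte offset (CI->getSExtValue() * S) instead of being materialized as adds. A toy version of that folding, under the simplifying assumption that every index is constant:

#include <cstdint>
#include <cstdio>
#include <vector>

struct Index { int64_t Scale; bool IsConst; int64_t Value; };

// Fold all-constant GEP indices into a single offset; a real lowering would
// emit adds for the non-constant parts instead of giving up.
static bool foldConstantIndices(const std::vector<Index> &Idx,
                                int64_t &Offset) {
  int64_t Tmp = Offset;
  for (const Index &I : Idx) {
    if (!I.IsConst)
      return false;
    Tmp += I.Value * I.Scale; // constant operand, folded at its scale
  }
  Offset = Tmp;
  return true;
}

int main() {
  std::vector<Index> Gep = {{8, true, 3}, {4, true, 2}}; // e.g. a[3].b[2]
  int64_t Off = 0;
  if (foldConstantIndices(Gep, Off))
    std::printf("base + %lld\n", (long long)Off); // base + 32
}

Folding succeeds only when the whole chain collapses; otherwise the code above restores SavedAddr and falls back, which is why the temporary TmpOffset is committed last here too.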
&ARM::rGPRRegClass : &ARM::GPRnopcRegClass; break; case MVT::i16: if (Alignment && Alignment < 2 && !Subtarget->allowsUnalignedMem()) @@ -1040,7 +1083,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, Opc = isZExt ? ARM::LDRH : ARM::LDRSH; useAM3 = true; } - RC = &ARM::GPRRegClass; + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; break; case MVT::i32: if (Alignment && Alignment < 4 && !Subtarget->allowsUnalignedMem()) @@ -1054,7 +1097,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, } else { Opc = ARM::LDRi12; } - RC = &ARM::GPRRegClass; + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; break; case MVT::f32: if (!Subtarget->hasVFP2()) return false; @@ -1063,7 +1106,7 @@ bool ARMFastISel::ARMEmitLoad(MVT VT, unsigned &ResultReg, Address &Addr, needVMOV = true; VT = MVT::i32; Opc = isThumb2 ? ARM::t2LDRi12 : ARM::LDRi12; - RC = &ARM::GPRRegClass; + RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; } else { Opc = ARM::VLDRS; RC = TLI.getRegClassFor(VT); @@ -1136,6 +1179,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, (const TargetRegisterClass*)&ARM::tGPRRegClass : (const TargetRegisterClass*)&ARM::GPRRegClass); unsigned Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; + SrcReg = constrainOperandRegClass(TII.get(Opc), SrcReg, 1); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), Res) .addReg(SrcReg).addImm(1)); @@ -1207,6 +1251,7 @@ bool ARMFastISel::ARMEmitStore(MVT VT, unsigned SrcReg, Address &Addr, ARMSimplifyAddress(Addr, VT, useAM3); // Create the base instruction, then add the operands. + SrcReg = constrainOperandRegClass(TII.get(StrOpc), SrcReg, 0); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(StrOpc)) .addReg(SrcReg); @@ -1330,6 +1375,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { (isLoadTypeLegal(TI->getOperand(0)->getType(), SourceVT))) { unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; unsigned OpReg = getRegForValue(TI->getOperand(0)); + OpReg = constrainOperandRegClass(TII.get(TstOpc), OpReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) .addReg(OpReg).addImm(1)); @@ -1367,6 +1413,7 @@ bool ARMFastISel::SelectBranch(const Instruction *I) { // and it left a value for us in a virtual register. Ergo, we test // the one-bit value left in the virtual register. unsigned TstOpc = isThumb2 ? ARM::t2TSTri : ARM::TSTri; + CmpReg = constrainOperandRegClass(TII.get(TstOpc), CmpReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(TstOpc)) .addReg(CmpReg).addImm(1)); @@ -1491,13 +1538,15 @@ bool ARMFastISel::ARMEmitCmp(const Value *Src1Value, const Value *Src2Value, } } + const MCInstrDesc &II = TII.get(CmpOpc); + SrcReg1 = constrainOperandRegClass(II, SrcReg1, 0); if (!UseImm) { - AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, - TII.get(CmpOpc)) + SrcReg2 = constrainOperandRegClass(II, SrcReg2, 1); + AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(SrcReg1).addReg(SrcReg2)); } else { MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) + MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, II) .addReg(SrcReg1); // Only add immediate for icmp as the immediate for fcmp is an implicit 0.0. @@ -1696,6 +1745,7 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { } unsigned CmpOpc = isThumb2 ? 
ARM::t2CMPri : ARM::CMPri; + CondReg = constrainOperandRegClass(TII.get(CmpOpc), CondReg, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CmpOpc)) .addReg(CondReg).addImm(0)); @@ -1712,12 +1762,16 @@ bool ARMFastISel::SelectSelect(const Instruction *I) { MovCCOpc = isThumb2 ? ARM::t2MVNCCi : ARM::MVNCCi; } unsigned ResultReg = createResultReg(RC); - if (!UseImm) + if (!UseImm) { + Op2Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op2Reg, 1); + Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 2); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) .addReg(Op2Reg).addReg(Op1Reg).addImm(ARMCC::NE).addReg(ARM::CPSR); - else + } else { + Op1Reg = constrainOperandRegClass(TII.get(MovCCOpc), Op1Reg, 1); BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(MovCCOpc), ResultReg) .addReg(Op1Reg).addImm(Imm).addImm(ARMCC::EQ).addReg(ARM::CPSR); + } UpdateValueMap(I, ResultReg); return true; } @@ -1802,7 +1856,9 @@ bool ARMFastISel::SelectBinaryIntOp(const Instruction *I, unsigned ISDOpcode) { unsigned SrcReg2 = getRegForValue(I->getOperand(1)); if (SrcReg2 == 0) return false; - unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass); + SrcReg1 = constrainOperandRegClass(TII.get(Opc), SrcReg1, 1); + SrcReg2 = constrainOperandRegClass(TII.get(Opc), SrcReg2, 2); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) .addReg(SrcReg1).addReg(SrcReg2)); @@ -1930,7 +1986,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, !VA.isRegLoc() || !ArgLocs[++i].isRegLoc()) return false; } else { - switch (static_cast(ArgVT).getSimpleVT().SimpleTy) { + switch (ArgVT.SimpleTy) { default: return false; case MVT::i1: @@ -1985,7 +2041,7 @@ bool ARMFastISel::ProcessCallArgs(SmallVectorImpl &Args, case CCValAssign::ZExt: { MVT DestVT = VA.getLocVT(); Arg = ARMEmitIntExt(ArgVT, Arg, DestVT, /*isZExt*/true); - assert (Arg != 0 && "Failed to emit a sext"); + assert (Arg != 0 && "Failed to emit a zext"); ArgVT = DestVT; break; } @@ -2182,10 +2238,14 @@ unsigned ARMFastISel::ARMSelectCallOp(bool UseReg) { } unsigned ARMFastISel::getLibcallReg(const Twine &Name) { + // Manually compute the global's type to avoid building it when unnecessary. + Type *GVTy = Type::getInt32PtrTy(*Context, /*AS=*/0); + EVT LCREVT = TLI.getValueType(GVTy); + if (!LCREVT.isSimple()) return 0; + GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, GlobalValue::ExternalLinkage, 0, Name); - EVT LCREVT = TLI.getValueType(GV->getType()); - if (!LCREVT.isSimple()) return 0; + assert(GV->getType() == GVTy && "We miscomputed the type for the global!"); return ARMMaterializeGV(GV, LCREVT.getSimpleVT()); } @@ -2403,15 +2463,22 @@ bool ARMFastISel::SelectCall(const Instruction *I, MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(CallOpc)); + unsigned char OpFlags = 0; + + // Add MO_PLT for global address or external symbol in the PIC relocation + // model. + if (Subtarget->isTargetELF() && TM.getRelocationModel() == Reloc::PIC_) + OpFlags = ARMII::MO_PLT; + // ARM calls don't take a predicate, but tBL / tBLX do. if(isThumb2) AddDefaultPred(MIB); if (UseReg) MIB.addReg(CalleeReg); else if (!IntrMemName) - MIB.addGlobalAddress(GV, 0, 0); + MIB.addGlobalAddress(GV, 0, OpFlags); else - MIB.addExternalSymbol(IntrMemName, 0); + MIB.addExternalSymbol(IntrMemName, OpFlags); // Add implicit physical register uses to the call. 
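The call-lowering hunk above tags direct-call operands with ARMII::MO_PLT when the target is ELF and the relocation model is PIC, so the call binds through the procedure linkage table; every other combination keeps a plain reference. The decision is a two-input predicate; the sketch below models it with illustrative flag values, not LLVM's real encodings.

#include <cstdio>

enum RelocModel { Static, PIC };
enum ObjectFormat { ELF, MachO };

const unsigned MO_NO_FLAG = 0, MO_PLT = 1; // illustrative values only

// PIC code on ELF binds direct calls lazily through the PLT; everything
// else keeps an unflagged reference to the callee.
unsigned callTargetFlags(ObjectFormat OF, RelocModel RM) {
  return (OF == ELF && RM == PIC) ? MO_PLT : MO_NO_FLAG;
}

int main() {
  std::printf("%u %u\n", callTargetFlags(ELF, PIC),    // 1: via PLT
                         callTargetFlags(MachO, PIC)); // 0: plain
}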
for (unsigned i = 0, e = RegArgs.size(); i != e; ++i) @@ -2602,47 +2669,136 @@ unsigned ARMFastISel::ARMEmitIntExt(MVT SrcVT, unsigned SrcReg, MVT DestVT, bool isZExt) { if (DestVT != MVT::i32 && DestVT != MVT::i16 && DestVT != MVT::i8) return 0; + if (SrcVT != MVT::i16 && SrcVT != MVT::i8 && SrcVT != MVT::i1) + return 0; - unsigned Opc; - bool isBoolZext = false; - const TargetRegisterClass *RC; - switch (SrcVT.SimpleTy) { - default: return 0; - case MVT::i16: - if (!Subtarget->hasV6Ops()) return 0; - RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; - if (isZExt) - Opc = isThumb2 ? ARM::t2UXTH : ARM::UXTH; - else - Opc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; - break; - case MVT::i8: - if (!Subtarget->hasV6Ops()) return 0; - RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRnopcRegClass; - if (isZExt) - Opc = isThumb2 ? ARM::t2UXTB : ARM::UXTB; - else - Opc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; - break; - case MVT::i1: - if (isZExt) { - RC = isThumb2 ? &ARM::rGPRRegClass : &ARM::GPRRegClass; - Opc = isThumb2 ? ARM::t2ANDri : ARM::ANDri; - isBoolZext = true; - break; + // Table of which combinations can be emitted as a single instruction, + // and which will require two. + static const uint8_t isSingleInstrTbl[3][2][2][2] = { + // ARM Thumb + // !hasV6Ops hasV6Ops !hasV6Ops hasV6Ops + // ext: s z s z s z s z + /* 1 */ { { { 0, 1 }, { 0, 1 } }, { { 0, 0 }, { 0, 1 } } }, + /* 8 */ { { { 0, 1 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } }, + /* 16 */ { { { 0, 0 }, { 1, 1 } }, { { 0, 0 }, { 1, 1 } } } + }; + + // Target registers for: + // - For ARM can never be PC. + // - For 16-bit Thumb are restricted to lower 8 registers. + // - For 32-bit Thumb are restricted to non-SP and non-PC. + static const TargetRegisterClass *RCTbl[2][2] = { + // Instructions: Two Single + /* ARM */ { &ARM::GPRnopcRegClass, &ARM::GPRnopcRegClass }, + /* Thumb */ { &ARM::tGPRRegClass, &ARM::rGPRRegClass } + }; + + // Table governing the instruction(s) to be emitted. + static const struct InstructionTable { + uint32_t Opc : 16; + uint32_t hasS : 1; // Some instructions have an S bit, always set it to 0. + uint32_t Shift : 7; // For shift operand addressing mode, used by MOVsi. + uint32_t Imm : 8; // All instructions have either a shift or a mask. + } IT[2][2][3][2] = { + { // Two instructions (first is left shift, second is in this table). + { // ARM Opc S Shift Imm + /* 1 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 31 }, + /* 1 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 31 } }, + /* 8 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 24 }, + /* 8 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 24 } }, + /* 16 bit sext */ { { ARM::MOVsi , 1, ARM_AM::asr , 16 }, + /* 16 bit zext */ { ARM::MOVsi , 1, ARM_AM::lsr , 16 } } + }, + { // Thumb Opc S Shift Imm + /* 1 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 31 }, + /* 1 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 31 } }, + /* 8 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 24 }, + /* 8 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 24 } }, + /* 16 bit sext */ { { ARM::tASRri , 0, ARM_AM::no_shift, 16 }, + /* 16 bit zext */ { ARM::tLSRri , 0, ARM_AM::no_shift, 16 } } + } + }, + { // Single instruction. 
+ { // ARM Opc S Shift Imm + /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 }, + /* 1 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 1 } }, + /* 8 bit sext */ { { ARM::SXTB , 0, ARM_AM::no_shift, 0 }, + /* 8 bit zext */ { ARM::ANDri , 1, ARM_AM::no_shift, 255 } }, + /* 16 bit sext */ { { ARM::SXTH , 0, ARM_AM::no_shift, 0 }, + /* 16 bit zext */ { ARM::UXTH , 0, ARM_AM::no_shift, 0 } } + }, + { // Thumb Opc S Shift Imm + /* 1 bit sext */ { { ARM::KILL , 0, ARM_AM::no_shift, 0 }, + /* 1 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 1 } }, + /* 8 bit sext */ { { ARM::t2SXTB , 0, ARM_AM::no_shift, 0 }, + /* 8 bit zext */ { ARM::t2ANDri, 1, ARM_AM::no_shift, 255 } }, + /* 16 bit sext */ { { ARM::t2SXTH , 0, ARM_AM::no_shift, 0 }, + /* 16 bit zext */ { ARM::t2UXTH , 0, ARM_AM::no_shift, 0 } } + } } - return 0; + }; + + unsigned SrcBits = SrcVT.getSizeInBits(); + unsigned DestBits = DestVT.getSizeInBits(); + (void) DestBits; + assert((SrcBits < DestBits) && "can only extend to larger types"); + assert((DestBits == 32 || DestBits == 16 || DestBits == 8) && + "other sizes unimplemented"); + assert((SrcBits == 16 || SrcBits == 8 || SrcBits == 1) && + "other sizes unimplemented"); + + bool hasV6Ops = Subtarget->hasV6Ops(); + unsigned Bitness = SrcBits / 8; // {1,8,16}=>{0,1,2} + assert((Bitness < 3) && "sanity-check table bounds"); + + bool isSingleInstr = isSingleInstrTbl[Bitness][isThumb2][hasV6Ops][isZExt]; + const TargetRegisterClass *RC = RCTbl[isThumb2][isSingleInstr]; + const InstructionTable *ITP = &IT[isSingleInstr][isThumb2][Bitness][isZExt]; + unsigned Opc = ITP->Opc; + assert(ARM::KILL != Opc && "Invalid table entry"); + unsigned hasS = ITP->hasS; + ARM_AM::ShiftOpc Shift = (ARM_AM::ShiftOpc) ITP->Shift; + assert(((Shift == ARM_AM::no_shift) == (Opc != ARM::MOVsi)) && + "only MOVsi has shift operand addressing mode"); + unsigned Imm = ITP->Imm; + + // 16-bit Thumb instructions always set CPSR (unless they're in an IT block). + bool setsCPSR = &ARM::tGPRRegClass == RC; + unsigned LSLOpc = isThumb2 ? ARM::tLSLri : ARM::MOVsi; + unsigned ResultReg; + // MOVsi encodes shift and immediate in shift operand addressing mode. + // The following condition has the same value when emitting two + // instruction sequences: both are shifts. + bool ImmIsSO = (Shift != ARM_AM::no_shift); + + // Either one or two instructions are emitted. + // They're always of the form: + // dst = in OP imm + // CPSR is set only by 16-bit Thumb instructions. + // Predicate, if any, is AL. + // S bit, if available, is always 0. + // When two are emitted the first's result will feed as the second's input, + // that value is then dead. + unsigned NumInstrsEmitted = isSingleInstr ? 1 : 2; + for (unsigned Instr = 0; Instr != NumInstrsEmitted; ++Instr) { + ResultReg = createResultReg(RC); + bool isLsl = (0 == Instr) && !isSingleInstr; + unsigned Opcode = isLsl ? LSLOpc : Opc; + ARM_AM::ShiftOpc ShiftAM = isLsl ? ARM_AM::lsl : Shift; + unsigned ImmEnc = ImmIsSO ? ARM_AM::getSORegOpc(ShiftAM, Imm) : Imm; + bool isKill = 1 == Instr; + MachineInstrBuilder MIB = BuildMI( + *FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opcode), ResultReg); + if (setsCPSR) + MIB.addReg(ARM::CPSR, RegState::Define); + SrcReg = constrainOperandRegClass(TII.get(Opcode), SrcReg, 1 + setsCPSR); + AddDefaultPred(MIB.addReg(SrcReg, isKill * RegState::Kill).addImm(ImmEnc)); + if (hasS) + AddDefaultCC(MIB); + // Second instruction consumes the first's result. 
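For the two-instruction rows of the tables above, the emitted sequence is the classic shift-based extension: lsl #(32-N) moves the N-bit value to the top of the register, then asr (sign extend) or lsr (zero extend) by the same amount brings it back down. That is why the Imm column holds 31, 24 and 16: exactly 32 minus the source width. A small self-checking sketch of the arithmetic (two's-complement arithmetic shift assumed, as on ARM):

#include <cassert>
#include <cstdint>

uint32_t extendViaShifts(uint32_t In, unsigned SrcBits, bool IsZExt) {
  unsigned K = 32 - SrcBits;                  // 31, 24 or 16 for i1, i8, i16
  uint32_t Hi = In << K;                      // first instruction: lsl #K
  if (IsZExt)
    return Hi >> K;                           // second: lsr #K, zero fill
  return (uint32_t)((int32_t)Hi >> K);        // second: asr #K, sign fill
}

int main() {
  assert(extendViaShifts(0xFFu, 8, false) == 0xFFFFFFFFu); // sext i8 -1
  assert(extendViaShifts(0xFFu, 8, true) == 0xFFu);        // zext i8 255
  assert(extendViaShifts(1u, 1, false) == 0xFFFFFFFFu);    // sext i1 1
  return 0;
}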
+ SrcReg = ResultReg; } - unsigned ResultReg = createResultReg(RC); - MachineInstrBuilder MIB; - MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), ResultReg) - .addReg(SrcReg); - if (isBoolZext) - MIB.addImm(1); - else - MIB.addImm(0); - AddOptionalDefs(MIB); return ResultReg; } @@ -2707,7 +2863,7 @@ bool ARMFastISel::SelectShift(const Instruction *I, if (Reg2 == 0) return false; } - unsigned ResultReg = createResultReg(TLI.getRegClassFor(MVT::i32)); + unsigned ResultReg = createResultReg(&ARM::GPRnopcRegClass); if(ResultReg == 0) return false; MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, @@ -2797,6 +2953,25 @@ bool ARMFastISel::TargetSelectInstruction(const Instruction *I) { return false; } +namespace { +// This table describes sign- and zero-extend instructions which can be +// folded into a preceding load. All of these extends have an immediate +// (sometimes a mask and sometimes a shift) that's applied after +// extension. +const struct FoldableLoadExtendsStruct { + uint16_t Opc[2]; // ARM, Thumb. + uint8_t ExpectedImm; + uint8_t isZExt : 1; + uint8_t ExpectedVT : 7; +} FoldableLoadExtends[] = { + { { ARM::SXTH, ARM::t2SXTH }, 0, 0, MVT::i16 }, + { { ARM::UXTH, ARM::t2UXTH }, 0, 1, MVT::i16 }, + { { ARM::ANDri, ARM::t2ANDri }, 255, 1, MVT::i8 }, + { { ARM::SXTB, ARM::t2SXTB }, 0, 0, MVT::i8 }, + { { ARM::UXTB, ARM::t2UXTB }, 0, 1, MVT::i8 } +}; +} + /// \brief The specified machine instr operand is a vreg, and that /// vreg is being provided by the specified load instruction. If possible, /// try to fold the load as an operand to the instruction, returning true if @@ -2812,26 +2987,23 @@ bool ARMFastISel::tryToFoldLoadIntoMI(MachineInstr *MI, unsigned OpNo, // ldrb r1, [r0] ldrb r1, [r0] // uxtb r2, r1 => // mov r3, r2 mov r3, r1 - bool isZExt = true; - switch(MI->getOpcode()) { - default: return false; - case ARM::SXTH: - case ARM::t2SXTH: - isZExt = false; - case ARM::UXTH: - case ARM::t2UXTH: - if (VT != MVT::i16) - return false; - break; - case ARM::SXTB: - case ARM::t2SXTB: - isZExt = false; - case ARM::UXTB: - case ARM::t2UXTB: - if (VT != MVT::i8) - return false; - break; + if (MI->getNumOperands() < 3 || !MI->getOperand(2).isImm()) + return false; + const uint64_t Imm = MI->getOperand(2).getImm(); + + bool Found = false; + bool isZExt; + for (unsigned i = 0, e = array_lengthof(FoldableLoadExtends); + i != e; ++i) { + if (FoldableLoadExtends[i].Opc[isThumb2] == MI->getOpcode() && + (uint64_t)FoldableLoadExtends[i].ExpectedImm == Imm && + MVT((MVT::SimpleValueType)FoldableLoadExtends[i].ExpectedVT) == VT) { + Found = true; + isZExt = FoldableLoadExtends[i].isZExt; + } } + if (!Found) return false; + // See if we can handle this address. Address Addr; if (!ARMComputeAddress(LI->getOperand(0), Addr)) return false; @@ -2854,12 +3026,14 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, unsigned DestReg1 = createResultReg(TLI.getRegClassFor(VT)); // Load value. if (isThumb2) { + DestReg1 = constrainOperandRegClass(TII.get(ARM::t2LDRpci), DestReg1, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::t2LDRpci), DestReg1) .addConstantPoolIndex(Idx)); Opc = UseGOTOFF ? ARM::t2ADDrr : ARM::t2LDRs; } else { // The extra immediate is for addrmode2. 
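The FoldableLoadExtends table introduced above turns the old switch into data: an extend instruction folds into the preceding load only when its opcode (per the ARM/Thumb column), its trailing immediate, and the loaded type all match one row. The sketch below reproduces that table-driven match with stand-in opcode numbers; it is a model of the logic, not LLVM's types.

#include <cstdint>
#include <cstdio>

// Stand-in opcode numbers; the real table stores ARM::SXTH, ARM::t2SXTH, etc.
struct Row { unsigned Opc; uint8_t ExpectedImm; bool IsZExt; unsigned Bits; };
static const Row FoldableExtends[] = {
  { 1 /* SXTH  */, 0,   false, 16 },
  { 2 /* UXTH  */, 0,   true,  16 },
  { 3 /* ANDri */, 255, true,  8  },
  { 4 /* SXTB  */, 0,   false, 8  },
  { 5 /* UXTB  */, 0,   true,  8  },
};

// Foldable only if opcode, trailing immediate and loaded width all match.
static bool matchFoldableExtend(unsigned Opc, uint64_t Imm, unsigned LoadBits,
                                bool &IsZExt) {
  for (const Row &R : FoldableExtends)
    if (R.Opc == Opc && R.ExpectedImm == Imm && R.Bits == LoadBits) {
      IsZExt = R.IsZExt;
      return true;
    }
  return false;
}

int main() {
  bool Z = false;
  if (matchFoldableExtend(3, 255, 8, Z)) // and r, r, #255 after an i8 load
    std::printf("fold as %s-extending load\n", Z ? "zero" : "sign");
}

Checking the immediate matters because the same opcode (an AND, a shift) can appear with immediates that are not extensions at all; the table encodes the one immediate per opcode that is.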
+ DestReg1 = constrainOperandRegClass(TII.get(ARM::LDRcp), DestReg1, 0); AddOptionalDefs(BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(ARM::LDRcp), DestReg1) .addConstantPoolIndex(Idx).addImm(0)); @@ -2873,6 +3047,9 @@ unsigned ARMFastISel::ARMLowerPICELF(const GlobalValue *GV, } unsigned DestReg2 = createResultReg(TLI.getRegClassFor(VT)); + DestReg2 = constrainOperandRegClass(TII.get(Opc), DestReg2, 0); + DestReg1 = constrainOperandRegClass(TII.get(Opc), DestReg1, 1); + GlobalBaseReg = constrainOperandRegClass(TII.get(Opc), GlobalBaseReg, 2); MachineInstrBuilder MIB = BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DL, TII.get(Opc), DestReg2) .addReg(DestReg1) @@ -2938,12 +3115,10 @@ bool ARMFastISel::FastLowerArguments() { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; - const TargetRegisterClass *RC = TLI.getRegClassFor(MVT::i32); + const TargetRegisterClass *RC = &ARM::rGPRRegClass; Idx = 0; for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I, ++Idx) { - if (I->use_empty()) - continue; unsigned SrcReg = GPRArgRegs[Idx]; unsigned DstReg = FuncInfo.MF->addLiveIn(SrcReg, RC); // FIXME: Unfortunately it's necessary to emit a copy from the livein copy. @@ -2961,13 +3136,23 @@ bool ARMFastISel::FastLowerArguments() { namespace llvm { FastISel *ARM::createFastISel(FunctionLoweringInfo &funcInfo, const TargetLibraryInfo *libInfo) { - // Completely untested on non-iOS. const TargetMachine &TM = funcInfo.MF->getTarget(); - // Darwin and thumb1 only for now. const ARMSubtarget *Subtarget = &TM.getSubtarget(); - if (Subtarget->isTargetIOS() && !Subtarget->isThumb1Only()) + // Thumb2 support on iOS; ARM support on iOS, Linux and NaCl. + bool UseFastISel = false; + UseFastISel |= Subtarget->isTargetIOS() && !Subtarget->isThumb1Only(); + UseFastISel |= Subtarget->isTargetLinux() && !Subtarget->isThumb(); + UseFastISel |= Subtarget->isTargetNaCl() && !Subtarget->isThumb(); + + if (UseFastISel) { + // iOS always has a FP for backtracking, force other targets + // to keep their FP when doing FastISel. The emitted code is + // currently superior, and in cases like test-suite's lencod + // FastISel isn't quite correct when FP is eliminated. + TM.Options.NoFramePointerElim = true; return new ARMFastISel(funcInfo, libInfo); + } return 0; } } diff --git a/contrib/llvm/lib/Target/ARM/ARMFeatures.h b/contrib/llvm/lib/Target/ARM/ARMFeatures.h new file mode 100644 index 0000000..dafc4b3 --- /dev/null +++ b/contrib/llvm/lib/Target/ARM/ARMFeatures.h @@ -0,0 +1,93 @@ +//===-- ARMFeatures.h - Checks for ARM instruction features ------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// +// +// This file contains the code shared between ARM CodeGen and ARM MC +// +//===----------------------------------------------------------------------===// + +#ifndef TARGET_ARM_FEATURES_H +#define TARGET_ARM_FEATURES_H + +#include "ARM.h" + +namespace llvm { + +template // could be MachineInstr or MCInst +inline bool isV8EligibleForIT(InstrType *Instr, int BLXOperandIndex = 0) { + switch (Instr->getOpcode()) { + default: + return false; + case ARM::tADC: + case ARM::tADDi3: + case ARM::tADDi8: + case ARM::tADDrSPi: + case ARM::tADDrr: + case ARM::tAND: + case ARM::tASRri: + case ARM::tASRrr: + case ARM::tBIC: + case ARM::tCMNz: + case ARM::tCMPi8: + case ARM::tCMPr: + case ARM::tEOR: + case ARM::tLDRBi: + case ARM::tLDRBr: + case ARM::tLDRHi: + case ARM::tLDRHr: + case ARM::tLDRSB: + case ARM::tLDRSH: + case ARM::tLDRi: + case ARM::tLDRr: + case ARM::tLDRspi: + case ARM::tLSLri: + case ARM::tLSLrr: + case ARM::tLSRri: + case ARM::tLSRrr: + case ARM::tMOVi8: + case ARM::tMUL: + case ARM::tMVN: + case ARM::tORR: + case ARM::tROR: + case ARM::tRSB: + case ARM::tSBC: + case ARM::tSTRBi: + case ARM::tSTRBr: + case ARM::tSTRHi: + case ARM::tSTRHr: + case ARM::tSTRi: + case ARM::tSTRr: + case ARM::tSTRspi: + case ARM::tSUBi3: + case ARM::tSUBi8: + case ARM::tSUBrr: + case ARM::tTST: + return true; +// there are some "conditionally deprecated" opcodes + case ARM::tADDspr: + return Instr->getOperand(2).getReg() != ARM::PC; + // ADD PC, SP and BLX PC were always unpredictable, + // now on top of it they're deprecated + case ARM::tADDrSP: + case ARM::tBX: + return Instr->getOperand(0).getReg() != ARM::PC; + case ARM::tBLXr: + return Instr->getOperand(BLXOperandIndex).getReg() != ARM::PC; + case ARM::tADDhirr: + return Instr->getOperand(0).getReg() != ARM::PC && + Instr->getOperand(2).getReg() != ARM::PC; + case ARM::tCMPhir: + case ARM::tMOVr: + return Instr->getOperand(0).getReg() != ARM::PC && + Instr->getOperand(1).getReg() != ARM::PC; + } +} + +} + +#endif diff --git a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp index 483802b..d32bdbc 100644 --- a/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMFrameLowering.cpp @@ -82,22 +82,11 @@ ARMFrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const { return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects(); } -static bool isCalleeSavedRegister(unsigned Reg, const uint16_t *CSRegs) { - for (unsigned i = 0; CSRegs[i]; ++i) - if (Reg == CSRegs[i]) - return true; - return false; -} - static bool isCSRestore(MachineInstr *MI, const ARMBaseInstrInfo &TII, const uint16_t *CSRegs) { // Integer spill area is handled with "pop". - if (MI->getOpcode() == ARM::LDMIA_RET || - MI->getOpcode() == ARM::t2LDMIA_RET || - MI->getOpcode() == ARM::LDMIA_UPD || - MI->getOpcode() == ARM::t2LDMIA_UPD || - MI->getOpcode() == ARM::VLDMDIA_UPD) { + if (isPopOpcode(MI->getOpcode())) { // The first two operands are predicates. The last two are // imp-def and imp-use of SP. Check everything in between. 
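isV8EligibleForIT above is deliberately a template: it is instantiated with both MachineInstr and MCInst, relying only on the getOpcode()/getOperand(i).getReg() surface the two classes share, which is how one whitelist serves CodeGen and MC. The toy below shows the same duck-typed shape; ToyInstr, the opcode constant and the register numbers are all illustrative.

#include <cstdio>

struct ToyOperand {
  unsigned Reg;
  unsigned getReg() const { return Reg; }
};

// Plays the role of MachineInstr or MCInst: only the shared surface matters.
struct ToyInstr {
  unsigned Opc;
  ToyOperand Ops[2];
  unsigned getOpcode() const { return Opc; }
  const ToyOperand &getOperand(unsigned I) const { return Ops[I]; }
};

const unsigned PC = 15, tMOVr = 100; // illustrative constants

template <typename InstrType>
bool isEligibleForIT(const InstrType *Instr) {
  switch (Instr->getOpcode()) {
  default:
    return false;
  case tMOVr: // conditionally deprecated: not IT-eligible when it touches PC
    return Instr->getOperand(0).getReg() != PC &&
           Instr->getOperand(1).getReg() != PC;
  }
}

int main() {
  ToyInstr MovToPc = { tMOVr, { { PC }, { 3 } } };
  std::printf("%d\n", isEligibleForIT(&MovToPc)); // 0: destination is PC
}

The BLXOperandIndex parameter in the real function exists because the predicated operand sits at a different index in the MachineInstr and MCInst forms of tBLXr; the template keeps a single body either way.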
for (int i = 5, e = MI->getNumOperands(); i != e; ++i) @@ -115,20 +104,31 @@ static bool isCSRestore(MachineInstr *MI, return false; } -static void -emitSPUpdate(bool isARM, - MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI, - DebugLoc dl, const ARMBaseInstrInfo &TII, - int NumBytes, unsigned MIFlags = MachineInstr::NoFlags, - ARMCC::CondCodes Pred = ARMCC::AL, unsigned PredReg = 0) { +static void emitRegPlusImmediate(bool isARM, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + const ARMBaseInstrInfo &TII, unsigned DestReg, + unsigned SrcReg, int NumBytes, + unsigned MIFlags = MachineInstr::NoFlags, + ARMCC::CondCodes Pred = ARMCC::AL, + unsigned PredReg = 0) { if (isARM) - emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, + emitARMRegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, Pred, PredReg, TII, MIFlags); else - emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, ARM::SP, NumBytes, + emitT2RegPlusImmediate(MBB, MBBI, dl, DestReg, SrcReg, NumBytes, Pred, PredReg, TII, MIFlags); } +static void emitSPUpdate(bool isARM, MachineBasicBlock &MBB, + MachineBasicBlock::iterator &MBBI, DebugLoc dl, + const ARMBaseInstrInfo &TII, int NumBytes, + unsigned MIFlags = MachineInstr::NoFlags, + ARMCC::CondCodes Pred = ARMCC::AL, + unsigned PredReg = 0) { + emitRegPlusImmediate(isARM, MBB, MBBI, dl, TII, ARM::SP, ARM::SP, NumBytes, + MIFlags, Pred, PredReg); +} + void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { MachineBasicBlock &MBB = MF.front(); MachineBasicBlock::iterator MBBI = MBB.begin(); @@ -141,7 +141,8 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { assert(!AFI->isThumb1OnlyFunction() && "This emitPrologue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); unsigned NumBytes = MFI->getStackSize(); const std::vector &CSI = MFI->getCalleeSavedInfo(); DebugLoc dl = MBBI != MBB.end() ? MBBI->getDebugLoc() : DebugLoc(); @@ -174,6 +175,10 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { unsigned Reg = CSI[i].getReg(); int FI = CSI[i].getFrameIdx(); switch (Reg) { + case ARM::R0: + case ARM::R1: + case ARM::R2: + case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: @@ -181,73 +186,61 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { case ARM::LR: if (Reg == FramePtr) FramePtrSpillFI = FI; - AFI->addGPRCalleeSavedArea1Frame(FI); GPRCS1Size += 4; break; case ARM::R8: case ARM::R9: case ARM::R10: case ARM::R11: + case ARM::R12: if (Reg == FramePtr) FramePtrSpillFI = FI; - if (STI.isTargetIOS()) { - AFI->addGPRCalleeSavedArea2Frame(FI); + if (STI.isTargetIOS()) GPRCS2Size += 4; - } else { - AFI->addGPRCalleeSavedArea1Frame(FI); + else GPRCS1Size += 4; - } break; default: // This is a DPR. Exclude the aligned DPRCS2 spills. if (Reg == ARM::D8) D8SpillFI = FI; - if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) { - AFI->addDPRCalleeSavedAreaFrame(FI); + if (Reg < ARM::D8 || Reg >= ARM::D8 + AFI->getNumAlignedDPRCS2Regs()) DPRCSSize += 8; - } } } // Move past area 1. - if (GPRCS1Size > 0) MBBI++; - - // Set FP to point to the stack slot that contains the previous FP. - // For iOS, FP is R7, which has now been stored in spill area 1. - // Otherwise, if this is not iOS, all the callee-saved registers go - // into spill area 1, including the FP in R11. 
In either case, it is - // now safe to emit this assignment. - bool HasFP = hasFP(MF); - if (HasFP) { - unsigned ADDriOpc = !AFI->isThumbFunction() ? ARM::ADDri : ARM::t2ADDri; - MachineInstrBuilder MIB = - BuildMI(MBB, MBBI, dl, TII.get(ADDriOpc), FramePtr) - .addFrameIndex(FramePtrSpillFI).addImm(0) - .setMIFlag(MachineInstr::FrameSetup); - AddDefaultCC(AddDefaultPred(MIB)); - } - - // Move past area 2. - if (GPRCS2Size > 0) MBBI++; + MachineBasicBlock::iterator LastPush = MBB.end(), FramePtrPush; + if (GPRCS1Size > 0) + FramePtrPush = LastPush = MBBI++; // Determine starting offsets of spill areas. + bool HasFP = hasFP(MF); unsigned DPRCSOffset = NumBytes - (GPRCS1Size + GPRCS2Size + DPRCSSize); unsigned GPRCS2Offset = DPRCSOffset + DPRCSSize; unsigned GPRCS1Offset = GPRCS2Offset + GPRCS2Size; - if (HasFP) + int FramePtrOffsetInPush = 0; + if (HasFP) { + FramePtrOffsetInPush = MFI->getObjectOffset(FramePtrSpillFI) + GPRCS1Size; AFI->setFramePtrSpillOffset(MFI->getObjectOffset(FramePtrSpillFI) + NumBytes); + } AFI->setGPRCalleeSavedArea1Offset(GPRCS1Offset); AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset); AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset); + // Move past area 2. + if (GPRCS2Size > 0) { + LastPush = MBBI++; + } + // Move past area 3. if (DPRCSSize > 0) { - MBBI++; + LastPush = MBBI++; // Since vpush register list cannot have gaps, there may be multiple vpush // instructions in the prologue. while (MBBI->getOpcode() == ARM::VSTMDDB_UPD) - MBBI++; + LastPush = MBBI++; } // Move past the aligned DPRCS2 area. @@ -263,8 +256,13 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { if (NumBytes) { // Adjust SP after all the callee-save spills. - emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, - MachineInstr::FrameSetup); + if (tryFoldSPUpdateIntoPushPop(MF, LastPush, NumBytes)) { + if (LastPush == FramePtrPush) + FramePtrOffsetInPush += NumBytes; + } else + emitSPUpdate(isARM, MBB, MBBI, dl, TII, -NumBytes, + MachineInstr::FrameSetup); + if (HasFP && isARM) // Restore from fp only in ARM mode: e.g. sub sp, r7, #24 // Note it's not safe to do this in Thumb2 mode because it would have @@ -277,6 +275,18 @@ void ARMFrameLowering::emitPrologue(MachineFunction &MF) const { AFI->setShouldRestoreSPFromFP(true); } + // Set FP to point to the stack slot that contains the previous FP. + // For iOS, FP is R7, which has now been stored in spill area 1. + // Otherwise, if this is not iOS, all the callee-saved registers go + // into spill area 1, including the FP in R11. In either case, it + // is in area one and the adjustment needs to take place just after + // that push. + if (HasFP) + emitRegPlusImmediate(!AFI->isThumbFunction(), MBB, ++FramePtrPush, dl, TII, + FramePtr, ARM::SP, FramePtrOffsetInPush, + MachineInstr::FrameSetup); + + if (STI.isTargetELF() && hasFP(MF)) MFI->setOffsetAdjustment(MFI->getOffsetAdjustment() - AFI->getFramePtrSpillOffset()); @@ -357,7 +367,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, "This emitEpilogue does not support Thumb1!"); bool isARM = !AFI->isThumbFunction(); - unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(); + unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); + unsigned ArgRegsSaveSize = AFI->getArgRegsSaveSize(Align); int NumBytes = (int)MFI->getStackSize(); unsigned FramePtr = RegInfo->getFrameRegister(MF); @@ -371,11 +382,11 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); } else { // Unwind MBBI to point to first LDR / VLDRD. 
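The prologue refactor above generalizes the old SP-only helper into emitRegPlusImmediate and keeps emitSPUpdate as a thin wrapper over it, which is what lets the frame pointer now be set on the same dst = src + imm path (FP = SP + FramePtrOffsetInPush, emitted just after the push of area 1). A sketch of that delegation shape, with printing standing in for machine-instruction building:

#include <cstdio>

enum Reg { SP = 13, FP = 11 }; // ARM register numbers, for printing only

// General form: dst = src + imm (a negative imm models a sub).
void emitRegPlusImmediate(Reg Dest, Reg Src, int Bytes) {
  std::printf("add r%d, r%d, #%d\n", (int)Dest, (int)Src, Bytes);
}

// The old SP-only entry point survives as a thin wrapper.
void emitSPUpdate(int Bytes) {
  emitRegPlusImmediate(SP, SP, Bytes);
}

int main() {
  emitSPUpdate(-16);               // the classic stack adjustment
  emitRegPlusImmediate(FP, SP, 8); // new use: set FP at its push offset
}

Keeping the wrapper means every existing emitSPUpdate call site is untouched while the new FP computation reuses the ARM/Thumb2 dispatch already inside the general helper.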
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); if (MBBI != MBB.begin()) { - do + do { --MBBI; - while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); + } while (MBBI != MBB.begin() && isCSRestore(MBBI, TII, CSRegs)); if (!isCSRestore(MBBI, TII, CSRegs)) ++MBBI; } @@ -419,8 +430,8 @@ void ARMFrameLowering::emitEpilogue(MachineFunction &MF, ARM::SP) .addReg(FramePtr)); } - } else if (NumBytes) - emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); + } else if (NumBytes && !tryFoldSPUpdateIntoPushPop(MF, MBBI, NumBytes)) + emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes); // Increment past our save areas. if (AFI->getDPRCalleeSavedAreaSize()) { @@ -499,12 +510,6 @@ ARMFrameLowering::ResolveFrameIndexReference(const MachineFunction &MF, FrameReg = ARM::SP; Offset += SPAdj; - if (AFI->isGPRCalleeSavedArea1Frame(FI)) - return Offset - AFI->getGPRCalleeSavedArea1Offset(); - else if (AFI->isGPRCalleeSavedArea2Frame(FI)) - return Offset - AFI->getGPRCalleeSavedArea2Offset(); - else if (AFI->isDPRCalleeSavedAreaFrame(FI)) - return Offset - AFI->getDPRCalleeSavedAreaOffset(); // SP can move around if there are allocas. We may also lose track of SP // when emergency spilling inside a non-reserved call frame setup. @@ -656,6 +661,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, unsigned RetOpcode = MI->getOpcode(); bool isTailCall = (RetOpcode == ARM::TCRETURNdi || RetOpcode == ARM::TCRETURNri); + bool isInterrupt = + RetOpcode == ARM::SUBS_PC_LR || RetOpcode == ARM::t2SUBS_PC_LR; SmallVector Regs; unsigned i = CSI.size(); @@ -670,7 +677,8 @@ void ARMFrameLowering::emitPopInst(MachineBasicBlock &MBB, if (Reg >= ARM::D8 && Reg < ARM::D8 + NumAlignedDPRCS2Regs) continue; - if (Reg == ARM::LR && !isTailCall && !isVarArg && STI.hasV5TOps()) { + if (Reg == ARM::LR && !isTailCall && !isVarArg && !isInterrupt && + STI.hasV5TOps()) { Reg = ARM::PC; LdmOpc = AFI->isThumbFunction() ? ARM::t2LDMIA_RET : ARM::LDMIA_RET; // Fold the return instruction into the LDM. @@ -1197,7 +1205,7 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, // Don't spill FP if the frame can be eliminated. This is determined // by scanning the callee-save registers to see if any is used. 
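Both the prologue and the epilogue above now try tryFoldSPUpdateIntoPushPop before emitting an explicit SP adjustment. The sketch below illustrates one plausible reading of the fold for the pop side, assuming it works by growing the register list so that each extra 4-byte pop absorbs part of the adjustment; treat that mechanism as an assumption about intent, not a statement of what the LLVM routine does internally.

#include <cstdio>
#include <vector>

// Assumed mechanism (see above): absorb "add sp, sp, #Bytes" after a pop by
// popping Bytes/4 extra scratch registers, each of which bumps SP by 4.
static bool foldSPUpdateIntoPop(std::vector<unsigned> &PopRegs, unsigned Bytes,
                                const std::vector<unsigned> &Scratch) {
  if (Bytes % 4 != 0 || Bytes / 4 > Scratch.size())
    return false; // caller keeps the explicit SP update instead
  for (unsigned I = 0; I != Bytes / 4; ++I)
    PopRegs.push_back(Scratch[I]); // a value popped into a scratch reg is dead
  return true;
}

int main() {
  std::vector<unsigned> Pop = { 4, 5, 6, 7 }; // pop {r4-r7}
  std::vector<unsigned> Scratch = { 2, 3 };   // r2, r3 free here
  if (foldSPUpdateIntoPop(Pop, 8, Scratch))
    std::printf("pop list grew to %zu regs\n", Pop.size()); // 6
}

The prologue side explains the FramePtrOffsetInPush bookkeeping seen earlier: if the SP decrement is folded into the push that also spills FP, the frame pointer's offset within that push grows by the folded amount.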
- const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&MF); for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; bool Spilled = false; @@ -1224,6 +1232,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, case ARM::LR: LRSpilled = true; // Fallthrough + case ARM::R0: case ARM::R1: + case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: CS1Spilled = true; @@ -1238,6 +1248,8 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, } switch (Reg) { + case ARM::R0: case ARM::R1: + case ARM::R2: case ARM::R3: case ARM::R4: case ARM::R5: case ARM::R6: case ARM::R7: case ARM::LR: @@ -1293,8 +1305,12 @@ ARMFrameLowering::processFunctionBeforeCalleeSavedScan(MachineFunction &MF, if (!LRSpilled && CS1Spilled) { MRI.setPhysRegUsed(ARM::LR); NumGPRSpills++; - UnspilledCS1GPRs.erase(std::find(UnspilledCS1GPRs.begin(), - UnspilledCS1GPRs.end(), (unsigned)ARM::LR)); + SmallVectorImpl::iterator LRPos; + LRPos = std::find(UnspilledCS1GPRs.begin(), UnspilledCS1GPRs.end(), + (unsigned)ARM::LR); + if (LRPos != UnspilledCS1GPRs.end()) + UnspilledCS1GPRs.erase(LRPos); + ForceLRSpill = false; ExtraCSSpill = true; } diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp index 1240169..c69d313 100644 --- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.cpp @@ -44,10 +44,16 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (LastMI && (MCID.TSFlags & ARMII::DomainMask) != ARMII::DomainGeneral) { MachineInstr *DefMI = LastMI; const MCInstrDesc &LastMCID = LastMI->getDesc(); + const TargetMachine &TM = + MI->getParent()->getParent()->getTarget(); + const ARMBaseInstrInfo &TII = + *static_cast(TM.getInstrInfo()); + // Skip over one non-VFP / NEON instruction. if (!LastMI->isBarrier() && // On A9, AGU and NEON/FPU are muxed. - !(STI.isLikeA9() && (LastMI->mayLoad() || LastMI->mayStore())) && + !(TII.getSubtarget().isLikeA9() && + (LastMI->mayLoad() || LastMI->mayStore())) && (LastMCID.TSFlags & ARMII::DomainMask) == ARMII::DomainGeneral) { MachineBasicBlock::iterator I = LastMI; if (I != LastMI->getParent()->begin()) { @@ -58,7 +64,7 @@ ARMHazardRecognizer::getHazardType(SUnit *SU, int Stalls) { if (TII.isFpMLxInstruction(DefMI->getOpcode()) && (TII.canCauseFpMLxStall(MI->getOpcode()) || - hasRAWHazard(DefMI, MI, TRI))) { + hasRAWHazard(DefMI, MI, TII.getRegisterInfo()))) { // Try to schedule another instruction for the next 4 cycles. if (FpMLxStalls == 0) FpMLxStalls = 4; diff --git a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h index 98bfc4c..e1dcec3 100644 --- a/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h +++ b/contrib/llvm/lib/Target/ARM/ARMHazardRecognizer.h @@ -28,21 +28,14 @@ class MachineInstr; /// ARM preRA scheduler uses an unspecialized instance of the /// ScoreboardHazardRecognizer. 
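The hazard-recognizer change above drops the cached TII/TRI/STI members and instead recovers the target from the instruction itself via MI->getParent()->getParent()->getTarget(), which is what allows the much simpler constructor in the header. A toy model of that on-demand navigation (every type here is a stand-in, not LLVM's):

#include <cstdio>

struct Target { const char *CPU; };
struct Function {
  Target T;
  const Target &getTarget() const { return T; }
};
struct Block {
  Function *F;
  Function *getParent() const { return F; }
};
struct Instr {
  Block *B;
  Block *getParent() const { return B; }
};

// Mirrors MI->getParent()->getParent()->getTarget() in the hunk above:
// no back-references need to be cached in the analysis object.
const char *cpuOf(const Instr *MI) {
  return MI->getParent()->getParent()->getTarget().CPU;
}

int main() {
  Function F{{"cortex-a9"}};
  Block B{&F};
  Instr I{&B};
  std::printf("%s\n", cpuOf(&I)); // cortex-a9
}

Paying the pointer chase per query keeps the recognizer stateless with respect to the target, so one instance no longer has to be rebuilt just because the subtarget handles changed.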
class ARMHazardRecognizer : public ScoreboardHazardRecognizer { - const ARMBaseInstrInfo &TII; - const ARMBaseRegisterInfo &TRI; - const ARMSubtarget &STI; - MachineInstr *LastMI; unsigned FpMLxStalls; public: ARMHazardRecognizer(const InstrItineraryData *ItinData, - const ARMBaseInstrInfo &tii, - const ARMBaseRegisterInfo &tri, - const ARMSubtarget &sti, - const ScheduleDAG *DAG) : - ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), TII(tii), - TRI(tri), STI(sti), LastMI(0) {} + const ScheduleDAG *DAG) + : ScoreboardHazardRecognizer(ItinData, DAG, "post-RA-sched"), + LastMI(0) {} virtual HazardType getHazardType(SUnit *SU, int Stalls); virtual void Reset(); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp index 5e88e95..87d1522 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -61,7 +61,6 @@ enum AddrMode2Type { class ARMDAGToDAGISel : public SelectionDAGISel { ARMBaseTargetMachine &TM; - const ARMBaseInstrInfo *TII; /// Subtarget - Keep a pointer to the ARMSubtarget around so that we can /// make the right decision when generating code for different targets. @@ -71,7 +70,6 @@ public: explicit ARMDAGToDAGISel(ARMBaseTargetMachine &tm, CodeGenOpt::Level OptLevel) : SelectionDAGISel(tm, OptLevel), TM(tm), - TII(static_cast(TM.getInstrInfo())), Subtarget(&TM.getSubtarget()) { } @@ -132,6 +130,13 @@ public: return true; } + bool SelectCMOVPred(SDValue N, SDValue &Pred, SDValue &Reg) { + const ConstantSDNode *CN = cast(N); + Pred = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32); + Reg = CurDAG->getRegister(ARM::CPSR, MVT::i32); + return true; + } + bool SelectAddrMode2OffsetReg(SDNode *Op, SDValue N, SDValue &Offset, SDValue &Opc); bool SelectAddrMode2OffsetImm(SDNode *Op, SDValue N, @@ -177,6 +182,7 @@ public: SDValue &OffImm); bool SelectT2AddrModeSoReg(SDValue N, SDValue &Base, SDValue &OffReg, SDValue &ShImm); + bool SelectT2AddrModeExclusive(SDValue N, SDValue &Base, SDValue &OffImm); inline bool is_so_imm(unsigned Imm) const { return ARM_AM::getSOImmVal(Imm) != -1; @@ -240,21 +246,6 @@ private: /// SelectV6T2BitfieldExtractOp - Select SBFX/UBFX instructions for ARM. SDNode *SelectV6T2BitfieldExtractOp(SDNode *N, bool isSigned); - /// SelectCMOVOp - Select CMOV instructions for ARM. - SDNode *SelectCMOVOp(SDNode *N); - SDNode *SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - SDNode *SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - SDNode *SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - SDNode *SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, - SDValue InFlag); - // Select special operations if node forms integer ABS pattern SDNode *SelectABSOp(SDNode *N); @@ -262,7 +253,7 @@ private: SDNode *SelectConcatVector(SDNode *N); - SDNode *SelectAtomic64(SDNode *Node, unsigned Opc); + SDNode *SelectAtomic(SDNode *N, unsigned Op8, unsigned Op16, unsigned Op32, unsigned Op64); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. 
@@ -364,7 +355,7 @@ void ARMDAGToDAGISel::PreprocessISelDAG() { continue; // Check if the AND mask is an immediate of the form: 000.....1111111100 - unsigned TZ = CountTrailingZeros_32(And_imm); + unsigned TZ = countTrailingZeros(And_imm); if (TZ != 1 && TZ != 2) // Be conservative here. Shifter operands aren't always free. e.g. On // Swift, left shifter operand of 1 / 2 for free but others are not. @@ -402,12 +393,12 @@ void ARMDAGToDAGISel::PreprocessISelDAG() { } // Now make the transformation. - Srl = CurDAG->getNode(ISD::SRL, Srl.getDebugLoc(), MVT::i32, + Srl = CurDAG->getNode(ISD::SRL, SDLoc(Srl), MVT::i32, Srl.getOperand(0), CurDAG->getConstant(Srl_imm+TZ, MVT::i32)); - N1 = CurDAG->getNode(ISD::AND, N1.getDebugLoc(), MVT::i32, + N1 = CurDAG->getNode(ISD::AND, SDLoc(N1), MVT::i32, Srl, CurDAG->getConstant(And_imm, MVT::i32)); - N1 = CurDAG->getNode(ISD::SHL, N1.getDebugLoc(), MVT::i32, + N1 = CurDAG->getNode(ISD::SHL, SDLoc(N1), MVT::i32, N1, CurDAG->getConstant(TZ, MVT::i32)); CurDAG->UpdateNodeOperands(N, N0, N1); } @@ -423,7 +414,7 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (!CheckVMLxHazard) return true; - if (!Subtarget->isCortexA8() && !Subtarget->isLikeA9() && + if (!Subtarget->isCortexA8() && !Subtarget->isCortexA9() && !Subtarget->isSwift()) return true; @@ -434,6 +425,9 @@ bool ARMDAGToDAGISel::hasNoVMLxHazardUse(SDNode *N) const { if (Use->getOpcode() == ISD::CopyToReg) return true; if (Use->isMachineOpcode()) { + const ARMBaseInstrInfo *TII = + static_cast(TM.getInstrInfo()); + const MCInstrDesc &MCID = TII->get(Use->getMachineOpcode()); if (MCID.mayStore()) return true; @@ -533,7 +527,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -557,7 +552,8 @@ bool ARMDAGToDAGISel::SelectAddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -703,7 +699,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -724,7 +721,8 @@ AddrMode2Type ARMDAGToDAGISel::SelectAddrMode2Worker(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -901,7 +899,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); Opc = 
CurDAG->getTargetConstant(ARM_AM::getAM3Opc(ARM_AM::add, 0),MVT::i32); @@ -915,7 +914,8 @@ bool ARMDAGToDAGISel::SelectAddrMode3(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } Offset = CurDAG->getRegister(0, MVT::i32); @@ -960,7 +960,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N; if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } else if (N.getOpcode() == ARMISD::Wrapper && !(Subtarget->useMovt() && N.getOperand(0).getOpcode() == ISD::TargetGlobalAddress)) { @@ -978,7 +979,8 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } ARM_AM::AddrOpc AddSub = ARM_AM::add; @@ -1202,7 +1204,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, SDValue &Base, SDValue &OffImm) { if (N.getOpcode() == ISD::FrameIndex) { int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1219,7 +1222,8 @@ bool ARMDAGToDAGISel::SelectThumbAddrModeSP(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1267,7 +1271,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, if (N.getOpcode() == ISD::FrameIndex) { // Match frame index. int FI = cast(N)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); OffImm = CurDAG->getTargetConstant(0, MVT::i32); return true; } @@ -1297,7 +1302,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm12(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1326,7 +1332,8 @@ bool ARMDAGToDAGISel::SelectT2AddrModeImm8(SDValue N, Base = N.getOperand(0); if (Base.getOpcode() == ISD::FrameIndex) { int FI = cast(Base)->getIndex(); - Base = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + Base = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); } OffImm = CurDAG->getTargetConstant(RHSC, MVT::i32); return true; @@ -1403,6 +1410,34 @@ bool ARMDAGToDAGISel::SelectT2AddrModeSoReg(SDValue N, return true; } +bool ARMDAGToDAGISel::SelectT2AddrModeExclusive(SDValue N, SDValue &Base, + SDValue &OffImm) { + // This *must* succeed since it's used for the irreplacable ldrex and strex + // instructions. 
+ Base = N; + OffImm = CurDAG->getTargetConstant(0, MVT::i32); + + if (N.getOpcode() != ISD::ADD || !CurDAG->isBaseWithConstantOffset(N)) + return true; + + ConstantSDNode *RHS = dyn_cast(N.getOperand(1)); + if (!RHS) + return true; + + uint32_t RHSC = (int)RHS->getZExtValue(); + if (RHSC > 1020 || RHSC % 4 != 0) + return true; + + Base = N.getOperand(0); + if (Base.getOpcode() == ISD::FrameIndex) { + int FI = cast(Base)->getIndex(); + Base = CurDAG->getTargetFrameIndex(FI, getTargetLowering()->getPointerTy()); + } + + OffImm = CurDAG->getTargetConstant(RHSC / 4, MVT::i32); + return true; +} + //===--------------------------------------------------------------------===// /// getAL - Returns a ARMCC::AL immediate node. @@ -1468,14 +1503,14 @@ SDNode *ARMDAGToDAGISel::SelectARMIndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, + return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, MVT::Other, Ops); } else { SDValue Chain = LD->getChain(); SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, AMOpc, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, + return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, MVT::Other, Ops); } } @@ -1524,7 +1559,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { SDValue Base = LD->getBasePtr(); SDValue Ops[]= { Base, Offset, getAL(CurDAG), CurDAG->getRegister(0, MVT::i32), Chain }; - return CurDAG->getMachineNode(Opcode, N->getDebugLoc(), MVT::i32, MVT::i32, + return CurDAG->getMachineNode(Opcode, SDLoc(N), MVT::i32, MVT::i32, MVT::Other, Ops); } @@ -1533,7 +1568,7 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDNode *N) { /// \brief Form a GPRPair pseudo register from a pair of GPR regs. SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::GPRPairRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::gsub_0, MVT::i32); @@ -1544,7 +1579,7 @@ SDNode *ARMDAGToDAGISel::createGPRPairNode(EVT VT, SDValue V0, SDValue V1) { /// \brief Form a D register from a pair of S registers. SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::DPR_VFP2RegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); @@ -1555,7 +1590,7 @@ SDNode *ARMDAGToDAGISel::createSRegPairNode(EVT VT, SDValue V0, SDValue V1) { /// \brief Form a quad register from a pair of D registers. SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); @@ -1565,7 +1600,7 @@ SDNode *ARMDAGToDAGISel::createDRegPairNode(EVT VT, SDValue V0, SDValue V1) { /// \brief Form 4 consecutive D registers from a pair of Q registers. 
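SelectT2AddrModeExclusive above encodes the constraint that a Thumb-2 ldrex/strex offset must be a multiple of 4 in [0, 1020], with the instruction's field storing offset/4; any other offset falls back to using the whole address as the base register with a zero offset, which is why the routine always returns true. A self-contained check-and-encode sketch:

#include <cstdint>
#include <cstdio>

// Returns false for offsets the instruction cannot encode; the selector then
// keeps the full address as the base and a zero offset field.
static bool encodeExclusiveOffset(uint32_t ByteOff, uint32_t &Field) {
  if (ByteOff > 1020 || ByteOff % 4 != 0)
    return false;
  Field = ByteOff / 4; // the encoding stores the word-scaled offset
  return true;
}

int main() {
  uint32_t F = 0;
  std::printf("%d\n", encodeExclusiveOffset(64, F) ? (int)F : -1);   // 16
  std::printf("%d\n", encodeExclusiveOffset(1022, F) ? (int)F : -1); // -1
}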
SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); @@ -1576,7 +1611,7 @@ SDNode *ARMDAGToDAGISel::createQRegPairNode(EVT VT, SDValue V0, SDValue V1) { /// \brief Form 4 consecutive S registers. SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QPR_VFP2RegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::ssub_0, MVT::i32); @@ -1591,7 +1626,7 @@ SDNode *ARMDAGToDAGISel::createQuadSRegsNode(EVT VT, SDValue V0, SDValue V1, /// \brief Form 4 consecutive D registers. SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::dsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::dsub_1, MVT::i32); @@ -1605,7 +1640,7 @@ SDNode *ARMDAGToDAGISel::createQuadDRegsNode(EVT VT, SDValue V0, SDValue V1, /// \brief Form 4 consecutive Q registers. SDNode *ARMDAGToDAGISel::createQuadQRegsNode(EVT VT, SDValue V0, SDValue V1, SDValue V2, SDValue V3) { - DebugLoc dl = V0.getNode()->getDebugLoc(); + SDLoc dl(V0.getNode()); SDValue RegClass = CurDAG->getTargetConstant(ARM::QQQQPRRegClassID, MVT::i32); SDValue SubReg0 = CurDAG->getTargetConstant(ARM::qsub_0, MVT::i32); SDValue SubReg1 = CurDAG->getTargetConstant(ARM::qsub_1, MVT::i32); @@ -1689,7 +1724,7 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *QOpcodes0, const uint16_t *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VLD NumVecs out-of-range"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue MemAddr, Align; unsigned AddrOpIdx = isUpdating ? 1 : 2; @@ -1821,7 +1856,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *QOpcodes0, const uint16_t *QOpcodes1) { assert(NumVecs >= 1 && NumVecs <= 4 && "VST NumVecs out-of-range"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue MemAddr, Align; unsigned AddrOpIdx = isUpdating ? 1 : 2; @@ -1966,7 +2001,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDNode *N, bool IsLoad, const uint16_t *DOpcodes, const uint16_t *QOpcodes) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDSTLane NumVecs out-of-range"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue MemAddr, Align; unsigned AddrOpIdx = isUpdating ? 
1 : 2; @@ -2084,7 +2119,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, unsigned NumVecs, const uint16_t *Opcodes) { assert(NumVecs >=2 && NumVecs <= 4 && "VLDDup NumVecs out-of-range"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue MemAddr, Align; if (!SelectAddrMode6(N, N->getOperand(1), MemAddr, Align)) @@ -2166,7 +2201,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDDup(SDNode *N, bool isUpdating, SDNode *ARMDAGToDAGISel::SelectVTBL(SDNode *N, bool IsExt, unsigned NumVecs, unsigned Opc) { assert(NumVecs >= 2 && NumVecs <= 4 && "VTBL NumVecs out-of-range"); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); unsigned FirstTblReg = IsExt ? 2 : 1; @@ -2278,204 +2313,6 @@ SDNode *ARMDAGToDAGISel::SelectV6T2BitfieldExtractOp(SDNode *N, return NULL; } -SDNode *ARMDAGToDAGISel:: -SelectT2CMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - SDValue CPTmp0; - SDValue CPTmp1; - if (SelectT2ShifterOperandReg(TrueVal, CPTmp0, CPTmp1)) { - unsigned SOVal = cast(CPTmp1)->getZExtValue(); - unsigned SOShOp = ARM_AM::getSORegShOp(SOVal); - unsigned Opc = 0; - switch (SOShOp) { - case ARM_AM::lsl: Opc = ARM::t2MOVCClsl; break; - case ARM_AM::lsr: Opc = ARM::t2MOVCClsr; break; - case ARM_AM::asr: Opc = ARM::t2MOVCCasr; break; - case ARM_AM::ror: Opc = ARM::t2MOVCCror; break; - default: - llvm_unreachable("Unknown so_reg opcode!"); - } - SDValue SOShImm = - CurDAG->getTargetConstant(ARM_AM::getSORegOffset(SOVal), MVT::i32); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, CPTmp0, SOShImm, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32,Ops, 6); - } - return 0; -} - -SDNode *ARMDAGToDAGISel:: -SelectARMCMOVShiftOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - SDValue CPTmp0; - SDValue CPTmp1; - SDValue CPTmp2; - if (SelectImmShifterOperand(TrueVal, CPTmp0, CPTmp2)) { - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp2, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, ARM::MOVCCsi, MVT::i32, Ops, 6); - } - - if (SelectRegShifterOperand(TrueVal, CPTmp0, CPTmp1, CPTmp2)) { - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, CPTmp0, CPTmp1, CPTmp2, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, ARM::MOVCCsr, MVT::i32, Ops, 7); - } - return 0; -} - -SDNode *ARMDAGToDAGISel:: -SelectT2CMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - ConstantSDNode *T = dyn_cast(TrueVal); - if (!T) - return 0; - - unsigned Opc = 0; - unsigned TrueImm = T->getZExtValue(); - if (is_t2_so_imm(TrueImm)) { - Opc = ARM::t2MOVCCi; - } else if (TrueImm <= 0xffff) { - Opc = ARM::t2MOVCCi16; - } else if (is_t2_so_imm_not(TrueImm)) { - TrueImm = ~TrueImm; - Opc = ARM::t2MVNCCi; - } else if (TrueVal.getNode()->hasOneUse() && Subtarget->hasV6T2Ops()) { - // Large immediate. 
- Opc = ARM::t2MOVCCi32imm; - } - - if (Opc) { - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); - } - - return 0; -} - -SDNode *ARMDAGToDAGISel:: -SelectARMCMOVImmOp(SDNode *N, SDValue FalseVal, SDValue TrueVal, - ARMCC::CondCodes CCVal, SDValue CCR, SDValue InFlag) { - ConstantSDNode *T = dyn_cast(TrueVal); - if (!T) - return 0; - - unsigned Opc = 0; - unsigned TrueImm = T->getZExtValue(); - bool isSoImm = is_so_imm(TrueImm); - if (isSoImm) { - Opc = ARM::MOVCCi; - } else if (Subtarget->hasV6T2Ops() && TrueImm <= 0xffff) { - Opc = ARM::MOVCCi16; - } else if (is_so_imm_not(TrueImm)) { - TrueImm = ~TrueImm; - Opc = ARM::MVNCCi; - } else if (TrueVal.getNode()->hasOneUse() && - (Subtarget->hasV6T2Ops() || ARM_AM::isSOImmTwoPartVal(TrueImm))) { - // Large immediate. - Opc = ARM::MOVCCi32imm; - } - - if (Opc) { - SDValue True = CurDAG->getTargetConstant(TrueImm, MVT::i32); - SDValue CC = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, True, CC, CCR, InFlag }; - return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); - } - - return 0; -} - -SDNode *ARMDAGToDAGISel::SelectCMOVOp(SDNode *N) { - EVT VT = N->getValueType(0); - SDValue FalseVal = N->getOperand(0); - SDValue TrueVal = N->getOperand(1); - SDValue CC = N->getOperand(2); - SDValue CCR = N->getOperand(3); - SDValue InFlag = N->getOperand(4); - assert(CC.getOpcode() == ISD::Constant); - assert(CCR.getOpcode() == ISD::Register); - ARMCC::CondCodes CCVal = - (ARMCC::CondCodes)cast(CC)->getZExtValue(); - - if (!Subtarget->isThumb1Only() && VT == MVT::i32) { - // Pattern: (ARMcmov:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCs:i32 GPR:i32:$false, so_reg:i32:$true, (imm:i32):$cc) - // Pattern complexity = 18 cost = 1 size = 0 - if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVShiftOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectT2CMOVShiftOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } else { - SDNode *Res = SelectARMCMOVShiftOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectARMCMOVShiftOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } - - // Pattern: (ARMcmov:i32 GPR:i32:$false, - // (imm:i32)<>:$true, - // (imm:i32):$cc) - // Emits: (MOVCCi:i32 GPR:i32:$false, - // (so_imm:i32 (imm:i32):$true), (imm:i32):$cc) - // Pattern complexity = 10 cost = 1 size = 0 - if (Subtarget->isThumb()) { - SDNode *Res = SelectT2CMOVImmOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectT2CMOVImmOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } else { - SDNode *Res = SelectARMCMOVImmOp(N, FalseVal, TrueVal, - CCVal, CCR, InFlag); - if (!Res) - Res = SelectARMCMOVImmOp(N, TrueVal, FalseVal, - ARMCC::getOppositeCondition(CCVal), CCR, InFlag); - if (Res) - return Res; - } - } - - // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Emits: (MOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Pattern complexity = 6 cost = 1 size = 0 - // - // Pattern: (ARMcmov:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Emits: (tMOVCCr:i32 GPR:i32:$false, GPR:i32:$true, (imm:i32):$cc) - // Pattern complexity = 6 cost = 11 size = 0 - // - // Also VMOVScc and VMOVDcc. 
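The helpers being deleted here picked a conditional-move opcode by classifying the immediate. For reference, the decision ladder of the removed SelectARMCMOVImmOp, restated as a standalone sketch; isSoImm is a simplified stand-in for is_so_imm (an 8-bit value rotated right by an even amount) and the enumerators are labels, not the real opcode values:

    #include <cstdint>
    #include <cstdio>

    // Simplified so_reg immediate test: 8 bits, rotated by an even amount.
    static bool isSoImm(uint32_t V) {
      for (unsigned R = 0; R < 32; R += 2) {
        uint32_t Rot = (V << R) | (V >> ((32 - R) & 31)); // rotate left by R
        if (Rot <= 0xff)
          return true;
      }
      return false;
    }

    enum Opcode { MOVCCi, MOVCCi16, MVNCCi, MOVCCi32imm, NoMatch };

    // Prefer the cheap encodings; fall back to the two-instruction
    // 32-bit pseudo only when the constant has a single use.
    static Opcode pickCMovImmOpcode(uint32_t Imm, bool HasV6T2, bool OneUse) {
      if (isSoImm(Imm))                return MOVCCi;      // rotated imm8
      if (HasV6T2 && Imm <= 0xffff)    return MOVCCi16;    // movw-style
      if (isSoImm(~Imm))               return MVNCCi;      // inverted imm
      if (OneUse && HasV6T2)           return MOVCCi32imm; // large immediate
      return NoMatch;  // let generic selection handle it
    }

    int main() {
      std::printf("%d %d %d\n",
                  pickCMovImmOpcode(0xff000000u, true, true),  // MOVCCi
                  pickCMovImmOpcode(0x1234u, true, true),      // MOVCCi16
                  pickCMovImmOpcode(0x12345678u, true, true)); // MOVCCi32imm
    }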
- SDValue Tmp2 = CurDAG->getTargetConstant(CCVal, MVT::i32); - SDValue Ops[] = { FalseVal, TrueVal, Tmp2, CCR, InFlag }; - unsigned Opc = 0; - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Illegal conditional move type!"); - case MVT::i32: - Opc = Subtarget->isThumb() - ? (Subtarget->hasThumb2() ? ARM::t2MOVCCr : ARM::tMOVCCr_pseudo) - : ARM::MOVCCr; - break; - case MVT::f32: - Opc = ARM::VMOVScc; - break; - case MVT::f64: - Opc = ARM::VMOVDcc; - break; - } - return CurDAG->SelectNodeTo(N, Opc, VT, Ops, 5); -} - /// Target-specific DAG combining for ISD::XOR. /// Target-independent combining lowers SELECT_CC nodes of the form /// select_cc setg[ge] X, 0, X, -X @@ -2524,27 +2361,40 @@ SDNode *ARMDAGToDAGISel::SelectConcatVector(SDNode *N) { return createDRegPairNode(VT, N->getOperand(0), N->getOperand(1)); } -SDNode *ARMDAGToDAGISel::SelectAtomic64(SDNode *Node, unsigned Opc) { +SDNode *ARMDAGToDAGISel::SelectAtomic(SDNode *Node, unsigned Op8, + unsigned Op16,unsigned Op32, + unsigned Op64) { + // Mostly direct translation to the given operations, except that we preserve + // the AtomicOrdering for use later on. + AtomicSDNode *AN = cast(Node); + EVT VT = AN->getMemoryVT(); + + unsigned Op; + SDVTList VTs = CurDAG->getVTList(AN->getValueType(0), MVT::Other); + if (VT == MVT::i8) + Op = Op8; + else if (VT == MVT::i16) + Op = Op16; + else if (VT == MVT::i32) + Op = Op32; + else if (VT == MVT::i64) { + Op = Op64; + VTs = CurDAG->getVTList(MVT::i32, MVT::i32, MVT::Other); + } else + llvm_unreachable("Unexpected atomic operation"); + SmallVector Ops; - Ops.push_back(Node->getOperand(1)); // Ptr - Ops.push_back(Node->getOperand(2)); // Low part of Val1 - Ops.push_back(Node->getOperand(3)); // High part of Val1 - if (Opc == ARM::ATOMCMPXCHG6432) { - Ops.push_back(Node->getOperand(4)); // Low part of Val2 - Ops.push_back(Node->getOperand(5)); // High part of Val2 - } - Ops.push_back(Node->getOperand(0)); // Chain - MachineSDNode::mmo_iterator MemOp = MF->allocateMemRefsArray(1); - MemOp[0] = cast(Node)->getMemOperand(); - SDNode *ResNode = CurDAG->getMachineNode(Opc, Node->getDebugLoc(), - MVT::i32, MVT::i32, MVT::Other, - Ops); - cast(ResNode)->setMemRefs(MemOp, MemOp + 1); - return ResNode; + for (unsigned i = 1; i < AN->getNumOperands(); ++i) + Ops.push_back(AN->getOperand(i)); + + Ops.push_back(CurDAG->getTargetConstant(AN->getOrdering(), MVT::i32)); + Ops.push_back(AN->getOperand(0)); // Chain moves to the end + + return CurDAG->SelectNodeTo(Node, Op, VTs, &Ops[0], Ops.size()); } SDNode *ARMDAGToDAGISel::Select(SDNode *N) { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (N->isMachineOpcode()) { N->setNodeId(-1); @@ -2589,7 +2439,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue CPIdx = CurDAG->getTargetConstantPool(ConstantInt::get( Type::getInt32Ty(*CurDAG->getContext()), Val), - TLI.getPointerTy()); + getTargetLowering()->getPointerTy()); SDNode *ResNode; if (Subtarget->isThumb1Only()) { @@ -2619,7 +2469,8 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::FrameIndex: { // Selects to ADDri FI, 0 which in turn will become ADDri SP, imm. 
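The new SelectAtomic above collapses a dozen per-operation entry points into one routine that dispatches on the memory width. A standalone sketch of that dispatch, with made-up opcode numbers standing in for the ARM::ATOMIC_* pseudo-instructions:

    #include <cstdio>
    #include <cstdlib>

    // Hypothetical opcode table for one atomic RMW operation, indexed the
    // way SelectAtomic picks between its Op8/Op16/Op32/Op64 parameters.
    struct AtomicOpcodes { unsigned I8, I16, I32, I64; };

    // Width in bytes stands in for the node's MemoryVT.
    static unsigned pickAtomicOpcode(const AtomicOpcodes &Ops, unsigned Bytes,
                                     bool &PairResult) {
      PairResult = false;
      switch (Bytes) {
      case 1: return Ops.I8;
      case 2: return Ops.I16;
      case 4: return Ops.I32;
      case 8: PairResult = true; return Ops.I64; // i64 comes back as two i32s
      default: std::abort();  // "Unexpected atomic operation"
      }
    }

    int main() {
      AtomicOpcodes LoadAdd = {800, 801, 802, 803}; // made-up opcode numbers
      bool Pair = false;
      unsigned Opc = pickAtomicOpcode(LoadAdd, 8, Pair);
      std::printf("opcode %u, pair=%d\n", Opc, Pair); // opcode 803, pair=1
    }

Note also that the rewritten routine appends the node's AtomicOrdering as an extra target-constant operand and moves the chain to the end, so the later expansion of the pseudo knows which barrier or acquire/release instructions to use.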
int FI = cast(N)->getIndex(); - SDValue TFI = CurDAG->getTargetFrameIndex(FI, TLI.getPointerTy()); + SDValue TFI = CurDAG->getTargetFrameIndex(FI, + getTargetLowering()->getPointerTy()); if (Subtarget->isThumb1Only()) { SDValue Ops[] = { TFI, CurDAG->getTargetConstant(0, MVT::i32), getAL(CurDAG), CurDAG->getRegister(0, MVT::i32) }; @@ -2840,8 +2691,6 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { SDValue(Chain.getNode(), Chain.getResNo())); return NULL; } - case ARMISD::CMOV: - return SelectCMOVOp(N); case ARMISD::VZIP: { unsigned Opc = 0; EVT VT = N->getValueType(0); @@ -3123,7 +2972,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case Intrinsic::arm_ldrexd: { SDValue MemAddr = N->getOperand(2); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Chain = N->getOperand(0); bool isThumb = Subtarget->isThumb() && Subtarget->hasThumb2(); @@ -3181,7 +3030,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case Intrinsic::arm_strexd: { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Chain = N->getOperand(0); SDValue Val0 = N->getOperand(2); SDValue Val1 = N->getOperand(3); @@ -3385,7 +3234,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { } case ARMISD::VTBL1: { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SmallVector Ops; @@ -3396,7 +3245,7 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { return CurDAG->getMachineNode(ARM::VTBL1, dl, VT, Ops); } case ARMISD::VTBL2: { - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); // Form a REG_SEQUENCE to force register allocation. @@ -3415,31 +3264,90 @@ SDNode *ARMDAGToDAGISel::Select(SDNode *N) { case ISD::CONCAT_VECTORS: return SelectConcatVector(N); - case ARMISD::ATOMOR64_DAG: - return SelectAtomic64(N, ARM::ATOMOR6432); - case ARMISD::ATOMXOR64_DAG: - return SelectAtomic64(N, ARM::ATOMXOR6432); - case ARMISD::ATOMADD64_DAG: - return SelectAtomic64(N, ARM::ATOMADD6432); - case ARMISD::ATOMSUB64_DAG: - return SelectAtomic64(N, ARM::ATOMSUB6432); - case ARMISD::ATOMNAND64_DAG: - return SelectAtomic64(N, ARM::ATOMNAND6432); - case ARMISD::ATOMAND64_DAG: - return SelectAtomic64(N, ARM::ATOMAND6432); - case ARMISD::ATOMSWAP64_DAG: - return SelectAtomic64(N, ARM::ATOMSWAP6432); - case ARMISD::ATOMCMPXCHG64_DAG: - return SelectAtomic64(N, ARM::ATOMCMPXCHG6432); - - case ARMISD::ATOMMIN64_DAG: - return SelectAtomic64(N, ARM::ATOMMIN6432); - case ARMISD::ATOMUMIN64_DAG: - return SelectAtomic64(N, ARM::ATOMUMIN6432); - case ARMISD::ATOMMAX64_DAG: - return SelectAtomic64(N, ARM::ATOMMAX6432); - case ARMISD::ATOMUMAX64_DAG: - return SelectAtomic64(N, ARM::ATOMUMAX6432); + case ISD::ATOMIC_LOAD: + if (cast(N)->getMemoryVT() == MVT::i64) + return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_LOAD_I64); + else + break; + + case ISD::ATOMIC_STORE: + if (cast(N)->getMemoryVT() == MVT::i64) + return SelectAtomic(N, 0, 0, 0, ARM::ATOMIC_STORE_I64); + else + break; + + case ISD::ATOMIC_LOAD_ADD: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_ADD_I8, + ARM::ATOMIC_LOAD_ADD_I16, + ARM::ATOMIC_LOAD_ADD_I32, + ARM::ATOMIC_LOAD_ADD_I64); + case ISD::ATOMIC_LOAD_SUB: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_SUB_I8, + ARM::ATOMIC_LOAD_SUB_I16, + ARM::ATOMIC_LOAD_SUB_I32, + ARM::ATOMIC_LOAD_SUB_I64); + case ISD::ATOMIC_LOAD_AND: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_AND_I8, + ARM::ATOMIC_LOAD_AND_I16, + ARM::ATOMIC_LOAD_AND_I32, + ARM::ATOMIC_LOAD_AND_I64); + case ISD::ATOMIC_LOAD_OR: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_OR_I8, + ARM::ATOMIC_LOAD_OR_I16, + ARM::ATOMIC_LOAD_OR_I32, + 
ARM::ATOMIC_LOAD_OR_I64); + case ISD::ATOMIC_LOAD_XOR: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_XOR_I8, + ARM::ATOMIC_LOAD_XOR_I16, + ARM::ATOMIC_LOAD_XOR_I32, + ARM::ATOMIC_LOAD_XOR_I64); + case ISD::ATOMIC_LOAD_NAND: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_NAND_I8, + ARM::ATOMIC_LOAD_NAND_I16, + ARM::ATOMIC_LOAD_NAND_I32, + ARM::ATOMIC_LOAD_NAND_I64); + case ISD::ATOMIC_LOAD_MIN: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_MIN_I8, + ARM::ATOMIC_LOAD_MIN_I16, + ARM::ATOMIC_LOAD_MIN_I32, + ARM::ATOMIC_LOAD_MIN_I64); + case ISD::ATOMIC_LOAD_MAX: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_MAX_I8, + ARM::ATOMIC_LOAD_MAX_I16, + ARM::ATOMIC_LOAD_MAX_I32, + ARM::ATOMIC_LOAD_MAX_I64); + case ISD::ATOMIC_LOAD_UMIN: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_UMIN_I8, + ARM::ATOMIC_LOAD_UMIN_I16, + ARM::ATOMIC_LOAD_UMIN_I32, + ARM::ATOMIC_LOAD_UMIN_I64); + case ISD::ATOMIC_LOAD_UMAX: + return SelectAtomic(N, + ARM::ATOMIC_LOAD_UMAX_I8, + ARM::ATOMIC_LOAD_UMAX_I16, + ARM::ATOMIC_LOAD_UMAX_I32, + ARM::ATOMIC_LOAD_UMAX_I64); + case ISD::ATOMIC_SWAP: + return SelectAtomic(N, + ARM::ATOMIC_SWAP_I8, + ARM::ATOMIC_SWAP_I16, + ARM::ATOMIC_SWAP_I32, + ARM::ATOMIC_SWAP_I64); + case ISD::ATOMIC_CMP_SWAP: + return SelectAtomic(N, + ARM::ATOMIC_CMP_SWAP_I8, + ARM::ATOMIC_CMP_SWAP_I16, + ARM::ATOMIC_CMP_SWAP_I32, + ARM::ATOMIC_CMP_SWAP_I64); } return SelectCode(N); @@ -3451,24 +3359,20 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ bool Changed = false; unsigned NumOps = N->getNumOperands(); - ExternalSymbolSDNode *S = dyn_cast( - N->getOperand(InlineAsm::Op_AsmString)); - StringRef AsmString = StringRef(S->getSymbol()); - // Normally, i64 data is bounded to two arbitrary GRPs for "%r" constraint. // However, some instrstions (e.g. ldrexd/strexd in ARM mode) require // (even/even+1) GPRs and use %n and %Hn to refer to the individual regs // respectively. Since there is no constraint to explicitly specify a - // reg pair, we search %H operand inside the asm string. If it is found, the - // transformation below enforces a GPRPair reg class for "%r" for 64-bit data. - if (AsmString.find(":H}") == StringRef::npos) - return NULL; + // reg pair, we use GPRPair reg class for "%r" for 64-bit data. For Thumb, + // the 64-bit data may be referred by H, Q, R modifiers, so we still pack + // them into a GPRPair. - DebugLoc dl = N->getDebugLoc(); - SDValue Glue = N->getOperand(NumOps-1); + SDLoc dl(N); + SDValue Glue = N->getGluedNode() ? N->getOperand(NumOps-1) : SDValue(0,0); + SmallVector OpChanged; // Glue node will be appended late. - for(unsigned i = 0; i < NumOps -1; ++i) { + for(unsigned i = 0, e = N->getGluedNode() ? NumOps - 1 : NumOps; i < e; ++i) { SDValue op = N->getOperand(i); AsmNodeOperands.push_back(op); @@ -3482,17 +3386,38 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ else continue; + // Immediate operands to inline asm in the SelectionDAG are modeled with + // two operands. The first is a constant of value InlineAsm::Kind_Imm, and + // the second is a constant with the value of the immediate. If we get here + // and we have a Kind_Imm, skip the next operand, and continue. + if (Kind == InlineAsm::Kind_Imm) { + SDValue op = N->getOperand(++i); + AsmNodeOperands.push_back(op); + continue; + } + + unsigned NumRegs = InlineAsm::getNumOperandRegisters(Flag); + if (NumRegs) + OpChanged.push_back(false); + + unsigned DefIdx = 0; + bool IsTiedToChangedOp = false; + // If it's a use that is tied with a previous def, it has no + // reg class constraint. 
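The tied-operand bookkeeping in this SelectInlineAsm rewrite manipulates inline-asm operand flag words. Roughly, and treat the exact layout as an assumption recalled from the 3.4-era llvm/IR/InlineAsm.h: the operand kind lives in the low 3 bits, the register count above that, and either a register class (stored plus one) or a matched-operand index in the upper half. A sketch:

    #include <cassert>
    #include <cstdint>

    enum Kind { RegUse = 1, RegDef = 2, RegDefEarlyClobber = 3,
                Clobber = 4, Imm = 5, Mem = 6 };

    static uint32_t flagWord(Kind K, uint32_t NumRegs) {
      return K | (NumRegs << 3);
    }
    static uint32_t withRegClass(uint32_t Flag, uint32_t RC) {
      return Flag | ((RC + 1) << 16);
    }
    static uint32_t withMatchingOp(uint32_t Flag, uint32_t DefIdx) {
      return Flag | 0x80000000u | (DefIdx << 16);
    }

    int main() {
      // What the rewritten code does for a 64-bit "r" operand: the
      // two-GPR flag becomes a single-register GPRPair flag, unless the use
      // is tied to an already-rewritten def, in which case it becomes a
      // matching-operand reference instead. Class IDs are illustrative.
      const uint32_t GPR = 12, GPRPair = 31;
      uint32_t OldFlag  = withRegClass(flagWord(RegUse, 2), GPR);
      uint32_t NewFlag  = withRegClass(flagWord(RegUse, 1), GPRPair);
      uint32_t TiedFlag = withMatchingOp(flagWord(RegUse, 1), /*DefIdx=*/0);
      assert(NewFlag != OldFlag && (TiedFlag & 0x80000000u));
    }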
+ if (Changed && InlineAsm::isUseOperandTiedToDef(Flag, DefIdx)) + IsTiedToChangedOp = OpChanged[DefIdx]; + if (Kind != InlineAsm::Kind_RegUse && Kind != InlineAsm::Kind_RegDef && Kind != InlineAsm::Kind_RegDefEarlyClobber) continue; - unsigned RegNum = InlineAsm::getNumOperandRegisters(Flag); unsigned RC; bool HasRC = InlineAsm::hasRegClassConstraint(Flag, RC); - if (!HasRC || RC != ARM::GPRRegClassID || RegNum != 2) + if ((!IsTiedToChangedOp && (!HasRC || RC != ARM::GPRRegClassID)) + || NumRegs != 2) continue; - assert((i+2 < NumOps-1) && "Invalid number of operands in inline asm"); + assert((i+2 < NumOps) && "Invalid number of operands in inline asm"); SDValue V0 = N->getOperand(i+1); SDValue V1 = N->getOperand(i+2); unsigned Reg0 = cast(V0)->getReg(); @@ -3553,8 +3478,12 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ Changed = true; if(PairedReg.getNode()) { + OpChanged[OpChanged.size() -1 ] = true; Flag = InlineAsm::getFlagWord(Kind, 1 /* RegNum*/); - Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); + if (IsTiedToChangedOp) + Flag = InlineAsm::getFlagWordForMatchingOp(Flag, DefIdx); + else + Flag = InlineAsm::getFlagWordForRegClass(Flag, ARM::GPRPairRegClassID); // Replace the current flag. AsmNodeOperands[AsmNodeOperands.size() -1] = CurDAG->getTargetConstant( Flag, MVT::i32); @@ -3565,11 +3494,12 @@ SDNode *ARMDAGToDAGISel::SelectInlineAsm(SDNode *N){ } } - AsmNodeOperands.push_back(Glue); + if (Glue.getNode()) + AsmNodeOperands.push_back(Glue); if (!Changed) return NULL; - SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), + SDValue New = CurDAG->getNode(ISD::INLINEASM, SDLoc(N), CurDAG->getVTList(MVT::Other, MVT::Glue), &AsmNodeOperands[0], AsmNodeOperands.size()); New->setNodeId(-1); diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp index e49cfc4..76a0a83 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.cpp @@ -48,6 +48,7 @@ #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetOptions.h" +#include using namespace llvm; STATISTIC(NumTailCalls, "Number of tail calls"); @@ -74,7 +75,7 @@ namespace { class ARMCCState : public CCState { public: ARMCCState(CallingConv::ID CC, bool isVarArg, MachineFunction &MF, - const TargetMachine &TM, SmallVector &locs, + const TargetMachine &TM, SmallVectorImpl &locs, LLVMContext &C, ParmContext PC) : CCState(CC, isVarArg, MF, TM, locs, C) { assert(((PC == Call) || (PC == Prologue)) && @@ -174,9 +175,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setBooleanVectorContents(ZeroOrNegativeOneBooleanContent); - if (Subtarget->isTargetDarwin()) { + if (Subtarget->isTargetIOS()) { // Uses VFP for Thumb libfuncs if available. - if (Subtarget->isThumb() && Subtarget->hasVFP2()) { + if (Subtarget->isThumb() && Subtarget->hasVFP2() && + Subtarget->hasARMOps()) { // Single-precision floating-point arithmetic. setLibcallName(RTLIB::ADD_F32, "__addsf3vfp"); setLibcallName(RTLIB::SUB_F32, "__subsf3vfp"); @@ -421,7 +423,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } // Use divmod compiler-rt calls for iOS 5.0 and later. 
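The isTargetIOS hunk above swaps the soft-float entry points for their VFP-calling-convention variants, and the new hasARMOps() guard presumably reflects that those routines contain ARM-mode code a Thumb-only core cannot call into. A toy model of the renaming; the *vfp names are the real ones used above, while the table type and flags are purely illustrative:

    #include <cstdio>
    #include <map>
    #include <string>

    int main() {
      // Default RTABI soft-float entry points.
      std::map<std::string, std::string> Libcalls = {
          {"ADD_F32", "__addsf3"}, {"SUB_F32", "__subsf3"}};

      bool IsIOS = true, IsThumb = true, HasVFP2 = true, HasARMOps = true;
      if (IsIOS && IsThumb && HasVFP2 && HasARMOps) {
        Libcalls["ADD_F32"] = "__addsf3vfp";  // VFP calling convention
        Libcalls["SUB_F32"] = "__subsf3vfp";
      }
      std::printf("%s\n", Libcalls["ADD_F32"].c_str()); // __addsf3vfp
    }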
- if (Subtarget->getTargetTriple().getOS() == Triple::IOS && + if (Subtarget->getTargetTriple().isiOS() && !Subtarget->getTargetTriple().isOSVersionLT(5, 0)) { setLibcallName(RTLIB::SDIVREM_I32, "__divmodsi4"); setLibcallName(RTLIB::UDIVREM_I32, "__udivmodsi4"); @@ -452,6 +454,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) } setOperationAction(ISD::ConstantFP, MVT::f32, Custom); + setOperationAction(ISD::ConstantFP, MVT::f64, Custom); if (Subtarget->hasNEON()) { addDRTypeForNEON(MVT::v2f32); @@ -564,16 +567,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP_ROUND, MVT::v2f32, Expand); setOperationAction(ISD::FP_EXTEND, MVT::v2f64, Expand); - // Custom expand long extensions to vectors. - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v4i64, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v16i32, Custom); - setOperationAction(ISD::SIGN_EXTEND, MVT::v8i64, Custom); - setOperationAction(ISD::ZERO_EXTEND, MVT::v8i64, Custom); - // NEON does not have single instruction CTPOP for vectors with element // types wider than 8-bits. However, custom lowering can leverage the // v8i8/v16i8 vcnt instruction. @@ -681,6 +674,8 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::CTTZ_ZERO_UNDEF , MVT::i32 , Expand); setOperationAction(ISD::CTLZ_ZERO_UNDEF , MVT::i32 , Expand); + setOperationAction(ISD::READCYCLECOUNTER, MVT::i64, Custom); + // Only ARMv6 has BSWAP. if (!Subtarget->hasV6Ops()) setOperationAction(ISD::BSWAP, MVT::i32, Expand); @@ -691,10 +686,36 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::SDIV, MVT::i32, Expand); setOperationAction(ISD::UDIV, MVT::i32, Expand); } + + // FIXME: Also set divmod for SREM on EABI setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::UREM, MVT::i32, Expand); - setOperationAction(ISD::SDIVREM, MVT::i32, Expand); - setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + // Register based DivRem for AEABI (RTABI 4.2) + if (Subtarget->isTargetAEABI()) { + setLibcallName(RTLIB::SDIVREM_I8, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I16, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I32, "__aeabi_idivmod"); + setLibcallName(RTLIB::SDIVREM_I64, "__aeabi_ldivmod"); + setLibcallName(RTLIB::UDIVREM_I8, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I16, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I32, "__aeabi_uidivmod"); + setLibcallName(RTLIB::UDIVREM_I64, "__aeabi_uldivmod"); + + setLibcallCallingConv(RTLIB::SDIVREM_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::SDIVREM_I64, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I8, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I16, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I32, CallingConv::ARM_AAPCS); + setLibcallCallingConv(RTLIB::UDIVREM_I64, CallingConv::ARM_AAPCS); + + setOperationAction(ISD::SDIVREM, MVT::i32, Custom); + setOperationAction(ISD::UDIVREM, MVT::i32, Custom); + } else { + setOperationAction(ISD::SDIVREM, MVT::i32, Expand); + setOperationAction(ISD::UDIVREM, MVT::i32, Expand); + } 
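The AEABI helpers registered in this block return quotient and remainder together: under AAPCS a two-word struct comes back in r0/r1, which is exactly where __aeabi_idivmod leaves its results, so marking ISD::SDIVREM/UDIVREM as Custom lets one call serve both a division and the matching modulo. A sketch of the helper's shape plus a reference implementation of its semantics; the declaration is illustrative, and on a real AEABI target the symbol comes from compiler-rt or libgcc:

    #include <cstdio>

    struct idiv_return { int quot; int rem; };

    // How the helper would be declared from C++ (AAPCS returns this
    // struct in r0/r1). Unused here, so nothing needs to link against it.
    extern "C" idiv_return __aeabi_idivmod(int numerator, int denominator);

    // Reference semantics; the real helper is hand-written assembly that
    // performs a single division.
    static idiv_return idivmod_ref(int n, int d) {
      idiv_return r = {n / d, n % d};
      return r;
    }

    int main() {
      idiv_return r = idivmod_ref(7, 3);
      std::printf("quot=%d rem=%d\n", r.quot, r.rem); // quot=2 rem=1
    }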
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom); setOperationAction(ISD::ConstantPool, MVT::i32, Custom); @@ -715,8 +736,6 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) if (!Subtarget->isTargetDarwin()) { // Non-Darwin platforms may return values in these registers via the // personality function. - setOperationAction(ISD::EHSELECTION, MVT::i32, Expand); - setOperationAction(ISD::EXCEPTIONADDR, MVT::i32, Expand); setExceptionPointerRegister(ARM::R0); setExceptionSelectorRegister(ARM::R1); } @@ -724,12 +743,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); // ARMv6 Thumb1 (except for CPUs that support dmb / dsb) and earlier use // the default expansion. - // FIXME: This should be checking for v6k, not just v6. - if (Subtarget->hasDataBarrier() || - (Subtarget->hasV6Ops() && !Subtarget->isThumb())) { - // membarrier needs custom lowering; the rest are legal and handled - // normally. - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); + if (Subtarget->hasAnyDataBarrier() && !Subtarget->isThumb1Only()) { + // ATOMIC_FENCE needs custom lowering; the other 32-bit ones are legal and + // handled normally. + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Custom); // Custom lowering for 64-bit ops setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_SUB, MVT::i64, Custom); @@ -742,11 +759,20 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::ATOMIC_LOAD_UMIN, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_LOAD_UMAX, MVT::i64, Custom); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i64, Custom); - // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. - setInsertFencesForAtomic(true); + // On v8, we have particularly efficient implementations of atomic fences + // if they can be combined with nearby atomic loads and stores. + if (!Subtarget->hasV8Ops()) { + // Automatically insert fences (dmb ist) around ATOMIC_SWAP etc. + setInsertFencesForAtomic(true); + } + setOperationAction(ISD::ATOMIC_LOAD, MVT::i64, Custom); } else { + // If there's anything we can use as a barrier, go through custom lowering + // for ATOMIC_FENCE. + setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, + Subtarget->hasAnyDataBarrier() ? Custom : Expand); + // Set them all for expansion, which will force libcalls. - setOperationAction(ISD::ATOMIC_FENCE, MVT::Other, Expand); setOperationAction(ISD::ATOMIC_CMP_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_SWAP, MVT::i32, Expand); setOperationAction(ISD::ATOMIC_LOAD_ADD, MVT::i32, Expand); @@ -843,6 +869,18 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::FP32_TO_FP16, MVT::i32, Expand); } } + + // Combine sin / cos into one node or libcall if possible. + if (Subtarget->hasSinCos()) { + setLibcallName(RTLIB::SINCOS_F32, "sincosf"); + setLibcallName(RTLIB::SINCOS_F64, "sincos"); + if (Subtarget->getTargetTriple().getOS() == Triple::IOS) { + // For iOS, we don't want to the normal expansion of a libcall to + // sincos. We want to issue a libcall to __sincos_stret. + setOperationAction(ISD::FSINCOS, MVT::f64, Custom); + setOperationAction(ISD::FSINCOS, MVT::f32, Custom); + } + } // We have target-specific dag combine patterns for the following nodes: // ARMISD::VMOVRRD - No need to call setTargetDAGCombine @@ -882,6 +920,44 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setMinFunctionAlignment(Subtarget->isThumb() ? 
1 : 2);
 }
 
+static void getExclusiveOperation(unsigned Size, AtomicOrdering Ord,
+                                  bool isThumb2, unsigned &LdrOpc,
+                                  unsigned &StrOpc) {
+  static const unsigned LoadBares[4][2] =  {{ARM::LDREXB, ARM::t2LDREXB},
+                                            {ARM::LDREXH, ARM::t2LDREXH},
+                                            {ARM::LDREX,  ARM::t2LDREX},
+                                            {ARM::LDREXD, ARM::t2LDREXD}};
+  static const unsigned LoadAcqs[4][2] =   {{ARM::LDAEXB, ARM::t2LDAEXB},
+                                            {ARM::LDAEXH, ARM::t2LDAEXH},
+                                            {ARM::LDAEX,  ARM::t2LDAEX},
+                                            {ARM::LDAEXD, ARM::t2LDAEXD}};
+  static const unsigned StoreBares[4][2] = {{ARM::STREXB, ARM::t2STREXB},
+                                            {ARM::STREXH, ARM::t2STREXH},
+                                            {ARM::STREX,  ARM::t2STREX},
+                                            {ARM::STREXD, ARM::t2STREXD}};
+  static const unsigned StoreRels[4][2] =  {{ARM::STLEXB, ARM::t2STLEXB},
+                                            {ARM::STLEXH, ARM::t2STLEXH},
+                                            {ARM::STLEX,  ARM::t2STLEX},
+                                            {ARM::STLEXD, ARM::t2STLEXD}};
+
+  const unsigned (*LoadOps)[2], (*StoreOps)[2];
+  if (Ord == Acquire || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    LoadOps = LoadAcqs;
+  else
+    LoadOps = LoadBares;
+
+  if (Ord == Release || Ord == AcquireRelease || Ord == SequentiallyConsistent)
+    StoreOps = StoreRels;
+  else
+    StoreOps = StoreBares;
+
+  assert(isPowerOf2_32(Size) && Size <= 8 &&
+         "unsupported size for atomic binary op!");
+
+  LdrOpc = LoadOps[Log2_32(Size)][isThumb2];
+  StrOpc = StoreOps[Log2_32(Size)][isThumb2];
+}
+
 // FIXME: It might make sense to define the representative register class as the
 // nearest super-register that has a non-null superset. For example, DPR_VFP2 is
 // a super-register of SPR, and DPR is a superset if DPR_VFP2. Consequently,
@@ -944,6 +1020,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::BR_JT:         return "ARMISD::BR_JT";
   case ARMISD::BR2_JT:        return "ARMISD::BR2_JT";
   case ARMISD::RET_FLAG:      return "ARMISD::RET_FLAG";
+  case ARMISD::INTRET_FLAG:   return "ARMISD::INTRET_FLAG";
   case ARMISD::PIC_ADD:       return "ARMISD::PIC_ADD";
   case ARMISD::CMP:           return "ARMISD::CMP";
   case ARMISD::CMN:           return "ARMISD::CMN";
@@ -983,7 +1060,6 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::DYN_ALLOC:     return "ARMISD::DYN_ALLOC";
 
-  case ARMISD::MEMBARRIER:    return "ARMISD::MEMBARRIER";
   case ARMISD::MEMBARRIER_MCR: return "ARMISD::MEMBARRIER_MCR";
 
   case ARMISD::PRELOAD:       return "ARMISD::PRELOAD";
@@ -1042,6 +1118,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   case ARMISD::BUILD_VECTOR:  return "ARMISD::BUILD_VECTOR";
   case ARMISD::FMAX:          return "ARMISD::FMAX";
   case ARMISD::FMIN:          return "ARMISD::FMIN";
+  case ARMISD::VMAXNM:        return "ARMISD::VMAX";
+  case ARMISD::VMINNM:        return "ARMISD::VMIN";
   case ARMISD::BFI:           return "ARMISD::BFI";
   case ARMISD::VORRIMM:       return "ARMISD::VORRIMM";
   case ARMISD::VBICIMM:       return "ARMISD::VBICIMM";
@@ -1069,7 +1147,7 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
   }
 }
 
-EVT ARMTargetLowering::getSetCCResultType(EVT VT) const {
+EVT ARMTargetLowering::getSetCCResultType(LLVMContext &, EVT VT) const {
   if (!VT.isVector()) return getPointerTy();
   return VT.changeVectorElementTypeToInteger();
 }
@@ -1233,7 +1311,7 @@ SDValue
 ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
                                    CallingConv::ID CallConv, bool isVarArg,
                                    const SmallVectorImpl<ISD::InputArg> &Ins,
-                                   DebugLoc dl, SelectionDAG &DAG,
+                                   SDLoc dl, SelectionDAG &DAG,
                                    SmallVectorImpl<SDValue> &InVals,
                                    bool isThisReturn, SDValue ThisVal) const {
@@ -1314,7 +1392,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
 SDValue
 ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue
Arg, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const { unsigned LocMemOffset = VA.getLocMemOffset(); @@ -1325,12 +1403,12 @@ ARMTargetLowering::LowerMemOpCallTo(SDValue Chain, false, false, 0); } -void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, +void ARMTargetLowering::PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, - SmallVector &MemOpChains, + SmallVectorImpl &MemOpChains, ISD::ArgFlagsTy Flags) const { SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl, @@ -1357,10 +1435,10 @@ SDValue ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { SelectionDAG &DAG = CLI.DAG; - DebugLoc &dl = CLI.DL; - SmallVector &Outs = CLI.Outs; - SmallVector &OutVals = CLI.OutVals; - SmallVector &Ins = CLI.Ins; + SDLoc &dl = CLI.DL; + SmallVectorImpl &Outs = CLI.Outs; + SmallVectorImpl &OutVals = CLI.OutVals; + SmallVectorImpl &Ins = CLI.Ins; SDValue Chain = CLI.Chain; SDValue Callee = CLI.Callee; bool &isTailCall = CLI.IsTailCall; @@ -1406,7 +1484,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, // Adjust the stack pointer for the new arguments... // These operations are automatically eliminated by the prolog/epilog pass if (!isSibCall) - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true)); + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true), + dl); SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy()); @@ -1496,7 +1575,8 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, SDValue AddArg = DAG.getNode(ISD::ADD, dl, PtrVT, Arg, Const); SDValue Load = DAG.getLoad(PtrVT, dl, Chain, AddArg, MachinePointerInfo(), - false, false, false, 0); + false, false, false, + DAG.InferPtrAlignment(AddArg)); MemOpChains.push_back(Load.getValue(1)); RegsToPass.push_back(std::make_pair(j, Load)); } @@ -1705,17 +1785,26 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, RegsToPass[i].second.getValueType())); // Add a register mask operand representing the call-preserved registers. 
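The hunk that follows wraps this mask in two refinements: tail calls get no mask at all, and when the calling convention has no R0-preserving variant the 'this'-return optimisation is dropped rather than emitting a wrong mask. The fallback, restated standalone with null standing in for "no such mask":

    #include <cstdint>
    #include <cstdio>

    static const uint32_t *pickCallPreservedMask(bool &IsThisReturn,
                                                 const uint32_t *ThisRetMask,
                                                 const uint32_t *NormalMask) {
      if (IsThisReturn) {
        if (ThisRetMask)
          return ThisRetMask;      // convention supports 'returned' modelling
        IsThisReturn = false;      // it doesn't: stop passing 'this' through
      }
      return NormalMask;
    }

    int main() {
      static const uint32_t Normal[1] = {0xffffffffu}; // dummy mask words
      bool IsThisReturn = true;
      const uint32_t *M = pickCallPreservedMask(IsThisReturn, nullptr, Normal);
      std::printf("mask=%p this=%d\n", (const void *)M, IsThisReturn); // this=0
    }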
- const uint32_t *Mask; - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const ARMBaseRegisterInfo *ARI = static_cast(TRI); - if (isThisReturn) - // For 'this' returns, use the R0-preserving mask - Mask = ARI->getThisReturnPreservedMask(CallConv); - else - Mask = ARI->getCallPreservedMask(CallConv); + if (!isTailCall) { + const uint32_t *Mask; + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const ARMBaseRegisterInfo *ARI = static_cast(TRI); + if (isThisReturn) { + // For 'this' returns, use the R0-preserving mask if applicable + Mask = ARI->getThisReturnPreservedMask(CallConv); + if (!Mask) { + // Set isThisReturn to false if the calling convention is not one that + // allows 'returned' to be modeled in this way, so LowerCallResult does + // not try to pass 'this' straight through + isThisReturn = false; + Mask = ARI->getCallPreservedMask(CallConv); + } + } else + Mask = ARI->getCallPreservedMask(CallConv); - assert(Mask && "Missing call preserved mask for calling convention"); - Ops.push_back(DAG.getRegisterMask(Mask)); + assert(Mask && "Missing call preserved mask for calling convention"); + Ops.push_back(DAG.getRegisterMask(Mask)); + } if (InFlag.getNode()) Ops.push_back(InFlag); @@ -1729,7 +1818,7 @@ ARMTargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI, InFlag = Chain.getValue(1); Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true), - DAG.getIntPtrConstant(0, true), InFlag); + DAG.getIntPtrConstant(0, true), InFlag, dl); if (!Ins.empty()) InFlag = Chain.getValue(1); @@ -1795,7 +1884,7 @@ ARMTargetLowering::HandleByVal( // else parameter would be splitted between registers and stack, // end register would be r4 in this case. unsigned ByValRegBegin = reg; - unsigned ByValRegEnd = (size < excess) ? reg + size/4 : ARM::R4; + unsigned ByValRegEnd = (size < excess) ? reg + size/4 : (unsigned)ARM::R4; State->addInRegsParamInfo(ByValRegBegin, ByValRegEnd); // Note, first register is allocated in the beginning of function already, // allocate remained amount of registers we need. @@ -1886,6 +1975,12 @@ ARMTargetLowering::IsEligibleForTailCallOptimization(SDValue Callee, if (isVarArg && !Outs.empty()) return false; + // Exception-handling functions need a special set of instructions to indicate + // a return to the hardware. Tail-calling another function would probably + // break this. + if (CallerF->hasFnAttribute("interrupt")) + return false; + // Also avoid sibcall optimization if either caller or callee uses struct // return semantics. if (isCalleeStructRet || isCallerStructRet) @@ -2014,12 +2109,45 @@ ARMTargetLowering::CanLowerReturn(CallingConv::ID CallConv, isVarArg)); } +static SDValue LowerInterruptReturn(SmallVectorImpl &RetOps, + SDLoc DL, SelectionDAG &DAG) { + const MachineFunction &MF = DAG.getMachineFunction(); + const Function *F = MF.getFunction(); + + StringRef IntKind = F->getFnAttribute("interrupt").getValueAsString(); + + // See ARM ARM v7 B1.8.3. On exception entry LR is set to a possibly offset + // version of the "preferred return address". These offsets affect the return + // instruction if this is a return from PL1 without hypervisor extensions. + // IRQ/FIQ: +4 "subs pc, lr, #4" + // SWI: 0 "subs pc, lr, #0" + // ABORT: +4 "subs pc, lr, #4" + // UNDEF: +4/+2 "subs pc, lr, #0" + // UNDEF varies depending on where the exception came from ARM or Thumb + // mode. Alongside GCC, we throw our hands up in disgust and pretend it's 0. 
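The offsets just described, restated as a checkable sketch before the code that applies them; the attribute strings are the real ones, the function itself is illustrative:

    #include <cstdio>
    #include <cstdlib>
    #include <string>

    // Maps the "interrupt" attribute kind to the LR adjustment; the epilogue
    // on A/R-class cores then becomes "subs pc, lr, #<offset>".
    static long lrOffsetForInterrupt(const std::string &Kind) {
      if (Kind.empty() || Kind == "IRQ" || Kind == "FIQ" || Kind == "ABORT")
        return 4;
      if (Kind == "SWI" || Kind == "UNDEF")
        return 0;
      std::fprintf(stderr, "unsupported interrupt kind\n");
      std::exit(1);
    }

    int main() {
      std::printf("subs pc, lr, #%ld\n", lrOffsetForInterrupt("IRQ")); // #4
    }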
+ + int64_t LROffset; + if (IntKind == "" || IntKind == "IRQ" || IntKind == "FIQ" || + IntKind == "ABORT") + LROffset = 4; + else if (IntKind == "SWI" || IntKind == "UNDEF") + LROffset = 0; + else + report_fatal_error("Unsupported interrupt attribute. If present, value " + "must be one of: IRQ, FIQ, SWI, ABORT or UNDEF"); + + RetOps.insert(RetOps.begin() + 1, DAG.getConstant(LROffset, MVT::i32, false)); + + return DAG.getNode(ARMISD::INTRET_FLAG, DL, MVT::Other, + RetOps.data(), RetOps.size()); +} + SDValue ARMTargetLowering::LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const { + SDLoc dl, SelectionDAG &DAG) const { // CCValAssign - represent the assignment of the return value to a location. SmallVector RVLocs; @@ -2099,6 +2227,19 @@ ARMTargetLowering::LowerReturn(SDValue Chain, if (Flag.getNode()) RetOps.push_back(Flag); + // CPUs which aren't M-class use a special sequence to return from + // exceptions (roughly, any instruction setting pc and cpsr simultaneously, + // though we use "subs pc, lr, #N"). + // + // M-class CPUs actually use a normal return sequence with a special + // (hardware-provided) value in LR, so the normal code path works. + if (DAG.getMachineFunction().getFunction()->hasFnAttribute("interrupt") && + !Subtarget->isMClass()) { + if (Subtarget->isThumb1Only()) + report_fatal_error("interrupt attribute is not supported in Thumb1"); + return LowerInterruptReturn(RetOps, dl, DAG); + } + return DAG.getNode(ARMISD::RET_FLAG, dl, MVT::Other, RetOps.data(), RetOps.size()); } @@ -2147,7 +2288,7 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { Copy = *Copy->use_begin(); if (Copy->getOpcode() != ISD::CopyToReg || !Copy->hasNUsesOfValue(1, 0)) return false; - Chain = Copy->getOperand(0); + TCChain = Copy->getOperand(0); } else { return false; } @@ -2155,7 +2296,8 @@ bool ARMTargetLowering::isUsedByReturnOnly(SDNode *N, SDValue &Chain) const { bool HasRet = false; for (SDNode::use_iterator UI = Copy->use_begin(), UE = Copy->use_end(); UI != UE; ++UI) { - if (UI->getOpcode() != ARMISD::RET_FLAG) + if (UI->getOpcode() != ARMISD::RET_FLAG && + UI->getOpcode() != ARMISD::INTRET_FLAG) return false; HasRet = true; } @@ -2186,7 +2328,7 @@ bool ARMTargetLowering::mayBeEmittedAsTailCall(CallInst *CI) const { static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) { EVT PtrVT = Op.getValueType(); // FIXME there is no actual debug info here - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); ConstantPoolSDNode *CP = cast(Op); SDValue Res; if (CP->isMachineConstantPoolEntry()) @@ -2207,7 +2349,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = 0; - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); EVT PtrVT = getPointerTy(); const BlockAddress *BA = cast(Op)->getBlockAddress(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); @@ -2236,7 +2378,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SDValue ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA, SelectionDAG &DAG) const { - DebugLoc dl = GA->getDebugLoc(); + SDLoc dl(GA); EVT PtrVT = getPointerTy(); unsigned char PCAdj = Subtarget->isThumb() ? 
4 : 8; MachineFunction &MF = DAG.getMachineFunction(); @@ -2279,7 +2421,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA, SelectionDAG &DAG, TLSModel::Model model) const { const GlobalValue *GV = GA->getGlobal(); - DebugLoc dl = GA->getDebugLoc(); + SDLoc dl(GA); SDValue Offset; SDValue Chain = DAG.getEntryNode(); EVT PtrVT = getPointerTy(); @@ -2349,7 +2491,7 @@ ARMTargetLowering::LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) const { SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); const GlobalValue *GV = cast(Op)->getGlobal(); if (getTargetMachine().getRelocationModel() == Reloc::PIC_) { bool UseGOTOFF = GV->hasLocalLinkage() || GV->hasHiddenVisibility(); @@ -2392,7 +2534,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op, SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op, SelectionDAG &DAG) const { EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); const GlobalValue *GV = cast(Op)->getGlobal(); Reloc::Model RelocM = getTargetMachine().getRelocationModel(); @@ -2457,7 +2599,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, ARMFunctionInfo *AFI = MF.getInfo(); unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); EVT PtrVT = getPointerTy(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned PCAdj = Subtarget->isThumb() ? 4 : 8; ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create(*DAG.getContext(), "_GLOBAL_OFFSET_TABLE_", @@ -2473,7 +2615,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op, SDValue ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue Val = DAG.getConstant(0, MVT::i32); return DAG.getNode(ARMISD::EH_SJLJ_SETJMP, dl, DAG.getVTList(MVT::i32, MVT::Other), Op.getOperand(0), @@ -2482,7 +2624,7 @@ ARMTargetLowering::LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const { SDValue ARMTargetLowering::LowerEH_SJLJ_LONGJMP(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ARMISD::EH_SJLJ_LONGJMP, dl, MVT::Other, Op.getOperand(0), Op.getOperand(1), DAG.getConstant(0, MVT::i32)); } @@ -2491,7 +2633,7 @@ SDValue ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) const { unsigned IntNo = cast(Op.getOperand(0))->getZExtValue(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); switch (IntNo) { default: return SDValue(); // Don't custom lower most intrinsics. case Intrinsic::arm_thread_pointer: { @@ -2527,7 +2669,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, case Intrinsic::arm_neon_vmullu: { unsigned NewOpc = (IntNo == Intrinsic::arm_neon_vmulls) ? ARMISD::VMULLs : ARMISD::VMULLu; - return DAG.getNode(NewOpc, Op.getDebugLoc(), Op.getValueType(), + return DAG.getNode(NewOpc, SDLoc(Op), Op.getValueType(), Op.getOperand(1), Op.getOperand(2)); } } @@ -2536,19 +2678,33 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG, static SDValue LowerATOMIC_FENCE(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *Subtarget) { // FIXME: handle "fence singlethread" more efficiently. - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (!Subtarget->hasDataBarrier()) { // Some ARMv6 cpus can support data barriers with an mcr instruction. 
// Thumb1 and pre-v6 ARM mode use a libcall instead and should never get // here. assert(Subtarget->hasV6Ops() && !Subtarget->isThumb() && - "Unexpected ISD::MEMBARRIER encountered. Should be libcall!"); + "Unexpected ISD::ATOMIC_FENCE encountered. Should be libcall!"); return DAG.getNode(ARMISD::MEMBARRIER_MCR, dl, MVT::Other, Op.getOperand(0), DAG.getConstant(0, MVT::i32)); } - return DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0), - DAG.getConstant(ARM_MB::ISH, MVT::i32)); + ConstantSDNode *OrdN = cast(Op.getOperand(1)); + AtomicOrdering Ord = static_cast(OrdN->getZExtValue()); + unsigned Domain = ARM_MB::ISH; + if (Subtarget->isMClass()) { + // Only a full system barrier exists in the M-class architectures. + Domain = ARM_MB::SY; + } else if (Subtarget->isSwift() && Ord == Release) { + // Swift happens to implement ISHST barriers in a way that's compatible with + // Release semantics but weaker than ISH so we'd be fools not to use + // it. Beware: other processors probably don't! + Domain = ARM_MB::ISHST; + } + + return DAG.getNode(ISD::INTRINSIC_VOID, dl, MVT::Other, Op.getOperand(0), + DAG.getConstant(Intrinsic::arm_dmb, MVT::i32), + DAG.getConstant(Domain, MVT::i32)); } static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, @@ -2559,7 +2715,7 @@ static SDValue LowerPREFETCH(SDValue Op, SelectionDAG &DAG, // Just preserve the chain. return Op.getOperand(0); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned isRead = ~cast(Op.getOperand(2))->getZExtValue() & 1; if (!isRead && (!Subtarget->hasV7Ops() || !Subtarget->hasMPExtension())) @@ -2584,7 +2740,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { // vastart just stores the address of the VarArgsFrameIndex slot into the // memory location argument. - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy(); SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsFrameIndex(), PtrVT); const Value *SV = cast(Op.getOperand(2))->getValue(); @@ -2595,7 +2751,7 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) { SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { MachineFunction &MF = DAG.getMachineFunction(); ARMFunctionInfo *AFI = MF.getInfo(); @@ -2630,6 +2786,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, void ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned InRegsParamRecordIdx, + unsigned ArgSize, unsigned &ArgRegsSize, unsigned &ArgRegsSaveSize) const { @@ -2648,7 +2805,29 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned Align = MF.getTarget().getFrameLowering()->getStackAlignment(); ArgRegsSize = NumGPRs * 4; - ArgRegsSaveSize = (ArgRegsSize + Align - 1) & ~(Align - 1); + + // If parameter is split between stack and GPRs... + if (NumGPRs && Align == 8 && + (ArgRegsSize < ArgSize || + InRegsParamRecordIdx >= CCInfo.getInRegsParamsCount())) { + // Add padding for part of param recovered from GPRs, so + // its last byte must be at address K*8 - 1. + // We need to do it, since remained (stack) part of parameter has + // stack alignment, and we need to "attach" "GPRs head" without gaps + // to it: + // Stack: + // |---- 8 bytes block ----| |---- 8 bytes block ----| |---- 8 bytes... + // [ [padding] [GPRs head] ] [ Tail passed via stack .... 
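Stepping back to the LowerATOMIC_FENCE hunk above: fences are now emitted as an arm_dmb intrinsic whose barrier domain depends on the subtarget and the ordering. The choice, as a standalone sketch; the enumerators stand in for the ARM_MB constants and AtomicOrdering:

    #include <cstdio>

    enum Ordering { Monotonic, Acquire, Release, AcquireRelease,
                    SequentiallyConsistent };
    enum BarrierDomain { ISH, ISHST, SY };

    static BarrierDomain pickDmbDomain(bool IsMClass, bool IsSwift,
                                       Ordering Ord) {
      if (IsMClass)
        return SY;       // M-class only has the full-system barrier
      if (IsSwift && Ord == Release)
        return ISHST;    // store-only barrier suffices for Release on Swift
      return ISH;        // inner-shareable default
    }

    int main() {
      std::printf("%d %d %d\n",
                  pickDmbDomain(true, false, SequentiallyConsistent), // SY
                  pickDmbDomain(false, true, Release),                // ISHST
                  pickDmbDomain(false, false, Release));              // ISH
    }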
+ // + ARMFunctionInfo *AFI = MF.getInfo(); + unsigned Padding = + ((ArgRegsSize + AFI->getArgRegsSaveSize() + Align - 1) & ~(Align-1)) - + (ArgRegsSize + AFI->getArgRegsSaveSize()); + ArgRegsSaveSize = ArgRegsSize + Padding; + } else + // We don't need to extend regs save size for byval parameters if they + // are passed via GPRs only. + ArgRegsSaveSize = ArgRegsSize; } // The remaining GPRs hold either the beginning of variable-argument @@ -2661,11 +2840,12 @@ ARMTargetLowering::computeRegArea(CCState &CCInfo, MachineFunction &MF, // Return: The frame index registers were stored into. int ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, + SDLoc dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, unsigned OffsetFromOrigArg, unsigned ArgOffset, + unsigned ArgSize, bool ForceMutable) const { // Currently, two use-cases possible: @@ -2690,12 +2870,13 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, lastRegToSaveIndex = REnd - ARM::R0; } else { firstRegToSaveIndex = CCInfo.getFirstUnallocated - (GPRArgRegs, sizeof(GPRArgRegs) / sizeof(GPRArgRegs[0])); + (GPRArgRegs, array_lengthof(GPRArgRegs)); lastRegToSaveIndex = 4; } unsigned ArgRegsSize, ArgRegsSaveSize; - computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgRegsSize, ArgRegsSaveSize); + computeRegArea(CCInfo, MF, InRegsParamRecordIdx, ArgSize, + ArgRegsSize, ArgRegsSaveSize); // Store any by-val regs to their spots on the stack so that they may be // loaded by deferencing the result of formal parameter pointer or va_next. @@ -2703,9 +2884,17 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, // was initialized, it can't be initialized again. if (ArgRegsSaveSize) { + unsigned Padding = ArgRegsSaveSize - ArgRegsSize; + + if (Padding) { + assert(AFI->getStoredByValParamsPadding() == 0 && + "The only parameter may be padded."); + AFI->setStoredByValParamsPadding(Padding); + } + int FrameIndex = MFI->CreateFixedObject( ArgRegsSaveSize, - ArgOffset + ArgRegsSaveSize - ArgRegsSize, + Padding + ArgOffset, false); SDValue FIN = DAG.getFrameIndex(FrameIndex, getPointerTy()); @@ -2737,13 +2926,14 @@ ARMTargetLowering::StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, return FrameIndex; } else // This will point to the next argument passed via stack. - return MFI->CreateFixedObject(4, ArgOffset, !ForceMutable); + return MFI->CreateFixedObject( + 4, AFI->getStoredByValParamsPadding() + ArgOffset, !ForceMutable); } // Setup stack frame, the va_list pointer will start from. void ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, + SDLoc dl, SDValue &Chain, unsigned ArgOffset, bool ForceMutable) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -2756,7 +2946,7 @@ ARMTargetLowering::VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, // argument passed via stack. 
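The Padding expression above makes the spilled "GPRs head" of a split byval parameter end exactly where its 8-byte-aligned stack "tail" begins. The same arithmetic, checked standalone:

    #include <cassert>

    // Returns the save-area size: the register bytes plus whatever padding
    // rounds (already-saved + register bytes) up to the alignment.
    static unsigned byvalSaveSize(unsigned ArgRegsSize, unsigned AlreadySaved,
                                  unsigned Align) {
      unsigned Total = ArgRegsSize + AlreadySaved;
      unsigned Padding = ((Total + Align - 1) & ~(Align - 1)) - Total;
      return ArgRegsSize + Padding;
    }

    int main() {
      // Three GPRs (12 bytes) with nothing saved yet: pad 12 up to 16, so
      // the save area is 12 + 4 bytes and the stack tail lands 8-aligned.
      assert(byvalSaveSize(12, 0, 8) == 16);
      assert(byvalSaveSize(8, 0, 8) == 8); // already aligned: no padding
    }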
int FrameIndex = StoreByValRegs(CCInfo, DAG, dl, Chain, 0, CCInfo.getInRegsParamsCount(), - 0, ArgOffset, ForceMutable); + 0, ArgOffset, 0, ForceMutable); AFI->setVarArgsFrameIndex(FrameIndex); } @@ -2766,7 +2956,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); @@ -2896,12 +3086,15 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain, CurByValIndex, Ins[VA.getValNo()].PartOffset, VA.getLocMemOffset(), + Flags.getByValSize(), true /*force mutable frames*/); InVals.push_back(DAG.getFrameIndex(FrameIndex, getPointerTy())); CCInfo.nextInRegsParam(); } else { + unsigned FIOffset = VA.getLocMemOffset() + + AFI->getStoredByValParamsPadding(); int FI = MFI->CreateFixedObject(VA.getLocVT().getSizeInBits()/8, - VA.getLocMemOffset(), true); + FIOffset, true); // Create load nodes to retrieve arguments from the stack. SDValue FIN = DAG.getFrameIndex(FI, getPointerTy()); @@ -2943,7 +3136,7 @@ static bool isFloatingPointZero(SDValue Op) { SDValue ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, SDValue &ARMcc, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { if (ConstantSDNode *RHSC = dyn_cast(RHS.getNode())) { unsigned C = RHSC->getZExtValue(); if (!isLegalICmpImmediate(C)) { @@ -3001,7 +3194,7 @@ ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, /// Returns a appropriate VFP CMP (fcmp{s|d}+fmstat) for the given operands. SDValue ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) const { + SDLoc dl) const { SDValue Cmp; if (!isFloatingPointZero(RHS)) Cmp = DAG.getNode(ARMISD::CMPFP, dl, MVT::Glue, LHS, RHS); @@ -3015,7 +3208,7 @@ ARMTargetLowering::getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG, SDValue ARMTargetLowering::duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const { unsigned Opc = Cmp.getOpcode(); - DebugLoc DL = Cmp.getDebugLoc(); + SDLoc DL(Cmp); if (Opc == ARMISD::CMP || Opc == ARMISD::CMPZ) return DAG.getNode(Opc, DL, MVT::Glue, Cmp.getOperand(0),Cmp.getOperand(1)); @@ -3035,7 +3228,7 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SDValue Cond = Op.getOperand(0); SDValue SelectTrue = Op.getOperand(1); SDValue SelectFalse = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); // Convert: // @@ -3083,6 +3276,61 @@ SDValue ARMTargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) const { SelectTrue, SelectFalse, ISD::SETNE); } +static ISD::CondCode getInverseCCForVSEL(ISD::CondCode CC) { + if (CC == ISD::SETNE) + return ISD::SETEQ; + return ISD::getSetCCSwappedOperands(CC); +} + +static void checkVSELConstraints(ISD::CondCode CC, ARMCC::CondCodes &CondCode, + bool &swpCmpOps, bool &swpVselOps) { + // Start by selecting the GE condition code for opcodes that return true for + // 'equality' + if (CC == ISD::SETUGE || CC == ISD::SETOGE || CC == ISD::SETOLE || + CC == ISD::SETULE) + CondCode = ARMCC::GE; + + // and GT for opcodes that return false for 'equality'. + else if (CC == ISD::SETUGT || CC == ISD::SETOGT || CC == ISD::SETOLT || + CC == ISD::SETULT) + CondCode = ARMCC::GT; + + // Since we are constrained to GE/GT, if the opcode contains 'less', we need + // to swap the compare operands. 
+ if (CC == ISD::SETOLE || CC == ISD::SETULE || CC == ISD::SETOLT || + CC == ISD::SETULT) + swpCmpOps = true; + + // Both GT and GE are ordered comparisons, and return false for 'unordered'. + // If we have an unordered opcode, we need to swap the operands to the VSEL + // instruction (effectively negating the condition). + // + // This also has the effect of swapping which one of 'less' or 'greater' + // returns true, so we also swap the compare operands. It also switches + // whether we return true for 'equality', so we compensate by picking the + // opposite condition code to our original choice. + if (CC == ISD::SETULE || CC == ISD::SETULT || CC == ISD::SETUGE || + CC == ISD::SETUGT) { + swpCmpOps = !swpCmpOps; + swpVselOps = !swpVselOps; + CondCode = CondCode == ARMCC::GT ? ARMCC::GE : ARMCC::GT; + } + + // 'ordered' is 'anything but unordered', so use the VS condition code and + // swap the VSEL operands. + if (CC == ISD::SETO) { + CondCode = ARMCC::VS; + swpVselOps = true; + } + + // 'unordered or not equal' is 'anything but equal', so use the EQ condition + // code and swap the VSEL operands. + if (CC == ISD::SETUNE) { + CondCode = ARMCC::EQ; + swpVselOps = true; + } +} + SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { EVT VT = Op.getValueType(); SDValue LHS = Op.getOperand(0); @@ -3090,18 +3338,69 @@ SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) const { ISD::CondCode CC = cast(Op.getOperand(4))->get(); SDValue TrueVal = Op.getOperand(2); SDValue FalseVal = Op.getOperand(3); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (LHS.getValueType() == MVT::i32) { + // Try to generate VSEL on ARMv8. + // The VSEL instruction can't use all the usual ARM condition + // codes: it only has two bits to select the condition code, so it's + // constrained to use only GE, GT, VS and EQ. + // + // To implement all the various ISD::SETXXX opcodes, we sometimes need to + // swap the operands of the previous compare instruction (effectively + // inverting the compare condition, swapping 'less' and 'greater') and + // sometimes need to swap the operands to the VSEL (which inverts the + // condition in the sense of firing whenever the previous condition didn't) + if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { + ARMCC::CondCodes CondCode = IntCCToARMCC(CC); + if (CondCode == ARMCC::LT || CondCode == ARMCC::LE || + CondCode == ARMCC::VC || CondCode == ARMCC::NE) { + CC = getInverseCCForVSEL(CC); + std::swap(TrueVal, FalseVal); + } + } + SDValue ARMcc; SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMcc, DAG, dl); - return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR,Cmp); + return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMcc, CCR, + Cmp); } ARMCC::CondCodes CondCode, CondCode2; FPCCToARMCC(CC, CondCode, CondCode2); + // Try to generate VSEL on ARMv8. + if (getSubtarget()->hasFPARMv8() && (TrueVal.getValueType() == MVT::f32 || + TrueVal.getValueType() == MVT::f64)) { + // We can select VMAXNM/VMINNM from a compare followed by a select with the + // same operands, as follows: + // c = fcmp [ogt, olt, ugt, ult] a, b + // select c, a, b + // We only do this in unsafe-fp-math, because signed zeros and NaNs are + // handled differently than the original code sequence. 
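checkVSELConstraints above canonicalizes an FP condition onto the four codes VSEL can encode (GE, GT, VS, EQ) by swapping the compare and/or select operands. A condensed standalone mirror covering only the GE/GT cases (the VS/EQ special cases are omitted), together with the source-level shape that the unsafe-fp-math path below turns into a single VMAXNM/VMINNM:

    #include <cstdio>

    // Subset of ISD::CondCode and the two VSEL codes this path uses.
    enum FPCC { SETOGT, SETOGE, SETOLT, SETOLE,
                SETUGT, SETUGE, SETULT, SETULE };
    enum ARMCCCode { GE, GT };

    static void vselConstraints(FPCC CC, ARMCCCode &Cond, bool &SwpCmp,
                                bool &SwpSel) {
      SwpCmp = SwpSel = false;
      Cond = (CC == SETOGE || CC == SETUGE || CC == SETOLE || CC == SETULE)
                 ? GE : GT;                 // true-on-equal picks GE
      if (CC == SETOLE || CC == SETULE || CC == SETOLT || CC == SETULT)
        SwpCmp = true;                      // "less": swap compare operands
      if (CC == SETULE || CC == SETULT || CC == SETUGE || CC == SETUGT) {
        SwpSel = !SwpSel;                   // unordered: swap VSEL operands,
        SwpCmp = !SwpCmp;                   // re-swap the compare operands,
        Cond = Cond == GT ? GE : GT;        // and flip GE <-> GT
      }
    }

    // Shape recognized by the VMAXNM/VMINNM special case; NaN and signed-zero
    // behaviour differs from generic compare+select, hence unsafe-fp-math.
    static float select_max(float a, float b) { return a > b ? a : b; }
    static float select_min(float a, float b) { return a < b ? a : b; }

    int main() {
      ARMCCCode Cond; bool SwpCmp, SwpSel;
      vselConstraints(SETULT, Cond, SwpCmp, SwpSel);
      std::printf("%s cmp=%d sel=%d max=%g min=%g\n",
                  Cond == GT ? "GT" : "GE", SwpCmp, SwpSel,
                  select_max(1, 2), select_min(1, 2));
      // prints: GE cmp=0 sel=1 max=2 min=1
    }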
+ if (getTargetMachine().Options.UnsafeFPMath && LHS == TrueVal && + RHS == FalseVal) { + if (CC == ISD::SETOGT || CC == ISD::SETUGT) + return DAG.getNode(ARMISD::VMAXNM, dl, VT, TrueVal, FalseVal); + if (CC == ISD::SETOLT || CC == ISD::SETULT) + return DAG.getNode(ARMISD::VMINNM, dl, VT, TrueVal, FalseVal); + } + + bool swpCmpOps = false; + bool swpVselOps = false; + checkVSELConstraints(CC, CondCode, swpCmpOps, swpVselOps); + + if (CondCode == ARMCC::GT || CondCode == ARMCC::GE || + CondCode == ARMCC::VS || CondCode == ARMCC::EQ) { + if (swpCmpOps) + std::swap(LHS, RHS); + if (swpVselOps) + std::swap(TrueVal, FalseVal); + } + } + SDValue ARMcc = DAG.getConstant(CondCode, MVT::i32); SDValue Cmp = getVFPCmp(LHS, RHS, DAG, dl); SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32); @@ -3145,7 +3444,7 @@ static SDValue bitcastf32Toi32(SDValue Op, SelectionDAG &DAG) { return DAG.getConstant(0, MVT::i32); if (LoadSDNode *Ld = dyn_cast(Op)) - return DAG.getLoad(MVT::i32, Op.getDebugLoc(), + return DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ld->getBasePtr(), Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), Ld->isInvariant(), Ld->getAlignment()); @@ -3163,7 +3462,7 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, if (LoadSDNode *Ld = dyn_cast(Op)) { SDValue Ptr = Ld->getBasePtr(); - RetVal1 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), + RetVal1 = DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), Ptr, Ld->getPointerInfo(), Ld->isVolatile(), Ld->isNonTemporal(), @@ -3171,9 +3470,9 @@ static void expandf64Toi32(SDValue Op, SelectionDAG &DAG, EVT PtrType = Ptr.getValueType(); unsigned NewAlign = MinAlign(Ld->getAlignment(), 4); - SDValue NewPtr = DAG.getNode(ISD::ADD, Op.getDebugLoc(), + SDValue NewPtr = DAG.getNode(ISD::ADD, SDLoc(Op), PtrType, Ptr, DAG.getConstant(4, PtrType)); - RetVal2 = DAG.getLoad(MVT::i32, Op.getDebugLoc(), + RetVal2 = DAG.getLoad(MVT::i32, SDLoc(Op), Ld->getChain(), NewPtr, Ld->getPointerInfo().getWithOffset(4), Ld->isVolatile(), Ld->isNonTemporal(), @@ -3193,7 +3492,7 @@ ARMTargetLowering::OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); bool LHSSeenZero = false; bool LHSOk = canChangeToInt(LHS, LHSSeenZero, Subtarget); @@ -3243,7 +3542,7 @@ SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) const { SDValue LHS = Op.getOperand(2); SDValue RHS = Op.getOperand(3); SDValue Dest = Op.getOperand(4); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (LHS.getValueType() == MVT::i32) { SDValue ARMcc; @@ -3284,7 +3583,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { SDValue Chain = Op.getOperand(0); SDValue Table = Op.getOperand(1); SDValue Index = Op.getOperand(2); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT PTy = getPointerTy(); JumpTableSDNode *JT = cast(Table); @@ -3320,7 +3619,7 @@ SDValue ARMTargetLowering::LowerBR_JT(SDValue Op, SelectionDAG &DAG) const { static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getValueType().getVectorElementType() == MVT::i32) { if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::f32) @@ -3342,7 +3641,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { if (VT.isVector()) return LowerVectorFP_TO_INT(Op, DAG); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Opc; switch 
(Op.getOpcode()) { @@ -3360,7 +3659,7 @@ static SDValue LowerFP_TO_INT(SDValue Op, SelectionDAG &DAG) { static SDValue LowerVectorINT_TO_FP(SDValue Op, SelectionDAG &DAG) { EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getOperand(0).getValueType().getVectorElementType() == MVT::i32) { if (VT.getVectorElementType() == MVT::f32) @@ -3396,7 +3695,7 @@ static SDValue LowerINT_TO_FP(SDValue Op, SelectionDAG &DAG) { if (VT.isVector()) return LowerVectorINT_TO_FP(Op, DAG); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Opc; switch (Op.getOpcode()) { @@ -3417,7 +3716,7 @@ SDValue ARMTargetLowering::LowerFCOPYSIGN(SDValue Op, SelectionDAG &DAG) const { // Implement fcopysign with a fabs and a conditional fneg. SDValue Tmp0 = Op.getOperand(0); SDValue Tmp1 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); EVT SrcVT = Tmp1.getValueType(); bool InGPR = Tmp0.getOpcode() == ISD::BITCAST || @@ -3501,7 +3800,7 @@ SDValue ARMTargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) const{ MFI->setReturnAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); if (Depth) { SDValue FrameAddr = LowerFRAMEADDR(Op, DAG); @@ -3521,7 +3820,7 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { MFI->setFrameAddressIsTaken(true); EVT VT = Op.getValueType(); - DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful + SDLoc dl(Op); // FIXME probably not meaningful unsigned Depth = cast(Op.getOperand(0))->getZExtValue(); unsigned FrameReg = (Subtarget->isThumb() || Subtarget->isTargetDarwin()) ? ARM::R7 : ARM::R11; @@ -3533,47 +3832,6 @@ SDValue ARMTargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) const { return FrameAddr; } -/// Custom Expand long vector extensions, where size(DestVec) > 2*size(SrcVec), -/// and size(DestVec) > 128-bits. -/// This is achieved by doing the one extension from the SrcVec, splitting the -/// result, extending these parts, and then concatenating these into the -/// destination. -static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { - SDValue Op = N->getOperand(0); - EVT SrcVT = Op.getValueType(); - EVT DestVT = N->getValueType(0); - - assert(DestVT.getSizeInBits() > 128 && - "Custom sext/zext expansion needs >128-bit vector."); - // If this is a normal length extension, use the default expansion. 
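// [Editorial sketch, not from the patch: the staged widening that the
// ExpandVectorExtension function being deleted here performed, restated over
// plain arrays. A v8i8 -> v8i32 extension became one i8 -> i16 extension, a
// split into two halves, an i16 -> i32 extension of each half, and a final
// concatenation.]
#include <array>
#include <cstdint>

static std::array<int32_t, 8> extendV8i8ToV8i32(std::array<int8_t, 8> Src) {
  std::array<int16_t, 8> Mid;        // one extension: v8i8 -> v8i16 (MidVT)
  for (unsigned i = 0; i < 8; ++i)
    Mid[i] = Src[i];
  std::array<int32_t, 8> Dst;        // extend each v4i16 half to v4i32 ...
  for (unsigned i = 0; i < 4; ++i) {
    Dst[i] = Mid[i];                 // ExtLo
    Dst[i + 4] = Mid[i + 4];         // ExtHi
  }
  return Dst;                        // ... and CONCAT_VECTORS the results
}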
- if (SrcVT.getSizeInBits()*4 != DestVT.getSizeInBits() && - SrcVT.getSizeInBits()*8 != DestVT.getSizeInBits()) - return SDValue(); - - DebugLoc dl = N->getDebugLoc(); - unsigned SrcEltSize = SrcVT.getVectorElementType().getSizeInBits(); - unsigned DestEltSize = DestVT.getVectorElementType().getSizeInBits(); - unsigned NumElts = SrcVT.getVectorNumElements(); - LLVMContext &Ctx = *DAG.getContext(); - SDValue Mid, SplitLo, SplitHi, ExtLo, ExtHi; - - EVT MidVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), - NumElts); - EVT SplitVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, SrcEltSize*2), - NumElts/2); - EVT ExtVT = EVT::getVectorVT(Ctx, EVT::getIntegerVT(Ctx, DestEltSize), - NumElts/2); - - Mid = DAG.getNode(N->getOpcode(), dl, MidVT, Op); - SplitLo = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, - DAG.getIntPtrConstant(0)); - SplitHi = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, SplitVT, Mid, - DAG.getIntPtrConstant(NumElts/2)); - ExtLo = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitLo); - ExtHi = DAG.getNode(N->getOpcode(), dl, ExtVT, SplitHi); - return DAG.getNode(ISD::CONCAT_VECTORS, dl, DestVT, ExtLo, ExtHi); -} - /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -3581,7 +3839,7 @@ static SDValue ExpandVectorExtension(SDNode *N, SelectionDAG &DAG) { /// vectors), since the legalizer won't know what to do with that. static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue Op = N->getOperand(0); // This function is only supposed to be called for i64 types, either as the @@ -3618,7 +3876,7 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { /// not support i64 elements, so sometimes the zero vectors will need to be /// explicitly constructed. Regardless, use a canonical VMOV to create the /// zero vector. -static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) { +static SDValue getZeroVector(EVT VT, SelectionDAG &DAG, SDLoc dl) { assert(VT.isVector() && "Expected a vector type"); // The canonical modified immediate encoding of a zero vector is....0! SDValue EncodedVal = DAG.getTargetConstant(0, MVT::i32); @@ -3634,7 +3892,7 @@ SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); @@ -3670,7 +3928,7 @@ SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, assert(Op.getNumOperands() == 3 && "Not a double-shift!"); EVT VT = Op.getValueType(); unsigned VTBits = VT.getSizeInBits(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue ShOpLo = Op.getOperand(0); SDValue ShOpHi = Op.getOperand(1); SDValue ShAmt = Op.getOperand(2); @@ -3703,7 +3961,7 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, // The ARM rounding mode value to FLT_ROUNDS mapping is 0->1, 1->2, 2->3, 3->0 // The formula we use to implement this is (((FPSCR + 1 << 22) >> 22) & 3) // so that the shift + and get folded into a bitfield extract. 
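// [Editorial check, not part of the patch: the rotation computed by the
// formula above, with FPSCR's two-bit RMode field sitting in bits 23:22.
// Note "FPSCR + 1 << 22" in the comment is meant as FPSCR + (1 << 22).]
#include <cstdint>
#include <cstdio>

int main() {
  for (uint32_t Mode = 0; Mode < 4; ++Mode) {
    uint32_t FPSCR = Mode << 22;                        // only RMode bits set
    uint32_t Rounds = ((FPSCR + (1u << 22)) >> 22) & 3; // 0->1,1->2,2->3,3->0
    std::printf("ARM RMode %u -> FLT_ROUNDS %u\n", Mode, Rounds);
  }
  return 0;
}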
- DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue FPSCR = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, dl, MVT::i32, DAG.getConstant(Intrinsic::arm_get_fpscr, MVT::i32)); @@ -3718,7 +3976,7 @@ SDValue ARMTargetLowering::LowerFLT_ROUNDS_(SDValue Op, static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (!ST->hasV6T2Ops()) return SDValue(); @@ -3742,7 +4000,7 @@ static SDValue LowerCTTZ(SDNode *N, SelectionDAG &DAG, /// vuzp: = [k0 k1 k2 k3 k0 k1 k2 k3] each ki is 8-bits) static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT8Bit = VT.is64BitVector() ? MVT::v8i8 : MVT::v16i8; SDValue N0 = DAG.getNode(ISD::BITCAST, DL, VT8Bit, N->getOperand(0)); @@ -3764,7 +4022,7 @@ static SDValue getCTPOP16BitCounts(SDNode *N, SelectionDAG &DAG) { /// v4i16:Extracted = [k0 k1 k2 k3 ] static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue BitCounts = getCTPOP16BitCounts(N, DAG); if (VT.is64BitVector()) { @@ -3799,7 +4057,7 @@ static SDValue lowerCTPOP16BitElements(SDNode *N, SelectionDAG &DAG) { /// static SDValue lowerCTPOP32BitElements(SDNode *N, SelectionDAG &DAG) { EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); EVT VT16Bit = VT.is64BitVector() ? MVT::v4i16 : MVT::v8i16; @@ -3838,7 +4096,7 @@ static SDValue LowerCTPOP(SDNode *N, SelectionDAG &DAG, static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); if (!VT.isVector()) return SDValue(); @@ -3873,7 +4131,7 @@ static SDValue LowerShift(SDNode *N, SelectionDAG &DAG, static SDValue Expand64BitShift(SDNode *N, SelectionDAG &DAG, const ARMSubtarget *ST) { EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); // We can get here for a node like i32 = ISD::SHL i32, i64 if (VT != MVT::i64) @@ -3919,7 +4177,7 @@ static SDValue LowerVSETCC(SDValue Op, SelectionDAG &DAG) { SDValue CC = Op.getOperand(2); EVT VT = Op.getValueType(); ISD::CondCode SetCCOpcode = cast(CC)->get(); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); if (Op.getOperand(1).getValueType().isFloatingPoint()) { switch (SetCCOpcode) { @@ -4177,18 +4435,26 @@ static SDValue isNEONModifiedImm(uint64_t SplatBits, uint64_t SplatUndef, SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { - if (!ST->useNEONForSinglePrecisionFP() || !ST->hasVFP3() || ST->hasD16()) + if (!ST->hasVFP3()) return SDValue(); + bool IsDouble = Op.getValueType() == MVT::f64; ConstantFPSDNode *CFP = cast(Op); - assert(Op.getValueType() == MVT::f32 && - "ConstantFP custom lowering should only occur for f32."); // Try splatting with a VMOV.f32... APFloat FPVal = CFP->getValueAPF(); - int ImmVal = ARM_AM::getFP32Imm(FPVal); + int ImmVal = IsDouble ? ARM_AM::getFP64Imm(FPVal) : ARM_AM::getFP32Imm(FPVal); + if (ImmVal != -1) { - DebugLoc DL = Op.getDebugLoc(); + if (IsDouble || !ST->useNEONForSinglePrecisionFP()) { + // We have code in place to select a valid ConstantFP already, no need to + // do any mangling. + return Op; + } + + // It's a float and we are trying to use NEON operations where + // possible. Lower it to a splat followed by an extract. 
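// [Editorial sketch of the immediate set assumed above; the real checks live
// in ARM_AM::getFP32Imm/getFP64Imm, whose bodies are not part of this patch.
// A VFPv3 FP immediate encodes +/- (16..31)/16 * 2^e with e in [-3, 4];
// values outside that set make getFP32Imm return -1, falling through to the
// VMOV.i32/VMVN.i32 attempts further down.]
#include <cmath>

static bool isVFP3FPImm(float F) {
  if (std::signbit(F))
    F = -F;                            // the sign is a free encoding bit
  for (int E = -3; E <= 4; ++E)
    for (int N = 16; N <= 31; ++N)
      if (F == std::ldexp(N / 16.0f, E))
        return true;
  return false;                        // note: 0.0 itself is not encodable
}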
+ SDLoc DL(Op); SDValue NewVal = DAG.getTargetConstant(ImmVal, MVT::i32); SDValue VecConstant = DAG.getNode(ARMISD::VMOVFPIMM, DL, MVT::v2f32, NewVal); @@ -4196,15 +4462,31 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, DAG.getConstant(0, MVT::i32)); } - // If that fails, try a VMOV.i32 + // The rest of our options are NEON only, make sure that's allowed before + // proceeding.. + if (!ST->hasNEON() || (!IsDouble && !ST->useNEONForSinglePrecisionFP())) + return SDValue(); + EVT VMovVT; - unsigned iVal = FPVal.bitcastToAPInt().getZExtValue(); - SDValue NewVal = isNEONModifiedImm(iVal, 0, 32, DAG, VMovVT, false, - VMOVModImm); + uint64_t iVal = FPVal.bitcastToAPInt().getZExtValue(); + + // It wouldn't really be worth bothering for doubles except for one very + // important value, which does happen to match: 0.0. So make sure we don't do + // anything stupid. + if (IsDouble && (iVal & 0xffffffff) != (iVal >> 32)) + return SDValue(); + + // Try a VMOV.i32 (FIXME: i8, i16, or i64 could work too). + SDValue NewVal = isNEONModifiedImm(iVal & 0xffffffffU, 0, 32, DAG, VMovVT, + false, VMOVModImm); if (NewVal != SDValue()) { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMOVIMM, DL, VMovVT, NewVal); + if (IsDouble) + return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); + + // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, @@ -4212,11 +4494,16 @@ SDValue ARMTargetLowering::LowerConstantFP(SDValue Op, SelectionDAG &DAG, } // Finally, try a VMVN.i32 - NewVal = isNEONModifiedImm(~iVal & 0xffffffff, 0, 32, DAG, VMovVT, false, - VMVNModImm); + NewVal = isNEONModifiedImm(~iVal & 0xffffffffU, 0, 32, DAG, VMovVT, + false, VMVNModImm); if (NewVal != SDValue()) { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue VecConstant = DAG.getNode(ARMISD::VMVNIMM, DL, VMovVT, NewVal); + + if (IsDouble) + return DAG.getNode(ISD::BITCAST, DL, MVT::f64, VecConstant); + + // It's a float: cast and extract a vector element. SDValue VecFConstant = DAG.getNode(ISD::BITCAST, DL, MVT::v2f32, VecConstant); return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, MVT::f32, VecFConstant, @@ -4475,7 +4762,7 @@ static bool isReverseMask(ArrayRef M, EVT VT) { // instruction, return an SDValue of such a constant (will become a MOV // instruction). Otherwise return null. static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, - const ARMSubtarget *ST, DebugLoc dl) { + const ARMSubtarget *ST, SDLoc dl) { uint64_t Val; if (!isa(N)) return SDValue(); @@ -4496,7 +4783,7 @@ static SDValue IsSingleInstrConstant(SDValue N, SelectionDAG &DAG, SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const { BuildVectorSDNode *BVN = cast(Op.getNode()); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); APInt SplatBits, SplatUndef; @@ -4580,7 +4867,9 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, if (ValueCounts.size() == 0) return DAG.getUNDEF(VT); - if (isOnlyLowElement) + // Loads are better lowered with insert_vector_elt/ARMISD::BUILD_VECTOR. + // Keep going if we are hitting this case. 
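// [Editorial sketch: the element-by-element fallback added near the end of
// this function, restated over a plain array. Starting from an undefined
// vector, each defined operand is inserted into its lane; undef lanes are
// left untouched. Hypothetical helper, not an LLVM API.]
#include <array>
#include <cstddef>
#include <optional>

template <typename T, std::size_t N>
std::array<T, N> buildVector(const std::array<std::optional<T>, N> &Ops) {
  std::array<T, N> Vec = {};           // stands in for DAG.getUNDEF(VT)
  for (std::size_t I = 0; I < N; ++I)
    if (Ops[I])                        // skip ISD::UNDEF operands
      Vec[I] = *Ops[I];                // INSERT_VECTOR_ELT at lane I
  return Vec;
}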
+ if (isOnlyLowElement && !ISD::isNormalLoad(Value.getNode())) return DAG.getNode(ISD::SCALAR_TO_VECTOR, dl, VT, Value); unsigned EltSize = VT.getVectorElementType().getSizeInBits(); @@ -4679,6 +4968,24 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, return DAG.getNode(ISD::BITCAST, dl, VT, Val); } + // If all else fails, just use a sequence of INSERT_VECTOR_ELT when we + // know the default expansion would otherwise fall back on something even + // worse. For a vector with one or two non-undef values, that's + // scalar_to_vector for the elements followed by a shuffle (provided the + // shuffle is valid for the target) and materialization element by element + // on the stack followed by a load for everything else. + if (!isConstant && !usesOnlyOneValue) { + SDValue Vec = DAG.getUNDEF(VT); + for (unsigned i = 0 ; i < NumElts; ++i) { + SDValue V = Op.getOperand(i); + if (V.getOpcode() == ISD::UNDEF) + continue; + SDValue LaneIdx = DAG.getConstant(i, MVT::i32); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VT, Vec, V, LaneIdx); + } + return Vec; + } + return SDValue(); } @@ -4686,7 +4993,7 @@ SDValue ARMTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, // shuffle in combination with VEXTs. SDValue ARMTargetLowering::ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); unsigned NumElts = VT.getVectorNumElements(); @@ -4875,7 +5182,7 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl &M, /// the specified operations to build the shuffle. static SDValue GeneratePerfectShuffle(unsigned PFEntry, SDValue LHS, SDValue RHS, SelectionDAG &DAG, - DebugLoc dl) { + SDLoc dl) { unsigned OpNum = (PFEntry >> 26) & 0x0F; unsigned LHSID = (PFEntry >> 13) & ((1 << 13)-1); unsigned RHSID = (PFEntry >> 0) & ((1 << 13)-1); @@ -4955,7 +5262,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, // Check to see if we can use the VTBL instruction. SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SmallVector VTBLMask; for (ArrayRef::iterator @@ -4974,7 +5281,7 @@ static SDValue LowerVECTOR_SHUFFLEv8i8(SDValue Op, static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, SelectionDAG &DAG) { - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue OpLHS = Op.getOperand(0); EVT VT = OpLHS.getValueType(); @@ -4992,7 +5299,7 @@ static SDValue LowerReverse_VECTOR_SHUFFLEv16i8_v8i16(SDValue Op, static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) { SDValue V1 = Op.getOperand(0); SDValue V2 = Op.getOperand(1); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); EVT VT = Op.getValueType(); ShuffleVectorSDNode *SVN = cast(Op.getNode()); @@ -5156,7 +5463,7 @@ static SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) { SDValue Vec = Op.getOperand(0); if (Op.getValueType() == MVT::i32 && Vec.getValueType().getVectorElementType().getSizeInBits() < 32) { - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); return DAG.getNode(ARMISD::VGETLANEu, dl, MVT::i32, Vec, Lane); } @@ -5168,7 +5475,7 @@ static SDValue LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) { // two 64-bit vectors are concatenated to a 128-bit vector. 
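// [Editorial sketch: the bit layout of the perfect-shuffle table entries that
// GeneratePerfectShuffle above decodes. Each 32-bit entry packs a 4-bit
// opcode at bit 26 and two 13-bit operand IDs; the cost bits above the
// opcode are not consulted there.]
#include <cstdint>

struct PFFields { unsigned OpNum, LHSID, RHSID; };

static PFFields decodePFEntry(uint32_t Entry) {
  return { (Entry >> 26) & 0x0Fu,      // which shuffle operation to emit
           (Entry >> 13) & 0x1FFFu,    // table ID of the left sub-shuffle
           (Entry >> 0) & 0x1FFFu };   // table ID of the right sub-shuffle
}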
assert(Op.getValueType().is128BitVector() && Op.getNumOperands() == 2 && "unexpected CONCAT_VECTORS"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue Val = DAG.getUNDEF(MVT::v2f64); SDValue Op0 = Op.getOperand(0); SDValue Op1 = Op.getOperand(1); @@ -5291,7 +5598,7 @@ static SDValue AddRequiredExtensionForVMULL(SDValue N, SelectionDAG &DAG, // Must extend size to at least 64 bits to be used as an operand for VMULL. EVT NewVT = getExtensionTo64Bits(OrigTy); - return DAG.getNode(ExtOpcode, N->getDebugLoc(), NewVT, N); + return DAG.getNode(ExtOpcode, SDLoc(N), NewVT, N); } /// SkipLoadExtensionForVMULL - return a load of the original vector size that @@ -5304,7 +5611,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // The load already has the right type. if (ExtendedTy == LD->getMemoryVT()) - return DAG.getLoad(LD->getMemoryVT(), LD->getDebugLoc(), LD->getChain(), + return DAG.getLoad(LD->getMemoryVT(), SDLoc(LD), LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), LD->isInvariant(), LD->getAlignment()); @@ -5312,7 +5619,7 @@ static SDValue SkipLoadExtensionForVMULL(LoadSDNode *LD, SelectionDAG& DAG) { // We need to create a zextload/sextload. We cannot just create a load // followed by a zext/zext node because LowerMUL is also run during normal // operation legalization where we can't create illegal types. - return DAG.getExtLoad(LD->getExtensionType(), LD->getDebugLoc(), ExtendedTy, + return DAG.getExtLoad(LD->getExtensionType(), SDLoc(LD), ExtendedTy, LD->getChain(), LD->getBasePtr(), LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), LD->getAlignment()); @@ -5341,7 +5648,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { assert(BVN->getOpcode() == ISD::BUILD_VECTOR && BVN->getValueType(0) == MVT::v4i32 && "expected v4i32 BUILD_VECTOR"); unsigned LowElt = DAG.getTargetLoweringInfo().isBigEndian() ? 1 : 0; - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), MVT::v2i32, + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::v2i32, BVN->getOperand(LowElt), BVN->getOperand(LowElt+2)); } // Construct a new BUILD_VECTOR with elements truncated to half the size. @@ -5358,7 +5665,7 @@ static SDValue SkipExtensionForVMULL(SDNode *N, SelectionDAG &DAG) { // The values are implicitly truncated so sext vs. zext doesn't matter. Ops.push_back(DAG.getConstant(CInt.zextOrTrunc(32), MVT::i32)); } - return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), + return DAG.getNode(ISD::BUILD_VECTOR, SDLoc(N), MVT::getVectorVT(TruncVT, NumElts), Ops.data(), NumElts); } @@ -5430,7 +5737,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { } // Legalize to a VMULL instruction. - DebugLoc DL = Op.getDebugLoc(); + SDLoc DL(Op); SDValue Op0; SDValue Op1 = SkipExtensionForVMULL(N1, DAG); if (!isMLA) { @@ -5460,7 +5767,7 @@ static SDValue LowerMUL(SDValue Op, SelectionDAG &DAG) { } static SDValue -LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { +LowerSDIV_v4i8(SDValue X, SDValue Y, SDLoc dl, SelectionDAG &DAG) { // Convert to float // float4 xf = vcvt_f32_s32(vmovl_s16(a.lo)); // float4 yf = vcvt_f32_s32(vmovl_s16(b.lo)); @@ -5489,7 +5796,7 @@ LowerSDIV_v4i8(SDValue X, SDValue Y, DebugLoc dl, SelectionDAG &DAG) { } static SDValue -LowerSDIV_v4i16(SDValue N0, SDValue N1, DebugLoc dl, SelectionDAG &DAG) { +LowerSDIV_v4i16(SDValue N0, SDValue N1, SDLoc dl, SelectionDAG &DAG) { SDValue N2; // Convert to float. 
// float4 yf = vcvt_f32_s32(vmovl_s16(y)); @@ -5530,7 +5837,7 @@ static SDValue LowerSDIV(SDValue Op, SelectionDAG &DAG) { assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::SDIV"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; @@ -5565,7 +5872,7 @@ static SDValue LowerUDIV(SDValue Op, SelectionDAG &DAG) { assert((VT == MVT::v4i16 || VT == MVT::v8i8) && "unexpected type for custom-lowering ISD::UDIV"); - DebugLoc dl = Op.getDebugLoc(); + SDLoc dl(Op); SDValue N0 = Op.getOperand(0); SDValue N1 = Op.getOperand(1); SDValue N2, N3; @@ -5649,12 +5956,76 @@ static SDValue LowerADDC_ADDE_SUBC_SUBE(SDValue Op, SelectionDAG &DAG) { } if (!ExtraOp) - return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1)); - return DAG.getNode(Opc, Op->getDebugLoc(), VTs, Op.getOperand(0), + return DAG.getNode(Opc, SDLoc(Op), VTs, Op.getOperand(0), Op.getOperand(1), Op.getOperand(2)); } +SDValue ARMTargetLowering::LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetDarwin()); + + // For iOS, we want to call an alternative entry point: __sincos_stret, + // return values are passed via sret. + SDLoc dl(Op); + SDValue Arg = Op.getOperand(0); + EVT ArgVT = Arg.getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + + MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo(); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + + // Pair of floats / doubles used to pass the result. + StructType *RetTy = StructType::get(ArgTy, ArgTy, NULL); + + // Create stack object for sret. + const uint64_t ByteSize = TLI.getDataLayout()->getTypeAllocSize(RetTy); + const unsigned StackAlign = TLI.getDataLayout()->getPrefTypeAlignment(RetTy); + int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false); + SDValue SRet = DAG.getFrameIndex(FrameIdx, TLI.getPointerTy()); + + ArgListTy Args; + ArgListEntry Entry; + + Entry.Node = SRet; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Entry.isSRet = true; + Args.push_back(Entry); + + Entry.Node = Arg; + Entry.Ty = ArgTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + const char *LibcallName = (ArgVT == MVT::f64) + ? "__sincos_stret" : "__sincosf_stret"; + SDValue Callee = DAG.getExternalSymbol(LibcallName, getPointerTy()); + + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), Type::getVoidTy(*DAG.getContext()), + false, false, false, false, 0, + CallingConv::C, /*isTaillCall=*/false, + /*doesNotRet=*/false, /*isReturnValueUsed*/false, + Callee, Args, DAG, dl); + std::pair CallResult = LowerCallTo(CLI); + + SDValue LoadSin = DAG.getLoad(ArgVT, dl, CallResult.second, SRet, + MachinePointerInfo(), false, false, false, 0); + + // Address of cos field. 
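// [Editorial sketch: the sret layout the LowerFSINCOS lowering above relies
// on. __sincos_stret writes a two-field {sin, cos} struct through the hidden
// pointer, so the cos value sits one element past the base address -- hence
// the ISD::ADD of ArgVT.getStoreSize() just below. Struct names here are
// illustrative only.]
#include <cstddef>

struct SinCosRetF64 { double SinVal; double CosVal; };  // f64 variant
struct SinCosRetF32 { float SinVal; float CosVal; };    // f32 variant

static_assert(offsetof(SinCosRetF64, CosVal) == sizeof(double),
              "cos lives at getStoreSize() bytes past the sret base");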
+ SDValue Add = DAG.getNode(ISD::ADD, dl, getPointerTy(), SRet, + DAG.getIntPtrConstant(ArgVT.getStoreSize())); + SDValue LoadCos = DAG.getLoad(ArgVT, dl, LoadSin.getValue(1), Add, + MachinePointerInfo(), false, false, false, 0); + + SDVTList Tys = DAG.getVTList(ArgVT, ArgVT); + return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, + LoadSin.getValue(0), LoadCos.getValue(0)); +} + static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { // Monotonic load/store is legal for all targets if (cast(Op)->getOrdering() <= Monotonic) @@ -5665,40 +6036,73 @@ static SDValue LowerAtomicLoadStore(SDValue Op, SelectionDAG &DAG) { return SDValue(); } - static void ReplaceATOMIC_OP_64(SDNode *Node, SmallVectorImpl& Results, - SelectionDAG &DAG, unsigned NewOp) { - DebugLoc dl = Node->getDebugLoc(); + SelectionDAG &DAG) { + SDLoc dl(Node); assert (Node->getValueType(0) == MVT::i64 && "Only know how to expand i64 atomics"); + AtomicSDNode *AN = cast(Node); SmallVector Ops; Ops.push_back(Node->getOperand(0)); // Chain Ops.push_back(Node->getOperand(1)); // Ptr - // Low part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(0))); - // High part of Val1 - Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(2), DAG.getIntPtrConstant(1))); - if (NewOp == ARMISD::ATOMCMPXCHG64_DAG) { - // High part of Val1 + for(unsigned i=2; igetNumOperands(); i++) { + // Low part Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(3), DAG.getIntPtrConstant(0))); - // High part of Val2 + Node->getOperand(i), DAG.getIntPtrConstant(0))); + // High part Ops.push_back(DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, - Node->getOperand(3), DAG.getIntPtrConstant(1))); + Node->getOperand(i), DAG.getIntPtrConstant(1))); } SDVTList Tys = DAG.getVTList(MVT::i32, MVT::i32, MVT::Other); SDValue Result = - DAG.getMemIntrinsicNode(NewOp, dl, Tys, Ops.data(), Ops.size(), MVT::i64, - cast(Node)->getMemOperand()); + DAG.getAtomic(Node->getOpcode(), dl, MVT::i64, Tys, Ops.data(), Ops.size(), + cast(Node)->getMemOperand(), AN->getOrdering(), + AN->getSynchScope()); SDValue OpsF[] = { Result.getValue(0), Result.getValue(1) }; Results.push_back(DAG.getNode(ISD::BUILD_PAIR, dl, MVT::i64, OpsF, 2)); Results.push_back(Result.getValue(2)); } +static void ReplaceREADCYCLECOUNTER(SDNode *N, + SmallVectorImpl &Results, + SelectionDAG &DAG, + const ARMSubtarget *Subtarget) { + SDLoc DL(N); + SDValue Cycles32, OutChain; + + if (Subtarget->hasPerfMon()) { + // Under Power Management extensions, the cycle-count is: + // mrc p15, #0, , c9, c13, #0 + SDValue Ops[] = { N->getOperand(0), // Chain + DAG.getConstant(Intrinsic::arm_mrc, MVT::i32), + DAG.getConstant(15, MVT::i32), + DAG.getConstant(0, MVT::i32), + DAG.getConstant(9, MVT::i32), + DAG.getConstant(13, MVT::i32), + DAG.getConstant(0, MVT::i32) + }; + + Cycles32 = DAG.getNode(ISD::INTRINSIC_W_CHAIN, DL, + DAG.getVTList(MVT::i32, MVT::Other), &Ops[0], + array_lengthof(Ops)); + OutChain = Cycles32.getValue(1); + } else { + // Intrinsic is defined to return 0 on unsupported platforms. Technically + // there are older ARM CPUs that have implementation-specific ways of + // obtaining this information (FIXME!). 
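// [Editorial sketch, assuming an ARMv7 machine where the kernel has enabled
// user-mode access to the PMU: the same PMCCNTR read that the lowering above
// emits, written as GCC/Clang inline assembly. The i64 intrinsic result is
// then formed by pairing this 32-bit count with a zero high word.]
#include <cstdint>

static inline uint32_t readCycleCounter32() {
  uint32_t Cycles;
  asm volatile("mrc p15, 0, %0, c9, c13, 0" : "=r"(Cycles));
  return Cycles;
}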
+ Cycles32 = DAG.getConstant(0, MVT::i32); + OutChain = DAG.getEntryNode(); + } + + + SDValue Cycles64 = DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, + Cycles32, DAG.getConstant(0, MVT::i32)); + Results.push_back(Cycles64); + Results.push_back(OutChain); +} + SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { switch (Op.getOpcode()) { default: llvm_unreachable("Don't know how to custom lower this!"); @@ -5753,6 +6157,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::SUBE: return LowerADDC_ADDE_SUBC_SUBE(Op, DAG); case ISD::ATOMIC_LOAD: case ISD::ATOMIC_STORE: return LowerAtomicLoadStore(Op, DAG); + case ISD::FSINCOS: return LowerFSINCOS(Op, DAG); + case ISD::SDIVREM: + case ISD::UDIVREM: return LowerDivRem(Op, DAG); } } @@ -5768,49 +6175,28 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N, case ISD::BITCAST: Res = ExpandBITCAST(N, DAG); break; - case ISD::SIGN_EXTEND: - case ISD::ZERO_EXTEND: - Res = ExpandVectorExtension(N, DAG); - break; case ISD::SRL: case ISD::SRA: Res = Expand64BitShift(N, DAG, Subtarget); break; - case ISD::ATOMIC_LOAD_ADD: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMADD64_DAG); + case ISD::READCYCLECOUNTER: + ReplaceREADCYCLECOUNTER(N, Results, DAG, Subtarget); return; + case ISD::ATOMIC_STORE: + case ISD::ATOMIC_LOAD: + case ISD::ATOMIC_LOAD_ADD: case ISD::ATOMIC_LOAD_AND: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMAND64_DAG); - return; case ISD::ATOMIC_LOAD_NAND: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMNAND64_DAG); - return; case ISD::ATOMIC_LOAD_OR: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMOR64_DAG); - return; case ISD::ATOMIC_LOAD_SUB: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSUB64_DAG); - return; case ISD::ATOMIC_LOAD_XOR: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMXOR64_DAG); - return; case ISD::ATOMIC_SWAP: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMSWAP64_DAG); - return; case ISD::ATOMIC_CMP_SWAP: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMCMPXCHG64_DAG); - return; case ISD::ATOMIC_LOAD_MIN: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMIN64_DAG); - return; case ISD::ATOMIC_LOAD_UMIN: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMIN64_DAG); - return; case ISD::ATOMIC_LOAD_MAX: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMMAX64_DAG); - return; case ISD::ATOMIC_LOAD_UMAX: - ReplaceATOMIC_OP_64(N, Results, DAG, ARMISD::ATOMUMAX64_DAG); + ReplaceATOMIC_OP_64(N, Results, DAG); return; } if (Res.getNode()) @@ -5830,6 +6216,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, unsigned oldval = MI->getOperand(2).getReg(); unsigned newval = MI->getOperand(3).getReg(); const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + AtomicOrdering Ord = static_cast(MI->getOperand(4).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -5845,21 +6232,7 @@ ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI, } unsigned ldrOpc, strOpc; - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); - case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; - break; - case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; - break; - case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? 
ARM::t2STREX : ARM::STREX; - break; - } + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); MachineFunction *MF = BB->getParent(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -5939,6 +6312,7 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, unsigned dest = MI->getOperand(0).getReg(); unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -5946,24 +6320,11 @@ ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB, if (isThumb2) { MRI.constrainRegClass(dest, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(incr, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc; - switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); - case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; - break; - case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; - break; - case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; - break; - } + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB); @@ -6047,6 +6408,7 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, unsigned ptr = MI->getOperand(1).getReg(); unsigned incr = MI->getOperand(2).getReg(); unsigned oldval = dest; + AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6054,24 +6416,20 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, if (isThumb2) { MRI.constrainRegClass(dest, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(incr, &ARM::rGPRRegClass); } unsigned ldrOpc, strOpc, extendOpc; + getExclusiveOperation(Size, Ord, isThumb2, ldrOpc, strOpc); switch (Size) { - default: llvm_unreachable("unsupported size for AtomicCmpSwap!"); + default: llvm_unreachable("unsupported size for AtomicBinaryMinMax!"); case 1: - ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB; - strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB; extendOpc = isThumb2 ? ARM::t2SXTB : ARM::SXTB; break; case 2: - ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH; - strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH; extendOpc = isThumb2 ? ARM::t2SXTH : ARM::SXTH; break; case 4: - ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX; - strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX; extendOpc = 0; break; } @@ -6115,7 +6473,10 @@ ARMTargetLowering::EmitAtomicBinaryMinMax(MachineInstr *MI, // Sign extend the value, if necessary. if (signExtend && extendOpc) { - oldval = MRI.createVirtualRegister(&ARM::GPRRegClass); + oldval = MRI.createVirtualRegister(isThumb2 ? &ARM::rGPRRegClass + : &ARM::GPRnopcRegClass); + if (!isThumb2) + MRI.constrainRegClass(dest, &ARM::GPRnopcRegClass); AddDefaultPred(BuildMI(BB, dl, TII->get(extendOpc), oldval) .addReg(dest) .addImm(0)); @@ -6153,7 +6514,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, unsigned Op1, unsigned Op2, bool NeedsCarry, bool IsCmpxchg, bool IsMinMax, ARMCC::CondCodes CC) const { - // This also handles ATOMIC_SWAP, indicated by Op1==0. 
+ // This also handles ATOMIC_SWAP and ATOMIC_STORE, indicated by Op1==0. const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); const BasicBlock *LLVM_BB = BB->getBasicBlock(); @@ -6161,11 +6522,15 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MachineFunction::iterator It = BB; ++It; + bool isStore = (MI->getOpcode() == ARM::ATOMIC_STORE_I64); + unsigned offset = (isStore ? -2 : 0); unsigned destlo = MI->getOperand(0).getReg(); unsigned desthi = MI->getOperand(1).getReg(); - unsigned ptr = MI->getOperand(2).getReg(); - unsigned vallo = MI->getOperand(3).getReg(); - unsigned valhi = MI->getOperand(4).getReg(); + unsigned ptr = MI->getOperand(offset+2).getReg(); + unsigned vallo = MI->getOperand(offset+3).getReg(); + unsigned valhi = MI->getOperand(offset+4).getReg(); + unsigned OrdIdx = offset + (IsCmpxchg ? 7 : 5); + AtomicOrdering Ord = static_cast(MI->getOperand(OrdIdx).getImm()); DebugLoc dl = MI->getDebugLoc(); bool isThumb2 = Subtarget->isThumb2(); @@ -6174,8 +6539,13 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + MRI.constrainRegClass(vallo, &ARM::rGPRRegClass); + MRI.constrainRegClass(valhi, &ARM::rGPRRegClass); } + unsigned ldrOpc, strOpc; + getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); + MachineBasicBlock *loopMBB = MF->CreateMachineBasicBlock(LLVM_BB); MachineBasicBlock *contBB = 0, *cont2BB = 0; if (IsCmpxchg || IsMinMax) @@ -6215,21 +6585,23 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // fallthrough --> exitMBB BB = loopMBB; - // Load - if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2LDREXD)) - .addReg(destlo, RegState::Define) - .addReg(desthi, RegState::Define) - .addReg(ptr)); - } else { - unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDREXD)) - .addReg(GPRPair0, RegState::Define).addReg(ptr)); - // Copy r2/r3 into dest. (This copy will normally be coalesced.) - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) - .addReg(GPRPair0, 0, ARM::gsub_0); - BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) - .addReg(GPRPair0, 0, ARM::gsub_1); + if (!isStore) { + // Load + if (isThumb2) { + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) + .addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr)); + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc)) + .addReg(GPRPair0, RegState::Define).addReg(ptr)); + // Copy r2/r3 into dest. (This copy will normally be coalesced.) + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(BB, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); + } } unsigned StoreLo, StoreHi; @@ -6281,7 +6653,9 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, // Store if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2STREXD), storesuccess) + MRI.constrainRegClass(StoreLo, &ARM::rGPRRegClass); + MRI.constrainRegClass(StoreHi, &ARM::rGPRRegClass); + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) .addReg(StoreLo).addReg(StoreHi).addReg(ptr)); } else { // Marshal a pair... 
@@ -6299,7 +6673,7 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, .addImm(ARM::gsub_1); // ...and store it - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::STREXD), storesuccess) + AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), storesuccess) .addReg(StorePair).addReg(ptr)); } // Cmp+jump @@ -6320,6 +6694,51 @@ ARMTargetLowering::EmitAtomicBinary64(MachineInstr *MI, MachineBasicBlock *BB, return BB; } +MachineBasicBlock * +ARMTargetLowering::EmitAtomicLoad64(MachineInstr *MI, MachineBasicBlock *BB) const { + + const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); + + unsigned destlo = MI->getOperand(0).getReg(); + unsigned desthi = MI->getOperand(1).getReg(); + unsigned ptr = MI->getOperand(2).getReg(); + AtomicOrdering Ord = static_cast(MI->getOperand(3).getImm()); + DebugLoc dl = MI->getDebugLoc(); + bool isThumb2 = Subtarget->isThumb2(); + + MachineRegisterInfo &MRI = BB->getParent()->getRegInfo(); + if (isThumb2) { + MRI.constrainRegClass(destlo, &ARM::rGPRRegClass); + MRI.constrainRegClass(desthi, &ARM::rGPRRegClass); + MRI.constrainRegClass(ptr, &ARM::rGPRRegClass); + } + unsigned ldrOpc, strOpc; + getExclusiveOperation(8, Ord, isThumb2, ldrOpc, strOpc); + + MachineInstrBuilder MIB = BuildMI(*BB, MI, dl, TII->get(ldrOpc)); + + if (isThumb2) { + MIB.addReg(destlo, RegState::Define) + .addReg(desthi, RegState::Define) + .addReg(ptr); + + } else { + unsigned GPRPair0 = MRI.createVirtualRegister(&ARM::GPRPairRegClass); + MIB.addReg(GPRPair0, RegState::Define).addReg(ptr); + + // Copy GPRPair0 into dest. (This copy will normally be coalesced.) + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), destlo) + .addReg(GPRPair0, 0, ARM::gsub_0); + BuildMI(*BB, MI, dl, TII->get(TargetOpcode::COPY), desthi) + .addReg(GPRPair0, 0, ARM::gsub_1); + } + AddDefaultPred(MIB); + + MI->eraseFromParent(); // The instruction is gone now. + + return BB; +} + /// SetupEntryBlockForSjLj - Insert code into the entry block that creates and /// registers the function context. void ARMTargetLowering:: @@ -6851,8 +7270,109 @@ MachineBasicBlock *OtherSucc(MachineBasicBlock *MBB, MachineBasicBlock *Succ) { llvm_unreachable("Expecting a BB with two successors!"); } -MachineBasicBlock *ARMTargetLowering:: -EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { +/// Return the load opcode for a given load size. If load size >= 8, +/// neon opcode will be returned. +static unsigned getLdOpcode(unsigned LdSize, bool IsThumb1, bool IsThumb2) { + if (LdSize >= 8) + return LdSize == 16 ? ARM::VLD1q32wb_fixed + : LdSize == 8 ? ARM::VLD1d32wb_fixed : 0; + if (IsThumb1) + return LdSize == 4 ? ARM::tLDRi + : LdSize == 2 ? ARM::tLDRHi + : LdSize == 1 ? ARM::tLDRBi : 0; + if (IsThumb2) + return LdSize == 4 ? ARM::t2LDR_POST + : LdSize == 2 ? ARM::t2LDRH_POST + : LdSize == 1 ? ARM::t2LDRB_POST : 0; + return LdSize == 4 ? ARM::LDR_POST_IMM + : LdSize == 2 ? ARM::LDRH_POST + : LdSize == 1 ? ARM::LDRB_POST_IMM : 0; +} + +/// Return the store opcode for a given store size. If store size >= 8, +/// neon opcode will be returned. +static unsigned getStOpcode(unsigned StSize, bool IsThumb1, bool IsThumb2) { + if (StSize >= 8) + return StSize == 16 ? ARM::VST1q32wb_fixed + : StSize == 8 ? ARM::VST1d32wb_fixed : 0; + if (IsThumb1) + return StSize == 4 ? ARM::tSTRi + : StSize == 2 ? ARM::tSTRHi + : StSize == 1 ? ARM::tSTRBi : 0; + if (IsThumb2) + return StSize == 4 ? ARM::t2STR_POST + : StSize == 2 ? ARM::t2STRH_POST + : StSize == 1 ? ARM::t2STRB_POST : 0; + return StSize == 4 ? 
ARM::STR_POST_IMM + : StSize == 2 ? ARM::STRH_POST + : StSize == 1 ? ARM::STRB_POST_IMM : 0; +} + +/// Emit a post-increment load operation with given size. The instructions +/// will be added to BB at Pos. +static void emitPostLd(MachineBasicBlock *BB, MachineInstr *Pos, + const TargetInstrInfo *TII, DebugLoc dl, + unsigned LdSize, unsigned Data, unsigned AddrIn, + unsigned AddrOut, bool IsThumb1, bool IsThumb2) { + unsigned LdOpc = getLdOpcode(LdSize, IsThumb1, IsThumb2); + assert(LdOpc != 0 && "Should have a load opcode"); + if (LdSize >= 8) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define).addReg(AddrIn) + .addImm(0)); + } else if (IsThumb1) { + // load + update AddrIn + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrIn).addImm(0)); + MachineInstrBuilder MIB = + BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); + MIB = AddDefaultT1CC(MIB); + MIB.addReg(AddrIn).addImm(LdSize); + AddDefaultPred(MIB); + } else if (IsThumb2) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define).addReg(AddrIn) + .addImm(LdSize)); + } else { // arm + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(LdOpc), Data) + .addReg(AddrOut, RegState::Define).addReg(AddrIn) + .addReg(0).addImm(LdSize)); + } +} + +/// Emit a post-increment store operation with given size. The instructions +/// will be added to BB at Pos. +static void emitPostSt(MachineBasicBlock *BB, MachineInstr *Pos, + const TargetInstrInfo *TII, DebugLoc dl, + unsigned StSize, unsigned Data, unsigned AddrIn, + unsigned AddrOut, bool IsThumb1, bool IsThumb2) { + unsigned StOpc = getStOpcode(StSize, IsThumb1, IsThumb2); + assert(StOpc != 0 && "Should have a store opcode"); + if (StSize >= 8) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(AddrIn).addImm(0).addReg(Data)); + } else if (IsThumb1) { + // store + update AddrIn + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc)).addReg(Data) + .addReg(AddrIn).addImm(0)); + MachineInstrBuilder MIB = + BuildMI(*BB, Pos, dl, TII->get(ARM::tADDi8), AddrOut); + MIB = AddDefaultT1CC(MIB); + MIB.addReg(AddrIn).addImm(StSize); + AddDefaultPred(MIB); + } else if (IsThumb2) { + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(Data).addReg(AddrIn).addImm(StSize)); + } else { // arm + AddDefaultPred(BuildMI(*BB, Pos, dl, TII->get(StOpc), AddrOut) + .addReg(Data).addReg(AddrIn).addReg(0) + .addImm(StSize)); + } +} + +MachineBasicBlock * +ARMTargetLowering::EmitStructByval(MachineInstr *MI, + MachineBasicBlock *BB) const { // This pseudo instruction has 3 operands: dst, src, size // We expand it to a loop if size > Subtarget->getMaxInlineSizeThreshold(). // Otherwise, we will generate unrolled scalar copies. @@ -6867,23 +7387,18 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { unsigned Align = MI->getOperand(3).getImm(); DebugLoc dl = MI->getDebugLoc(); - bool isThumb2 = Subtarget->isThumb2(); MachineFunction *MF = BB->getParent(); MachineRegisterInfo &MRI = MF->getRegInfo(); - unsigned ldrOpc, strOpc, UnitSize = 0; + unsigned UnitSize = 0; + const TargetRegisterClass *TRC = 0; + const TargetRegisterClass *VecTRC = 0; - const TargetRegisterClass *TRC = isThumb2 ? 
- (const TargetRegisterClass*)&ARM::tGPRRegClass : - (const TargetRegisterClass*)&ARM::GPRRegClass; - const TargetRegisterClass *TRC_Vec = 0; + bool IsThumb1 = Subtarget->isThumb1Only(); + bool IsThumb2 = Subtarget->isThumb2(); if (Align & 1) { - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; UnitSize = 1; } else if (Align & 2) { - ldrOpc = isThumb2 ? ARM::t2LDRH_POST : ARM::LDRH_POST; - strOpc = isThumb2 ? ARM::t2STRH_POST : ARM::STRH_POST; UnitSize = 2; } else { // Check whether we can use NEON instructions. @@ -6891,27 +7406,27 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat) && Subtarget->hasNEON()) { - if ((Align % 16 == 0) && SizeVal >= 16) { - ldrOpc = ARM::VLD1q32wb_fixed; - strOpc = ARM::VST1q32wb_fixed; + if ((Align % 16 == 0) && SizeVal >= 16) UnitSize = 16; - TRC_Vec = (const TargetRegisterClass*)&ARM::DPairRegClass; - } - else if ((Align % 8 == 0) && SizeVal >= 8) { - ldrOpc = ARM::VLD1d32wb_fixed; - strOpc = ARM::VST1d32wb_fixed; + else if ((Align % 8 == 0) && SizeVal >= 8) UnitSize = 8; - TRC_Vec = (const TargetRegisterClass*)&ARM::DPRRegClass; - } } // Can't use NEON instructions. - if (UnitSize == 0) { - ldrOpc = isThumb2 ? ARM::t2LDR_POST : ARM::LDR_POST_IMM; - strOpc = isThumb2 ? ARM::t2STR_POST : ARM::STR_POST_IMM; + if (UnitSize == 0) UnitSize = 4; - } } + // Select the correct opcode and register class for unit size load/store + bool IsNeon = UnitSize >= 8; + TRC = (IsThumb1 || IsThumb2) ? (const TargetRegisterClass *)&ARM::tGPRRegClass + : (const TargetRegisterClass *)&ARM::GPRRegClass; + if (IsNeon) + VecTRC = UnitSize == 16 + ? (const TargetRegisterClass *)&ARM::DPairRegClass + : UnitSize == 8 + ? (const TargetRegisterClass *)&ARM::DPRRegClass + : 0; + unsigned BytesLeft = SizeVal % UnitSize; unsigned LoopSize = SizeVal - BytesLeft; @@ -6922,34 +7437,13 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { unsigned srcIn = src; unsigned destIn = dest; for (unsigned i = 0; i < LoopSize; i+=UnitSize) { - unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? TRC_Vec:TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (UnitSize >= 8) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(0)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(destIn).addImm(0).addReg(scratch)); - } else if (isThumb2) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(UnitSize)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addImm(UnitSize)); - } else { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc), scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0) - .addImm(UnitSize)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(UnitSize)); - } + unsigned scratch = MRI.createVirtualRegister(IsNeon ? 
VecTRC : TRC); + emitPostLd(BB, MI, TII, dl, UnitSize, scratch, srcIn, srcOut, + IsThumb1, IsThumb2); + emitPostSt(BB, MI, TII, dl, UnitSize, scratch, destIn, destOut, + IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } @@ -6957,30 +7451,14 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // Handle the leftover bytes with LDRB and STRB. // [scratch, srcOut] = LDRB_POST(srcIn, 1) // [destOut] = STRB_POST(scratch, destIn, 1) - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? ARM::t2STRB_POST : ARM::STRB_POST_IMM; for (unsigned i = 0; i < BytesLeft; i++) { - unsigned scratch = MRI.createVirtualRegister(TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } else { - AddDefaultPred(BuildMI(*BB, MI, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn) - .addReg(0).addImm(1)); - - AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } + unsigned scratch = MRI.createVirtualRegister(TRC); + emitPostLd(BB, MI, TII, dl, 1, scratch, srcIn, srcOut, + IsThumb1, IsThumb2); + emitPostSt(BB, MI, TII, dl, 1, scratch, destIn, destOut, + IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } @@ -7021,17 +7499,16 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // Load an immediate to varEnd. unsigned varEnd = MRI.createVirtualRegister(TRC); - if (isThumb2) { - unsigned VReg1 = varEnd; + if (IsThumb2) { + unsigned Vtmp = varEnd; if ((LoopSize & 0xFFFF0000) != 0) - VReg1 = MRI.createVirtualRegister(TRC); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), VReg1) - .addImm(LoopSize & 0xFFFF)); + Vtmp = MRI.createVirtualRegister(TRC); + AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVi16), Vtmp) + .addImm(LoopSize & 0xFFFF)); if ((LoopSize & 0xFFFF0000) != 0) AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::t2MOVTi16), varEnd) - .addReg(VReg1) - .addImm(LoopSize >> 16)); + .addReg(Vtmp).addImm(LoopSize >> 16)); } else { MachineConstantPool *ConstantPool = MF->getConstantPool(); Type *Int32Ty = Type::getInt32Ty(MF->getFunction()->getContext()); @@ -7043,10 +7520,12 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { Align = getDataLayout()->getTypeAllocSize(C->getType()); unsigned Idx = ConstantPool->getConstantPoolIndex(C, Align); - AddDefaultPred(BuildMI(BB, dl, TII->get(ARM::LDRcp)) - .addReg(varEnd, RegState::Define) - .addConstantPoolIndex(Idx) - .addImm(0)); + if (IsThumb1) + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::tLDRpci)).addReg( + varEnd, RegState::Define).addConstantPoolIndex(Idx)); + else + AddDefaultPred(BuildMI(*BB, MI, dl, TII->get(ARM::LDRcp)).addReg( + varEnd, RegState::Define).addConstantPoolIndex(Idx).addImm(0)); } BB->addSuccessor(loopMBB); @@ -7075,39 +7554,30 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // [scratch, srcLoop] = LDR_POST(srcPhi, UnitSize) // [destLoop] = STR_POST(scratch, destPhi, UnitSiz) - unsigned scratch = MRI.createVirtualRegister(UnitSize >= 8 ? 
TRC_Vec:TRC); - if (UnitSize >= 8) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(0)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(destPhi).addImm(0).addReg(scratch)); - } else if (isThumb2) { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addImm(UnitSize)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(scratch).addReg(destPhi) - .addImm(UnitSize)); - } else { - AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), scratch) - .addReg(srcLoop, RegState::Define).addReg(srcPhi).addReg(0) - .addImm(UnitSize)); - - AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), destLoop) - .addReg(scratch).addReg(destPhi) - .addReg(0).addImm(UnitSize)); - } + unsigned scratch = MRI.createVirtualRegister(IsNeon ? VecTRC : TRC); + emitPostLd(BB, BB->end(), TII, dl, UnitSize, scratch, srcPhi, srcLoop, + IsThumb1, IsThumb2); + emitPostSt(BB, BB->end(), TII, dl, UnitSize, scratch, destPhi, destLoop, + IsThumb1, IsThumb2); // Decrement loop variable by UnitSize. - MachineInstrBuilder MIB = BuildMI(BB, dl, - TII->get(isThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); - AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); - MIB->getOperand(5).setReg(ARM::CPSR); - MIB->getOperand(5).setIsDef(true); - - BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc)) - .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); + if (IsThumb1) { + MachineInstrBuilder MIB = + BuildMI(*BB, BB->end(), dl, TII->get(ARM::tSUBi8), varLoop); + MIB = AddDefaultT1CC(MIB); + MIB.addReg(varPhi).addImm(UnitSize); + AddDefaultPred(MIB); + } else { + MachineInstrBuilder MIB = + BuildMI(*BB, BB->end(), dl, + TII->get(IsThumb2 ? ARM::t2SUBri : ARM::SUBri), varLoop); + AddDefaultCC(AddDefaultPred(MIB.addReg(varPhi).addImm(UnitSize))); + MIB->getOperand(5).setReg(ARM::CPSR); + MIB->getOperand(5).setIsDef(true); + } + BuildMI(*BB, BB->end(), dl, + TII->get(IsThumb1 ? ARM::tBcc : IsThumb2 ? ARM::t2Bcc : ARM::Bcc)) + .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR); // loopMBB can loop back to loopMBB or fall through to exitMBB. BB->addSuccessor(loopMBB); @@ -7116,34 +7586,19 @@ EmitStructByval(MachineInstr *MI, MachineBasicBlock *BB) const { // Add epilogue to handle BytesLeft. BB = exitMBB; MachineInstr *StartOfExit = exitMBB->begin(); - ldrOpc = isThumb2 ? ARM::t2LDRB_POST : ARM::LDRB_POST_IMM; - strOpc = isThumb2 ? 
ARM::t2STRB_POST : ARM::STRB_POST_IMM; // [scratch, srcOut] = LDRB_POST(srcLoop, 1) // [destOut] = STRB_POST(scratch, destLoop, 1) unsigned srcIn = srcLoop; unsigned destIn = destLoop; for (unsigned i = 0; i < BytesLeft; i++) { - unsigned scratch = MRI.createVirtualRegister(TRC); unsigned srcOut = MRI.createVirtualRegister(TRC); unsigned destOut = MRI.createVirtualRegister(TRC); - if (isThumb2) { - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addImm(1)); - - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addImm(1)); - } else { - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, - TII->get(ldrOpc),scratch) - .addReg(srcOut, RegState::Define).addReg(srcIn).addReg(0).addImm(1)); - - AddDefaultPred(BuildMI(*BB, StartOfExit, dl, TII->get(strOpc), destOut) - .addReg(scratch).addReg(destIn) - .addReg(0).addImm(1)); - } + unsigned scratch = MRI.createVirtualRegister(TRC); + emitPostLd(BB, StartOfExit, TII, dl, 1, scratch, srcIn, srcOut, + IsThumb1, IsThumb2); + emitPostSt(BB, StartOfExit, TII, dl, 1, scratch, destIn, destOut, + IsThumb1, IsThumb2); srcIn = srcOut; destIn = destOut; } @@ -7293,46 +7748,49 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2); case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4); + case ARM::ATOMIC_LOAD_I64: + return EmitAtomicLoad64(MI, BB); - case ARM::ATOMADD6432: + case ARM::ATOMIC_LOAD_ADD_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr, isThumb2 ? ARM::t2ADCrr : ARM::ADCrr, /*NeedsCarry*/ true); - case ARM::ATOMSUB6432: + case ARM::ATOMIC_LOAD_SUB_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true); - case ARM::ATOMOR6432: + case ARM::ATOMIC_LOAD_OR_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr); - case ARM::ATOMXOR6432: + case ARM::ATOMIC_LOAD_XOR_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2EORrr : ARM::EORrr, isThumb2 ? ARM::t2EORrr : ARM::EORrr); - case ARM::ATOMAND6432: + case ARM::ATOMIC_LOAD_AND_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr); - case ARM::ATOMSWAP6432: + case ARM::ATOMIC_STORE_I64: + case ARM::ATOMIC_SWAP_I64: return EmitAtomicBinary64(MI, BB, 0, 0, false); - case ARM::ATOMCMPXCHG6432: + case ARM::ATOMIC_CMP_SWAP_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ false, /*IsCmpxchg*/true); - case ARM::ATOMMIN6432: + case ARM::ATOMIC_LOAD_MIN_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::LT); - case ARM::ATOMMAX6432: + case ARM::ATOMIC_LOAD_MAX_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::GE); - case ARM::ATOMUMIN6432: + case ARM::ATOMIC_LOAD_UMIN_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? 
ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, /*IsMinMax*/ true, ARMCC::LO); - case ARM::ATOMUMAX6432: + case ARM::ATOMIC_LOAD_UMAX_I64: return EmitAtomicBinary64(MI, BB, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr, isThumb2 ? ARM::t2SBCrr : ARM::SBCrr, /*NeedsCarry*/ true, /*IsCmpxchg*/false, @@ -7710,13 +8168,13 @@ SDValue combineSelectAndUse(SDNode *N, SDValue Slct, SDValue OtherOp, // Slct is now know to be the desired identity constant when CC is true. SDValue TrueVal = OtherOp; - SDValue FalseVal = DAG.getNode(N->getOpcode(), N->getDebugLoc(), VT, + SDValue FalseVal = DAG.getNode(N->getOpcode(), SDLoc(N), VT, OtherOp, NonConstantVal); // Unless SwapSelectOps says CC should be false. if (SwapSelectOps) std::swap(TrueVal, FalseVal); - return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, + return DAG.getNode(ISD::SELECT, SDLoc(N), VT, CCOp, TrueVal, FalseVal); } @@ -7823,9 +8281,9 @@ static SDValue AddCombineToVPADDL(SDNode *N, SDValue N0, SDValue N1, llvm_unreachable("Invalid vector element type for padd optimization."); } - SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + SDValue tmp = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), widenType, &Ops[0], Ops.size()); - return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, tmp); + return DAG.getNode(ISD::TRUNCATE, SDLoc(N), VT, tmp); } static SDValue findMUL_LOHI(SDValue V) { @@ -7868,8 +8326,11 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, assert(AddcNode->getNumValues() == 2 && AddcNode->getValueType(0) == MVT::i32 && - AddcNode->getValueType(1) == MVT::Glue && - "Expect ADDC with two result values: i32, glue"); + "Expect ADDC with two result values. First: i32"); + + // Check that we have a glued ADDC node. + if (AddcNode->getValueType(1) != MVT::Glue) + return SDValue(); // Check that the ADDC adds the low result of the S/UMUL_LOHI. 
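// [Editorial sketch: the 64-bit multiply-accumulate this combine tries to
// form. UMLAL/SMLAL compute a 32x32 -> 64 product and add it into a 64-bit
// accumulator held in two 32-bit registers, which is exactly the
// UMUL_LOHI + ADDC/ADDE chain matched here.]
#include <cstdint>

static void umlal(uint32_t A, uint32_t B, uint32_t &AccLo, uint32_t &AccHi) {
  uint64_t Acc = ((uint64_t)AccHi << 32) | AccLo;
  Acc += (uint64_t)A * B;              // the 32x32 -> 64 multiply
  AccLo = (uint32_t)Acc;               // ADDC half: low word, produces carry
  AccHi = (uint32_t)(Acc >> 32);       // ADDE half: high word, consumes carry
}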
if (AddcOp0->getOpcode() != ISD::UMUL_LOHI && @@ -7950,7 +8411,7 @@ static SDValue AddCombineTo64bitMLAL(SDNode *AddcNode, Ops.push_back(*LowAdd); Ops.push_back(*HiAdd); - SDValue MLALNode = DAG.getNode(FinalOpc, AddcNode->getDebugLoc(), + SDValue MLALNode = DAG.getNode(FinalOpc, SDLoc(AddcNode), DAG.getVTList(MVT::i32, MVT::i32), &Ops[0], Ops.size()); @@ -8038,6 +8499,13 @@ static SDValue PerformSUBCombine(SDNode *N, /// is faster than /// vadd d3, d0, d1 /// vmul d3, d3, d2 +// However, for (A + B) * (A + B), +// vadd d2, d0, d1 +// vmul d3, d0, d2 +// vmla d3, d1, d2 +// is slower than +// vadd d2, d0, d1 +// vmul d3, d2, d2 static SDValue PerformVMULCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI, const ARMSubtarget *Subtarget) { @@ -8057,8 +8525,11 @@ static SDValue PerformVMULCombine(SDNode *N, std::swap(N0, N1); } + if (N0 == N1) + return SDValue(); + EVT VT = N->getValueType(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue N00 = N0->getOperand(0); SDValue N01 = N0->getOperand(1); return DAG.getNode(Opcode, DL, VT, @@ -8088,11 +8559,11 @@ static SDValue PerformMULCombine(SDNode *N, return SDValue(); int64_t MulAmt = C->getSExtValue(); - unsigned ShiftAmt = CountTrailingZeros_64(MulAmt); + unsigned ShiftAmt = countTrailingZeros(MulAmt); ShiftAmt = ShiftAmt & (32 - 1); SDValue V = N->getOperand(0); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); SDValue Res; MulAmt >>= ShiftAmt; @@ -8156,7 +8627,7 @@ static SDValue PerformANDCombine(SDNode *N, // Attempt to use immediate-form VBIC BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; @@ -8199,7 +8670,7 @@ static SDValue PerformORCombine(SDNode *N, const ARMSubtarget *Subtarget) { // Attempt to use immediate-form VORR BuildVectorSDNode *BVN = dyn_cast(N->getOperand(1)); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT VT = N->getValueType(0); SelectionDAG &DAG = DCI.DAG; @@ -8248,22 +8719,29 @@ static SDValue PerformORCombine(SDNode *N, unsigned SplatBitSize; bool HasAnyUndefs; + APInt SplatBits0, SplatBits1; BuildVectorSDNode *BVN0 = dyn_cast(N0->getOperand(1)); - APInt SplatBits0; + BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); + // Ensure that the second operand of both ands are constants if (BVN0 && BVN0->isConstantSplat(SplatBits0, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs) { - BuildVectorSDNode *BVN1 = dyn_cast(N1->getOperand(1)); - APInt SplatBits1; - if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, - HasAnyUndefs) && !HasAnyUndefs && - SplatBits0 == ~SplatBits1) { - // Canonicalize the vector type to make instruction selection simpler. - EVT CanonicalVT = VT.is128BitVector() ? MVT::v4i32 : MVT::v2i32; - SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, - N0->getOperand(1), N0->getOperand(0), - N1->getOperand(0)); - return DAG.getNode(ISD::BITCAST, dl, VT, Result); - } + HasAnyUndefs) && !HasAnyUndefs) { + if (BVN1 && BVN1->isConstantSplat(SplatBits1, SplatUndef, SplatBitSize, + HasAnyUndefs) && !HasAnyUndefs) { + // Ensure that the bit width of the constants are the same and that + // the splat arguments are logical inverses as per the pattern we + // are trying to simplify. + if (SplatBits0.getBitWidth() == SplatBits1.getBitWidth() && + SplatBits0 == ~SplatBits1) { + // Canonicalize the vector type to make instruction selection + // simpler. + EVT CanonicalVT = VT.is128BitVector() ? 
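PerformMULCombine above peels the trailing zeros off the constant multiplier (the countTrailingZeros change) and rewrites the odd residue as 2^k+1 or 2^k-1. A scalar sketch of the resulting strength reduction, with an illustrative constant:

#include <cstdint>

// x * 10: ShiftAmt = countTrailingZeros(10) = 1, residue 5 = 4 + 1, so the
// multiply becomes shift-and-add forms with no MUL:
uint32_t mul10(uint32_t x) {
  return ((x << 2) + x) << 1; // (x*5) << 1 == x*10
}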
MVT::v4i32 : MVT::v2i32; + SDValue Result = DAG.getNode(ARMISD::VBSL, dl, CanonicalVT, + N0->getOperand(1), + N0->getOperand(0), + N1->getOperand(0)); + return DAG.getNode(ISD::BITCAST, dl, VT, Result); + } + } } } @@ -8274,7 +8752,7 @@ static SDValue PerformORCombine(SDNode *N, if (Subtarget->isThumb1Only() || !Subtarget->hasV6T2Ops()) return SDValue(); - DebugLoc DL = N->getDebugLoc(); + SDLoc DL(N); // 1) or (and A, mask), val => ARMbfi A, val, mask // iff (val & mask) == val // @@ -8309,7 +8787,7 @@ static SDValue PerformORCombine(SDNode *N, return SDValue(); if (ARM::isBitFieldInvertedMask(Mask)) { - Val >>= CountTrailingZeros_32(~Mask); + Val >>= countTrailingZeros(~Mask); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, DAG.getConstant(Val, MVT::i32), @@ -8336,7 +8814,7 @@ static SDValue PerformORCombine(SDNode *N, (Mask == 0xffff || Mask == 0xffff0000)) return SDValue(); // 2a - unsigned amt = CountTrailingZeros_32(Mask2); + unsigned amt = countTrailingZeros(Mask2); Res = DAG.getNode(ISD::SRL, DL, VT, N1.getOperand(0), DAG.getConstant(amt, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N00, Res, @@ -8352,7 +8830,7 @@ static SDValue PerformORCombine(SDNode *N, (Mask2 == 0xffff || Mask2 == 0xffff0000)) return SDValue(); // 2b - unsigned lsb = CountTrailingZeros_32(Mask); + unsigned lsb = countTrailingZeros(Mask); Res = DAG.getNode(ISD::SRL, DL, VT, N00, DAG.getConstant(lsb, MVT::i32)); Res = DAG.getNode(ARMISD::BFI, DL, VT, N1.getOperand(0), Res, @@ -8370,7 +8848,7 @@ static SDValue PerformORCombine(SDNode *N, // where lsb(mask) == #shamt and masked bits of B are known zero. SDValue ShAmt = N00.getOperand(1); unsigned ShAmtC = cast(ShAmt)->getZExtValue(); - unsigned LSB = CountTrailingZeros_32(Mask); + unsigned LSB = countTrailingZeros(Mask); if (ShAmtC != LSB) return SDValue(); @@ -8413,12 +8891,12 @@ static SDValue PerformBFICombine(SDNode *N, if (!N11C) return SDValue(); unsigned InvMask = cast(N->getOperand(2))->getZExtValue(); - unsigned LSB = CountTrailingZeros_32(~InvMask); - unsigned Width = (32 - CountLeadingZeros_32(~InvMask)) - LSB; + unsigned LSB = countTrailingZeros(~InvMask); + unsigned Width = (32 - countLeadingZeros(~InvMask)) - LSB; unsigned Mask = (1 << Width)-1; unsigned Mask2 = N11C->getZExtValue(); if ((Mask & (~Mask2)) == 0) - return DCI.DAG.getNode(ARMISD::BFI, N->getDebugLoc(), N->getValueType(0), + return DCI.DAG.getNode(ARMISD::BFI, SDLoc(N), N->getValueType(0), N->getOperand(0), N1.getOperand(0), N->getOperand(2)); } @@ -8444,7 +8922,7 @@ static SDValue PerformVMOVRRDCombine(SDNode *N, LoadSDNode *LD = cast(InNode); SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = LD->getDebugLoc(); + SDLoc DL(LD); SDValue BasePtr = LD->getBasePtr(); SDValue NewLD1 = DAG.getLoad(MVT::i32, DL, LD->getChain(), BasePtr, LD->getPointerInfo(), LD->isVolatile(), @@ -8481,7 +8959,7 @@ static SDValue PerformVMOVDRRCombine(SDNode *N, SelectionDAG &DAG) { if (Op0.getOpcode() == ARMISD::VMOVRRD && Op0.getNode() == Op1.getNode() && Op0.getResNo() == 0 && Op1.getResNo() == 1) - return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), + return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), Op0.getOperand(0)); return SDValue(); } @@ -8523,7 +9001,7 @@ static SDValue PerformSTORECombine(SDNode *N, NumElems*SizeRatio); assert(WideVecVT.getSizeInBits() == VT.getSizeInBits()); - DebugLoc DL = St->getDebugLoc(); + SDLoc DL(St); SDValue WideVec = DAG.getNode(ISD::BITCAST, DL, WideVecVT, StVal); SmallVector ShuffleVec(NumElems * SizeRatio, -1); for (unsigned i = 0; i < NumElems; ++i) ShuffleVec[i] 
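Pattern 1) above, or (and A, mask), val => ARMbfi A, val, mask, in scalar form. The field placement below is an illustrative choice; the AND mask is an inverted bitfield mask (all ones outside one contiguous field):

#include <cstdint>

// Clear bits 4..11 of a, then OR in a value that fits the cleared field;
// on ARMv6T2+ the pair should become a single BFI.
uint32_t insert_field(uint32_t a, uint32_t v) {
  return (a & ~0xff0u) | ((v & 0xffu) << 4);
}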
= i * SizeRatio; @@ -8584,7 +9062,7 @@ static SDValue PerformSTORECombine(SDNode *N, if (StVal.getNode()->getOpcode() == ARMISD::VMOVDRR && StVal.getNode()->hasOneUse()) { SelectionDAG &DAG = DCI.DAG; - DebugLoc DL = St->getDebugLoc(); + SDLoc DL(St); SDValue BasePtr = St->getBasePtr(); SDValue NewST1 = DAG.getStore(St->getChain(), DL, StVal.getNode()->getOperand(0), BasePtr, @@ -8606,14 +9084,14 @@ static SDValue PerformSTORECombine(SDNode *N, // Bitcast an i64 store extracted from a vector to f64. // Otherwise, the i64 value will be legalized to a pair of i32 values. SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = StVal.getDebugLoc(); + SDLoc dl(StVal); SDValue IntVec = StVal.getOperand(0); EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, IntVec.getValueType().getVectorNumElements()); SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, IntVec); SDValue ExtElt = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Vec, StVal.getOperand(1)); - dl = N->getDebugLoc(); + dl = SDLoc(N); SDValue V = DAG.getNode(ISD::BITCAST, dl, MVT::i64, ExtElt); // Make the DAGCombiner fold the bitcasts. DCI.AddToWorklist(Vec.getNode()); @@ -8659,7 +9137,7 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, EVT VT = N->getValueType(0); if (VT.getVectorElementType() != MVT::i64 || !hasNormalLoadOperand(N)) return SDValue(); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SmallVector Ops; unsigned NumElts = VT.getVectorNumElements(); for (unsigned i = 0; i < NumElts; ++i) { @@ -8673,6 +9151,98 @@ static SDValue PerformBUILD_VECTORCombine(SDNode *N, return DAG.getNode(ISD::BITCAST, dl, VT, BV); } +/// \brief Target-specific dag combine xforms for ARMISD::BUILD_VECTOR. +static SDValue +PerformARMBUILD_VECTORCombine(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { + // ARMISD::BUILD_VECTOR is introduced when legalizing ISD::BUILD_VECTOR. + // At that time, we may have inserted bitcasts from integer to float. + // If these bitcasts have survived DAGCombine, change the lowering of this + // BUILD_VECTOR into something more vector friendly, i.e., one that does not + // force the use of floating point types. + + // Make sure we can change the type of the vector. + // This is possible iff: + // 1. The vector is only used in a bitcast to an integer type. I.e., + // 1.1. Vector is used only once. + // 1.2. Use is a bit convert to an integer type. + // 2. The size of its operands is 32 bits (64 bits are not legal). + EVT VT = N->getValueType(0); + EVT EltVT = VT.getVectorElementType(); + + // Check 1.1. and 2. + if (EltVT.getSizeInBits() != 32 || !N->hasOneUse()) + return SDValue(); + + // By construction, the input type must be float. + assert(EltVT == MVT::f32 && "Unexpected type!"); + + // Check 1.2. + SDNode *Use = *N->use_begin(); + if (Use->getOpcode() != ISD::BITCAST || + Use->getValueType(0).isFloatingPoint()) + return SDValue(); + + // Check profitability. + // The model is: if more than half of the relevant operands are bitcast from + // i32, turn the build_vector into a sequence of insert_vector_elt. + // Relevant operands are everything that is not statically + // (i.e., at compile time) bitcasted. + unsigned NumOfBitCastedElts = 0; + unsigned NumElts = VT.getVectorNumElements(); + unsigned NumOfRelevantElts = NumElts; + for (unsigned Idx = 0; Idx < NumElts; ++Idx) { + SDValue Elt = N->getOperand(Idx); + if (Elt->getOpcode() == ISD::BITCAST) { + // Assume only bit cast to i32 will go away.
+ if (Elt->getOperand(0).getValueType() == MVT::i32) + ++NumOfBitCastedElts; + } else if (Elt.getOpcode() == ISD::UNDEF || isa(Elt)) + // Constants are statically casted, thus do not count them as + // relevant operands. + --NumOfRelevantElts; + } + + // Check if more than half of the elements require a non-free bitcast. + if (NumOfBitCastedElts <= NumOfRelevantElts / 2) + return SDValue(); + + SelectionDAG &DAG = DCI.DAG; + // Create the new vector type. + EVT VecVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + // Check if the type is legal. + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + if (!TLI.isTypeLegal(VecVT)) + return SDValue(); + + // Combine: + // ARMISD::BUILD_VECTOR E1, E2, ..., EN. + // => BITCAST INSERT_VECTOR_ELT + // (INSERT_VECTOR_ELT (...), (BITCAST EN-1), N-1), + // (BITCAST EN), N. + SDValue Vec = DAG.getUNDEF(VecVT); + SDLoc dl(N); + for (unsigned Idx = 0 ; Idx < NumElts; ++Idx) { + SDValue V = N->getOperand(Idx); + if (V.getOpcode() == ISD::UNDEF) + continue; + if (V.getOpcode() == ISD::BITCAST && + V->getOperand(0).getValueType() == MVT::i32) + // Fold obvious case. + V = V.getOperand(0); + else { + V = DAG.getNode(ISD::BITCAST, SDLoc(V), MVT::i32, V); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(V.getNode()); + } + SDValue LaneIdx = DAG.getConstant(Idx, MVT::i32); + Vec = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, VecVT, Vec, V, LaneIdx); + } + Vec = DAG.getNode(ISD::BITCAST, dl, VT, Vec); + // Make the DAGCombiner fold the bitcasts. + DCI.AddToWorklist(Vec.getNode()); + return Vec; +} + /// PerformInsertEltCombine - Target-specific dag combine xforms for /// ISD::INSERT_VECTOR_ELT. static SDValue PerformInsertEltCombine(SDNode *N, @@ -8686,7 +9256,7 @@ static SDValue PerformInsertEltCombine(SDNode *N, return SDValue(); SelectionDAG &DAG = DCI.DAG; - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); EVT FloatVT = EVT::getVectorVT(*DAG.getContext(), MVT::f64, VT.getVectorNumElements()); SDValue Vec = DAG.getNode(ISD::BITCAST, dl, FloatVT, N->getOperand(0)); @@ -8732,7 +9302,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { !TLI.isTypeLegal(Concat1Op1.getValueType())) return SDValue(); - SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, N->getDebugLoc(), VT, + SDValue NewConcat = DAG.getNode(ISD::CONCAT_VECTORS, SDLoc(N), VT, Op0.getOperand(0), Op1.getOperand(0)); // Translate the shuffle mask. 
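The profitability test in PerformARMBUILD_VECTORCombine reduces to a majority vote over the non-constant lanes; a toy restatement under that reading (names are mine, not LLVM's):

// Switch the build_vector to i32 lanes only when more than half of the
// relevant (non-constant) elements already arrive as i32 bit patterns, so
// each folded bitcast removes a GPR<->FPR transfer.
bool preferIntVector(unsigned numBitCastedElts, unsigned numRelevantElts) {
  return numBitCastedElts > numRelevantElts / 2;
}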
SmallVector NewMask; @@ -8748,7 +9318,7 @@ static SDValue PerformVECTOR_SHUFFLECombine(SDNode *N, SelectionDAG &DAG) { NewElt = HalfElts + MaskElt - NumElts; NewMask.push_back(NewElt); } - return DAG.getVectorShuffle(VT, N->getDebugLoc(), NewConcat, + return DAG.getVectorShuffle(VT, SDLoc(N), NewConcat, DAG.getUNDEF(VT), NewMask.data()); } @@ -8865,7 +9435,7 @@ static SDValue CombineBaseUpdate(SDNode *N, Ops.push_back(N->getOperand(i)); } MemIntrinsicSDNode *MemInt = cast(N); - SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, N->getDebugLoc(), SDTys, + SDValue UpdN = DAG.getMemIntrinsicNode(NewOpc, SDLoc(N), SDTys, Ops.data(), Ops.size(), MemInt->getMemoryVT(), MemInt->getMemOperand()); @@ -8939,7 +9509,7 @@ static bool CombineVLDDUP(SDNode *N, TargetLowering::DAGCombinerInfo &DCI) { SDVTList SDTys = DAG.getVTList(Tys, NumVecs+1); SDValue Ops[] = { VLD->getOperand(0), VLD->getOperand(2) }; MemIntrinsicSDNode *VLDMemInt = cast(VLD); - SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, VLD->getDebugLoc(), SDTys, + SDValue VLDDup = DAG.getMemIntrinsicNode(NewOpc, SDLoc(VLD), SDTys, Ops, 2, VLDMemInt->getMemoryVT(), VLDMemInt->getMemOperand()); @@ -8994,7 +9564,7 @@ static SDValue PerformVDUPLANECombine(SDNode *N, if (EltSize > VT.getVectorElementType().getSizeInBits()) return SDValue(); - return DCI.DAG.getNode(ISD::BITCAST, N->getDebugLoc(), VT, Op); + return DCI.DAG.getNode(ISD::BITCAST, SDLoc(N), VT, Op); } // isConstVecPow2 - Return true if each vector element is a power of 2, all @@ -9051,12 +9621,27 @@ static SDValue PerformVCVTCombine(SDNode *N, !isConstVecPow2(ConstVec, isSigned, C)) return SDValue(); + MVT FloatTy = Op.getSimpleValueType().getVectorElementType(); + MVT IntTy = N->getSimpleValueType(0).getVectorElementType(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + // These instructions only exist converting from f32 to i32. We can handle + // smaller integers by generating an extra truncate, but larger ones would + // be lossy. + return SDValue(); + } + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfp2fxs : Intrinsic::arm_neon_vcvtfp2fxu; - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), - N->getValueType(0), - DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, - DAG.getConstant(Log2_64(C), MVT::i32)); + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + SDValue FixConv = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), + NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, + DAG.getConstant(IntrinsicOpcode, MVT::i32), N0, + DAG.getConstant(Log2_64(C), MVT::i32)); + + if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) + FixConv = DAG.getNode(ISD::TRUNCATE, SDLoc(N), N->getValueType(0), FixConv); + + return FixConv; } /// PerformVDIVCombine - VCVT (fixed-point to floating-point, Advanced SIMD) @@ -9087,12 +9672,28 @@ static SDValue PerformVDIVCombine(SDNode *N, !isConstVecPow2(ConstVec, isSigned, C)) return SDValue(); + MVT FloatTy = N->getSimpleValueType(0).getVectorElementType(); + MVT IntTy = Op.getOperand(0).getSimpleValueType().getVectorElementType(); + if (FloatTy.getSizeInBits() != 32 || IntTy.getSizeInBits() > 32) { + // These instructions only exist converting from i32 to f32. We can handle + // smaller integers by generating an extra extend, but larger ones would + // be lossy. + return SDValue(); + } + + SDValue ConvInput = Op.getOperand(0); + unsigned NumLanes = Op.getValueType().getVectorNumElements(); + if (IntTy.getSizeInBits() < FloatTy.getSizeInBits()) + ConvInput = DAG.getNode(isSigned ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, + SDLoc(N), NumLanes == 2 ? MVT::v2i32 : MVT::v4i32, + ConvInput); + unsigned IntrinsicOpcode = isSigned ? Intrinsic::arm_neon_vcvtfxs2fp : Intrinsic::arm_neon_vcvtfxu2fp; - return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, N->getDebugLoc(), + return DAG.getNode(ISD::INTRINSIC_WO_CHAIN, SDLoc(N), Op.getValueType(), DAG.getConstant(IntrinsicOpcode, MVT::i32), - Op.getOperand(0), DAG.getConstant(Log2_64(C), MVT::i32)); + ConvInput, DAG.getConstant(Log2_64(C), MVT::i32)); } /// getVShiftImm - Check if this is a valid build_vector for the immediate @@ -9273,7 +9874,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { VShiftOpc = ARMISD::VQRSHRNsu; break; } - return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), N->getOperand(1), DAG.getConstant(Cnt, MVT::i32)); } @@ -9290,7 +9891,7 @@ static SDValue PerformIntrinsicCombine(SDNode *N, SelectionDAG &DAG) { llvm_unreachable("invalid shift count for vsli/vsri intrinsic"); } - return DAG.getNode(VShiftOpc, N->getDebugLoc(), N->getValueType(0), + return DAG.getNode(VShiftOpc, SDLoc(N), N->getValueType(0), N->getOperand(1), N->getOperand(2), DAG.getConstant(Cnt, MVT::i32)); } @@ -9321,7 +9922,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, if (C->getZExtValue() == 16 && N0.getOpcode() == ISD::BSWAP && DAG.MaskedValueIsZero(N0.getOperand(0), APInt::getHighBitsSet(32, 16))) - return DAG.getNode(ISD::ROTR, N->getDebugLoc(), VT, N0, N1); + return DAG.getNode(ISD::ROTR, SDLoc(N), VT, N0, N1); } } @@ -9338,7 +9939,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, case ISD::SHL: if (isVShiftLImm(N->getOperand(1), VT, false, Cnt)) - return DAG.getNode(ARMISD::VSHL, N->getDebugLoc(), VT, N->getOperand(0), + return DAG.getNode(ARMISD::VSHL, SDLoc(N), VT, N->getOperand(0), DAG.getConstant(Cnt, MVT::i32)); break; @@ -9347,7 +9948,7 @@ static SDValue PerformShiftCombine(SDNode *N, SelectionDAG &DAG, if (isVShiftRImm(N->getOperand(1), VT, false, false, Cnt)) { unsigned VShiftOpc = (N->getOpcode() == ISD::SRA ? ARMISD::VSHRs : ARMISD::VSHRu); - return DAG.getNode(VShiftOpc, N->getDebugLoc(), VT, N->getOperand(0), + return DAG.getNode(VShiftOpc, SDLoc(N), VT, N->getOperand(0), DAG.getConstant(Cnt, MVT::i32)); } } @@ -9387,7 +9988,7 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG, Opc = ARMISD::VGETLANEu; break; } - return DAG.getNode(Opc, N->getDebugLoc(), VT, Vec, Lane); + return DAG.getNode(Opc, SDLoc(N), VT, Vec, Lane); } } @@ -9476,7 +10077,7 @@ static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG, if (!Opcode) return SDValue(); - return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS); + return DAG.getNode(Opcode, SDLoc(N), N->getValueType(0), LHS, RHS); } /// PerformCMOVCombine - Target-specific DAG combining for ARMISD::CMOV.
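The guarded VCVT/VDIV combines above rest on a scalar identity; a sketch with an illustrative exponent:

#include <cstdint>

// (int32_t)(x * 2^n) is a fixed-point conversion. Per NEON lane this becomes
// a single vcvt with #n fraction bits (here n = 6), which is why the combine
// insists on the f32 <-> i32 direction and power-of-two splats.
int32_t to_q6(float x) {
  return (int32_t)(x * 64.0f);
}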
@@ -9488,7 +10089,7 @@ ARMTargetLowering::PerformCMOVCombine(SDNode *N, SelectionDAG &DAG) const { return SDValue(); EVT VT = N->getValueType(0); - DebugLoc dl = N->getDebugLoc(); + SDLoc dl(N); SDValue LHS = Cmp.getOperand(0); SDValue RHS = Cmp.getOperand(1); SDValue FalseVal = N->getOperand(0); @@ -9578,6 +10179,8 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N, case ARMISD::VLD3DUP: case ARMISD::VLD4DUP: return CombineBaseUpdate(N, DCI); + case ARMISD::BUILD_VECTOR: + return PerformARMBUILD_VECTORCombine(N, DCI); case ISD::INTRINSIC_VOID: case ISD::INTRINSIC_W_CHAIN: switch (cast(N->getOperand(1))->getZExtValue()) { @@ -9702,6 +10305,21 @@ bool ARMTargetLowering::isZExtFree(SDValue Val, EVT VT2) const { return false; } +bool ARMTargetLowering::allowTruncateForTailCall(Type *Ty1, Type *Ty2) const { + if (!Ty1->isIntegerTy() || !Ty2->isIntegerTy()) + return false; + + if (!isTypeLegal(EVT::getEVT(Ty1))) + return false; + + assert(Ty1->getPrimitiveSizeInBits() <= 64 && "i128 is probably not a noop"); + + // Assuming the caller doesn't have a zeroext or signext return parameter, + // truncation all the way down to i1 is valid. + return true; +} + + static bool isLegalT1AddressImmediate(int64_t V, EVT VT) { if (V < 0) return false; @@ -10101,9 +10719,19 @@ void ARMTargetLowering::computeMaskedBitsForTargetNode(const SDValue Op, APInt &KnownOne, const SelectionDAG &DAG, unsigned Depth) const { - KnownZero = KnownOne = APInt(KnownOne.getBitWidth(), 0); + unsigned BitWidth = KnownOne.getBitWidth(); + KnownZero = KnownOne = APInt(BitWidth, 0); switch (Op.getOpcode()) { default: break; + case ARMISD::ADDC: + case ARMISD::ADDE: + case ARMISD::SUBC: + case ARMISD::SUBE: + // These nodes' second result is a boolean + if (Op.getResNo() == 0) + break; + KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - 1); + break; case ARMISD::CMOV: { // Bits are known zero/one if known on the LHS and RHS. 
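The new ADDC/ADDE/SUBC/SUBE case above advertises that result number 1 is a 0/1 flag; numerically that is the constant below:

#include <cstdint>

// APInt::getHighBitsSet(32, 31) for the carry/borrow result: every bit above
// bit 0 is known zero because the value is always 0 or 1.
constexpr uint32_t kKnownZeroForCarry = ~UINT32_C(1); // 0xfffffffe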
DAG.ComputeMaskedBits(Op.getOperand(0), KnownZero, KnownOne, Depth+1); @@ -10217,7 +10845,7 @@ ARMTargetLowering::getSingleConstraintMatchWeight( typedef std::pair RCPair; RCPair ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const { + MVT VT) const { if (Constraint.size() == 1) { // GCC ARM Constraint Letters switch (Constraint[0]) { @@ -10232,6 +10860,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, case 'r': return RCPair(0U, &ARM::GPRRegClass); case 'w': + if (VT == MVT::Other) + break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPRRegClass); if (VT.getSizeInBits() == 64) @@ -10240,6 +10870,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint, return RCPair(0U, &ARM::QPRRegClass); break; case 'x': + if (VT == MVT::Other) + break; if (VT == MVT::f32) return RCPair(0U, &ARM::SPR_8RegClass); if (VT.getSizeInBits() == 64) @@ -10426,6 +11058,54 @@ void ARMTargetLowering::LowerAsmOperandForConstraint(SDValue Op, return TargetLowering::LowerAsmOperandForConstraint(Op, Constraint, Ops, DAG); } +SDValue ARMTargetLowering::LowerDivRem(SDValue Op, SelectionDAG &DAG) const { + assert(Subtarget->isTargetAEABI() && "Register-based DivRem lowering only"); + unsigned Opcode = Op->getOpcode(); + assert((Opcode == ISD::SDIVREM || Opcode == ISD::UDIVREM) && + "Invalid opcode for Div/Rem lowering"); + bool isSigned = (Opcode == ISD::SDIVREM); + EVT VT = Op->getValueType(0); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + + RTLIB::Libcall LC; + switch (VT.getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::i8: LC= isSigned ? RTLIB::SDIVREM_I8 : RTLIB::UDIVREM_I8; break; + case MVT::i16: LC= isSigned ? RTLIB::SDIVREM_I16 : RTLIB::UDIVREM_I16; break; + case MVT::i32: LC= isSigned ? RTLIB::SDIVREM_I32 : RTLIB::UDIVREM_I32; break; + case MVT::i64: LC= isSigned ? RTLIB::SDIVREM_I64 : RTLIB::UDIVREM_I64; break; + } + + SDValue InChain = DAG.getEntryNode(); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0, e = Op->getNumOperands(); i != e; ++i) { + EVT ArgVT = Op->getOperand(i).getValueType(); + Type *ArgTy = ArgVT.getTypeForEVT(*DAG.getContext()); + Entry.Node = Op->getOperand(i); + Entry.Ty = ArgTy; + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), + getPointerTy()); + + Type *RetTy = (Type*)StructType::get(Ty, Ty, NULL); + + SDLoc dl(Op); + TargetLowering:: + CallLoweringInfo CLI(InChain, RetTy, isSigned, !isSigned, false, true, + 0, getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair CallInfo = LowerCallTo(CLI); + + return CallInfo.first; +} + bool ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { // The ARM target isn't yet aware of offsets. 
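LowerDivRem above issues a single libcall whose return type is a two-field struct (StructType::get(Ty, Ty)); for i32 on AEABI targets that is __aeabi_idivmod, with the quotient in r0 and the remainder in r1. A source-level sketch of the shape (names illustrative):

#include <cstdint>

// On an AEABI ARM target this should compile to one __aeabi_idivmod call
// rather than separate divide and modulo operations.
struct DivRem { int32_t quot; int32_t rem; };
DivRem divrem(int32_t n, int32_t d) {
  return { n / d, n % d };
}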
@@ -10434,17 +11114,15 @@ ARMTargetLowering::isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const { bool ARM::isBitFieldInvertedMask(unsigned v) { if (v == 0xffffffff) - return 0; + return false; + // there can be 1's on either or both "outsides", all the "inside" // bits must be 0's - unsigned int lsb = 0, msb = 31; - while (v & (1 << msb)) --msb; - while (v & (1 << lsb)) ++lsb; - for (unsigned int i = lsb; i <= msb; ++i) { - if (v & (1 << i)) - return 0; - } - return 1; + unsigned TO = CountTrailingOnes_32(v); + unsigned LO = CountLeadingOnes_32(v); + v = (v >> TO) << TO; + v = (v << LO) >> LO; + return v == 0; } /// isFPImmLegal - Returns true if the target can instruction select the @@ -10513,6 +11191,30 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.writeMem = true; return true; } + case Intrinsic::arm_ldrex: { + PointerType *PtrTy = cast(I.getArgOperand(0)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(0); + Info.offset = 0; + Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.vol = true; + Info.readMem = true; + Info.writeMem = false; + return true; + } + case Intrinsic::arm_strex: { + PointerType *PtrTy = cast(I.getArgOperand(1)->getType()); + Info.opc = ISD::INTRINSIC_W_CHAIN; + Info.memVT = MVT::getVT(PtrTy->getElementType()); + Info.ptrVal = I.getArgOperand(1); + Info.offset = 0; + Info.align = getDataLayout()->getABITypeAlignment(PtrTy->getElementType()); + Info.vol = true; + Info.readMem = false; + Info.writeMem = true; + return true; + } case Intrinsic::arm_strexd: { Info.opc = ISD::INTRINSIC_W_CHAIN; Info.memVT = MVT::i64; diff --git a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h index 426010e..90facdd 100644 --- a/contrib/llvm/lib/Target/ARM/ARMISelLowering.h +++ b/contrib/llvm/lib/Target/ARM/ARMISelLowering.h @@ -52,6 +52,7 @@ namespace llvm { BR_JT, // Jumptable branch. BR2_JT, // Jumptable branch (2 level - jumptable entry is a jump). RET_FLAG, // Return with a flag operand. + INTRET_FLAG, // Interrupt return with an LR-offset and a flag operand. PIC_ADD, // Add with a PC operand and a PIC label. @@ -94,7 +95,6 @@ namespace llvm { DYN_ALLOC, // Dynamic allocation on the stack. - MEMBARRIER, // Memory barrier (DMB) MEMBARRIER_MCR, // Memory barrier (MCR) PRELOAD, // Preload @@ -186,6 +186,8 @@ namespace llvm { // Floating-point max and min: FMAX, FMIN, + VMAXNM, + VMINNM, // Bit-field insert BFI, @@ -222,21 +224,7 @@ namespace llvm { VST4_UPD, VST2LN_UPD, VST3LN_UPD, - VST4LN_UPD, - - // 64-bit atomic ops (value split into two registers) - ATOMADD64_DAG, - ATOMSUB64_DAG, - ATOMOR64_DAG, - ATOMXOR64_DAG, - ATOMAND64_DAG, - ATOMNAND64_DAG, - ATOMSWAP64_DAG, - ATOMCMPXCHG64_DAG, - ATOMMIN64_DAG, - ATOMUMIN64_DAG, - ATOMMAX64_DAG, - ATOMUMAX64_DAG + VST4LN_UPD }; } @@ -270,7 +258,7 @@ namespace llvm { } /// getSetCCResultType - Return the value type to use for ISD::SETCC. 
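The rewritten ARM::isBitFieldInvertedMask above replaces the bit-by-bit scan with two masking shifts per side. A standalone, portable restatement of the same logic for checking it in isolation:

#include <cstdint>

// Strip the run of trailing ones and the run of leading ones; the mask is a
// valid "inverted bitfield" iff no set bits remain in between.
bool isBitFieldInvertedMask(uint32_t v) {
  if (v == 0xffffffffu)
    return false;
  unsigned to = 0, lo = 0;
  while ((v >> to) & 1u) ++to;                       // CountTrailingOnes_32
  while (lo < 32 && ((v << lo) & 0x80000000u)) ++lo; // CountLeadingOnes_32
  uint32_t inside = (v >> to) << to;                 // drop trailing ones
  inside = (inside << lo) >> lo;                     // drop leading ones
  return inside == 0;
}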
- virtual EVT getSetCCResultType(EVT VT) const; + virtual EVT getSetCCResultType(LLVMContext &Context, EVT VT) const; virtual MachineBasicBlock * EmitInstrWithCustomInserter(MachineInstr *MI, @@ -298,6 +286,9 @@ namespace llvm { using TargetLowering::isZExtFree; virtual bool isZExtFree(SDValue Val, EVT VT2) const; + virtual bool allowTruncateForTailCall(Type *Ty1, Type *Ty2) const; + + /// isLegalAddressingMode - Return true if the addressing mode represented /// by AM is legal for this target, for a load/store of the specified type. virtual bool isLegalAddressingMode(const AddrMode &AM, Type *Ty)const; @@ -349,7 +340,7 @@ namespace llvm { std::pair getRegForInlineAsmConstraint(const std::string &Constraint, - EVT VT) const; + MVT VT) const; /// LowerAsmOperandForConstraint - Lower the specified operand into the Ops /// vector. If it is invalid, don't add anything to Ops. If hasMemory is @@ -372,6 +363,12 @@ namespace llvm { /// be used for loads / stores from the global. virtual unsigned getMaximalGlobalOffset() const; + /// Returns true if a cast between SrcAS and DestAS is a noop. + virtual bool isNoopAddrSpaceCast(unsigned SrcAS, unsigned DestAS) const { + // Addrspacecasts are always noops. + return true; + } + /// createFastISel - This method returns a target specific FastISel object, /// or null if the target does not support "fast" ISel. virtual FastISel *createFastISel(FunctionLoweringInfo &funcInfo, @@ -412,21 +409,21 @@ namespace llvm { void addQRTypeForNEON(MVT VT); typedef SmallVector, 8> RegsToPassVector; - void PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG, + void PassF64ArgInRegs(SDLoc dl, SelectionDAG &DAG, SDValue Chain, SDValue &Arg, RegsToPassVector &RegsToPass, CCValAssign &VA, CCValAssign &NextVA, SDValue &StackPtr, - SmallVector &MemOpChains, + SmallVectorImpl &MemOpChains, ISD::ArgFlagsTy Flags) const; SDValue GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, - DebugLoc dl) const; + SDLoc dl) const; CCAssignFn *CCAssignFnForNode(CallingConv::ID CC, bool Return, bool isVarArg) const; SDValue LowerMemOpCallTo(SDValue Chain, SDValue StackPtr, SDValue Arg, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, const CCValAssign &VA, ISD::ArgFlagsTy Flags) const; SDValue LowerEH_SJLJ_SETJMP(SDValue Op, SelectionDAG &DAG) const; @@ -457,13 +454,26 @@ namespace llvm { const ARMSubtarget *ST) const; SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG, const ARMSubtarget *ST) const; + SDValue LowerFSINCOS(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerDivRem(SDValue Op, SelectionDAG &DAG) const; + + /// isFMAFasterThanFMulAndFAdd - Return true if an FMA operation is faster + /// than a pair of fmul and fadd instructions. fmuladd intrinsics will be + /// expanded to FMAs when this method returns true, otherwise fmuladd is + /// expanded to fmul + fadd. + /// + /// ARM supports both fused and unfused multiply-add operations; we already + /// lower a pair of fmul and fadd to the latter so it's not clear that there + /// would be a gain or that the gain would be worthwhile enough to risk + /// correctness bugs. 
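The isFMAFasterThanFMulAndFAdd comment above is about rounding as much as speed; a sketch of the distinction it preserves (assumes FP contraction is disabled, otherwise the first form may be fused anyway):

#include <cmath>

// Unfused: rounds after the multiply and again after the add (fmul + fadd).
double mul_add(double a, double b, double c) { return a * b + c; }

// Fused: a single rounding at the end -- what a VFMA computes, and what
// @llvm.fmuladd becomes when the hook returns true.
double fused_mul_add(double a, double b, double c) { return std::fma(a, b, c); }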
+ virtual bool isFMAFasterThanFMulAndFAdd(EVT VT) const { return false; } SDValue ReconstructShuffle(SDValue Op, SelectionDAG &DAG) const; SDValue LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals, bool isThisReturn, SDValue ThisVal) const; @@ -471,24 +481,26 @@ namespace llvm { LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Ins, - DebugLoc dl, SelectionDAG &DAG, + SDLoc dl, SelectionDAG &DAG, SmallVectorImpl &InVals) const; int StoreByValRegs(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, + SDLoc dl, SDValue &Chain, const Value *OrigArg, unsigned InRegsParamRecordIdx, unsigned OffsetFromOrigArg, unsigned ArgOffset, + unsigned ArgSize, bool ForceMutable) const; void VarArgStyleRegisters(CCState &CCInfo, SelectionDAG &DAG, - DebugLoc dl, SDValue &Chain, + SDLoc dl, SDValue &Chain, unsigned ArgOffset, bool ForceMutable = false) const; void computeRegArea(CCState &CCInfo, MachineFunction &MF, unsigned InRegsParamRecordIdx, + unsigned ArgSize, unsigned &ArgRegsSize, unsigned &ArgRegsSaveSize) const; @@ -522,16 +534,16 @@ namespace llvm { CallingConv::ID CallConv, bool isVarArg, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, - DebugLoc dl, SelectionDAG &DAG) const; + SDLoc dl, SelectionDAG &DAG) const; virtual bool isUsedByReturnOnly(SDNode *N, SDValue &Chain) const; virtual bool mayBeEmittedAsTailCall(CallInst *CI) const; SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC, - SDValue &ARMcc, SelectionDAG &DAG, DebugLoc dl) const; + SDValue &ARMcc, SelectionDAG &DAG, SDLoc dl) const; SDValue getVFPCmp(SDValue LHS, SDValue RHS, - SelectionDAG &DAG, DebugLoc dl) const; + SelectionDAG &DAG, SDLoc dl) const; SDValue duplicateCmp(SDValue Cmp, SelectionDAG &DAG) const; SDValue OptimizeVFPBrcond(SDValue Op, SelectionDAG &DAG) const; @@ -556,6 +568,8 @@ namespace llvm { unsigned Size, bool signExtend, ARMCC::CondCodes Cond) const; + MachineBasicBlock *EmitAtomicLoad64(MachineInstr *MI, + MachineBasicBlock *BB) const; void SetupEntryBlockForSjLj(MachineInstr *MI, MachineBasicBlock *MBB, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td index 67a6820..f93504f 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrFormats.td @@ -155,6 +155,16 @@ def pred : PredicateOperand, PredicateOp, + ComplexPattern { + let MIOperandInfo = (ops i32imm, i32imm); + let PrintMethod = "printPredicateOperand"; +} + // Conditional code result for instructions whose 's' bit is set, e.g. subs. 
def CCOutOperand : AsmOperandClass { let Name = "CCOut"; } def cc_out : OptionalDefOperand { @@ -237,6 +247,8 @@ class t2InstAlias : InstAlias, Requires<[IsThumb2]>; class VFP2InstAlias : InstAlias, Requires<[HasVFP2]>; +class VFP2DPInstAlias + : InstAlias, Requires<[HasVFP2,HasDPVFP]>; class VFP3InstAlias : InstAlias, Requires<[HasVFP3]>; class NEONInstAlias @@ -490,8 +502,7 @@ class JTI; -// Atomic load/store instructions -class AIldrex opcod, dag oops, dag iops, InstrItinClass itin, +class AIldr_ex_or_acq opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> : I { @@ -502,23 +513,52 @@ class AIldrex opcod, dag oops, dag iops, InstrItinClass itin, let Inst{20} = 1; let Inst{19-16} = addr; let Inst{15-12} = Rt; - let Inst{11-0} = 0b111110011111; + let Inst{11-10} = 0b11; + let Inst{9-8} = opcod2; + let Inst{7-0} = 0b10011111; } -class AIstrex opcod, dag oops, dag iops, InstrItinClass itin, +class AIstr_ex_or_rel opcod, bits<2> opcod2, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> : I { - bits<4> Rd; bits<4> Rt; bits<4> addr; let Inst{27-23} = 0b00011; let Inst{22-21} = opcod; let Inst{20} = 0; let Inst{19-16} = addr; - let Inst{15-12} = Rd; - let Inst{11-4} = 0b11111001; + let Inst{11-10} = 0b11; + let Inst{9-8} = opcod2; + let Inst{7-4} = 0b1001; let Inst{3-0} = Rt; } +// Atomic load/store instructions +class AIldrex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIldr_ex_or_acq; + +class AIstrex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIstr_ex_or_rel { + bits<4> Rd; + let Inst{15-12} = Rd; +} + +// Exclusive load/store instructions + +class AIldaex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIldr_ex_or_acq, + Requires<[IsARM, HasV8]>; + +class AIstlex opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIstr_ex_or_rel, + Requires<[IsARM, HasV8]> { + bits<4> Rd; + let Inst{15-12} = Rd; +} + class AIswp pattern> : AI { bits<4> Rt; @@ -535,6 +575,18 @@ class AIswp pattern> let Unpredictable{11-8} = 0b1111; let DecoderMethod = "DecodeSwap"; } +// Acquire/Release load/store instructions +class AIldracq opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIldr_ex_or_acq, + Requires<[IsARM, HasV8]>; + +class AIstrrel opcod, dag oops, dag iops, InstrItinClass itin, + string opc, string asm, list pattern> + : AIstr_ex_or_rel, + Requires<[IsARM, HasV8]> { + let Inst{15-12} = 0b1111; +} // addrmode1 instructions class AI1 opcod, dag oops, dag iops, Format f, InstrItinClass itin, @@ -1230,8 +1282,9 @@ class T2JTI; // Move to/from coprocessor instructions -class T2Cop opc, dag oops, dag iops, string asm, list pattern> - : T2XI , Requires<[IsThumb2]> { +class T2Cop opc, dag oops, dag iops, string opcstr, string asm, + list pattern> + : T2I , Requires<[IsThumb2]> { let Inst{31-28} = opc; } @@ -1389,7 +1442,6 @@ class ADI5 opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{15-12} = Dd{3-0}; let Inst{7-0} = addr{7-0}; // imm8 - // TODO: Mark the instructions with the appropriate subtarget info. let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; let Inst{11-9} = 0b101; @@ -1415,7 +1467,6 @@ class ASI5 opcod1, bits<2> opcod2, dag oops, dag iops, let Inst{15-12} = Sd{4-1}; let Inst{7-0} = addr{7-0}; // imm8 - // TODO: Mark the instructions with the appropriate subtarget info. 
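The new AIldaex/AIstlex and AIldracq/AIstrrel formats above encode the ARMv8 exclusive-acquire/release and load-acquire/store-release instructions (ldaex/stlex, lda/stl). At source level those are what the C++11 memory orders map onto; a sketch assuming an ARMv8 target:

#include <atomic>

// With an ARMv8 AArch32 target these should lower to lda / stl directly,
// instead of plain accesses bracketed by dmb barriers.
int load_acquire(const std::atomic<int> &a) {
  return a.load(std::memory_order_acquire);
}
void store_release(std::atomic<int> &a, int v) {
  a.store(v, std::memory_order_release);
}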
let Inst{27-24} = opcod1; let Inst{21-20} = opcod2; let Inst{11-9} = 0b101; @@ -1437,6 +1488,28 @@ class PseudoVFPLdStM pattern> + : VFPXI { + // Instruction operands. + bits<4> Rn; + bits<13> regs; + + // Encode instruction operands. + let Inst{19-16} = Rn; + let Inst{22} = 0; + let Inst{15-12} = regs{11-8}; + let Inst{7-1} = regs{7-1}; + + let Inst{27-25} = 0b110; + let Inst{11-8} = 0b1011; + let Inst{0} = 1; +} + +// Double precision class AXDI4 pattern> : VFPXI pattern> : VFPXI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, let Inst{8} = 1; // Double precision let Inst{7-6} = opcod4; let Inst{4} = opcod5; + + let Predicates = [HasVFP2, HasDPVFP]; +} + +// Double precision, unary, not-predicated +class ADuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list pattern> + : VFPXI { + // Instruction operands. + bits<5> Dd; + bits<5> Dm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // Double precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; } // Double precision, binary @@ -1525,9 +1626,42 @@ class ADbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, let Inst{8} = 1; // Double precision let Inst{6} = op6; let Inst{4} = op4; + + let Predicates = [HasVFP2, HasDPVFP]; +} + +// FP, binary, not predicated +class ADbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list pattern> + : VFPXI +{ + // Instruction operands. + bits<5> Dd; + bits<5> Dn; + bits<5> Dm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Dm{3-0}; + let Inst{5} = Dm{4}; + let Inst{19-16} = Dn{3-0}; + let Inst{7} = Dn{4}; + let Inst{15-12} = Dd{3-0}; + let Inst{22} = Dd{4}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 1; // double precision + let Inst{6} = opcod3; + let Inst{4} = 0; + + let Predicates = [HasVFP2, HasDPVFP]; } -// Single precision, unary +// Single precision, unary, predicated class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, bit opcod5, dag oops, dag iops, InstrItinClass itin, string opc, string asm, list pattern> @@ -1551,6 +1685,33 @@ class ASuI opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, let Inst{4} = opcod5; } +// Single precision, unary, non-predicated +class ASuInp opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, + bit opcod5, dag oops, dag iops, InstrItinClass itin, + string asm, list pattern> + : VFPXI { + // Instruction operands. + bits<5> Sd; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{19-16} = opcod3; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{7-6} = opcod4; + let Inst{4} = opcod5; +} + // Single precision unary, if no NEON. Same as ASuI except not available if // NEON is enabled. 
class ASuIn opcod1, bits<2> opcod2, bits<4> opcod3, bits<2> opcod4, @@ -1586,6 +1747,35 @@ class ASbI opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, dag iops, let Inst{4} = op4; } +// Single precision, binary, not predicated +class ASbInp opcod1, bits<2> opcod2, bit opcod3, dag oops, dag iops, + InstrItinClass itin, string asm, list pattern> + : VFPXI +{ + // Instruction operands. + bits<5> Sd; + bits<5> Sn; + bits<5> Sm; + + let Inst{31-28} = 0b1111; + + // Encode instruction operands. + let Inst{3-0} = Sm{4-1}; + let Inst{5} = Sm{0}; + let Inst{19-16} = Sn{4-1}; + let Inst{7} = Sn{0}; + let Inst{15-12} = Sd{4-1}; + let Inst{22} = Sd{0}; + + let Inst{27-23} = opcod1; + let Inst{21-20} = opcod2; + let Inst{11-9} = 0b101; + let Inst{8} = 0; // Single precision + let Inst{6} = opcod3; + let Inst{4} = 0; +} + // Single precision binary, if no NEON. Same as ASbI except not available if // NEON is enabled. class ASbIn opcod1, bits<2> opcod2, bit op6, bit op4, dag oops, @@ -1698,6 +1888,21 @@ class NeonXI pattern> + : InstARM { + let OutOperandList = oops; + let InOperandList = iops; + let AsmString = !strconcat(opc, ".", dt, "\t", asm); + let Pattern = pattern; + list Predicates = [HasNEON]; + let DecoderNamespace = "NEON"; + + let Inst{31-28} = 0b1111; +} + class NLdSt op21_20, bits<4> op11_8, bits<4> op7_4, dag oops, dag iops, InstrItinClass itin, string opc, string dt, string asm, string cstr, list pattern> @@ -1817,6 +2022,35 @@ class N2V op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, let Inst{5} = Vm{4}; } +// Same as N2V but not predicated. +class N2Vnp op19_18, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, + dag oops, dag iops, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vm; + + // Encode instruction operands + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; + + // Encode constant bits + let Inst{27-23} = 0b00111; + let Inst{21-20} = 0b11; + let Inst{19-18} = op19_18; + let Inst{17-16} = op17_16; + let Inst{11} = 0; + let Inst{10-8} = op10_8; + let Inst{7} = op7; + let Inst{6} = op6; + let Inst{4} = 0; + + let DecoderNamespace = "NEON"; +} + // Same as N2V except it doesn't have a datatype suffix. 
class N2VX op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -1898,6 +2132,32 @@ class N3V op21_20, bits<4> op11_8, bit op6, bit op4, let Inst{5} = Vm{4}; } +class N3Vnp op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, dag oops, dag iops,Format f, InstrItinClass itin, + string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable, list pattern> + : NeonInp { + bits<5> Vd; + bits<5> Vn; + bits<5> Vm; + + // Encode instruction operands + let Inst{22} = Vd{4}; + let Inst{15-12} = Vd{3-0}; + let Inst{19-16} = Vn{3-0}; + let Inst{7} = Vn{4}; + let Inst{5} = Vm{4}; + let Inst{3-0} = Vm{3-0}; + + // Encode constant bits + let Inst{27-23} = op27_23; + let Inst{21-20} = op21_20; + let Inst{11-8} = op11_8; + let Inst{6} = op6; + let Inst{4} = op4; +} + class N3VLane32 op21_20, bits<4> op11_8, bit op6, bit op4, dag oops, dag iops, Format f, InstrItinClass itin, string opc, string dt, string asm, string cstr, diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp index 80f0ec7..df867b4 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.cpp @@ -22,6 +22,7 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/MC/MCAsmInfo.h" @@ -29,7 +30,7 @@ using namespace llvm; ARMInstrInfo::ARMInstrInfo(const ARMSubtarget &STI) - : ARMBaseInstrInfo(STI), RI(*this, STI) { + : ARMBaseInstrInfo(STI), RI(STI) { } /// getNoopForMachoTarget - Return the noop instruction to use for a noop. @@ -106,29 +107,42 @@ namespace { if (TM->getRelocationModel() != Reloc::PIC_) return false; - LLVMContext* Context = &MF.getFunction()->getContext(); - GlobalValue *GV = new GlobalVariable(Type::getInt32Ty(*Context), false, - GlobalValue::ExternalLinkage, 0, - "_GLOBAL_OFFSET_TABLE_"); - unsigned Id = AFI->createPICLabelUId(); - ARMConstantPoolValue *CPV = ARMConstantPoolConstant::Create(GV, Id); - unsigned Align = TM->getDataLayout()->getPrefTypeAlignment(GV->getType()); + LLVMContext *Context = &MF.getFunction()->getContext(); + unsigned ARMPCLabelIndex = AFI->createPICLabelUId(); + unsigned PCAdj = TM->getSubtarget().isThumb() ? 4 : 8; + ARMConstantPoolValue *CPV = ARMConstantPoolSymbol::Create( + *Context, "_GLOBAL_OFFSET_TABLE_", ARMPCLabelIndex, PCAdj); + + unsigned Align = TM->getDataLayout() + ->getPrefTypeAlignment(Type::getInt32PtrTy(*Context)); unsigned Idx = MF.getConstantPool()->getConstantPoolIndex(CPV, Align); MachineBasicBlock &FirstMBB = MF.front(); MachineBasicBlock::iterator MBBI = FirstMBB.begin(); DebugLoc DL = FirstMBB.findDebugLoc(MBBI); - unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + unsigned TempReg = + MF.getRegInfo().createVirtualRegister(&ARM::rGPRRegClass); unsigned Opc = TM->getSubtarget().isThumb2() ? ARM::t2LDRpci : ARM::LDRcp; const TargetInstrInfo &TII = *TM->getInstrInfo(); MachineInstrBuilder MIB = BuildMI(FirstMBB, MBBI, DL, - TII.get(Opc), GlobalBaseReg) + TII.get(Opc), TempReg) .addConstantPoolIndex(Idx); if (Opc == ARM::LDRcp) MIB.addImm(0); AddDefaultPred(MIB); + // Fix the GOT address by adding pc. + unsigned GlobalBaseReg = AFI->getGlobalBaseReg(); + Opc = TM->getSubtarget().isThumb2() ? 
ARM::tPICADD + : ARM::PICADD; + MIB = BuildMI(FirstMBB, MBBI, DL, TII.get(Opc), GlobalBaseReg) + .addReg(TempReg) + .addImm(ARMPCLabelIndex); + if (Opc == ARM::PICADD) + AddDefaultPred(MIB); + + return true; } diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td index 1bd174e..2042c04 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrInfo.td @@ -71,6 +71,9 @@ def SDT_ARMTCRET : SDTypeProfile<0, 1, [SDTCisPtrTy<0>]>; def SDT_ARMBFI : SDTypeProfile<1, 3, [SDTCisVT<0, i32>, SDTCisVT<1, i32>, SDTCisVT<2, i32>, SDTCisVT<3, i32>]>; +def SDT_ARMVMAXNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>; +def SDT_ARMVMINNM : SDTypeProfile<1, 2, [SDTCisFP<0>, SDTCisFP<1>, SDTCisFP<2>]>; + def SDTBinaryArithWithFlags : SDTypeProfile<2, 2, [SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>, @@ -118,7 +121,8 @@ def ARMcall_nolink : SDNode<"ARMISD::CALL_NOLINK", SDT_ARMcall, def ARMretflag : SDNode<"ARMISD::RET_FLAG", SDTNone, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - +def ARMintretflag : SDNode<"ARMISD::INTRET_FLAG", SDT_ARMcall, + [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; def ARMcmov : SDNode<"ARMISD::CMOV", SDT_ARMCMov, [SDNPInGlue]>; @@ -162,8 +166,6 @@ def ARMeh_sjlj_longjmp: SDNode<"ARMISD::EH_SJLJ_LONGJMP", SDT_ARMEH_SJLJ_Longjmp, [SDNPHasChain, SDNPSideEffect]>; -def ARMMemBarrier : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIER, - [SDNPHasChain, SDNPSideEffect]>; def ARMMemBarrierMCR : SDNode<"ARMISD::MEMBARRIER_MCR", SDT_ARMMEMBARRIER, [SDNPHasChain, SDNPSideEffect]>; def ARMPreload : SDNode<"ARMISD::PRELOAD", SDT_ARMPREFETCH, @@ -174,9 +176,11 @@ def ARMrbit : SDNode<"ARMISD::RBIT", SDTIntUnaryOp>; def ARMtcret : SDNode<"ARMISD::TC_RETURN", SDT_ARMTCRET, [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>; - def ARMbfi : SDNode<"ARMISD::BFI", SDT_ARMBFI>; +def ARMvmaxnm : SDNode<"ARMISD::VMAXNM", SDT_ARMVMAXNM, []>; +def ARMvminnm : SDNode<"ARMISD::VMINNM", SDT_ARMVMINNM, []>; + //===----------------------------------------------------------------------===// // ARM Instruction Predicate Definitions. 
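ARMvmaxnm/ARMvminnm above model the ARMv8 VMAXNM/VMINNM instructions, which implement IEEE 754-2008 maxNum/minNum. In scalar C++ terms the same NaN rule is what std::fmax/std::fmin specify, so a sketch of the semantics is:

#include <cmath>

// maxNum/minNum: if exactly one operand is a quiet NaN, the result is the
// other operand -- unlike a plain (a > b ? a : b), which yields nothing
// meaningful for NaN inputs.
float vmaxnm_like(float a, float b) { return std::fmax(a, b); }
float vminnm_like(float a, float b) { return std::fmin(a, b); }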
// @@ -189,11 +193,18 @@ def HasV5TE : Predicate<"Subtarget->hasV5TEOps()">, def HasV6 : Predicate<"Subtarget->hasV6Ops()">, AssemblerPredicate<"HasV6Ops", "armv6">; def NoV6 : Predicate<"!Subtarget->hasV6Ops()">; +def HasV6M : Predicate<"Subtarget->hasV6MOps()">, + AssemblerPredicate<"HasV6MOps", + "armv6m or armv6t2">; def HasV6T2 : Predicate<"Subtarget->hasV6T2Ops()">, AssemblerPredicate<"HasV6T2Ops", "armv6t2">; def NoV6T2 : Predicate<"!Subtarget->hasV6T2Ops()">; def HasV7 : Predicate<"Subtarget->hasV7Ops()">, AssemblerPredicate<"HasV7Ops", "armv7">; +def HasV8 : Predicate<"Subtarget->hasV8Ops()">, + AssemblerPredicate<"HasV8Ops", "armv8">; +def PreV8 : Predicate<"!Subtarget->hasV8Ops()">, + AssemblerPredicate<"!HasV8Ops", "armv7 or earlier">; def NoVFP : Predicate<"!Subtarget->hasVFP2()">; def HasVFP2 : Predicate<"Subtarget->hasVFP2()">, AssemblerPredicate<"FeatureVFP2", "VFP2">; @@ -201,14 +212,23 @@ def HasVFP3 : Predicate<"Subtarget->hasVFP3()">, AssemblerPredicate<"FeatureVFP3", "VFP3">; def HasVFP4 : Predicate<"Subtarget->hasVFP4()">, AssemblerPredicate<"FeatureVFP4", "VFP4">; +def HasDPVFP : Predicate<"!Subtarget->isFPOnlySP()">, + AssemblerPredicate<"!FeatureVFPOnlySP", + "double precision VFP">; +def HasFPARMv8 : Predicate<"Subtarget->hasFPARMv8()">, + AssemblerPredicate<"FeatureFPARMv8", "FPARMv8">; def HasNEON : Predicate<"Subtarget->hasNEON()">, AssemblerPredicate<"FeatureNEON", "NEON">; +def HasCrypto : Predicate<"Subtarget->hasCrypto()">, + AssemblerPredicate<"FeatureCrypto", "crypto">; +def HasCRC : Predicate<"Subtarget->hasCRC()">, + AssemblerPredicate<"FeatureCRC", "crc">; def HasFP16 : Predicate<"Subtarget->hasFP16()">, AssemblerPredicate<"FeatureFP16","half-float">; def HasDivide : Predicate<"Subtarget->hasDivide()">, - AssemblerPredicate<"FeatureHWDiv", "divide">; + AssemblerPredicate<"FeatureHWDiv", "divide in THUMB">; def HasDivideInARM : Predicate<"Subtarget->hasDivideInARMMode()">, - AssemblerPredicate<"FeatureHWDivARM">; + AssemblerPredicate<"FeatureHWDivARM", "divide in ARM">; def HasT2ExtractPack : Predicate<"Subtarget->hasT2ExtractPack()">, AssemblerPredicate<"FeatureT2XtPk", "pack/extract">; @@ -233,10 +253,10 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">, AssemblerPredicate<"ModeThumb,FeatureThumb2", "thumb2">; def IsMClass : Predicate<"Subtarget->isMClass()">, - AssemblerPredicate<"FeatureMClass", "armv7m">; -def IsARClass : Predicate<"!Subtarget->isMClass()">, + AssemblerPredicate<"FeatureMClass", "armv*m">; +def IsNotMClass : Predicate<"!Subtarget->isMClass()">, AssemblerPredicate<"!FeatureMClass", - "armv7a/r">; + "!armv*m">; def IsARM : Predicate<"!Subtarget->isThumb()">, AssemblerPredicate<"!ModeThumb", "arm-mode">; def IsIOS : Predicate<"Subtarget->isTargetIOS()">; @@ -258,7 +278,9 @@ def UseMulOps : Predicate<"Subtarget->useMulOps()">; def UseFusedMAC : Predicate<"(TM.Options.AllowFPOpFusion ==" " FPOpFusion::Fast) && " "!Subtarget->isTargetDarwin()">; -def DontUseFusedMAC : Predicate<"!Subtarget->hasVFP4() || " +def DontUseFusedMAC : Predicate<"!(TM.Options.AllowFPOpFusion ==" + " FPOpFusion::Fast &&" + " Subtarget->hasVFP4()) || " "Subtarget->isTargetDarwin()">; // VGETLNi32 is microcoded on Swift - prefer VMOV. 
@@ -275,8 +297,8 @@ def HasSlowVDUP32 : Predicate<"Subtarget->isSwift()">; def UseVMOVSR : Predicate<"Subtarget->isCortexA9() || !Subtarget->useNEONForSinglePrecisionFP()">; def DontUseVMOVSR : Predicate<"!Subtarget->isCortexA9() && Subtarget->useNEONForSinglePrecisionFP()">; -def IsLE : Predicate<"TLI.isLittleEndian()">; -def IsBE : Predicate<"TLI.isBigEndian()">; +def IsLE : Predicate<"getTargetLowering()->isLittleEndian()">; +def IsBE : Predicate<"getTargetLowering()->isBigEndian()">; //===----------------------------------------------------------------------===// // ARM Flag Definitions. @@ -456,7 +478,7 @@ def AdrLabelAsmOperand : AsmOperandClass { let Name = "AdrLabel"; } def adrlabel : Operand { let EncoderMethod = "getAdrLabelOpValue"; let ParserMatchClass = AdrLabelAsmOperand; - let PrintMethod = "printAdrLabelOperand"; + let PrintMethod = "printAdrLabelOperand<0>"; } def neon_vcvt_imm32 : Operand { @@ -581,17 +603,6 @@ def imm0_1 : Operand { let ParserMatchClass = Imm0_1AsmOperand; } def Imm0_3AsmOperand: ImmAsmOperand { let Name = "Imm0_3"; } def imm0_3 : Operand { let ParserMatchClass = Imm0_3AsmOperand; } -/// imm0_4 predicate - Immediate in the range [0,4]. -def Imm0_4AsmOperand : ImmAsmOperand -{ - let Name = "Imm0_4"; - let DiagnosticType = "ImmRange0_4"; -} -def imm0_4 : Operand, ImmLeaf= 0 && Imm < 5; }]> { - let ParserMatchClass = Imm0_4AsmOperand; - let DecoderMethod = "DecodeImm0_4"; -} - /// imm0_7 predicate - Immediate in the range [0,7]. def Imm0_7AsmOperand: ImmAsmOperand { let Name = "Imm0_7"; } def imm0_7 : Operand, ImmLeaf, ImmLeaf, ImmLeaf= 0 && Imm < 240; }]> { + let ParserMatchClass = Imm0_239AsmOperand; +} + /// imm0_255 predicate - Immediate in the range [0,255]. def Imm0_255AsmOperand : ImmAsmOperand { let Name = "Imm0_255"; } def imm0_255 : Operand, ImmLeaf= 0 && Imm < 256; }]> { @@ -702,6 +722,11 @@ def imm0_65535_expr : Operand { let ParserMatchClass = Imm0_65535ExprAsmOperand; } +def Imm256_65535ExprAsmOperand: ImmAsmOperand { let Name = "Imm256_65535Expr"; } +def imm256_65535_expr : Operand { + let ParserMatchClass = Imm256_65535ExprAsmOperand; +} + /// imm24b - True if the 32-bit immediate is encodable in 24 bits. def Imm24bitAsmOperand: ImmAsmOperand { let Name = "Imm24bit"; } def imm24b : Operand, ImmLeaf { let DecoderMethod = "DecodeCoprocessor"; } -def pf_imm : Operand { - let PrintMethod = "printPImmediate"; - let ParserMatchClass = CoprocNumAsmOperand; -} - def CoprocRegAsmOperand : AsmOperandClass { let Name = "CoprocReg"; let ParserMethod = "parseCoprocRegOperand"; @@ -1327,7 +1347,7 @@ class AI_ext_rrot opcod, string opc, PatFrag opnode> : AExtI, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> { bits<4> Rd; bits<4> Rm; bits<2> rot; @@ -1340,11 +1360,11 @@ class AI_ext_rrot opcod, string opc, PatFrag opnode> class AI_ext_rrot_np opcod, string opc> : AExtI, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsi]> { bits<2> rot; let Inst{19-16} = 0b1111; let Inst{11-10} = rot; -} + } /// AI_exta_rrot - A binary operation with two forms: one whose operand is a /// register and one whose operand is a register rotated by 8/16/24. 
@@ -1353,7 +1373,7 @@ class AI_exta_rrot opcod, string opc, PatFrag opnode> IIC_iEXTAr, opc, "\t$Rd, $Rn, $Rm$rot", [(set GPRnopc:$Rd, (opnode GPR:$Rn, (rotr GPRnopc:$Rm, rot_imm:$rot)))]>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> { bits<4> Rd; bits<4> Rm; bits<4> Rn; @@ -1368,7 +1388,7 @@ class AI_exta_rrot opcod, string opc, PatFrag opnode> class AI_exta_rrot_np opcod, string opc> : AExtI, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALUsr]> { bits<4> Rn; bits<2> rot; let Inst{19-16} = Rn; @@ -1664,53 +1684,11 @@ PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary, [(ARMcallseq_start timm:$amt)]>; } -// Atomic pseudo-insts which will be lowered to ldrexd/strexd loops. -// (These pseudos use a hand-written selection code). -let usesCustomInserter = 1, Defs = [CPSR], mayLoad = 1, mayStore = 1 in { -def ATOMOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMXOR6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMADD6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMSUB6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMNAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMAND6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMSWAP6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMCMPXCHG6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, - GPR:$set1, GPR:$set2), - NoItinerary, []>; -def ATOMMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMUMIN6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -def ATOMUMAX6432 : PseudoInst<(outs GPR:$dst1, GPR:$dst2), - (ins GPR:$addr, GPR:$src1, GPR:$src2), - NoItinerary, []>; -} - -def HINT : AI<(outs), (ins imm0_4:$imm), MiscFrm, NoItinerary, +def HINT : AI<(outs), (ins imm0_239:$imm), MiscFrm, NoItinerary, "hint", "\t$imm", []>, Requires<[IsARM, HasV6]> { - bits<3> imm; - let Inst{27-3} = 0b0011001000001111000000000; - let Inst{2-0} = imm; + bits<8> imm; + let Inst{27-8} = 0b00110010000011110000; + let Inst{7-0} = imm; } def : InstAlias<"nop$p", (HINT 0, pred:$p)>, Requires<[IsARM, HasV6T2]>; @@ -1718,6 +1696,9 @@ def : InstAlias<"yield$p", (HINT 1, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"wfe$p", (HINT 2, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"wfi$p", (HINT 3, pred:$p)>, Requires<[IsARM, HasV6T2]>; def : InstAlias<"sev$p", (HINT 4, pred:$p)>, Requires<[IsARM, HasV6T2]>; +def : InstAlias<"sevl$p", (HINT 5, pred:$p)>, Requires<[IsARM, HasV8]>; + +def : Pat<(int_arm_sevl), (HINT 5)>; def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", "\t$Rd, $Rn, $Rm", []>, Requires<[IsARM, HasV6]> { @@ -1735,12 +1716,23 @@ def SEL : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), DPFrm, NoItinerary, "sel", // The 16-bit operand $val can be used by a debugger to store more information // about the breakpoint. 
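The HINT rewrite above widens the immediate from imm0_4 to imm0_239 and adds the ARMv8-only sevl alias (hint #5, matched by int_arm_sevl). A sketch assuming GCC/Clang inline assembly on an ARM target:

// "hint #5" assembles to SEVL on ARMv8; the architecture defines unallocated
// hint encodings to execute as NOPs on earlier cores.
static inline void send_event_local(void) {
  __asm__ __volatile__("hint #5");
}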
-def BKPT : AI<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, - "bkpt", "\t$val", []>, Requires<[IsARM]> { +def BKPT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, + "bkpt", "\t$val", []>, Requires<[IsARM]> { bits<16> val; let Inst{3-0} = val{3-0}; let Inst{19-8} = val{15-4}; let Inst{27-20} = 0b00010010; + let Inst{31-28} = 0xe; // AL + let Inst{7-4} = 0b0111; +} + +def HLT : AInoP<(outs), (ins imm0_65535:$val), MiscFrm, NoItinerary, + "hlt", "\t$val", []>, Requires<[IsARM, HasV8]> { + bits<16> val; + let Inst{3-0} = val{3-0}; + let Inst{19-8} = val{15-4}; + let Inst{27-20} = 0b00010000; + let Inst{31-28} = 0xe; // AL let Inst{7-4} = 0b0111; } @@ -1780,7 +1772,8 @@ multiclass APreLoad read, bits<1> data, string opc> { def i12 : AXI<(outs), (ins addrmode_imm12:$addr), MiscFrm, IIC_Preload, !strconcat(opc, "\t$addr"), - [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]> { + [(ARMPreload addrmode_imm12:$addr, (i32 read), (i32 data))]>, + Sched<[WritePreLd]> { bits<4> Rt; bits<17> addr; let Inst{31-26} = 0b111101; @@ -1796,7 +1789,8 @@ multiclass APreLoad read, bits<1> data, string opc> { def rs : AXI<(outs), (ins ldst_so_reg:$shift), MiscFrm, IIC_Preload, !strconcat(opc, "\t$shift"), - [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]> { + [(ARMPreload ldst_so_reg:$shift, (i32 read), (i32 data))]>, + Sched<[WritePreLd]> { bits<17> shift; let Inst{31-26} = 0b111101; let Inst{25} = 1; // 1 for register form @@ -1816,7 +1810,7 @@ defm PLDW : APreLoad<0, 1, "pldw">, Requires<[IsARM,HasV7,HasMP]>; defm PLI : APreLoad<1, 0, "pli">, Requires<[IsARM,HasV7]>; def SETEND : AXI<(outs), (ins setend_op:$end), MiscFrm, NoItinerary, - "setend\t$end", []>, Requires<[IsARM]> { + "setend\t$end", []>, Requires<[IsARM]>, Deprecated { bits<1> end; let Inst{31-10} = 0b1111000100000001000000; let Inst{9} = end; @@ -1863,7 +1857,8 @@ def TRAP : AXI<(outs), (ins), MiscFrm, NoItinerary, let isNotDuplicable = 1 in { def PICADD : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$a, pclabel:$cp, pred:$p), 4, IIC_iALUr, - [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>; + [(set GPR:$dst, (ARMpic_add GPR:$a, imm:$cp))]>, + Sched<[WriteALU, ReadALU]>; let AddedComplexity = 10 in { def PICLDR : ARMPseudoInst<(outs GPR:$dst), (ins addrmodepc:$addr, pred:$p), @@ -1923,11 +1918,11 @@ def ADR : AI1<{0,?,?,0}, (outs GPR:$Rd), (ins adrlabel:$label), let hasSideEffects = 1 in { def LEApcrel : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, pred:$p), - 4, IIC_iALUi, []>; + 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; def LEApcrelJT : ARMPseudoInst<(outs GPR:$Rd), (ins i32imm:$label, nohash_imm:$id, pred:$p), - 4, IIC_iALUi, []>; + 4, IIC_iALUi, []>, Sched<[WriteALU, ReadALU]>; } //===----------------------------------------------------------------------===// @@ -1938,16 +1933,22 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in { // ARMV4T and above def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br, "bx", "\tlr", [(ARMretflag)]>, - Requires<[IsARM, HasV4T]> { + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { let Inst{27-0} = 0b0001001011111111111100011110; } // ARMV4 only def MOVPCLR : AI<(outs), (ins), BrMiscFrm, IIC_Br, "mov", "\tpc, lr", [(ARMretflag)]>, - Requires<[IsARM, NoV4T]> { + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]> { let Inst{27-0} = 0b0001101000001111000000001110; } + + // Exception return: N.b. doesn't set CPSR as far as we're concerned (it sets + // the user-space one). 
+ def SUBS_PC_LR : ARMPseudoInst<(outs), (ins i32imm:$offset, pred:$p), + 4, IIC_Br, + [(ARMintretflag imm:$offset)]>; } // Indirect branches @@ -1955,7 +1956,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { // ARMV4T and above def BX : AXI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx\t$dst", [(brind GPR:$dst)]>, - Requires<[IsARM, HasV4T]> { + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { bits<4> dst; let Inst{31-4} = 0b1110000100101111111111110001; let Inst{3-0} = dst; @@ -1963,7 +1964,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in { def BX_pred : AI<(outs), (ins GPR:$dst), BrMiscFrm, IIC_Br, "bx", "\t$dst", [/* pattern left blank */]>, - Requires<[IsARM, HasV4T]> { + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]> { bits<4> dst; let Inst{27-4} = 0b000100101111111111110001; let Inst{3-0} = dst; @@ -1980,7 +1981,7 @@ let isCall = 1, def BL : ABXI<0b1011, (outs), (ins bl_target:$func), IIC_Br, "bl\t$func", [(ARMcall tglobaladdr:$func)]>, - Requires<[IsARM]> { + Requires<[IsARM]>, Sched<[WriteBrL]> { let Inst{31-28} = 0b1110; bits<24> func; let Inst{23-0} = func; @@ -1990,7 +1991,7 @@ let isCall = 1, def BL_pred : ABI<0b1011, (outs), (ins bl_target:$func), IIC_Br, "bl", "\t$func", [(ARMcall_pred tglobaladdr:$func)]>, - Requires<[IsARM]> { + Requires<[IsARM]>, Sched<[WriteBrL]> { bits<24> func; let Inst{23-0} = func; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -2000,7 +2001,7 @@ let isCall = 1, def BLX : AXI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx\t$func", [(ARMcall GPR:$func)]>, - Requires<[IsARM, HasV5T]> { + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { bits<4> func; let Inst{31-4} = 0b1110000100101111111111110011; let Inst{3-0} = func; @@ -2009,7 +2010,7 @@ let isCall = 1, def BLX_pred : AI<(outs), (ins GPR:$func), BrMiscFrm, IIC_Br, "blx", "\t$func", [(ARMcall_pred GPR:$func)]>, - Requires<[IsARM, HasV5T]> { + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { bits<4> func; let Inst{27-4} = 0b000100101111111111110011; let Inst{3-0} = func; @@ -2019,18 +2020,18 @@ let isCall = 1, // Note: Restrict $func to the tGPR regclass to prevent it being in LR. def BX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, HasV4T]>; + Requires<[IsARM, HasV4T]>, Sched<[WriteBr]>; // ARMv4 def BMOVPCRX_CALL : ARMPseudoInst<(outs), (ins tGPR:$func), 8, IIC_Br, [(ARMcall_nolink tGPR:$func)]>, - Requires<[IsARM, NoV4T]>; + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; // mov lr, pc; b if callee is marked noreturn to avoid confusing the // return stack predictor. def BMOVPCB_CALL : ARMPseudoInst<(outs), (ins bl_target:$func), 8, IIC_Br, [(ARMcall_nolink tglobaladdr:$func)]>, - Requires<[IsARM]>; + Requires<[IsARM]>, Sched<[WriteBr]>; } let isBranch = 1, isTerminator = 1 in { @@ -2038,7 +2039,8 @@ let isBranch = 1, isTerminator = 1 in { // a two-value operand where a dag node expects two operands. :( def Bcc : ABI<0b1010, (outs), (ins br_target:$target), IIC_Br, "b", "\t$target", - [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]> { + [/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>, + Sched<[WriteBr]> { bits<24> target; let Inst{23-0} = target; let DecoderMethod = "DecodeBranchImmInstruction"; @@ -2051,25 +2053,27 @@ let isBranch = 1, isTerminator = 1 in { // should be sufficient. // FIXME: Is B really a Barrier? That doesn't seem right. 
def B : ARMPseudoExpand<(outs), (ins br_target:$target), 4, IIC_Br, - [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>; + [(br bb:$target)], (Bcc br_target:$target, (ops 14, zero_reg))>, + Sched<[WriteBr]>; let isNotDuplicable = 1, isIndirectBranch = 1 in { def BR_JTr : ARMPseudoInst<(outs), (ins GPR:$target, i32imm:$jt, i32imm:$id), 0, IIC_Br, - [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>; + [(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]>, + Sched<[WriteBr]>; // FIXME: This shouldn't use the generic "addrmode2," but rather be split // into i12 and rs suffixed versions. def BR_JTm : ARMPseudoInst<(outs), (ins addrmode2:$target, i32imm:$jt, i32imm:$id), 0, IIC_Br, [(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt, - imm:$id)]>; + imm:$id)]>, Sched<[WriteBrTbl]>; def BR_JTadd : ARMPseudoInst<(outs), (ins GPR:$target, GPR:$idx, i32imm:$jt, i32imm:$id), 0, IIC_Br, [(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt, - imm:$id)]>; + imm:$id)]>, Sched<[WriteBrTbl]>; } // isNotDuplicable = 1, isIndirectBranch = 1 } // isBarrier = 1 @@ -2078,7 +2082,7 @@ let isBranch = 1, isTerminator = 1 in { // BLX (immediate) def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary, "blx\t$target", []>, - Requires<[IsARM, HasV5T]> { + Requires<[IsARM, HasV5T]>, Sched<[WriteBrL]> { let Inst{31-25} = 0b1111101; bits<25> target; let Inst{23-0} = target{24-1}; @@ -2087,7 +2091,7 @@ def BLXi : AXI<(outs), (ins blx_target:$target), BrMiscFrm, NoItinerary, // Branch and Exchange Jazelle def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", - [/* pattern left blank */]> { + [/* pattern left blank */]>, Sched<[WriteBr]> { bits<4> func; let Inst{23-20} = 0b0010; let Inst{19-8} = 0xfff; @@ -2098,18 +2102,20 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func", // Tail calls. 
let isCall = 1, isTerminator = 1, isReturn = 1, isBarrier = 1, Uses = [SP] in { - def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>; + def TCRETURNdi : PseudoInst<(outs), (ins i32imm:$dst), IIC_Br, []>, + Sched<[WriteBr]>; - def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>; + def TCRETURNri : PseudoInst<(outs), (ins tcGPR:$dst), IIC_Br, []>, + Sched<[WriteBr]>; def TAILJMPd : ARMPseudoExpand<(outs), (ins br_target:$dst), 4, IIC_Br, [], (Bcc br_target:$dst, (ops 14, zero_reg))>, - Requires<[IsARM]>; + Requires<[IsARM]>, Sched<[WriteBr]>; def TAILJMPr : ARMPseudoExpand<(outs), (ins tcGPR:$dst), 4, IIC_Br, [], - (BX GPR:$dst)>, + (BX GPR:$dst)>, Sched<[WriteBr]>, Requires<[IsARM]>; } @@ -2123,7 +2129,8 @@ def SMC : ABI<0b0001, (outs), (ins imm0_15:$opt), NoItinerary, "smc", "\t$opt", // Supervisor Call (Software Interrupt) let isCall = 1, Uses = [SP] in { -def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []> { +def SVC : ABI<0b1111, (outs), (ins imm24b:$svc), IIC_Br, "svc", "\t$svc", []>, + Sched<[WriteBr]> { bits<24> svc; let Inst{23-0} = svc; } @@ -2272,6 +2279,13 @@ def LDRD : AI3ld<0b1101, 0, (outs GPR:$Rd, GPR:$dst2), []>, Requires<[IsARM, HasV5TE]>; } +def LDA : AIldracq<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "lda", "\t$Rt, $addr", []>; +def LDAB : AIldracq<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldab", "\t$Rt, $addr", []>; +def LDAH : AIldracq<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldah", "\t$Rt, $addr", []>; + // Indexed loads multiclass AI2_ldridx { @@ -2284,7 +2298,6 @@ multiclass AI2_ldridx op, string opc, InstrItinClass itin> { let Inst{19-16} = addr{12-9}; // Rn let Inst{11-8} = addr{7-4}; // imm7_4/zero let Inst{3-0} = addr{3-0}; // imm3_0/Rm - let AsmMatchConverter = "cvtLdWriteBackRegAddrMode3"; let DecoderMethod = "DecodeAddrMode3Instruction"; } def _POST : AI3ldstidx op, string opc> { let Inst{22} = 1; let Inst{11-8} = offset{7-4}; let Inst{3-0} = offset{3-0}; - let AsmMatchConverter = "cvtLdExtTWriteBackImm"; } def r : AI3ldstidxT op, string opc> { let Inst{11-8} = 0; let Unpredictable{11-8} = 0b1111; let Inst{3-0} = Rm{3-0}; - let AsmMatchConverter = "cvtLdExtTWriteBackReg"; let DecoderMethod = "DecodeLDR"; } } @@ -2544,7 +2552,6 @@ multiclass AI2_stridx op, string opc> { let Inst{22} = 1; let Inst{11-8} = offset{7-4}; let Inst{3-0} = offset{3-0}; - let AsmMatchConverter = "cvtStExtTWriteBackImm"; } def r : AI3ldstidxT op, string opc> { let Inst{22} = 0; let Inst{11-8} = 0; let Inst{3-0} = Rm{3-0}; - let AsmMatchConverter = "cvtStExtTWriteBackReg"; } } defm STRHT : AI3strT<0b1011, "strht">; +def STL : AIstrrel<0b00, (outs), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stl", "\t$Rt, $addr", []>; +def STLB : AIstrrel<0b10, (outs), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlb", "\t$Rt, $addr", []>; +def STLH : AIstrrel<0b11, (outs), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlh", "\t$Rt, $addr", []>; //===----------------------------------------------------------------------===// // Load / store multiple Instructions. 
@@ -2955,7 +2963,7 @@ defm sysSTM : arm_ldst_mult<"stm", " ^", 0, 1, LdStMulFrm, IIC_iStore_m, let neverHasSideEffects = 1 in def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, - "mov", "\t$Rd, $Rm", []>, UnaryDP { + "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<4> Rm; @@ -2969,7 +2977,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMOVr, // A version for the smaller set of tail call registers. let neverHasSideEffects = 1 in def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, - IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP { + IIC_iMOVr, "mov", "\t$Rd, $Rm", []>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<4> Rm; @@ -2982,7 +2990,8 @@ def MOVr_TC : AsI1<0b1101, (outs tcGPR:$Rd), (ins tcGPR:$Rm), DPFrm, def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src), DPSoRegRegFrm, IIC_iMOVsr, "mov", "\t$Rd, $src", - [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP { + [(set GPRnopc:$Rd, shift_so_reg_reg:$src)]>, UnaryDP, + Sched<[WriteALU]> { bits<4> Rd; bits<12> src; let Inst{15-12} = Rd; @@ -2998,7 +3007,7 @@ def MOVsr : AsI1<0b1101, (outs GPRnopc:$Rd), (ins shift_so_reg_reg:$src), def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src), DPSoRegImmFrm, IIC_iMOVsr, "mov", "\t$Rd, $src", [(set GPR:$Rd, shift_so_reg_imm:$src)]>, - UnaryDP { + UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<12> src; let Inst{15-12} = Rd; @@ -3011,7 +3020,8 @@ def MOVsi : AsI1<0b1101, (outs GPR:$Rd), (ins shift_so_reg_imm:$src), let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def MOVi : AsI1<0b1101, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMOVi, - "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP { + "mov", "\t$Rd, $imm", [(set GPR:$Rd, so_imm:$imm)]>, UnaryDP, + Sched<[WriteALU]> { bits<4> Rd; bits<12> imm; let Inst{25} = 1; @@ -3025,7 +3035,7 @@ def MOVi16 : AI1<0b1000, (outs GPR:$Rd), (ins imm0_65535_expr:$imm), DPFrm, IIC_iMOVi, "movw", "\t$Rd, $imm", [(set GPR:$Rd, imm0_65535:$imm)]>, - Requires<[IsARM, HasV6T2]>, UnaryDP { + Requires<[IsARM, HasV6T2]>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<16> imm; let Inst{15-12} = Rd; @@ -3041,7 +3051,8 @@ def : InstAlias<"mov${p} $Rd, $imm", Requires<[IsARM]>; def MOVi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), - (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + (ins i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; let Constraints = "$src = $Rd" in { def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), @@ -3051,7 +3062,7 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), [(set GPRnopc:$Rd, (or (and GPR:$src, 0xffff), lo16AllZero:$imm))]>, UnaryDP, - Requires<[IsARM, HasV6T2]> { + Requires<[IsARM, HasV6T2]>, Sched<[WriteALU]> { bits<4> Rd; bits<16> imm; let Inst{15-12} = Rd; @@ -3063,7 +3074,8 @@ def MOVTi16 : AI1<0b1010, (outs GPRnopc:$Rd), } def MOVTi16_ga_pcrel : PseudoInst<(outs GPR:$Rd), - (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>; + (ins GPR:$src, i32imm:$addr, pclabel:$id), IIC_iMOVi, []>, + Sched<[WriteALU]>; } // Constraints @@ -3073,7 +3085,7 @@ def : ARMPat<(or GPR:$src, 0xffff0000), (MOVTi16 GPR:$src, 0xffff)>, let Uses = [CPSR] in def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, [(set GPR:$Rd, (ARMrrx GPR:$Rm))]>, UnaryDP, - Requires<[IsARM]>; + Requires<[IsARM]>, Sched<[WriteALU]>; // These aren't really mov instructions, but we have to define them this way // due to flag operands. 
@@ -3081,10 +3093,10 @@ def RRX: PseudoInst<(outs GPR:$Rd), (ins GPR:$Rm), IIC_iMOVsi, let Defs = [CPSR] in { def MOVsrl_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, [(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP, - Requires<[IsARM]>; + Sched<[WriteALU]>, Requires<[IsARM]>; def MOVsra_flag : PseudoInst<(outs GPR:$dst), (ins GPR:$src), IIC_iMOVsi, [(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP, - Requires<[IsARM]>; + Sched<[WriteALU]>, Requires<[IsARM]>; } //===----------------------------------------------------------------------===// @@ -3250,7 +3262,8 @@ class AAI op27_20, bits<8> op11_4, string opc, list pattern = [], dag iops = (ins GPRnopc:$Rn, GPRnopc:$Rm), string asm = "\t$Rd, $Rn, $Rm"> - : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern> { + : AI<(outs GPRnopc:$Rd), iops, DPFrm, IIC_iALUr, opc, asm, pattern>, + Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rn; bits<4> Rd; bits<4> Rm; @@ -3265,9 +3278,11 @@ class AAI op27_20, bits<8> op11_4, string opc, // Saturating add/subtract +let DecoderMethod = "DecodeQADDInstruction" in def QADD : AAI<0b00010000, 0b00000101, "qadd", [(set GPRnopc:$Rd, (int_arm_qadd GPRnopc:$Rm, GPRnopc:$Rn))], (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; + def QSUB : AAI<0b00010010, 0b00000101, "qsub", [(set GPRnopc:$Rd, (int_arm_qsub GPRnopc:$Rm, GPRnopc:$Rn))], (ins GPRnopc:$Rm, GPRnopc:$Rn), "\t$Rd, $Rm, $Rn">; @@ -3326,7 +3341,7 @@ def UHSUB8 : AAI<0b01100111, 0b11111111, "uhsub8">; def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), MulFrm /* for convenience */, NoItinerary, "usad8", "\t$Rd, $Rn, $Rm", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]> { bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -3340,7 +3355,7 @@ def USAD8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm), def USADA8 : AI<(outs GPR:$Rd), (ins GPR:$Rn, GPR:$Rm, GPR:$Ra), MulFrm /* for convenience */, NoItinerary, "usada8", "\t$Rd, $Rn, $Rm, $Ra", []>, - Requires<[IsARM, HasV6]> { + Requires<[IsARM, HasV6]>, Sched<[WriteALU, ReadALU, ReadALU]>{ bits<4> Rd; bits<4> Rn; bits<4> Rm; @@ -3473,7 +3488,7 @@ def BFI:I<(outs GPRnopc:$Rd), (ins GPRnopc:$src, GPR:$Rn, bf_inv_mask_imm:$imm), def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, "mvn", "\t$Rd, $Rm", - [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP { + [(set GPR:$Rd, (not GPR:$Rm))]>, UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<4> Rm; let Inst{25} = 0; @@ -3484,7 +3499,8 @@ def MVNr : AsI1<0b1111, (outs GPR:$Rd), (ins GPR:$Rm), DPFrm, IIC_iMVNr, } def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift), DPSoRegImmFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", - [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP { + [(set GPR:$Rd, (not so_reg_imm:$shift))]>, UnaryDP, + Sched<[WriteALU]> { bits<4> Rd; bits<12> shift; let Inst{25} = 0; @@ -3496,7 +3512,8 @@ def MVNsi : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_imm:$shift), } def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift), DPSoRegRegFrm, IIC_iMVNsr, "mvn", "\t$Rd, $shift", - [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP { + [(set GPR:$Rd, (not so_reg_reg:$shift))]>, UnaryDP, + Sched<[WriteALU]> { bits<4> Rd; bits<12> shift; let Inst{25} = 0; @@ -3511,7 +3528,7 @@ def MVNsr : AsI1<0b1111, (outs GPR:$Rd), (ins so_reg_reg:$shift), let isReMaterializable = 1, isAsCheapAsAMove = 1, isMoveImm = 1 in def MVNi : AsI1<0b1111, (outs GPR:$Rd), (ins so_imm:$imm), DPFrm, IIC_iMVNi, "mvn", "\t$Rd, $imm", - [(set GPR:$Rd, so_imm_not:$imm)]>,UnaryDP { + [(set GPR:$Rd, 
so_imm_not:$imm)]>,UnaryDP, Sched<[WriteALU]> { bits<4> Rd; bits<12> imm; let Inst{25} = 1; @@ -3993,14 +4010,58 @@ def PKHTB : APKHI<0b01101000, 1, (outs GPRnopc:$Rd), // Alternate cases for PKHTB where identities eliminate some nodes. Note that // a shift amount of 0 is *not legal* here, it is PKHBT instead. +// We also can not replace a srl (17..31) by an arithmetic shift we would use in +// pkhtb src1, src2, asr (17..31). def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), - (srl GPRnopc:$src2, imm16_31:$sh)), + (srl GPRnopc:$src2, imm16:$sh)), + (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16:$sh)>; +def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), + (sra GPRnopc:$src2, imm16_31:$sh)), (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm16_31:$sh)>; def : ARMV6Pat<(or (and GPRnopc:$src1, 0xFFFF0000), (and (srl GPRnopc:$src2, imm1_15:$sh), 0xFFFF)), (PKHTB GPRnopc:$src1, GPRnopc:$src2, imm1_15:$sh)>; //===----------------------------------------------------------------------===// +// CRC Instructions +// +// Polynomials: +// + CRC32{B,H,W} 0x04C11DB7 +// + CRC32C{B,H,W} 0x1EDC6F41 +// + +class AI_crc32 sz, string suffix, SDPatternOperator builtin> + : AInoP<(outs GPRnopc:$Rd), (ins GPRnopc:$Rn, GPRnopc:$Rm), MiscFrm, NoItinerary, + !strconcat("crc32", suffix), "\t$Rd, $Rn, $Rm", + [(set GPRnopc:$Rd, (builtin GPRnopc:$Rn, GPRnopc:$Rm))]>, + Requires<[IsARM, HasV8, HasCRC]> { + bits<4> Rd; + bits<4> Rn; + bits<4> Rm; + + let Inst{31-28} = 0b1110; + let Inst{27-23} = 0b00010; + let Inst{22-21} = sz; + let Inst{20} = 0; + let Inst{19-16} = Rn; + let Inst{15-12} = Rd; + let Inst{11-10} = 0b00; + let Inst{9} = C; + let Inst{8} = 0; + let Inst{7-4} = 0b0100; + let Inst{3-0} = Rm; + + let Unpredictable{11-8} = 0b1101; +} + +def CRC32B : AI_crc32<0, 0b00, "b", int_arm_crc32b>; +def CRC32CB : AI_crc32<1, 0b00, "cb", int_arm_crc32cb>; +def CRC32H : AI_crc32<0, 0b01, "h", int_arm_crc32h>; +def CRC32CH : AI_crc32<1, 0b01, "ch", int_arm_crc32ch>; +def CRC32W : AI_crc32<0, 0b10, "w", int_arm_crc32w>; +def CRC32CW : AI_crc32<1, 0b10, "cw", int_arm_crc32cw>; + +//===----------------------------------------------------------------------===// // Comparison Instructions... 
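To make the CRC32 definitions above concrete: crc32{b,h,w} fold one byte, halfword, or word into a running CRC using the reflected form of polynomial 0x04C11DB7, and the crc32c* variants do the same with 0x1EDC6F41. A bitwise software model of the byte step, as a sketch only (crc32_sw_byte is an invented name; initial/final inversion of the CRC is left to the caller):

    #include <stdint.h>

    /* One crc32b step in software: CRC-32 processed LSB-first, i.e.
       polynomial 0x04C11DB7 in its reflected form 0xEDB88320. */
    static uint32_t crc32_sw_byte(uint32_t crc, uint8_t data)
    {
        crc ^= data;
        for (int k = 0; k < 8; k++)
            crc = (crc >> 1) ^ (0xEDB88320u & -(crc & 1u));
        return crc;
    }

On a HasCRC target, the int_arm_crc32b pattern above should select a single CRC32B instruction in place of this loop.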
// @@ -4022,7 +4083,8 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_reg_reg:$rhs), let isCompare = 1, Defs = [CPSR] in { def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi, "cmn", "\t$Rn, $imm", - [(ARMcmn GPR:$Rn, so_imm:$imm)]> { + [(ARMcmn GPR:$Rn, so_imm:$imm)]>, + Sched<[WriteCMP, ReadALU]> { bits<4> Rn; bits<12> imm; let Inst{25} = 1; @@ -4038,7 +4100,7 @@ def CMNri : AI1<0b1011, (outs), (ins GPR:$Rn, so_imm:$imm), DPFrm, IIC_iCMPi, def CMNzrr : AI1<0b1011, (outs), (ins GPR:$Rn, GPR:$Rm), DPFrm, IIC_iCMPr, "cmn", "\t$Rn, $Rm", [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPR:$Rn, GPR:$Rm)]> { + GPR:$Rn, GPR:$Rm)]>, Sched<[WriteCMP, ReadALU, ReadALU]> { bits<4> Rn; bits<4> Rm; let isCommutable = 1; @@ -4056,7 +4118,8 @@ def CMNzrsi : AI1<0b1011, (outs), (ins GPR:$Rn, so_reg_imm:$shift), DPSoRegImmFrm, IIC_iCMPsr, "cmn", "\t$Rn, $shift", [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPR:$Rn, so_reg_imm:$shift)]> { + GPR:$Rn, so_reg_imm:$shift)]>, + Sched<[WriteCMPsi, ReadALU]> { bits<4> Rn; bits<12> shift; let Inst{25} = 0; @@ -4074,7 +4137,8 @@ def CMNzrsr : AI1<0b1011, (outs), (ins GPRnopc:$Rn, so_reg_reg:$shift), DPSoRegRegFrm, IIC_iCMPsr, "cmn", "\t$Rn, $shift", [(BinOpFrag<(ARMcmpZ node:$LHS,(ineg node:$RHS))> - GPRnopc:$Rn, so_reg_reg:$shift)]> { + GPRnopc:$Rn, so_reg_reg:$shift)]>, + Sched<[WriteCMPsr, ReadALU]> { bits<4> Rn; bits<12> shift; let Inst{25} = 0; @@ -4112,65 +4176,77 @@ let usesCustomInserter = 1, isBranch = 1, isTerminator = 1, def BCCi64 : PseudoInst<(outs), (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, brtarget:$dst), IIC_Br, - [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>; + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, GPR:$rhs1, GPR:$rhs2, bb:$dst)]>, + Sched<[WriteBr]>; def BCCZi64 : PseudoInst<(outs), (ins i32imm:$cc, GPR:$lhs1, GPR:$lhs2, brtarget:$dst), IIC_Br, - [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>; + [(ARMBcci64 imm:$cc, GPR:$lhs1, GPR:$lhs2, 0, 0, bb:$dst)]>, + Sched<[WriteBr]>; } // usesCustomInserter // Conditional moves -// FIXME: should be able to write a pattern for ARMcmov, but can't use -// a two-value operand where a dag node expects two operands. 
:( let neverHasSideEffects = 1 in { let isCommutable = 1, isSelect = 1 in -def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), (ins GPR:$false, GPR:$Rm, pred:$p), +def MOVCCr : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, GPR:$Rm, cmovpred:$p), 4, IIC_iCMOVr, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + [(set GPR:$Rd, (ARMcmov GPR:$false, GPR:$Rm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; def MOVCCsi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_reg_imm:$shift, pred:$p), - 4, IIC_iCMOVsr, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_imm:$shift, - imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + (ins GPR:$false, so_reg_imm:$shift, cmovpred:$p), + 4, IIC_iCMOVsr, + [(set GPR:$Rd, + (ARMcmov GPR:$false, so_reg_imm:$shift, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; def MOVCCsr : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_reg_reg:$shift, pred:$p), + (ins GPR:$false, so_reg_reg:$shift, cmovpred:$p), 4, IIC_iCMOVsr, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift, - imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + [(set GPR:$Rd, (ARMcmov GPR:$false, so_reg_reg:$shift, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; let isMoveImm = 1 in -def MOVCCi16 : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, imm0_65535_expr:$imm, pred:$p), - 4, IIC_iMOVi, - []>, - RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; +def MOVCCi16 + : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, imm0_65535_expr:$imm, cmovpred:$p), + 4, IIC_iMOVi, + [(set GPR:$Rd, (ARMcmov GPR:$false, imm0_65535:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>, + Sched<[WriteALU]>; let isMoveImm = 1 in def MOVCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm, pred:$p), + (ins GPR:$false, so_imm:$imm, cmovpred:$p), 4, IIC_iCMOVi, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; // Two instruction predicate mov immediate. 
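The cmovpred rework above lets instruction selection match ARMcmov directly, so a source-level select can become a single predicated move instead of a branch. A typical input, as a sketch (max_i32 is just an illustrative name):

    /* With the MOVCCr pattern above, -O2 code for this select is
       expected to be "cmp r0, r1; movle r0, r1" -- no branch. */
    int max_i32(int a, int b)
    {
        return a > b ? a : b;
    }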
let isMoveImm = 1 in -def MOVCCi32imm : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, i32imm:$src, pred:$p), - 8, IIC_iCMOVix2, []>, RegConstraint<"$false = $Rd">; +def MOVCCi32imm + : ARMPseudoInst<(outs GPR:$Rd), + (ins GPR:$false, i32imm:$src, cmovpred:$p), + 8, IIC_iCMOVix2, + [(set GPR:$Rd, (ARMcmov GPR:$false, imm:$src, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Requires<[IsARM, HasV6T2]>; let isMoveImm = 1 in def MVNCCi : ARMPseudoInst<(outs GPR:$Rd), - (ins GPR:$false, so_imm:$imm, pred:$p), + (ins GPR:$false, so_imm:$imm, cmovpred:$p), 4, IIC_iCMOVi, - [/*(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, imm:$cc, CCR:$ccr))*/]>, - RegConstraint<"$false = $Rd">; + [(set GPR:$Rd, (ARMcmov GPR:$false, so_imm_not:$imm, + cmovpred:$p))]>, + RegConstraint<"$false = $Rd">, Sched<[WriteALU]>; } // neverHasSideEffects @@ -4189,10 +4265,20 @@ def memb_opt : Operand { let DecoderMethod = "DecodeMemBarrierOption"; } +def InstSyncBarrierOptOperand : AsmOperandClass { + let Name = "InstSyncBarrierOpt"; + let ParserMethod = "parseInstSyncBarrierOptOperand"; +} +def instsyncb_opt : Operand { + let PrintMethod = "printInstSyncBOption"; + let ParserMatchClass = InstSyncBarrierOptOperand; + let DecoderMethod = "DecodeInstSyncBarrierOption"; +} + // memory barriers protect the atomic sequences let hasSideEffects = 1 in { def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, - "dmb", "\t$opt", [(ARMMemBarrier (i32 imm:$opt))]>, + "dmb", "\t$opt", [(int_arm_dmb (i32 imm0_15:$opt))]>, Requires<[IsARM, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf57ff05; @@ -4201,7 +4287,7 @@ def DMB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, } def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, - "dsb", "\t$opt", []>, + "dsb", "\t$opt", [(int_arm_dsb (i32 imm0_15:$opt))]>, Requires<[IsARM, HasDB]> { bits<4> opt; let Inst{31-4} = 0xf57ff04; @@ -4209,7 +4295,7 @@ def DSB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, } // ISB has only full system option -def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, +def ISB : AInoP<(outs), (ins instsyncb_opt:$opt), MiscFrm, NoItinerary, "isb", "\t$opt", []>, Requires<[IsARM, HasDB]> { bits<4> opt; @@ -4217,124 +4303,219 @@ def ISB : AInoP<(outs), (ins memb_opt:$opt), MiscFrm, NoItinerary, let Inst{3-0} = opt; } +let usesCustomInserter = 1, Defs = [CPSR] in { + // Pseudo instruction that combines movs + predicated rsbmi // to implement integer ABS -let usesCustomInserter = 1, Defs = [CPSR] in -def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; + def ABS : ARMPseudoInst<(outs GPR:$dst), (ins GPR:$src), 8, NoItinerary, []>; -let usesCustomInserter = 1 in { - let Defs = [CPSR] in { +// Atomic pseudo-insts which will be lowered to ldrex/strex loops. +// (64-bit pseudos use a hand-written selection code). 
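Since the atomic pseudos below now expand in a custom inserter with an explicit $ordering operand, it may help to see the shape of the loop they become. A hand-written equivalent of the 32-bit fetch-add case, assuming GCC-style inline assembly on ARMv6+ (the barriers implied by $ordering are omitted from this sketch):

    #include <stdint.h>

    /* Roughly what ATOMIC_LOAD_ADD_I32 expands to: a ldrex/strex retry
       loop.  Returns the value observed before the add. */
    static uint32_t atomic_fetch_add_u32(uint32_t *ptr, uint32_t incr)
    {
        uint32_t old, sum, fail;
        __asm__ volatile(
            "1: ldrex   %0, [%3]      \n\t"   /* exclusive load of *ptr  */
            "   add     %1, %0, %4    \n\t"   /* sum = old + incr        */
            "   strex   %2, %1, [%3]  \n\t"   /* fail = 0 iff store wins */
            "   cmp     %2, #0        \n\t"
            "   bne     1b            \n\t"   /* lost exclusivity: retry */
            : "=&r"(old), "=&r"(sum), "=&r"(fail)
            : "r"(ptr), "r"(incr)
            : "cc", "memory");
        return old;
    }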
+ let mayLoad = 1, mayStore = 1 in { def ATOMIC_LOAD_ADD_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_SUB_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_AND_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_OR_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_XOR_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_NAND_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MIN_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MAX_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMIN_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umin_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMAX_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umax_8 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_SWAP_I8 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$new, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_CMP_SWAP_I8 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_ADD_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_SUB_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_AND_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_OR_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), 
NoItinerary, - [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_XOR_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_NAND_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MIN_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MAX_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMIN_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umin_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMAX_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umax_16 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_SWAP_I16 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$new, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_CMP_SWAP_I16 : PseudoInst< + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_ADD_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_SUB_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_AND_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_OR_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_XOR_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_NAND_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary, - [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$incr, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MIN_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_min_32 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins 
GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_MAX_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_max_32 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMIN_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umin_32 GPR:$ptr, GPR:$val))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_LOAD_UMAX_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$val), NoItinerary, - [(set GPR:$dst, (atomic_load_umax_32 GPR:$ptr, GPR:$val))]>; - - def ATOMIC_SWAP_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>; - def ATOMIC_SWAP_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$val, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_SWAP_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>; - - def ATOMIC_CMP_SWAP_I8 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>; - def ATOMIC_CMP_SWAP_I16 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>; + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$new, i32imm:$ordering), + NoItinerary, []>; def ATOMIC_CMP_SWAP_I32 : PseudoInst< - (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary, - [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>; -} + (outs GPR:$dst), + (ins GPR:$ptr, GPR:$old, GPR:$new, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_ADD_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_SUB_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_AND_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_OR_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_XOR_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_NAND_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_MIN_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_MAX_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_UMIN_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_LOAD_UMAX_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_SWAP_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, 
GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; + def ATOMIC_CMP_SWAP_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$cmp1, GPR:$cmp2, + GPR:$set1, GPR:$set2, i32imm:$ordering), + NoItinerary, []>; + } + let mayLoad = 1 in + def ATOMIC_LOAD_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, i32imm:$ordering), + NoItinerary, []>; + let mayStore = 1 in + def ATOMIC_STORE_I64 : PseudoInst< + (outs GPR:$dst1, GPR:$dst2), + (ins GPR:$addr, GPR:$src1, GPR:$src2, i32imm:$ordering), + NoItinerary, []>; } let usesCustomInserter = 1 in { @@ -4344,48 +4525,147 @@ let usesCustomInserter = 1 in { [(ARMcopystructbyval GPR:$dst, GPR:$src, imm:$size, imm:$alignment)]>; } +def ldrex_1 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def ldrex_2 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def ldrex_4 : PatFrag<(ops node:$ptr), (int_arm_ldrex node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + +def strex_1 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i8; +}]>; + +def strex_2 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i16; +}]>; + +def strex_4 : PatFrag<(ops node:$val, node:$ptr), + (int_arm_strex node:$val, node:$ptr), [{ + return cast(N)->getMemoryVT() == MVT::i32; +}]>; + let mayLoad = 1 in { def LDREXB : AIldrex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, - "ldrexb", "\t$Rt, $addr", []>; + NoItinerary, "ldrexb", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_1 addr_offset_none:$addr))]>; def LDREXH : AIldrex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldrexh", "\t$Rt, $addr", []>; + NoItinerary, "ldrexh", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_2 addr_offset_none:$addr))]>; def LDREX : AIldrex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), - NoItinerary, "ldrex", "\t$Rt, $addr", []>; + NoItinerary, "ldrex", "\t$Rt, $addr", + [(set GPR:$Rt, (ldrex_4 addr_offset_none:$addr))]>; let hasExtraDefRegAllocReq = 1 in -def LDREXD: AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), +def LDREXD : AIldrex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), NoItinerary, "ldrexd", "\t$Rt, $addr", []> { let DecoderMethod = "DecodeDoubleRegLoad"; } + +def LDAEXB : AIldaex<0b10, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldaexb", "\t$Rt, $addr", []>; +def LDAEXH : AIldaex<0b11, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldaexh", "\t$Rt, $addr", []>; +def LDAEX : AIldaex<0b00, (outs GPR:$Rt), (ins addr_offset_none:$addr), + NoItinerary, "ldaex", "\t$Rt, $addr", []>; +let hasExtraDefRegAllocReq = 1 in +def LDAEXD : AIldaex<0b01, (outs GPRPairOp:$Rt),(ins addr_offset_none:$addr), + NoItinerary, "ldaexd", "\t$Rt, $addr", []> { + let DecoderMethod = "DecodeDoubleRegLoad"; +} } let mayStore = 1, Constraints = "@earlyclobber $Rd" in { def STREXB: AIstrex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strexb", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strexb", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_1 GPR:$Rt, addr_offset_none:$addr))]>; def STREXH: AIstrex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strexh", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strexh", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_2 GPR:$Rt, 
addr_offset_none:$addr))]>; def STREX : AIstrex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), - NoItinerary, "strex", "\t$Rd, $Rt, $addr", []>; + NoItinerary, "strex", "\t$Rd, $Rt, $addr", + [(set GPR:$Rd, (strex_4 GPR:$Rt, addr_offset_none:$addr))]>; let hasExtraSrcRegAllocReq = 1 in def STREXD : AIstrex<0b01, (outs GPR:$Rd), (ins GPRPairOp:$Rt, addr_offset_none:$addr), NoItinerary, "strexd", "\t$Rd, $Rt, $addr", []> { let DecoderMethod = "DecodeDoubleRegStore"; } +def STLEXB: AIstlex<0b10, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexb", "\t$Rd, $Rt, $addr", + []>; +def STLEXH: AIstlex<0b11, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexh", "\t$Rd, $Rt, $addr", + []>; +def STLEX : AIstlex<0b00, (outs GPR:$Rd), (ins GPR:$Rt, addr_offset_none:$addr), + NoItinerary, "stlex", "\t$Rd, $Rt, $addr", + []>; +let hasExtraSrcRegAllocReq = 1 in +def STLEXD : AIstlex<0b01, (outs GPR:$Rd), + (ins GPRPairOp:$Rt, addr_offset_none:$addr), + NoItinerary, "stlexd", "\t$Rd, $Rt, $addr", []> { + let DecoderMethod = "DecodeDoubleRegStore"; +} } - -def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", []>, +def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex", + [(int_arm_clrex)]>, Requires<[IsARM, HasV7]> { let Inst{31-0} = 0b11110101011111111111000000011111; } +def : ARMPat<(and (ldrex_1 addr_offset_none:$addr), 0xff), + (LDREXB addr_offset_none:$addr)>; +def : ARMPat<(and (ldrex_2 addr_offset_none:$addr), 0xffff), + (LDREXH addr_offset_none:$addr)>; +def : ARMPat<(strex_1 (and GPR:$Rt, 0xff), addr_offset_none:$addr), + (STREXB GPR:$Rt, addr_offset_none:$addr)>; +def : ARMPat<(strex_2 (and GPR:$Rt, 0xffff), addr_offset_none:$addr), + (STREXH GPR:$Rt, addr_offset_none:$addr)>; + +class acquiring_load + : PatFrag<(ops node:$ptr), (base node:$ptr), [{ + AtomicOrdering Ordering = cast(N)->getOrdering(); + return Ordering == Acquire || Ordering == SequentiallyConsistent; +}]>; + +def atomic_load_acquire_8 : acquiring_load; +def atomic_load_acquire_16 : acquiring_load; +def atomic_load_acquire_32 : acquiring_load; + +class releasing_store + : PatFrag<(ops node:$ptr, node:$val), (base node:$ptr, node:$val), [{ + AtomicOrdering Ordering = cast(N)->getOrdering(); + return Ordering == Release || Ordering == SequentiallyConsistent; +}]>; + +def atomic_store_release_8 : releasing_store; +def atomic_store_release_16 : releasing_store; +def atomic_store_release_32 : releasing_store; + +let AddedComplexity = 8 in { + def : ARMPat<(atomic_load_acquire_8 addr_offset_none:$addr), (LDAB addr_offset_none:$addr)>; + def : ARMPat<(atomic_load_acquire_16 addr_offset_none:$addr), (LDAH addr_offset_none:$addr)>; + def : ARMPat<(atomic_load_acquire_32 addr_offset_none:$addr), (LDA addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_8 addr_offset_none:$addr, GPR:$val), (STLB GPR:$val, addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_16 addr_offset_none:$addr, GPR:$val), (STLH GPR:$val, addr_offset_none:$addr)>; + def : ARMPat<(atomic_store_release_32 addr_offset_none:$addr, GPR:$val), (STL GPR:$val, addr_offset_none:$addr)>; +} + // SWP/SWPB are deprecated in V6/V7. 
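The acquiring_load/releasing_store patterns above mean that, on a HasV8 target, C11 acquire and release operations can map straight onto the new LDA*/STL* forms rather than plain accesses fenced with dmb. A minimal sketch, assuming a C11 <stdatomic.h> is available (function names are illustrative):

    #include <stdatomic.h>
    #include <stdint.h>

    /* Expected selection with the patterns above: lda / stl, with no
       separate dmb barriers. */
    uint32_t load_acquire(const _Atomic uint32_t *p)
    {
        return atomic_load_explicit(p, memory_order_acquire);
    }

    void store_release(_Atomic uint32_t *p, uint32_t v)
    {
        atomic_store_explicit(p, v, memory_order_release);
    }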
let mayLoad = 1, mayStore = 1 in { def SWP : AIswp<0, (outs GPRnopc:$Rt), - (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>; + (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swp", []>, + Requires<[PreV8]>; def SWPB: AIswp<1, (outs GPRnopc:$Rt), - (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>; + (ins GPRnopc:$Rt2, addr_offset_none:$addr), "swpb", []>, + Requires<[PreV8]>; } //===----------------------------------------------------------------------===// @@ -4396,7 +4676,8 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp", "\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]> { + imm:$CRm, imm:$opc2)]>, + Requires<[PreV8]> { bits<4> opc1; bits<4> CRn; bits<4> CRd; @@ -4413,11 +4694,12 @@ def CDP : ABI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, let Inst{23-20} = opc1; } -def CDP2 : ABXI<0b1110, (outs), (ins pf_imm:$cop, imm0_15:$opc1, +def CDP2 : ABXI<0b1110, (outs), (ins p_imm:$cop, imm0_15:$opc1, c_imm:$CRd, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), NoItinerary, "cdp2\t$cop, $opc1, $CRd, $CRn, $CRm, $opc2", [(int_arm_cdp2 imm:$cop, imm:$opc1, imm:$CRd, imm:$CRn, - imm:$CRm, imm:$opc2)]> { + imm:$CRm, imm:$opc2)]>, + Requires<[PreV8]> { let Inst{31-28} = 0b1111; bits<4> opc1; bits<4> CRn; @@ -4595,10 +4877,10 @@ defm LDC : LdStCop <1, 0, "ldc">; defm LDCL : LdStCop <1, 1, "ldcl">; defm STC : LdStCop <0, 0, "stc">; defm STCL : LdStCop <0, 1, "stcl">; -defm LDC2 : LdSt2Cop<1, 0, "ldc2">; -defm LDC2L : LdSt2Cop<1, 1, "ldc2l">; -defm STC2 : LdSt2Cop<0, 0, "stc2">; -defm STC2L : LdSt2Cop<0, 1, "stc2l">; +defm LDC2 : LdSt2Cop<1, 0, "ldc2">, Requires<[PreV8]>; +defm LDC2L : LdSt2Cop<1, 1, "ldc2l">, Requires<[PreV8]>; +defm STC2 : LdSt2Cop<0, 0, "stc2">, Requires<[PreV8]>; +defm STC2L : LdSt2Cop<0, 1, "stc2l">, Requires<[PreV8]>; //===----------------------------------------------------------------------===// // Move between coprocessor and ARM core register. 
@@ -4631,16 +4913,17 @@ def MCR : MovRCopro<"mcr", 0 /* from ARM core register to coprocessor */, (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>; + imm:$CRm, imm:$opc2)]>, + ComplexDeprecationPredicate<"MCR">; def : ARMInstAlias<"mcr${p} $cop, $opc1, $Rt, $CRn, $CRm", (MCR p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; def MRC : MovRCopro<"mrc", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), []>; def : ARMInstAlias<"mrc${p} $cop, $opc1, $Rt, $CRn, $CRm", - (MRC GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (MRC GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0, pred:$p)>; def : ARMPat<(int_arm_mrc imm:$cop, imm:$opc1, imm:$CRn, imm:$CRm, imm:$opc2), @@ -4650,7 +4933,7 @@ class MovRCopro2 pattern> : ABXI<0b1110, oops, iops, NoItinerary, !strconcat(opc, "\t$cop, $opc1, $Rt, $CRn, $CRm, $opc2"), pattern> { - let Inst{31-28} = 0b1111; + let Inst{31-24} = 0b11111110; let Inst{20} = direction; let Inst{4} = 1; @@ -4674,16 +4957,18 @@ def MCR2 : MovRCopro2<"mcr2", 0 /* from ARM core register to coprocessor */, (ins p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, imm0_7:$opc2), [(int_arm_mcr2 imm:$cop, imm:$opc1, GPR:$Rt, imm:$CRn, - imm:$CRm, imm:$opc2)]>; + imm:$CRm, imm:$opc2)]>, + Requires<[PreV8]>; def : ARMInstAlias<"mcr2$ $cop, $opc1, $Rt, $CRn, $CRm", (MCR2 p_imm:$cop, imm0_7:$opc1, GPR:$Rt, c_imm:$CRn, c_imm:$CRm, 0)>; def MRC2 : MovRCopro2<"mrc2", 1 /* from coprocessor to ARM core register */, - (outs GPR:$Rt), + (outs GPRwithAPSR:$Rt), (ins p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, - imm0_7:$opc2), []>; + imm0_7:$opc2), []>, + Requires<[PreV8]>; def : ARMInstAlias<"mrc2$ $cop, $opc1, $Rt, $CRn, $CRm", - (MRC2 GPR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, + (MRC2 GPRwithAPSR:$Rt, p_imm:$cop, imm0_7:$opc1, c_imm:$CRn, c_imm:$CRm, 0)>; def : ARMV5TPat<(int_arm_mrc2 imm:$cop, imm:$opc1, imm:$CRn, @@ -4718,7 +5003,8 @@ def MRRC : MovRRCopro<"mrrc", 1 /* from coprocessor to ARM core register */>; class MovRRCopro2 pattern = []> : ABXI<0b1100, (outs), (ins p_imm:$cop, imm0_15:$opc1, GPRnopc:$Rt, GPRnopc:$Rt2, c_imm:$CRm), NoItinerary, - !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern> { + !strconcat(opc, "\t$cop, $opc1, $Rt, $Rt2, $CRm"), pattern>, + Requires<[PreV8]> { let Inst{31-28} = 0b1111; let Inst{23-21} = 0b010; let Inst{20} = direction; @@ -4820,7 +5106,7 @@ def MSRi : ABI<0b0011, (outs), (ins msr_mask:$mask, so_imm:$a), NoItinerary, let isCall = 1, Defs = [R0, R12, LR, CPSR], Uses = [SP] in { def TPsoft : PseudoInst<(outs), (ins), IIC_Br, - [(set R0, ARMthread_pointer)]>; + [(set R0, ARMthread_pointer)]>, Sched<[WriteBr]>; } //===----------------------------------------------------------------------===// @@ -4884,7 +5170,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in def MOVPCRX : ARMPseudoExpand<(outs), (ins GPR:$dst), 4, IIC_Br, [(brind GPR:$dst)], (MOVr PC, GPR:$dst, (ops 14, zero_reg), zero_reg)>, - Requires<[IsARM, NoV4T]>; + Requires<[IsARM, NoV4T]>, Sched<[WriteBr]>; // Large immediate handling. 
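Relating the MRC changes and the TPsoft pseudo above: TPsoft lowers the thread pointer to a call to __aeabi_read_tp, while cores with the CP15 thread registers can read it directly with a single mrc. A sketch of the direct form, assuming GNU-style inline assembly (read_tp_hard is an invented name):

    #include <stdint.h>

    /* Read TPIDRURO, the user read-only thread ID register, instead of
       calling __aeabi_read_tp through the TPsoft pseudo. */
    static inline uint32_t read_tp_hard(void)
    {
        uint32_t tp;
        __asm__("mrc p15, 0, %0, c13, c0, 3" : "=r"(tp));
        return tp;
    }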
@@ -5153,10 +5439,10 @@ def : MnemonicAlias<"rfeed", "rfeib">; def : MnemonicAlias<"rfe", "rfeia">; // SRS aliases -def : MnemonicAlias<"srsfa", "srsda">; -def : MnemonicAlias<"srsea", "srsdb">; -def : MnemonicAlias<"srsfd", "srsia">; -def : MnemonicAlias<"srsed", "srsib">; +def : MnemonicAlias<"srsfa", "srsib">; +def : MnemonicAlias<"srsea", "srsia">; +def : MnemonicAlias<"srsfd", "srsdb">; +def : MnemonicAlias<"srsed", "srsda">; def : MnemonicAlias<"srs", "srsia">; // QSAX == QSUBADDX @@ -5233,7 +5519,7 @@ def RORi : ARMAsmPseudo<"ror${s}${p} $Rd, $Rm, $imm", cc_out:$s)>; } def RRXi : ARMAsmPseudo<"rrx${s}${p} $Rd, $Rm", - (ins GPRnopc:$Rd, GPRnopc:$Rm, pred:$p, cc_out:$s)>; + (ins GPR:$Rd, GPR:$Rm, pred:$p, cc_out:$s)>; let TwoOperandAliasConstraint = "$Rn = $Rd" in { def ASRr : ARMAsmPseudo<"asr${s}${p} $Rd, $Rn, $Rm", (ins GPRnopc:$Rd, GPRnopc:$Rn, GPRnopc:$Rm, pred:$p, @@ -5269,4 +5555,5 @@ def : InstAlias<"umull${s}${p} $RdLo, $RdHi, $Rn, $Rm", // 'it' blocks in ARM mode just validate the predicates. The IT itself // is discarded. -def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>; +def ITasm : ARMAsmPseudo<"it$mask $cc", (ins it_pred:$cc, it_mask:$mask)>, + ComplexDeprecationPredicate<"IT">; diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td index 896fd0f..43bd4c2 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrNEON.td @@ -626,7 +626,7 @@ class VLD1D op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VLD1Q op7_4, string Dt> : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd), @@ -634,7 +634,7 @@ class VLD1Q op7_4, string Dt> "vld1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VLD1d8 : VLD1D<{0,0,0,?}, "8">; @@ -655,16 +655,14 @@ multiclass VLD1DWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0111,op7_4, (outs VecListOneD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } multiclass VLD1QWB op7_4, string Dt> { @@ -674,16 +672,14 @@ multiclass VLD1QWB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b1010,op7_4, (outs VecListDPair:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -703,7 +699,7 @@ class VLD1D3 op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D3WB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0110, op7_4, (outs VecListThreeD:$Vd, GPR:$wb), @@ -712,16 +708,14 @@ multiclass VLD1D3WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0110,op7_4, (outs VecListThreeD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -744,7 +738,7 @@ class VLD1D4 op7_4, string Dt> "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VLD1D4WB op7_4, string Dt> { def _fixed : NLdSt<0,0b10,0b0010, op7_4, (outs VecListFourD:$Vd, GPR:$wb), @@ -753,16 +747,14 @@ multiclass VLD1D4WB op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b10,0b0010,op7_4, (outs VecListFourD:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), IIC_VLD1x2u, "vld1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -786,7 +778,7 @@ class VLD2 op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, "vld2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def VLD2d8 : VLD2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VLD2>; @@ -810,16 +802,14 @@ multiclass VLD2WB op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. 
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b10, op11_8, op7_4, (outs VdTy:$Vd, GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm), itin, "vld2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; + let DecoderMethod = "DecodeVLDST2Instruction"; } } @@ -853,7 +843,7 @@ class VLD3D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8 : VLD3D<0b0100, {0,0,0,?}, "8">; @@ -872,7 +862,7 @@ class VLD3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld3", Dt, "\\{$Vd, $dst2, $dst3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VLD3d8_UPD : VLD3DWB<0b0100, {0,0,0,?}, "8">; @@ -912,7 +902,7 @@ class VLD4D<bits<4> op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8 : VLD4D<0b0000, {0,0,?,?}, "8">; @@ -931,7 +921,7 @@ class VLD4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vld4", Dt, "\\{$Vd, $dst2, $dst3, $dst4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVLDInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VLD4d8_UPD : VLD4DWB<0b0000, {0,0,?,?}, "8">; @@ -1348,7 +1338,6 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListOneDAllLanes:$Vd, GPR:$wb), @@ -1357,7 +1346,6 @@ multiclass VLD1DUPWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { @@ -1369,7 +1357,6 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1100, op7_4, (outs VecListDPairAllLanes:$Vd, GPR:$wb), @@ -1378,7 +1365,6 @@ multiclass VLD1QDUPWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD1DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1419,7 +1405,6 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; - let AsmMatchConverter = "cvtVLDwbFixed"; } def _register : NLdSt<1, 0b10, 0b1101, op7_4, (outs VdTy:$Vd, GPR:$wb), @@ -1428,7 +1413,6 @@ multiclass VLD2DUPWB<bits<4> op7_4, string Dt, RegisterOperand VdTy> { "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; let DecoderMethod = "DecodeVLD2DupInstruction"; - let AsmMatchConverter = "cvtVLDwbRegister"; } } @@ -1580,14 +1564,14 @@ class VST1D<bits<4> op7_4, string Dt> IIC_VST1, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } class VST1Q<bits<4> op7_4, string Dt> : NLdSt<0,0b00,0b1010,op7_4, (outs), (ins addrmode6:$Rn, VecListDPair:$Vd), IIC_VST1x2, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def VST1d8 : VST1D<{0,0,0,?}, "8">; @@ -1608,8 +1592,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0111,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListOneD:$Vd), @@ -1617,8 +1600,7 @@ multiclass VST1DWB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } multiclass VST1QWB<bits<4> op7_4, string Dt> { @@ -1628,8 +1610,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b1010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListDPair:$Vd), @@ -1637,8 +1618,7 @@ multiclass VST1QWB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -1659,7 +1639,7 @@ class VST1D3<bits<4> op7_4, string Dt> IIC_VST1x3, "vst1", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D3WB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), @@ -1668,8 +1648,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0110,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListThreeD:$Vd), @@ -1677,8 +1656,7 @@ multiclass VST1D3WB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -1704,7 +1682,7 @@ class VST1D4<bits<4> op7_4, string Dt> []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST1Instruction"; } multiclass VST1D4WB<bits<4> op7_4, string Dt> { def _fixed : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), @@ -1713,8 +1691,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST1Instruction"; } def _register : NLdSt<0,0b00,0b0010,op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), @@ -1722,8 +1699,7 @@ multiclass VST1D4WB<bits<4> op7_4, string Dt> { "vst1", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST1Instruction"; } } @@ -1748,7 +1724,7 @@ class VST2<bits<4> op11_8, bits<4> op7_4, string Dt, RegisterOperand VdTy, itin, "vst2", Dt, "$Vd, $Rn", "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def VST2d8 : VST2<0b1000, {0,0,?,?}, "8", VecListDPair, IIC_VST2>; @@ -1772,16 +1748,14 @@ multiclass VST2DWB<bits<4> op11_8, bits<4> op7_4, string Dt, "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits. let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, op11_8, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VdTy:$Vd), IIC_VLD1u, "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST2Instruction"; } } multiclass VST2QWB<bits<4> op7_4, string Dt> { @@ -1791,8 +1765,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { "$Rn.addr = $wb", []> { let Rm = 0b1101; // NLdSt will assign to the right encoding bits.
let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbFixed"; + let DecoderMethod = "DecodeVLDST2Instruction"; } def _register : NLdSt<0, 0b00, 0b0011, op7_4, (outs GPR:$wb), (ins addrmode6:$Rn, rGPR:$Rm, VecListFourD:$Vd), @@ -1800,8 +1773,7 @@ multiclass VST2QWB<bits<4> op7_4, string Dt> { "vst2", Dt, "$Vd, $Rn, $Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; - let AsmMatchConverter = "cvtVSTwbRegister"; + let DecoderMethod = "DecodeVLDST2Instruction"; } } @@ -1835,7 +1807,7 @@ class VST3D<bits<4> op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn", "", []> { let Rm = 0b1111; let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8 : VST3D<0b0100, {0,0,0,?}, "8">; @@ -1854,7 +1826,7 @@ class VST3DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vst3", Dt, "\\{$Vd, $src2, $src3\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{4} = Rn{4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST3Instruction"; } def VST3d8_UPD : VST3DWB<0b0100, {0,0,0,?}, "8">; @@ -1894,7 +1866,7 @@ class VST4D<bits<4> op11_8, bits<4> op7_4, string Dt> "", []> { let Rm = 0b1111; let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8 : VST4D<0b0000, {0,0,?,?}, "8">; @@ -1913,7 +1885,7 @@ class VST4DWB<bits<4> op11_8, bits<4> op7_4, string Dt> "vst4", Dt, "\\{$Vd, $src2, $src3, $src4\\}, $Rn$Rm", "$Rn.addr = $wb", []> { let Inst{5-4} = Rn{5-4}; - let DecoderMethod = "DecodeVSTInstruction"; + let DecoderMethod = "DecodeVLDST4Instruction"; } def VST4d8_UPD : VST4DWB<0b0000, {0,0,?,?}, "8">; @@ -2379,6 +2351,40 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, (ins QPR:$Vm), itin, OpcodeStr, Dt, "$Vd, $Vm", "", [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; +// Same as above, but not predicated. +class N2VDIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<0b10, op17_16, op10_8, op7, 0, (outs DPR:$Vd), (ins DPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vm))))]>; + +class N2VQIntnp<bits<2> op17_16, bits<3> op10_8, bit op7, + InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<0b10, op17_16, op10_8, op7, 1, (outs QPR:$Vd), (ins QPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; + +// Similar to NV2VQIntnp with some more encoding bits exposed (crypto). +class N2VQIntXnp<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vm))))]>; + +// Same as N2VQIntXnp but with Vd as a src register. +class N2VQIntX2np<bits<2> op19_18, bits<2> op17_16, bits<3> op10_8, bit op6, + bit op7, InstrItinClass itin, string OpcodeStr, string Dt, + ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> + : N2Vnp<op19_18, op17_16, op10_8, op7, op6, (outs QPR:$Vd), (ins QPR:$src, QPR:$Vm), + itin, OpcodeStr, Dt, ResTy, OpTy, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vm))))]> { + let Constraints = "$src = $Vd"; +} + // Narrow 2-register operations.
class N2VN<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18, bits<2> op17_16, bits<5> op11_7, bit op6, bit op4, @@ -2541,6 +2547,16 @@ class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VDIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs DPR:$Vd), (ins DPR:$Vn, DPR:$Vm), f, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set DPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; + class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane32<0, 1, op21_20, op11_8, 1, 0, @@ -2552,6 +2568,7 @@ class N3VDIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, imm:$lane)))))]> { let isCommutable = 0; } + class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType Ty, SDPatternOperator IntOp> : N3VLane16<0, 1, op21_20, op11_8, 1, 0, @@ -2584,6 +2601,29 @@ class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, let TwoOperandAliasConstraint = "$Vn = $Vd"; let isCommutable = Commutable; } + +class N3VQIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs QPR:$Vd), (ins QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]>; + +// Same as N3VQIntnp but with Vd as a src register. +class N3VQInt3np<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, Format f, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs QPR:$Vd), (ins QPR:$src, QPR:$Vn, QPR:$Vm), f, itin, OpcodeStr, + Dt, ResTy, OpTy, IntOp, Commutable, + [(set QPR:$Vd, (ResTy (IntOp (OpTy QPR:$src), (OpTy QPR:$Vn), (OpTy QPR:$Vm))))]> { + let Constraints = "$src = $Vd"; +} + class N3VQIntSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -2834,6 +2874,7 @@ class N3VL<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (OpNode (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + class N3VLSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType TyQ, ValueType TyD, SDNode OpNode> @@ -2889,6 +2930,17 @@ class N3VLInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4, [(set QPR:$Vd, (TyQ (IntOp (TyD DPR:$Vn), (TyD DPR:$Vm))))]> { let isCommutable = Commutable; } + +// Same as above, but not predicated.
+class N3VLIntnp<bits<5> op27_23, bits<2> op21_20, bits<4> op11_8, bit op6, + bit op4, InstrItinClass itin, string OpcodeStr, + string Dt, ValueType ResTy, ValueType OpTy, + SDPatternOperator IntOp, bit Commutable> + : N3Vnp<op27_23, op21_20, op11_8, op6, op4, + (outs QPR:$Vd), (ins DPR:$Vn, DPR:$Vm), N3RegFrm, itin, OpcodeStr, Dt, + ResTy, OpTy, IntOp, Commutable, + [(set QPR:$Vd, (ResTy (IntOp (OpTy DPR:$Vn), (OpTy DPR:$Vm))))]>; + class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin, string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy, SDPatternOperator IntOp> @@ -3965,12 +4017,18 @@ defm VQADDu : N3VInt_QHSD<1, 0, 0b0000, 1, N3RegFrm, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q, IIC_VBINi4Q, "vqadd", "u", int_arm_neon_vqaddu, 1>; // VADDHN : Vector Add and Narrow Returning High Half (D = Q + Q) -defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", - int_arm_neon_vaddhn, 1>; +defm VADDHN : N3VNInt_HSD<0,1,0b0100,0, "vaddhn", "i", null_frag, 1>; // VRADDHN : Vector Rounding Add and Narrow Returning High Half (D = Q + Q) defm VRADDHN : N3VNInt_HSD<1,1,0b0100,0, "vraddhn", "i", int_arm_neon_vraddhn, 1>; +def : Pat<(v8i8 (trunc (NEONvshru (add (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VADDHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (add (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VADDHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (add (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VADDHNv2i32 QPR:$Vn, QPR:$Vm)>; + // Vector Multiply Operations. // VMUL : Vector Multiply (integer, polynomial and floating-point) @@ -4008,6 +4066,17 @@ def : Pat<(v4f32 (fmul (v4f32 QPR:$src1), (DSubReg_i32_reg imm:$lane))), (SubReg_i32_lane imm:$lane)))>; + +def : Pat<(v2f32 (fmul DPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfd DPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; +def : Pat<(v4f32 (fmul QPR:$Rn, (NEONvdup (f32 SPR:$Rm)))), + (VMULslfq QPR:$Rn, + (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$Rm, ssub_0), + (i32 0))>; + + // VQDMULH : Vector Saturating Doubling Multiply Returning High Half defm VQDMULH : N3VInt_HS<0, 0, 0b1011, 0, N3RegFrm, IIC_VMULi16D, IIC_VMULi32D, IIC_VMULi16Q, IIC_VMULi32Q, @@ -4053,12 +4122,18 @@ def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1), (SubReg_i32_lane imm:$lane)))>; // VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D) -defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "s", NEONvmulls, 1>; -defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, - "vmull", "u", NEONvmullu, 1>; -def VMULLp : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", - v8i16, v8i8, int_arm_neon_vmullp, 1>; +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "NEONData" in { + defm VMULLs : N3VL_QHS<0,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "s", NEONvmulls, 1>; + defm VMULLu : N3VL_QHS<1,1,0b1100,0, IIC_VMULi16D, IIC_VMULi32D, + "vmull", "u", NEONvmullu, 1>; + def VMULLp8 : N3VLInt<0, 1, 0b00, 0b1110, 0, IIC_VMULi16D, "vmull", "p8", + v8i16, v8i8, int_arm_neon_vmullp, 1>; + def VMULLp64 : N3VLIntnp<0b00101, 0b10, 0b1110, 0, 0, NoItinerary, + "vmull", "p64", v2i64, v1i64, int_arm_neon_vmullp, 1>, + Requires<[HasV8, HasCrypto]>; +} defm VMULLsls : N3VLSL_HS<0, 0b1010, IIC_VMULi16D, "vmull", "s", NEONvmulls>; defm VMULLslu : N3VLSL_HS<1, 0b1010, IIC_VMULi16D, "vmull", "u", NEONvmullu>; @@ -4125,8 +4200,27 @@ defm VMLALslu : N3VLMulOpSL_HS<1, 0b0010, "vmlal", "u", NEONvmullu, add>; // VQDMLAL : Vector Saturating Doubling Multiply Accumulate Long (Q += D * D) defm VQDMLAL : N3VLInt3_HS<0, 1, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlal", "s", int_arm_neon_vqdmlal>; -defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", int_arm_neon_vqdmlal>; + 
"vqdmlal", "s", null_frag>; +defm VQDMLALsl: N3VLInt3SL_HS<0, 0b0011, "vqdmlal", "s", null_frag>; + +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLALv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLALv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqadds (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLALslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqadds (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLALslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; // VMLS : Vector Multiply Subtract (integer and floating-point) defm VMLS : N3VMulOp_QHS<1, 0, 0b1001, 0, IIC_VMACi16D, IIC_VMACi32D, @@ -4182,25 +4276,44 @@ defm VMLSLslu : N3VLMulOpSL_HS<1, 0b0110, "vmlsl", "u", NEONvmullu, sub>; // VQDMLSL : Vector Saturating Doubling Multiply Subtract Long (Q -= D * D) defm VQDMLSL : N3VLInt3_HS<0, 1, 0b1011, 0, IIC_VMACi16D, IIC_VMACi32D, - "vqdmlsl", "s", int_arm_neon_vqdmlsl>; -defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", int_arm_neon_vqdmlsl>; + "vqdmlsl", "s", null_frag>; +defm VQDMLSLsl: N3VLInt3SL_HS<0, 0b111, "vqdmlsl", "s", null_frag>; + +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 DPR:$Vm))))), + (VQDMLSLv4i32 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 DPR:$Vm))))), + (VQDMLSLv2i64 QPR:$src1, DPR:$Vn, DPR:$Vm)>; +def : Pat<(v4i32 (int_arm_neon_vqsubs (v4i32 QPR:$src1), + (v4i32 (int_arm_neon_vqdmull (v4i16 DPR:$Vn), + (v4i16 (NEONvduplane (v4i16 DPR_8:$Vm), + imm:$lane)))))), + (VQDMLSLslv4i16 QPR:$src1, DPR:$Vn, DPR_8:$Vm, imm:$lane)>; +def : Pat<(v2i64 (int_arm_neon_vqsubs (v2i64 QPR:$src1), + (v2i64 (int_arm_neon_vqdmull (v2i32 DPR:$Vn), + (v2i32 (NEONvduplane (v2i32 DPR_VFP2:$Vm), + imm:$lane)))))), + (VQDMLSLslv2i32 QPR:$src1, DPR:$Vn, DPR_VFP2:$Vm, imm:$lane)>; // Fused Vector Multiply-Accumulate and Fused Multiply-Subtract Operations. 
def VFMAfd : N3VDMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACD, "vfma", "f32", v2f32, fmul_su, fadd_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; def VFMAfq : N3VQMulOp<0, 0, 0b00, 0b1100, 1, IIC_VFMACQ, "vfma", "f32", v4f32, fmul_su, fadd_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; // Fused Vector Multiply Subtract (floating-point) def VFMSfd : N3VDMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACD, "vfms", "f32", v2f32, fmul_su, fsub_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; def VFMSfq : N3VQMulOp<0, 0, 0b10, 0b1100, 1, IIC_VFMACQ, "vfms", "f32", v4f32, fmul_su, fsub_mlx>, - Requires<[HasVFP4,UseFusedMAC]>; + Requires<[HasNEON,HasVFP4,UseFusedMAC]>; // Match @llvm.fma.* intrinsics def : Pat<(v2f32 (fma DPR:$Vn, DPR:$Vm, DPR:$src1)), @@ -4248,12 +4361,18 @@ defm VQSUBu : N3VInt_QHSD<1, 0, 0b0010, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, "vqsub", "u", int_arm_neon_vqsubu, 0>; // VSUBHN : Vector Subtract and Narrow Returning High Half (D = Q - Q) -defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", - int_arm_neon_vsubhn, 0>; +defm VSUBHN : N3VNInt_HSD<0,1,0b0110,0, "vsubhn", "i", null_frag, 0>; // VRSUBHN : Vector Rounding Subtract and Narrow Returning High Half (D=Q-Q) defm VRSUBHN : N3VNInt_HSD<1,1,0b0110,0, "vrsubhn", "i", int_arm_neon_vrsubhn, 0>; +def : Pat<(v8i8 (trunc (NEONvshru (sub (v8i16 QPR:$Vn), QPR:$Vm), 8))), + (VSUBHNv8i8 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v4i16 (trunc (NEONvshru (sub (v4i32 QPR:$Vn), QPR:$Vm), 16))), + (VSUBHNv4i16 QPR:$Vn, QPR:$Vm)>; +def : Pat<(v2i32 (trunc (NEONvshru (sub (v2i64 QPR:$Vn), QPR:$Vm), 32))), + (VSUBHNv2i32 QPR:$Vn, QPR:$Vm)>; + // Vector Comparisons. // VCEQ : Vector Compare Equal @@ -4659,6 +4778,18 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmax", "f32", v4f32, v4f32, int_arm_neon_vmaxs, 1>; +// VMAXNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMAXNMND : N3VDIntnp<0b00110, 0b00, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v2f32, v2f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMAXNMNQ : N3VQIntnp<0b00110, 0b00, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vmaxnm", "f32", + v4f32, v4f32, int_arm_neon_vmaxnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // VMIN : Vector Minimum defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, N3RegFrm, IIC_VSUBi4D, IIC_VSUBi4D, IIC_VSUBi4Q, IIC_VSUBi4Q, @@ -4673,6 +4804,18 @@ def VMINfq : N3VQInt<0, 0, 0b10, 0b1111, 0, N3RegFrm, IIC_VBINQ, "vmin", "f32", v4f32, v4f32, int_arm_neon_vmins, 1>; +// VMINNM +let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def VMINNMND : N3VDIntnp<0b00110, 0b10, 0b1111, 0, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v2f32, v2f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; + def VMINNMNQ : N3VQIntnp<0b00110, 0b10, 0b1111, 1, 1, + N3RegFrm, NoItinerary, "vminnm", "f32", + v4f32, v4f32, int_arm_neon_vminnm, 1>, + Requires<[HasV8, HasNEON]>; +} + // Vector Pairwise Operations. // VPADD : Vector Pairwise Add @@ -5015,10 +5158,10 @@ def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0, // Vector Move Operations. 
// VMOV : Vector Move (Register) -def : InstAlias<"vmov${p} $Vd, $Vm", - (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; -def : InstAlias<"vmov${p} $Vd, $Vm", - (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p} $Vd, $Vm", + (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; +def : NEONInstAlias<"vmov${p} $Vd, $Vm", + (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; // VMOV : Vector Move (Immediate) @@ -5386,6 +5529,26 @@ def VCVTs2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32", def VCVTu2fq : N2VQ<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32", v4f32, v4i32, uint_to_fp>; +// VCVT{A, N, P, M} +multiclass VCVT_FPI<string op, bits<3> op10_8, SDPatternOperator IntS, + SDPatternOperator IntU> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def SD : N2VDIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v2i32, v2f32, IntS>, Requires<[HasV8, HasNEON]>; + def SQ : N2VQIntnp<0b11, op10_8, 0, NoItinerary, !strconcat("vcvt", op), + "s32.f32", v4i32, v4f32, IntS>, Requires<[HasV8, HasNEON]>; + def UD : N2VDIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v2i32, v2f32, IntU>, Requires<[HasV8, HasNEON]>; + def UQ : N2VQIntnp<0b11, op10_8, 1, NoItinerary, !strconcat("vcvt", op), + "u32.f32", v4i32, v4f32, IntU>, Requires<[HasV8, HasNEON]>; + } +} + +defm VCVTAN : VCVT_FPI<"a", 0b000, int_arm_neon_vcvtas, int_arm_neon_vcvtau>; +defm VCVTNN : VCVT_FPI<"n", 0b001, int_arm_neon_vcvtns, int_arm_neon_vcvtnu>; +defm VCVTPN : VCVT_FPI<"p", 0b010, int_arm_neon_vcvtps, int_arm_neon_vcvtpu>; +defm VCVTMN : VCVT_FPI<"m", 0b011, int_arm_neon_vcvtms, int_arm_neon_vcvtmu>; + // VCVT : Vector Convert Between Floating-Point and Fixed-Point. let DecoderMethod = "DecodeVCVTD" in { def VCVTf2xsd : N2VCvtD<0, 1, 0b1111, 0, 1, "vcvt", "s32.f32", @@ -5409,6 +5572,25 @@ def VCVTxu2fq : N2VCvtQ<1, 1, 0b1110, 0, 1, "vcvt", "f32.u32", v4f32, v4i32, int_arm_neon_vcvtfxu2fp>; } +def : NEONInstAlias<"vcvt${p}.s32.f32 $Dd, $Dm, #0", + (VCVTf2sd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Dd, $Dm, #0", + (VCVTf2ud DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Dd, $Dm, #0", + (VCVTs2fd DPR:$Dd, DPR:$Dm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Dd, $Dm, #0", + (VCVTu2fd DPR:$Dd, DPR:$Dm, pred:$p)>; + +def : NEONInstAlias<"vcvt${p}.s32.f32 $Qd, $Qm, #0", + (VCVTf2sq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.u32.f32 $Qd, $Qm, #0", + (VCVTf2uq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.s32 $Qd, $Qm, #0", + (VCVTs2fq QPR:$Qd, QPR:$Qm, pred:$p)>; +def : NEONInstAlias<"vcvt${p}.f32.u32 $Qd, $Qm, #0", + (VCVTu2fq QPR:$Qd, QPR:$Qm, pred:$p)>; + + // VCVT : Vector Convert Between Half-Precision and Single-Precision.
def VCVTf2h : N2VNInt<0b11, 0b11, 0b01, 0b10, 0b01100, 0, 0, IIC_VUNAQ, "vcvt", "f16.f32", @@ -5509,8 +5691,9 @@ class VEXTd<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> IIC_VEXTD, OpcodeStr, Dt, "$Vd, $Vn, $Vm, $index", "", [(set DPR:$Vd, (Ty (NEONvext (Ty DPR:$Vn), (Ty DPR:$Vm), imm:$index)))]> { - bits<4> index; - let Inst{11-8} = index{3-0}; + bits<3> index; + let Inst{11} = 0b0; + let Inst{10-8} = index{2-0}; } class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> @@ -5525,14 +5708,14 @@ class VEXTq<string OpcodeStr, string Dt, ValueType Ty, Operand immTy> } def VEXTd8 : VEXTd<"vext", "8", v8i8, imm0_7> { - let Inst{11-8} = index{3-0}; + let Inst{10-8} = index{2-0}; } def VEXTd16 : VEXTd<"vext", "16", v4i16, imm0_3> { - let Inst{11-9} = index{2-0}; + let Inst{10-9} = index{1-0}; let Inst{8} = 0b0; } def VEXTd32 : VEXTd<"vext", "32", v2i32, imm0_1> { - let Inst{11-10} = index{1-0}; + let Inst{10} = index{0}; let Inst{9-8} = 0b00; } def : Pat<(v2f32 (NEONvext (v2f32 DPR:$Vn), @@ -5657,6 +5840,77 @@ def VTBX4Pseudo IIC_VTBX4, "$orig = $dst", []>; } // DecoderMethod = "DecodeTBLInstruction" +// VRINT : Vector Rounding +multiclass VRINT_FPI<string op, bits<3> op9_7, SDPatternOperator Int> { + let PostEncoderMethod = "NEONThumb2V8PostEncoder", DecoderNamespace = "v8NEON" in { + def D : N2VDIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v2f32, v2f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + def Q : N2VQIntnp<0b10, 0b100, 0, NoItinerary, + !strconcat("vrint", op), "f32", + v4f32, v4f32, Int>, Requires<[HasV8, HasNEON]> { + let Inst{9-7} = op9_7; + } + } + + def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32", "\t$Dd, $Dm"), + (!cast<Instruction>(NAME#"D") DPR:$Dd, DPR:$Dm)>; + def : NEONInstAlias<!strconcat("vrint", op, ".f32.f32", "\t$Qd, $Qm"), + (!cast<Instruction>(NAME#"Q") QPR:$Qd, QPR:$Qm)>; +} + +defm VRINTNN : VRINT_FPI<"n", 0b000, int_arm_neon_vrintn>; +defm VRINTXN : VRINT_FPI<"x", 0b001, int_arm_neon_vrintx>; +defm VRINTAN : VRINT_FPI<"a", 0b010, int_arm_neon_vrinta>; +defm VRINTZN : VRINT_FPI<"z", 0b011, int_arm_neon_vrintz>; +defm VRINTMN : VRINT_FPI<"m", 0b101, int_arm_neon_vrintm>; +defm VRINTPN : VRINT_FPI<"p", 0b111, int_arm_neon_vrintp>; + +// Cryptography instructions +let PostEncoderMethod = "NEONThumb2DataIPostEncoder", + DecoderNamespace = "v8Crypto" in { + class AES<string op, bit op7, bit op6, SDPatternOperator Int> + : N2VQIntXnp<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class AES2Op<string op, bit op7, bit op6, SDPatternOperator Int> + : N2VQIntX2np<0b00, 0b00, 0b011, op6, op7, NoItinerary, + !strconcat("aes", op), "8", v16i8, v16i8, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntXnp<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N2SHA2Op<string op, bits<2> op17_16, bits<3> op10_8, bit op7, bit op6, + SDPatternOperator Int> + : N2VQIntX2np<0b10, op17_16, op10_8, op6, op7, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int>, + Requires<[HasV8, HasCrypto]>; + class N3SHA3Op<string op, bits<5> op27_23, bits<2> op21_20, SDPatternOperator Int> + : N3VQInt3np<op27_23, op21_20, 0b1100, 1, 0, N3RegFrm, NoItinerary, + !strconcat("sha", op), "32", v4i32, v4i32, Int, 0>, + Requires<[HasV8, HasCrypto]>; +} + +def AESD : AES2Op<"d", 0, 1, int_arm_neon_aesd>; +def AESE : AES2Op<"e", 0, 0, int_arm_neon_aese>; +def AESIMC : AES<"imc", 1, 1, int_arm_neon_aesimc>; +def AESMC : AES<"mc", 1, 0, int_arm_neon_aesmc>; + +def SHA1H : N2SHA<"1h", 0b01, 0b010, 1, 1, int_arm_neon_sha1h>; +def SHA1SU1 : N2SHA2Op<"1su1", 0b10, 0b011, 1, 0, int_arm_neon_sha1su1>; +def SHA256SU0 : N2SHA2Op<"256su0", 0b10, 0b011, 1, 1, int_arm_neon_sha256su0>; +def SHA1C : N3SHA3Op<"1c", 0b00100, 0b00, int_arm_neon_sha1c>; +def SHA1M : N3SHA3Op<"1m", 0b00100, 0b10, int_arm_neon_sha1m>; +def SHA1P : N3SHA3Op<"1p", 0b00100, 0b01, int_arm_neon_sha1p>;
+def SHA1SU0 : N3SHA3Op<"1su0", 0b00100, 0b11, int_arm_neon_sha1su0>; +def SHA256H : N3SHA3Op<"256h", 0b00110, 0b00, int_arm_neon_sha256h>; +def SHA256H2 : N3SHA3Op<"256h2", 0b00110, 0b01, int_arm_neon_sha256h2>; +def SHA256SU1 : N3SHA3Op<"256su1", 0b00110, 0b10, int_arm_neon_sha256su1>; + //===----------------------------------------------------------------------===// // NEON instructions for single-precision FP math //===----------------------------------------------------------------------===// @@ -6697,12 +6951,17 @@ def VST4qWB_register_Asm_32 : (ins VecListFourQ:$list, addrmode6:$addr, rGPR:$Rm, pred:$p)>; -// VMOV takes an optional datatype suffix +// VMOV/VMVN takes an optional datatype suffix defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRd DPR:$Vd, DPR:$Vm, DPR:$Vm, pred:$p)>; defm : NEONDTAnyInstAlias<"vmov${p}", "$Vd, $Vm", (VORRq QPR:$Vd, QPR:$Vm, QPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNd DPR:$Vd, DPR:$Vm, pred:$p)>; +defm : NEONDTAnyInstAlias<"vmvn${p}", "$Vd, $Vm", + (VMVNq QPR:$Vd, QPR:$Vm, pred:$p)>; + // VCLT (register) is an assembler alias for VCGT w/ the operands reversed. // D-register versions. def : NEONInstAlias<"vcle${p}.s8 $Dd, $Dn, $Dm", diff --git a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td index ae7a5c0..af5ef53 100644 --- a/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td +++ b/contrib/llvm/lib/Target/ARM/ARMInstrThumb.td @@ -69,11 +69,6 @@ def thumb_immshifted_shamt : SDNodeXForm<imm, [{ return CurDAG->getTargetConstant(V, MVT::i32); }]>; -// ADR instruction labels. -def t_adrlabel : Operand<i32> { - let EncoderMethod = "getThumbAdrLabelOpValue"; -} - // Scaled 4 immediate. def t_imm0_1020s4_asmoperand: AsmOperandClass { let Name = "Imm0_1020s4"; } def t_imm0_1020s4 : Operand<i32> { @@ -97,12 +92,34 @@ def t_imm0_508s4_neg : Operand<i32> { // Define Thumb specific addressing modes. +// unsigned 8-bit, 2-scaled memory offset +class OperandUnsignedOffset_b8s2 : AsmOperandClass { + let Name = "UnsignedOffset_b8s2"; + let PredicateMethod = "isUnsignedOffset<8, 2>"; +} + +def UnsignedOffset_b8s2 : OperandUnsignedOffset_b8s2; + +// thumb style PC relative operand. signed, 8 bits magnitude, +// two bits shift. can be represented as either [pc, #imm], #imm, +// or relocatable expression... +def ThumbMemPC : AsmOperandClass { + let Name = "ThumbMemPC"; +} + let OperandType = "OPERAND_PCREL" in { def t_brtarget : Operand<OtherVT> { let EncoderMethod = "getThumbBRTargetOpValue"; let DecoderMethod = "DecodeThumbBROperand"; } +// ADR instruction labels. +def t_adrlabel : Operand<i32> { + let EncoderMethod = "getThumbAdrLabelOpValue"; + let PrintMethod = "printAdrLabelOperand<2>"; + let ParserMatchClass = UnsignedOffset_b8s2; +} + def t_bcctarget : Operand<i32> { let EncoderMethod = "getThumbBCCTargetOpValue"; let DecoderMethod = "DecodeThumbBCCTargetOperand"; } @@ -122,6 +139,15 @@ def t_blxtarget : Operand<i32> { let EncoderMethod = "getThumbBLXTargetOpValue"; let DecoderMethod = "DecodeThumbBLXOffset"; } + +// t_addrmode_pc :=