author     rdivacky <rdivacky@FreeBSD.org>   2009-11-18 14:58:34 +0000
committer  rdivacky <rdivacky@FreeBSD.org>   2009-11-18 14:58:34 +0000
commit     d2e985fd323c167e20f77b045a1d99ad166e65db (patch)
tree       6a111e552c75afc66228e3d8f19b6731e4013f10 /lib
parent     ded64d5d348ce8d8c5aa42cf63f6de9dd84b7e89 (diff)
download   FreeBSD-src-d2e985fd323c167e20f77b045a1d99ad166e65db.zip
           FreeBSD-src-d2e985fd323c167e20f77b045a1d99ad166e65db.tar.gz
Update LLVM to r89205.
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 56
-rw-r--r--  lib/Analysis/CMakeLists.txt | 2
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 159
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 191
-rw-r--r--  lib/Analysis/IPA/Andersens.cpp | 6
-rw-r--r--  lib/Analysis/IVUsers.cpp | 14
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 348
-rw-r--r--  lib/Analysis/LazyValueInfo.cpp | 582
-rw-r--r--  lib/Analysis/LiveValues.cpp | 4
-rw-r--r--  lib/Analysis/LoopInfo.cpp | 21
-rw-r--r--  lib/Analysis/MemoryBuiltins.cpp | 114
-rw-r--r--  lib/Analysis/PointerTracking.cpp | 3
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 29
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 112
-rw-r--r--  lib/AsmParser/LLParser.cpp | 2
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.cpp | 134
-rw-r--r--  lib/CodeGen/AggressiveAntiDepBreaker.h | 15
-rw-r--r--  lib/CodeGen/AntiDepBreaker.h | 1
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 153
-rw-r--r--  lib/CodeGen/AsmPrinter/DIE.h | 24
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1020
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.h | 122
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 89
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.h | 12
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfPrinter.h | 2
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfWriter.cpp | 35
-rw-r--r--  lib/CodeGen/BranchFolding.cpp | 689
-rw-r--r--  lib/CodeGen/BranchFolding.h | 77
-rw-r--r--  lib/CodeGen/CodePlacementOpt.cpp | 64
-rw-r--r--  lib/CodeGen/IntrinsicLowering.cpp | 9
-rw-r--r--  lib/CodeGen/LatencyPriorityQueue.cpp | 15
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 32
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 49
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 63
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 36
-rw-r--r--  lib/CodeGen/MachineFunctionAnalysis.cpp | 2
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 85
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 65
-rw-r--r--  lib/CodeGen/MachineModuleInfo.cpp | 2
-rw-r--r--  lib/CodeGen/MachineVerifier.cpp | 23
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 286
-rw-r--r--  lib/CodeGen/PHIElimination.h | 30
-rw-r--r--  lib/CodeGen/PostRASchedulerList.cpp | 15
-rw-r--r--  lib/CodeGen/PreAllocSplitting.cpp | 91
-rw-r--r--  lib/CodeGen/ProcessImplicitDefs.cpp | 9
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 3
-rw-r--r--  lib/CodeGen/PseudoSourceValue.cpp | 29
-rw-r--r--  lib/CodeGen/RegAllocLocal.cpp | 4
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 5
-rw-r--r--  lib/CodeGen/RegisterScavenging.cpp | 7
-rw-r--r--  lib/CodeGen/ScheduleDAG.cpp | 10
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 193
-rw-r--r--  lib/CodeGen/SelectionDAG/CallingConvLower.cpp | 15
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 30
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 93
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/InstrEmitter.h | 2
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 48
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 21
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp | 491
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuild.h | 14
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 34
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 10
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 2
-rw-r--r--  lib/CodeGen/SlotIndexes.cpp | 91
-rw-r--r--  lib/CodeGen/Spiller.cpp | 44
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 44
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 2
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 8
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 57
-rw-r--r--  lib/CompilerDriver/Action.cpp | 29
-rw-r--r--  lib/CompilerDriver/BuiltinOptions.cpp | 2
-rw-r--r--  lib/CompilerDriver/Main.cpp | 22
-rw-r--r--  lib/CompilerDriver/Tool.cpp | 5
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 6
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 10
-rw-r--r--  lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp | 88
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 19
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.h | 10
-rw-r--r--  lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp | 2
-rw-r--r--  lib/ExecutionEngine/JIT/JITEmitter.cpp | 714
-rw-r--r--  lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 81
-rw-r--r--  lib/Linker/LinkArchives.cpp | 3
-rw-r--r--  lib/Linker/LinkItems.cpp | 12
-rw-r--r--  lib/Linker/Linker.cpp | 16
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 4
-rw-r--r--  lib/MC/MCAssembler.cpp | 2
-rw-r--r--  lib/MC/MCContext.cpp | 8
-rw-r--r--  lib/MC/MCExpr.cpp | 3
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 4
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 2
-rw-r--r--  lib/MC/MCSection.cpp | 2
-rw-r--r--  lib/MC/MCSectionELF.cpp | 2
-rw-r--r--  lib/MC/MCSectionMachO.cpp | 2
-rw-r--r--  lib/MC/MCSymbol.cpp | 2
-rw-r--r--  lib/Support/CommandLine.cpp | 9
-rw-r--r--  lib/Support/ConstantRange.cpp | 61
-rw-r--r--  lib/Support/Debug.cpp | 2
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 30
-rw-r--r--  lib/Support/StringExtras.cpp | 22
-rw-r--r--  lib/Support/StringMap.cpp | 6
-rw-r--r--  lib/Support/StringRef.cpp | 57
-rw-r--r--  lib/Support/Timer.cpp | 49
-rw-r--r--  lib/Support/Triple.cpp | 46
-rw-r--r--  lib/System/Host.cpp | 274
-rw-r--r--  lib/System/Unix/Program.inc | 12
-rw-r--r--  lib/Target/ARM/ARM.h | 2
-rw-r--r--  lib/Target/ARM/ARMAddressingModes.h | 18
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 177
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 15
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 90
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 2
-rw-r--r--  lib/Target/ARM/ARMCodeEmitter.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMConstantIslandPass.cpp | 143
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.cpp | 22
-rw-r--r--  lib/Target/ARM/ARMConstantPoolValue.h | 4
-rw-r--r--  lib/Target/ARM/ARMExpandPseudoInsts.cpp | 115
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 63
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 153
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 17
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.cpp | 20
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.h | 9
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 135
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 35
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 10
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 10
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 263
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 99
-rw-r--r--  lib/Target/ARM/ARMSubtarget.cpp | 13
-rw-r--r--  lib/Target/ARM/ARMSubtarget.h | 15
-rw-r--r--  lib/Target/ARM/ARMTargetMachine.cpp | 19
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 54
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 7
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 4
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp | 1
-rw-r--r--  lib/Target/ARM/CMakeLists.txt | 1
-rw-r--r--  lib/Target/ARM/NEONMoveFix.cpp | 8
-rw-r--r--  lib/Target/ARM/NEONPreAllocPass.cpp | 28
-rw-r--r--  lib/Target/ARM/README-Thumb.txt | 8
-rw-r--r--  lib/Target/ARM/README.txt | 7
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/Thumb1InstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 7
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.h | 5
-rw-r--r--  lib/Target/ARM/Thumb2ITBlockPass.cpp | 44
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.cpp | 6
-rw-r--r--  lib/Target/ARM/Thumb2InstrInfo.h | 2
-rw-r--r--  lib/Target/ARM/Thumb2RegisterInfo.h | 5
-rw-r--r--  lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaISelLowering.cpp | 6
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.td | 2
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/Alpha/AlphaRegisterInfo.h | 2
-rw-r--r--  lib/Target/Alpha/AlphaTargetMachine.cpp | 2
-rw-r--r--  lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp | 4
-rw-r--r--  lib/Target/Blackfin/BlackfinISelLowering.cpp | 5
-rw-r--r--  lib/Target/Blackfin/BlackfinInstrInfo.td | 1
-rw-r--r--  lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp | 1
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.cpp | 6
-rw-r--r--  lib/Target/Blackfin/BlackfinRegisterInfo.h | 2
-rw-r--r--  lib/Target/Blackfin/BlackfinTargetMachine.cpp | 2
-rw-r--r--  lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 5
-rw-r--r--  lib/Target/CellSPU/SPUInstrInfo.td | 30
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.cpp | 2
-rw-r--r--  lib/Target/CellSPU/SPURegisterInfo.h | 2
-rw-r--r--  lib/Target/CellSPU/SPUSubtarget.h | 2
-rw-r--r--  lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp | 149
-rw-r--r--  lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp | 33
-rw-r--r--  lib/Target/MSP430/MSP430.td | 6
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 402
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 61
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.h | 6
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.cpp | 24
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 185
-rw-r--r--  lib/Target/MSP430/MSP430MCAsmInfo.cpp | 6
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.cpp | 4
-rw-r--r--  lib/Target/MSP430/MSP430RegisterInfo.h | 2
-rw-r--r--  lib/Target/MSP430/MSP430TargetMachine.cpp | 2
-rw-r--r--  lib/Target/MSP430/README.txt | 2
-rw-r--r--  lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 32
-rw-r--r--  lib/Target/Mips/MipsISelLowering.cpp | 12
-rw-r--r--  lib/Target/Mips/MipsInstrFPU.td | 8
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.cpp | 15
-rw-r--r--  lib/Target/Mips/MipsMachineFunction.h | 1
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.cpp | 19
-rw-r--r--  lib/Target/Mips/MipsRegisterInfo.h | 2
-rw-r--r--  lib/Target/Mips/MipsTargetMachine.cpp | 4
-rw-r--r--  lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16ISelDAGToDAG.cpp | 1
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16RegisterInfo.h | 2
-rw-r--r--  lib/Target/PIC16/PIC16TargetMachine.cpp | 2
-rw-r--r--  lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp | 14
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 39
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.cpp | 84
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.h | 2
-rw-r--r--  lib/Target/PowerPC/PPCSubtarget.h | 4
-rw-r--r--  lib/Target/PowerPC/PPCTargetMachine.cpp | 3
-rw-r--r--  lib/Target/README.txt | 110
-rw-r--r--  lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 1
-rw-r--r--  lib/Target/Sparc/SparcISelLowering.cpp | 15
-rw-r--r--  lib/Target/Sparc/SparcInstrInfo.td | 2
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.cpp | 2
-rw-r--r--  lib/Target/Sparc/SparcRegisterInfo.h | 2
-rw-r--r--  lib/Target/Sparc/SparcTargetMachine.cpp | 2
-rw-r--r--  lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp | 3
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZISelLowering.cpp | 3
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.cpp | 2
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.cpp | 3
-rw-r--r--  lib/Target/SystemZ/SystemZRegisterInfo.h | 2
-rw-r--r--  lib/Target/SystemZ/SystemZTargetMachine.cpp | 2
-rw-r--r--  lib/Target/TargetData.cpp | 302
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 7
-rw-r--r--  lib/Target/TargetSubtarget.cpp | 11
-rw-r--r--  lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp | 2
-rw-r--r--  lib/Target/X86/AsmPrinter/X86MCInstLower.cpp | 2
-rw-r--r--  lib/Target/X86/X86CodeEmitter.cpp | 27
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 2
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 18
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 102
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 8
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 2
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 169
-rw-r--r--  lib/Target/X86/X86InstrInfo.h | 35
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 2
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 48
-rw-r--r--  lib/Target/X86/X86JITInfo.cpp | 5
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 30
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 2
-rw-r--r--  lib/Target/X86/X86Subtarget.cpp | 125
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 12
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 19
-rw-r--r--  lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 2
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 5
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 19
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 6
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 2
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.cpp | 13
-rw-r--r--  lib/Target/XCore/XCoreRegisterInfo.h | 4
-rw-r--r--  lib/Target/XCore/XCoreTargetMachine.cpp | 2
-rw-r--r--  lib/Transforms/Hello/CMakeLists.txt | 2
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 261
-rw-r--r--  lib/Transforms/IPO/Inliner.cpp | 46
-rw-r--r--  lib/Transforms/IPO/LoopExtractor.cpp | 4
-rw-r--r--  lib/Transforms/IPO/StripSymbols.cpp | 54
-rw-r--r--  lib/Transforms/Scalar/ABCD.cpp | 37
-rw-r--r--  lib/Transforms/Scalar/CMakeLists.txt | 1
-rw-r--r--  lib/Transforms/Scalar/CondPropagate.cpp | 289
-rw-r--r--  lib/Transforms/Scalar/ConstantProp.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/DeadStoreElimination.cpp | 193
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 40
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/InstructionCombining.cpp | 631
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 735
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 9
-rw-r--r--  lib/Transforms/Scalar/LoopDeletion.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LoopIndexSplit.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LoopRotation.cpp | 13
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 876
-rw-r--r--  lib/Transforms/Scalar/LoopUnroll.cpp | 177
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 7
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 36
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 11
-rw-r--r--  lib/Transforms/Scalar/SCCVN.cpp | 6
-rw-r--r--  lib/Transforms/Scalar/Scalar.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 41
-rw-r--r--  lib/Transforms/Scalar/TailDuplication.cpp | 3
-rw-r--r--  lib/Transforms/Scalar/TailRecursionElimination.cpp | 32
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 8
-rw-r--r--  lib/Transforms/Utils/CloneFunction.cpp | 71
-rw-r--r--  lib/Transforms/Utils/InlineFunction.cpp | 2
-rw-r--r--  lib/Transforms/Utils/LCSSA.cpp | 18
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 223
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 112
-rw-r--r--  lib/Transforms/Utils/LoopUnroll.cpp | 16
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 168
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 7
-rw-r--r--  lib/VMCore/Constants.cpp | 6
-rw-r--r--  lib/VMCore/Core.cpp | 19
-rw-r--r--  lib/VMCore/Globals.cpp | 19
-rw-r--r--  lib/VMCore/InlineAsm.cpp | 14
-rw-r--r--  lib/VMCore/Instructions.cpp | 28
-rw-r--r--  lib/VMCore/Metadata.cpp | 15
-rw-r--r--  lib/VMCore/Module.cpp | 37
-rw-r--r--  lib/VMCore/Pass.cpp | 4
-rw-r--r--  lib/VMCore/PassManager.cpp | 8
-rw-r--r--  lib/VMCore/TypeSymbolTable.cpp | 7
-rw-r--r--  lib/VMCore/ValueSymbolTable.cpp | 2
-rw-r--r--  lib/VMCore/Verifier.cpp | 6
297 files changed, 9904 insertions, 6184 deletions
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index c81190b..b8d69f4 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -23,7 +23,6 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Pass.h"
#include "llvm/Target/TargetData.h"
@@ -99,7 +98,7 @@ static bool isNonEscapingLocalObject(const Value *V) {
/// isObjectSmallerThan - Return true if we can prove that the object specified
/// by V is smaller than Size.
static bool isObjectSmallerThan(const Value *V, unsigned Size,
- LLVMContext &Context, const TargetData &TD) {
+ const TargetData &TD) {
const Type *AccessTy;
if (const GlobalVariable *GV = dyn_cast<GlobalVariable>(V)) {
AccessTy = GV->getType()->getElementType();
@@ -109,7 +108,7 @@ static bool isObjectSmallerThan(const Value *V, unsigned Size,
else
return false;
} else if (const CallInst* CI = extractMallocCall(V)) {
- if (!isArrayMalloc(V, Context, &TD))
+ if (!isArrayMalloc(V, &TD))
// The size is the argument to the malloc call.
if (const ConstantInt* C = dyn_cast<ConstantInt>(CI->getOperand(1)))
return (C->getZExtValue() < Size);
@@ -647,11 +646,25 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
const Value *O1 = V1->getUnderlyingObject();
const Value *O2 = V2->getUnderlyingObject();
+ // Null values in the default address space don't point to any object, so they
+ // don't alias any other pointer.
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O1))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+ if (const ConstantPointerNull *CPN = dyn_cast<ConstantPointerNull>(O2))
+ if (CPN->getType()->getAddressSpace() == 0)
+ return NoAlias;
+
if (O1 != O2) {
// If V1/V2 point to two different objects we know that we have no alias.
if (isIdentifiedObject(O1) && isIdentifiedObject(O2))
return NoAlias;
-
+
+ // Constant pointers can't alias with non-const isIdentifiedObject objects.
+ if ((isa<Constant>(O1) && isIdentifiedObject(O2) && !isa<Constant>(O2)) ||
+ (isa<Constant>(O2) && isIdentifiedObject(O1) && !isa<Constant>(O1)))
+ return NoAlias;
+
// Arguments can't alias with local allocations or noalias calls.
if ((isa<Argument>(O1) && (isa<AllocaInst>(O2) || isNoAliasCall(O2))) ||
(isa<Argument>(O2) && (isa<AllocaInst>(O1) || isNoAliasCall(O1))))
@@ -665,10 +678,9 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
// If the size of one access is larger than the entire object on the other
// side, then we know such behavior is undefined and can assume no alias.
- LLVMContext &Context = V1->getContext();
if (TD)
- if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, Context, *TD)) ||
- (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, Context, *TD)))
+ if ((V1Size != ~0U && isObjectSmallerThan(O2, V1Size, *TD)) ||
+ (V2Size != ~0U && isObjectSmallerThan(O1, V2Size, *TD)))
return NoAlias;
// If one pointer is the result of a call/invoke and the other is a
@@ -707,16 +719,16 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
// This function is used to determine if the indices of two GEP instructions are
// equal. V1 and V2 are the indices.
-static bool IndexOperandsEqual(Value *V1, Value *V2, LLVMContext &Context) {
+static bool IndexOperandsEqual(Value *V1, Value *V2) {
if (V1->getType() == V2->getType())
return V1 == V2;
if (Constant *C1 = dyn_cast<Constant>(V1))
if (Constant *C2 = dyn_cast<Constant>(V2)) {
// Sign extend the constants to long types, if necessary
- if (C1->getType() != Type::getInt64Ty(Context))
- C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(Context));
- if (C2->getType() != Type::getInt64Ty(Context))
- C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(Context));
+ if (C1->getType() != Type::getInt64Ty(C1->getContext()))
+ C1 = ConstantExpr::getSExt(C1, Type::getInt64Ty(C1->getContext()));
+ if (C2->getType() != Type::getInt64Ty(C1->getContext()))
+ C2 = ConstantExpr::getSExt(C2, Type::getInt64Ty(C1->getContext()));
return C1 == C2;
}
return false;
@@ -737,8 +749,6 @@ BasicAliasAnalysis::CheckGEPInstructions(
const PointerType *GEPPointerTy = cast<PointerType>(BasePtr1Ty);
- LLVMContext &Context = GEPPointerTy->getContext();
-
// Find the (possibly empty) initial sequence of equal values... which are not
// necessarily constants.
unsigned NumGEP1Operands = NumGEP1Ops, NumGEP2Operands = NumGEP2Ops;
@@ -746,8 +756,7 @@ BasicAliasAnalysis::CheckGEPInstructions(
unsigned MaxOperands = std::max(NumGEP1Operands, NumGEP2Operands);
unsigned UnequalOper = 0;
while (UnequalOper != MinOperands &&
- IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper],
- Context)) {
+ IndexOperandsEqual(GEP1Ops[UnequalOper], GEP2Ops[UnequalOper])) {
// Advance through the type as we go...
++UnequalOper;
if (const CompositeType *CT = dyn_cast<CompositeType>(BasePtr1Ty))
@@ -811,10 +820,11 @@ BasicAliasAnalysis::CheckGEPInstructions(
if (Constant *G2OC = dyn_cast<ConstantInt>(const_cast<Value*>(G2Oper))){
if (G1OC->getType() != G2OC->getType()) {
// Sign extend both operands to long.
- if (G1OC->getType() != Type::getInt64Ty(Context))
- G1OC = ConstantExpr::getSExt(G1OC, Type::getInt64Ty(Context));
- if (G2OC->getType() != Type::getInt64Ty(Context))
- G2OC = ConstantExpr::getSExt(G2OC, Type::getInt64Ty(Context));
+ const Type *Int64Ty = Type::getInt64Ty(G1OC->getContext());
+ if (G1OC->getType() != Int64Ty)
+ G1OC = ConstantExpr::getSExt(G1OC, Int64Ty);
+ if (G2OC->getType() != Int64Ty)
+ G2OC = ConstantExpr::getSExt(G2OC, Int64Ty);
GEP1Ops[FirstConstantOper] = G1OC;
GEP2Ops[FirstConstantOper] = G2OC;
}
@@ -950,7 +960,7 @@ BasicAliasAnalysis::CheckGEPInstructions(
for (unsigned i = 0; i != FirstConstantOper; ++i) {
if (!isa<StructType>(ZeroIdxTy))
GEP1Ops[i] = GEP2Ops[i] =
- Constant::getNullValue(Type::getInt32Ty(Context));
+ Constant::getNullValue(Type::getInt32Ty(ZeroIdxTy->getContext()));
if (const CompositeType *CT = dyn_cast<CompositeType>(ZeroIdxTy))
ZeroIdxTy = CT->getTypeAtIndex(GEP1Ops[i]);
@@ -992,11 +1002,11 @@ BasicAliasAnalysis::CheckGEPInstructions(
//
if (const ArrayType *AT = dyn_cast<ArrayType>(BasePtr1Ty))
GEP1Ops[i] =
- ConstantInt::get(Type::getInt64Ty(Context),
+ ConstantInt::get(Type::getInt64Ty(AT->getContext()),
AT->getNumElements()-1);
else if (const VectorType *VT = dyn_cast<VectorType>(BasePtr1Ty))
GEP1Ops[i] =
- ConstantInt::get(Type::getInt64Ty(Context),
+ ConstantInt::get(Type::getInt64Ty(VT->getContext()),
VT->getNumElements()-1);
}
}
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index f21fd54..0a83c3d 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -15,8 +15,10 @@ add_llvm_library(LLVMAnalysis
IVUsers.cpp
InlineCost.cpp
InstCount.cpp
+ InstructionSimplify.cpp
Interval.cpp
IntervalPartition.cpp
+ LazyValueInfo.cpp
LibCallAliasAnalysis.cpp
LibCallSemantics.cpp
LiveValues.cpp
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 33a5792..1cdadbf 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -23,7 +23,6 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
#include "llvm/ADT/SmallVector.h"
@@ -493,8 +492,7 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const TargetData *TD){
/// these together. If target data info is available, it is provided as TD,
/// otherwise TD is null.
static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
- Constant *Op1, const TargetData *TD,
- LLVMContext &Context){
+ Constant *Op1, const TargetData *TD){
// SROA
// Fold (and 0xffffffff00000000, (shl x, 32)) -> shl.
@@ -521,15 +519,15 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0,
/// SymbolicallyEvaluateGEP - If we can symbolically evaluate the specified GEP
/// constant expression, do so.
-static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
+static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
const Type *ResultTy,
- LLVMContext &Context,
const TargetData *TD) {
Constant *Ptr = Ops[0];
if (!TD || !cast<PointerType>(Ptr->getType())->getElementType()->isSized())
return 0;
- unsigned BitWidth = TD->getTypeSizeInBits(TD->getIntPtrType(Context));
+ unsigned BitWidth =
+ TD->getTypeSizeInBits(TD->getIntPtrType(Ptr->getContext()));
APInt BasePtr(BitWidth, 0);
bool BaseIsInt = true;
if (!Ptr->isNullValue()) {
@@ -558,7 +556,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
// If the base value for this address is a literal integer value, fold the
// getelementptr to the resulting integer value casted to the pointer type.
if (BaseIsInt) {
- Constant *C = ConstantInt::get(Context, Offset+BasePtr);
+ Constant *C = ConstantInt::get(Ptr->getContext(), Offset+BasePtr);
return ConstantExpr::getIntToPtr(C, ResultTy);
}
@@ -579,7 +577,8 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
return 0;
APInt NewIdx = Offset.udiv(ElemSize);
Offset -= NewIdx * ElemSize;
- NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Context), NewIdx));
+ NewIdxs.push_back(ConstantInt::get(TD->getIntPtrType(Ty->getContext()),
+ NewIdx));
Ty = ATy->getElementType();
} else if (const StructType *STy = dyn_cast<StructType>(Ty)) {
// Determine which field of the struct the offset points into. The
@@ -587,7 +586,8 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
// know the offset is within the struct at this point.
const StructLayout &SL = *TD->getStructLayout(STy);
unsigned ElIdx = SL.getElementContainingOffset(Offset.getZExtValue());
- NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Context), ElIdx));
+ NewIdxs.push_back(ConstantInt::get(Type::getInt32Ty(Ty->getContext()),
+ ElIdx));
Offset -= APInt(BitWidth, SL.getElementOffset(ElIdx));
Ty = STy->getTypeAtIndex(ElIdx);
} else {
@@ -628,8 +628,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant* const* Ops, unsigned NumOps,
/// is returned. Note that this function can only fail when attempting to fold
/// instructions like loads and stores, which have no constant expression form.
///
-Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context,
- const TargetData *TD) {
+Constant *llvm::ConstantFoldInstruction(Instruction *I, const TargetData *TD) {
if (PHINode *PN = dyn_cast<PHINode>(I)) {
if (PN->getNumIncomingValues() == 0)
return UndefValue::get(PN->getType());
@@ -656,33 +655,30 @@ Constant *llvm::ConstantFoldInstruction(Instruction *I, LLVMContext &Context,
return 0; // All operands not constant!
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(),
- Ops.data(), Ops.size(),
- Context, TD);
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
return ConstantFoldLoadInst(LI, TD);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- Ops.data(), Ops.size(), Context, TD);
+ Ops.data(), Ops.size(), TD);
}
/// ConstantFoldConstantExpression - Attempt to fold the constant expression
/// using the specified TargetData. If successful, the constant result is
/// result is returned, if not, null is returned.
Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
- LLVMContext &Context,
const TargetData *TD) {
SmallVector<Constant*, 8> Ops;
for (User::op_iterator i = CE->op_begin(), e = CE->op_end(); i != e; ++i)
Ops.push_back(cast<Constant>(*i));
if (CE->isCompare())
- return ConstantFoldCompareInstOperands(CE->getPredicate(),
- Ops.data(), Ops.size(),
- Context, TD);
+ return ConstantFoldCompareInstOperands(CE->getPredicate(), Ops[0], Ops[1],
+ TD);
return ConstantFoldInstOperands(CE->getOpcode(), CE->getType(),
- Ops.data(), Ops.size(), Context, TD);
+ Ops.data(), Ops.size(), TD);
}
/// ConstantFoldInstOperands - Attempt to constant fold an instruction with the
@@ -693,13 +689,11 @@ Constant *llvm::ConstantFoldConstantExpression(ConstantExpr *CE,
///
Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
Constant* const* Ops, unsigned NumOps,
- LLVMContext &Context,
const TargetData *TD) {
// Handle easy binops first.
if (Instruction::isBinaryOp(Opcode)) {
if (isa<ConstantExpr>(Ops[0]) || isa<ConstantExpr>(Ops[1]))
- if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD,
- Context))
+ if (Constant *C = SymbolicallyEvaluateBinop(Opcode, Ops[0], Ops[1], TD))
return C;
return ConstantExpr::get(Opcode, Ops[0], Ops[1]);
@@ -724,7 +718,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
unsigned InWidth = Input->getType()->getScalarSizeInBits();
if (TD->getPointerSizeInBits() < InWidth) {
Constant *Mask =
- ConstantInt::get(Context, APInt::getLowBitsSet(InWidth,
+ ConstantInt::get(CE->getContext(), APInt::getLowBitsSet(InWidth,
TD->getPointerSizeInBits()));
Input = ConstantExpr::getAnd(Input, Mask);
}
@@ -766,7 +760,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
AT->getNumElements()))) {
Constant *Index[] = {
Constant::getNullValue(CE->getType()),
- ConstantInt::get(Context, ElemIdx)
+ ConstantInt::get(ElTy->getContext(), ElemIdx)
};
return
ConstantExpr::getGetElementPtr(GV, &Index[0], 2);
@@ -800,7 +794,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
case Instruction::ShuffleVector:
return ConstantExpr::getShuffleVector(Ops[0], Ops[1], Ops[2]);
case Instruction::GetElementPtr:
- if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, Context, TD))
+ if (Constant *C = SymbolicallyEvaluateGEP(Ops, NumOps, DestTy, TD))
return C;
return ConstantExpr::getGetElementPtr(Ops[0], Ops+1, NumOps-1);
@@ -812,9 +806,7 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
/// returns a constant expression of the specified operands.
///
Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
- Constant*const * Ops,
- unsigned NumOps,
- LLVMContext &Context,
+ Constant *Ops0, Constant *Ops1,
const TargetData *TD) {
// fold: icmp (inttoptr x), null -> icmp x, 0
// fold: icmp (ptrtoint x), 0 -> icmp x, null
@@ -823,17 +815,16 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
//
// ConstantExpr::getCompare cannot do this, because it doesn't have TD
// around to know if bit truncation is happening.
- if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops[0])) {
- if (TD && Ops[1]->isNullValue()) {
- const Type *IntPtrTy = TD->getIntPtrType(Context);
+ if (ConstantExpr *CE0 = dyn_cast<ConstantExpr>(Ops0)) {
+ if (TD && Ops1->isNullValue()) {
+ const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
// Convert the integer value to the right size to ensure we get the
// proper extension or truncation.
Constant *C = ConstantExpr::getIntegerCast(CE0->getOperand(0),
IntPtrTy, false);
- Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
- Context, TD);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
}
// Only do this transformation if the int is intptrty in size, otherwise
@@ -841,16 +832,14 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
if (CE0->getOpcode() == Instruction::PtrToInt &&
CE0->getType() == IntPtrTy) {
Constant *C = CE0->getOperand(0);
- Constant *NewOps[] = { C, Constant::getNullValue(C->getType()) };
- // FIXME!
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
- Context, TD);
+ Constant *Null = Constant::getNullValue(C->getType());
+ return ConstantFoldCompareInstOperands(Predicate, C, Null, TD);
}
}
- if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops[1])) {
+ if (ConstantExpr *CE1 = dyn_cast<ConstantExpr>(Ops1)) {
if (TD && CE0->getOpcode() == CE1->getOpcode()) {
- const Type *IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(CE0->getContext());
if (CE0->getOpcode() == Instruction::IntToPtr) {
// Convert the integer value to the right size to ensure we get the
@@ -859,26 +848,21 @@ Constant *llvm::ConstantFoldCompareInstOperands(unsigned Predicate,
IntPtrTy, false);
Constant *C1 = ConstantExpr::getIntegerCast(CE1->getOperand(0),
IntPtrTy, false);
- Constant *NewOps[] = { C0, C1 };
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
- Context, TD);
+ return ConstantFoldCompareInstOperands(Predicate, C0, C1, TD);
}
// Only do this transformation if the int is intptrty in size, otherwise
// there is a truncation or extension that we aren't modeling.
if ((CE0->getOpcode() == Instruction::PtrToInt &&
CE0->getType() == IntPtrTy &&
- CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType())) {
- Constant *NewOps[] = {
- CE0->getOperand(0), CE1->getOperand(0)
- };
- return ConstantFoldCompareInstOperands(Predicate, NewOps, 2,
- Context, TD);
- }
+ CE0->getOperand(0)->getType() == CE1->getOperand(0)->getType()))
+ return ConstantFoldCompareInstOperands(Predicate, CE0->getOperand(0),
+ CE1->getOperand(0), TD);
}
}
}
- return ConstantExpr::getCompare(Predicate, Ops[0], Ops[1]);
+
+ return ConstantExpr::getCompare(Predicate, Ops0, Ops1);
}
@@ -996,7 +980,7 @@ llvm::canConstantFoldCallTo(const Function *F) {
}
static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
- const Type *Ty, LLVMContext &Context) {
+ const Type *Ty) {
errno = 0;
V = NativeFP(V);
if (errno != 0) {
@@ -1005,17 +989,15 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V,
}
if (Ty->isFloatTy())
- return ConstantFP::get(Context, APFloat((float)V));
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
if (Ty->isDoubleTy())
- return ConstantFP::get(Context, APFloat(V));
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold float/double");
return 0; // dummy return to suppress warning
}
static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
- double V, double W,
- const Type *Ty,
- LLVMContext &Context) {
+ double V, double W, const Type *Ty) {
errno = 0;
V = NativeFP(V, W);
if (errno != 0) {
@@ -1024,9 +1006,9 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double),
}
if (Ty->isFloatTy())
- return ConstantFP::get(Context, APFloat((float)V));
+ return ConstantFP::get(Ty->getContext(), APFloat((float)V));
if (Ty->isDoubleTy())
- return ConstantFP::get(Context, APFloat(V));
+ return ConstantFP::get(Ty->getContext(), APFloat(V));
llvm_unreachable("Can only constant fold float/double");
return 0; // dummy return to suppress warning
}
@@ -1037,7 +1019,6 @@ Constant *
llvm::ConstantFoldCall(Function *F,
Constant *const *Operands, unsigned NumOperands) {
if (!F->hasName()) return 0;
- LLVMContext &Context = F->getContext();
StringRef Name = F->getName();
const Type *Ty = F->getReturnType();
@@ -1054,62 +1035,62 @@ llvm::ConstantFoldCall(Function *F,
switch (Name[0]) {
case 'a':
if (Name == "acos")
- return ConstantFoldFP(acos, V, Ty, Context);
+ return ConstantFoldFP(acos, V, Ty);
else if (Name == "asin")
- return ConstantFoldFP(asin, V, Ty, Context);
+ return ConstantFoldFP(asin, V, Ty);
else if (Name == "atan")
- return ConstantFoldFP(atan, V, Ty, Context);
+ return ConstantFoldFP(atan, V, Ty);
break;
case 'c':
if (Name == "ceil")
- return ConstantFoldFP(ceil, V, Ty, Context);
+ return ConstantFoldFP(ceil, V, Ty);
else if (Name == "cos")
- return ConstantFoldFP(cos, V, Ty, Context);
+ return ConstantFoldFP(cos, V, Ty);
else if (Name == "cosh")
- return ConstantFoldFP(cosh, V, Ty, Context);
+ return ConstantFoldFP(cosh, V, Ty);
else if (Name == "cosf")
- return ConstantFoldFP(cos, V, Ty, Context);
+ return ConstantFoldFP(cos, V, Ty);
break;
case 'e':
if (Name == "exp")
- return ConstantFoldFP(exp, V, Ty, Context);
+ return ConstantFoldFP(exp, V, Ty);
break;
case 'f':
if (Name == "fabs")
- return ConstantFoldFP(fabs, V, Ty, Context);
+ return ConstantFoldFP(fabs, V, Ty);
else if (Name == "floor")
- return ConstantFoldFP(floor, V, Ty, Context);
+ return ConstantFoldFP(floor, V, Ty);
break;
case 'l':
if (Name == "log" && V > 0)
- return ConstantFoldFP(log, V, Ty, Context);
+ return ConstantFoldFP(log, V, Ty);
else if (Name == "log10" && V > 0)
- return ConstantFoldFP(log10, V, Ty, Context);
+ return ConstantFoldFP(log10, V, Ty);
else if (Name == "llvm.sqrt.f32" ||
Name == "llvm.sqrt.f64") {
if (V >= -0.0)
- return ConstantFoldFP(sqrt, V, Ty, Context);
+ return ConstantFoldFP(sqrt, V, Ty);
else // Undefined
return Constant::getNullValue(Ty);
}
break;
case 's':
if (Name == "sin")
- return ConstantFoldFP(sin, V, Ty, Context);
+ return ConstantFoldFP(sin, V, Ty);
else if (Name == "sinh")
- return ConstantFoldFP(sinh, V, Ty, Context);
+ return ConstantFoldFP(sinh, V, Ty);
else if (Name == "sqrt" && V >= 0)
- return ConstantFoldFP(sqrt, V, Ty, Context);
+ return ConstantFoldFP(sqrt, V, Ty);
else if (Name == "sqrtf" && V >= 0)
- return ConstantFoldFP(sqrt, V, Ty, Context);
+ return ConstantFoldFP(sqrt, V, Ty);
else if (Name == "sinf")
- return ConstantFoldFP(sin, V, Ty, Context);
+ return ConstantFoldFP(sin, V, Ty);
break;
case 't':
if (Name == "tan")
- return ConstantFoldFP(tan, V, Ty, Context);
+ return ConstantFoldFP(tan, V, Ty);
else if (Name == "tanh")
- return ConstantFoldFP(tanh, V, Ty, Context);
+ return ConstantFoldFP(tanh, V, Ty);
break;
default:
break;
@@ -1120,7 +1101,7 @@ llvm::ConstantFoldCall(Function *F,
if (ConstantInt *Op = dyn_cast<ConstantInt>(Operands[0])) {
if (Name.startswith("llvm.bswap"))
- return ConstantInt::get(Context, Op->getValue().byteSwap());
+ return ConstantInt::get(F->getContext(), Op->getValue().byteSwap());
else if (Name.startswith("llvm.ctpop"))
return ConstantInt::get(Ty, Op->getValue().countPopulation());
else if (Name.startswith("llvm.cttz"))
@@ -1149,18 +1130,20 @@ llvm::ConstantFoldCall(Function *F,
Op2->getValueAPF().convertToDouble();
if (Name == "pow")
- return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty, Context);
+ return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty);
if (Name == "fmod")
- return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty, Context);
+ return ConstantFoldBinaryFP(fmod, Op1V, Op2V, Ty);
if (Name == "atan2")
- return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty, Context);
+ return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty);
} else if (ConstantInt *Op2C = dyn_cast<ConstantInt>(Operands[1])) {
if (Name == "llvm.powi.f32")
- return ConstantFP::get(Context, APFloat((float)std::pow((float)Op1V,
+ return ConstantFP::get(F->getContext(),
+ APFloat((float)std::pow((float)Op1V,
(int)Op2C->getZExtValue())));
if (Name == "llvm.powi.f64")
- return ConstantFP::get(Context, APFloat((double)std::pow((double)Op1V,
- (int)Op2C->getZExtValue())));
+ return ConstantFP::get(F->getContext(),
+ APFloat((double)std::pow((double)Op1V,
+ (int)Op2C->getZExtValue())));
}
return 0;
}
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index b64dbf4..8f62245 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -366,6 +366,9 @@ bool DIGlobalVariable::Verify() const {
if (isNull())
return false;
+ if (!getDisplayName())
+ return false;
+
if (getContext().isNull())
return false;
@@ -406,6 +409,10 @@ uint64_t DIDerivedType::getOriginalTypeSize() const {
Tag == dwarf::DW_TAG_const_type || Tag == dwarf::DW_TAG_volatile_type ||
Tag == dwarf::DW_TAG_restrict_type) {
DIType BaseType = getTypeDerivedFrom();
+ // If this type is not derived from any type then take conservative
+ // approach.
+ if (BaseType.isNull())
+ return getSizeInBits();
if (BaseType.isDerivedType())
return DIDerivedType(BaseType.getNode()).getOriginalTypeSize();
else
@@ -599,9 +606,7 @@ void DIVariable::dump() const {
//===----------------------------------------------------------------------===//
DIFactory::DIFactory(Module &m)
- : M(m), VMContext(M.getContext()), StopPointFn(0), FuncStartFn(0),
- RegionStartFn(0), RegionEndFn(0),
- DeclareFn(0) {
+ : M(m), VMContext(M.getContext()), DeclareFn(0) {
EmptyStructPtr = PointerType::getUnqual(StructType::get(VMContext));
}
@@ -646,9 +651,9 @@ DISubrange DIFactory::GetOrCreateSubrange(int64_t Lo, int64_t Hi) {
/// CreateCompileUnit - Create a new descriptor for the specified compile
/// unit. Note that this does not unique compile units within the module.
DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
- StringRef Filename,
- StringRef Directory,
- StringRef Producer,
+ const char * Filename,
+ const char * Directory,
+ const char * Producer,
bool isMain,
bool isOptimized,
const char *Flags,
@@ -670,7 +675,7 @@ DICompileUnit DIFactory::CreateCompileUnit(unsigned LangID,
}
/// CreateEnumerator - Create a single enumerator value.
-DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){
+DIEnumerator DIFactory::CreateEnumerator(const char * Name, uint64_t Val){
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_enumerator),
MDString::get(VMContext, Name),
@@ -682,7 +687,7 @@ DIEnumerator DIFactory::CreateEnumerator(StringRef Name, uint64_t Val){
/// CreateBasicType - Create a basic type like int, float, etc.
DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -707,7 +712,7 @@ DIBasicType DIFactory::CreateBasicType(DIDescriptor Context,
/// CreateBasicType - Create a basic type like int, float, etc.
DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits,
@@ -734,7 +739,7 @@ DIBasicType DIFactory::CreateBasicTypeEx(DIDescriptor Context,
/// pointer, typedef, etc.
DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -762,7 +767,7 @@ DIDerivedType DIFactory::CreateDerivedType(unsigned Tag,
/// pointer, typedef, etc.
DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits,
@@ -789,7 +794,7 @@ DIDerivedType DIFactory::CreateDerivedTypeEx(unsigned Tag,
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
uint64_t SizeInBits,
@@ -821,7 +826,7 @@ DICompositeType DIFactory::CreateCompositeType(unsigned Tag,
/// CreateCompositeType - Create a composite type like array, struct, etc.
DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit,
unsigned LineNumber,
Constant *SizeInBits,
@@ -854,9 +859,9 @@ DICompositeType DIFactory::CreateCompositeTypeEx(unsigned Tag,
/// See comments in DISubprogram for descriptions of these fields. This
/// method does not unique the generated descriptors.
DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
- StringRef Name,
- StringRef DisplayName,
- StringRef LinkageName,
+ const char * Name,
+ const char * DisplayName,
+ const char * LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type,
bool isLocalToUnit,
@@ -880,9 +885,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
/// CreateGlobalVariable - Create a new descriptor for the specified global.
DIGlobalVariable
-DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
- StringRef DisplayName,
- StringRef LinkageName,
+DIFactory::CreateGlobalVariable(DIDescriptor Context, const char * Name,
+ const char * DisplayName,
+ const char * LinkageName,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type,bool isLocalToUnit,
bool isDefinition, llvm::GlobalVariable *Val) {
@@ -914,7 +919,7 @@ DIFactory::CreateGlobalVariable(DIDescriptor Context, StringRef Name,
/// CreateVariable - Create a new descriptor for the specified variable.
DIVariable DIFactory::CreateVariable(unsigned Tag, DIDescriptor Context,
- StringRef Name,
+ const char * Name,
DICompileUnit CompileUnit, unsigned LineNo,
DIType Type) {
Value *Elts[] = {
@@ -976,60 +981,8 @@ DILocation DIFactory::CreateLocation(unsigned LineNo, unsigned ColumnNo,
// DIFactory: Routines for inserting code into a function
//===----------------------------------------------------------------------===//
-/// InsertStopPoint - Create a new llvm.dbg.stoppoint intrinsic invocation,
-/// inserting it at the end of the specified basic block.
-void DIFactory::InsertStopPoint(DICompileUnit CU, unsigned LineNo,
- unsigned ColNo, BasicBlock *BB) {
-
- // Lazily construct llvm.dbg.stoppoint function.
- if (!StopPointFn)
- StopPointFn = llvm::Intrinsic::getDeclaration(&M,
- llvm::Intrinsic::dbg_stoppoint);
-
- // Invoke llvm.dbg.stoppoint
- Value *Args[] = {
- ConstantInt::get(llvm::Type::getInt32Ty(VMContext), LineNo),
- ConstantInt::get(llvm::Type::getInt32Ty(VMContext), ColNo),
- CU.getNode()
- };
- CallInst::Create(StopPointFn, Args, Args+3, "", BB);
-}
-
-/// InsertSubprogramStart - Create a new llvm.dbg.func.start intrinsic to
-/// mark the start of the specified subprogram.
-void DIFactory::InsertSubprogramStart(DISubprogram SP, BasicBlock *BB) {
- // Lazily construct llvm.dbg.func.start.
- if (!FuncStartFn)
- FuncStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_func_start);
-
- // Call llvm.dbg.func.start which also implicitly sets a stoppoint.
- CallInst::Create(FuncStartFn, SP.getNode(), "", BB);
-}
-
-/// InsertRegionStart - Insert a new llvm.dbg.region.start intrinsic call to
-/// mark the start of a region for the specified scoping descriptor.
-void DIFactory::InsertRegionStart(DIDescriptor D, BasicBlock *BB) {
- // Lazily construct llvm.dbg.region.start function.
- if (!RegionStartFn)
- RegionStartFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_start);
-
- // Call llvm.dbg.func.start.
- CallInst::Create(RegionStartFn, D.getNode(), "", BB);
-}
-
-/// InsertRegionEnd - Insert a new llvm.dbg.region.end intrinsic call to
-/// mark the end of a region for the specified scoping descriptor.
-void DIFactory::InsertRegionEnd(DIDescriptor D, BasicBlock *BB) {
- // Lazily construct llvm.dbg.region.end function.
- if (!RegionEndFn)
- RegionEndFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_region_end);
-
- // Call llvm.dbg.region.end.
- CallInst::Create(RegionEndFn, D.getNode(), "", BB);
-}
-
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
+Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
Instruction *InsertBefore) {
// Cast the storage to a {}* for the call to llvm.dbg.declare.
Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertBefore);
@@ -1038,11 +991,11 @@ void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
Value *Args[] = { Storage, D.getNode() };
- CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
+ return CallInst::Create(DeclareFn, Args, Args+2, "", InsertBefore);
}
/// InsertDeclare - Insert a new llvm.dbg.declare intrinsic call.
-void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
+Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
BasicBlock *InsertAtEnd) {
// Cast the storage to a {}* for the call to llvm.dbg.declare.
Storage = new BitCastInst(Storage, EmptyStructPtr, "", InsertAtEnd);
@@ -1051,7 +1004,7 @@ void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
DeclareFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_declare);
Value *Args[] = { Storage, D.getNode() };
- CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
+ return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
}
@@ -1062,38 +1015,18 @@ void DIFactory::InsertDeclare(Value *Storage, DIVariable D,
/// processModule - Process entire module and collect debug info.
void DebugInfoFinder::processModule(Module &M) {
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
MetadataContext &TheMetadata = M.getContext().getMetadata();
unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
-#endif
+
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I)
for (Function::iterator FI = (*I).begin(), FE = (*I).end(); FI != FE; ++FI)
for (BasicBlock::iterator BI = (*FI).begin(), BE = (*FI).end(); BI != BE;
++BI) {
- if (DbgStopPointInst *SPI = dyn_cast<DbgStopPointInst>(BI))
- processStopPoint(SPI);
- else if (DbgFuncStartInst *FSI = dyn_cast<DbgFuncStartInst>(BI))
- processFuncStart(FSI);
- else if (DbgRegionStartInst *DRS = dyn_cast<DbgRegionStartInst>(BI))
- processRegionStart(DRS);
- else if (DbgRegionEndInst *DRE = dyn_cast<DbgRegionEndInst>(BI))
- processRegionEnd(DRE);
- else if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
+ if (DbgDeclareInst *DDI = dyn_cast<DbgDeclareInst>(BI))
processDeclare(DDI);
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
- else if (MDDbgKind) {
- if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI)) {
- DILocation Loc(L);
- DIScope S(Loc.getScope().getNode());
- if (S.isCompileUnit())
- addCompileUnit(DICompileUnit(S.getNode()));
- else if (S.isSubprogram())
- processSubprogram(DISubprogram(S.getNode()));
- else if (S.isLexicalBlock())
- processLexicalBlock(DILexicalBlock(S.getNode()));
- }
- }
-#endif
+ else if (MDDbgKind)
+ if (MDNode *L = TheMetadata.getMD(MDDbgKind, BI))
+ processLocation(DILocation(L));
}
NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
@@ -1109,6 +1042,20 @@ void DebugInfoFinder::processModule(Module &M) {
}
}
+/// processLocation - Process DILocation.
+void DebugInfoFinder::processLocation(DILocation Loc) {
+ if (Loc.isNull()) return;
+ DIScope S(Loc.getScope().getNode());
+ if (S.isNull()) return;
+ if (S.isCompileUnit())
+ addCompileUnit(DICompileUnit(S.getNode()));
+ else if (S.isSubprogram())
+ processSubprogram(DISubprogram(S.getNode()));
+ else if (S.isLexicalBlock())
+ processLexicalBlock(DILexicalBlock(S.getNode()));
+ processLocation(Loc.getOrigLocation());
+}
+
/// processType - Process DIType.
void DebugInfoFinder::processType(DIType DT) {
if (!addType(DT))
@@ -1156,30 +1103,6 @@ void DebugInfoFinder::processSubprogram(DISubprogram SP) {
processType(SP.getType());
}
-/// processStopPoint - Process DbgStopPointInst.
-void DebugInfoFinder::processStopPoint(DbgStopPointInst *SPI) {
- MDNode *Context = dyn_cast<MDNode>(SPI->getContext());
- addCompileUnit(DICompileUnit(Context));
-}
-
-/// processFuncStart - Process DbgFuncStartInst.
-void DebugInfoFinder::processFuncStart(DbgFuncStartInst *FSI) {
- MDNode *SP = dyn_cast<MDNode>(FSI->getSubprogram());
- processSubprogram(DISubprogram(SP));
-}
-
-/// processRegionStart - Process DbgRegionStart.
-void DebugInfoFinder::processRegionStart(DbgRegionStartInst *DRS) {
- MDNode *SP = dyn_cast<MDNode>(DRS->getContext());
- processSubprogram(DISubprogram(SP));
-}
-
-/// processRegionEnd - Process DbgRegionEnd.
-void DebugInfoFinder::processRegionEnd(DbgRegionEndInst *DRE) {
- MDNode *SP = dyn_cast<MDNode>(DRE->getContext());
- processSubprogram(DISubprogram(SP));
-}
-
/// processDeclare - Process DbgDeclareInst.
void DebugInfoFinder::processDeclare(DbgDeclareInst *DDI) {
DIVariable DV(cast<MDNode>(DDI->getVariable()));
@@ -1475,22 +1398,4 @@ bool getLocationInfo(const Value *V, std::string &DisplayName,
return DebugLoc::get(Id);
}
-
- /// isInlinedFnStart - Return true if FSI is starting an inlined function.
- bool isInlinedFnStart(DbgFuncStartInst &FSI, const Function *CurrentFn) {
- DISubprogram Subprogram(cast<MDNode>(FSI.getSubprogram()));
- if (Subprogram.describes(CurrentFn))
- return false;
-
- return true;
- }
-
- /// isInlinedFnEnd - Return true if REI is ending an inlined function.
- bool isInlinedFnEnd(DbgRegionEndInst &REI, const Function *CurrentFn) {
- DISubprogram Subprogram(cast<MDNode>(REI.getContext()));
- if (Subprogram.isNull() || Subprogram.describes(CurrentFn))
- return false;
-
- return true;
- }
}
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index 17f304c..40a8cd5 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -518,7 +518,7 @@ namespace {
/// getObject - Return the node corresponding to the memory object for the
/// specified global or allocation instruction.
unsigned getObject(Value *V) const {
- DenseMap<Value*, unsigned>::iterator I = ObjectNodes.find(V);
+ DenseMap<Value*, unsigned>::const_iterator I = ObjectNodes.find(V);
assert(I != ObjectNodes.end() &&
"Value does not have an object in the points-to graph!");
return I->second;
@@ -527,7 +527,7 @@ namespace {
/// getReturnNode - Return the node representing the return value for the
/// specified function.
unsigned getReturnNode(Function *F) const {
- DenseMap<Function*, unsigned>::iterator I = ReturnNodes.find(F);
+ DenseMap<Function*, unsigned>::const_iterator I = ReturnNodes.find(F);
assert(I != ReturnNodes.end() && "Function does not return a value!");
return I->second;
}
@@ -535,7 +535,7 @@ namespace {
/// getVarargNode - Return the node representing the variable arguments
/// formal for the specified function.
unsigned getVarargNode(Function *F) const {
- DenseMap<Function*, unsigned>::iterator I = VarargNodes.find(F);
+ DenseMap<Function*, unsigned>::const_iterator I = VarargNodes.find(F);
assert(I != VarargNodes.end() && "Function does not take var args!");
return I->second;
}
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 543e017..cf52320 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -151,6 +151,8 @@ static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
if (L->contains(User->getParent())) return false;
BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock)
+ return false;
// Ok, the user is outside of the loop. If it is dominated by the latch
// block, use the post-inc value.
@@ -265,6 +267,18 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
return true;
}
+void IVUsers::AddUser(const SCEV *Stride, const SCEV *Offset,
+ Instruction *User, Value *Operand) {
+ IVUsersOfOneStride *StrideUses = IVUsesByStride[Stride];
+ if (!StrideUses) { // First occurrence of this stride?
+ StrideOrder.push_back(Stride);
+ StrideUses = new IVUsersOfOneStride(Stride);
+ IVUses.push_back(StrideUses);
+ IVUsesByStride[Stride] = StrideUses;
+ }
+ IVUsesByStride[Stride]->addUser(Offset, User, Operand);
+}
+
IVUsers::IVUsers()
: LoopPass(&ID) {
}
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
new file mode 100644
index 0000000..f9953e3
--- /dev/null
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -0,0 +1,348 @@
+//===- InstructionSimplify.cpp - Fold instruction operands ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements routines for folding instructions into simpler forms
+// that do not require creating new instructions. For example, this does
+// constant folding, and can handle identities like (X&0)->0.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Support/ValueHandle.h"
+#include "llvm/Instructions.h"
+#include "llvm/Support/PatternMatch.h"
+using namespace llvm;
+using namespace llvm::PatternMatch;
+
+/// SimplifyAndInst - Given operands for an And, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyAndInst(Value *Op0, Value *Op1,
+ const TargetData *TD) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::And, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X & undef -> 0
+ if (isa<UndefValue>(Op1))
+ return Constant::getNullValue(Op0->getType());
+
+ // X & X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X & <0,0> = <0,0>
+ if (isa<ConstantAggregateZero>(Op1))
+ return Op1;
+
+ // X & <-1,-1> = X
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1))
+ if (CP->isAllOnesValue())
+ return Op0;
+
+ if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) {
+ // X & 0 = 0
+ if (Op1CI->isZero())
+ return Op1CI;
+ // X & -1 = X
+ if (Op1CI->isAllOnesValue())
+ return Op0;
+ }
+
+ // A & ~A = ~A & A = 0
+ Value *A, *B;
+ if ((match(Op0, m_Not(m_Value(A))) && A == Op1) ||
+ (match(Op1, m_Not(m_Value(A))) && A == Op0))
+ return Constant::getNullValue(Op0->getType());
+
+ // (A | ?) & A = A
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A & (A | ?) = A
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ return 0;
+}
+
+/// SimplifyOrInst - Given operands for an Or, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyOrInst(Value *Op0, Value *Op1,
+ const TargetData *TD) {
+ if (Constant *CLHS = dyn_cast<Constant>(Op0)) {
+ if (Constant *CRHS = dyn_cast<Constant>(Op1)) {
+ Constant *Ops[] = { CLHS, CRHS };
+ return ConstantFoldInstOperands(Instruction::Or, CLHS->getType(),
+ Ops, 2, TD);
+ }
+
+ // Canonicalize the constant to the RHS.
+ std::swap(Op0, Op1);
+ }
+
+ // X | undef -> -1
+ if (isa<UndefValue>(Op1))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // X | X = X
+ if (Op0 == Op1)
+ return Op0;
+
+ // X | <0,0> = X
+ if (isa<ConstantAggregateZero>(Op1))
+ return Op0;
+
+ // X | <-1,-1> = <-1,-1>
+ if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1))
+ if (CP->isAllOnesValue())
+ return Op1;
+
+ if (ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1)) {
+ // X | 0 = X
+ if (Op1CI->isZero())
+ return Op0;
+ // X | -1 = -1
+ if (Op1CI->isAllOnesValue())
+ return Op1CI;
+ }
+
+ // A | ~A = ~A | A = -1
+ Value *A, *B;
+ if ((match(Op0, m_Not(m_Value(A))) && A == Op1) ||
+ (match(Op1, m_Not(m_Value(A))) && A == Op0))
+ return Constant::getAllOnesValue(Op0->getType());
+
+ // (A & ?) | A = A
+ if (match(Op0, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op1 || B == Op1))
+ return Op1;
+
+ // A | (A & ?) = A
+ if (match(Op1, m_And(m_Value(A), m_Value(B))) &&
+ (A == Op0 || B == Op0))
+ return Op0;
+
+ return 0;
+}
+
+
+
+
+static const Type *GetCompareTy(Value *Op) {
+ return CmpInst::makeCmpResultType(Op->getType());
+}
+
+
+/// SimplifyICmpInst - Given operands for an ICmpInst, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isIntPredicate(Pred) && "Not an integer compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ // ITy - This is the return type of the compare we're considering.
+ const Type *ITy = GetCompareTy(LHS);
+
+ // icmp X, X -> true/false
+ if (LHS == RHS)
+ return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred));
+
+ if (isa<UndefValue>(RHS)) // X icmp undef -> undef
+ return UndefValue::get(ITy);
+
+ // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
+ // addresses never equal each other! We already know that LHS != RHS.
+ if ((isa<GlobalValue>(LHS) || isa<AllocaInst>(LHS) ||
+ isa<ConstantPointerNull>(LHS)) &&
+ (isa<GlobalValue>(RHS) || isa<AllocaInst>(RHS) ||
+ isa<ConstantPointerNull>(RHS)))
+ return ConstantInt::get(ITy, CmpInst::isFalseWhenEqual(Pred));
+
+ // See if we are doing a comparison with a constant.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(RHS)) {
+ // If we have an icmp le or icmp ge against the maximum or minimum value
+ // for the type, the comparison is trivially true.
+ switch (Pred) {
+ default: break;
+ case ICmpInst::ICMP_ULE:
+ if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
+ return ConstantInt::getTrue(CI->getContext());
+ break;
+ case ICmpInst::ICMP_SLE:
+ if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
+ return ConstantInt::getTrue(CI->getContext());
+ break;
+ case ICmpInst::ICMP_UGE:
+ if (CI->isMinValue(false)) // A >=u MIN -> TRUE
+ return ConstantInt::getTrue(CI->getContext());
+ break;
+ case ICmpInst::ICMP_SGE:
+ if (CI->isMinValue(true)) // A >=s MIN -> TRUE
+ return ConstantInt::getTrue(CI->getContext());
+ break;
+ }
+ }
+
+
+ return 0;
+}
+
+/// SimplifyFCmpInst - Given operands for an FCmpInst, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD) {
+ CmpInst::Predicate Pred = (CmpInst::Predicate)Predicate;
+ assert(CmpInst::isFPPredicate(Pred) && "Not an FP compare!");
+
+ if (Constant *CLHS = dyn_cast<Constant>(LHS)) {
+ if (Constant *CRHS = dyn_cast<Constant>(RHS))
+ return ConstantFoldCompareInstOperands(Pred, CLHS, CRHS, TD);
+
+ // If we have a constant, make sure it is on the RHS.
+ std::swap(LHS, RHS);
+ Pred = CmpInst::getSwappedPredicate(Pred);
+ }
+
+ // Fold trivial predicates.
+ if (Pred == FCmpInst::FCMP_FALSE)
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ if (Pred == FCmpInst::FCMP_TRUE)
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+
+ if (isa<UndefValue>(RHS)) // fcmp pred X, undef -> undef
+ return UndefValue::get(GetCompareTy(LHS));
+
+ // fcmp x,x -> true/false. Not all compares are foldable.
+ if (LHS == RHS) {
+ if (CmpInst::isTrueWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 1);
+ if (CmpInst::isFalseWhenEqual(Pred))
+ return ConstantInt::get(GetCompareTy(LHS), 0);
+ }
+
+ // Handle fcmp with constant RHS
+ if (Constant *RHSC = dyn_cast<Constant>(RHS)) {
+ // If the constant is a nan, see if we can fold the comparison based on it.
+ if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
+ if (CFP->getValueAPF().isNaN()) {
+ if (FCmpInst::isOrdered(Pred)) // True "if ordered and foo"
+ return ConstantInt::getFalse(CFP->getContext());
+ assert(FCmpInst::isUnordered(Pred) &&
+ "Comparison must be either ordered or unordered!");
+ // True if unordered.
+ return ConstantInt::getTrue(CFP->getContext());
+ }
+ }
+ }
+
+ return 0;
+}
+
+//=== Helper functions for higher up the class hierarchy.
+
+/// SimplifyBinOp - Given operands for a BinaryOperator, see if we can
+/// fold the result. If not, this returns null.
+Value *llvm::SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS,
+ const TargetData *TD) {
+ switch (Opcode) {
+ case Instruction::And: return SimplifyAndInst(LHS, RHS, TD);
+ case Instruction::Or: return SimplifyOrInst(LHS, RHS, TD);
+ default:
+ if (Constant *CLHS = dyn_cast<Constant>(LHS))
+ if (Constant *CRHS = dyn_cast<Constant>(RHS)) {
+ Constant *COps[] = {CLHS, CRHS};
+ return ConstantFoldInstOperands(Opcode, LHS->getType(), COps, 2, TD);
+ }
+ return 0;
+ }
+}
+
+/// SimplifyCmpInst - Given operands for a CmpInst, see if we can
+/// fold the result.
+Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
+ const TargetData *TD) {
+ if (CmpInst::isIntPredicate((CmpInst::Predicate)Predicate))
+ return SimplifyICmpInst(Predicate, LHS, RHS, TD);
+ return SimplifyFCmpInst(Predicate, LHS, RHS, TD);
+}
+
+
+/// SimplifyInstruction - See if we can compute a simplified version of this
+/// instruction. If not, this returns null.
+Value *llvm::SimplifyInstruction(Instruction *I, const TargetData *TD) {
+ switch (I->getOpcode()) {
+ default:
+ return ConstantFoldInstruction(I, TD);
+ case Instruction::And:
+ return SimplifyAndInst(I->getOperand(0), I->getOperand(1), TD);
+ case Instruction::Or:
+ return SimplifyOrInst(I->getOperand(0), I->getOperand(1), TD);
+ case Instruction::ICmp:
+ return SimplifyICmpInst(cast<ICmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD);
+ case Instruction::FCmp:
+ return SimplifyFCmpInst(cast<FCmpInst>(I)->getPredicate(),
+ I->getOperand(0), I->getOperand(1), TD);
+ }
+}
+
+/// ReplaceAndSimplifyAllUses - Perform From->replaceAllUsesWith(To) and then
+/// delete the From instruction. In addition to a basic RAUW, this does a
+/// recursive simplification of the newly formed instructions. This catches
+/// things where one simplification exposes other opportunities. This only
+/// simplifies and deletes scalar operations, it does not change the CFG.
+///
+void llvm::ReplaceAndSimplifyAllUses(Instruction *From, Value *To,
+ const TargetData *TD) {
+ assert(From != To && "ReplaceAndSimplifyAllUses(X,X) is not valid!");
+
+ // FromHandle - This keeps a weakvh on the from value so that we can know if
+ // it gets deleted out from under us in a recursive simplification.
+ WeakVH FromHandle(From);
+
+ while (!From->use_empty()) {
+ // Update the instruction to use the new value.
+ Use &U = From->use_begin().getUse();
+ Instruction *User = cast<Instruction>(U.getUser());
+ U = To;
+
+ // See if we can simplify it.
+ if (Value *V = SimplifyInstruction(User, TD)) {
+ // Recursively simplify this.
+ ReplaceAndSimplifyAllUses(User, V, TD);
+
+ // If the recursive simplification ended up revisiting and deleting 'From'
+ // then we're done.
+ if (FromHandle == 0)
+ return;
+ }
+ }
+ From->eraseFromParent();
+}
+
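
As a usage illustration (a minimal sketch, not part of the patch: FoldOrCreateAnd, InsertPt and the surrounding transform are hypothetical), a client can consult the new SimplifyBinOp entry point before materializing an instruction and only create it when no existing value suffices; ReplaceAndSimplifyAllUses then covers the case where an already-existing instruction folds away.

    #include "llvm/Analysis/InstructionSimplify.h"
    #include "llvm/Instructions.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    // Hypothetical helper: return an existing value if "A & B" simplifies
    // (e.g. A & A, X & 0, (A | B) & A); otherwise create the instruction.
    static Value *FoldOrCreateAnd(Value *A, Value *B, Instruction *InsertPt,
                                  const TargetData *TD) {
      if (Value *V = SimplifyBinOp(Instruction::And, A, B, TD))
        return V;                       // no new instruction needed
      return BinaryOperator::CreateAnd(A, B, "and", InsertPt);
    }
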
diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp
new file mode 100644
index 0000000..5796c6f
--- /dev/null
+++ b/lib/Analysis/LazyValueInfo.cpp
@@ -0,0 +1,582 @@
+//===- LazyValueInfo.cpp - Value constraint analysis ----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interface for lazy computation of value constraint
+// information.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "lazy-value-info"
+#include "llvm/Analysis/LazyValueInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Instructions.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/STLExtras.h"
+using namespace llvm;
+
+char LazyValueInfo::ID = 0;
+static RegisterPass<LazyValueInfo>
+X("lazy-value-info", "Lazy Value Information Analysis", false, true);
+
+namespace llvm {
+ FunctionPass *createLazyValueInfoPass() { return new LazyValueInfo(); }
+}
+
+
+//===----------------------------------------------------------------------===//
+// LVILatticeVal
+//===----------------------------------------------------------------------===//
+
+/// LVILatticeVal - This is the information tracked by LazyValueInfo for each
+/// value.
+///
+/// FIXME: This is basically just for bringup; it can be made much richer in
+/// the future.
+///
+namespace {
+class LVILatticeVal {
+ enum LatticeValueTy {
+ /// undefined - This LLVM Value has no known value yet.
+ undefined,
+ /// constant - This LLVM Value has a specific constant value.
+ constant,
+
+ /// notconstant - This LLVM value is known to not have the specified value.
+ notconstant,
+
+ /// overdefined - This instruction is not known to be constant, and we know
+ /// it has a value.
+ overdefined
+ };
+
+ /// Val: This stores the current lattice value along with the Constant* for
+ /// the constant if this is a 'constant' or 'notconstant' value.
+ PointerIntPair<Constant *, 2, LatticeValueTy> Val;
+
+public:
+ LVILatticeVal() : Val(0, undefined) {}
+
+ static LVILatticeVal get(Constant *C) {
+ LVILatticeVal Res;
+ Res.markConstant(C);
+ return Res;
+ }
+ static LVILatticeVal getNot(Constant *C) {
+ LVILatticeVal Res;
+ Res.markNotConstant(C);
+ return Res;
+ }
+
+ bool isUndefined() const { return Val.getInt() == undefined; }
+ bool isConstant() const { return Val.getInt() == constant; }
+ bool isNotConstant() const { return Val.getInt() == notconstant; }
+ bool isOverdefined() const { return Val.getInt() == overdefined; }
+
+ Constant *getConstant() const {
+ assert(isConstant() && "Cannot get the constant of a non-constant!");
+ return Val.getPointer();
+ }
+
+ Constant *getNotConstant() const {
+ assert(isNotConstant() && "Cannot get the constant of a non-notconstant!");
+ return Val.getPointer();
+ }
+
+ /// markOverdefined - Return true if this is a change in status.
+ bool markOverdefined() {
+ if (isOverdefined())
+ return false;
+ Val.setInt(overdefined);
+ return true;
+ }
+
+ /// markConstant - Return true if this is a change in status.
+ bool markConstant(Constant *V) {
+ if (isConstant()) {
+ assert(getConstant() == V && "Marking constant with different value");
+ return false;
+ }
+
+ assert(isUndefined());
+ Val.setInt(constant);
+ assert(V && "Marking constant with NULL");
+ Val.setPointer(V);
+ return true;
+ }
+
+ /// markNotConstant - Return true if this is a change in status.
+ bool markNotConstant(Constant *V) {
+ if (isNotConstant()) {
+ assert(getNotConstant() == V && "Marking !constant with different value");
+ return false;
+ }
+
+ if (isConstant())
+ assert(getConstant() != V && "Marking not constant with different value");
+ else
+ assert(isUndefined());
+
+ Val.setInt(notconstant);
+ assert(V && "Marking constant with NULL");
+ Val.setPointer(V);
+ return true;
+ }
+
+ /// mergeIn - Merge the specified lattice value into this one, updating this
+ /// one and returning true if anything changed.
+ bool mergeIn(const LVILatticeVal &RHS) {
+ if (RHS.isUndefined() || isOverdefined()) return false;
+ if (RHS.isOverdefined()) return markOverdefined();
+
+ if (RHS.isNotConstant()) {
+ if (isNotConstant()) {
+ if (getNotConstant() != RHS.getNotConstant() ||
+ isa<ConstantExpr>(getNotConstant()) ||
+ isa<ConstantExpr>(RHS.getNotConstant()))
+ return markOverdefined();
+ return false;
+ }
+ if (isConstant()) {
+ if (getConstant() == RHS.getNotConstant() ||
+ isa<ConstantExpr>(RHS.getNotConstant()) ||
+ isa<ConstantExpr>(getConstant()))
+ return markOverdefined();
+ return markNotConstant(RHS.getNotConstant());
+ }
+
+ assert(isUndefined() && "Unexpected lattice");
+ return markNotConstant(RHS.getNotConstant());
+ }
+
+ // RHS must be a constant, we must be undef, constant, or notconstant.
+ if (isUndefined())
+ return markConstant(RHS.getConstant());
+
+ if (isConstant()) {
+ if (getConstant() != RHS.getConstant())
+ return markOverdefined();
+ return false;
+ }
+
+ // If we are known "!=4" and RHS is "==5", stay at "!=4".
+ if (getNotConstant() == RHS.getConstant() ||
+ isa<ConstantExpr>(getNotConstant()) ||
+ isa<ConstantExpr>(RHS.getConstant()))
+ return markOverdefined();
+ return false;
+ }
+
+};
+
+} // end anonymous namespace.
+
+namespace llvm {
+raw_ostream &operator<<(raw_ostream &OS, const LVILatticeVal &Val) {
+ if (Val.isUndefined())
+ return OS << "undefined";
+ if (Val.isOverdefined())
+ return OS << "overdefined";
+
+ if (Val.isNotConstant())
+ return OS << "notconstant<" << *Val.getNotConstant() << '>';
+ return OS << "constant<" << *Val.getConstant() << '>';
+}
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfoCache Decl
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// LazyValueInfoCache - This is the cache kept by LazyValueInfo which
+ /// maintains per-value lattice information across the clients' queries.
+ class LazyValueInfoCache {
+ public:
+ /// BlockCacheEntryTy - This is a computed lattice value at the end of the
+ /// specified basic block for a Value* that depends on context.
+ typedef std::pair<BasicBlock*, LVILatticeVal> BlockCacheEntryTy;
+
+ /// ValueCacheEntryTy - This is all of the cached block information for
+ /// exactly one Value*. The entries are sorted by the BasicBlock* of the
+ /// entries, allowing us to do a lookup with a binary search.
+ typedef std::vector<BlockCacheEntryTy> ValueCacheEntryTy;
+
+ private:
+ /// ValueCache - This is all of the cached information for all values,
+ /// mapped from Value* to key information.
+ DenseMap<Value*, ValueCacheEntryTy> ValueCache;
+ public:
+
+ /// getValueInBlock - This is the query interface to determine the lattice
+ /// value for the specified Value* at the end of the specified block.
+ LVILatticeVal getValueInBlock(Value *V, BasicBlock *BB);
+
+ /// getValueOnEdge - This is the query interface to determine the lattice
+ /// value for the specified Value* that is true on the specified edge.
+ LVILatticeVal getValueOnEdge(Value *V, BasicBlock *FromBB,BasicBlock *ToBB);
+ };
+} // end anonymous namespace
+
+namespace {
+ struct BlockCacheEntryComparator {
+ static int Compare(const void *LHSv, const void *RHSv) {
+ const LazyValueInfoCache::BlockCacheEntryTy *LHS =
+ static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(LHSv);
+ const LazyValueInfoCache::BlockCacheEntryTy *RHS =
+ static_cast<const LazyValueInfoCache::BlockCacheEntryTy *>(RHSv);
+ if (LHS->first < RHS->first)
+ return -1;
+ if (LHS->first > RHS->first)
+ return 1;
+ return 0;
+ }
+
+ bool operator()(const LazyValueInfoCache::BlockCacheEntryTy &LHS,
+ const LazyValueInfoCache::BlockCacheEntryTy &RHS) const {
+ return LHS.first < RHS.first;
+ }
+ };
+}
+
+//===----------------------------------------------------------------------===//
+// LVIQuery Impl
+//===----------------------------------------------------------------------===//
+
+namespace {
+ /// LVIQuery - This is a transient object that exists while a query is
+ /// being performed.
+ ///
+ /// TODO: Reuse LVIQuery instead of recreating it for every query; this
+ /// avoids reallocating the densemap on every query.
+ class LVIQuery {
+ typedef LazyValueInfoCache::BlockCacheEntryTy BlockCacheEntryTy;
+ typedef LazyValueInfoCache::ValueCacheEntryTy ValueCacheEntryTy;
+
+ /// This is the current value being queried for.
+ Value *Val;
+
+ /// This is all of the cached information about this value.
+ ValueCacheEntryTy &Cache;
+
+ /// NewBlockInfo - This is a mapping for the new BasicBlocks which have been
+ /// added to the cache but are not yet in sorted order.
+ DenseMap<BasicBlock*, LVILatticeVal> NewBlockInfo;
+ public:
+
+ LVIQuery(Value *V, ValueCacheEntryTy &VC) : Val(V), Cache(VC) {
+ }
+
+ ~LVIQuery() {
+ // When the query is done, insert the newly discovered facts into the
+ // cache in sorted order.
+ if (NewBlockInfo.empty()) return;
+
+ // Grow the cache to exactly fit the new data.
+ Cache.reserve(Cache.size() + NewBlockInfo.size());
+
+ // If we only have one new entry, insert it instead of doing a full-on
+ // sort.
+ if (NewBlockInfo.size() == 1) {
+ BlockCacheEntryTy Entry = *NewBlockInfo.begin();
+ ValueCacheEntryTy::iterator I =
+ std::lower_bound(Cache.begin(), Cache.end(), Entry,
+ BlockCacheEntryComparator());
+ assert((I == Cache.end() || I->first != Entry.first) &&
+ "Entry already in map!");
+
+ Cache.insert(I, Entry);
+ return;
+ }
+
+ // TODO: If we only have two new elements, INSERT them both.
+
+ Cache.insert(Cache.end(), NewBlockInfo.begin(), NewBlockInfo.end());
+ array_pod_sort(Cache.begin(), Cache.end(),
+ BlockCacheEntryComparator::Compare);
+
+ }
+
+ LVILatticeVal getBlockValue(BasicBlock *BB);
+ LVILatticeVal getEdgeValue(BasicBlock *FromBB, BasicBlock *ToBB);
+
+ private:
+ LVILatticeVal &getCachedEntryForBlock(BasicBlock *BB);
+ };
+} // end anonymous namespace
+
+/// getCachedEntryForBlock - See if we already have a value for this block. If
+/// so, return it, otherwise create a new entry in the NewBlockInfo map to use.
+LVILatticeVal &LVIQuery::getCachedEntryForBlock(BasicBlock *BB) {
+
+ // Do a binary search to see if we already have an entry for this block in
+ // the cache set. If so, find it.
+ if (!Cache.empty()) {
+ ValueCacheEntryTy::iterator Entry =
+ std::lower_bound(Cache.begin(), Cache.end(),
+ BlockCacheEntryTy(BB, LVILatticeVal()),
+ BlockCacheEntryComparator());
+ if (Entry != Cache.end() && Entry->first == BB)
+ return Entry->second;
+ }
+
+ // Otherwise, check to see if it's in NewBlockInfo or create a new entry if
+ // not.
+ return NewBlockInfo[BB];
+}
+
+LVILatticeVal LVIQuery::getBlockValue(BasicBlock *BB) {
+ // See if we already have a value for this block.
+ LVILatticeVal &BBLV = getCachedEntryForBlock(BB);
+
+ // If we've already computed this block's value, return it.
+ if (!BBLV.isUndefined()) {
+ DEBUG(errs() << " reuse BB '" << BB->getName() << "' val=" << BBLV <<'\n');
+ return BBLV;
+ }
+
+ // Otherwise, this is the first time we're seeing this block. Reset the
+ // lattice value to overdefined, so that cycles will terminate and be
+ // conservatively correct.
+ BBLV.markOverdefined();
+
+ // If V is live into BB, see if our predecessors know anything about it.
+ Instruction *BBI = dyn_cast<Instruction>(Val);
+ if (BBI == 0 || BBI->getParent() != BB) {
+ LVILatticeVal Result; // Start Undefined.
+ unsigned NumPreds = 0;
+
+ // Loop over all of our predecessors, merging what we know from them into
+ // result.
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ Result.mergeIn(getEdgeValue(*PI, BB));
+
+ // If we hit overdefined, exit early. The cached entry for this block is
+ // already set to overdefined.
+ if (Result.isOverdefined()) {
+ DEBUG(errs() << " compute BB '" << BB->getName()
+ << "' - overdefined because of pred.\n");
+ return Result;
+ }
+ ++NumPreds;
+ }
+
+ // If this is the entry block, we must be asking about an argument. The
+ // value is overdefined.
+ if (NumPreds == 0 && BB == &BB->getParent()->front()) {
+ assert(isa<Argument>(Val) && "Unknown live-in to the entry block");
+ Result.markOverdefined();
+ return Result;
+ }
+
+ // Return the merged value, which is more precise than 'overdefined'.
+ assert(!Result.isOverdefined());
+ return getCachedEntryForBlock(BB) = Result;
+ }
+
+ // If this value is defined by an instruction in this block, we have to
+ // process it here somehow or return overdefined.
+ if (PHINode *PN = dyn_cast<PHINode>(BBI)) {
+ (void)PN;
+ // TODO: PHI Translation in preds.
+ } else {
+
+ }
+
+ DEBUG(errs() << " compute BB '" << BB->getName()
+ << "' - overdefined because inst def found.\n");
+
+ LVILatticeVal Result;
+ Result.markOverdefined();
+ return getCachedEntryForBlock(BB) = Result;
+}
+
+
+/// getEdgeValue - This method attempts to infer the value of Val on the edge
+/// from BBFrom to BBTo, using the terminator of BBFrom where possible.
+LVILatticeVal LVIQuery::getEdgeValue(BasicBlock *BBFrom, BasicBlock *BBTo) {
+ // TODO: Handle more complex conditionals. If (v == 0 || v2 < 1) is false, we
+ // know that v != 0.
+ if (BranchInst *BI = dyn_cast<BranchInst>(BBFrom->getTerminator())) {
+ // If this is a conditional branch and only one successor goes to BBTo, then
+ // we may be able to infer something from the condition.
+ if (BI->isConditional() &&
+ BI->getSuccessor(0) != BI->getSuccessor(1)) {
+ bool isTrueDest = BI->getSuccessor(0) == BBTo;
+ assert(BI->getSuccessor(!isTrueDest) == BBTo &&
+ "BBTo isn't a successor of BBFrom");
+
+ // If V is the condition of the branch itself, then we know exactly what
+ // it is.
+ if (BI->getCondition() == Val)
+ return LVILatticeVal::get(ConstantInt::get(
+ Type::getInt1Ty(Val->getContext()), isTrueDest));
+
+ // If the condition of the branch is an equality comparison, we may be
+ // able to infer the value.
+ if (ICmpInst *ICI = dyn_cast<ICmpInst>(BI->getCondition()))
+ if (ICI->isEquality() && ICI->getOperand(0) == Val &&
+ isa<Constant>(ICI->getOperand(1))) {
+ // We know that V has the RHS constant if this is a true SETEQ or
+ // false SETNE.
+ if (isTrueDest == (ICI->getPredicate() == ICmpInst::ICMP_EQ))
+ return LVILatticeVal::get(cast<Constant>(ICI->getOperand(1)));
+ return LVILatticeVal::getNot(cast<Constant>(ICI->getOperand(1)));
+ }
+ }
+ }
+
+ // If the edge was formed by a switch on the value, then we may know exactly
+ // what it is.
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(BBFrom->getTerminator())) {
+ // If BBTo is the default destination of the switch, we don't know anything.
+ // A more powerful range analysis could infer that the value is not one of
+ // the case values.
+ if (SI->getCondition() == Val && SI->getDefaultDest() != BBTo) {
+ // We only know something if there is exactly one value that goes from
+ // BBFrom to BBTo.
+ unsigned NumEdges = 0;
+ ConstantInt *EdgeVal = 0;
+ for (unsigned i = 1, e = SI->getNumSuccessors(); i != e; ++i) {
+ if (SI->getSuccessor(i) != BBTo) continue;
+ if (NumEdges++) break;
+ EdgeVal = SI->getCaseValue(i);
+ }
+ assert(EdgeVal && "Missing successor?");
+ if (NumEdges == 1)
+ return LVILatticeVal::get(EdgeVal);
+ }
+ }
+
+ // Otherwise see if the value is known in the block.
+ return getBlockValue(BBFrom);
+}
+
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfoCache Impl
+//===----------------------------------------------------------------------===//
+
+LVILatticeVal LazyValueInfoCache::getValueInBlock(Value *V, BasicBlock *BB) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return LVILatticeVal::get(VC);
+
+ DEBUG(errs() << "LVI Getting block end value " << *V << " at '"
+ << BB->getName() << "'\n");
+
+ LVILatticeVal Result = LVIQuery(V, ValueCache[V]).getBlockValue(BB);
+
+ DEBUG(errs() << " Result = " << Result << "\n");
+ return Result;
+}
+
+LVILatticeVal LazyValueInfoCache::
+getValueOnEdge(Value *V, BasicBlock *FromBB, BasicBlock *ToBB) {
+ // If already a constant, there is nothing to compute.
+ if (Constant *VC = dyn_cast<Constant>(V))
+ return LVILatticeVal::get(VC);
+
+ DEBUG(errs() << "LVI Getting edge value " << *V << " from '"
+ << FromBB->getName() << "' to '" << ToBB->getName() << "'\n");
+ LVILatticeVal Result =
+ LVIQuery(V, ValueCache[V]).getEdgeValue(FromBB, ToBB);
+
+ DEBUG(errs() << " Result = " << Result << "\n");
+
+ return Result;
+}
+
+//===----------------------------------------------------------------------===//
+// LazyValueInfo Impl
+//===----------------------------------------------------------------------===//
+
+bool LazyValueInfo::runOnFunction(Function &F) {
+ TD = getAnalysisIfAvailable<TargetData>();
+ // Fully lazy.
+ return false;
+}
+
+/// getCache - This lazily constructs the LazyValueInfoCache.
+static LazyValueInfoCache &getCache(void *&PImpl) {
+ if (!PImpl)
+ PImpl = new LazyValueInfoCache();
+ return *static_cast<LazyValueInfoCache*>(PImpl);
+}
+
+void LazyValueInfo::releaseMemory() {
+ // If the cache was allocated, free it.
+ if (PImpl) {
+ delete &getCache(PImpl);
+ PImpl = 0;
+ }
+}
+
+Constant *LazyValueInfo::getConstant(Value *V, BasicBlock *BB) {
+ LVILatticeVal Result = getCache(PImpl).getValueInBlock(V, BB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ return 0;
+}
+
+/// getConstantOnEdge - Determine whether the specified value is known to be a
+/// constant on the specified edge. Return null if not.
+Constant *LazyValueInfo::getConstantOnEdge(Value *V, BasicBlock *FromBB,
+ BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ if (Result.isConstant())
+ return Result.getConstant();
+ return 0;
+}
+
+/// getPredicateOnEdge - Determine whether the specified value comparison
+/// with a constant is known to be true or false on the specified CFG edge.
+/// Pred is a CmpInst predicate.
+LazyValueInfo::Tristate
+LazyValueInfo::getPredicateOnEdge(unsigned Pred, Value *V, Constant *C,
+ BasicBlock *FromBB, BasicBlock *ToBB) {
+ LVILatticeVal Result = getCache(PImpl).getValueOnEdge(V, FromBB, ToBB);
+
+ // If we know the value is a constant, evaluate the conditional.
+ Constant *Res = 0;
+ if (Result.isConstant()) {
+ Res = ConstantFoldCompareInstOperands(Pred, Result.getConstant(), C, TD);
+ if (ConstantInt *ResCI = dyn_cast_or_null<ConstantInt>(Res))
+ return ResCI->isZero() ? False : True;
+ return Unknown;
+ }
+
+ if (Result.isNotConstant()) {
+ // If this is an equality comparison, we can try to fold it knowing that
+ // "V != C1".
+ if (Pred == ICmpInst::ICMP_EQ) {
+ // !C1 == C -> false iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD);
+ if (Res->isNullValue())
+ return False;
+ } else if (Pred == ICmpInst::ICMP_NE) {
+ // !C1 != C -> true iff C1 == C.
+ Res = ConstantFoldCompareInstOperands(ICmpInst::ICMP_NE,
+ Result.getNotConstant(), C, TD);
+ if (Res->isNullValue())
+ return True;
+ }
+ return Unknown;
+ }
+
+ return Unknown;
+}
+
+
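
A minimal sketch of a client query (hypothetical helper; the caller is assumed to have required the LazyValueInfo pass): ask whether a value is provably zero along a particular CFG edge.

    #include "llvm/Analysis/LazyValueInfo.h"
    #include "llvm/Constants.h"
    #include "llvm/Instructions.h"
    using namespace llvm;

    // Hypothetical helper: true only if LVI proves V == 0 whenever control
    // flows along the Pred -> Succ edge (e.g. the taken edge of
    // "br (icmp eq V, 0), Succ, Other").
    static bool isKnownZeroOnEdge(LazyValueInfo &LVI, Value *V,
                                  BasicBlock *Pred, BasicBlock *Succ) {
      Constant *Zero = Constant::getNullValue(V->getType());
      return LVI.getPredicateOnEdge(ICmpInst::ICMP_EQ, V, Zero, Pred, Succ)
               == LazyValueInfo::True;
    }
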
diff --git a/lib/Analysis/LiveValues.cpp b/lib/Analysis/LiveValues.cpp
index 2bbe98a..02ec7d3 100644
--- a/lib/Analysis/LiveValues.cpp
+++ b/lib/Analysis/LiveValues.cpp
@@ -17,7 +17,9 @@
#include "llvm/Analysis/LoopInfo.h"
using namespace llvm;
-FunctionPass *llvm::createLiveValuesPass() { return new LiveValues(); }
+namespace llvm {
+ FunctionPass *createLiveValuesPass() { return new LiveValues(); }
+}
char LiveValues::ID = 0;
static RegisterPass<LiveValues>
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index e9256b7..1c614b0 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -263,14 +263,13 @@ bool Loop::isLCSSAForm() const {
SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
for (block_iterator BI = block_begin(), E = block_end(); BI != E; ++BI) {
- BasicBlock *BB = *BI;
- for (BasicBlock ::iterator I = BB->begin(), E = BB->end(); I != E;++I)
+ BasicBlock *BB = *BI;
+ for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E;++I)
for (Value::use_iterator UI = I->use_begin(), E = I->use_end(); UI != E;
++UI) {
BasicBlock *UserBB = cast<Instruction>(*UI)->getParent();
- if (PHINode *P = dyn_cast<PHINode>(*UI)) {
+ if (PHINode *P = dyn_cast<PHINode>(*UI))
UserBB = P->getIncomingBlock(UI);
- }
// Check the current block, as a fast-path. Most values are used in
// the same block they are defined in.
@@ -286,12 +285,14 @@ bool Loop::isLCSSAForm() const {
/// the LoopSimplify form transforms loops to, which is sometimes called
/// normal form.
bool Loop::isLoopSimplifyForm() const {
- // Normal-form loops have a preheader.
- if (!getLoopPreheader())
- return false;
- // Normal-form loops have a single backedge.
- if (!getLoopLatch())
- return false;
+ // Normal-form loops have a preheader, a single backedge, and all of their
+ // exits have all their predecessors inside the loop.
+ return getLoopPreheader() && getLoopLatch() && hasDedicatedExits();
+}
+
+/// hasDedicatedExits - Return true if no exit block for the loop
+/// has a predecessor that is outside the loop.
+bool Loop::hasDedicatedExits() const {
// Sort the blocks vector so that we can use binary search to do quick
// lookups.
SmallPtrSet<BasicBlock *, 16> LoopBBs(block_begin(), block_end());
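
A small sketch of the refactored check from a caller's point of view (hypothetical guard in a loop transform, not part of the patch):

    #include "llvm/Analysis/LoopInfo.h"
    using namespace llvm;

    // Hypothetical guard: bail out unless LoopSimplify canonicalization holds,
    // i.e. the loop has a preheader, a single backedge (latch), and all of its
    // exit blocks have predecessors only inside the loop (dedicated exits).
    static bool canTransform(const Loop *L) {
      return L->isLoopSimplifyForm();
    }
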
diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp
index f4eb793..b448628 100644
--- a/lib/Analysis/MemoryBuiltins.cpp
+++ b/lib/Analysis/MemoryBuiltins.cpp
@@ -16,7 +16,7 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/Module.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
using namespace llvm;
@@ -87,13 +87,8 @@ const CallInst *llvm::extractMallocCallFromBitCast(const Value *I) {
: NULL;
}
-/// isConstantOne - Return true only if val is constant int 1.
-static bool isConstantOne(Value *val) {
- return isa<ConstantInt>(val) && cast<ConstantInt>(val)->isOne();
-}
-
-static Value *isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
- const TargetData *TD) {
+static Value *computeArraySize(const CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt = false) {
if (!CI)
return NULL;
@@ -102,99 +97,27 @@ static Value *isArrayMallocHelper(const CallInst *CI, LLVMContext &Context,
if (!T || !T->isSized() || !TD)
return NULL;
- Value *MallocArg = CI->getOperand(1);
- const Type *ArgType = MallocArg->getType();
- ConstantExpr *CO = dyn_cast<ConstantExpr>(MallocArg);
- BinaryOperator *BO = dyn_cast<BinaryOperator>(MallocArg);
-
- unsigned ElementSizeInt = TD->getTypeAllocSize(T);
+ unsigned ElementSize = TD->getTypeAllocSize(T);
if (const StructType *ST = dyn_cast<StructType>(T))
- ElementSizeInt = TD->getStructLayout(ST)->getSizeInBytes();
- Constant *ElementSize = ConstantInt::get(ArgType, ElementSizeInt);
-
- // First, check if CI is a non-array malloc.
- if (CO && CO == ElementSize)
- // Match CreateMalloc's use of constant 1 array-size for non-array mallocs.
- return ConstantInt::get(ArgType, 1);
-
- // Second, check if CI is an array malloc whose array size can be determined.
- if (isConstantOne(ElementSize))
- return MallocArg;
-
- if (ConstantInt *CInt = dyn_cast<ConstantInt>(MallocArg))
- if (CInt->getZExtValue() % ElementSizeInt == 0)
- return ConstantInt::get(ArgType, CInt->getZExtValue() / ElementSizeInt);
+ ElementSize = TD->getStructLayout(ST)->getSizeInBytes();
- if (!CO && !BO)
- return NULL;
-
- Value *Op0 = NULL;
- Value *Op1 = NULL;
- unsigned Opcode = 0;
- if (CO && ((CO->getOpcode() == Instruction::Mul) ||
- (CO->getOpcode() == Instruction::Shl))) {
- Op0 = CO->getOperand(0);
- Op1 = CO->getOperand(1);
- Opcode = CO->getOpcode();
- }
- if (BO && ((BO->getOpcode() == Instruction::Mul) ||
- (BO->getOpcode() == Instruction::Shl))) {
- Op0 = BO->getOperand(0);
- Op1 = BO->getOperand(1);
- Opcode = BO->getOpcode();
- }
-
- // Determine array size if malloc's argument is the product of a mul or shl.
- if (Op0) {
- if (Opcode == Instruction::Mul) {
- if (Op1 == ElementSize)
- // ArraySize * ElementSize
- return Op0;
- if (Op0 == ElementSize)
- // ElementSize * ArraySize
- return Op1;
- }
- if (Opcode == Instruction::Shl) {
- ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
- if (!Op1CI) return NULL;
-
- APInt Op1Int = Op1CI->getValue();
- uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
- Value *Op1Pow = ConstantInt::get(Context,
- APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
- if (Op0 == ElementSize)
- // ArraySize << log2(ElementSize)
- return Op1Pow;
- if (Op1Pow == ElementSize)
- // ElementSize << log2(ArraySize)
- return Op0;
- }
- }
+ // If the malloc call's argument can be determined to be a multiple of
+ // ElementSize, return the multiple. Otherwise, return NULL.
+ Value *MallocArg = CI->getOperand(1);
+ Value *Multiple = NULL;
+ if (ComputeMultiple(MallocArg, ElementSize, Multiple,
+ LookThroughSExt))
+ return Multiple;
- // We could not determine the malloc array size from MallocArg.
return NULL;
}
/// isArrayMalloc - Returns the corresponding CallInst if the instruction
/// is a call to malloc whose array size can be determined and the array size
/// is not constant 1. Otherwise, return NULL.
-CallInst *llvm::isArrayMalloc(Value *I, LLVMContext &Context,
- const TargetData *TD) {
- CallInst *CI = extractMallocCall(I);
- Value *ArraySize = isArrayMallocHelper(CI, Context, TD);
-
- if (ArraySize &&
- ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1))
- return CI;
-
- // CI is a non-array malloc or we can't figure out that it is an array malloc.
- return NULL;
-}
-
-const CallInst *llvm::isArrayMalloc(const Value *I, LLVMContext &Context,
- const TargetData *TD) {
+const CallInst *llvm::isArrayMalloc(const Value *I, const TargetData *TD) {
const CallInst *CI = extractMallocCall(I);
- Value *ArraySize = isArrayMallocHelper(CI, Context, TD);
+ Value *ArraySize = computeArraySize(CI, TD);
if (ArraySize &&
ArraySize != ConstantInt::get(CI->getOperand(1)->getType(), 1))
@@ -210,7 +133,7 @@ const CallInst *llvm::isArrayMalloc(const Value *I, LLVMContext &Context,
/// 1: PointerType is the bitcast's result type.
/// >1: Unique PointerType cannot be determined, return NULL.
const PointerType *llvm::getMallocType(const CallInst *CI) {
- assert(isMalloc(CI) && "GetMallocType and not malloc call");
+ assert(isMalloc(CI) && "getMallocType and not malloc call");
const PointerType *MallocType = NULL;
unsigned NumOfBitCastUses = 0;
@@ -250,9 +173,10 @@ const Type *llvm::getMallocAllocatedType(const CallInst *CI) {
/// then return that multiple. For non-array mallocs, the multiple is
/// constant 1. Otherwise, return NULL for mallocs whose array size cannot be
/// determined.
-Value *llvm::getMallocArraySize(CallInst *CI, LLVMContext &Context,
- const TargetData *TD) {
- return isArrayMallocHelper(CI, Context, TD);
+Value *llvm::getMallocArraySize(CallInst *CI, const TargetData *TD,
+ bool LookThroughSExt) {
+ assert(isMalloc(CI) && "getMallocArraySize and not malloc call");
+ return computeArraySize(CI, TD, LookThroughSExt);
}
//===----------------------------------------------------------------------===//
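
A minimal sketch of the simplified malloc-size interface (hypothetical caller; the LLVMContext parameter is gone and sign extensions can optionally be looked through):

    #include "llvm/Analysis/MemoryBuiltins.h"
    #include "llvm/Instructions.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    // Hypothetical helper: the array size of a malloc call, constant 1 for a
    // non-array malloc, or null if the size cannot be determined.
    static Value *arraySizeOf(CallInst *CI, const TargetData *TD) {
      if (!isMalloc(CI))
        return 0;
      return getMallocArraySize(CI, TD, /*LookThroughSExt=*/false);
    }
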
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
index 2251b62..8da07e7 100644
--- a/lib/Analysis/PointerTracking.cpp
+++ b/lib/Analysis/PointerTracking.cpp
@@ -10,6 +10,7 @@
// This file implements tracking of pointer bounds.
//
//===----------------------------------------------------------------------===//
+
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -101,7 +102,7 @@ const SCEV *PointerTracking::computeAllocationCount(Value *P,
}
if (CallInst *CI = extractMallocCall(V)) {
- Value *arraySize = getMallocArraySize(CI, P->getContext(), TD);
+ Value *arraySize = getMallocArraySize(CI, TD);
const Type* AllocTy = getMallocAllocatedType(CI);
if (!AllocTy || !arraySize) return SE->getCouldNotCompute();
Ty = AllocTy;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 3e87ca2..ea4af40 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -3811,29 +3811,26 @@ static PHINode *getConstantEvolvingPHI(Value *V, const Loop *L) {
/// getConstantEvolvingPHI predicate, evaluate its value assuming the PHI node
/// in the loop has the value PHIVal. If we can't fold this expression for some
/// reason, return null.
-static Constant *EvaluateExpression(Value *V, Constant *PHIVal) {
+static Constant *EvaluateExpression(Value *V, Constant *PHIVal,
+ const TargetData *TD) {
if (isa<PHINode>(V)) return PHIVal;
if (Constant *C = dyn_cast<Constant>(V)) return C;
if (GlobalValue *GV = dyn_cast<GlobalValue>(V)) return GV;
Instruction *I = cast<Instruction>(V);
- LLVMContext &Context = I->getParent()->getContext();
std::vector<Constant*> Operands;
Operands.resize(I->getNumOperands());
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
- Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal);
+ Operands[i] = EvaluateExpression(I->getOperand(i), PHIVal, TD);
if (Operands[i] == 0) return 0;
}
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Operands[0], Operands.size(),
- Context);
- else
- return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size(),
- Context);
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Operands[0],
+ Operands[1], TD);
+ return ConstantFoldInstOperands(I->getOpcode(), I->getType(),
+ &Operands[0], Operands.size(), TD);
}
/// getConstantEvolutionLoopExitValue - If we know that the specified Phi is
@@ -3879,7 +3876,7 @@ ScalarEvolution::getConstantEvolutionLoopExitValue(PHINode *PN,
return RetVal = PHIVal; // Got exit value!
// Compute the value of the PHI node for the next iteration.
- Constant *NextPHI = EvaluateExpression(BEValue, PHIVal);
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
if (NextPHI == PHIVal)
return RetVal = NextPHI; // Stopped evolving!
if (NextPHI == 0)
@@ -3920,7 +3917,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
for (Constant *PHIVal = StartCST;
IterationNum != MaxIterations; ++IterationNum) {
ConstantInt *CondVal =
- dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal));
+ dyn_cast_or_null<ConstantInt>(EvaluateExpression(Cond, PHIVal, TD));
// Couldn't symbolically evaluate.
if (!CondVal) return getCouldNotCompute();
@@ -3931,7 +3928,7 @@ ScalarEvolution::ComputeBackedgeTakenCountExhaustively(const Loop *L,
}
// Compute the value of the PHI node for the next iteration.
- Constant *NextPHI = EvaluateExpression(BEValue, PHIVal);
+ Constant *NextPHI = EvaluateExpression(BEValue, PHIVal, TD);
if (NextPHI == 0 || NextPHI == PHIVal)
return getCouldNotCompute();// Couldn't evaluate or not making progress...
PHIVal = NextPHI;
@@ -4040,12 +4037,10 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
Constant *C;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Operands[0], Operands.size(),
- getContext());
+ Operands[0], Operands[1], TD);
else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
- &Operands[0], Operands.size(),
- getContext());
+ &Operands[0], Operands.size(), TD);
return getSCEV(C);
}
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 5672510..b0e6900 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -789,6 +789,118 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
return std::max(FirstAnswer, std::min(TyBits, Mask.countLeadingZeros()));
}
+/// ComputeMultiple - This function computes the integer multiple of Base that
+/// equals V. If successful, it returns true and returns the multiple in
+/// Multiple. If unsuccessful, it returns false. It looks
+/// through SExt instructions only if LookThroughSExt is true.
+bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple,
+ bool LookThroughSExt, unsigned Depth) {
+ const unsigned MaxDepth = 6;
+
+ assert(V && "No Value?");
+ assert(Depth <= MaxDepth && "Limit Search Depth");
+ assert(V->getType()->isInteger() && "Not an integer value!");
+
+ const Type *T = V->getType();
+
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
+
+ if (Base == 0)
+ return false;
+
+ if (Base == 1) {
+ Multiple = V;
+ return true;
+ }
+
+ ConstantExpr *CO = dyn_cast<ConstantExpr>(V);
+ Constant *BaseVal = ConstantInt::get(T, Base);
+ if (CO && CO == BaseVal) {
+ // Multiple is 1.
+ Multiple = ConstantInt::get(T, 1);
+ return true;
+ }
+
+ if (CI && CI->getZExtValue() % Base == 0) {
+ Multiple = ConstantInt::get(T, CI->getZExtValue() / Base);
+ return true;
+ }
+
+ if (Depth == MaxDepth) return false; // Limit search depth.
+
+ Operator *I = dyn_cast<Operator>(V);
+ if (!I) return false;
+
+ switch (I->getOpcode()) {
+ default: break;
+ case Instruction::SExt: {
+ if (!LookThroughSExt) return false;
+ // otherwise fall through to ZExt
+ }
+ case Instruction::ZExt: {
+ return ComputeMultiple(I->getOperand(0), Base, Multiple,
+ LookThroughSExt, Depth+1);
+ }
+ case Instruction::Shl:
+ case Instruction::Mul: {
+ Value *Op0 = I->getOperand(0);
+ Value *Op1 = I->getOperand(1);
+
+ if (I->getOpcode() == Instruction::Shl) {
+ ConstantInt *Op1CI = dyn_cast<ConstantInt>(Op1);
+ if (!Op1CI) return false;
+ // Turn Op0 << Op1 into Op0 * 2^Op1
+ APInt Op1Int = Op1CI->getValue();
+ uint64_t BitToSet = Op1Int.getLimitedValue(Op1Int.getBitWidth() - 1);
+ Op1 = ConstantInt::get(V->getContext(),
+ APInt(Op1Int.getBitWidth(), 0).set(BitToSet));
+ }
+
+ Value *Mul0 = NULL;
+ Value *Mul1 = NULL;
+ bool M0 = ComputeMultiple(Op0, Base, Mul0,
+ LookThroughSExt, Depth+1);
+ bool M1 = ComputeMultiple(Op1, Base, Mul1,
+ LookThroughSExt, Depth+1);
+
+ if (M0) {
+ if (isa<Constant>(Op1) && isa<Constant>(Mul0)) {
+ // V == Base * (Mul0 * Op1), so return (Mul0 * Op1)
+ Multiple = ConstantExpr::getMul(cast<Constant>(Mul0),
+ cast<Constant>(Op1));
+ return true;
+ }
+
+ if (ConstantInt *Mul0CI = dyn_cast<ConstantInt>(Mul0))
+ if (Mul0CI->getValue() == 1) {
+ // V == Base * Op1, so return Op1
+ Multiple = Op1;
+ return true;
+ }
+ }
+
+ if (M1) {
+ if (isa<Constant>(Op0) && isa<Constant>(Mul1)) {
+ // V == Base * (Mul1 * Op0), so return (Mul1 * Op0)
+ Multiple = ConstantExpr::getMul(cast<Constant>(Mul1),
+ cast<Constant>(Op0));
+ return true;
+ }
+
+ if (ConstantInt *Mul1CI = dyn_cast<ConstantInt>(Mul1))
+ if (Mul1CI->getValue() == 1) {
+ // V == Base * Op0, so return Op0
+ Multiple = Op0;
+ return true;
+ }
+ }
+ }
+ }
+
+ // We could not determine if V is a multiple of Base.
+ return false;
+}
+
/// CannotBeNegativeZero - Return true if we can prove that the specified FP
/// value is never equal to -0.0.
///
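
A minimal sketch of the new ComputeMultiple query (hypothetical helper and base value; this is the routine that computeArraySize in MemoryBuiltins.cpp now relies on):

    #include "llvm/Analysis/ValueTracking.h"
    #include "llvm/Value.h"
    using namespace llvm;

    // Hypothetical query: if V is provably 8 * K for some value K, return K;
    // otherwise return null.
    static Value *multipleOfEight(Value *V) {
      Value *Multiple = 0;
      if (ComputeMultiple(V, 8, Multiple, /*LookThroughSExt=*/false))
        return Multiple;          // V == 8 * Multiple
      return 0;
    }
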
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 63af42d..26b6a09 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -2039,7 +2039,7 @@ bool LLParser::ParseValID(ValID &ID) {
ParseToken(lltok::StringConstant, "expected constraint string"))
return true;
ID.StrVal2 = Lex.getStrVal();
- ID.UIntVal = HasSideEffect | ((unsigned)AlignStack<<1);
+ ID.UIntVal = unsigned(HasSideEffect) | (unsigned(AlignStack)<<1);
ID.Kind = ValID::t_InlineAsm;
return false;
}
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 86d051c..c37c793 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -54,10 +54,13 @@ unsigned AggressiveAntiDepState::GetGroup(unsigned Reg)
return Node;
}
-void AggressiveAntiDepState::GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs)
+void AggressiveAntiDepState::GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
{
for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) {
- if (GetGroup(Reg) == Group)
+ if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
Regs.push_back(Reg);
}
}
@@ -99,12 +102,28 @@ bool AggressiveAntiDepState::IsLive(unsigned Reg)
AggressiveAntiDepBreaker::
-AggressiveAntiDepBreaker(MachineFunction& MFi) :
+AggressiveAntiDepBreaker(MachineFunction& MFi,
+ TargetSubtarget::RegClassVector& CriticalPathRCs) :
AntiDepBreaker(), MF(MFi),
MRI(MF.getRegInfo()),
TRI(MF.getTarget().getRegisterInfo()),
AllocatableSet(TRI->getAllocatableSet(MF)),
State(NULL), SavedState(NULL) {
+ // Collect a bitset of all registers that are only broken if they
+ // are on the critical path.
+ for (unsigned i = 0, e = CriticalPathRCs.size(); i < e; ++i) {
+ BitVector CPSet = TRI->getAllocatableSet(MF, CriticalPathRCs[i]);
+ if (CriticalPathSet.none())
+ CriticalPathSet = CPSet;
+ else
+ CriticalPathSet |= CPSet;
+ }
+
+ DEBUG(errs() << "AntiDep Critical-Path Registers:");
+ DEBUG(for (int r = CriticalPathSet.find_first(); r != -1;
+ r = CriticalPathSet.find_next(r))
+ errs() << " " << TRI->getName(r));
+ DEBUG(errs() << '\n');
}
AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
@@ -264,16 +283,18 @@ void AggressiveAntiDepBreaker::GetPassthruRegs(MachineInstr *MI,
}
}
-/// AntiDepPathStep - Return SUnit that SU has an anti-dependence on.
-static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs,
- std::vector<SDep*>& Edges) {
+/// AntiDepEdges - Return in Edges the anti- and output-
+/// dependencies on Regs in SU that we want to consider for breaking.
+static void AntiDepEdges(SUnit *SU,
+ const AntiDepBreaker::AntiDepRegVector& Regs,
+ std::vector<SDep*>& Edges) {
AntiDepBreaker::AntiDepRegSet RegSet;
for (unsigned i = 0, e = Regs.size(); i < e; ++i)
RegSet.insert(Regs[i]);
for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
P != PE; ++P) {
- if (P->getKind() == SDep::Anti) {
+ if ((P->getKind() == SDep::Anti) || (P->getKind() == SDep::Output)) {
unsigned Reg = P->getReg();
if (RegSet.count(Reg) != 0) {
Edges.push_back(&*P);
@@ -285,6 +306,31 @@ static void AntiDepPathStep(SUnit *SU, AntiDepBreaker::AntiDepRegVector& Regs,
assert(RegSet.empty() && "Expected all antidep registers to be found");
}
+/// CriticalPathStep - Return the next SUnit after SU on the bottom-up
+/// critical path.
+static SUnit *CriticalPathStep(SUnit *SU) {
+ SDep *Next = 0;
+ unsigned NextDepth = 0;
+ // Find the predecessor edge with the greatest depth.
+ if (SU != 0) {
+ for (SUnit::pred_iterator P = SU->Preds.begin(), PE = SU->Preds.end();
+ P != PE; ++P) {
+ SUnit *PredSU = P->getSUnit();
+ unsigned PredLatency = P->getLatency();
+ unsigned PredTotalLatency = PredSU->getDepth() + PredLatency;
+ // In the case of a latency tie, prefer an anti-dependency edge over
+ // other types of edges.
+ if (NextDepth < PredTotalLatency ||
+ (NextDepth == PredTotalLatency && P->getKind() == SDep::Anti)) {
+ NextDepth = PredTotalLatency;
+ Next = &*P;
+ }
+ }
+ }
+
+ return (Next) ? Next->getSUnit() : 0;
+}
+
void AggressiveAntiDepBreaker::HandleLastUse(unsigned Reg, unsigned KillIdx,
const char *tag) {
unsigned *KillIndices = State->GetKillIndices();
@@ -499,11 +545,11 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
- // Collect all registers in the same group as AntiDepReg. These all
- // need to be renamed together if we are to break the
- // anti-dependence.
+ // Collect all referenced registers in the same group as
+ // AntiDepReg. These all need to be renamed together if we are to
+ // break the anti-dependence.
std::vector<unsigned> Regs;
- State->GetGroupRegs(AntiDepGroupIndex, Regs);
+ State->GetGroupRegs(AntiDepGroupIndex, Regs, &RegRefs);
assert(Regs.size() > 0 && "Empty register group!");
if (Regs.size() == 0)
return false;
@@ -544,9 +590,10 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters(
}
// FIXME: for now just handle single register in group case...
- // FIXME: check only regs that have references...
- if (Regs.size() > 1)
+ if (Regs.size() > 1) {
+ DEBUG(errs() << "\tMultiple rename registers in group\n");
return false;
+ }
// Check each possible rename register for SuperReg in round-robin
// order. If that register is available, and the corresponding
@@ -630,12 +677,6 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference>&
RegRefs = State->GetRegRefs();
- // Nothing to do if no candidates.
- if (Candidates.empty()) {
- DEBUG(errs() << "\n===== No anti-dependency candidates\n");
- return 0;
- }
-
// The code below assumes that there is at least one instruction,
// so just duck out immediately if the block is empty.
if (SUnits.empty()) return 0;
@@ -655,16 +696,37 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// ...need a map from MI to SUnit.
std::map<MachineInstr *, SUnit *> MISUnitMap;
-
- DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() <<
- " anti-dependencies\n");
for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
SUnit *SU = &SUnits[i];
MISUnitMap.insert(std::pair<MachineInstr *, SUnit *>(SU->getInstr(), SU));
}
+ // Track progress along the critical path through the SUnit graph as
+ // we walk the instructions. This is needed for regclasses that only
+ // break critical-path anti-dependencies.
+ SUnit *CriticalPathSU = 0;
+ MachineInstr *CriticalPathMI = 0;
+ if (CriticalPathSet.any()) {
+ for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
+ SUnit *SU = &SUnits[i];
+ if (!CriticalPathSU ||
+ ((SU->getDepth() + SU->Latency) >
+ (CriticalPathSU->getDepth() + CriticalPathSU->Latency))) {
+ CriticalPathSU = SU;
+ }
+ }
+
+ CriticalPathMI = CriticalPathSU->getInstr();
+ }
+
+ // Even if there are no anti-dependencies we still need to go
+ // through the instructions to update Def, Kills, etc.
#ifndef NDEBUG
- {
+ if (Candidates.empty()) {
+ DEBUG(errs() << "\n===== No anti-dependency candidates\n");
+ } else {
+ DEBUG(errs() << "\n===== Attempting to break " << Candidates.size() <<
+ " anti-dependencies\n");
DEBUG(errs() << "Available regs:");
for (unsigned Reg = 0; Reg < TRI->getNumRegs(); ++Reg) {
if (!State->IsLive(Reg))
@@ -691,14 +753,26 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Process the defs in MI...
PrescanInstruction(MI, Count, PassthruRegs);
-
+
+ // Find the dependence edges that represent anti- and output-
+ // dependencies that are candidates for breaking.
std::vector<SDep*> Edges;
SUnit *PathSU = MISUnitMap[MI];
AntiDepBreaker::CandidateMap::iterator
citer = Candidates.find(PathSU);
if (citer != Candidates.end())
- AntiDepPathStep(PathSU, citer->second, Edges);
-
+ AntiDepEdges(PathSU, citer->second, Edges);
+
+ // If MI is not on the critical path, then we don't rename
+ // registers in the CriticalPathSet.
+ BitVector *ExcludeRegs = NULL;
+ if (MI == CriticalPathMI) {
+ CriticalPathSU = CriticalPathStep(CriticalPathSU);
+ CriticalPathMI = (CriticalPathSU) ? CriticalPathSU->getInstr() : 0;
+ } else {
+ ExcludeRegs = &CriticalPathSet;
+ }
+
// Ignore KILL instructions (they form a group in ScanInstruction
// but don't cause any anti-dependence breaking themselves)
if (MI->getOpcode() != TargetInstrInfo::KILL) {
@@ -707,7 +781,8 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
SDep *Edge = Edges[i];
SUnit *NextSU = Edge->getSUnit();
- if (Edge->getKind() != SDep::Anti) continue;
+ if ((Edge->getKind() != SDep::Anti) &&
+ (Edge->getKind() != SDep::Output)) continue;
unsigned AntiDepReg = Edge->getReg();
DEBUG(errs() << "\tAntidep reg: " << TRI->getName(AntiDepReg));
@@ -717,6 +792,11 @@ unsigned AggressiveAntiDepBreaker::BreakAntiDependencies(
// Don't break anti-dependencies on non-allocatable registers.
DEBUG(errs() << " (non-allocatable)\n");
continue;
+ } else if ((ExcludeRegs != NULL) && ExcludeRegs->test(AntiDepReg)) {
+ // Don't break anti-dependencies for critical path registers
+ // if not on the critical path
+ DEBUG(errs() << " (not critical-path)\n");
+ continue;
} else if (PassthruRegs.count(AntiDepReg) != 0) {
// If the anti-dep register liveness "passes-thru", then
// don't try to change it. It will be changed along with
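
A sketch of constructing the breaker with the new critical-path argument (hypothetical call site and register class; real schedulers obtain the classes from the target):

    #include "AggressiveAntiDepBreaker.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/CodeGen/MachineFunction.h"
    #include "llvm/Target/TargetRegisterInfo.h"
    using namespace llvm;

    // Hypothetical call site: registers in CriticalPathRCs are only renamed
    // when they lie on the schedule's critical path; an empty vector keeps the
    // previous behavior of considering every candidate register.
    static AntiDepBreaker *makeBreaker(MachineFunction &MF,
                                       TargetRegisterClass *CPClass) {
      SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
      if (CPClass)                     // e.g. a target's general-purpose class
        CriticalPathRCs.push_back(CPClass);
      return new AggressiveAntiDepBreaker(MF, CriticalPathRCs);
    }
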
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index c512168..e5c9a7b 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -23,6 +23,7 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/ScheduleDAG.h"
+#include "llvm/Target/TargetSubtarget.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/SmallSet.h"
@@ -85,8 +86,11 @@ namespace llvm {
unsigned GetGroup(unsigned Reg);
// GetGroupRegs - Return a vector of the registers belonging to a
- // group.
- void GetGroupRegs(unsigned Group, std::vector<unsigned> &Regs);
+ // group. If RegRefs is non-NULL then only include referenced registers.
+ void GetGroupRegs(
+ unsigned Group,
+ std::vector<unsigned> &Regs,
+ std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs);
// UnionGroups - Union Reg1's and Reg2's groups to form a new
// group. Return the index of the GroupNode representing the
@@ -114,6 +118,10 @@ namespace llvm {
/// because they may not be safe to break.
const BitVector AllocatableSet;
+ /// CriticalPathSet - The set of registers that should only be
+ /// renamed if they are on the critical path.
+ BitVector CriticalPathSet;
+
/// State - The state used to identify and rename anti-dependence
/// registers.
AggressiveAntiDepState *State;
@@ -124,7 +132,8 @@ namespace llvm {
AggressiveAntiDepState *SavedState;
public:
- AggressiveAntiDepBreaker(MachineFunction& MFi);
+ AggressiveAntiDepBreaker(MachineFunction& MFi,
+ TargetSubtarget::RegClassVector& CriticalPathRCs);
~AggressiveAntiDepBreaker();
/// GetMaxTrials - As anti-dependencies are broken, additional
diff --git a/lib/CodeGen/AntiDepBreaker.h b/lib/CodeGen/AntiDepBreaker.h
index 2775087..b614f68 100644
--- a/lib/CodeGen/AntiDepBreaker.h
+++ b/lib/CodeGen/AntiDepBreaker.h
@@ -23,6 +23,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
+#include <map>
namespace llvm {
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index bb6bd95..08e0eae 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -18,6 +18,7 @@
#include "llvm/Module.h"
#include "llvm/CodeGen/GCMetadataPrinter.h"
#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
@@ -35,6 +36,7 @@
#include "llvm/Support/Mangler.h"
#include "llvm/MC/MCAsmInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetOptions.h"
@@ -512,7 +514,7 @@ void AsmPrinter::EmitXXStructorList(Constant *List) {
//===----------------------------------------------------------------------===//
/// LEB 128 number encoding.
-/// PrintULEB128 - Print a series of hexidecimal values (separated by commas)
+/// PrintULEB128 - Print a series of hexadecimal values (separated by commas)
/// representing an unsigned leb128 value.
void AsmPrinter::PrintULEB128(unsigned Value) const {
char Buffer[20];
@@ -525,7 +527,7 @@ void AsmPrinter::PrintULEB128(unsigned Value) const {
} while (Value);
}
-/// PrintSLEB128 - Print a series of hexidecimal values (separated by commas)
+/// PrintSLEB128 - Print a series of hexadecimal values (separated by commas)
/// representing a signed leb128 value.
void AsmPrinter::PrintSLEB128(int Value) const {
int Sign = Value >> (8 * sizeof(Value) - 1);
@@ -546,7 +548,7 @@ void AsmPrinter::PrintSLEB128(int Value) const {
// Emission and print routines
//
-/// PrintHex - Print a value as a hexidecimal value.
+/// PrintHex - Print a value as a hexadecimal value.
///
void AsmPrinter::PrintHex(int Value) const {
char Buffer[20];
@@ -727,7 +729,7 @@ static void printStringChar(formatted_raw_ostream &O, unsigned char C) {
/// Special characters are emitted properly.
/// \literal (Eg. '\t') \endliteral
void AsmPrinter::EmitString(const std::string &String) const {
- EmitString(String.c_str(), String.size());
+ EmitString(String.data(), String.size());
}
void AsmPrinter::EmitString(const char *String, unsigned Size) const {
@@ -1357,32 +1359,31 @@ void AsmPrinter::PrintSpecial(const MachineInstr *MI, const char *Code) const {
/// instruction's DebugLoc.
void AsmPrinter::processDebugLoc(const MachineInstr *MI,
bool BeforePrintingInsn) {
- if (!MAI || !DW)
+ if (!MAI || !DW || !MAI->doesSupportDebugInformation()
+ || !DW->ShouldEmitDwarfDebug())
return;
DebugLoc DL = MI->getDebugLoc();
- if (MAI->doesSupportDebugInformation() && DW->ShouldEmitDwarfDebug()) {
- if (!DL.isUnknown()) {
- DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
- if (BeforePrintingInsn) {
- if (CurDLT.Scope != 0 && PrevDLT != CurDLT) {
- unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
- CurDLT.Scope);
- printLabel(L);
- O << '\n';
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
- DW->SetDbgScopeBeginLabels(MI, L);
-#endif
- } else {
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
- DW->SetDbgScopeEndLabels(MI, 0);
-#endif
- }
- }
+ if (DL.isUnknown())
+ return;
+ DebugLocTuple CurDLT = MF->getDebugLocTuple(DL);
+ if (CurDLT.Scope == 0)
+ return;
+
+ if (BeforePrintingInsn) {
+ if (CurDLT != PrevDLT) {
+ unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
+ CurDLT.Scope);
+ printLabel(L);
+ DW->BeginScope(MI, L);
PrevDLT = CurDLT;
}
+ } else {
+ // After printing instruction
+ DW->EndScope(MI);
}
}
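The reworked processDebugLoc above only records a new source line (and opens a scope via BeginScope) when an instruction's location differs from the previous one; repeated locations produce no extra labels. The dedup idea in isolation, with plain (line, column) pairs standing in for DebugLocTuple — labels and output format are made up for illustration:

    #include <cstdio>
    #include <utility>
    #include <vector>

    int main() {
      // (line, column) pairs standing in for successive instructions' locations.
      std::vector<std::pair<unsigned, unsigned> > Locs = {
          {10, 3}, {10, 3}, {12, 1}, {12, 1}, {12, 1}, {15, 7}};

      std::pair<unsigned, unsigned> Prev(0, 0);
      unsigned Label = 0;
      for (size_t i = 0; i < Locs.size(); ++i) {
        if (Locs[i] != Prev) {   // only emit a label when the location changes
          std::printf("Llabel%u:   ; line %u col %u\n", ++Label,
                      Locs[i].first, Locs[i].second);
          Prev = Locs[i];
        }
        std::printf("  <instruction %zu>\n", i);
      }
      return 0;
    }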
+
/// printInlineAsm - This method formats and prints the specified machine
/// instruction that is an inline asm.
void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
@@ -1399,6 +1400,8 @@ void AsmPrinter::printInlineAsm(const MachineInstr *MI) const {
// Disassemble the AsmStr, printing out the literal pieces, the operands, etc.
const char *AsmStr = MI->getOperand(NumDefs).getSymbolName();
+ O << '\t';
+
// If this asmstr is empty, just print the #APP/#NOAPP markers.
// These are useful to see where empty asm's wound up.
if (AsmStr[0] == 0) {
@@ -1636,13 +1639,17 @@ MCSymbol *AsmPrinter::GetBlockAddressSymbol(const Function *F,
assert(BB->hasName() &&
"Address of anonymous basic block not supported yet!");
- // FIXME: This isn't guaranteed to produce a unique name even if the
- // block and function have a name.
- std::string Mangled =
- Mang->getMangledName(F, Mang->makeNameProper(BB->getName()).c_str(),
- /*ForcePrivate=*/true);
+ // This code must use the function name itself, and not the function number,
+ // since it must be possible to generate the label name from within other
+ // functions.
+ std::string FuncName = Mang->getMangledName(F);
+
+ SmallString<60> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix() << "BA"
+ << FuncName.size() << '_' << FuncName << '_'
+ << Mang->makeNameProper(BB->getName());
- return OutContext.GetOrCreateSymbol(StringRef(Mangled));
+ return OutContext.GetOrCreateSymbol(Name.str());
}
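The new block-address label is derived purely from the mangled function name and the block name, so any function can reconstruct it. A sketch of the resulting string, treating the private prefix and the mangled names as given inputs (the real values come from MCAsmInfo and the Mangler; ".L" and the names below are hypothetical):

    #include <iostream>
    #include <sstream>
    #include <string>

    // Compose a block-address label of the form <prefix>BA<len>_<func>_<block>,
    // mirroring the format built with raw_svector_ostream above.
    static std::string blockAddressLabel(const std::string &Prefix,
                                         const std::string &FuncName,
                                         const std::string &BlockName) {
      std::ostringstream OS;
      OS << Prefix << "BA" << FuncName.size() << '_' << FuncName << '_' << BlockName;
      return OS.str();
    }

    int main() {
      std::cout << blockAddressLabel(".L", "_Z3foov", "entry") << "\n";
      // prints: .LBA7__Z3foov_entry
      return 0;
    }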
MCSymbol *AsmPrinter::GetMBBSymbol(unsigned MBBID) const {
@@ -1817,21 +1824,80 @@ GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
/// EmitComments - Pretty-print comments for instructions
void AsmPrinter::EmitComments(const MachineInstr &MI) const {
- assert(VerboseAsm && !MI.getDebugLoc().isUnknown());
-
- DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc());
+ if (!VerboseAsm)
+ return;
- // Print source line info.
- O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString() << " SrcLine ";
- if (DLT.Scope) {
- DICompileUnit CU(DLT.Scope);
- if (!CU.isNull())
- O << CU.getFilename() << " ";
+ bool Newline = false;
+
+ if (!MI.getDebugLoc().isUnknown()) {
+ DebugLocTuple DLT = MF->getDebugLocTuple(MI.getDebugLoc());
+
+ // Print source line info.
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " SrcLine ";
+ if (DLT.Scope) {
+ DICompileUnit CU(DLT.Scope);
+ if (!CU.isNull())
+ O << CU.getFilename() << " ";
+ }
+ O << DLT.Line;
+ if (DLT.Col != 0)
+ O << ":" << DLT.Col;
+ Newline = true;
+ }
+
+ // Check for spills and reloads
+ int FI;
+
+ const MachineFrameInfo *FrameInfo =
+ MI.getParent()->getParent()->getFrameInfo();
+
+ // We assume a single instruction only has a spill or reload, not
+ // both.
+ if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Reload";
+ Newline = true;
+ }
+ }
+ else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Folded Reload";
+ Newline = true;
+ }
+ }
+ else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Spill";
+ Newline = true;
+ }
+ }
+ else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, FI)) {
+ if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Folded Spill";
+ Newline = true;
+ }
+ }
+
+ // Check for spill-induced copies
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TM.getInstrInfo()->isMoveInstr(MI, SrcReg, DstReg,
+ SrcSubIdx, DstSubIdx)) {
+ if (MI.getAsmPrinterFlag(ReloadReuse)) {
+ if (Newline) O << '\n';
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << " Reload Reuse";
+ Newline = true;
+ }
}
- O << DLT.Line;
- if (DLT.Col != 0)
- O << ":" << DLT.Col;
}
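The spill/reload annotations above all share one pattern: pad to the comment column, print one tag, and remember via the Newline flag that the next tag needs its own line. Below is a self-contained imitation of that pattern using a plain string buffer in place of PadToColumn; the column width, comment marker, and instruction text are assumptions, not MCAsmInfo values.

    #include <iostream>
    #include <string>

    static const size_t CommentColumn = 40;   // assumed, not MAI->getCommentColumn()

    // Pad Line with spaces until it is at least Col characters wide.
    static void padToColumn(std::string &Line, size_t Col) {
      if (Line.size() < Col)
        Line.append(Col - Line.size(), ' ');
    }

    int main() {
      bool Newline = false;
      std::string Line = "  movl %eax, -4(%rbp)";   // hypothetical instruction text
      const char *Tags[] = {"SrcLine test.c 42", "Spill"};

      for (const char *Tag : Tags) {
        if (Newline) {              // each extra annotation starts a fresh line
          std::cout << Line << '\n';
          Line.clear();
        }
        padToColumn(Line, CommentColumn);
        Line += "# ";               // assumed comment string
        Line += Tag;
        Newline = true;
      }
      std::cout << Line << '\n';
      return 0;
    }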
/// PrintChildLoopComment - Print comments about child loops within
@@ -1862,8 +1928,7 @@ static void PrintChildLoopComment(formatted_raw_ostream &O,
}
/// EmitComments - Pretty-print comments for basic blocks
-void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const
-{
+void AsmPrinter::EmitComments(const MachineBasicBlock &MBB) const {
if (VerboseAsm) {
// Add loop depth information
const MachineLoop *loop = LI->getLoopFor(&MBB);
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index 62b51ec..3e50a15 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -29,7 +29,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEAbbrevData - Dwarf abbreviation data, describes the one attribute of a
/// Dwarf abbreviation.
- class VISIBILITY_HIDDEN DIEAbbrevData {
+ class DIEAbbrevData {
/// Attribute - Dwarf attribute code.
///
unsigned Attribute;
@@ -52,7 +52,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEAbbrev - Dwarf abbreviation, describes the organization of a debug
/// information object.
- class VISIBILITY_HIDDEN DIEAbbrev : public FoldingSetNode {
+ class DIEAbbrev : public FoldingSetNode {
/// Tag - Dwarf tag code.
///
unsigned Tag;
@@ -113,7 +113,7 @@ namespace llvm {
class CompileUnit;
class DIEValue;
- class VISIBILITY_HIDDEN DIE : public FoldingSetNode {
+ class DIE : public FoldingSetNode {
protected:
/// Abbrev - Buffer for constructing abbreviation.
///
@@ -202,7 +202,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEValue - A debug information entry value.
///
- class VISIBILITY_HIDDEN DIEValue : public FoldingSetNode {
+ class DIEValue : public FoldingSetNode {
public:
enum {
isInteger,
@@ -249,7 +249,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEInteger - An integer value DIE.
///
- class VISIBILITY_HIDDEN DIEInteger : public DIEValue {
+ class DIEInteger : public DIEValue {
uint64_t Integer;
public:
explicit DIEInteger(uint64_t I) : DIEValue(isInteger), Integer(I) {}
@@ -294,7 +294,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEString - A string value DIE.
///
- class VISIBILITY_HIDDEN DIEString : public DIEValue {
+ class DIEString : public DIEValue {
const std::string Str;
public:
explicit DIEString(const std::string &S) : DIEValue(isString), Str(S) {}
@@ -326,7 +326,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEDwarfLabel - A Dwarf internal label expression DIE.
//
- class VISIBILITY_HIDDEN DIEDwarfLabel : public DIEValue {
+ class DIEDwarfLabel : public DIEValue {
const DWLabel Label;
public:
explicit DIEDwarfLabel(const DWLabel &L) : DIEValue(isLabel), Label(L) {}
@@ -356,7 +356,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEObjectLabel - A label to an object in code or data.
//
- class VISIBILITY_HIDDEN DIEObjectLabel : public DIEValue {
+ class DIEObjectLabel : public DIEValue {
const std::string Label;
public:
explicit DIEObjectLabel(const std::string &L)
@@ -389,7 +389,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIESectionOffset - A section offset DIE.
///
- class VISIBILITY_HIDDEN DIESectionOffset : public DIEValue {
+ class DIESectionOffset : public DIEValue {
const DWLabel Label;
const DWLabel Section;
bool IsEH : 1;
@@ -428,7 +428,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEDelta - A simple label difference DIE.
///
- class VISIBILITY_HIDDEN DIEDelta : public DIEValue {
+ class DIEDelta : public DIEValue {
const DWLabel LabelHi;
const DWLabel LabelLo;
public:
@@ -462,7 +462,7 @@ namespace llvm {
/// DIEntry - A pointer to another debug information entry. An instance of
/// this class can also be used as a proxy for a debug information entry not
/// yet defined (ie. types.)
- class VISIBILITY_HIDDEN DIEEntry : public DIEValue {
+ class DIEEntry : public DIEValue {
DIE *Entry;
public:
explicit DIEEntry(DIE *E) : DIEValue(isEntry), Entry(E) {}
@@ -497,7 +497,7 @@ namespace llvm {
//===--------------------------------------------------------------------===//
/// DIEBlock - A block of values. Primarily used for location expressions.
//
- class VISIBILITY_HIDDEN DIEBlock : public DIEValue, public DIE {
+ class DIEBlock : public DIEValue, public DIE {
unsigned Size; // Size in bytes excluding size header.
public:
DIEBlock()
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 1372fc2..c62c435 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -48,7 +48,7 @@ namespace llvm {
//===----------------------------------------------------------------------===//
/// CompileUnit - This dwarf writer support class manages information associate
/// with a source file.
-class VISIBILITY_HIDDEN CompileUnit {
+class CompileUnit {
/// ID - File identifier for source.
///
unsigned ID;
@@ -127,61 +127,66 @@ public:
class DbgVariable {
DIVariable Var; // Variable Descriptor.
unsigned FrameIndex; // Variable frame index.
- bool InlinedFnVar; // Variable for an inlined function.
+ DbgVariable *AbstractVar; // Abstract variable for this variable.
+ DIE *TheDIE;
public:
- DbgVariable(DIVariable V, unsigned I, bool IFV)
- : Var(V), FrameIndex(I), InlinedFnVar(IFV) {}
+ DbgVariable(DIVariable V, unsigned I)
+ : Var(V), FrameIndex(I), AbstractVar(0), TheDIE(0) {}
// Accessors.
- DIVariable getVariable() const { return Var; }
- unsigned getFrameIndex() const { return FrameIndex; }
- bool isInlinedFnVar() const { return InlinedFnVar; }
+ DIVariable getVariable() const { return Var; }
+ unsigned getFrameIndex() const { return FrameIndex; }
+ void setAbstractVariable(DbgVariable *V) { AbstractVar = V; }
+ DbgVariable *getAbstractVariable() const { return AbstractVar; }
+ void setDIE(DIE *D) { TheDIE = D; }
+ DIE *getDIE() const { return TheDIE; }
};
//===----------------------------------------------------------------------===//
/// DbgScope - This class is used to track scope information.
///
-class DbgConcreteScope;
class DbgScope {
DbgScope *Parent; // Parent to this scope.
- DIDescriptor Desc; // Debug info descriptor for scope.
- // FIXME use WeakVH for Desc.
- WeakVH InlinedAt; // If this scope represents inlined
- // function body then this is the location
- // where this body is inlined.
+ DIDescriptor Desc; // Debug info descriptor for scope.
+ WeakVH InlinedAtLocation; // Location at which scope is inlined.
+ bool AbstractScope; // Abstract Scope
unsigned StartLabelID; // Label ID of the beginning of scope.
unsigned EndLabelID; // Label ID of the end of scope.
const MachineInstr *LastInsn; // Last instruction of this scope.
const MachineInstr *FirstInsn; // First instruction of this scope.
SmallVector<DbgScope *, 4> Scopes; // Scopes defined in scope.
SmallVector<DbgVariable *, 8> Variables;// Variables declared in scope.
- SmallVector<DbgConcreteScope *, 8> ConcreteInsts;// Concrete insts of funcs.
// Private state for dump()
mutable unsigned IndentLevel;
public:
DbgScope(DbgScope *P, DIDescriptor D, MDNode *I = 0)
- : Parent(P), Desc(D), InlinedAt(I), StartLabelID(0), EndLabelID(0),
+ : Parent(P), Desc(D), InlinedAtLocation(I), AbstractScope(false),
+ StartLabelID(0), EndLabelID(0),
LastInsn(0), FirstInsn(0), IndentLevel(0) {}
virtual ~DbgScope();
// Accessors.
DbgScope *getParent() const { return Parent; }
+ void setParent(DbgScope *P) { Parent = P; }
DIDescriptor getDesc() const { return Desc; }
- MDNode *getInlinedAt() const {
- return dyn_cast_or_null<MDNode>(InlinedAt);
+ MDNode *getInlinedAt() const {
+ return dyn_cast_or_null<MDNode>(InlinedAtLocation);
}
+ MDNode *getScopeNode() const { return Desc.getNode(); }
unsigned getStartLabelID() const { return StartLabelID; }
unsigned getEndLabelID() const { return EndLabelID; }
SmallVector<DbgScope *, 4> &getScopes() { return Scopes; }
SmallVector<DbgVariable *, 8> &getVariables() { return Variables; }
- SmallVector<DbgConcreteScope*,8> &getConcreteInsts() { return ConcreteInsts; }
void setStartLabelID(unsigned S) { StartLabelID = S; }
void setEndLabelID(unsigned E) { EndLabelID = E; }
void setLastInsn(const MachineInstr *MI) { LastInsn = MI; }
const MachineInstr *getLastInsn() { return LastInsn; }
void setFirstInsn(const MachineInstr *MI) { FirstInsn = MI; }
+ void setAbstractScope() { AbstractScope = true; }
+ bool isAbstractScope() const { return AbstractScope; }
const MachineInstr *getFirstInsn() { return FirstInsn; }
+
/// AddScope - Add a scope to the scope.
///
void AddScope(DbgScope *S) { Scopes.push_back(S); }
@@ -190,10 +195,6 @@ public:
///
void AddVariable(DbgVariable *V) { Variables.push_back(V); }
- /// AddConcreteInst - Add a concrete instance to the scope.
- ///
- void AddConcreteInst(DbgConcreteScope *C) { ConcreteInsts.push_back(C); }
-
void FixInstructionMarkers() {
assert (getFirstInsn() && "First instruction is missing!");
if (getLastInsn())
@@ -218,11 +219,15 @@ public:
void DbgScope::dump() const {
raw_ostream &err = errs();
err.indent(IndentLevel);
- Desc.dump();
+ MDNode *N = Desc.getNode();
+ N->dump();
err << " [" << StartLabelID << ", " << EndLabelID << "]\n";
+ if (AbstractScope)
+ err << "Abstract Scope\n";
IndentLevel += 2;
-
+ if (!Scopes.empty())
+ err << "Children ...\n";
for (unsigned i = 0, e = Scopes.size(); i != e; ++i)
if (Scopes[i] != this)
Scopes[i]->dump();
@@ -235,7 +240,7 @@ void DbgScope::dump() const {
/// DbgConcreteScope - This class is used to track a scope that holds concrete
/// instance information.
///
-class VISIBILITY_HIDDEN DbgConcreteScope : public DbgScope {
+class DbgConcreteScope : public DbgScope {
CompileUnit *Unit;
DIE *Die; // Debug info for this concrete scope.
public:
@@ -251,8 +256,6 @@ DbgScope::~DbgScope() {
delete Scopes[i];
for (unsigned j = 0, M = Variables.size(); j < M; ++j)
delete Variables[j];
- for (unsigned k = 0, O = ConcreteInsts.size(); k < O; ++k)
- delete ConcreteInsts[k];
}
} // end llvm namespace
@@ -262,7 +265,7 @@ DwarfDebug::DwarfDebug(raw_ostream &OS, AsmPrinter *A, const MCAsmInfo *T)
AbbreviationsSet(InitAbbreviationsSetSize), Abbreviations(),
ValuesSet(InitValuesSetSize), Values(), StringPool(),
SectionSourceLines(), didInitial(false), shouldEmit(false),
- FunctionDbgScope(0), DebugTimer(0) {
+ CurrentFnDbgScope(0), DebugTimer(0) {
if (TimePassesIsEnabled)
DebugTimer = new Timer("Dwarf Debug Writer",
getDwarfTimerGroup());
@@ -271,11 +274,6 @@ DwarfDebug::~DwarfDebug() {
for (unsigned j = 0, M = Values.size(); j < M; ++j)
delete Values[j];
- for (DenseMap<const MDNode *, DbgScope *>::iterator
- I = AbstractInstanceRootMap.begin(),
- E = AbstractInstanceRootMap.end(); I != E;++I)
- delete I->second;
-
delete DebugTimer;
}
@@ -1097,6 +1095,10 @@ DIE *DwarfDebug::ConstructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
/// CreateGlobalVariableDIE - Create new DIE using GV.
DIE *DwarfDebug::CreateGlobalVariableDIE(CompileUnit *DW_Unit,
const DIGlobalVariable &GV) {
+ // If the global variable was optimized out then no need to create debug info entry.

+ if (!GV.getGlobal()) return NULL;
+ if (!GV.getDisplayName()) return NULL;
+
DIE *GVDie = new DIE(dwarf::DW_TAG_variable);
AddString(GVDie, dwarf::DW_AT_name, dwarf::DW_FORM_string,
GV.getDisplayName());
@@ -1233,9 +1235,6 @@ DIE *DwarfDebug::CreateSubprogramDIE(CompileUnit *DW_Unit,
}
}
- if (!SP.isLocalToUnit() && !IsInlined)
- AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
-
// DW_TAG_inlined_subroutine may refer to this DIE.
DIE *&Slot = DW_Unit->getDieMapSlotFor(SP.getNode());
Slot = SPDie;
@@ -1283,263 +1282,341 @@ DIE *DwarfDebug::CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
AddSourceLine(VariableDie, &VD);
// Add variable type.
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex addresses instead.
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
if (VD.isBlockByrefVariable())
AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
else
AddType(Unit, VariableDie, VD.getType());
// Add variable address.
- if (!DV->isInlinedFnVar()) {
- // Variables for abstract instances of inlined functions don't get a
- // location.
- MachineLocation Location;
- Location.set(RI->getFrameRegister(*MF),
- RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
-
-
- if (VD.hasComplexAddress())
- AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else if (VD.isBlockByrefVariable())
- AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else
- AddAddress(VariableDie, dwarf::DW_AT_location, Location);
- }
+ // Variables for abstract instances of inlined functions don't get a
+ // location.
+ MachineLocation Location;
+ Location.set(RI->getFrameRegister(*MF),
+ RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
+
+
+ if (VD.hasComplexAddress())
+ AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else if (VD.isBlockByrefVariable())
+ AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else
+ AddAddress(VariableDie, dwarf::DW_AT_location, Location);
return VariableDie;
}
-/// getOrCreateScope - Returns the scope associated with the given descriptor.
-///
-DbgScope *DwarfDebug::getDbgScope(MDNode *N, const MachineInstr *MI,
- MDNode *InlinedAt) {
- ValueMap<MDNode *, DbgScope *>::iterator VI = DbgScopeMap.find(N);
- if (VI != DbgScopeMap.end())
- return VI->second;
+/// getUpdatedDbgScope - Find or create DbgScope associated with the instruction.
+/// Initialize scope and update scope hierarchy.
+DbgScope *DwarfDebug::getUpdatedDbgScope(MDNode *N, const MachineInstr *MI,
+ MDNode *InlinedAt) {
+ assert (N && "Invalid Scope encoding!");
+ assert (MI && "Missing machine instruction!");
+ bool GetConcreteScope = (MI && InlinedAt);
- DbgScope *Parent = NULL;
+ DbgScope *NScope = NULL;
+
+ if (InlinedAt)
+ NScope = DbgScopeMap.lookup(InlinedAt);
+ else
+ NScope = DbgScopeMap.lookup(N);
+ assert (NScope && "Unable to find working scope!");
+
+ if (NScope->getFirstInsn())
+ return NScope;
- if (InlinedAt) {
+ DbgScope *Parent = NULL;
+ if (GetConcreteScope) {
DILocation IL(InlinedAt);
- assert (!IL.isNull() && "Invalid InlindAt location!");
- ValueMap<MDNode *, DbgScope *>::iterator DSI =
- DbgScopeMap.find(IL.getScope().getNode());
- assert (DSI != DbgScopeMap.end() && "Unable to find InlineAt scope!");
- Parent = DSI->second;
- } else {
- DIDescriptor Scope(N);
- if (Scope.isCompileUnit()) {
- return NULL;
- } else if (Scope.isSubprogram()) {
- DISubprogram SP(N);
- DIDescriptor ParentDesc = SP.getContext();
- if (!ParentDesc.isNull() && !ParentDesc.isCompileUnit())
- Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt);
- } else if (Scope.isLexicalBlock()) {
- DILexicalBlock DB(N);
- DIDescriptor ParentDesc = DB.getContext();
- if (!ParentDesc.isNull())
- Parent = getDbgScope(ParentDesc.getNode(), MI, InlinedAt);
- } else
- assert (0 && "Unexpected scope info");
- }
-
- DbgScope *NScope = new DbgScope(Parent, DIDescriptor(N), InlinedAt);
+ Parent = getUpdatedDbgScope(IL.getScope().getNode(), MI,
+ IL.getOrigLocation().getNode());
+ assert (Parent && "Unable to find Parent scope!");
+ NScope->setParent(Parent);
+ Parent->AddScope(NScope);
+ } else if (DIDescriptor(N).isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ if (!DB.getContext().isNull()) {
+ Parent = getUpdatedDbgScope(DB.getContext().getNode(), MI, InlinedAt);
+ NScope->setParent(Parent);
+ Parent->AddScope(NScope);
+ }
+ }
+
NScope->setFirstInsn(MI);
- if (Parent)
- Parent->AddScope(NScope);
- else
- // First function is top level function.
- if (!FunctionDbgScope)
- FunctionDbgScope = NScope;
+ if (!Parent && !InlinedAt) {
+ StringRef SPName = DISubprogram(N).getLinkageName();
+ if (SPName == MF->getFunction()->getName())
+ CurrentFnDbgScope = NScope;
+ }
+
+ if (GetConcreteScope) {
+ ConcreteScopes[InlinedAt] = NScope;
+ getOrCreateAbstractScope(N);
+ }
- DbgScopeMap.insert(std::make_pair(N, NScope));
return NScope;
}
+DbgScope *DwarfDebug::getOrCreateAbstractScope(MDNode *N) {
+ assert (N && "Invalid Scope encoding!");
-/// getOrCreateScope - Returns the scope associated with the given descriptor.
-/// FIXME - Remove this method.
-DbgScope *DwarfDebug::getOrCreateScope(MDNode *N) {
- DbgScope *&Slot = DbgScopeMap[N];
- if (Slot) return Slot;
-
+ DbgScope *AScope = AbstractScopes.lookup(N);
+ if (AScope)
+ return AScope;
+
DbgScope *Parent = NULL;
- DILexicalBlock Block(N);
- // Don't create a new scope if we already created one for an inlined function.
- DenseMap<const MDNode *, DbgScope *>::iterator
- II = AbstractInstanceRootMap.find(N);
- if (II != AbstractInstanceRootMap.end())
- return LexicalScopeStack.back();
-
- if (!Block.isNull()) {
- DIDescriptor ParentDesc = Block.getContext();
- Parent =
- ParentDesc.isNull() ? NULL : getOrCreateScope(ParentDesc.getNode());
+ DIDescriptor Scope(N);
+ if (Scope.isLexicalBlock()) {
+ DILexicalBlock DB(N);
+ DIDescriptor ParentDesc = DB.getContext();
+ if (!ParentDesc.isNull())
+ Parent = getOrCreateAbstractScope(ParentDesc.getNode());
}
- Slot = new DbgScope(Parent, DIDescriptor(N));
+ AScope = new DbgScope(Parent, DIDescriptor(N), NULL);
if (Parent)
- Parent->AddScope(Slot);
- else
- // First function is top level function.
- FunctionDbgScope = Slot;
+ Parent->AddScope(AScope);
+ AScope->setAbstractScope();
+ AbstractScopes[N] = AScope;
+ if (DIDescriptor(N).isSubprogram())
+ AbstractScopesList.push_back(AScope);
+ return AScope;
+}
+
+static DISubprogram getDISubprogram(MDNode *N) {
- return Slot;
+ DIDescriptor D(N);
+ if (D.isNull())
+ return DISubprogram();
+
+ if (D.isCompileUnit())
+ return DISubprogram();
+
+ if (D.isSubprogram())
+ return DISubprogram(N);
+
+ if (D.isLexicalBlock())
+ return getDISubprogram(DILexicalBlock(N).getContext().getNode());
+
+ llvm_unreachable("Unexpected Descriptor!");
}
-/// ConstructDbgScope - Construct the components of a scope.
-///
-void DwarfDebug::ConstructDbgScope(DbgScope *ParentScope,
- unsigned ParentStartID,
- unsigned ParentEndID,
- DIE *ParentDie, CompileUnit *Unit) {
- // Add variables to scope.
- SmallVector<DbgVariable *, 8> &Variables = ParentScope->getVariables();
- for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
- DIE *VariableDie = CreateDbgScopeVariable(Variables[i], Unit);
- if (VariableDie) ParentDie->AddChild(VariableDie);
- }
+DIE *DwarfDebug::UpdateSubprogramScopeDIE(MDNode *SPNode) {
+
+ DIE *SPDie = ModuleCU->getDieMapSlotFor(SPNode);
+ assert (SPDie && "Unable to find subprogram DIE!");
+ AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_begin", SubprogramCount));
+ AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("func_end", SubprogramCount));
+ MachineLocation Location(RI->getFrameRegister(*MF));
+ AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+
+ if (!DISubprogram(SPNode).isLocalToUnit())
+ AddUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
+
+ // If there are global variables at this scope then add their dies.
+ for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
+ SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
+ MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
+ if (!N) continue;
+ DIGlobalVariable GV(N);
+ if (GV.getContext().getNode() == SPNode) {
+ DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV);
+ if (ScopedGVDie)
+ SPDie->AddChild(ScopedGVDie);
+ }
+ }
+ return SPDie;
+}
+
+DIE *DwarfDebug::ConstructLexicalScopeDIE(DbgScope *Scope) {
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0)
+ return NULL;
- // Add concrete instances to scope.
- SmallVector<DbgConcreteScope *, 8> &ConcreteInsts =
- ParentScope->getConcreteInsts();
- for (unsigned i = 0, N = ConcreteInsts.size(); i < N; ++i) {
- DbgConcreteScope *ConcreteInst = ConcreteInsts[i];
- DIE *Die = ConcreteInst->getDie();
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block);
+ if (Scope->isAbstractScope())
+ return ScopeDIE;
- unsigned StartID = ConcreteInst->getStartLabelID();
- unsigned EndID = ConcreteInst->getEndLabelID();
+ AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ StartID ?
+ DWLabel("label", StartID)
+ : DWLabel("func_begin", SubprogramCount));
+ AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ EndID ?
+ DWLabel("label", EndID)
+ : DWLabel("func_end", SubprogramCount));
- // Add the scope bounds.
- if (StartID)
- AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("label", StartID));
- else
- AddLabel(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
- if (EndID)
- AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("label", EndID));
- else
- AddLabel(Die, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
- ParentDie->AddChild(Die);
- }
+ return ScopeDIE;
+}
- // Add nested scopes.
- SmallVector<DbgScope *, 4> &Scopes = ParentScope->getScopes();
- for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
- // Define the Scope debug information entry.
- DbgScope *Scope = Scopes[j];
+DIE *DwarfDebug::ConstructInlinedScopeDIE(DbgScope *Scope) {
+ unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
+ unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+ assert (StartID && "Invalid starting label for an inlined scope!");
+ assert (EndID && "Invalid end label for an inlined scope!");
+ // Ignore empty scopes.
+ if (StartID == EndID && StartID != 0)
+ return NULL;
- unsigned StartID = MMI->MappedLabel(Scope->getStartLabelID());
- unsigned EndID = MMI->MappedLabel(Scope->getEndLabelID());
+ DIScope DS(Scope->getScopeNode());
+ if (DS.isNull())
+ return NULL;
+ DIE *ScopeDIE = new DIE(dwarf::DW_TAG_inlined_subroutine);
- // Ignore empty scopes.
- if (StartID == EndID && StartID != 0) continue;
+ DISubprogram InlinedSP = getDISubprogram(DS.getNode());
+ DIE *&OriginDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode());
+ assert (OriginDIE && "Unable to find Origin DIE!");
+ AddDIEEntry(ScopeDIE, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, OriginDIE);
- // Do not ignore inlined scopes even if they don't have any variables or
- // scopes.
- if (Scope->getScopes().empty() && Scope->getVariables().empty() &&
- Scope->getConcreteInsts().empty())
- continue;
+ AddLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", StartID));
+ AddLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
+ DWLabel("label", EndID));
- if (StartID == ParentStartID && EndID == ParentEndID) {
- // Just add stuff to the parent scope.
- ConstructDbgScope(Scope, ParentStartID, ParentEndID, ParentDie, Unit);
- } else {
- DIE *ScopeDie = new DIE(dwarf::DW_TAG_lexical_block);
+ InlinedSubprogramDIEs.insert(OriginDIE);
- // Add the scope bounds.
- if (StartID)
- AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("label", StartID));
- else
- AddLabel(ScopeDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
+ // Track the start label for this inlined function.
+ ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ I = InlineInfo.find(InlinedSP.getNode());
- if (EndID)
- AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("label", EndID));
- else
- AddLabel(ScopeDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
+ if (I == InlineInfo.end()) {
+ InlineInfo[InlinedSP.getNode()].push_back(std::make_pair(StartID, ScopeDIE));
+ InlinedSPNodes.push_back(InlinedSP.getNode());
+ } else
+ I->second.push_back(std::make_pair(StartID, ScopeDIE));
- // Add the scope's contents.
- ConstructDbgScope(Scope, StartID, EndID, ScopeDie, Unit);
- ParentDie->AddChild(ScopeDie);
- }
- }
+ StringPool.insert(InlinedSP.getName());
+ StringPool.insert(InlinedSP.getLinkageName());
+ DILocation DL(Scope->getInlinedAt());
+ AddUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
+ AddUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber());
+
+ return ScopeDIE;
}
-/// ConstructFunctionDbgScope - Construct the scope for the subprogram.
-///
-void DwarfDebug::ConstructFunctionDbgScope(DbgScope *RootScope,
- bool AbstractScope) {
- // Exit if there is no root scope.
- if (!RootScope) return;
- DIDescriptor Desc = RootScope->getDesc();
- if (Desc.isNull())
- return;
+DIE *DwarfDebug::ConstructVariableDIE(DbgVariable *DV,
+ DbgScope *Scope, CompileUnit *Unit) {
+ // Get the descriptor.
+ const DIVariable &VD = DV->getVariable();
+ const char *Name = VD.getName();
+ if (!Name)
+ return NULL;
- // Get the subprogram debug information entry.
- DISubprogram SPD(Desc.getNode());
-
- // Get the subprogram die.
- DIE *SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode());
- if (!SPDie) {
- ConstructSubprogram(SPD.getNode());
- SPDie = ModuleCU->getDieMapSlotFor(SPD.getNode());
- }
- assert(SPDie && "Missing subprogram descriptor");
-
- if (!AbstractScope) {
- // Add the function bounds.
- AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
- AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
- MachineLocation Location(RI->getFrameRegister(*MF));
- AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
- }
-
- ConstructDbgScope(RootScope, 0, 0, SPDie, ModuleCU);
- // If there are global variables at this scope then add their dies.
- for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
- SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
- MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
- if (!N) continue;
- DIGlobalVariable GV(N);
- if (GV.getContext().getNode() == RootScope->getDesc().getNode()) {
- DIE *ScopedGVDie = CreateGlobalVariableDIE(ModuleCU, GV);
- SPDie->AddChild(ScopedGVDie);
- }
+ // Translate tag to proper Dwarf tag. The result variable is dropped for
+ // now.
+ unsigned Tag;
+ switch (VD.getTag()) {
+ case dwarf::DW_TAG_return_variable:
+ return NULL;
+ case dwarf::DW_TAG_arg_variable:
+ Tag = dwarf::DW_TAG_formal_parameter;
+ break;
+ case dwarf::DW_TAG_auto_variable: // fall thru
+ default:
+ Tag = dwarf::DW_TAG_variable;
+ break;
}
-}
-/// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
-///
-void DwarfDebug::ConstructDefaultDbgScope(MachineFunction *MF) {
- StringMap<DIE*> &Globals = ModuleCU->getGlobals();
- StringMap<DIE*>::iterator GI = Globals.find(MF->getFunction()->getName());
- if (GI != Globals.end()) {
- DIE *SPDie = GI->second;
+ // Define variable debug information entry.
+ DIE *VariableDie = new DIE(Tag);
- // Add the function bounds.
- AddLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
- DWLabel("func_begin", SubprogramCount));
- AddLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
- DWLabel("func_end", SubprogramCount));
- MachineLocation Location(RI->getFrameRegister(*MF));
- AddAddress(SPDie, dwarf::DW_AT_frame_base, Location);
+ DIE *AbsDIE = NULL;
+ if (DbgVariable *AV = DV->getAbstractVariable())
+ AbsDIE = AV->getDIE();
+
+ if (AbsDIE) {
+ DIScope DS(Scope->getScopeNode());
+ DISubprogram InlinedSP = getDISubprogram(DS.getNode());
+ DIE *&OriginSPDIE = ModuleCU->getDieMapSlotFor(InlinedSP.getNode());
+ (void) OriginSPDIE;
+ assert (OriginSPDIE && "Unable to find Origin DIE for the SP!");
+ DIE *AbsDIE = DV->getAbstractVariable()->getDIE();
+ assert (AbsDIE && "Unable to find Origin DIE for the Variable!");
+ AddDIEEntry(VariableDie, dwarf::DW_AT_abstract_origin,
+ dwarf::DW_FORM_ref4, AbsDIE);
}
+ else {
+ AddString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
+ AddSourceLine(VariableDie, &VD);
+
+ // Add variable type.
+ // FIXME: isBlockByrefVariable should be reformulated in terms of complex
+ // addresses instead.
+ if (VD.isBlockByrefVariable())
+ AddType(Unit, VariableDie, GetBlockByrefType(VD.getType(), Name));
+ else
+ AddType(Unit, VariableDie, VD.getType());
+ }
+
+ // Add variable address.
+ if (!Scope->isAbstractScope()) {
+ MachineLocation Location;
+ Location.set(RI->getFrameRegister(*MF),
+ RI->getFrameIndexOffset(*MF, DV->getFrameIndex()));
+
+
+ if (VD.hasComplexAddress())
+ AddComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else if (VD.isBlockByrefVariable())
+ AddBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
+ else
+ AddAddress(VariableDie, dwarf::DW_AT_location, Location);
+ }
+ DV->setDIE(VariableDie);
+ return VariableDie;
+
+}
+DIE *DwarfDebug::ConstructScopeDIE(DbgScope *Scope) {
+ if (!Scope)
+ return NULL;
+ DIScope DS(Scope->getScopeNode());
+ if (DS.isNull())
+ return NULL;
+
+ DIE *ScopeDIE = NULL;
+ if (Scope->getInlinedAt())
+ ScopeDIE = ConstructInlinedScopeDIE(Scope);
+ else if (DS.isSubprogram()) {
+ if (Scope->isAbstractScope())
+ ScopeDIE = ModuleCU->getDieMapSlotFor(DS.getNode());
+ else
+ ScopeDIE = UpdateSubprogramScopeDIE(DS.getNode());
+ }
+ else {
+ ScopeDIE = ConstructLexicalScopeDIE(Scope);
+ if (!ScopeDIE) return NULL;
+ }
+
+ // Add variables to scope.
+ SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
+ for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
+ DIE *VariableDIE = ConstructVariableDIE(Variables[i], Scope, ModuleCU);
+ if (VariableDIE)
+ ScopeDIE->AddChild(VariableDIE);
+ }
+
+ // Add nested scopes.
+ SmallVector<DbgScope *, 4> &Scopes = Scope->getScopes();
+ for (unsigned j = 0, M = Scopes.size(); j < M; ++j) {
+ // Define the Scope debug information entry.
+ DIE *NestedDIE = ConstructScopeDIE(Scopes[j]);
+ if (NestedDIE)
+ ScopeDIE->AddChild(NestedDIE);
+ }
+ return ScopeDIE;
}
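ConstructInlinedScopeDIE and ConstructVariableDIE above avoid duplicating attributes for inlined code: each concrete, per-call-site entry carries only its address range and points back at the abstract entry through DW_AT_abstract_origin. A toy model of that relationship — the structure and field names are illustrative, not the DIE class:

    #include <cstdio>
    #include <string>

    // Toy debug-info entry: the abstract entry owns the name attribute,
    // concrete entries carry only location info plus a pointer to their origin.
    struct Entry {
      std::string Name;        // only meaningful on abstract entries
      const Entry *Origin;     // DW_AT_abstract_origin analogue
      unsigned LowPC, HighPC;  // only meaningful on concrete entries
    };

    int main() {
      Entry AbstractFoo = {"foo", nullptr, 0, 0};            // one per subprogram
      Entry InlinedAtCallSite1 = {"", &AbstractFoo, 0x10, 0x2c};
      Entry InlinedAtCallSite2 = {"", &AbstractFoo, 0x90, 0xa4};

      const Entry *Insts[] = {&InlinedAtCallSite1, &InlinedAtCallSite2};
      for (const Entry *E : Insts)
        std::printf("inlined %s at [0x%x, 0x%x)\n",
                    E->Origin->Name.c_str(), E->LowPC, E->HighPC);
      return 0;
    }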
/// GetOrCreateSourceID - Look up the source id with the given directory and
@@ -1680,6 +1757,9 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
+ if (!MAI->doesSupportDebugInformation())
+ return;
+
DebugInfoFinder DbgFinder;
DbgFinder.processModule(*M);
@@ -1710,7 +1790,7 @@ void DwarfDebug::BeginModule(Module *M, MachineModuleInfo *mmi) {
ConstructGlobalVariableDIE(*I);
}
- // Create DIEs for each of the externally visible subprograms.
+ // Create DIEs for each subprogram.
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
E = DbgFinder.subprogram_end(); I != E; ++I)
ConstructSubprogram(*I);
@@ -1754,6 +1834,13 @@ void DwarfDebug::EndModule() {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
+ // Attach DW_AT_inline attribute with inlined subprogram DIEs.
+ for (SmallPtrSet<DIE *, 4>::iterator AI = InlinedSubprogramDIEs.begin(),
+ AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) {
+ DIE *ISP = *AI;
+ AddUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
+ }
+
// Standard sections final addresses.
Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
EmitLabel("text_end", 0);
@@ -1811,55 +1898,102 @@ void DwarfDebug::EndModule() {
DebugTimer->stopTimer();
}
+/// findAbstractVariable - Find abstract variable, if any, associated with Var.
+DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
+ DILocation &ScopeLoc) {
+
+ DbgVariable *AbsDbgVariable = AbstractVariables.lookup(Var.getNode());
+ if (AbsDbgVariable)
+ return AbsDbgVariable;
+
+ DbgScope *Scope = AbstractScopes.lookup(ScopeLoc.getScope().getNode());
+ if (!Scope)
+ return NULL;
+
+ AbsDbgVariable = new DbgVariable(Var, FrameIdx);
+ Scope->AddVariable(AbsDbgVariable);
+ AbstractVariables[Var.getNode()] = AbsDbgVariable;
+ return AbsDbgVariable;
+}
+
/// CollectVariableInfo - Populate DbgScope entries with variables' info.
void DwarfDebug::CollectVariableInfo() {
if (!MMI) return;
+
MachineModuleInfo::VariableDbgInfoMapTy &VMap = MMI->getVariableDbgInfo();
for (MachineModuleInfo::VariableDbgInfoMapTy::iterator VI = VMap.begin(),
VE = VMap.end(); VI != VE; ++VI) {
MetadataBase *MB = VI->first;
MDNode *Var = dyn_cast_or_null<MDNode>(MB);
+ if (!Var) continue;
DIVariable DV (Var);
- if (DV.isNull()) continue;
- unsigned VSlot = VI->second;
- DbgScope *Scope = NULL;
- ValueMap<MDNode *, DbgScope *>::iterator DSI =
- DbgScopeMap.find(DV.getContext().getNode());
- if (DSI != DbgScopeMap.end())
- Scope = DSI->second;
- else
- // There is not any instruction assocated with this scope, so get
- // a new scope.
- Scope = getDbgScope(DV.getContext().getNode(),
- NULL /* Not an instruction */,
- NULL /* Not inlined */);
- assert (Scope && "Unable to find variable scope!");
- Scope->AddVariable(new DbgVariable(DV, VSlot, false));
- }
-}
-
-/// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that
-/// start with this machine instruction.
-void DwarfDebug::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label) {
+ std::pair<unsigned, MDNode *> VP = VI->second;
+ DILocation ScopeLoc(VP.second);
+
+ DbgScope *Scope =
+ ConcreteScopes.lookup(ScopeLoc.getOrigLocation().getNode());
+ if (!Scope)
+ Scope = DbgScopeMap.lookup(ScopeLoc.getScope().getNode());
+ // If variable scope is not found then skip this variable.
+ if (!Scope)
+ continue;
+
+ DbgVariable *RegVar = new DbgVariable(DV, VP.first);
+ Scope->AddVariable(RegVar);
+ if (DbgVariable *AbsDbgVariable = findAbstractVariable(DV, VP.first, ScopeLoc))
+ RegVar->setAbstractVariable(AbsDbgVariable);
+ }
+}
+
+/// BeginScope - Process beginning of a scope starting at Label.
+void DwarfDebug::BeginScope(const MachineInstr *MI, unsigned Label) {
InsnToDbgScopeMapTy::iterator I = DbgScopeBeginMap.find(MI);
if (I == DbgScopeBeginMap.end())
return;
- SmallVector<DbgScope *, 2> &SD = I->second;
- for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
+ ScopeVector &SD = DbgScopeBeginMap[MI];
+ for (ScopeVector::iterator SDI = SD.begin(), SDE = SD.end();
SDI != SDE; ++SDI)
(*SDI)->setStartLabelID(Label);
}
-/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that
-/// end with this machine instruction.
-void DwarfDebug::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label) {
+/// EndScope - Process end of a scope.
+void DwarfDebug::EndScope(const MachineInstr *MI) {
InsnToDbgScopeMapTy::iterator I = DbgScopeEndMap.find(MI);
if (I == DbgScopeEndMap.end())
return;
+
+ unsigned Label = MMI->NextLabelID();
+ Asm->printLabel(Label);
+
SmallVector<DbgScope *, 2> &SD = I->second;
for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
SDI != SDE; ++SDI)
(*SDI)->setEndLabelID(Label);
+ return;
+}
+
+/// createDbgScope - Create DbgScope for the scope.
+void DwarfDebug::createDbgScope(MDNode *Scope, MDNode *InlinedAt) {
+
+ if (!InlinedAt) {
+ DbgScope *WScope = DbgScopeMap.lookup(Scope);
+ if (WScope)
+ return;
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), NULL);
+ DbgScopeMap.insert(std::make_pair(Scope, WScope));
+ if (DIDescriptor(Scope).isLexicalBlock())
+ createDbgScope(DILexicalBlock(Scope).getContext().getNode(), NULL);
+ return;
+ }
+
+ DbgScope *WScope = DbgScopeMap.lookup(InlinedAt);
+ if (WScope)
+ return;
+
+ WScope = new DbgScope(NULL, DIDescriptor(Scope), InlinedAt);
+ DbgScopeMap.insert(std::make_pair(InlinedAt, WScope));
+ DILocation DL(InlinedAt);
+ createDbgScope(DL.getScope().getNode(), DL.getOrigLocation().getNode());
}
/// ExtractScopeInformation - Scan machine instructions in this function
@@ -1870,26 +2004,41 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
if (!DbgScopeMap.empty())
return false;
- // Scan each instruction and create scopes.
+ // Scan each instruction and create scopes. First build working set of scopes.
for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
I != E; ++I) {
for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
II != IE; ++II) {
const MachineInstr *MInsn = II;
DebugLoc DL = MInsn->getDebugLoc();
- if (DL.isUnknown())
- continue;
+ if (DL.isUnknown()) continue;
DebugLocTuple DLT = MF->getDebugLocTuple(DL);
- if (!DLT.Scope)
- continue;
+ if (!DLT.Scope) continue;
// There is no need to create another DIE for compile unit. For all
// other scopes, create one DbgScope now. This will be translated
// into a scope DIE at the end.
- DIDescriptor D(DLT.Scope);
- if (!D.isCompileUnit()) {
- DbgScope *Scope = getDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc);
- Scope->setLastInsn(MInsn);
- }
+ if (DIDescriptor(DLT.Scope).isCompileUnit()) continue;
+ createDbgScope(DLT.Scope, DLT.InlinedAtLoc);
+ }
+ }
+
+
+ // Build scope hierarchy using working set of scopes.
+ for (MachineFunction::const_iterator I = MF->begin(), E = MF->end();
+ I != E; ++I) {
+ for (MachineBasicBlock::const_iterator II = I->begin(), IE = I->end();
+ II != IE; ++II) {
+ const MachineInstr *MInsn = II;
+ DebugLoc DL = MInsn->getDebugLoc();
+ if (DL.isUnknown()) continue;
+ DebugLocTuple DLT = MF->getDebugLocTuple(DL);
+ if (!DLT.Scope) continue;
+ // The compile unit itself needs no DbgScope. For every other scope,
+ // update the DbgScope created in the first pass: hook it into the
+ // hierarchy and record the instruction range it covers.
+ if (DIDescriptor(DLT.Scope).isCompileUnit()) continue;
+ DbgScope *Scope = getUpdatedDbgScope(DLT.Scope, MInsn, DLT.InlinedAtLoc);
+ Scope->setLastInsn(MInsn);
}
}
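ExtractScopeInformation now makes two passes over the function: the first simply creates a DbgScope record for every scope (and inlined-at location) that appears in any instruction's debug location, and the second wires up parents and first/last instructions, since a parent may only be encountered after its children. A standalone sketch of that two-pass idea using plain maps — scope keys are strings here rather than MDNodes, and the data is made up:

    #include <cstdio>
    #include <map>
    #include <string>
    #include <vector>

    struct Scope {
      std::string Parent;            // empty means "no parent"
      int FirstInsn = -1, LastInsn = -1;
    };

    int main() {
      // Each instruction carries (scope, parent-scope) info, like DebugLocTuple.
      struct Loc { std::string Scope, Parent; };
      std::vector<Loc> Insns = {{"block2", "func"}, {"func", ""}, {"block2", "func"}};

      std::map<std::string, Scope> Scopes;

      // Pass 1: make sure every scope that is ever referenced has an entry.
      for (const Loc &L : Insns) {
        Scopes[L.Scope];             // default-construct if missing
        if (!L.Parent.empty())
          Scopes[L.Parent];
      }

      // Pass 2: link parents and record first/last instruction per scope.
      for (size_t i = 0; i < Insns.size(); ++i) {
        Scope &S = Scopes[Insns[i].Scope];
        S.Parent = Insns[i].Parent;
        if (S.FirstInsn < 0) S.FirstInsn = (int)i;
        S.LastInsn = (int)i;
      }

      for (const auto &KV : Scopes)
        std::printf("%s: parent=%s insns=[%d,%d]\n", KV.first.c_str(),
                    KV.second.Parent.c_str(), KV.second.FirstInsn,
                    KV.second.LastInsn);
      return 0;
    }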
@@ -1897,8 +2046,8 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
// last instruction as this scope's last instrunction.
for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
DE = DbgScopeMap.end(); DI != DE; ++DI) {
- DbgScope *S = DI->second;
- if (!S) continue;
+ if (DI->second->isAbstractScope())
+ continue;
assert (DI->second->getFirstInsn() && "Invalid first instruction!");
DI->second->FixInstructionMarkers();
assert (DI->second->getLastInsn() && "Invalid last instruction!");
@@ -1911,7 +2060,8 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
for (ValueMap<MDNode *, DbgScope *>::iterator DI = DbgScopeMap.begin(),
DE = DbgScopeMap.end(); DI != DE; ++DI) {
DbgScope *S = DI->second;
- if (!S) continue;
+ if (S->isAbstractScope())
+ continue;
const MachineInstr *MI = S->getFirstInsn();
assert (MI && "DbgScope does not have first instruction!");
@@ -1919,8 +2069,7 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
if (IDI != DbgScopeBeginMap.end())
IDI->second.push_back(S);
else
- DbgScopeBeginMap.insert(std::make_pair(MI,
- SmallVector<DbgScope *, 2>(2, S)));
+ DbgScopeBeginMap[MI].push_back(S);
MI = S->getLastInsn();
assert (MI && "DbgScope does not have last instruction!");
@@ -1928,31 +2077,12 @@ bool DwarfDebug::ExtractScopeInformation(MachineFunction *MF) {
if (IDI != DbgScopeEndMap.end())
IDI->second.push_back(S);
else
- DbgScopeEndMap.insert(std::make_pair(MI,
- SmallVector<DbgScope *, 2>(2, S)));
+ DbgScopeEndMap[MI].push_back(S);
}
return !DbgScopeMap.empty();
}
-static DISubprogram getDISubprogram(MDNode *N) {
-
- DIDescriptor D(N);
- if (D.isNull())
- return DISubprogram();
-
- if (D.isCompileUnit())
- return DISubprogram();
-
- if (D.isSubprogram())
- return DISubprogram(N);
-
- if (D.isLexicalBlock())
- return getDISubprogram(DILexicalBlock(N).getContext().getNode());
-
- llvm_unreachable("Unexpected Descriptor!");
-}
-
/// BeginFunction - Gather pre-function debug information. Assumes being
/// emitted immediately after the function entry point.
void DwarfDebug::BeginFunction(MachineFunction *MF) {
@@ -1963,11 +2093,9 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
if (!ExtractScopeInformation(MF))
return;
CollectVariableInfo();
-#endif
// Begin accumulating function debug information.
MMI->BeginFunction(MF);
@@ -1977,7 +2105,6 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
// Emit label for the implicitly defined dbg.stoppoint at the start of the
// function.
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
DebugLoc FDL = MF->getDefaultDebugLoc();
if (!FDL.isUnknown()) {
DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
@@ -1990,15 +2117,6 @@ void DwarfDebug::BeginFunction(MachineFunction *MF) {
Asm->printLabel(LabelID);
O << '\n';
}
-#else
- DebugLoc FDL = MF->getDefaultDebugLoc();
- if (!FDL.isUnknown()) {
- DebugLocTuple DLT = MF->getDebugLocTuple(FDL);
- unsigned LabelID = RecordSourceLine(DLT.Line, DLT.Col, DLT.Scope);
- Asm->printLabel(LabelID);
- O << '\n';
- }
-#endif
if (TimePassesIsEnabled)
DebugTimer->stopTimer();
}
@@ -2011,10 +2129,9 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
if (TimePassesIsEnabled)
DebugTimer->startTimer();
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
if (DbgScopeMap.empty())
return;
-#endif
+
// Define end label for subprogram.
EmitLabel("func_end", SubprogramCount);
@@ -2029,41 +2146,24 @@ void DwarfDebug::EndFunction(MachineFunction *MF) {
Lines.begin(), Lines.end());
}
- // Construct the DbgScope for abstract instances.
- for (SmallVector<DbgScope *, 32>::iterator
- I = AbstractInstanceRootList.begin(),
- E = AbstractInstanceRootList.end(); I != E; ++I)
- ConstructFunctionDbgScope(*I);
+ // Construct abstract scopes.
+ for (SmallVector<DbgScope *, 4>::iterator AI = AbstractScopesList.begin(),
+ AE = AbstractScopesList.end(); AI != AE; ++AI)
+ ConstructScopeDIE(*AI);
- // Construct scopes for subprogram.
- if (FunctionDbgScope)
- ConstructFunctionDbgScope(FunctionDbgScope);
- else
- // FIXME: This is wrong. We are essentially getting past a problem with
- // debug information not being able to handle unreachable blocks that have
- // debug information in them. In particular, those unreachable blocks that
- // have "region end" info in them. That situation results in the "root
- // scope" not being created. If that's the case, then emit a "default"
- // scope, i.e., one that encompasses the whole function. This isn't
- // desirable. And a better way of handling this (and all of the debugging
- // information) needs to be explored.
- ConstructDefaultDbgScope(MF);
+ ConstructScopeDIE(CurrentFnDbgScope);
DebugFrames.push_back(FunctionDebugFrameInfo(SubprogramCount,
MMI->getFrameMoves()));
// Clear debug info
- if (FunctionDbgScope) {
- delete FunctionDbgScope;
+ if (CurrentFnDbgScope) {
+ CurrentFnDbgScope = NULL;
DbgScopeMap.clear();
DbgScopeBeginMap.clear();
DbgScopeEndMap.clear();
- DbgAbstractScopeMap.clear();
- DbgConcreteScopeMap.clear();
- FunctionDbgScope = NULL;
- LexicalScopeStack.clear();
- AbstractInstanceRootList.clear();
- AbstractInstanceRootMap.clear();
+ ConcreteScopes.clear();
+ AbstractScopesList.clear();
}
Lines.clear();
@@ -2130,201 +2230,6 @@ unsigned DwarfDebug::getOrCreateSourceID(const std::string &DirName,
return SrcId;
}
-/// RecordRegionStart - Indicate the start of a region.
-unsigned DwarfDebug::RecordRegionStart(MDNode *N) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- DbgScope *Scope = getOrCreateScope(N);
- unsigned ID = MMI->NextLabelID();
- if (!Scope->getStartLabelID()) Scope->setStartLabelID(ID);
- LexicalScopeStack.push_back(Scope);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return ID;
-}
-
-/// RecordRegionEnd - Indicate the end of a region.
-unsigned DwarfDebug::RecordRegionEnd(MDNode *N) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- DbgScope *Scope = getOrCreateScope(N);
- unsigned ID = MMI->NextLabelID();
- Scope->setEndLabelID(ID);
- // FIXME : region.end() may not be in the last basic block.
- // For now, do not pop last lexical scope because next basic
- // block may start new inlined function's body.
- unsigned LSSize = LexicalScopeStack.size();
- if (LSSize != 0 && LSSize != 1)
- LexicalScopeStack.pop_back();
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return ID;
-}
-
-/// RecordVariable - Indicate the declaration of a local variable.
-void DwarfDebug::RecordVariable(MDNode *N, unsigned FrameIndex) {
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- DIDescriptor Desc(N);
- DbgScope *Scope = NULL;
- bool InlinedFnVar = false;
-
- if (Desc.getTag() == dwarf::DW_TAG_variable)
- Scope = getOrCreateScope(DIGlobalVariable(N).getContext().getNode());
- else {
- bool InlinedVar = false;
- MDNode *Context = DIVariable(N).getContext().getNode();
- DISubprogram SP(Context);
- if (!SP.isNull()) {
- // SP is inserted into DbgAbstractScopeMap when inlined function
- // start was recorded by RecordInlineFnStart.
- ValueMap<MDNode *, DbgScope *>::iterator
- I = DbgAbstractScopeMap.find(SP.getNode());
- if (I != DbgAbstractScopeMap.end()) {
- InlinedVar = true;
- Scope = I->second;
- }
- }
- if (!InlinedVar)
- Scope = getOrCreateScope(Context);
- }
-
- assert(Scope && "Unable to find the variable's scope");
- DbgVariable *DV = new DbgVariable(DIVariable(N), FrameIndex, InlinedFnVar);
- Scope->AddVariable(DV);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-}
-
-//// RecordInlinedFnStart - Indicate the start of inlined subroutine.
-unsigned DwarfDebug::RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
- unsigned Line, unsigned Col) {
- unsigned LabelID = MMI->NextLabelID();
-
- if (!MAI->doesDwarfUsesInlineInfoSection())
- return LabelID;
-
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- MDNode *Node = SP.getNode();
- DenseMap<const MDNode *, DbgScope *>::iterator
- II = AbstractInstanceRootMap.find(Node);
-
- if (II == AbstractInstanceRootMap.end()) {
- // Create an abstract instance entry for this inlined function if it doesn't
- // already exist.
- DbgScope *Scope = new DbgScope(NULL, DIDescriptor(Node));
-
- // Get the compile unit context.
- DIE *SPDie = ModuleCU->getDieMapSlotFor(Node);
- if (!SPDie)
- SPDie = CreateSubprogramDIE(ModuleCU, SP, false, true);
-
- // Mark as being inlined. This makes this subprogram entry an abstract
- // instance root.
- // FIXME: Our debugger doesn't care about the value of DW_AT_inline, only
- // that it's defined. That probably won't change in the future. However,
- // this could be more elegant.
- AddUInt(SPDie, dwarf::DW_AT_inline, 0, dwarf::DW_INL_declared_not_inlined);
-
- // Keep track of the abstract scope for this function.
- DbgAbstractScopeMap[Node] = Scope;
-
- AbstractInstanceRootMap[Node] = Scope;
- AbstractInstanceRootList.push_back(Scope);
- }
-
- // Create a concrete inlined instance for this inlined function.
- DbgConcreteScope *ConcreteScope = new DbgConcreteScope(DIDescriptor(Node));
- DIE *ScopeDie = new DIE(dwarf::DW_TAG_inlined_subroutine);
- ScopeDie->setAbstractCompileUnit(ModuleCU);
-
- DIE *Origin = ModuleCU->getDieMapSlotFor(Node);
- AddDIEEntry(ScopeDie, dwarf::DW_AT_abstract_origin,
- dwarf::DW_FORM_ref4, Origin);
- AddUInt(ScopeDie, dwarf::DW_AT_call_file, 0, ModuleCU->getID());
- AddUInt(ScopeDie, dwarf::DW_AT_call_line, 0, Line);
- AddUInt(ScopeDie, dwarf::DW_AT_call_column, 0, Col);
-
- ConcreteScope->setDie(ScopeDie);
- ConcreteScope->setStartLabelID(LabelID);
- MMI->RecordUsedDbgLabel(LabelID);
-
- LexicalScopeStack.back()->AddConcreteInst(ConcreteScope);
-
- // Keep track of the concrete scope that's inlined into this function.
- ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
- SI = DbgConcreteScopeMap.find(Node);
-
- if (SI == DbgConcreteScopeMap.end())
- DbgConcreteScopeMap[Node].push_back(ConcreteScope);
- else
- SI->second.push_back(ConcreteScope);
-
- // Track the start label for this inlined function.
- ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator
- I = InlineInfo.find(Node);
-
- if (I == InlineInfo.end())
- InlineInfo[Node].push_back(LabelID);
- else
- I->second.push_back(LabelID);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return LabelID;
-}
-
-/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
-unsigned DwarfDebug::RecordInlinedFnEnd(DISubprogram &SP) {
- if (!MAI->doesDwarfUsesInlineInfoSection())
- return 0;
-
- if (TimePassesIsEnabled)
- DebugTimer->startTimer();
-
- MDNode *Node = SP.getNode();
- ValueMap<MDNode *, SmallVector<DbgScope *, 8> >::iterator
- I = DbgConcreteScopeMap.find(Node);
-
- if (I == DbgConcreteScopeMap.end()) {
- // FIXME: Can this situation actually happen? And if so, should it?
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return 0;
- }
-
- SmallVector<DbgScope *, 8> &Scopes = I->second;
- if (Scopes.empty()) {
- // Returned ID is 0 if this is unbalanced "end of inlined
- // scope". This could happen if optimizer eats dbg intrinsics
- // or "beginning of inlined scope" is not recoginized due to
- // missing location info. In such cases, ignore this region.end.
- return 0;
- }
-
- DbgScope *Scope = Scopes.back(); Scopes.pop_back();
- unsigned ID = MMI->NextLabelID();
- MMI->RecordUsedDbgLabel(ID);
- Scope->setEndLabelID(ID);
-
- if (TimePassesIsEnabled)
- DebugTimer->stopTimer();
-
- return ID;
-}
-
//===----------------------------------------------------------------------===//
// Emit Methods
//===----------------------------------------------------------------------===//
@@ -2470,10 +2375,7 @@ void DwarfDebug::EmitDIE(DIE *Die) {
case dwarf::DW_AT_abstract_origin: {
DIEEntry *E = cast<DIEEntry>(Values[i]);
DIE *Origin = E->getEntry();
- unsigned Addr =
- CompileUnitOffsets[Die->getAbstractCompileUnit()] +
- Origin->getOffset();
-
+ unsigned Addr = Origin->getOffset();
Asm->EmitInt32(Addr);
break;
}
@@ -3002,10 +2904,14 @@ void DwarfDebug::EmitDebugInlineInfo() {
Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("Dwarf Version");
Asm->EmitInt8(TD->getPointerSize()); Asm->EOL("Address Size (in bytes)");
- for (ValueMap<MDNode *, SmallVector<unsigned, 4> >::iterator
- I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
- MDNode *Node = I->first;
- SmallVector<unsigned, 4> &Labels = I->second;
+ for (SmallVector<MDNode *, 4>::iterator I = InlinedSPNodes.begin(),
+ E = InlinedSPNodes.end(); I != E; ++I) {
+
+// for (ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator
+ // I = InlineInfo.begin(), E = InlineInfo.end(); I != E; ++I) {
+ MDNode *Node = *I;
+ ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> >::iterator II = InlineInfo.find(Node);
+ SmallVector<InlineInfoLabels, 4> &Labels = II->second;
DISubprogram SP(Node);
const char *LName = SP.getLinkageName();
const char *Name = SP.getName();
@@ -3019,17 +2925,21 @@ void DwarfDebug::EmitDebugInlineInfo() {
// __asm__ attribute.
if (LName[0] == 1)
LName = &LName[1];
- Asm->EmitString(LName);
+// Asm->EmitString(LName);
+ EmitSectionOffset("string", "section_str",
+ StringPool.idFor(LName), false, true);
+
}
Asm->EOL("MIPS linkage name");
-
- Asm->EmitString(Name); Asm->EOL("Function name");
-
+// Asm->EmitString(Name);
+ EmitSectionOffset("string", "section_str",
+ StringPool.idFor(Name), false, true);
+ Asm->EOL("Function name");
Asm->EmitULEB128Bytes(Labels.size()); Asm->EOL("Inline count");
- for (SmallVector<unsigned, 4>::iterator LI = Labels.begin(),
+ for (SmallVector<InlineInfoLabels, 4>::iterator LI = Labels.begin(),
LE = Labels.end(); LI != LE; ++LI) {
- DIE *SP = ModuleCU->getDieMapSlotFor(Node);
+ DIE *SP = LI->second;
Asm->EmitInt32(SP->getOffset()); Asm->EOL("DIE offset");
if (TD->getPointerSize() == sizeof(int32_t))
@@ -3037,7 +2947,7 @@ void DwarfDebug::EmitDebugInlineInfo() {
else
O << MAI->getData64bitsDirective();
- PrintLabelName("label", *LI); Asm->EOL("low_pc");
+ PrintLabelName("label", LI->first); Asm->EOL("low_pc");
}
}
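For each inlined subprogram, the rewritten EmitDebugInlineInfo records string-section offsets for the linkage name and the name, an inline count, and then one (DIE offset, low_pc label) pair per concrete inlined instance. A rough, purely illustrative model of the per-subprogram record being emitted — the directives, labels, and values are made up, not the emitter's exact output:

    #include <cstdio>
    #include <string>
    #include <vector>

    // One inlined instance: offset of its DW_TAG_inlined_subroutine DIE plus
    // the label marking its starting address (the low_pc that gets emitted).
    struct InlineInstance {
      unsigned DieOffset;
      unsigned StartLabelID;
    };

    int main() {
      // Hypothetical subprogram with two inlined call sites.
      std::string LinkageName = "_Z3barv", Name = "bar";
      std::vector<InlineInstance> Instances = {{0x120, 7}, {0x180, 12}};

      // Shape of one debug_inlined record:
      std::printf("  .long  str_offset(%s)   ; MIPS linkage name\n", LinkageName.c_str());
      std::printf("  .long  str_offset(%s)   ; function name\n", Name.c_str());
      std::printf("  .uleb128 %zu             ; inline count\n", Instances.size());
      for (const InlineInstance &I : Instances) {
        std::printf("  .long  0x%x             ; DIE offset\n", I.DieOffset);
        std::printf("  .quad  label_%u          ; low_pc\n", I.StartLabelID);
      }
      return 0;
    }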
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index ddb0a15..646de8f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -30,9 +30,9 @@
namespace llvm {
class CompileUnit;
-class DbgVariable;
-class DbgScope;
class DbgConcreteScope;
+class DbgScope;
+class DbgVariable;
class MachineFrameInfo;
class MachineModuleInfo;
class MCAsmInfo;
@@ -41,7 +41,7 @@ class Timer;
//===----------------------------------------------------------------------===//
/// SrcLineInfo - This class is used to record source line correspondence.
///
-class VISIBILITY_HIDDEN SrcLineInfo {
+class SrcLineInfo {
unsigned Line; // Source line number.
unsigned Column; // Source column.
unsigned SourceID; // Source ID number.
@@ -57,7 +57,7 @@ public:
unsigned getLabelID() const { return LabelID; }
};
-class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
+class DwarfDebug : public Dwarf {
//===--------------------------------------------------------------------===//
// Attributes used to construct specific Dwarf sections.
//
@@ -134,52 +134,52 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
///
bool shouldEmit;
- // FunctionDbgScope - Top level scope for the current function.
+ // CurrentFnDbgScope - Top level scope for the current function.
//
- DbgScope *FunctionDbgScope;
+ DbgScope *CurrentFnDbgScope;
/// DbgScopeMap - Tracks the scopes in the current function.
+ ///
ValueMap<MDNode *, DbgScope *> DbgScopeMap;
+ /// ConcreteScopes - Tracks the concrete scopes in the current function.
+ /// These scopes are also included in DbgScopeMap.
+ ValueMap<MDNode *, DbgScope *> ConcreteScopes;
+
+ /// AbstractScopes - Tracks the abstract scopes of a module. These scopes are
+ /// not included in DbgScopeMap.
+ ValueMap<MDNode *, DbgScope *> AbstractScopes;
+ SmallVector<DbgScope *, 4> AbstractScopesList;
+
+ /// AbstractVariables - Collection of abstract variables.
+ ValueMap<MDNode *, DbgVariable *> AbstractVariables;
+
+ /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked
+ /// (at the end of the module) as DW_AT_inline.
+ SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+
+ /// AbstractSubprogramDIEs - Collection of abstract subprogram DIEs.
+ SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs;
+
/// ScopedGVs - Tracks global variables that are not at file scope.
/// For example void f() { static int b = 42; }
SmallVector<WeakVH, 4> ScopedGVs;
- typedef DenseMap<const MachineInstr *, SmallVector<DbgScope *, 2> >
+ typedef SmallVector<DbgScope *, 2> ScopeVector;
+ typedef DenseMap<const MachineInstr *, ScopeVector>
InsnToDbgScopeMapTy;
- /// DbgScopeBeginMap - Maps instruction with a list DbgScopes it starts.
+ /// DbgScopeBeginMap - Maps instruction with a list of DbgScopes it starts.
InsnToDbgScopeMapTy DbgScopeBeginMap;
/// DbgScopeEndMap - Maps instruction with a list DbgScopes it ends.
InsnToDbgScopeMapTy DbgScopeEndMap;
- /// DbgAbstractScopeMap - Tracks abstract instance scopes in the current
- /// function.
- ValueMap<MDNode *, DbgScope *> DbgAbstractScopeMap;
-
- /// DbgConcreteScopeMap - Tracks concrete instance scopes in the current
- /// function.
- ValueMap<MDNode *,
- SmallVector<DbgScope *, 8> > DbgConcreteScopeMap;
-
/// InlineInfo - Keep track of inlined functions and their location. This
/// information is used to populate debug_inlined section.
- ValueMap<MDNode *, SmallVector<unsigned, 4> > InlineInfo;
-
- /// AbstractInstanceRootMap - Map of abstract instance roots of inlined
- /// functions. These are subroutine entries that contain a DW_AT_inline
- /// attribute.
- DenseMap<const MDNode *, DbgScope *> AbstractInstanceRootMap;
-
- /// AbstractInstanceRootList - List of abstract instance roots of inlined
- /// functions. These are subroutine entries that contain a DW_AT_inline
- /// attribute.
- SmallVector<DbgScope *, 32> AbstractInstanceRootList;
-
- /// LexicalScopeStack - A stack of lexical scopes. The top one is the current
- /// scope.
- SmallVector<DbgScope *, 16> LexicalScopeStack;
+ typedef std::pair<unsigned, DIE *> InlineInfoLabels;
+ ValueMap<MDNode *, SmallVector<InlineInfoLabels, 4> > InlineInfo;
+ SmallVector<MDNode *, 4> InlinedSPNodes;
/// CompileUnitOffsets - A vector of the offsets of the compile units. This is
/// used when calculating the "origin" of a concrete instance of an inlined
@@ -361,10 +361,24 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
///
DIE *CreateDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
- /// getDbgScope - Returns the scope associated with the given descriptor.
- ///
- DbgScope *getOrCreateScope(MDNode *N);
- DbgScope *getDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt);
+ /// getUpdatedDbgScope - Find or create DbgScope associated with
+ /// the instruction. Initialize scope and update scope hierarchy.
+ DbgScope *getUpdatedDbgScope(MDNode *N, const MachineInstr *MI, MDNode *InlinedAt);
+
+ /// createDbgScope - Create DbgScope for the scope.
+ void createDbgScope(MDNode *Scope, MDNode *InlinedAt);
+
+ DbgScope *getOrCreateAbstractScope(MDNode *N);
+
+ /// findAbstractVariable - Find abstract variable associated with Var.
+ DbgVariable *findAbstractVariable(DIVariable &Var, unsigned FrameIdx,
+ DILocation &Loc);
+
+ DIE *UpdateSubprogramScopeDIE(MDNode *SPNode);
+ DIE *ConstructLexicalScopeDIE(DbgScope *Scope);
+ DIE *ConstructScopeDIE(DbgScope *Scope);
+ DIE *ConstructInlinedScopeDIE(DbgScope *Scope);
+ DIE *ConstructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit);
/// ConstructDbgScope - Construct the components of a scope.
///
@@ -372,15 +386,6 @@ class VISIBILITY_HIDDEN DwarfDebug : public Dwarf {
unsigned ParentStartID, unsigned ParentEndID,
DIE *ParentDie, CompileUnit *Unit);
- /// ConstructFunctionDbgScope - Construct the scope for the subprogram.
- ///
- void ConstructFunctionDbgScope(DbgScope *RootScope,
- bool AbstractScope = false);
-
- /// ConstructDefaultDbgScope - Construct a default scope for the subprogram.
- ///
- void ConstructDefaultDbgScope(MachineFunction *MF);
-
/// EmitInitial - Emit initial Dwarf declarations. This is necessary for cc
/// tools to recognize the object file contains Dwarf information.
void EmitInitial();
@@ -535,22 +540,6 @@ public:
unsigned getOrCreateSourceID(const std::string &DirName,
const std::string &FileName);
- /// RecordRegionStart - Indicate the start of a region.
- unsigned RecordRegionStart(MDNode *N);
-
- /// RecordRegionEnd - Indicate the end of a region.
- unsigned RecordRegionEnd(MDNode *N);
-
- /// RecordVariable - Indicate the declaration of a local variable.
- void RecordVariable(MDNode *N, unsigned FrameIndex);
-
- //// RecordInlinedFnStart - Indicate the start of inlined subroutine.
- unsigned RecordInlinedFnStart(DISubprogram &SP, DICompileUnit CU,
- unsigned Line, unsigned Col);
-
- /// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
- unsigned RecordInlinedFnEnd(DISubprogram &SP);
-
/// ExtractScopeInformation - Scan machine instructions in this function
/// and collect DbgScopes. Return true, if atleast one scope was found.
bool ExtractScopeInformation(MachineFunction *MF);
@@ -558,15 +547,16 @@ public:
/// CollectVariableInfo - Populate DbgScope entries with variables' info.
void CollectVariableInfo();
- /// SetDbgScopeBeginLabels - Update DbgScope begin labels for the scopes that
- /// start with this machine instruction.
- void SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned Label);
-
/// SetDbgScopeEndLabels - Update DbgScope end labels for the scopes that
/// end with this machine instruction.
void SetDbgScopeEndLabels(const MachineInstr *MI, unsigned Label);
-};
+ /// BeginScope - Process beginning of a scope starting at Label.
+ void BeginScope(const MachineInstr *MI, unsigned Label);
+
+ /// EndScope - Process end of a scope.
+ void EndScope(const MachineInstr *MI);
+};
} // End of namespace llvm
#endif
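A minimal standalone sketch (not part of the patch) of the data-structure pattern the DwarfDebug.h hunk above introduces: a lookup map (InlineInfo) paired with a vector of its keys (InlinedSPNodes) so that emission can walk inlined subprograms in a stable insertion order rather than map order, with each entry carrying (label, DIE) pairs. The names below (InlineRecord, recordInlined, emit) are hypothetical stand-ins using only standard C++ containers.

// Sketch: map for lookup, key vector for deterministic emission order.
#include <cstddef>
#include <cstdio>
#include <map>
#include <string>
#include <utility>
#include <vector>

struct InlineRecord {
  // Lookup: subprogram name -> (label id, DIE offset) pairs.
  std::map<std::string, std::vector<std::pair<unsigned, unsigned> > > Info;
  // Keys in the order they were first recorded; this drives emission.
  std::vector<std::string> Order;

  void recordInlined(const std::string &SP, unsigned Label, unsigned DieOffset) {
    std::vector<std::pair<unsigned, unsigned> > &Labels = Info[SP];
    if (Labels.empty())
      Order.push_back(SP);                 // first sighting: remember its order
    Labels.push_back(std::make_pair(Label, DieOffset));
  }

  void emit() const {
    for (std::size_t i = 0, e = Order.size(); i != e; ++i) {
      const std::vector<std::pair<unsigned, unsigned> > &Labels =
          Info.find(Order[i])->second;
      std::printf("%s: %zu inline site(s)\n", Order[i].c_str(), Labels.size());
      for (std::size_t j = 0, f = Labels.size(); j != f; ++j)
        std::printf("  label_%u -> DIE offset %u\n",
                    Labels[j].first, Labels[j].second);
    }
  }
};

Recording each inline site through recordInlined and then calling emit reproduces, in miniature, the order-preserving walk EmitDebugInlineInfo now performs over InlinedSPNodes.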
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 6c03b55..1c8b8f4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -74,6 +74,25 @@ unsigned DwarfException::SizeOfEncodedValue(unsigned Encoding) {
return 0;
}
+/// CreateLabelDiff - Emit a label and subtract it from the expression we
+/// already have. This is equivalent to emitting "foo - .", but we have to emit
+/// the label for "." directly.
+const MCExpr *DwarfException::CreateLabelDiff(const MCExpr *ExprRef,
+ const char *LabelName,
+ unsigned Index) {
+ SmallString<64> Name;
+ raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
+ << LabelName << Asm->getFunctionNumber()
+ << "_" << Index;
+ MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str());
+ Asm->OutStreamer.EmitLabel(DotSym);
+
+ return MCBinaryExpr::CreateSub(ExprRef,
+ MCSymbolRefExpr::Create(DotSym,
+ Asm->OutContext),
+ Asm->OutContext);
+}
+
/// EmitCIE - Emit a Common Information Entry (CIE). This holds information that
/// is shared among many Frame Description Entries. There is at least one CIE
/// in every non-empty .debug_frame section.
@@ -176,24 +195,10 @@ void DwarfException::EmitCIE(const Function *PersonalityFn, unsigned Index) {
// If there is a personality, we need to indicate the function's location.
if (PersonalityRef) {
- // If the reference to the personality function symbol is not already
- // pc-relative, then we need to subtract our current address from it. Do
- // this by emitting a label and subtracting it from the expression we
- // already have. This is equivalent to emitting "foo - .", but we have to
- // emit the label for "." directly.
- if (!IsPersonalityPCRel) {
- SmallString<64> Name;
- raw_svector_ostream(Name) << MAI->getPrivateGlobalPrefix()
- << "personalityref_addr" << Asm->getFunctionNumber() << "_" << Index;
- MCSymbol *DotSym = Asm->OutContext.GetOrCreateSymbol(Name.str());
- Asm->OutStreamer.EmitLabel(DotSym);
-
- PersonalityRef =
- MCBinaryExpr::CreateSub(PersonalityRef,
- MCSymbolRefExpr::Create(DotSym,Asm->OutContext),
- Asm->OutContext);
- }
-
+ if (!IsPersonalityPCRel)
+ PersonalityRef = CreateLabelDiff(PersonalityRef, "personalityref_addr",
+ Index);
+
O << MAI->getData32bitsDirective();
PersonalityRef->print(O, MAI);
Asm->EOL("Personality");
@@ -232,11 +237,16 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
// corresponding function is static, this should not be externally visible.
if (!TheFunc->hasLocalLinkage())
if (const char *GlobalEHDirective = MAI->getGlobalEHDirective())
- O << GlobalEHDirective << EHFrameInfo.FnName << "\n";
+ O << GlobalEHDirective << EHFrameInfo.FnName << '\n';
// If corresponding function is weak definition, this should be too.
if (TheFunc->isWeakForLinker() && MAI->getWeakDefDirective())
- O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << "\n";
+ O << MAI->getWeakDefDirective() << EHFrameInfo.FnName << '\n';
+
+ // If corresponding function is hidden, this should be too.
+ if (TheFunc->hasHiddenVisibility())
+ if (const char *HiddenDirective = MAI->getHiddenDirective())
+ O << HiddenDirective << EHFrameInfo.FnName << '\n';
// If there are no calls then you can't unwind. This may mean we can omit the
// EH Frame, but some environments do not handle weak absolute symbols. If
@@ -457,6 +467,39 @@ ComputeActionsTable(const SmallVectorImpl<const LandingPadInfo*> &LandingPads,
return SizeActions;
}
+/// CallToNoUnwindFunction - Return `true' if this is a call to a function
+/// marked `nounwind'. Return `false' otherwise.
+bool DwarfException::CallToNoUnwindFunction(const MachineInstr *MI) {
+ assert(MI->getDesc().isCall() && "This should be a call instruction!");
+
+ bool MarkedNoUnwind = false;
+ bool SawFunc = false;
+
+ for (unsigned I = 0, E = MI->getNumOperands(); I != E; ++I) {
+ const MachineOperand &MO = MI->getOperand(I);
+
+ if (MO.isGlobal()) {
+ if (Function *F = dyn_cast<Function>(MO.getGlobal())) {
+ if (SawFunc) {
+ // Be conservative. If we have more than one function operand for this
+ // call, then we can't make the assumption that it's the callee and
+ // not a parameter to the call.
+ //
+ // FIXME: Determine if there's a way to say that `F' is the callee or
+ // parameter.
+ MarkedNoUnwind = false;
+ break;
+ }
+
+ MarkedNoUnwind = F->doesNotThrow();
+ SawFunc = true;
+ }
+ }
+ }
+
+ return MarkedNoUnwind;
+}
+
/// ComputeCallSiteTable - Compute the call-site table. The entry for an invoke
/// has a try-range containing the call, a non-zero landing pad, and an
/// appropriate action. The entry for an ordinary call has a try-range
@@ -485,7 +528,9 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
for (MachineBasicBlock::const_iterator MI = I->begin(), E = I->end();
MI != E; ++MI) {
if (!MI->isLabel()) {
- SawPotentiallyThrowing |= MI->getDesc().isCall();
+ if (MI->getDesc().isCall())
+ SawPotentiallyThrowing |= !CallToNoUnwindFunction(MI);
+
continue;
}
@@ -497,7 +542,7 @@ ComputeCallSiteTable(SmallVectorImpl<CallSiteEntry> &CallSites,
SawPotentiallyThrowing = false;
// Beginning of a new try-range?
- RangeMapType::iterator L = PadMap.find(BeginLabel);
+ RangeMapType::const_iterator L = PadMap.find(BeginLabel);
if (L == PadMap.end())
// Nope, it was just some random label.
continue;
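A minimal standalone sketch (not part of the patch) of the conservative rule CallToNoUnwindFunction above applies: trust a callee's `nounwind' only when the call has exactly one function-valued operand, since with two or more we cannot tell the callee apart from a function passed as an argument. The types below are simplified, hypothetical stand-ins for MachineOperand and Function.

// Sketch: conservative nounwind detection over a call's operand list.
#include <cstddef>
#include <vector>

struct Func { bool DoesNotThrow; };

struct Operand {
  const Func *F;                 // non-null if this operand names a function
};

static bool callToNoUnwindFunction(const std::vector<Operand> &Ops) {
  bool MarkedNoUnwind = false;
  bool SawFunc = false;
  for (std::size_t i = 0, e = Ops.size(); i != e; ++i) {
    if (!Ops[i].F)
      continue;
    if (SawFunc)                 // ambiguous: callee vs. argument, give up
      return false;
    MarkedNoUnwind = Ops[i].F->DoesNotThrow;
    SawFunc = true;
  }
  return MarkedNoUnwind;
}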
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index f6f5025..aff1665 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -25,13 +25,14 @@ namespace llvm {
struct LandingPadInfo;
class MachineModuleInfo;
class MCAsmInfo;
+class MCExpr;
class Timer;
class raw_ostream;
//===----------------------------------------------------------------------===//
/// DwarfException - Emits Dwarf exception handling directives.
///
-class VISIBILITY_HIDDEN DwarfException : public Dwarf {
+class DwarfException : public Dwarf {
struct FunctionEHFrameInfo {
std::string FnName;
unsigned Number;
@@ -155,6 +156,10 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf {
SmallVectorImpl<ActionEntry> &Actions,
SmallVectorImpl<unsigned> &FirstActions);
+ /// CallToNoUnwindFunction - Return `true' if this is a call to a function
+ /// marked `nounwind'. Return `false' otherwise.
+ bool CallToNoUnwindFunction(const MachineInstr *MI);
+
/// ComputeCallSiteTable - Compute the call-site table. The entry for an
/// invoke has a try-range containing the call, a non-zero landing pad and an
/// appropriate action. The entry for an ordinary call has a try-range
@@ -168,6 +173,11 @@ class VISIBILITY_HIDDEN DwarfException : public Dwarf {
const SmallVectorImpl<unsigned> &FirstActions);
void EmitExceptionTable();
+ /// CreateLabelDiff - Emit a label and subtract it from the expression we
+ /// already have. This is equivalent to emitting "foo - .", but we have to
+ /// emit the label for "." directly.
+ const MCExpr *CreateLabelDiff(const MCExpr *ExprRef, const char *LabelName,
+ unsigned Index);
public:
//===--------------------------------------------------------------------===//
// Main entry points.
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
index 33ebb3b..dedd695 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -29,7 +29,7 @@ namespace llvm {
class TargetData;
class TargetRegisterInfo;
- class VISIBILITY_HIDDEN Dwarf {
+ class Dwarf {
protected:
//===-------------------------------------------------------------==---===//
// Core attributes used by the DWARF printer.
diff --git a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
index 0638d35..63ae653 100644
--- a/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfWriter.cpp
@@ -81,47 +81,20 @@ unsigned DwarfWriter::RecordSourceLine(unsigned Line, unsigned Col,
return DD->RecordSourceLine(Line, Col, Scope);
}
-/// RecordRegionStart - Indicate the start of a region.
-unsigned DwarfWriter::RecordRegionStart(MDNode *N) {
- return DD->RecordRegionStart(N);
-}
-
-/// RecordRegionEnd - Indicate the end of a region.
-unsigned DwarfWriter::RecordRegionEnd(MDNode *N) {
- return DD->RecordRegionEnd(N);
-}
-
/// getRecordSourceLineCount - Count source lines.
unsigned DwarfWriter::getRecordSourceLineCount() {
return DD->getRecordSourceLineCount();
}
-/// RecordVariable - Indicate the declaration of a local variable.
-///
-void DwarfWriter::RecordVariable(MDNode *N, unsigned FrameIndex) {
- DD->RecordVariable(N, FrameIndex);
-}
-
/// ShouldEmitDwarfDebug - Returns true if Dwarf debugging declarations should
/// be emitted.
bool DwarfWriter::ShouldEmitDwarfDebug() const {
return DD && DD->ShouldEmitDwarfDebug();
}
-//// RecordInlinedFnStart
-unsigned DwarfWriter::RecordInlinedFnStart(DISubprogram SP, DICompileUnit CU,
- unsigned Line, unsigned Col) {
- return DD->RecordInlinedFnStart(SP, CU, Line, Col);
-}
-
-/// RecordInlinedFnEnd - Indicate the end of inlined subroutine.
-unsigned DwarfWriter::RecordInlinedFnEnd(DISubprogram SP) {
- return DD->RecordInlinedFnEnd(SP);
-}
-
-void DwarfWriter::SetDbgScopeBeginLabels(const MachineInstr *MI, unsigned L) {
- DD->SetDbgScopeEndLabels(MI, L);
+void DwarfWriter::BeginScope(const MachineInstr *MI, unsigned L) {
+ DD->BeginScope(MI, L);
}
-void DwarfWriter::SetDbgScopeEndLabels(const MachineInstr *MI, unsigned L) {
- DD->SetDbgScopeBeginLabels(MI, L);
+void DwarfWriter::EndScope(const MachineInstr *MI) {
+ DD->EndScope(MI);
}
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index baea964..94bfb72 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -32,6 +32,7 @@
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include <algorithm>
@@ -40,18 +41,38 @@ using namespace llvm;
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
STATISTIC(NumBranchOpts, "Number of branches optimized");
STATISTIC(NumTailMerge , "Number of block tails merged");
-static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
+static cl::opt<cl::boolOrDefault> FlagEnableTailMerge("enable-tail-merge",
cl::init(cl::BOU_UNSET), cl::Hidden);
// Throttle for huge numbers of predecessors (compile speed problems)
static cl::opt<unsigned>
-TailMergeThreshold("tail-merge-threshold",
+TailMergeThreshold("tail-merge-threshold",
cl::desc("Max number of predecessors to consider tail merging"),
cl::init(150), cl::Hidden);
+// Heuristic for tail merging (and, inversely, tail duplication).
+// TODO: This should be replaced with a target query.
+static cl::opt<unsigned>
+TailMergeSize("tail-merge-size",
+ cl::desc("Min number of instructions to consider tail merging"),
+ cl::init(3), cl::Hidden);
+
+namespace {
+ /// BranchFolderPass - Wrap branch folder in a machine function pass.
+ class BranchFolderPass : public MachineFunctionPass,
+ public BranchFolder {
+ public:
+ static char ID;
+ explicit BranchFolderPass(bool defaultEnableTailMerge)
+ : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+ virtual const char *getPassName() const { return "Control Flow Optimizer"; }
+ };
+}
char BranchFolderPass::ID = 0;
-FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
+FunctionPass *llvm::createBranchFoldingPass(bool DefaultEnableTailMerge) {
return new BranchFolderPass(DefaultEnableTailMerge);
}
@@ -63,7 +84,6 @@ bool BranchFolderPass::runOnMachineFunction(MachineFunction &MF) {
}
-
BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
switch (FlagEnableTailMerge) {
case cl::BOU_UNSET: EnableTailMerge = defaultEnableTailMerge; break;
@@ -77,12 +97,12 @@ BranchFolder::BranchFolder(bool defaultEnableTailMerge) {
void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
assert(MBB->pred_empty() && "MBB must be dead!");
DEBUG(errs() << "\nRemoving MBB: " << *MBB);
-
+
MachineFunction *MF = MBB->getParent();
// drop all successors.
while (!MBB->succ_empty())
MBB->removeSuccessor(MBB->succ_end()-1);
-
+
// If there are any labels in the basic block, unregister them from
// MachineModuleInfo.
if (MMI && !MBB->empty()) {
@@ -93,7 +113,7 @@ void BranchFolder::RemoveDeadBlock(MachineBasicBlock *MBB) {
MMI->InvalidateLabel(I->getOperand(0).getImm());
}
}
-
+
// Remove the block.
MF->erase(MBB);
}
@@ -182,6 +202,11 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
MadeChange |= MadeChangeThisIteration;
}
+ // Do tail duplication once after tail merging is done. Otherwise it is
+ // tough to avoid situations where tail duplication and tail merging undo
+ // each other's transformations ad infinitum.
+ MadeChange |= TailDuplicateBlocks(MF);
+
// See if any jump tables have become mergable or dead as the code generator
// did its thing.
MachineJumpTableInfo *JTI = MF.getJumpTableInfo();
@@ -190,7 +215,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
// Figure out how these jump tables should be merged.
std::vector<unsigned> JTMapping;
JTMapping.reserve(JTs.size());
-
+
// We always keep the 0th jump table.
JTMapping.push_back(0);
@@ -202,7 +227,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
else
JTMapping.push_back(JTI->getJumpTableIndex(JTs[i].MBBs));
}
-
+
// If a jump table was merge with another one, walk the function rewriting
// references to jump tables to reference the new JT ID's. Keep track of
// whether we see a jump table idx, if not, we can delete the JT.
@@ -221,7 +246,7 @@ bool BranchFolder::OptimizeFunction(MachineFunction &MF,
JTIsLive.set(NewIdx);
}
}
-
+
// Finally, remove dead jump tables. This happens either because the
// indirect jump was unreachable (and thus deleted) or because the jump
// table was merged with some other one.
@@ -245,7 +270,7 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
unsigned Hash = MI->getOpcode();
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &Op = MI->getOperand(i);
-
+
// Merge in bits from the operand if easy.
unsigned OperandHash = 0;
switch (Op.getType()) {
@@ -267,31 +292,30 @@ static unsigned HashMachineInstr(const MachineInstr *MI) {
break;
default: break;
}
-
+
Hash += ((OperandHash << 3) | Op.getType()) << (i&31);
}
return Hash;
}
/// HashEndOfMBB - Hash the last few instructions in the MBB. For blocks
-/// with no successors, we hash two instructions, because cross-jumping
-/// only saves code when at least two instructions are removed (since a
+/// with no successors, we hash two instructions, because cross-jumping
+/// only saves code when at least two instructions are removed (since a
/// branch must be inserted). For blocks with a successor, one of the
/// two blocks to be tail-merged will end with a branch already, so
/// it gains to cross-jump even for one instruction.
-
static unsigned HashEndOfMBB(const MachineBasicBlock *MBB,
unsigned minCommonTailLength) {
MachineBasicBlock::const_iterator I = MBB->end();
if (I == MBB->begin())
return 0; // Empty MBB.
-
+
--I;
unsigned Hash = HashMachineInstr(I);
-
+
if (I == MBB->begin() || minCommonTailLength == 1)
return Hash; // Single instr MBB.
-
+
--I;
// Hash in the second-to-last instruction.
Hash ^= HashMachineInstr(I) << 2;
@@ -307,11 +331,11 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
MachineBasicBlock::iterator &I2) {
I1 = MBB1->end();
I2 = MBB2->end();
-
+
unsigned TailLen = 0;
while (I1 != MBB1->begin() && I2 != MBB2->begin()) {
--I1; --I2;
- if (!I1->isIdenticalTo(I2) ||
+ if (!I1->isIdenticalTo(I2) ||
// FIXME: This check is dubious. It's used to get around a problem where
// people incorrectly expect inline asm directives to remain in the same
// relative order. This is untenable because normal compiler
@@ -332,11 +356,11 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1,
void BranchFolder::ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest) {
MachineBasicBlock *OldBB = OldInst->getParent();
-
+
// Remove all the old successors of OldBB from the CFG.
while (!OldBB->succ_empty())
OldBB->removeSuccessor(OldBB->succ_begin());
-
+
// Remove all the dead instructions from the end of OldBB.
OldBB->erase(OldInst, OldBB->end());
@@ -361,10 +385,10 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
// Move all the successors of this block to the specified block.
NewMBB->transferSuccessors(&CurMBB);
-
+
// Add an edge from CurMBB to NewMBB for the fall-through.
CurMBB.addSuccessor(NewMBB);
-
+
// Splice the code over.
NewMBB->splice(NewMBB->end(), &CurMBB, BBI1, CurMBB.end());
@@ -375,7 +399,7 @@ MachineBasicBlock *BranchFolder::SplitMBBAt(MachineBasicBlock &CurMBB,
RS->forward(prior(CurMBB.end()));
BitVector RegsLiveAtExit(TRI->getNumRegs());
RS->getRegsUsed(RegsLiveAtExit, false);
- for (unsigned int i=0, e=TRI->getNumRegs(); i!=e; i++)
+ for (unsigned int i = 0, e = TRI->getNumRegs(); i != e; i++)
if (RegsLiveAtExit[i])
NewMBB->addLiveIn(i);
}
@@ -404,8 +428,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
// branches temporarily for tail merging). In the case where CurMBB ends
// with a conditional branch to the next block, optimize by reversing the
// test and conditionally branching to SuccMBB instead.
-
-static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
+static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
const TargetInstrInfo *TII) {
MachineFunction *MF = CurMBB->getParent();
MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
@@ -425,24 +448,43 @@ static void FixTail(MachineBasicBlock* CurMBB, MachineBasicBlock *SuccBB,
TII->InsertBranch(*CurMBB, SuccBB, NULL, SmallVector<MachineOperand, 0>());
}
-static bool MergeCompare(const std::pair<unsigned,MachineBasicBlock*> &p,
- const std::pair<unsigned,MachineBasicBlock*> &q) {
- if (p.first < q.first)
- return true;
- else if (p.first > q.first)
- return false;
- else if (p.second->getNumber() < q.second->getNumber())
- return true;
- else if (p.second->getNumber() > q.second->getNumber())
- return false;
- else {
- // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
- // an object with itself.
+bool
+BranchFolder::MergePotentialsElt::operator<(const MergePotentialsElt &o) const {
+ if (getHash() < o.getHash())
+ return true;
+ else if (getHash() > o.getHash())
+ return false;
+ else if (getBlock()->getNumber() < o.getBlock()->getNumber())
+ return true;
+ else if (getBlock()->getNumber() > o.getBlock()->getNumber())
+ return false;
+ else {
+ // _GLIBCXX_DEBUG checks strict weak ordering, which involves comparing
+ // an object with itself.
#ifndef _GLIBCXX_DEBUG
- llvm_unreachable("Predecessor appears twice");
+ llvm_unreachable("Predecessor appears twice");
#endif
- return false;
+ return false;
+ }
+}
+
+/// CountTerminators - Count the number of terminators in the given
+/// block and set I to the position of the first non-terminator, if there
+/// is one, or MBB->end() otherwise.
+static unsigned CountTerminators(MachineBasicBlock *MBB,
+ MachineBasicBlock::iterator &I) {
+ I = MBB->end();
+ unsigned NumTerms = 0;
+ for (;;) {
+ if (I == MBB->begin()) {
+ I = MBB->end();
+ break;
}
+ --I;
+ if (!I->getDesc().isTerminator()) break;
+ ++NumTerms;
+ }
+ return NumTerms;
}
/// ProfitableToMerge - Check if two machine basic blocks have a common tail
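A minimal standalone sketch (not part of the patch) of the ordering MergePotentialsElt::operator< above establishes: candidates sort by hash first, then by block number, so std::stable_sort groups blocks with identical end sequences while remaining a strict weak ordering. The struct below is a simplified stand-in.

// Sketch: (hash, block number) ordering for grouping merge candidates.
#include <algorithm>
#include <vector>

struct MergeCandidate {
  unsigned Hash;       // hash of the block's trailing instructions
  int BlockNumber;     // unique per block, used as a tie-breaker

  bool operator<(const MergeCandidate &O) const {
    if (Hash != O.Hash)
      return Hash < O.Hash;
    // Block numbers are unique, so an equal pair can only be a self-comparison,
    // which a strict weak ordering must answer with "not less".
    return BlockNumber < O.BlockNumber;
  }
};

static void sortCandidates(std::vector<MergeCandidate> &MP) {
  std::stable_sort(MP.begin(), MP.end());   // equal-hash runs end up adjacent
}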
@@ -454,21 +496,52 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
unsigned minCommonTailLength,
unsigned &CommonTailLen,
MachineBasicBlock::iterator &I1,
- MachineBasicBlock::iterator &I2) {
+ MachineBasicBlock::iterator &I2,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
CommonTailLen = ComputeCommonTailLength(MBB1, MBB2, I1, I2);
MachineFunction *MF = MBB1->getParent();
- if (CommonTailLen >= minCommonTailLength)
- return true;
-
if (CommonTailLen == 0)
return false;
- // If we are optimizing for code size, 1 instruction in common is enough if
- // we don't have to split a block. At worst we will be replacing a
- // fallthrough into the common tail with a branch, which at worst breaks
- // even with falling through into the duplicated common tail.
- if (MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+ // It's almost always profitable to merge any number of non-terminator
+ // instructions with the block that falls through into the common successor.
+ if (MBB1 == PredBB || MBB2 == PredBB) {
+ MachineBasicBlock::iterator I;
+ unsigned NumTerms = CountTerminators(MBB1 == PredBB ? MBB2 : MBB1, I);
+ if (CommonTailLen > NumTerms)
+ return true;
+ }
+
+ // If one of the blocks can be completely merged and happens to be in
+ // a position where the other could fall through into it, merge any number
+ // of instructions, because it can be done without a branch.
+ // TODO: If the blocks are not adjacent, move one of them so that they are?
+ if (MBB1->isLayoutSuccessor(MBB2) && I2 == MBB2->begin())
+ return true;
+ if (MBB2->isLayoutSuccessor(MBB1) && I1 == MBB1->begin())
+ return true;
+
+ // If both blocks have an unconditional branch temporarily stripped out,
+ // count that as an additional common instruction for the following
+ // heuristics.
+ unsigned EffectiveTailLen = CommonTailLen;
+ if (SuccBB && MBB1 != PredBB && MBB2 != PredBB &&
+ !MBB1->back().getDesc().isBarrier() &&
+ !MBB2->back().getDesc().isBarrier())
+ ++EffectiveTailLen;
+
+ // Check if the common tail is long enough to be worthwhile.
+ if (EffectiveTailLen >= minCommonTailLength)
+ return true;
+
+ // If we are optimizing for code size, 2 instructions in common is enough if
+ // we don't have to split a block. At worst we will be introducing 1 new
+ // branch instruction, which is likely to be smaller than the 2
+ // instructions that would be deleted in the merge.
+ if (EffectiveTailLen >= 2 &&
+ MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
(I1 == MBB1->begin() || I2 == MBB2->begin()))
return true;
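A minimal standalone sketch (not part of the patch) of the measurement behind the profitability checks above: walk two blocks backwards, count identical trailing instructions, and compare against a minimum merge size (cf. the -tail-merge-size option added earlier in this file). Instructions are modeled as strings and the "whole block" shortcut only loosely mirrors the layout-successor special cases; all names are hypothetical.

// Sketch: common-tail length plus a minimum-length profitability test.
#include <cstddef>
#include <string>
#include <vector>

typedef std::vector<std::string> Block;

static unsigned commonTailLength(const Block &B1, const Block &B2) {
  unsigned Len = 0;
  std::size_t I1 = B1.size(), I2 = B2.size();
  while (I1 != 0 && I2 != 0 && B1[I1 - 1] == B2[I2 - 1]) {
    --I1; --I2;
    ++Len;
  }
  return Len;
}

static bool profitableToMerge(const Block &B1, const Block &B2,
                              unsigned MinCommonTailLength) {
  unsigned Common = commonTailLength(B1, B2);
  if (Common == 0)
    return false;
  // If one block is nothing but the common tail, merging it away can be done
  // without introducing a new branch, so any length is worthwhile.
  if (Common == B1.size() || Common == B2.size())
    return true;
  return Common >= MinCommonTailLength;
}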
@@ -476,40 +549,44 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1,
}
/// ComputeSameTails - Look through all the blocks in MergePotentials that have
-/// hash CurHash (guaranteed to match the last element). Build the vector
+/// hash CurHash (guaranteed to match the last element). Build the vector
/// SameTails of all those that have the (same) largest number of instructions
/// in common of any pair of these blocks. SameTails entries contain an
-/// iterator into MergePotentials (from which the MachineBasicBlock can be
-/// found) and a MachineBasicBlock::iterator into that MBB indicating the
+/// iterator into MergePotentials (from which the MachineBasicBlock can be
+/// found) and a MachineBasicBlock::iterator into that MBB indicating the
/// instruction where the matching code sequence begins.
/// Order of elements in SameTails is the reverse of the order in which
/// those blocks appear in MergePotentials (where they are not necessarily
/// consecutive).
-unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
- unsigned minCommonTailLength) {
+unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
+ unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
unsigned maxCommonTailLength = 0U;
SameTails.clear();
MachineBasicBlock::iterator TrialBBI1, TrialBBI2;
MPIterator HighestMPIter = prior(MergePotentials.end());
for (MPIterator CurMPIter = prior(MergePotentials.end()),
- B = MergePotentials.begin();
- CurMPIter!=B && CurMPIter->first==CurHash;
+ B = MergePotentials.begin();
+ CurMPIter != B && CurMPIter->getHash() == CurHash;
--CurMPIter) {
- for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) {
+ for (MPIterator I = prior(CurMPIter); I->getHash() == CurHash ; --I) {
unsigned CommonTailLen;
- if (ProfitableToMerge(CurMPIter->second, I->second, minCommonTailLength,
- CommonTailLen, TrialBBI1, TrialBBI2)) {
+ if (ProfitableToMerge(CurMPIter->getBlock(), I->getBlock(),
+ minCommonTailLength,
+ CommonTailLen, TrialBBI1, TrialBBI2,
+ SuccBB, PredBB)) {
if (CommonTailLen > maxCommonTailLength) {
SameTails.clear();
maxCommonTailLength = CommonTailLen;
HighestMPIter = CurMPIter;
- SameTails.push_back(std::make_pair(CurMPIter, TrialBBI1));
+ SameTails.push_back(SameTailElt(CurMPIter, TrialBBI1));
}
if (HighestMPIter == CurMPIter &&
CommonTailLen == maxCommonTailLength)
- SameTails.push_back(std::make_pair(I, TrialBBI2));
+ SameTails.push_back(SameTailElt(I, TrialBBI2));
}
- if (I==B)
+ if (I == B)
break;
}
}
@@ -518,21 +595,21 @@ unsigned BranchFolder::ComputeSameTails(unsigned CurHash,
/// RemoveBlocksWithHash - Remove all blocks with hash CurHash from
/// MergePotentials, restoring branches at ends of blocks as appropriate.
-void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
- MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB) {
+void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
MPIterator CurMPIter, B;
- for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
- CurMPIter->first==CurHash;
+ for (CurMPIter = prior(MergePotentials.end()), B = MergePotentials.begin();
+ CurMPIter->getHash() == CurHash;
--CurMPIter) {
// Put the unconditional branch back, if we need one.
- MachineBasicBlock *CurMBB = CurMPIter->second;
+ MachineBasicBlock *CurMBB = CurMPIter->getBlock();
if (SuccBB && CurMBB != PredBB)
FixTail(CurMBB, SuccBB, TII);
- if (CurMPIter==B)
+ if (CurMPIter == B)
break;
}
- if (CurMPIter->first!=CurHash)
+ if (CurMPIter->getHash() != CurHash)
CurMPIter++;
MergePotentials.erase(CurMPIter, MergePotentials.end());
}
@@ -541,35 +618,37 @@ void BranchFolder::RemoveBlocksWithHash(unsigned CurHash,
/// only of the common tail. Create a block that does by splitting one.
unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
unsigned maxCommonTailLength) {
- unsigned i, commonTailIndex;
+ unsigned commonTailIndex = 0;
unsigned TimeEstimate = ~0U;
- for (i=0, commonTailIndex=0; i<SameTails.size(); i++) {
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
// Use PredBB if possible; that doesn't require a new branch.
- if (SameTails[i].first->second==PredBB) {
+ if (SameTails[i].getBlock() == PredBB) {
commonTailIndex = i;
break;
}
// Otherwise, make a (fairly bogus) choice based on estimate of
// how long it will take the various blocks to execute.
- unsigned t = EstimateRuntime(SameTails[i].first->second->begin(),
- SameTails[i].second);
- if (t<=TimeEstimate) {
+ unsigned t = EstimateRuntime(SameTails[i].getBlock()->begin(),
+ SameTails[i].getTailStartPos());
+ if (t <= TimeEstimate) {
TimeEstimate = t;
commonTailIndex = i;
}
}
- MachineBasicBlock::iterator BBI = SameTails[commonTailIndex].second;
- MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+ MachineBasicBlock::iterator BBI =
+ SameTails[commonTailIndex].getTailStartPos();
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
- DEBUG(errs() << "\nSplitting " << MBB->getNumber() << ", size "
+ DEBUG(errs() << "\nSplitting BB#" << MBB->getNumber() << ", size "
<< maxCommonTailLength);
MachineBasicBlock *newMBB = SplitMBBAt(*MBB, BBI);
- SameTails[commonTailIndex].first->second = newMBB;
- SameTails[commonTailIndex].second = newMBB->begin();
+ SameTails[commonTailIndex].setBlock(newMBB);
+ SameTails[commonTailIndex].setTailStartPos(newMBB->begin());
+
// If we split PredBB, newMBB is the new predecessor.
- if (PredBB==MBB)
+ if (PredBB == MBB)
PredBB = newMBB;
return commonTailIndex;
@@ -579,35 +658,49 @@ unsigned BranchFolder::CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
// successor, or all have no successor) can be tail-merged. If there is a
// successor, any blocks in MergePotentials that are not tail-merged and
// are not immediately before Succ must have an unconditional branch to
-// Succ added (but the predecessor/successor lists need no adjustment).
+// Succ added (but the predecessor/successor lists need no adjustment).
// The lone predecessor of Succ that falls through into Succ,
// if any, is given in PredBB.
-bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
- MachineBasicBlock* PredBB) {
+bool BranchFolder::TryTailMergeBlocks(MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB) {
bool MadeChange = false;
- // It doesn't make sense to save a single instruction since tail merging
- // will add a jump.
- // FIXME: Ask the target to provide the threshold?
- unsigned minCommonTailLength = (SuccBB ? 1 : 2) + 1;
-
- DEBUG(errs() << "\nTryMergeBlocks " << MergePotentials.size() << '\n');
+ // Except for the special cases below, tail-merge if there are at least
+ // this many instructions in common.
+ unsigned minCommonTailLength = TailMergeSize;
+
+ DEBUG(errs() << "\nTryTailMergeBlocks: ";
+ for (unsigned i = 0, e = MergePotentials.size(); i != e; ++i)
+ errs() << "BB#" << MergePotentials[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", ");
+ errs() << "\n";
+ if (SuccBB) {
+ errs() << " with successor BB#" << SuccBB->getNumber() << '\n';
+ if (PredBB)
+ errs() << " which has fall-through from BB#"
+ << PredBB->getNumber() << "\n";
+ }
+ errs() << "Looking for common tails of at least "
+ << minCommonTailLength << " instruction"
+ << (minCommonTailLength == 1 ? "" : "s") << '\n';
+ );
// Sort by hash value so that blocks with identical end sequences sort
// together.
- std::stable_sort(MergePotentials.begin(), MergePotentials.end(),MergeCompare);
+ std::stable_sort(MergePotentials.begin(), MergePotentials.end());
// Walk through equivalence sets looking for actual exact matches.
while (MergePotentials.size() > 1) {
- unsigned CurHash = prior(MergePotentials.end())->first;
-
+ unsigned CurHash = MergePotentials.back().getHash();
+
// Build SameTails, identifying the set of blocks with this hash code
// and with the maximum number of instructions in common.
- unsigned maxCommonTailLength = ComputeSameTails(CurHash,
- minCommonTailLength);
+ unsigned maxCommonTailLength = ComputeSameTails(CurHash,
+ minCommonTailLength,
+ SuccBB, PredBB);
- // If we didn't find any pair that has at least minCommonTailLength
+ // If we didn't find any pair that has at least minCommonTailLength
// instructions in common, remove all blocks with this hash code and retry.
if (SameTails.empty()) {
RemoveBlocksWithHash(CurHash, SuccBB, PredBB);
@@ -618,36 +711,58 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
// block, which we can't jump to), we can treat all blocks with this same
// tail at once. Use PredBB if that is one of the possibilities, as that
// will not introduce any extra branches.
- MachineBasicBlock *EntryBB = MergePotentials.begin()->second->
- getParent()->begin();
- unsigned int commonTailIndex, i;
- for (commonTailIndex=SameTails.size(), i=0; i<SameTails.size(); i++) {
- MachineBasicBlock *MBB = SameTails[i].first->second;
- if (MBB->begin() == SameTails[i].second && MBB != EntryBB) {
- commonTailIndex = i;
- if (MBB==PredBB)
+ MachineBasicBlock *EntryBB = MergePotentials.begin()->getBlock()->
+ getParent()->begin();
+ unsigned commonTailIndex = SameTails.size();
+ // If there are two blocks, check to see if one can be made to fall through
+ // into the other.
+ if (SameTails.size() == 2 &&
+ SameTails[0].getBlock()->isLayoutSuccessor(SameTails[1].getBlock()) &&
+ SameTails[1].tailIsWholeBlock())
+ commonTailIndex = 1;
+ else if (SameTails.size() == 2 &&
+ SameTails[1].getBlock()->isLayoutSuccessor(
+ SameTails[0].getBlock()) &&
+ SameTails[0].tailIsWholeBlock())
+ commonTailIndex = 0;
+ else {
+ // Otherwise just pick one, favoring the fall-through predecessor if
+ // there is one.
+ for (unsigned i = 0, e = SameTails.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = SameTails[i].getBlock();
+ if (MBB == EntryBB && SameTails[i].tailIsWholeBlock())
+ continue;
+ if (MBB == PredBB) {
+ commonTailIndex = i;
break;
+ }
+ if (SameTails[i].tailIsWholeBlock())
+ commonTailIndex = i;
}
}
- if (commonTailIndex==SameTails.size()) {
+ if (commonTailIndex == SameTails.size() ||
+ (SameTails[commonTailIndex].getBlock() == PredBB &&
+ !SameTails[commonTailIndex].tailIsWholeBlock())) {
// None of the blocks consist entirely of the common tail.
// Split a block so that one does.
- commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
+ commonTailIndex = CreateCommonTailOnlyBlock(PredBB, maxCommonTailLength);
}
- MachineBasicBlock *MBB = SameTails[commonTailIndex].first->second;
+ MachineBasicBlock *MBB = SameTails[commonTailIndex].getBlock();
// MBB is common tail. Adjust all other BB's to jump to this one.
// Traversal must be forwards so erases work.
- DEBUG(errs() << "\nUsing common tail " << MBB->getNumber() << " for ");
- for (unsigned int i=0; i<SameTails.size(); ++i) {
- if (commonTailIndex==i)
+ DEBUG(errs() << "\nUsing common tail in BB#" << MBB->getNumber()
+ << " for ");
+ for (unsigned int i=0, e = SameTails.size(); i != e; ++i) {
+ if (commonTailIndex == i)
continue;
- DEBUG(errs() << SameTails[i].first->second->getNumber() << ",");
+ DEBUG(errs() << "BB#" << SameTails[i].getBlock()->getNumber()
+ << (i == e-1 ? "" : ", "));
// Hack the end off BB i, making it jump to BB commonTailIndex instead.
- ReplaceTailWithBranchTo(SameTails[i].second, MBB);
+ ReplaceTailWithBranchTo(SameTails[i].getTailStartPos(), MBB);
// BB i is no longer a predecessor of SuccBB; remove it from the worklist.
- MergePotentials.erase(SameTails[i].first);
+ MergePotentials.erase(SameTails[i].getMPIter());
}
DEBUG(errs() << "\n");
// We leave commonTailIndex in the worklist in case there are other blocks
@@ -660,26 +775,27 @@ bool BranchFolder::TryMergeBlocks(MachineBasicBlock *SuccBB,
bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
if (!EnableTailMerge) return false;
-
+
bool MadeChange = false;
// First find blocks with no successors.
MergePotentials.clear();
for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
if (I->succ_empty())
- MergePotentials.push_back(std::make_pair(HashEndOfMBB(I, 2U), I));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(I, 2U), I));
}
+
// See if we can do any tail merging on those.
if (MergePotentials.size() < TailMergeThreshold &&
MergePotentials.size() >= 2)
- MadeChange |= TryMergeBlocks(NULL, NULL);
+ MadeChange |= TryTailMergeBlocks(NULL, NULL);
// Look at blocks (IBB) with multiple predecessors (PBB).
// We change each predecessor to a canonical form, by
// (1) temporarily removing any unconditional branch from the predecessor
// to IBB, and
// (2) alter conditional branches so they branch to the other block
- // not IBB; this may require adding back an unconditional branch to IBB
+ // not IBB; this may require adding back an unconditional branch to IBB
// later, where there wasn't one coming in. E.g.
// Bcc IBB
// fallthrough to QBB
@@ -693,18 +809,19 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// a compile-time infinite loop repeatedly doing and undoing the same
// transformations.)
- for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
+ for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ I != E; ++I) {
if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
MachineBasicBlock *IBB = I;
MachineBasicBlock *PredBB = prior(I);
MergePotentials.clear();
- for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
+ for (MachineBasicBlock::pred_iterator P = I->pred_begin(),
E2 = I->pred_end();
P != E2; ++P) {
- MachineBasicBlock* PBB = *P;
+ MachineBasicBlock *PBB = *P;
// Skip blocks that loop to themselves, can't tail merge these.
- if (PBB==IBB)
+ if (PBB == IBB)
continue;
// Visit each predecessor only once.
if (!UniquePreds.insert(PBB))
@@ -715,7 +832,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// Failing case: IBB is the target of a cbr, and
// we cannot reverse the branch.
SmallVector<MachineOperand, 4> NewCond(Cond);
- if (!Cond.empty() && TBB==IBB) {
+ if (!Cond.empty() && TBB == IBB) {
if (TII->ReverseBranchCondition(NewCond))
continue;
// This is the QBB case described above
@@ -727,20 +844,20 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// to have a bit in the edge so we didn't have to do all this.
if (IBB->isLandingPad()) {
MachineFunction::iterator IP = PBB; IP++;
- MachineBasicBlock* PredNextBB = NULL;
- if (IP!=MF.end())
+ MachineBasicBlock *PredNextBB = NULL;
+ if (IP != MF.end())
PredNextBB = IP;
- if (TBB==NULL) {
- if (IBB!=PredNextBB) // fallthrough
+ if (TBB == NULL) {
+ if (IBB != PredNextBB) // fallthrough
continue;
} else if (FBB) {
- if (TBB!=IBB && FBB!=IBB) // cbr then ubr
+ if (TBB != IBB && FBB != IBB) // cbr then ubr
continue;
} else if (Cond.empty()) {
- if (TBB!=IBB) // ubr
+ if (TBB != IBB) // ubr
continue;
} else {
- if (TBB!=IBB && IBB!=PredNextBB) // cbr
+ if (TBB != IBB && IBB != PredNextBB) // cbr
continue;
}
}
@@ -749,19 +866,20 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
TII->RemoveBranch(*PBB);
if (!Cond.empty())
// reinsert conditional branch only, for now
- TII->InsertBranch(*PBB, (TBB==IBB) ? FBB : TBB, 0, NewCond);
+ TII->InsertBranch(*PBB, (TBB == IBB) ? FBB : TBB, 0, NewCond);
}
- MergePotentials.push_back(std::make_pair(HashEndOfMBB(PBB, 1U), *P));
+ MergePotentials.push_back(MergePotentialsElt(HashEndOfMBB(PBB, 1U),
+ *P));
}
}
- if (MergePotentials.size() >= 2)
- MadeChange |= TryMergeBlocks(I, PredBB);
- // Reinsert an unconditional branch if needed.
- // The 1 below can occur as a result of removing blocks in TryMergeBlocks.
- PredBB = prior(I); // this may have been changed in TryMergeBlocks
- if (MergePotentials.size()==1 &&
- MergePotentials.begin()->second != PredBB)
- FixTail(MergePotentials.begin()->second, I, TII);
+ if (MergePotentials.size() >= 2)
+ MadeChange |= TryTailMergeBlocks(IBB, PredBB);
+ // Reinsert an unconditional branch if needed.
+ // The 1 below can occur as a result of removing blocks in TryTailMergeBlocks.
+ PredBB = prior(I); // this may have been changed in TryTailMergeBlocks
+ if (MergePotentials.size() == 1 &&
+ MergePotentials.begin()->getBlock() != PredBB)
+ FixTail(MergePotentials.begin()->getBlock(), IBB, TII);
}
}
return MadeChange;
@@ -773,14 +891,14 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
bool MadeChange = false;
-
+
// Make sure blocks are numbered in order
MF.RenumberBlocks();
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
MachineBasicBlock *MBB = I++;
MadeChange |= OptimizeBlock(MBB);
-
+
// If it is dead, remove it.
if (MBB->pred_empty()) {
RemoveDeadBlock(MBB);
@@ -801,7 +919,7 @@ bool BranchFolder::OptimizeBranches(MachineFunction &MF) {
///
bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
bool BranchUnAnalyzable,
- MachineBasicBlock *TBB,
+ MachineBasicBlock *TBB,
MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond) {
MachineFunction::iterator Fallthrough = CurBB;
@@ -809,14 +927,22 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
// If FallthroughBlock is off the end of the function, it can't fall through.
if (Fallthrough == CurBB->getParent()->end())
return false;
-
+
// If FallthroughBlock isn't a successor of CurBB, no fallthrough is possible.
if (!CurBB->isSuccessor(Fallthrough))
return false;
-
- // If we couldn't analyze the branch, assume it could fall through.
- if (BranchUnAnalyzable) return true;
-
+
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicable check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier. This
+ // is over-conservative though, because if an instruction isn't actually
+ // predicated we could still treat it like a barrier.
+ if (BranchUnAnalyzable)
+ return CurBB->empty() || !CurBB->back().getDesc().isBarrier() ||
+ CurBB->back().getDesc().isPredicable();
+
// If there is no branch, control always falls through.
if (TBB == 0) return true;
@@ -825,11 +951,11 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB,
if (MachineFunction::iterator(TBB) == Fallthrough ||
MachineFunction::iterator(FBB) == Fallthrough)
return true;
-
- // If it's an unconditional branch to some block not the fall through, it
+
+ // If it's an unconditional branch to some block not the fall through, it
// doesn't fall through.
if (Cond.empty()) return false;
-
+
// Otherwise, if it is conditional and has no explicit false block, it falls
// through.
return FBB == 0;
@@ -853,14 +979,14 @@ bool BranchFolder::CanFallThrough(MachineBasicBlock *CurBB) {
/// fall-through to MBB1 than to fall through into MBB2. This has to return
/// a strict ordering, returning true for both (MBB1,MBB2) and (MBB2,MBB1) will
/// result in infinite loops.
-static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
+static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
MachineBasicBlock *MBB2) {
// Right now, we use a simple heuristic. If MBB2 ends with a call, and
// MBB1 doesn't, we prefer to fall through into MBB1. This allows us to
// optimize branches that branch to either a return block or an assert block
// into a fallthrough to the return.
if (MBB1->empty() || MBB2->empty()) return false;
-
+
// If there is a clear successor ordering we make sure that one block
// will fall through to the next
if (MBB1->isSuccessor(MBB2)) return true;
@@ -871,14 +997,153 @@ static bool IsBetterFallthrough(MachineBasicBlock *MBB1,
return MBB2I->getDesc().isCall() && !MBB1I->getDesc().isCall();
}
+/// TailDuplicateBlocks - Look for small blocks that are unconditionally
+/// branched to and do not fall through. Tail-duplicate their instructions
+/// into their predecessors to eliminate (dynamic) branches.
+bool BranchFolder::TailDuplicateBlocks(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ // Make sure blocks are numbered in order
+ MF.RenumberBlocks();
+
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
+ MachineBasicBlock *MBB = I++;
+
+ // Only duplicate blocks that end with unconditional branches.
+ if (CanFallThrough(MBB))
+ continue;
+
+ MadeChange |= TailDuplicate(MBB, MF);
+
+ // If it is dead, remove it.
+ if (MBB->pred_empty()) {
+ RemoveDeadBlock(MBB);
+ MadeChange = true;
+ ++NumDeadBlocks;
+ }
+ }
+ return MadeChange;
+}
+
+/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
+/// of its predecessors.
+bool BranchFolder::TailDuplicate(MachineBasicBlock *TailBB,
+ MachineFunction &MF) {
+ // Don't try to tail-duplicate single-block loops.
+ if (TailBB->isSuccessor(TailBB))
+ return false;
+
+ // Set the limit on the number of instructions to duplicate, with a default
+ // of one less than the tail-merge threshold. When optimizing for size,
+ // duplicate only one, because one branch instruction can be eliminated to
+ // compensate for the duplication.
+ unsigned MaxDuplicateCount =
+ MF.getFunction()->hasFnAttr(Attribute::OptimizeForSize) ?
+ 1 : TII->TailDuplicationLimit(*TailBB, TailMergeSize - 1);
+
+ // Check the instructions in the block to determine whether tail-duplication
+ // is invalid or unlikely to be profitable.
+ unsigned i = 0;
+ bool HasCall = false;
+ for (MachineBasicBlock::iterator I = TailBB->begin();
+ I != TailBB->end(); ++I, ++i) {
+ // Non-duplicable things shouldn't be tail-duplicated.
+ if (I->getDesc().isNotDuplicable()) return false;
+ // Don't duplicate more than the threshold.
+ if (i == MaxDuplicateCount) return false;
+ // Remember if we saw a call.
+ if (I->getDesc().isCall()) HasCall = true;
+ }
+ // Heuristically, don't tail-duplicate calls if it would expand code size,
+ // as it's less likely to be worth the extra cost.
+ if (i > 1 && HasCall)
+ return false;
+
+ // Iterate through all the unique predecessors and tail-duplicate this
+ // block into them, if possible. Copying the list ahead of time also
+ // avoids trouble with the predecessor list reallocating.
+ bool Changed = false;
+ SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+
+ assert(TailBB != PredBB &&
+ "Single-block loop should have been rejected earlier!");
+ if (PredBB->succ_size() > 1) continue;
+
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true))
+ continue;
+ if (!PredCond.empty())
+ continue;
+ // EH edges are ignored by AnalyzeBranch.
+ if (PredBB->succ_size() != 1)
+ continue;
+ // Don't duplicate into a fall-through predecessor (at least for now).
+ if (PredBB->isLayoutSuccessor(TailBB) && CanFallThrough(PredBB))
+ continue;
+
+ DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
+ << "From Succ: " << *TailBB);
+
+ // Remove PredBB's unconditional branch.
+ TII->RemoveBranch(*PredBB);
+ // Clone the contents of TailBB into PredBB.
+ for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
+ I != E; ++I) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(I);
+ PredBB->insert(PredBB->end(), NewMI);
+ }
+
+ // Update the CFG.
+ PredBB->removeSuccessor(PredBB->succ_begin());
+ assert(PredBB->succ_empty() &&
+ "TailDuplicate called on block with multiple successors!");
+ for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
+ E = TailBB->succ_end(); I != E; ++I)
+ PredBB->addSuccessor(*I);
+
+ Changed = true;
+ }
+
+ // If TailBB was duplicated into all its predecessors except for the prior
+ // block, which falls through unconditionally, move the contents of this
+ // block into the prior block.
+ MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
+ SmallVector<MachineOperand, 4> PriorCond;
+ bool PriorUnAnalyzable =
+ TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
+ TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 &&
+ !TailBB->hasAddressTaken()) {
+ DEBUG(errs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *TailBB);
+ PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(TailBB);
+ Changed = true;
+ }
+
+ return Changed;
+}
+
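A minimal standalone sketch (not part of the patch) of the transformation TailDuplicate above performs, on a toy CFG: each predecessor that ends in an unconditional branch to the tail block receives its own copy of the tail's instructions and inherits the tail's successors. All names below are hypothetical.

// Sketch: duplicate a tail block into a predecessor and rewire the CFG.
#include <string>
#include <vector>

struct ToyBlock {
  std::vector<std::string> Insts;   // instructions, minus any branch to Tail
  std::vector<ToyBlock *> Succs;    // successor list
};

static void tailDuplicateInto(ToyBlock &Pred, const ToyBlock &Tail) {
  // Clone the tail's instructions into the predecessor...
  Pred.Insts.insert(Pred.Insts.end(), Tail.Insts.begin(), Tail.Insts.end());
  // ...and rewire the CFG: Pred no longer goes through Tail, it goes where
  // Tail went.
  Pred.Succs.clear();
  Pred.Succs.insert(Pred.Succs.end(), Tail.Succs.begin(), Tail.Succs.end());
}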
/// OptimizeBlock - Analyze and optimize control flow related to the specified
/// block. This is never called on the entry block.
bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
bool MadeChange = false;
+ MachineFunction &MF = *MBB->getParent();
+ReoptimizeBlock:
MachineFunction::iterator FallThrough = MBB;
++FallThrough;
-
+
// If this block is empty, make everyone use its fall-through, not the block
// explicitly. Landing pads should not do this since the landing-pad table
// points to this block. Blocks with their addresses taken shouldn't be
@@ -886,8 +1151,8 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
if (MBB->empty() && !MBB->isLandingPad() && !MBB->hasAddressTaken()) {
// Dead block? Leave for cleanup later.
if (MBB->pred_empty()) return MadeChange;
-
- if (FallThrough == MBB->getParent()->end()) {
+
+ if (FallThrough == MF.end()) {
// TODO: Simplify preds to not branch here if possible!
} else {
// Rewrite all predecessors of the old block to go to the fallthrough
@@ -898,8 +1163,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
// If MBB was the target of a jump table, update jump tables to go to the
// fallthrough instead.
- MBB->getParent()->getJumpTableInfo()->
- ReplaceMBBInJumpTables(MBB, FallThrough);
+ MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, FallThrough);
MadeChange = true;
}
return MadeChange;
@@ -917,29 +1181,49 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// If the CFG for the prior block has extra edges, remove them.
MadeChange |= PrevBB.CorrectExtraCFGEdges(PriorTBB, PriorFBB,
!PriorCond.empty());
-
+
// If the previous branch is conditional and both conditions go to the same
// destination, remove the branch, replacing it with an unconditional one or
// a fall-through.
if (PriorTBB && PriorTBB == PriorFBB) {
TII->RemoveBranch(PrevBB);
- PriorCond.clear();
+ PriorCond.clear();
if (PriorTBB != MBB)
TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
-
+
+ // If the previous block unconditionally falls through to this block and
+ // this block has no other predecessors, move the contents of this block
+ // into the prior block. This doesn't usually happen when SimplifyCFG
+ // has been used, but it can happen if tail merging splits a fall-through
+ // predecessor of a block.
+ // This has to check PrevBB->succ_size() because EH edges are ignored by
+ // AnalyzeBranch.
+ if (PriorCond.empty() && !PriorTBB && MBB->pred_size() == 1 &&
+ PrevBB.succ_size() == 1 &&
+ !MBB->hasAddressTaken()) {
+ DEBUG(errs() << "\nMerging into block: " << PrevBB
+ << "From MBB: " << *MBB);
+ PrevBB.splice(PrevBB.end(), MBB, MBB->begin(), MBB->end());
+ PrevBB.removeSuccessor(PrevBB.succ_begin());
+ assert(PrevBB.succ_empty());
+ PrevBB.transferSuccessors(MBB);
+ MadeChange = true;
+ return MadeChange;
+ }
+
// If the previous branch *only* branches to *this* block (conditional or
// not) remove the branch.
if (PriorTBB == MBB && PriorFBB == 0) {
TII->RemoveBranch(PrevBB);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
-
+
// If the prior block branches somewhere else on the condition and here if
// the condition is false, remove the uncond second branch.
if (PriorFBB == MBB) {
@@ -947,9 +1231,9 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->InsertBranch(PrevBB, PriorTBB, 0, PriorCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
-
+
// If the prior block branches here on true and somewhere else on false, and
// if the branch condition is reversible, reverse the branch to create a
// fall-through.
@@ -960,10 +1244,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->InsertBranch(PrevBB, PriorFBB, 0, NewPriorCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
-
+
// If this block has no successors (e.g. it is a return block or ends with
// a call to a no-return function like abort or __cxa_throw) and if the pred
// falls through into this block, and if it would otherwise fall through
@@ -976,13 +1260,13 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
MachineFunction::iterator(PriorTBB) == FallThrough &&
!CanFallThrough(MBB)) {
bool DoTransform = true;
-
+
// We have to be careful that the succs of PredBB aren't both no-successor
// blocks. If neither has successors and if PredBB is the second from
// last block in the function, we'd just keep swapping the two blocks for
// last. Only do the swap if one is clearly better to fall through than
// the other.
- if (FallThrough == --MBB->getParent()->end() &&
+ if (FallThrough == --MF.end() &&
!IsBetterFallthrough(PriorTBB, MBB))
DoTransform = false;
@@ -1000,20 +1284,20 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
if (DoTransform && !MBB->succ_empty() &&
(!CanFallThrough(PriorTBB) || PriorTBB->empty()))
DoTransform = false;
-
-
+
+
if (DoTransform) {
// Reverse the branch so we will fall through on the previous true cond.
SmallVector<MachineOperand, 4> NewPriorCond(PriorCond);
if (!TII->ReverseBranchCondition(NewPriorCond)) {
DEBUG(errs() << "\nMoving MBB: " << *MBB
<< "To make fallthrough to: " << *PriorTBB << "\n");
-
+
TII->RemoveBranch(PrevBB);
TII->InsertBranch(PrevBB, MBB, 0, NewPriorCond);
// Move this block to the end of the function.
- MBB->moveAfter(--MBB->getParent()->end());
+ MBB->moveAfter(--MF.end());
MadeChange = true;
++NumBranchOpts;
return MadeChange;
@@ -1021,7 +1305,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
}
}
-
+
// Analyze the branch in the current block.
MachineBasicBlock *CurTBB = 0, *CurFBB = 0;
SmallVector<MachineOperand, 4> CurCond;
@@ -1030,7 +1314,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// If the CFG for the prior block has extra edges, remove them.
MadeChange |= MBB->CorrectExtraCFGEdges(CurTBB, CurFBB, !CurCond.empty());
- // If this is a two-way branch, and the FBB branches to this block, reverse
+ // If this is a two-way branch, and the FBB branches to this block, reverse
// the condition so the single-basic-block loop is faster. Instead of:
// Loop: xxx; jcc Out; jmp Loop
// we want:
@@ -1042,14 +1326,13 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
TII->InsertBranch(*MBB, CurFBB, CurTBB, NewCond);
MadeChange = true;
++NumBranchOpts;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
-
-
+
// If this branch is the only thing in its block, see if we can forward
// other blocks across it.
- if (CurTBB && CurCond.empty() && CurFBB == 0 &&
+ if (CurTBB && CurCond.empty() && CurFBB == 0 &&
MBB->begin()->getDesc().isBranch() && CurTBB != MBB &&
!MBB->hasAddressTaken()) {
// This block may contain just an unconditional branch. Because there can
@@ -1068,7 +1351,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
!PrevBB.isSuccessor(MBB)) {
// If the prior block falls through into us, turn it into an
// explicit branch to us to make updates simpler.
- if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
+ if (!PredHasNoFallThrough && PrevBB.isSuccessor(MBB) &&
PriorTBB != MBB && PriorFBB != MBB) {
if (PriorTBB == 0) {
assert(PriorCond.empty() && PriorFBB == 0 &&
@@ -1104,18 +1387,17 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
NewCurFBB, NewCurCond, true);
if (!NewCurUnAnalyzable && NewCurTBB && NewCurTBB == NewCurFBB) {
TII->RemoveBranch(*PMBB);
- NewCurCond.clear();
+ NewCurCond.clear();
TII->InsertBranch(*PMBB, NewCurTBB, 0, NewCurCond);
MadeChange = true;
++NumBranchOpts;
- PMBB->CorrectExtraCFGEdges(NewCurTBB, NewCurFBB, false);
+ PMBB->CorrectExtraCFGEdges(NewCurTBB, 0, false);
}
}
}
// Change any jumptables to go to the new MBB.
- MBB->getParent()->getJumpTableInfo()->
- ReplaceMBBInJumpTables(MBB, CurTBB);
+ MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, CurTBB);
if (DidChange) {
++NumBranchOpts;
MadeChange = true;
@@ -1123,7 +1405,7 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
}
}
-
+
// Add the branch back if the block is more than just an uncond branch.
TII->InsertBranch(*MBB, CurTBB, 0, CurCond);
}
@@ -1134,9 +1416,10 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// place to move this block where a fall-through will happen.
if (!CanFallThrough(&PrevBB, PriorUnAnalyzable,
PriorTBB, PriorFBB, PriorCond)) {
+
// Now we know that there was no fall-through into this block, check to
// see if it has a fall-through into its successor.
- bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
+ bool CurFallsThru = CanFallThrough(MBB, CurUnAnalyzable, CurTBB, CurFBB,
CurCond);
if (!MBB->isLandingPad()) {
@@ -1147,12 +1430,15 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Analyze the branch at the end of the pred.
MachineBasicBlock *PredBB = *PI;
MachineFunction::iterator PredFallthrough = PredBB; ++PredFallthrough;
- if (PredBB != MBB && !CanFallThrough(PredBB)
+ MachineBasicBlock *PredTBB, *PredFBB;
+ SmallVector<MachineOperand, 4> PredCond;
+ if (PredBB != MBB && !CanFallThrough(PredBB) &&
+ !TII->AnalyzeBranch(*PredBB, PredTBB, PredFBB, PredCond, true)
&& (!CurFallsThru || !CurTBB || !CurFBB)
&& (!CurFallsThru || MBB->getNumber() >= PredBB->getNumber())) {
// If the current block doesn't fall through, just move it.
// If the current block can fall through and does not end with a
- // conditional branch, we need to append an unconditional jump to
+ // conditional branch, we need to append an unconditional jump to
// the (current) next block. To avoid a possible compile-time
// infinite loop, move blocks only backward in this case.
// Also, if there are already 2 branches here, we cannot add a third;
@@ -1167,11 +1453,11 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
}
MBB->moveAfter(PredBB);
MadeChange = true;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
}
-
+
if (!CurFallsThru) {
// Check all successors to see if we can move this block before it.
for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(),
@@ -1179,26 +1465,29 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) {
// Analyze the branch at the end of the block before the succ.
MachineBasicBlock *SuccBB = *SI;
MachineFunction::iterator SuccPrev = SuccBB; --SuccPrev;
- std::vector<MachineOperand> SuccPrevCond;
-
+
// If this block doesn't already fall-through to that successor, and if
// the succ doesn't already have a block that can fall through into it,
// and if the successor isn't an EH destination, we can arrange for the
// fallthrough to happen.
- if (SuccBB != MBB && !CanFallThrough(SuccPrev) &&
+ if (SuccBB != MBB && &*SuccPrev != MBB &&
+ !CanFallThrough(SuccPrev) && !CurUnAnalyzable &&
!SuccBB->isLandingPad()) {
MBB->moveBefore(SuccBB);
MadeChange = true;
- return OptimizeBlock(MBB);
+ goto ReoptimizeBlock;
}
}
-
+
// Okay, there is no really great place to put this block. If, however,
// the block before this one would be a fall-through if this block were
// removed, move this block to the end of the function.
- if (FallThrough != MBB->getParent()->end() &&
+ MachineBasicBlock *PrevTBB, *PrevFBB;
+ SmallVector<MachineOperand, 4> PrevCond;
+ if (FallThrough != MF.end() &&
+ !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) &&
PrevBB.isSuccessor(FallThrough)) {
- MBB->moveAfter(--MBB->getParent()->end());
+ MBB->moveAfter(--MF.end());
MadeChange = true;
return MadeChange;
}
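
The recurring change in this file replaces the tail-recursive "return OptimizeBlock(MBB);" calls with "goto ReoptimizeBlock;", so repeated simplification of one block loops in place instead of growing the call stack, and the MadeChange flag set before re-optimizing is no longer lost through the recursive return. A standalone sketch of that control-flow shape; Block, SimplifyBranch and OptimizeBlockSketch are placeholder names for illustration, not part of the patch:

  #include <cstdio>

  struct Block { int PendingSimplifications; };

  // Placeholder for one folding step; returns true if it changed B.
  static bool SimplifyBranch(Block &B) {
    if (B.PendingSimplifications == 0) return false;
    --B.PendingSimplifications;
    return true;
  }

  // Same shape as the patched OptimizeBlock: a label at the top plus "goto"
  // instead of self-recursion, so the Changed flag accumulates across passes
  // over the same block and long chains of simplifications cannot overflow
  // the stack.
  static bool OptimizeBlockSketch(Block &B) {
    bool Changed = false;
  Reoptimize:
    if (SimplifyBranch(B)) {
      Changed = true;
      goto Reoptimize;
    }
    return Changed;
  }

  int main() {
    Block B = { 3 };
    std::printf("changed: %d\n", OptimizeBlockSketch(B) ? 1 : 0);
    return 0;
  }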
diff --git a/lib/CodeGen/BranchFolding.h b/lib/CodeGen/BranchFolding.h
index 9763e33..4920755 100644
--- a/lib/CodeGen/BranchFolding.h
+++ b/lib/CodeGen/BranchFolding.h
@@ -11,7 +11,6 @@
#define LLVM_CODEGEN_BRANCHFOLDING_HPP
#include "llvm/CodeGen/MachineBasicBlock.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
#include <vector>
namespace llvm {
@@ -20,6 +19,7 @@ namespace llvm {
class RegScavenger;
class TargetInstrInfo;
class TargetRegisterInfo;
+ template<typename T> class SmallVectorImpl;
class BranchFolder {
public:
@@ -30,11 +30,58 @@ namespace llvm {
const TargetRegisterInfo *tri,
MachineModuleInfo *mmi);
private:
- typedef std::pair<unsigned,MachineBasicBlock*> MergePotentialsElt;
+ class MergePotentialsElt {
+ unsigned Hash;
+ MachineBasicBlock *Block;
+ public:
+ MergePotentialsElt(unsigned h, MachineBasicBlock *b)
+ : Hash(h), Block(b) {}
+
+ unsigned getHash() const { return Hash; }
+ MachineBasicBlock *getBlock() const { return Block; }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ Block = MBB;
+ }
+
+ bool operator<(const MergePotentialsElt &) const;
+ };
typedef std::vector<MergePotentialsElt>::iterator MPIterator;
std::vector<MergePotentialsElt> MergePotentials;
- typedef std::pair<MPIterator, MachineBasicBlock::iterator> SameTailElt;
+ class SameTailElt {
+ MPIterator MPIter;
+ MachineBasicBlock::iterator TailStartPos;
+ public:
+ SameTailElt(MPIterator mp, MachineBasicBlock::iterator tsp)
+ : MPIter(mp), TailStartPos(tsp) {}
+
+ MPIterator getMPIter() const {
+ return MPIter;
+ }
+ MergePotentialsElt &getMergePotentialsElt() const {
+ return *getMPIter();
+ }
+ MachineBasicBlock::iterator getTailStartPos() const {
+ return TailStartPos;
+ }
+ unsigned getHash() const {
+ return getMergePotentialsElt().getHash();
+ }
+ MachineBasicBlock *getBlock() const {
+ return getMergePotentialsElt().getBlock();
+ }
+ bool tailIsWholeBlock() const {
+ return TailStartPos == getBlock()->begin();
+ }
+
+ void setBlock(MachineBasicBlock *MBB) {
+ getMergePotentialsElt().setBlock(MBB);
+ }
+ void setTailStartPos(MachineBasicBlock::iterator Pos) {
+ TailStartPos = Pos;
+ }
+ };
std::vector<SameTailElt> SameTails;
bool EnableTailMerge;
@@ -44,18 +91,23 @@ namespace llvm {
RegScavenger *RS;
bool TailMergeBlocks(MachineFunction &MF);
- bool TryMergeBlocks(MachineBasicBlock* SuccBB,
- MachineBasicBlock* PredBB);
+ bool TryTailMergeBlocks(MachineBasicBlock* SuccBB,
+ MachineBasicBlock* PredBB);
void ReplaceTailWithBranchTo(MachineBasicBlock::iterator OldInst,
MachineBasicBlock *NewDest);
MachineBasicBlock *SplitMBBAt(MachineBasicBlock &CurMBB,
MachineBasicBlock::iterator BBI1);
- unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength);
+ unsigned ComputeSameTails(unsigned CurHash, unsigned minCommonTailLength,
+ MachineBasicBlock *SuccBB,
+ MachineBasicBlock *PredBB);
void RemoveBlocksWithHash(unsigned CurHash, MachineBasicBlock* SuccBB,
MachineBasicBlock* PredBB);
unsigned CreateCommonTailOnlyBlock(MachineBasicBlock *&PredBB,
unsigned maxCommonTailLength);
+ bool TailDuplicateBlocks(MachineFunction &MF);
+ bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF);
+
bool OptimizeBranches(MachineFunction &MF);
bool OptimizeBlock(MachineBasicBlock *MBB);
void RemoveDeadBlock(MachineBasicBlock *MBB);
@@ -66,19 +118,6 @@ namespace llvm {
MachineBasicBlock *TBB, MachineBasicBlock *FBB,
const SmallVectorImpl<MachineOperand> &Cond);
};
-
-
- /// BranchFolderPass - Wrap branch folder in a machine function pass.
- class BranchFolderPass : public MachineFunctionPass,
- public BranchFolder {
- public:
- static char ID;
- explicit BranchFolderPass(bool defaultEnableTailMerge)
- : MachineFunctionPass(&ID), BranchFolder(defaultEnableTailMerge) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF);
- virtual const char *getPassName() const { return "Control Flow Optimizer"; }
- };
}
#endif /* LLVM_CODEGEN_BRANCHFOLDING_HPP */
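
MergePotentialsElt and SameTailElt used to be bare std::pairs; the new classes give the tail-merging code named accessors (getHash, getBlock, getTailStartPos, tailIsWholeBlock) instead of .first/.second. A small standalone sketch of the idea, assuming operator< orders elements by hash so that std::sort groups candidates with identical tails next to each other (the real comparison is defined in BranchFolding.cpp and is not shown here); MergeElt and the block names below are stand-ins:

  #include <algorithm>
  #include <cstddef>
  #include <cstdio>
  #include <string>
  #include <vector>

  // Stand-in for BranchFolder::MergePotentialsElt: a tail hash plus the block
  // it came from, ordered by hash so equal-tail candidates end up adjacent
  // after sorting.
  class MergeElt {
    unsigned Hash;
    std::string Block;
  public:
    MergeElt(unsigned h, const std::string &b) : Hash(h), Block(b) {}
    unsigned getHash() const { return Hash; }
    const std::string &getBlock() const { return Block; }
    bool operator<(const MergeElt &RHS) const { return Hash < RHS.Hash; }
  };

  int main() {
    std::vector<MergeElt> MergePotentials;
    MergePotentials.push_back(MergeElt(0x2a, "BB#3"));
    MergePotentials.push_back(MergeElt(0x11, "BB#7"));
    MergePotentials.push_back(MergeElt(0x2a, "BB#5"));
    std::sort(MergePotentials.begin(), MergePotentials.end());
    for (std::size_t i = 0, e = MergePotentials.size(); i != e; ++i)
      std::printf("%#x %s\n", MergePotentials[i].getHash(),
                  MergePotentials[i].getBlock().c_str());
    return 0;
  }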
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index 6fff12c..e9844d8 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -56,7 +56,6 @@ namespace {
MachineFunction::iterator InsertPt,
MachineFunction::iterator Begin,
MachineFunction::iterator End);
- void UpdateTerminator(MachineBasicBlock *MBB);
bool EliminateUnconditionalJumpsToTop(MachineFunction &MF,
MachineLoop *L);
bool MoveDiscontiguousLoopBlocks(MachineFunction &MF,
@@ -141,66 +140,9 @@ void CodePlacementOpt::Splice(MachineFunction &MF,
MF.splice(InsertPt, Begin, End);
- UpdateTerminator(prior(Begin));
- UpdateTerminator(OldBeginPrior);
- UpdateTerminator(OldEndPrior);
-}
-
-/// UpdateTerminator - Update the terminator instructions in MBB to account
-/// for changes to the layout. If the block previously used a fallthrough,
-/// it may now need a branch, and if it previously used branching it may now
-/// be able to use a fallthrough.
-///
-void CodePlacementOpt::UpdateTerminator(MachineBasicBlock *MBB) {
- // A block with no successors has no concerns with fall-through edges.
- if (MBB->succ_empty()) return;
-
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- bool B = TII->AnalyzeBranch(*MBB, TBB, FBB, Cond);
- (void) B;
- assert(!B && "UpdateTerminators requires analyzable predecessors!");
- if (Cond.empty()) {
- if (TBB) {
- // The block has an unconditional branch. If its successor is now
- // its layout successor, delete the branch.
- if (MBB->isLayoutSuccessor(TBB))
- TII->RemoveBranch(*MBB);
- } else {
- // The block has an unconditional fallthrough. If its successor is not
- // its layout successor, insert a branch.
- TBB = *MBB->succ_begin();
- if (!MBB->isLayoutSuccessor(TBB))
- TII->InsertBranch(*MBB, TBB, 0, Cond);
- }
- } else {
- if (FBB) {
- // The block has a non-fallthrough conditional branch. If one of its
- // successors is its layout successor, rewrite it to a fallthrough
- // conditional branch.
- if (MBB->isLayoutSuccessor(TBB)) {
- TII->RemoveBranch(*MBB);
- TII->ReverseBranchCondition(Cond);
- TII->InsertBranch(*MBB, FBB, 0, Cond);
- } else if (MBB->isLayoutSuccessor(FBB)) {
- TII->RemoveBranch(*MBB);
- TII->InsertBranch(*MBB, TBB, 0, Cond);
- }
- } else {
- // The block has a fallthrough conditional branch.
- MachineBasicBlock *MBBA = *MBB->succ_begin();
- MachineBasicBlock *MBBB = *next(MBB->succ_begin());
- if (MBBA == TBB) std::swap(MBBB, MBBA);
- if (MBB->isLayoutSuccessor(TBB)) {
- TII->RemoveBranch(*MBB);
- TII->ReverseBranchCondition(Cond);
- TII->InsertBranch(*MBB, MBBA, 0, Cond);
- } else if (!MBB->isLayoutSuccessor(MBBA)) {
- TII->RemoveBranch(*MBB);
- TII->InsertBranch(*MBB, TBB, MBBA, Cond);
- }
- }
- }
+ prior(Begin)->updateTerminator();
+ OldBeginPrior->updateTerminator();
+ OldEndPrior->updateTerminator();
}
/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump
diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp
index 3e3b28a..8a3bd0b 100644
--- a/lib/CodeGen/IntrinsicLowering.cpp
+++ b/lib/CodeGen/IntrinsicLowering.cpp
@@ -515,6 +515,15 @@ void IntrinsicLowering::LowerIntrinsicCall(CallInst *CI) {
if (CI->getType() != Type::getVoidTy(Context))
CI->replaceAllUsesWith(ConstantInt::get(CI->getType(), 1));
break;
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ CI->replaceAllUsesWith(UndefValue::get(CI->getType()));
+ break;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ break;
}
assert(CI->use_empty() &&
diff --git a/lib/CodeGen/LatencyPriorityQueue.cpp b/lib/CodeGen/LatencyPriorityQueue.cpp
index 794ecf7..23dce4a 100644
--- a/lib/CodeGen/LatencyPriorityQueue.cpp
+++ b/lib/CodeGen/LatencyPriorityQueue.cpp
@@ -55,7 +55,10 @@ SUnit *LatencyPriorityQueue::getSingleUnscheduledPred(SUnit *SU) {
SUnit *OnlyAvailablePred = 0;
for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
SUnit &Pred = *I->getSUnit();
if (!Pred.isScheduled) {
// We found an available, but not scheduled, predecessor. If it's the
@@ -75,7 +78,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
unsigned NumNodesBlocking = 0;
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
if (getSingleUnscheduledPred(I->getSUnit()) == SU)
++NumNodesBlocking;
}
@@ -92,7 +98,10 @@ void LatencyPriorityQueue::push_impl(SUnit *SU) {
void LatencyPriorityQueue::ScheduledNode(SUnit *SU) {
for (SUnit::const_succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
AdjustPriorityOfUnscheduledPreds(I->getSUnit());
}
}
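
All three loops above now apply the same widened test: when IgnoreAntiDep is set, output-dependence edges are skipped along with anti-dependence edges. The repeated condition reads more easily as a single predicate; a standalone sketch, where the enum and helper are illustrative only and not part of the patch:

  #include <cassert>

  // Illustrative mirror of the dependence kinds for this sketch only.
  enum DepKind { Data, Anti, Output, Order };

  // Equivalent of the condition repeated in the patch: with IgnoreAntiDep
  // set, both anti and output edges are ignored when computing priorities.
  static bool shouldIgnoreEdge(bool IgnoreAntiDep, DepKind K) {
    return IgnoreAntiDep && (K == Anti || K == Output);
  }

  int main() {
    assert(shouldIgnoreEdge(true, Anti));
    assert(shouldIgnoreEdge(true, Output));
    assert(!shouldIgnoreEdge(true, Data));
    assert(!shouldIgnoreEdge(false, Output));
    return 0;
  }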
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 2a93a35..a60d34f 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -53,7 +53,8 @@ static cl::opt<bool> DisableReMat("disable-rematerialization",
static cl::opt<bool> EnableFastSpilling("fast-spill",
cl::init(false), cl::Hidden);
-static cl::opt<bool> EarlyCoalescing("early-coalescing", cl::init(false));
+static cl::opt<bool> EarlyCoalescing("early-coalescing",
+ cl::init(false), cl::Hidden);
static cl::opt<int> CoalescingLimit("early-coalescing-limit",
cl::init(-1), cl::Hidden);
@@ -646,17 +647,17 @@ void LiveIntervals::handleLiveInRegister(MachineBasicBlock *MBB,
0, false, VNInfoAllocator);
vni->setIsPHIDef(true);
LiveRange LR(start, end, vni);
-
+
interval.addRange(LR);
LR.valno->addKill(end);
DEBUG(errs() << " +" << LR << '\n');
}
-bool
-LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt,
- SmallVector<MachineInstr*,16> &IdentCopies,
- SmallVector<MachineInstr*,16> &OtherCopies) {
- bool HaveConflict = false;
+bool LiveIntervals::
+isSafeAndProfitableToCoalesce(LiveInterval &DstInt,
+ LiveInterval &SrcInt,
+ SmallVector<MachineInstr*,16> &IdentCopies,
+ SmallVector<MachineInstr*,16> &OtherCopies) {
unsigned NumIdent = 0;
for (MachineRegisterInfo::def_iterator ri = mri_->def_begin(SrcInt.reg),
re = mri_->def_end(); ri != re; ++ri) {
@@ -665,16 +666,16 @@ LiveIntervals::isProfitableToCoalesce(LiveInterval &DstInt, LiveInterval &SrcInt
if (!tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
return false;
if (SrcReg != DstInt.reg) {
+ // Non-identity copy - we cannot handle overlapping intervals
+ if (DstInt.liveAt(getInstructionIndex(MI)))
+ return false;
OtherCopies.push_back(MI);
- HaveConflict |= DstInt.liveAt(getInstructionIndex(MI));
} else {
IdentCopies.push_back(MI);
++NumIdent;
}
}
- if (!HaveConflict)
- return false; // Let coalescer handle it
return IdentCopies.size() > OtherCopies.size();
}
@@ -701,19 +702,21 @@ void LiveIntervals::performEarlyCoalescing() {
LiveInterval &SrcInt = getInterval(PHISrc);
SmallVector<MachineInstr*, 16> IdentCopies;
SmallVector<MachineInstr*, 16> OtherCopies;
- if (!isProfitableToCoalesce(DstInt, SrcInt, IdentCopies, OtherCopies))
+ if (!isSafeAndProfitableToCoalesce(DstInt, SrcInt,
+ IdentCopies, OtherCopies))
continue;
DEBUG(errs() << "PHI Join: " << *Join);
assert(DstInt.containsOneValue() && "PHI join should have just one val#!");
+ assert(std::distance(mri_->use_begin(PHISrc), mri_->use_end()) == 1 &&
+ "PHI join src should not be used elsewhere");
VNInfo *VNI = DstInt.getValNumInfo(0);
// Change the non-identity copies to directly target the phi destination.
for (unsigned i = 0, e = OtherCopies.size(); i != e; ++i) {
MachineInstr *PHICopy = OtherCopies[i];
- DEBUG(errs() << "Moving: " << *PHICopy);
-
SlotIndex MIIndex = getInstructionIndex(PHICopy);
+ DEBUG(errs() << "Moving: " << MIIndex << ' ' << *PHICopy);
SlotIndex DefIndex = MIIndex.getDefIndex();
LiveRange *SLR = SrcInt.getLiveRangeContaining(DefIndex);
SlotIndex StartIndex = SLR->start;
@@ -724,8 +727,7 @@ void LiveIntervals::performEarlyCoalescing() {
SrcInt.removeValNo(SLR->valno);
DEBUG(errs() << " added range [" << StartIndex << ','
<< EndIndex << "] to reg" << DstInt.reg << '\n');
- if (DstInt.liveAt(StartIndex))
- DstInt.removeRange(StartIndex, EndIndex);
+ assert(!DstInt.liveAt(StartIndex) && "Cannot coalesce when dst live!");
VNInfo *NewVNI = DstInt.getNextValue(DefIndex, PHICopy, true,
VNInfoAllocator);
NewVNI->setHasPHIKill(true);
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 96c655c..16a79bb 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -50,6 +50,14 @@ void LiveVariables::getAnalysisUsage(AnalysisUsage &AU) const {
MachineFunctionPass::getAnalysisUsage(AU);
}
+MachineInstr *
+LiveVariables::VarInfo::findKill(const MachineBasicBlock *MBB) const {
+ for (unsigned i = 0, e = Kills.size(); i != e; ++i)
+ if (Kills[i]->getParent() == MBB)
+ return Kills[i];
+ return NULL;
+}
+
void LiveVariables::VarInfo::dump() const {
errs() << " Alive in blocks: ";
for (SparseBitVector<>::iterator I = AliveBlocks.begin(),
@@ -222,8 +230,9 @@ MachineInstr *LiveVariables::FindLastPartialDef(unsigned Reg,
/// implicit defs to a machine instruction if there was an earlier def of its
/// super-register.
void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
+ MachineInstr *LastDef = PhysRegDef[Reg];
// If there was a previous use or a "full" def all is well.
- if (!PhysRegDef[Reg] && !PhysRegUse[Reg]) {
+ if (!LastDef && !PhysRegUse[Reg]) {
// Otherwise, the last sub-register def implicitly defines this register.
// e.g.
// AH =
@@ -257,6 +266,11 @@ void LiveVariables::HandlePhysRegUse(unsigned Reg, MachineInstr *MI) {
}
}
}
+ else if (LastDef && !PhysRegUse[Reg] &&
+ !LastDef->findRegisterDefOperand(Reg))
+ // Last def defines the super register, add an implicit def of reg.
+ LastDef->addOperand(MachineOperand::CreateReg(Reg,
+ true/*IsDef*/, true/*IsImp*/));
// Remember this use.
PhysRegUse[Reg] = MI;
@@ -641,3 +655,36 @@ void LiveVariables::analyzePHINodes(const MachineFunction& Fn) {
PHIVarInfo[BBI->getOperand(i + 1).getMBB()->getNumber()]
.push_back(BBI->getOperand(i).getReg());
}
+
+/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
+/// variables that are live out of DomBB will be marked as passing live through
+/// BB.
+void LiveVariables::addNewBlock(MachineBasicBlock *BB,
+ MachineBasicBlock *DomBB) {
+ const unsigned NumNew = BB->getNumber();
+ const unsigned NumDom = DomBB->getNumber();
+
+ // Update info for all live variables
+ for (unsigned Reg = TargetRegisterInfo::FirstVirtualRegister,
+ E = MRI->getLastVirtReg()+1; Reg != E; ++Reg) {
+ VarInfo &VI = getVarInfo(Reg);
+
+ // Anything live through DomBB is also live through BB.
+ if (VI.AliveBlocks.test(NumDom)) {
+ VI.AliveBlocks.set(NumNew);
+ continue;
+ }
+
+ // Variables not defined in DomBB cannot be live out.
+ const MachineInstr *Def = MRI->getVRegDef(Reg);
+ if (!Def || Def->getParent() != DomBB)
+ continue;
+
+ // Killed by DomBB?
+ if (VI.findKill(DomBB))
+ continue;
+
+ // This register is defined in DomBB and live out
+ VI.AliveBlocks.set(NumNew);
+ }
+}
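
addNewBlock supports passes that create a new, empty block dominated by DomBB after LiveVariables has run; the critical-edge splitting added to PHIElimination later in this patch is its caller. The per-register decision distills to the sketch below, where plain bools stand in for the AliveBlocks test, the def-location check and the kill query:

  #include <cassert>

  // Distilled version of the per-register decision in addNewBlock: a register
  // is live through the new block if it is (a) already live through DomBB, or
  // (b) defined in DomBB and not killed there.
  static bool liveThroughNewBlock(bool AliveThroughDomBB, bool DefinedInDomBB,
                                  bool KilledInDomBB) {
    if (AliveThroughDomBB) return true;
    if (!DefinedInDomBB) return false;
    return !KilledInDomBB;
  }

  int main() {
    assert(liveThroughNewBlock(true, false, false));   // live-through case
    assert(liveThroughNewBlock(false, true, false));   // defined, live out
    assert(!liveThroughNewBlock(false, true, true));    // killed in DomBB
    assert(!liveThroughNewBlock(false, false, false));  // not live here at all
    return 0;
  }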
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 7fbdb12..cd52825 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetInstrDesc.h"
+#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Support/LeakDetector.h"
#include "llvm/Support/raw_ostream.h"
@@ -242,6 +243,58 @@ void MachineBasicBlock::moveAfter(MachineBasicBlock *NewBefore) {
getParent()->splice(++BBI, this);
}
+void MachineBasicBlock::updateTerminator() {
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ // A block with no successors has no concerns with fall-through edges.
+ if (this->succ_empty()) return;
+
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ bool B = TII->AnalyzeBranch(*this, TBB, FBB, Cond);
+ (void) B;
+ assert(!B && "UpdateTerminators requires analyzable predecessors!");
+ if (Cond.empty()) {
+ if (TBB) {
+ // The block has an unconditional branch. If its successor is now
+ // its layout successor, delete the branch.
+ if (isLayoutSuccessor(TBB))
+ TII->RemoveBranch(*this);
+ } else {
+ // The block has an unconditional fallthrough. If its successor is not
+ // its layout successor, insert a branch.
+ TBB = *succ_begin();
+ if (!isLayoutSuccessor(TBB))
+ TII->InsertBranch(*this, TBB, 0, Cond);
+ }
+ } else {
+ if (FBB) {
+ // The block has a non-fallthrough conditional branch. If one of its
+ // successors is its layout successor, rewrite it to a fallthrough
+ // conditional branch.
+ if (isLayoutSuccessor(TBB)) {
+ TII->RemoveBranch(*this);
+ TII->ReverseBranchCondition(Cond);
+ TII->InsertBranch(*this, FBB, 0, Cond);
+ } else if (isLayoutSuccessor(FBB)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, 0, Cond);
+ }
+ } else {
+ // The block has a fallthrough conditional branch.
+ MachineBasicBlock *MBBA = *succ_begin();
+ MachineBasicBlock *MBBB = *next(succ_begin());
+ if (MBBA == TBB) std::swap(MBBB, MBBA);
+ if (isLayoutSuccessor(TBB)) {
+ TII->RemoveBranch(*this);
+ TII->ReverseBranchCondition(Cond);
+ TII->InsertBranch(*this, MBBA, 0, Cond);
+ } else if (!isLayoutSuccessor(MBBA)) {
+ TII->RemoveBranch(*this);
+ TII->InsertBranch(*this, TBB, MBBA, Cond);
+ }
+ }
+ }
+}
void MachineBasicBlock::addSuccessor(MachineBasicBlock *succ) {
Successors.push_back(succ);
@@ -371,10 +424,7 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
MachineBasicBlock::succ_iterator SI = succ_begin();
MachineBasicBlock *OrigDestA = DestA, *OrigDestB = DestB;
while (SI != succ_end()) {
- if (*SI == DestA && DestA == DestB) {
- DestA = DestB = 0;
- ++SI;
- } else if (*SI == DestA) {
+ if (*SI == DestA) {
DestA = 0;
++SI;
} else if (*SI == DestB) {
@@ -397,3 +447,8 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
}
return MadeChange;
}
+
+void llvm::WriteAsOperand(raw_ostream &OS, const MachineBasicBlock *MBB,
+ bool t) {
+ OS << "BB#" << MBB->getNumber();
+}
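
updateTerminator moves the old CodePlacementOpt::UpdateTerminator logic onto MachineBasicBlock, so any pass that reorders blocks can repair fall-through edges. The CodePlacementOpt hunk above now just calls it on the three affected blocks; a hedged fragment of that calling pattern, assuming OldBeginPrior and OldEndPrior were captured before the splice as in the surrounding function (not shown in full):

  // Fragment only; not a complete pass.
  MF.splice(InsertPt, Begin, End);
  prior(Begin)->updateTerminator();   // block now laid out just before the moved range
  OldBeginPrior->updateTerminator();  // block that used to precede the moved range
  OldEndPrior->updateTerminator();    // last block of the moved range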
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index 5a1d9e6..81d1301 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -441,9 +441,10 @@ DebugLocTuple MachineFunction::getDebugLocTuple(DebugLoc DL) const {
/// index with a negative value.
///
int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset,
- bool Immutable) {
+ bool Immutable, bool isSS) {
assert(Size != 0 && "Cannot allocate zero size fixed stack objects!");
- Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable));
+ Objects.insert(Objects.begin(), StackObject(Size, 1, SPOffset, Immutable,
+ isSS));
return -++NumFixedObjects;
}
@@ -529,10 +530,6 @@ void MachineFrameInfo::dump(const MachineFunction &MF) const {
unsigned MachineJumpTableInfo::getJumpTableIndex(
const std::vector<MachineBasicBlock*> &DestBBs) {
assert(!DestBBs.empty() && "Cannot create an empty jump table!");
- for (unsigned i = 0, e = JumpTables.size(); i != e; ++i)
- if (JumpTables[i].MBBs == DestBBs)
- return i;
-
JumpTables.push_back(MachineJumpTableEntry(DestBBs));
return JumpTables.size()-1;
}
@@ -544,14 +541,25 @@ MachineJumpTableInfo::ReplaceMBBInJumpTables(MachineBasicBlock *Old,
MachineBasicBlock *New) {
assert(Old != New && "Not making a change?");
bool MadeChange = false;
- for (size_t i = 0, e = JumpTables.size(); i != e; ++i) {
- MachineJumpTableEntry &JTE = JumpTables[i];
- for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
- if (JTE.MBBs[j] == Old) {
- JTE.MBBs[j] = New;
- MadeChange = true;
- }
- }
+ for (size_t i = 0, e = JumpTables.size(); i != e; ++i)
+ MadeChange |= ReplaceMBBInJumpTable(i, Old, New);
+ return MadeChange;
+}
+
+/// ReplaceMBBInJumpTable - If Old is a target of the jump tables, update
+/// the jump table to branch to New instead.
+bool
+MachineJumpTableInfo::ReplaceMBBInJumpTable(unsigned Idx,
+ MachineBasicBlock *Old,
+ MachineBasicBlock *New) {
+ assert(Old != New && "Not making a change?");
+ bool MadeChange = false;
+ MachineJumpTableEntry &JTE = JumpTables[Idx];
+ for (size_t j = 0, e = JTE.MBBs.size(); j != e; ++j)
+ if (JTE.MBBs[j] == Old) {
+ JTE.MBBs[j] = New;
+ MadeChange = true;
+ }
return MadeChange;
}
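
ReplaceMBBInJumpTables is now a thin loop over the new per-table ReplaceMBBInJumpTable, so callers that already know the jump-table index can avoid scanning every table. A hedged usage fragment, assuming an in-tree pass with MF, MBB and FallThrough in scope as in the BranchFolding hunks above; JTIndex is a hypothetical index, not taken from the patch:

  // Fragment only: after all predecessors of MBB have been rewritten to
  // branch to FallThrough, retarget any jump-table entries that still
  // name MBB.
  MF.getJumpTableInfo()->ReplaceMBBInJumpTables(MBB, FallThrough);

  // When the caller already knows which table is affected, the new per-table
  // variant touches only that one (JTIndex is hypothetical here).
  MF.getJumpTableInfo()->ReplaceMBBInJumpTable(JTIndex, MBB, FallThrough);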
diff --git a/lib/CodeGen/MachineFunctionAnalysis.cpp b/lib/CodeGen/MachineFunctionAnalysis.cpp
index 56294d9..f5febc5 100644
--- a/lib/CodeGen/MachineFunctionAnalysis.cpp
+++ b/lib/CodeGen/MachineFunctionAnalysis.cpp
@@ -24,7 +24,7 @@ X("Machine Function Analysis", "machine-function-analysis",
char MachineFunctionAnalysis::ID = 0;
-MachineFunctionAnalysis::MachineFunctionAnalysis(TargetMachine &tm,
+MachineFunctionAnalysis::MachineFunctionAnalysis(const TargetMachine &tm,
CodeGenOpt::Level OL) :
FunctionPass(&ID), TM(tm), OptLevel(OL), MF(0) {
}
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index 5744c8a..b250faa 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -189,19 +189,19 @@ bool MachineOperand::isIdenticalTo(const MachineOperand &Other) const {
/// print - Print the specified machine operand.
///
void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
+ // If the instruction is embedded into a basic block, we can find the
+ // target info for the instruction.
+ if (!TM)
+ if (const MachineInstr *MI = getParent())
+ if (const MachineBasicBlock *MBB = MI->getParent())
+ if (const MachineFunction *MF = MBB->getParent())
+ TM = &MF->getTarget();
+
switch (getType()) {
case MachineOperand::MO_Register:
if (getReg() == 0 || TargetRegisterInfo::isVirtualRegister(getReg())) {
OS << "%reg" << getReg();
} else {
- // If the instruction is embedded into a basic block, we can find the
- // target info for the instruction.
- if (TM == 0)
- if (const MachineInstr *MI = getParent())
- if (const MachineBasicBlock *MBB = MI->getParent())
- if (const MachineFunction *MF = MBB->getParent())
- TM = &MF->getTarget();
-
if (TM)
OS << "%" << TM->getRegisterInfo()->get(getReg()).Name;
else
@@ -265,7 +265,8 @@ void MachineOperand::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << "<jt#" << getIndex() << '>';
break;
case MachineOperand::MO_GlobalAddress:
- OS << "<ga:" << ((Value*)getGlobal())->getName();
+ OS << "<ga:";
+ WriteAsOperand(OS, getGlobal(), /*PrintType=*/false);
if (getOffset()) OS << "+" << getOffset();
OS << '>';
break;
@@ -375,7 +376,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) {
/// MachineInstr ctor - This constructor creates a dummy MachineInstr with
/// TID NULL and no operands.
MachineInstr::MachineInstr()
- : TID(0), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ : TID(0), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(DebugLoc::getUnknownLoc()) {
// Make sure that we get added to a machine basicblock
LeakDetector::addGarbageObject(this);
@@ -395,7 +396,8 @@ void MachineInstr::addImplicitDefUseOperands() {
/// TargetInstrDesc or the numOperands if it is not zero. (for
/// instructions with variable number of operands).
MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0),
debugLoc(DebugLoc::getUnknownLoc()) {
if (!NoImp && TID->getImplicitDefs())
for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
@@ -413,7 +415,7 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, bool NoImp)
/// MachineInstr ctor - As above, but with a DebugLoc.
MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
bool NoImp)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(dl) {
if (!NoImp && TID->getImplicitDefs())
for (const unsigned *ImpDefs = TID->getImplicitDefs(); *ImpDefs; ++ImpDefs)
@@ -433,7 +435,8 @@ MachineInstr::MachineInstr(const TargetInstrDesc &tid, const DebugLoc dl,
/// basic block.
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0), Parent(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0),
+ MemRefs(0), MemRefsEnd(0), Parent(0),
debugLoc(DebugLoc::getUnknownLoc()) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
if (TID->ImplicitDefs)
@@ -453,7 +456,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const TargetInstrDesc &tid)
///
MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
const TargetInstrDesc &tid)
- : TID(&tid), NumImplicitOps(0), MemRefs(0), MemRefsEnd(0),
+ : TID(&tid), NumImplicitOps(0), AsmPrinterFlags(0), MemRefs(0), MemRefsEnd(0),
Parent(0), debugLoc(dl) {
assert(MBB && "Cannot use inserting ctor with null basic block!");
if (TID->ImplicitDefs)
@@ -472,7 +475,7 @@ MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl,
/// MachineInstr ctor - Copies MachineInstr arg exactly
///
MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI)
- : TID(&MI.getDesc()), NumImplicitOps(0),
+ : TID(&MI.getDesc()), NumImplicitOps(0), AsmPrinterFlags(0),
MemRefs(MI.MemRefs), MemRefsEnd(MI.MemRefsEnd),
Parent(0), debugLoc(MI.getDebugLoc()) {
Operands.reserve(MI.getNumOperands());
@@ -1060,9 +1063,16 @@ void MachineInstr::dump() const {
}
void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
- unsigned StartOp = 0, e = getNumOperands();
+ // We can be a bit tidier if we know the TargetMachine and/or MachineFunction.
+ const MachineFunction *MF = 0;
+ if (const MachineBasicBlock *MBB = getParent()) {
+ MF = MBB->getParent();
+ if (!TM && MF)
+ TM = &MF->getTarget();
+ }
// Print explicitly defined operands on the left of an assignment syntax.
+ unsigned StartOp = 0, e = getNumOperands();
for (; StartOp < e && getOperand(StartOp).isReg() &&
getOperand(StartOp).isDef() &&
!getOperand(StartOp).isImplicit();
@@ -1078,11 +1088,45 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
OS << getDesc().getName();
// Print the rest of the operands.
+ bool OmittedAnyCallClobbers = false;
+ bool FirstOp = true;
for (unsigned i = StartOp, e = getNumOperands(); i != e; ++i) {
- if (i != StartOp)
- OS << ",";
+ const MachineOperand &MO = getOperand(i);
+
+ // Omit call-clobbered registers which aren't used anywhere. This makes
+ // call instructions much less noisy on targets where calls clobber lots
+ // of registers. Don't rely on MO.isDead() because we may be called before
+ // LiveVariables is run, or we may be looking at a non-allocatable reg.
+ if (MF && getDesc().isCall() &&
+ MO.isReg() && MO.isImplicit() && MO.isDef()) {
+ unsigned Reg = MO.getReg();
+ if (Reg != 0 && TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
+ if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) {
+ bool HasAliasLive = false;
+ for (const unsigned *Alias = TM->getRegisterInfo()->getAliasSet(Reg);
+ unsigned AliasReg = *Alias; ++Alias)
+ if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) {
+ HasAliasLive = true;
+ break;
+ }
+ if (!HasAliasLive) {
+ OmittedAnyCallClobbers = true;
+ continue;
+ }
+ }
+ }
+ }
+
+ if (FirstOp) FirstOp = false; else OS << ",";
OS << " ";
- getOperand(i).print(OS, TM);
+ MO.print(OS, TM);
+ }
+
+ // Briefly indicate whether any call clobbers were omitted.
+ if (OmittedAnyCallClobbers) {
+ if (FirstOp) FirstOp = false; else OS << ",";
+ OS << " ...";
}
bool HaveSemi = false;
@@ -1098,12 +1142,11 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
}
}
- if (!debugLoc.isUnknown()) {
+ if (!debugLoc.isUnknown() && MF) {
if (!HaveSemi) OS << ";"; HaveSemi = true;
// TODO: print InlinedAtLoc information
- const MachineFunction *MF = getParent()->getParent();
DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
DICompileUnit CU(DLT.Scope);
if (!CU.isNull())
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index de3ab27..33b6b82 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -22,6 +22,7 @@
#define DEBUG_TYPE "machine-licm"
#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
@@ -43,6 +44,7 @@ STATISTIC(NumCSEed, "Number of hoisted machine instructions CSEed");
namespace {
class MachineLICM : public MachineFunctionPass {
+ MachineConstantPool *MCP;
const TargetMachine *TM;
const TargetInstrInfo *TII;
const TargetRegisterInfo *TRI;
@@ -111,6 +113,11 @@ namespace {
/// be hoistable.
MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
+ /// LookForDuplicate - Find an instruction among PrevMIs that is a
+ /// duplicate of MI. Return this instruction if it's found.
+ const MachineInstr *LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs);
+
/// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
/// the preheader that compute the same value. If it's found, do a RAU on
/// with the definition of the existing instruction rather than hoisting
@@ -153,6 +160,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
DEBUG(errs() << "******** Machine LICM ********\n");
Changed = FirstInLoop = false;
+ MCP = MF.getConstantPool();
TM = &MF.getTarget();
TII = TM->getInstrInfo();
TRI = TM->getRegisterInfo();
@@ -234,9 +242,9 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
// to decide whether the loaded value is actually a constant. If so, we can
// actually use it as a load.
if (!I.isInvariantLoad(AA))
- // FIXME: we should be able to sink loads with no other side effects if
- // there is nothing that can change memory from here until the end of
- // block. This is a trivial form of alias analysis.
+ // FIXME: we should be able to hoist loads with no other side effects if
+ // there are no other instructions which can change memory in this loop.
+ // This is a trivial form of alias analysis.
return false;
}
@@ -432,32 +440,12 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
}
}
-static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
- std::vector<const MachineInstr*> &PrevMIs,
- MachineRegisterInfo *RegInfo) {
- unsigned NumOps = MI->getNumOperands();
+const MachineInstr*
+MachineLICM::LookForDuplicate(const MachineInstr *MI,
+ std::vector<const MachineInstr*> &PrevMIs) {
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
- unsigned NumOps2 = PrevMI->getNumOperands();
- if (NumOps != NumOps2)
- continue;
- bool IsSame = true;
- for (unsigned j = 0; j != NumOps; ++j) {
- const MachineOperand &MO = MI->getOperand(j);
- if (MO.isReg() && MO.isDef()) {
- if (RegInfo->getRegClass(MO.getReg()) !=
- RegInfo->getRegClass(PrevMI->getOperand(j).getReg())) {
- IsSame = false;
- break;
- }
- continue;
- }
- if (!MO.isIdenticalTo(PrevMI->getOperand(j))) {
- IsSame = false;
- break;
- }
- }
- if (IsSame)
+ if (TII->isIdentical(MI, PrevMI, RegInfo))
return PrevMI;
}
return 0;
@@ -465,18 +453,19 @@ static const MachineInstr *LookForDuplicate(const MachineInstr *MI,
bool MachineLICM::EliminateCSE(MachineInstr *MI,
DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
- if (CI != CSEMap.end()) {
- if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second, RegInfo)) {
- DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef())
- RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
- }
- MI->eraseFromParent();
- ++NumCSEed;
- return true;
+ if (CI == CSEMap.end())
+ return false;
+
+ if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
+ DEBUG(errs() << "CSEing " << *MI << " with " << *Dup);
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isDef())
+ RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
}
+ MI->eraseFromParent();
+ ++NumCSEed;
+ return true;
}
return false;
}
diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp
index b62803f..4b067a0 100644
--- a/lib/CodeGen/MachineModuleInfo.cpp
+++ b/lib/CodeGen/MachineModuleInfo.cpp
@@ -76,9 +76,7 @@ void MachineModuleInfo::EndFunction() {
FilterEnds.clear();
CallsEHReturn = 0;
CallsUnwindInit = 0;
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
VariableDbgInfo.clear();
-#endif
}
/// AnalyzeModule - Scan the module for global debug information.
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 99812e0..be9f68f 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -175,6 +175,10 @@ FunctionPass *llvm::createMachineVerifierPass(bool allowPhysDoubleDefs) {
return new MachineVerifier(allowPhysDoubleDefs);
}
+void MachineFunction::verify() const {
+ MachineVerifier().runOnMachineFunction(const_cast<MachineFunction&>(*this));
+}
+
bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) {
raw_ostream *OutFile = 0;
if (OutFileName) {
@@ -287,7 +291,18 @@ void MachineVerifier::visitMachineFunctionBefore() {
markReachable(&MF->front());
}
-void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
+// Does the iterator point at a and b, in either order, as the first two elements?
+bool matchPair(MachineBasicBlock::const_succ_iterator i,
+ const MachineBasicBlock *a, const MachineBasicBlock *b) {
+ if (*i == a)
+ return *++i == b;
+ if (*i == b)
+ return *++i == a;
+ return false;
+}
+
+void
+MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
// Start with minimal CFG sanity checks.
@@ -379,8 +394,7 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
} if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/fall-through but doesn't have "
"exactly two CFG successors!", MBB);
- } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == MBBI) ||
- (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == MBBI)) {
+ } else if (!matchPair(MBB->succ_begin(), TBB, MBBI)) {
report("MBB exits via conditional branch/fall-through but the CFG "
"successors don't match the actual successors!", MBB);
}
@@ -400,8 +414,7 @@ void MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB)
if (MBB->succ_size() != 2) {
report("MBB exits via conditional branch/branch but doesn't have "
"exactly two CFG successors!", MBB);
- } else if ((MBB->succ_begin()[0] == TBB && MBB->succ_end()[1] == FBB) ||
- (MBB->succ_begin()[1] == TBB && MBB->succ_end()[0] == FBB)) {
+ } else if (!matchPair(MBB->succ_begin(), TBB, FBB)) {
report("MBB exits via conditional branch/branch but the CFG "
"successors don't match the actual successors!", MBB);
}
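
The verifier additions include a MachineFunction::verify() wrapper, so the checks can be run directly on a function rather than only through the pass pipeline. A minimal fragment, assuming MF is a MachineFunction being transformed:

  // Fragment only: run the machine code verifier on the current function,
  // for example from a debugger or temporarily inside a pass while tracking
  // down a CFG or liveness inconsistency.
  MF.verify();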
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 8071b0a..cd38dd1 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -15,24 +15,32 @@
#define DEBUG_TYPE "phielim"
#include "PHIElimination.h"
-#include "llvm/BasicBlock.h"
-#include "llvm/Instructions.h"
#include "llvm/CodeGen/LiveVariables.h"
#include "llvm/CodeGen/Passes.h"
-#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/RegAllocRegistry.h"
+#include "llvm/Function.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/Debug.h"
#include <algorithm>
#include <map>
using namespace llvm;
STATISTIC(NumAtomic, "Number of atomic phis lowered");
+STATISTIC(NumSplits, "Number of critical edges split on demand");
+
+static cl::opt<bool>
+SplitEdges("split-phi-edges",
+ cl::desc("Split critical edges during phi elimination"),
+ cl::init(false), cl::Hidden);
char PHIElimination::ID = 0;
static RegisterPass<PHIElimination>
@@ -40,11 +48,26 @@ X("phi-node-elimination", "Eliminate PHI nodes for register allocation");
const PassInfo *const llvm::PHIEliminationID = &X;
+namespace llvm { FunctionPass *createLocalRegisterAllocator(); }
+
+// Should we run edge splitting?
+static bool shouldSplitEdges() {
+ // Edge splitting breaks the local register allocator. It cannot tolerate
+ // LiveVariables being run.
+ if (RegisterRegAlloc::getDefault() == createLocalRegisterAllocator)
+ return false;
+ return SplitEdges;
+}
+
void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
AU.addPreserved<LiveVariables>();
- AU.addPreservedID(MachineLoopInfoID);
- AU.addPreservedID(MachineDominatorsID);
+ AU.addPreserved<MachineDominatorTree>();
+ if (shouldSplitEdges()) {
+ AU.addRequired<LiveVariables>();
+ } else {
+ AU.setPreservesCFG();
+ AU.addPreservedID(MachineLoopInfoID);
+ }
MachineFunctionPass::getAnalysisUsage(AU);
}
@@ -53,10 +76,16 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
PHIDefs.clear();
PHIKills.clear();
- analyzePHINodes(Fn);
-
bool Changed = false;
+ // Split critical edges to help the coalescer
+ if (shouldSplitEdges())
+ for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
+ Changed |= SplitPHIEdges(Fn, *I);
+
+ // Populate VRegPHIUseCount
+ analyzePHINodes(Fn);
+
// Eliminate PHI instructions by inserting copies into predecessor blocks.
for (MachineFunction::iterator I = Fn.begin(), E = Fn.end(); I != E; ++I)
Changed |= EliminatePHINodes(Fn, *I);
@@ -75,7 +104,6 @@ bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
return Changed;
}
-
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions in
/// predecessor basic blocks.
///
@@ -107,26 +135,28 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi,
return true;
}
-// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg.
-// This needs to be after any def or uses of SrcReg, but before any subsequent
-// point where control flow might jump out of the basic block.
+// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg
+// when following the CFG edge to SuccMBB. This needs to be after any def of
+// SrcReg, but before any subsequent point where control flow might jump out of
+// the basic block.
MachineBasicBlock::iterator
llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
+ MachineBasicBlock &SuccMBB,
unsigned SrcReg) {
// Handle the trivial case trivially.
if (MBB.empty())
return MBB.begin();
- // If this basic block does not contain an invoke, then control flow always
- // reaches the end of it, so place the copy there. The logic below works in
- // this case too, but is more expensive.
- if (!isa<InvokeInst>(MBB.getBasicBlock()->getTerminator()))
+ // Usually, we just want to insert the copy before the first terminator
+ // instruction. However, for the edge going to a landing pad, we must insert
+ // the copy before the call/invoke instruction.
+ if (!SuccMBB.isLandingPad())
return MBB.getFirstTerminator();
- // Discover any definition/uses in this basic block.
+ // Discover any defs/uses in this basic block.
SmallPtrSet<MachineInstr*, 8> DefUsesInMBB;
for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg),
- RE = MRI->reg_end(); RI != RE; ++RI) {
+ RE = MRI->reg_end(); RI != RE; ++RI) {
MachineInstr *DefUseMI = &*RI;
if (DefUseMI->getParent() == &MBB)
DefUsesInMBB.insert(DefUseMI);
@@ -134,14 +164,14 @@ llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
MachineBasicBlock::iterator InsertPoint;
if (DefUsesInMBB.empty()) {
- // No def/uses. Insert the copy at the start of the basic block.
+ // No defs. Insert the copy at the start of the basic block.
InsertPoint = MBB.begin();
} else if (DefUsesInMBB.size() == 1) {
- // Insert the copy immediately after the definition/use.
+ // Insert the copy immediately after the def/use.
InsertPoint = *DefUsesInMBB.begin();
++InsertPoint;
} else {
- // Insert the copy immediately after the last definition/use.
+ // Insert the copy immediately after the last def/use.
InsertPoint = MBB.end();
while (!DefUsesInMBB.count(&*--InsertPoint)) {}
++InsertPoint;
@@ -155,7 +185,7 @@ llvm::PHIElimination::FindCopyInsertPoint(MachineBasicBlock &MBB,
/// under the assumption that it needs to be lowered in a way that supports
/// atomic execution of PHIs. This lowering method is always correct all of the
/// time.
-///
+///
void llvm::PHIElimination::LowerAtomicPHINode(
MachineBasicBlock &MBB,
MachineBasicBlock::iterator AfterPHIsIt) {
@@ -186,7 +216,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
}
// Record PHI def.
- assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
+ assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
PHIDefs[DestReg] = &MBB;
// Update live variable information if there is any.
@@ -250,92 +280,35 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// basic block.
if (!MBBsInsertedInto.insert(&opBlock))
continue; // If the copy has already been emitted, we're done.
-
+
// Find a safe location to insert the copy, this may be the first terminator
// in the block (or end()).
- MachineBasicBlock::iterator InsertPos = FindCopyInsertPoint(opBlock, SrcReg);
+ MachineBasicBlock::iterator InsertPos =
+ FindCopyInsertPoint(opBlock, MBB, SrcReg);
// Insert the copy.
TII->copyRegToReg(opBlock, InsertPos, IncomingReg, SrcReg, RC, RC);
// Now update live variable information if we have it. Otherwise we're done
if (!LV) continue;
-
+
// We want to be able to insert a kill of the register if this PHI (aka, the
// copy we just inserted) is the last use of the source value. Live
// variable analysis conservatively handles this by saying that the value is
// live until the end of the block the PHI entry lives in. If the value
// really is dead at the PHI copy, there will be no successor blocks which
// have the value live-in.
- //
- // Check to see if the copy is the last use, and if so, update the live
- // variables information so that it knows the copy source instruction kills
- // the incoming value.
- LiveVariables::VarInfo &InRegVI = LV->getVarInfo(SrcReg);
-
- // Loop over all of the successors of the basic block, checking to see if
- // the value is either live in the block, or if it is killed in the block.
+
// Also check to see if this register is in use by another PHI node which
// has not yet been eliminated. If so, it will be killed at an appropriate
// point later.
// Is it used by any PHI instructions in this block?
- bool ValueIsLive = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
-
- std::vector<MachineBasicBlock*> OpSuccBlocks;
-
- // Otherwise, scan successors, including the BB the PHI node lives in.
- for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(),
- E = opBlock.succ_end(); SI != E && !ValueIsLive; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
-
- // Is it alive in this successor?
- unsigned SuccIdx = SuccMBB->getNumber();
- if (InRegVI.AliveBlocks.test(SuccIdx)) {
- ValueIsLive = true;
- break;
- }
-
- OpSuccBlocks.push_back(SuccMBB);
- }
-
- // Check to see if this value is live because there is a use in a successor
- // that kills it.
- if (!ValueIsLive) {
- switch (OpSuccBlocks.size()) {
- case 1: {
- MachineBasicBlock *MBB = OpSuccBlocks[0];
- for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
- if (InRegVI.Kills[i]->getParent() == MBB) {
- ValueIsLive = true;
- break;
- }
- break;
- }
- case 2: {
- MachineBasicBlock *MBB1 = OpSuccBlocks[0], *MBB2 = OpSuccBlocks[1];
- for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
- if (InRegVI.Kills[i]->getParent() == MBB1 ||
- InRegVI.Kills[i]->getParent() == MBB2) {
- ValueIsLive = true;
- break;
- }
- break;
- }
- default:
- std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
- for (unsigned i = 0, e = InRegVI.Kills.size(); i != e; ++i)
- if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
- InRegVI.Kills[i]->getParent())) {
- ValueIsLive = true;
- break;
- }
- }
- }
+ bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(&opBlock, SrcReg)] != 0;
// Okay, if we now know that the value is not live out of the block, we can
// add a kill marker in this block saying that it kills the incoming value!
- if (!ValueIsLive) {
+ if (!ValueIsUsed && !isLiveOut(SrcReg, opBlock, *LV)) {
// In our final twist, we have to decide which instruction kills the
// register. In most cases this is the copy, however, the first
// terminator instruction at the end of the block may also use the value.
@@ -346,7 +319,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
if (Term != opBlock.end()) {
if (Term->readsRegister(SrcReg))
KillInst = Term;
-
+
// Check that no other terminators use values.
#ifndef NDEBUG
for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end();
@@ -357,16 +330,16 @@ void llvm::PHIElimination::LowerAtomicPHINode(
}
#endif
}
-
+
// Finally, mark it killed.
LV->addVirtualRegisterKilled(SrcReg, KillInst);
// This vreg no longer lives all of the way through opBlock.
unsigned opBlockNum = opBlock.getNumber();
- InRegVI.AliveBlocks.reset(opBlockNum);
+ LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum);
}
}
-
+
// Really delete the PHI instruction now!
MF.DeleteMachineInstr(MPhi);
++NumAtomic;
@@ -386,3 +359,134 @@ void llvm::PHIElimination::analyzePHINodes(const MachineFunction& Fn) {
++VRegPHIUseCount[BBVRegPair(BBI->getOperand(i + 1).getMBB(),
BBI->getOperand(i).getReg())];
}
+
+bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
+ MachineBasicBlock &MBB) {
+ if (MBB.empty() || MBB.front().getOpcode() != TargetInstrInfo::PHI)
+ return false; // Quick exit for basic blocks without PHIs.
+ LiveVariables &LV = getAnalysis<LiveVariables>();
+ for (MachineBasicBlock::const_iterator BBI = MBB.begin(), BBE = MBB.end();
+ BBI != BBE && BBI->getOpcode() == TargetInstrInfo::PHI; ++BBI) {
+ for (unsigned i = 1, e = BBI->getNumOperands(); i != e; i += 2) {
+ unsigned Reg = BBI->getOperand(i).getReg();
+ MachineBasicBlock *PreMBB = BBI->getOperand(i+1).getMBB();
+ // We break edges when registers are live out from the predecessor block
+ // (not considering PHI nodes). If the register is live in to this block
+ // anyway, we would gain nothing from splitting.
+ if (isLiveOut(Reg, *PreMBB, LV) && !isLiveIn(Reg, MBB, LV))
+ SplitCriticalEdge(PreMBB, &MBB);
+ }
+ }
+ return true;
+}
+
+bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV) {
+ LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ std::vector<MachineBasicBlock*> OpSuccBlocks;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB)
+ return true;
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB1 ||
+ VI.Kills[i]->getParent() == SuccMBB2)
+ return true;
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ VI.Kills[i]->getParent()))
+ return true;
+ }
+ return false;
+}
+
+bool llvm::PHIElimination::isLiveIn(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV) {
+ LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
+
+ if (VI.AliveBlocks.test(MBB.getNumber()))
+ return true;
+
+ // defined in MBB?
+ const MachineInstr *Def = MRI->getVRegDef(Reg);
+ if (Def && Def->getParent() == &MBB)
+ return false;
+
+ // killed in MBB?
+ return VI.findKill(&MBB);
+}
+
+MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
+ MachineBasicBlock *B) {
+ assert(A && B && "Missing MBB end point");
+
+ MachineFunction *MF = A->getParent();
+
+ // We may need to update A's terminator, but we can't do that if AnalyzeBranch
+ // fails. If A uses a jump table, we won't touch it.
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ if (TII->AnalyzeBranch(*A, TBB, FBB, Cond))
+ return NULL;
+
+ ++NumSplits;
+
+ MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
+ MF->push_back(NMBB);
+ DEBUG(errs() << "PHIElimination splitting critical edge:"
+ " BB#" << A->getNumber()
+ << " -- BB#" << NMBB->getNumber()
+ << " -- BB#" << B->getNumber() << '\n');
+
+ A->ReplaceUsesOfBlockWith(B, NMBB);
+ // If A may fall through to B, we may have to insert a branch.
+ if (A->isLayoutSuccessor(B))
+ A->updateTerminator();
+
+ // Insert unconditional "jump B" instruction in NMBB.
+ NMBB->addSuccessor(B);
+ Cond.clear();
+ MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, B, NULL, Cond);
+
+ // Fix PHI nodes in B so they refer to NMBB instead of A
+ for (MachineBasicBlock::iterator i = B->begin(), e = B->end();
+ i != e && i->getOpcode() == TargetInstrInfo::PHI; ++i)
+ for (unsigned ni = 1, ne = i->getNumOperands(); ni != ne; ni += 2)
+ if (i->getOperand(ni+1).getMBB() == A)
+ i->getOperand(ni+1).setMBB(NMBB);
+
+ if (LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>())
+ LV->addNewBlock(NMBB, A);
+
+ if (MachineDominatorTree *MDT = getAnalysisIfAvailable<MachineDominatorTree>())
+ MDT->addNewBlock(NMBB, A);
+
+ return NMBB;
+}
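
The SplitCriticalEdge added above rewires a single CFG edge A -> B through a fresh block: A's successor list and terminator are updated, the new block gets an unconditional branch to B, and PHI operands in B that named A are redirected to the new block. Below is a minimal standalone sketch of that rewiring; ToyBlock and splitEdge are illustrative stand-ins, not the LLVM classes used in the patch.

// Toy sketch of critical-edge splitting; ToyBlock/splitEdge are hypothetical.
#include <algorithm>
#include <memory>
#include <vector>

struct ToyBlock {
  int Number = 0;
  std::vector<ToyBlock*> Succs;        // outgoing CFG edges
  std::vector<ToyBlock*> PhiIncoming;  // incoming-block operands of PHIs in this block
};

// Split the edge A -> B: A now branches to N, N unconditionally branches to B,
// and B's PHI operands that referred to A are rewritten to refer to N. This
// mirrors ReplaceUsesOfBlockWith, InsertBranch and the PHI fixup loop above.
ToyBlock *splitEdge(ToyBlock *A, ToyBlock *B,
                    std::vector<std::unique_ptr<ToyBlock>> &Blocks) {
  Blocks.push_back(std::make_unique<ToyBlock>());
  ToyBlock *N = Blocks.back().get();
  N->Number = static_cast<int>(Blocks.size());
  std::replace(A->Succs.begin(), A->Succs.end(), B, N);   // A -> N
  N->Succs.push_back(B);                                   // N -> B
  std::replace(B->PhiIncoming.begin(), B->PhiIncoming.end(), A, N);
  return N;
}
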
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index 3d02dfd..94716ee 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -89,11 +89,33 @@ namespace llvm {
///
void analyzePHINodes(const MachineFunction& Fn);
- // FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
- // SrcReg. This needs to be after any def or uses of SrcReg, but before
- // any subsequent point where control flow might jump out of the basic
- // block.
+ /// Split critical edges where necessary for good coalescer performance.
+ bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB);
+
+ /// isLiveOut - Determine if Reg is live out from MBB, when not
+ /// considering PHI nodes. This means that Reg is either killed by
+ /// a successor block or passed through one.
+ bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV);
+
+ /// isLiveIn - Determine if Reg is live in to MBB, not considering PHI
+ /// source registers. This means that Reg is either killed by MBB or passes
+ /// through it.
+ bool isLiveIn(unsigned Reg, const MachineBasicBlock &MBB,
+ LiveVariables &LV);
+
+ /// SplitCriticalEdge - Split a critical edge from A to B by
+ /// inserting a new MBB. Update branches in A and PHI instructions
+ /// in B. Return the new block.
+ MachineBasicBlock *SplitCriticalEdge(MachineBasicBlock *A,
+ MachineBasicBlock *B);
+
+ /// FindCopyInsertPoint - Find a safe place in MBB to insert a copy from
+ /// SrcReg when following the CFG edge to SuccMBB. This needs to be after
+ /// any def of SrcReg, but before any subsequent point where control flow
+ /// might jump out of the basic block.
MachineBasicBlock::iterator FindCopyInsertPoint(MachineBasicBlock &MBB,
+ MachineBasicBlock &SuccMBB,
unsigned SrcReg);
// SkipPHIsAndLabels - Copies need to be inserted after phi nodes and
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 3ed61a2..5f1f1f3 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -216,13 +216,14 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
// Check for explicit enable/disable of post-ra scheduling.
TargetSubtarget::AntiDepBreakMode AntiDepMode = TargetSubtarget::ANTIDEP_NONE;
+ SmallVector<TargetRegisterClass*, 4> CriticalPathRCs;
if (EnablePostRAScheduler.getPosition() > 0) {
if (!EnablePostRAScheduler)
return false;
} else {
// Check that post-RA scheduling is enabled for this target.
const TargetSubtarget &ST = Fn.getTarget().getSubtarget<TargetSubtarget>();
- if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode))
+ if (!ST.enablePostRAScheduler(OptLevel, AntiDepMode, CriticalPathRCs))
return false;
}
@@ -243,7 +244,7 @@ bool PostRAScheduler::runOnMachineFunction(MachineFunction &Fn) {
(ScheduleHazardRecognizer *)new SimpleHazardRecognizer();
AntiDepBreaker *ADB =
((AntiDepMode == TargetSubtarget::ANTIDEP_ALL) ?
- (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn) :
+ (AntiDepBreaker *)new AggressiveAntiDepBreaker(Fn, CriticalPathRCs) :
((AntiDepMode == TargetSubtarget::ANTIDEP_CRITICAL) ?
(AntiDepBreaker *)new CriticalAntiDepBreaker(Fn) : NULL));
@@ -602,7 +603,9 @@ void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge,
void SchedulePostRATDList::ReleaseSuccessors(SUnit *SU, bool IgnoreAntiDep) {
for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end();
I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
ReleaseSucc(SU, &*I, IgnoreAntiDep);
}
}
@@ -657,7 +660,7 @@ void SchedulePostRATDList::ListScheduleTopDown(
available = true;
for (SUnit::const_pred_iterator I = SUnits[i].Preds.begin(),
E = SUnits[i].Preds.end(); I != E; ++I) {
- if (I->getKind() != SDep::Anti) {
+ if ((I->getKind() != SDep::Anti) && (I->getKind() != SDep::Output)) {
available = false;
} else {
SUnits[i].NumPredsLeft -= 1;
@@ -736,7 +739,9 @@ void SchedulePostRATDList::ListScheduleTopDown(
AntiDepBreaker::AntiDepRegVector AntiDepRegs;
for (SUnit::const_pred_iterator I = FoundSUnit->Preds.begin(),
E = FoundSUnit->Preds.end(); I != E; ++I) {
- if ((I->getKind() == SDep::Anti) && !I->getSUnit()->isScheduled)
+ if (((I->getKind() == SDep::Anti) ||
+ (I->getKind() == SDep::Output)) &&
+ !I->getSUnit()->isScheduled)
AntiDepRegs.push_back(I->getReg());
}
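
A recurring theme in the scheduler changes above is that output (WAW) dependencies are now treated like anti (WAR) dependencies wherever breakable dependencies are ignored, since the aggressive anti-dependence breaker can rename both away. A small sketch of that filtering under hypothetical names (DepKind, countHardPreds):

// Toy sketch: count predecessors that must still be honored when anti and
// output dependencies may be broken; DepKind/countHardPreds are hypothetical.
#include <vector>

enum class DepKind { Data, Anti, Output, Order };

static unsigned countHardPreds(const std::vector<DepKind> &Preds,
                               bool IgnoreAntiDep) {
  unsigned N = 0;
  for (DepKind K : Preds)
    if (!IgnoreAntiDep || (K != DepKind::Anti && K != DepKind::Output))
      ++N;
  return N;
}
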
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index cce5ae8..8f62345 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -39,8 +39,10 @@
using namespace llvm;
static cl::opt<int> PreSplitLimit("pre-split-limit", cl::init(-1), cl::Hidden);
-static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1), cl::Hidden);
-static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1), cl::Hidden);
+static cl::opt<int> DeadSplitLimit("dead-split-limit", cl::init(-1),
+ cl::Hidden);
+static cl::opt<int> RestoreFoldLimit("restore-fold-limit", cl::init(-1),
+ cl::Hidden);
STATISTIC(NumSplits, "Number of intervals split");
STATISTIC(NumRemats, "Number of intervals split by rematerialization");
@@ -131,17 +133,14 @@ namespace {
private:
- MachineBasicBlock::iterator
- findNextEmptySlot(MachineBasicBlock*, MachineInstr*,
- SlotIndex&);
MachineBasicBlock::iterator
findSpillPoint(MachineBasicBlock*, MachineInstr*, MachineInstr*,
- SmallPtrSet<MachineInstr*, 4>&, SlotIndex&);
+ SmallPtrSet<MachineInstr*, 4>&);
MachineBasicBlock::iterator
findRestorePoint(MachineBasicBlock*, MachineInstr*, SlotIndex,
- SmallPtrSet<MachineInstr*, 4>&, SlotIndex&);
+ SmallPtrSet<MachineInstr*, 4>&);
int CreateSpillStackSlot(unsigned, const TargetRegisterClass *);
@@ -161,7 +160,6 @@ namespace {
bool Rematerialize(unsigned vreg, VNInfo* ValNo,
MachineInstr* DefMI,
MachineBasicBlock::iterator RestorePt,
- SlotIndex RestoreIdx,
SmallPtrSet<MachineInstr*, 4>& RefsInMBB);
MachineInstr* FoldSpill(unsigned vreg, const TargetRegisterClass* RC,
MachineInstr* DefMI,
@@ -208,24 +206,6 @@ X("pre-alloc-splitting", "Pre-Register Allocation Live Interval Splitting");
const PassInfo *const llvm::PreAllocSplittingID = &X;
-
-/// findNextEmptySlot - Find a gap after the given machine instruction in the
-/// instruction index map. If there isn't one, return end().
-MachineBasicBlock::iterator
-PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
- SlotIndex &SpotIndex) {
- MachineBasicBlock::iterator MII = MI;
- if (++MII != MBB->end()) {
- SlotIndex Index =
- LIs->findGapBeforeInstr(LIs->getInstructionIndex(MII));
- if (Index != SlotIndex()) {
- SpotIndex = Index;
- return MII;
- }
- }
- return MBB->end();
-}
-
/// findSpillPoint - Find a gap as far away from the given MI that's suitable
/// for spilling the current live interval. The index must be before any
/// defs and uses of the live interval register in the mbb. Return begin() if
@@ -233,8 +213,7 @@ PreAllocSplitting::findNextEmptySlot(MachineBasicBlock *MBB, MachineInstr *MI,
MachineBasicBlock::iterator
PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
MachineInstr *DefMI,
- SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
- SlotIndex &SpillIndex) {
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
MachineBasicBlock::iterator Pt = MBB->begin();
MachineBasicBlock::iterator MII = MI;
@@ -247,8 +226,6 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
while (MII != EndPt && !RefsInMBB.count(MII)) {
- SlotIndex Index = LIs->getInstructionIndex(MII);
-
// We can't insert the spill between the barrier (a call), and its
// corresponding call frame setup.
if (MII->getOpcode() == TRI->getCallFrameDestroyOpcode()) {
@@ -259,9 +236,8 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
}
}
continue;
- } else if (LIs->hasGapBeforeInstr(Index)) {
+ } else {
Pt = MII;
- SpillIndex = LIs->findGapBeforeInstr(Index, true);
}
if (RefsInMBB.count(MII))
@@ -281,8 +257,7 @@ PreAllocSplitting::findSpillPoint(MachineBasicBlock *MBB, MachineInstr *MI,
MachineBasicBlock::iterator
PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
SlotIndex LastIdx,
- SmallPtrSet<MachineInstr*, 4> &RefsInMBB,
- SlotIndex &RestoreIndex) {
+ SmallPtrSet<MachineInstr*, 4> &RefsInMBB) {
// FIXME: Allow spill to be inserted to the beginning of the mbb. Update mbb
// begin index accordingly.
MachineBasicBlock::iterator Pt = MBB->end();
@@ -306,7 +281,6 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
SlotIndex Index = LIs->getInstructionIndex(MII);
if (Index > LastIdx)
break;
- SlotIndex Gap = LIs->findGapBeforeInstr(Index);
// We can't insert a restore between the barrier (a call) and its
// corresponding call frame teardown.
@@ -315,9 +289,8 @@ PreAllocSplitting::findRestorePoint(MachineBasicBlock *MBB, MachineInstr *MI,
if (MII == EndPt || RefsInMBB.count(MII)) return Pt;
++MII;
} while (MII->getOpcode() != TRI->getCallFrameDestroyOpcode());
- } else if (Gap != SlotIndex()) {
+ } else {
Pt = MII;
- RestoreIndex = Gap;
}
if (RefsInMBB.count(MII))
@@ -339,7 +312,7 @@ int PreAllocSplitting::CreateSpillStackSlot(unsigned Reg,
if (I != IntervalSSMap.end()) {
SS = I->second;
} else {
- SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
IntervalSSMap[Reg] = SS;
}
@@ -364,10 +337,10 @@ PreAllocSplitting::IsAvailableInStack(MachineBasicBlock *DefMBB,
if (!DefMBB)
return false;
- DenseMap<unsigned, int>::iterator I = IntervalSSMap.find(Reg);
+ DenseMap<unsigned, int>::const_iterator I = IntervalSSMap.find(Reg);
if (I == IntervalSSMap.end())
return false;
- DenseMap<SlotIndex, SlotIndex>::iterator
+ DenseMap<SlotIndex, SlotIndex>::const_iterator
II = Def2SpillMap.find(DefIndex);
if (II == Def2SpillMap.end())
return false;
@@ -740,7 +713,7 @@ void PreAllocSplitting::ReconstructLiveInterval(LiveInterval* LI) {
DefIdx = DefIdx.getDefIndex();
assert(DI->getOpcode() != TargetInstrInfo::PHI &&
- "Following NewVN isPHIDef flag incorrect. Fix me!");
+ "PHI instr in code during pre-alloc splitting.");
VNInfo* NewVN = LI->getNextValue(DefIdx, 0, true, Alloc);
// If the def is a move, set the copy field.
@@ -896,25 +869,22 @@ void PreAllocSplitting::RenumberValno(VNInfo* VN) {
bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
MachineInstr* DefMI,
MachineBasicBlock::iterator RestorePt,
- SlotIndex RestoreIdx,
SmallPtrSet<MachineInstr*, 4>& RefsInMBB) {
MachineBasicBlock& MBB = *RestorePt->getParent();
MachineBasicBlock::iterator KillPt = BarrierMBB->end();
- SlotIndex KillIdx;
if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
- KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, KillIdx);
+ KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
else
- KillPt = findNextEmptySlot(DefMI->getParent(), DefMI, KillIdx);
+ KillPt = next(MachineBasicBlock::iterator(DefMI));
if (KillPt == DefMI->getParent()->end())
return false;
- TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI);
- LIs->InsertMachineInstrInMaps(prior(RestorePt), RestoreIdx);
+ TII->reMaterialize(MBB, RestorePt, VReg, 0, DefMI, TRI);
+ SlotIndex RematIdx = LIs->InsertMachineInstrInMaps(prior(RestorePt));
ReconstructLiveInterval(CurrLI);
- SlotIndex RematIdx = LIs->getInstructionIndex(prior(RestorePt));
RematIdx = RematIdx.getDefIndex();
RenumberValno(CurrLI->findDefinedVNInfoForRegInt(RematIdx));
@@ -955,7 +925,7 @@ MachineInstr* PreAllocSplitting::FoldSpill(unsigned vreg,
if (I != IntervalSSMap.end()) {
SS = I->second;
} else {
- SS = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ SS = MFI->CreateSpillStackObject(RC->getSize(), RC->getAlignment());
}
MachineInstr* FMI = TII->foldMemoryOperand(*MBB->getParent(),
@@ -1086,17 +1056,15 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
}
// Find a point to restore the value after the barrier.
- SlotIndex RestoreIndex;
MachineBasicBlock::iterator RestorePt =
- findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB, RestoreIndex);
+ findRestorePoint(BarrierMBB, Barrier, LR->end, RefsInMBB);
if (RestorePt == BarrierMBB->end()) {
DEBUG(errs() << "FAILED (could not find a suitable restore point).\n");
return false;
}
if (DefMI && LIs->isReMaterializable(*LI, ValNo, DefMI))
- if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt,
- RestoreIndex, RefsInMBB)) {
+ if (Rematerialize(LI->reg, ValNo, DefMI, RestorePt, RefsInMBB)) {
DEBUG(errs() << "success (remat).\n");
return true;
}
@@ -1114,7 +1082,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SpillIndex = LIs->getInstructionIndex(SpillMI);
} else {
MachineBasicBlock::iterator SpillPt =
- findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB, SpillIndex);
+ findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
if (SpillPt == BarrierMBB->begin()) {
DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
@@ -1124,10 +1092,10 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SS = CreateSpillStackSlot(CurrLI->reg, RC);
TII->storeRegToStackSlot(*BarrierMBB, SpillPt, CurrLI->reg, true, SS, RC);
SpillMI = prior(SpillPt);
- LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
+ SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
}
} else if (!IsAvailableInStack(DefMBB, CurrLI->reg, ValNo->def,
- RestoreIndex, SpillIndex, SS)) {
+ LIs->getZeroIndex(), SpillIndex, SS)) {
// If it's already split, just restore the value. There is no need to spill
// the def again.
if (!DefMI) {
@@ -1144,13 +1112,13 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
if (DefMBB == BarrierMBB) {
// Add spill after the def and the last use before the barrier.
SpillPt = findSpillPoint(BarrierMBB, Barrier, DefMI,
- RefsInMBB, SpillIndex);
+ RefsInMBB);
if (SpillPt == DefMBB->begin()) {
DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
}
} else {
- SpillPt = findNextEmptySlot(DefMBB, DefMI, SpillIndex);
+ SpillPt = next(MachineBasicBlock::iterator(DefMI));
if (SpillPt == DefMBB->end()) {
DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
@@ -1160,7 +1128,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
SS = CreateSpillStackSlot(CurrLI->reg, RC);
TII->storeRegToStackSlot(*DefMBB, SpillPt, CurrLI->reg, false, SS, RC);
SpillMI = prior(SpillPt);
- LIs->InsertMachineInstrInMaps(SpillMI, SpillIndex);
+ SpillIndex = LIs->InsertMachineInstrInMaps(SpillMI);
}
}
@@ -1170,6 +1138,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
// Add restore.
bool FoldedRestore = false;
+ SlotIndex RestoreIndex;
if (MachineInstr* LMI = FoldRestore(CurrLI->reg, RC, Barrier,
BarrierMBB, SS, RefsInMBB)) {
RestorePt = LMI;
@@ -1178,7 +1147,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
} else {
TII->loadRegFromStackSlot(*BarrierMBB, RestorePt, CurrLI->reg, SS, RC);
MachineInstr *LoadMI = prior(RestorePt);
- LIs->InsertMachineInstrInMaps(LoadMI, RestoreIndex);
+ RestoreIndex = LIs->InsertMachineInstrInMaps(LoadMI);
}
// Update spill stack slot live interval.
@@ -1398,7 +1367,7 @@ bool PreAllocSplitting::removeDeadSpills(SmallPtrSet<LiveInterval*, 8>& split) {
// Otherwise, this is a load-store case, so DCE them.
for (SmallPtrSet<MachineInstr*, 4>::iterator UI =
VNUseCount[CurrVN].begin(), UE = VNUseCount[CurrVN].end();
- UI != UI; ++UI) {
+ UI != UE; ++UI) {
LIs->RemoveMachineInstrFromMaps(*UI);
(*UI)->eraseFromParent();
}
diff --git a/lib/CodeGen/ProcessImplicitDefs.cpp b/lib/CodeGen/ProcessImplicitDefs.cpp
index 48567a0..455964b 100644
--- a/lib/CodeGen/ProcessImplicitDefs.cpp
+++ b/lib/CodeGen/ProcessImplicitDefs.cpp
@@ -77,6 +77,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
SmallVector<MachineInstr*, 8> ImpDefMIs;
MachineBasicBlock *Entry = fn.begin();
SmallPtrSet<MachineBasicBlock*,16> Visited;
+ SmallPtrSet<MachineInstr*, 8> ModInsts;
for (df_ext_iterator<MachineBasicBlock*, SmallPtrSet<MachineBasicBlock*,16> >
DFI = df_ext_begin(Entry, Visited), E = df_ext_end(Entry, Visited);
@@ -201,6 +202,8 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
MachineOperand &RMO = UI.getOperand();
MachineInstr *RMI = &*UI;
++UI;
+ if (ModInsts.count(RMI))
+ continue;
MachineBasicBlock *RMBB = RMI->getParent();
if (RMBB == MBB)
continue;
@@ -209,9 +212,14 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
if (tii_->isMoveInstr(*RMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
Reg == SrcReg) {
+ if (RMO.isKill()) {
+ LiveVariables::VarInfo& vi = lv_->getVarInfo(Reg);
+ vi.removeKill(RMI);
+ }
RMI->setDesc(tii_->get(TargetInstrInfo::IMPLICIT_DEF));
for (int j = RMI->getNumOperands() - 1, ee = 0; j > ee; --j)
RMI->RemoveOperand(j);
+ ModInsts.insert(RMI);
continue;
}
@@ -222,6 +230,7 @@ bool ProcessImplicitDefs::runOnMachineFunction(MachineFunction &fn) {
RMO.setIsKill();
}
}
+ ModInsts.clear();
ImpDefRegs.clear();
ImpDefMIs.clear();
}
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 230a20c..8905f75 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -264,7 +264,8 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) {
if ((unsigned)FrameIdx > MaxCSFrameIndex) MaxCSFrameIndex = FrameIdx;
} else {
// Spill it to the stack where we must.
- FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset);
+ FrameIdx = FFI->CreateFixedObject(RC->getSize(), FixedSlot->Offset,
+ true, false);
}
I->setFrameIdx(FrameIdx);
diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp
index 5507646..7fb3e6e 100644
--- a/lib/CodeGen/PseudoSourceValue.cpp
+++ b/lib/CodeGen/PseudoSourceValue.cpp
@@ -43,35 +43,14 @@ static const char *const PSVNames[] = {
// Eventually these should be uniqued on LLVMContext rather than in a managed
// static. For now, we can safely use the global context for the time being to
// squeak by.
-PseudoSourceValue::PseudoSourceValue() :
+PseudoSourceValue::PseudoSourceValue(enum ValueTy Subclass) :
Value(Type::getInt8PtrTy(getGlobalContext()),
- PseudoSourceValueVal) {}
+ Subclass) {}
void PseudoSourceValue::printCustom(raw_ostream &O) const {
O << PSVNames[this - *PSVs];
}
-namespace {
- /// FixedStackPseudoSourceValue - A specialized PseudoSourceValue
- /// for holding FixedStack values, which must include a frame
- /// index.
- class FixedStackPseudoSourceValue : public PseudoSourceValue {
- const int FI;
- public:
- explicit FixedStackPseudoSourceValue(int fi) : FI(fi) {}
-
- virtual bool isConstant(const MachineFrameInfo *MFI) const;
-
- virtual bool isAliased(const MachineFrameInfo *MFI) const;
-
- virtual bool mayAlias(const MachineFrameInfo *) const;
-
- virtual void printCustom(raw_ostream &OS) const {
- OS << "FixedStack" << FI;
- }
- };
-}
-
static ManagedStatic<std::map<int, const PseudoSourceValue *> > FSValues;
const PseudoSourceValue *PseudoSourceValue::getFixedStack(int FI) {
@@ -130,3 +109,7 @@ bool FixedStackPseudoSourceValue::mayAlias(const MachineFrameInfo *MFI) const {
// Spill slots will not alias any LLVM IR value.
return !MFI->isSpillSlotObjectIndex(FI);
}
+
+void FixedStackPseudoSourceValue::printCustom(raw_ostream &OS) const {
+ OS << "FixedStack" << FI;
+}
diff --git a/lib/CodeGen/RegAllocLocal.cpp b/lib/CodeGen/RegAllocLocal.cpp
index 1957c16..7bb020a 100644
--- a/lib/CodeGen/RegAllocLocal.cpp
+++ b/lib/CodeGen/RegAllocLocal.cpp
@@ -261,8 +261,8 @@ int RALocal::getStackSpaceFor(unsigned VirtReg, const TargetRegisterClass *RC) {
return SS; // Already has space allocated?
// Allocate a new stack object for this spill location...
- int FrameIdx = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
- RC->getAlignment(),true);
+ int FrameIdx = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
// Assign the slot...
StackSlotForVirtReg[VirtReg] = FrameIdx;
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 5757e47..c677d34 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -693,6 +693,11 @@ void PBQPRegAlloc::addStackInterval(const LiveInterval *spilled,
}
bool PBQPRegAlloc::mapPBQPToRegAlloc(const PBQP::Solution &solution) {
+
+ // Assert that this is a valid solution to the regalloc problem.
+ assert(solution.getCost() != std::numeric_limits<PBQP::PBQPNum>::infinity() &&
+ "Invalid (infinite cost) solution for PBQP problem.");
+
// Set to true if we have any spills
bool anotherRoundNeeded = false;
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index cf90aba..94680ed 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -100,11 +100,8 @@ void RegScavenger::enterBasicBlock(MachineBasicBlock *mbb) {
CalleeSavedRegs.set(CSRegs[i]);
}
- // RS used within emit{Pro,Epi}logue()
- if (mbb != MBB) {
- MBB = mbb;
- initRegState();
- }
+ MBB = mbb;
+ initRegState();
Tracking = false;
}
diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp
index 1363a92..6b27db2 100644
--- a/lib/CodeGen/ScheduleDAG.cpp
+++ b/lib/CodeGen/ScheduleDAG.cpp
@@ -214,7 +214,10 @@ void SUnit::ComputeDepth(bool IgnoreAntiDep) {
unsigned MaxPredDepth = 0;
for (SUnit::const_pred_iterator I = Cur->Preds.begin(),
E = Cur->Preds.end(); I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
SUnit *PredSU = I->getSUnit();
if (PredSU->isDepthCurrent)
MaxPredDepth = std::max(MaxPredDepth,
@@ -248,7 +251,10 @@ void SUnit::ComputeHeight(bool IgnoreAntiDep) {
unsigned MaxSuccHeight = 0;
for (SUnit::const_succ_iterator I = Cur->Succs.begin(),
E = Cur->Succs.end(); I != E; ++I) {
- if (IgnoreAntiDep && (I->getKind() == SDep::Anti)) continue;
+ if (IgnoreAntiDep &&
+ ((I->getKind() == SDep::Anti) || (I->getKind() == SDep::Output)))
+ continue;
+
SUnit *SuccSU = I->getSUnit();
if (SuccSU->isHeightCurrent)
MaxSuccHeight = std::max(MaxSuccHeight,
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index f8b219d..56dd533 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -112,12 +112,13 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
V = getUnderlyingObject(V);
if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V)) {
- MayAlias = PSV->mayAlias(MFI);
// For now, ignore PseudoSourceValues which may alias LLVM IR values
// because the code that uses this function has no way to cope with
// such aliases.
if (PSV->isAliased(MFI))
return 0;
+
+ MayAlias = PSV->mayAlias(MFI);
return V;
}
@@ -127,23 +128,6 @@ static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI,
return 0;
}
-static bool mayUnderlyingObjectForInstrAlias(const MachineInstr *MI,
- const MachineFrameInfo *MFI) {
- if (!MI->hasOneMemOperand() ||
- !(*MI->memoperands_begin())->getValue() ||
- (*MI->memoperands_begin())->isVolatile())
- return true;
-
- const Value *V = (*MI->memoperands_begin())->getValue();
- if (!V)
- return true;
-
- V = getUnderlyingObject(V);
- if (const PseudoSourceValue *PSV = dyn_cast<PseudoSourceValue>(V))
- return PSV->mayAlias(MFI);
- return true;
-}
-
void ScheduleDAGInstrs::StartBlock(MachineBasicBlock *BB) {
if (MachineLoop *ML = MLI.getLoopFor(BB))
if (BB == ML->getLoopLatch()) {
@@ -163,16 +147,15 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// We build scheduling units by walking a block's instruction list from bottom
// to top.
- // Remember where a generic side-effecting instruction is as we procede. If
- // ChainMMO is null, this is assumed to have arbitrary side-effects. If
- // ChainMMO is non-null, then Chain makes only a single memory reference.
- SUnit *Chain = 0;
- MachineMemOperand *ChainMMO = 0;
+ // Remember where a generic side-effecting instruction is as we proceed.
+ SUnit *BarrierChain = 0, *AliasChain = 0;
- // Memory references to specific known memory locations are tracked so that
- // they can be given more precise dependencies.
- std::map<const Value *, SUnit *> MemDefs;
- std::map<const Value *, std::vector<SUnit *> > MemUses;
+ // Memory references to specific known memory locations are tracked
+ // so that they can be given more precise dependencies. We track
+ // separately the known memory locations that may alias and those
+ // that are known not to alias.
+ std::map<const Value *, SUnit *> AliasMemDefs, NonAliasMemDefs;
+ std::map<const Value *, std::vector<SUnit *> > AliasMemUses, NonAliasMemUses;
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
@@ -347,114 +330,132 @@ void ScheduleDAGInstrs::BuildSchedGraph(AliasAnalysis *AA) {
// produce more precise dependence information.
#define STORE_LOAD_LATENCY 1
unsigned TrueMemOrderLatency = 0;
- if (TID.isCall() || TID.hasUnmodeledSideEffects()) {
- new_chain:
- // This is the conservative case. Add dependencies on all memory
- // references.
- if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- Chain = SU;
+ if (TID.isCall() || TID.hasUnmodeledSideEffects() ||
+ (MI->hasVolatileMemoryRef() &&
+ (!TID.mayLoad() || !MI->isInvariantLoad(AA)))) {
+ // Be conservative with these and add dependencies on all memory
+ // references, even those that are known to not alias.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) {
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
+ for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
+ NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) {
+ for (unsigned i = 0, e = I->second.size(); i != e; ++i)
+ I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
+ }
+ NonAliasMemDefs.clear();
+ NonAliasMemUses.clear();
+ // Add SU to the barrier chain.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ BarrierChain = SU;
+
+ // fall-through
+ new_alias_chain:
+ // Chain all possibly aliasing memory references though SU.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ AliasChain = SU;
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
- PendingLoads.clear();
- for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
- E = MemDefs.end(); I != E; ++I) {
+ for (std::map<const Value *, SUnit *>::iterator I = AliasMemDefs.begin(),
+ E = AliasMemDefs.end(); I != E; ++I) {
I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- I->second = SU;
}
for (std::map<const Value *, std::vector<SUnit *> >::iterator I =
- MemUses.begin(), E = MemUses.end(); I != E; ++I) {
+ AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) {
for (unsigned i = 0, e = I->second.size(); i != e; ++i)
I->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
- I->second.clear();
- I->second.push_back(SU);
}
- // See if it is known to just have a single memory reference.
- MachineInstr *ChainMI = Chain->getInstr();
- const TargetInstrDesc &ChainTID = ChainMI->getDesc();
- if (!ChainTID.isCall() &&
- !ChainTID.hasUnmodeledSideEffects() &&
- ChainMI->hasOneMemOperand() &&
- !(*ChainMI->memoperands_begin())->isVolatile() &&
- (*ChainMI->memoperands_begin())->getValue())
- // We know that the Chain accesses one specific memory location.
- ChainMMO = *ChainMI->memoperands_begin();
- else
- // Unknown memory accesses. Assume the worst.
- ChainMMO = 0;
+ PendingLoads.clear();
+ AliasMemDefs.clear();
+ AliasMemUses.clear();
} else if (TID.mayStore()) {
bool MayAlias = true;
TrueMemOrderLatency = STORE_LOAD_LATENCY;
if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
// A store to a specific PseudoSourceValue. Add precise dependencies.
- // Handle the def in MemDefs, if there is one.
- std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
- if (I != MemDefs.end()) {
+ // Record the def in MemDefs, first adding a dep if there is
+ // an existing def.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE) {
I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
/*isNormalMemory=*/true));
I->second = SU;
} else {
- MemDefs[V] = SU;
+ if (MayAlias)
+ AliasMemDefs[V] = SU;
+ else
+ NonAliasMemDefs[V] = SU;
}
// Handle the uses in MemUses, if there are any.
std::map<const Value *, std::vector<SUnit *> >::iterator J =
- MemUses.find(V);
- if (J != MemUses.end()) {
+ ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V));
+ std::map<const Value *, std::vector<SUnit *> >::iterator JE =
+ ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end());
+ if (J != JE) {
for (unsigned i = 0, e = J->second.size(); i != e; ++i)
J->second[i]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency,
/*Reg=*/0, /*isNormalMemory=*/true));
J->second.clear();
}
if (MayAlias) {
- // Add dependencies from all the PendingLoads, since without
- // memoperands we must assume they alias anything.
+ // Add dependencies from all the PendingLoads, i.e. loads
+ // with no underlying object.
for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k)
PendingLoads[k]->addPred(SDep(SU, SDep::Order, TrueMemOrderLatency));
- // Add a general dependence too, if needed.
- if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ // Add dependence on alias chain, if needed.
+ if (AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
}
+ // Add dependence on barrier chain, if needed.
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
} else {
// Treat all other stores conservatively.
- goto new_chain;
+ goto new_alias_chain;
}
} else if (TID.mayLoad()) {
bool MayAlias = true;
TrueMemOrderLatency = 0;
if (MI->isInvariantLoad(AA)) {
// Invariant load, no chain dependencies needed!
- } else if (const Value *V =
- getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
- // A load from a specific PseudoSourceValue. Add precise dependencies.
- std::map<const Value *, SUnit *>::iterator I = MemDefs.find(V);
- if (I != MemDefs.end())
- I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
- /*isNormalMemory=*/true));
- MemUses[V].push_back(SU);
-
- // Add a general dependence too, if needed.
- if (Chain && (!ChainMMO ||
- (ChainMMO->isStore() || ChainMMO->isVolatile())))
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- } else if (MI->hasVolatileMemoryRef()) {
- // Treat volatile loads conservatively. Note that this includes
- // cases where memoperand information is unavailable.
- goto new_chain;
} else {
- // A "MayAlias" load. Depend on the general chain, as well as on
- // all stores. In the absense of MachineMemOperand information,
- // we can't even assume that the load doesn't alias well-behaved
- // memory locations.
- if (Chain)
- Chain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
- for (std::map<const Value *, SUnit *>::iterator I = MemDefs.begin(),
- E = MemDefs.end(); I != E; ++I) {
- SUnit *DefSU = I->second;
- if (mayUnderlyingObjectForInstrAlias(DefSU->getInstr(), MFI))
- DefSU->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (const Value *V =
+ getUnderlyingObjectForInstr(MI, MFI, MayAlias)) {
+ // A load from a specific PseudoSourceValue. Add precise dependencies.
+ std::map<const Value *, SUnit *>::iterator I =
+ ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V));
+ std::map<const Value *, SUnit *>::iterator IE =
+ ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end());
+ if (I != IE)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0, /*Reg=*/0,
+ /*isNormalMemory=*/true));
+ if (MayAlias)
+ AliasMemUses[V].push_back(SU);
+ else
+ NonAliasMemUses[V].push_back(SU);
+ } else {
+ // A load with no underlying object. Depend on all
+ // potentially aliasing stores.
+ for (std::map<const Value *, SUnit *>::iterator I =
+ AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I)
+ I->second->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+
+ PendingLoads.push_back(SU);
+ MayAlias = true;
}
- PendingLoads.push_back(SU);
- }
+
+ // Add dependencies on alias and barrier chains, if needed.
+ if (MayAlias && AliasChain)
+ AliasChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ if (BarrierChain)
+ BarrierChain->addPred(SDep(SU, SDep::Order, /*Latency=*/0));
+ }
}
}
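
The rewritten dependence construction above keeps two worlds of memory references, locations that may alias IR memory and locations (such as spill slots) that provably do not, plus a barrier chain for calls and volatile accesses and an alias chain for conservatively handled stores. Below is a compressed sketch of that bookkeeping under hypothetical names; it processes operations in program order for brevity, whereas the real builder walks each block bottom-up.

// Toy sketch of the alias / non-alias chain bookkeeping; all names are
// hypothetical, and use/PendingLoads tracking is omitted for brevity.
#include <map>
#include <string>
#include <utility>
#include <vector>

struct MemOp {
  int Id;
  enum { Load, Store, Barrier } Kind;  // Barrier ~ call / unmodeled side effects
  std::string Loc;                     // "" = no known underlying object
  bool MayAlias;                       // false for spill-slot style locations
};

struct DepGraph { std::vector<std::pair<int, int>> OrderEdges; };  // earlier -> later

static void addMemOp(DepGraph &G, const MemOp &Op,
                     std::map<std::string, int> &AliasDefs,
                     std::map<std::string, int> &NonAliasDefs,
                     int &BarrierChain, int &AliasChain) {
  auto edge = [&](int From) {
    if (From >= 0) G.OrderEdges.push_back({From, Op.Id});
  };
  if (Op.Kind == MemOp::Barrier) {
    // Conservative: order against every tracked def, then become both chains.
    for (auto &D : NonAliasDefs) edge(D.second);
    for (auto &D : AliasDefs) edge(D.second);
    NonAliasDefs.clear();
    AliasDefs.clear();
    edge(BarrierChain); BarrierChain = Op.Id;
    edge(AliasChain);   AliasChain = Op.Id;
    return;
  }
  std::map<std::string, int> &Defs = Op.MayAlias ? AliasDefs : NonAliasDefs;
  auto I = Defs.find(Op.Loc);
  if (!Op.Loc.empty() && I != Defs.end())
    edge(I->second);                              // precise dep on the prior def
  if (Op.Kind == MemOp::Store && !Op.Loc.empty())
    Defs[Op.Loc] = Op.Id;                         // record this store as the def
  if (Op.MayAlias)
    edge(AliasChain);                             // may-alias refs join the alias chain
  edge(BarrierChain);                             // everything orders after a barrier
}
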
diff --git a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
index fbe40b6..38839c4 100644
--- a/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
+++ b/lib/CodeGen/SelectionDAG/CallingConvLower.cpp
@@ -77,6 +77,21 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl<ISD::InputArg> &Ins,
}
}
+/// CheckReturn - Analyze the return values of a function, returning true if
+/// the return can be performed without sret-demotion, and false otherwise.
+bool CCState::CheckReturn(const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ CCAssignFn Fn) {
+ // Determine which register each value should be copied into.
+ for (unsigned i = 0, e = OutTys.size(); i != e; ++i) {
+ EVT VT = OutTys[i];
+ ISD::ArgFlagsTy ArgFlags = ArgsFlags[i];
+ if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this))
+ return false;
+ }
+ return true;
+}
+
/// AnalyzeReturn - Analyze the returned values of a return,
/// incorporating info about the result values into this state.
void CCState::AnalyzeReturn(const SmallVectorImpl<ISD::OutputArg> &Outs,
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 5f70cb8..06ffdd6 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -37,7 +37,6 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
-#include <set>
using namespace llvm;
STATISTIC(NodesCombined , "Number of dag nodes combined");
@@ -4443,14 +4442,13 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
SDValue Chain = N->getOperand(0);
SDValue N1 = N->getOperand(1);
SDValue N2 = N->getOperand(2);
- ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
- // never taken branch, fold to chain
- if (N1C && N1C->isNullValue())
- return Chain;
- // unconditional branch
- if (N1C && N1C->getAPIntValue() == 1)
- return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other, Chain, N2);
+ // If N1 is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
// fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal
// on the target.
if (N1.getOpcode() == ISD::SETCC &&
@@ -4517,22 +4515,18 @@ SDValue DAGCombiner::visitBR_CC(SDNode *N) {
CondCodeSDNode *CC = cast<CondCodeSDNode>(N->getOperand(1));
SDValue CondLHS = N->getOperand(2), CondRHS = N->getOperand(3);
+ // If the condition is a constant we could fold this into a fallthrough or unconditional
+ // branch. However that doesn't happen very often in normal code, because
+ // Instcombine/SimplifyCFG should have handled the available opportunities.
+ // If we did this folding here, it would be necessary to update the
+ // MachineBasicBlock CFG, which is awkward.
+
// Use SimplifySetCC to simplify SETCC's.
SDValue Simp = SimplifySetCC(TLI.getSetCCResultType(CondLHS.getValueType()),
CondLHS, CondRHS, CC->get(), N->getDebugLoc(),
false);
if (Simp.getNode()) AddToWorkList(Simp.getNode());
- ConstantSDNode *SCCC = dyn_cast_or_null<ConstantSDNode>(Simp.getNode());
-
- // fold br_cc true, dest -> br dest (unconditional branch)
- if (SCCC && !SCCC->isNullValue())
- return DAG.getNode(ISD::BR, N->getDebugLoc(), MVT::Other,
- N->getOperand(0), N->getOperand(4));
- // fold br_cc false, dest -> unconditional fall through
- if (SCCC && SCCC->isNullValue())
- return N->getOperand(0);
-
// fold to a simpler setcc
if (Simp.getNode() && Simp.getOpcode() == ISD::SETCC)
return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other,
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 8e955af..7dbc136 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -43,6 +43,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -324,82 +325,12 @@ bool FastISel::SelectCall(User *I) {
unsigned IID = F->getIntrinsicID();
switch (IID) {
default: break;
- case Intrinsic::dbg_stoppoint: {
- DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
- if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::None))
- setCurDebugLoc(ExtractDebugLocation(*SPI, MF.getDebugLocInfo()));
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::dbg_region_start:
+ case Intrinsic::dbg_region_end:
+ case Intrinsic::dbg_func_start:
+ // FIXME - Remove these instructions once the dust settles.
return true;
- }
- case Intrinsic::dbg_region_start: {
- DbgRegionStartInst *RSI = cast<DbgRegionStartInst>(I);
- if (isValidDebugInfoIntrinsic(*RSI, CodeGenOpt::None) && DW
- && DW->ShouldEmitDwarfDebug()) {
- unsigned ID =
- DW->RecordRegionStart(RSI->getContext());
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- BuildMI(MBB, DL, II).addImm(ID);
- }
- return true;
- }
- case Intrinsic::dbg_region_end: {
- DbgRegionEndInst *REI = cast<DbgRegionEndInst>(I);
- if (isValidDebugInfoIntrinsic(*REI, CodeGenOpt::None) && DW
- && DW->ShouldEmitDwarfDebug()) {
- unsigned ID = 0;
- DISubprogram Subprogram(REI->getContext());
- if (isInlinedFnEnd(*REI, MF.getFunction())) {
- // This is end of an inlined function.
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- ID = DW->RecordInlinedFnEnd(Subprogram);
- if (ID)
- // Returned ID is 0 if this is unbalanced "end of inlined
- // scope". This could happen if optimizer eats dbg intrinsics
- // or "beginning of inlined scope" is not recoginized due to
- // missing location info. In such cases, ignore this region.end.
- BuildMI(MBB, DL, II).addImm(ID);
- } else {
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- ID = DW->RecordRegionEnd(REI->getContext());
- BuildMI(MBB, DL, II).addImm(ID);
- }
- }
- return true;
- }
- case Intrinsic::dbg_func_start: {
- DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
- if (!isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::None) || !DW
- || !DW->ShouldEmitDwarfDebug())
- return true;
-
- if (isInlinedFnStart(*FSI, MF.getFunction())) {
- // This is a beginning of an inlined function.
-
- // If llvm.dbg.func.start is seen in a new block before any
- // llvm.dbg.stoppoint intrinsic then the location info is unknown.
- // FIXME : Why DebugLoc is reset at the beginning of each block ?
- DebugLoc PrevLoc = DL;
- if (PrevLoc.isUnknown())
- return true;
- // Record the source line.
- setCurDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo()));
-
- DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
- DISubprogram SP(FSI->getSubprogram());
- unsigned LabelID =
- DW->RecordInlinedFnStart(SP,DICompileUnit(PrevLocTpl.Scope),
- PrevLocTpl.Line, PrevLocTpl.Col);
- const TargetInstrDesc &II = TII.get(TargetInstrInfo::DBG_LABEL);
- BuildMI(MBB, DL, II).addImm(LabelID);
- return true;
- }
-
- // This is a beginning of a new function.
- MF.setDefaultDebugLoc(ExtractDebugLocation(*FSI, MF.getDebugLocInfo()));
-
- // llvm.dbg.func_start also defines beginning of function scope.
- DW->RecordRegionStart(FSI->getSubprogram());
- return true;
- }
case Intrinsic::dbg_declare: {
DbgDeclareInst *DI = cast<DbgDeclareInst>(I);
if (!isValidDebugInfoIntrinsic(*DI, CodeGenOpt::None) || !DW
@@ -416,11 +347,13 @@ bool FastISel::SelectCall(User *I) {
StaticAllocaMap.find(AI);
if (SI == StaticAllocaMap.end()) break; // VLAs.
int FI = SI->second;
- if (MMI)
- MMI->setVariableDbgInfo(DI->getVariable(), FI);
-#ifndef ATTACH_DEBUG_INFO_TO_AN_INSN
- DW->RecordVariable(DI->getVariable(), FI);
-#endif
+ if (MMI) {
+ MetadataContext &TheMetadata =
+ DI->getParent()->getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+ MDNode *Dbg = TheMetadata.getMD(MDDbgKind, DI);
+ MMI->setVariableDbgInfo(DI->getVariable(), FI, Dbg);
+ }
return true;
}
case Intrinsic::eh_exception: {
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
index da311ed..52b0832 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp
@@ -497,7 +497,7 @@ InstrEmitter::EmitCopyToRegClassNode(SDNode *Node,
assert(isNew && "Node emitted out of order - early");
}
-/// EmitNode - Generate machine code for an node and needed dependencies.
+/// EmitNode - Generate machine code for a node and needed dependencies.
///
void InstrEmitter::EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap,
diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h
index bb4634d..91817e4 100644
--- a/lib/CodeGen/SelectionDAG/InstrEmitter.h
+++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h
@@ -97,7 +97,7 @@ public:
/// MachineInstr.
static unsigned CountOperands(SDNode *Node);
- /// EmitNode - Generate machine code for an node and needed dependencies.
+ /// EmitNode - Generate machine code for a node and needed dependencies.
///
void EmitNode(SDNode *Node, bool IsClone, bool IsCloned,
DenseMap<SDValue, unsigned> &VRBaseMap,
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index f389f7f..4f0a229 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -148,8 +148,11 @@ private:
SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32,
RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80,
RTLIB::Libcall Call_PPCF128);
- SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I16,
- RTLIB::Libcall Call_I32, RTLIB::Libcall Call_I64,
+ SDValue ExpandIntLibCall(SDNode *Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
+ RTLIB::Libcall Call_I16,
+ RTLIB::Libcall Call_I32,
+ RTLIB::Libcall Call_I64,
RTLIB::Libcall Call_I128);
SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl);
@@ -1810,10 +1813,19 @@ SDValue SelectionDAGLegalize::ExpandBUILD_VECTOR(SDNode *Node) {
CV.push_back(const_cast<ConstantFP *>(V->getConstantFPValue()));
} else if (ConstantSDNode *V =
dyn_cast<ConstantSDNode>(Node->getOperand(i))) {
- CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ if (OpVT == EltVT)
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
+ else {
+ // If OpVT and EltVT don't match, EltVT is not legal and the
+ // element values have been promoted/truncated earlier. Undo this;
+ // we don't want a v16i8 to become a v16i32 for example.
+ const ConstantInt *CI = V->getConstantIntValue();
+ CV.push_back(ConstantInt::get(EltVT.getTypeForEVT(*DAG.getContext()),
+ CI->getZExtValue()));
+ }
} else {
assert(Node->getOperand(i).getOpcode() == ISD::UNDEF);
- const Type *OpNTy = OpVT.getTypeForEVT(*DAG.getContext());
+ const Type *OpNTy = EltVT.getTypeForEVT(*DAG.getContext());
CV.push_back(UndefValue::get(OpNTy));
}
}
@@ -1909,6 +1921,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node,
}
SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
+ RTLIB::Libcall Call_I8,
RTLIB::Libcall Call_I16,
RTLIB::Libcall Call_I32,
RTLIB::Libcall Call_I64,
@@ -1916,9 +1929,10 @@ SDValue SelectionDAGLegalize::ExpandIntLibCall(SDNode* Node, bool isSigned,
RTLIB::Libcall LC;
switch (Node->getValueType(0).getSimpleVT().SimpleTy) {
default: llvm_unreachable("Unexpected request for libcall!");
- case MVT::i16: LC = Call_I16; break;
- case MVT::i32: LC = Call_I32; break;
- case MVT::i64: LC = Call_I64; break;
+ case MVT::i8: LC = Call_I8; break;
+ case MVT::i16: LC = Call_I16; break;
+ case MVT::i32: LC = Call_I32; break;
+ case MVT::i64: LC = Call_I64; break;
case MVT::i128: LC = Call_I128; break;
}
return ExpandLibCall(LC, Node, isSigned);
@@ -2624,10 +2638,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Tmp1 = DAG.getNode(ISD::MUL, dl, VT, Tmp1, Tmp3);
Tmp1 = DAG.getNode(ISD::SUB, dl, VT, Tmp2, Tmp1);
} else if (isSigned) {
- Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SREM_I16, RTLIB::SREM_I32,
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SREM_I8,
+ RTLIB::SREM_I16, RTLIB::SREM_I32,
RTLIB::SREM_I64, RTLIB::SREM_I128);
} else {
- Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UREM_I16, RTLIB::UREM_I32,
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UREM_I8,
+ RTLIB::UREM_I16, RTLIB::UREM_I32,
RTLIB::UREM_I64, RTLIB::UREM_I128);
}
Results.push_back(Tmp1);
@@ -2643,10 +2661,14 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Node->getOperand(0),
Node->getOperand(1));
else if (isSigned)
- Tmp1 = ExpandIntLibCall(Node, true, RTLIB::SDIV_I16, RTLIB::SDIV_I32,
+ Tmp1 = ExpandIntLibCall(Node, true,
+ RTLIB::SDIV_I8,
+ RTLIB::SDIV_I16, RTLIB::SDIV_I32,
RTLIB::SDIV_I64, RTLIB::SDIV_I128);
else
- Tmp1 = ExpandIntLibCall(Node, false, RTLIB::UDIV_I16, RTLIB::UDIV_I32,
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::UDIV_I8,
+ RTLIB::UDIV_I16, RTLIB::UDIV_I32,
RTLIB::UDIV_I64, RTLIB::UDIV_I128);
Results.push_back(Tmp1);
break;
@@ -2691,7 +2713,9 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
Node->getOperand(1)));
break;
}
- Tmp1 = ExpandIntLibCall(Node, false, RTLIB::MUL_I16, RTLIB::MUL_I32,
+ Tmp1 = ExpandIntLibCall(Node, false,
+ RTLIB::MUL_I8,
+ RTLIB::MUL_I16, RTLIB::MUL_I32,
RTLIB::MUL_I64, RTLIB::MUL_I128);
Results.push_back(Tmp1);
break;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 98e7317..4530ffc 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1270,11 +1270,12 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
return Val;
FoldingSetNodeID ID;
+ SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
+ AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
return SDValue(E, 0);
CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
- SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
@@ -1378,7 +1379,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT, unsigned minAlign) {
unsigned StackAlign =
std::max((unsigned)TLI.getTargetData()->getPrefTypeAlignment(Ty), minAlign);
- int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign);
+ int FrameIdx = FrameInfo->CreateStackObject(ByteSize, StackAlign, false);
return getFrameIndex(FrameIdx, TLI.getPointerTy());
}
@@ -1394,7 +1395,7 @@ SDValue SelectionDAG::CreateStackTemporary(EVT VT1, EVT VT2) {
TD->getPrefTypeAlignment(Ty2));
MachineFrameInfo *FrameInfo = getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align);
+ int FrameIdx = FrameInfo->CreateStackObject(Bytes, Align, false);
return getFrameIndex(FrameIdx, TLI.getPointerTy());
}
@@ -5814,9 +5815,8 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
print_types(OS, G);
- OS << " ";
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
- if (i) OS << ", ";
+ if (i) OS << ", "; else OS << " ";
OS << (void*)getOperand(i).getNode();
if (unsigned RN = getOperand(i).getResNo())
OS << ":" << RN;
@@ -5916,7 +5916,8 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
APInt &SplatUndef,
unsigned &SplatBitSize,
bool &HasAnyUndefs,
- unsigned MinSplatBits) {
+ unsigned MinSplatBits,
+ bool isBigEndian) {
EVT VT = getValueType(0);
assert(VT.isVector() && "Expected a vector type");
unsigned sz = VT.getSizeInBits();
@@ -5933,12 +5934,14 @@ bool BuildVectorSDNode::isConstantSplat(APInt &SplatValue,
unsigned int nOps = getNumOperands();
assert(nOps > 0 && "isConstantSplat has 0-size build vector");
unsigned EltBitSize = VT.getVectorElementType().getSizeInBits();
- for (unsigned i = 0; i < nOps; ++i) {
+
+ for (unsigned j = 0; j < nOps; ++j) {
+ unsigned i = isBigEndian ? nOps-1-j : j;
SDValue OpVal = getOperand(i);
- unsigned BitPos = i * EltBitSize;
+ unsigned BitPos = j * EltBitSize;
if (OpVal.getOpcode() == ISD::UNDEF)
- SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos +EltBitSize);
+ SplatUndef |= APInt::getBitsSet(sz, BitPos, BitPos + EltBitSize);
else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(OpVal))
SplatValue |= (APInt(CN->getAPIntValue()).zextOrTrunc(EltBitSize).
zextOrTrunc(sz) << BitPos);
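
The new isBigEndian parameter to isConstantSplat only changes which element feeds the low bits of the accumulated splat constant; the bit position stays j * EltBitSize either way. A small arithmetic sketch using uint64_t in place of APInt (packSplat is a hypothetical helper):

// Toy sketch of endian-aware splat packing, mirroring the i/j index split above.
#include <cstdint>
#include <vector>

static uint64_t packSplat(const std::vector<uint8_t> &Elts, bool IsBigEndian) {
  uint64_t Splat = 0;
  const unsigned EltBits = 8;
  for (unsigned j = 0, e = Elts.size(); j != e; ++j) {
    unsigned i = IsBigEndian ? e - 1 - j : j;      // which element to read
    Splat |= uint64_t(Elts[i]) << (j * EltBits);   // bit position is endian-agnostic
  }
  return Splat;
}
// For {0x12, 0x34, 0x56, 0x78}: little-endian packs 0x78563412 and big-endian
// packs 0x12345678, i.e. the value obtained by reading the vector's bytes as
// one integer on the respective target.
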
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
index c0d2a4d..90fd95e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.cpp
@@ -26,6 +26,7 @@
#include "llvm/Instructions.h"
#include "llvm/Intrinsics.h"
#include "llvm/IntrinsicInst.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/CodeGen/FastISel.h"
#include "llvm/CodeGen/GCStrategy.h"
@@ -304,7 +305,7 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
TySize *= CUI->getZExtValue(); // Get total allocated size.
if (TySize == 0) TySize = 1; // Don't create zero-sized stack objects.
StaticAllocaMap[AI] =
- MF->getFrameInfo()->CreateStackObject(TySize, Align);
+ MF->getFrameInfo()->CreateStackObject(TySize, Align, false);
}
for (; BB != EB; ++BB)
@@ -334,25 +335,6 @@ void FunctionLoweringInfo::set(Function &fn, MachineFunction &mf,
DebugLoc DL;
for (BasicBlock::iterator
I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (CallInst *CI = dyn_cast<CallInst>(I)) {
- if (Function *F = CI->getCalledFunction()) {
- switch (F->getIntrinsicID()) {
- default: break;
- case Intrinsic::dbg_stoppoint: {
- DbgStopPointInst *SPI = cast<DbgStopPointInst>(I);
- if (isValidDebugInfoIntrinsic(*SPI, CodeGenOpt::Default))
- DL = ExtractDebugLocation(*SPI, MF->getDebugLocInfo());
- break;
- }
- case Intrinsic::dbg_func_start: {
- DbgFuncStartInst *FSI = cast<DbgFuncStartInst>(I);
- if (isValidDebugInfoIntrinsic(*FSI, CodeGenOpt::Default))
- DL = ExtractDebugLocation(*FSI, MF->getDebugLocInfo());
- break;
- }
- }
- }
- }
PN = dyn_cast<PHINode>(I);
if (!PN || PN->use_empty()) continue;
@@ -947,58 +929,143 @@ SDValue SelectionDAGLowering::getValue(const Value *V) {
return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
}
+/// Get the EVTs and ArgFlags collections that represent the return type
+/// of the given function. This does not require a DAG or a return value, and
+/// is suitable for use before any DAGs for the function are constructed.
+static void getReturnInfo(const Type* ReturnType,
+ Attributes attr, SmallVectorImpl<EVT> &OutVTs,
+ SmallVectorImpl<ISD::ArgFlagsTy> &OutFlags,
+ TargetLowering &TLI,
+ SmallVectorImpl<uint64_t> *Offsets = 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, ReturnType, ValueVTs, Offsets);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) return;
+
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ if (attr & Attribute::SExt)
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (attr & Attribute::ZExt)
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
+
+ unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT);
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (attr & Attribute::InReg)
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (attr & Attribute::SExt)
+ Flags.setSExt();
+ else if (attr & Attribute::ZExt)
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i) {
+ OutVTs.push_back(PartVT);
+ OutFlags.push_back(Flags);
+ }
+ }
+}
void SelectionDAGLowering::visitRet(ReturnInst &I) {
SDValue Chain = getControlRoot();
SmallVector<ISD::OutputArg, 8> Outs;
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+ if (!FLI.CanLowerReturn) {
+ unsigned DemoteReg = FLI.DemoteRegister;
+ const Function *F = I.getParent()->getParent();
+
+ // Emit a store of the return value through the virtual register.
+ // Leave Outs empty so that LowerReturn won't try to load return
+ // registers the usual way.
+ SmallVector<EVT, 1> PtrValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F->getReturnType()),
+ PtrValueVTs);
+
+ SDValue RetPtr = DAG.getRegister(DemoteReg, PtrValueVTs[0]);
+ SDValue RetOp = getValue(I.getOperand(0));
+
SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+ SmallVector<uint64_t, 4> Offsets;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs, &Offsets);
unsigned NumValues = ValueVTs.size();
- if (NumValues == 0) continue;
-
- SDValue RetOp = getValue(I.getOperand(i));
- for (unsigned j = 0, f = NumValues; j != f; ++j) {
- EVT VT = ValueVTs[j];
- ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
-
- const Function *F = I.getParent()->getParent();
- if (F->paramHasAttr(0, Attribute::SExt))
- ExtendKind = ISD::SIGN_EXTEND;
- else if (F->paramHasAttr(0, Attribute::ZExt))
- ExtendKind = ISD::ZERO_EXTEND;
+ SmallVector<SDValue, 4> Chains(NumValues);
+ EVT PtrVT = PtrValueVTs[0];
+ for (unsigned i = 0; i != NumValues; ++i)
+ Chains[i] = DAG.getStore(Chain, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + i),
+ DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, RetPtr,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ NULL, Offsets[i], false, 0);
+ Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ }
+ else {
+ for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues == 0) continue;
+
+ SDValue RetOp = getValue(I.getOperand(i));
+ for (unsigned j = 0, f = NumValues; j != f; ++j) {
+ EVT VT = ValueVTs[j];
+
+ ISD::NodeType ExtendKind = ISD::ANY_EXTEND;
+
+ const Function *F = I.getParent()->getParent();
+ if (F->paramHasAttr(0, Attribute::SExt))
+ ExtendKind = ISD::SIGN_EXTEND;
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ ExtendKind = ISD::ZERO_EXTEND;
+
+ // FIXME: C calling convention requires the return type to be promoted to
+ // at least 32-bit. But this is not necessary for non-C calling
+ // conventions. The frontend should mark functions whose return values
+ // require promoting with signext or zeroext attributes.
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
+ EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
+ if (VT.bitsLT(MinVT))
+ VT = MinVT;
+ }
- // FIXME: C calling convention requires the return type to be promoted to
- // at least 32-bit. But this is not necessary for non-C calling
- // conventions. The frontend should mark functions whose return values
- // require promoting with signext or zeroext attributes.
- if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) {
- EVT MinVT = TLI.getRegisterType(*DAG.getContext(), MVT::i32);
- if (VT.bitsLT(MinVT))
- VT = MinVT;
+ unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
+ EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
+ SmallVector<SDValue, 4> Parts(NumParts);
+ getCopyToParts(DAG, getCurDebugLoc(),
+ SDValue(RetOp.getNode(), RetOp.getResNo() + j),
+ &Parts[0], NumParts, PartVT, ExtendKind);
+
+ // 'inreg' on function refers to return value
+ ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
+ if (F->paramHasAttr(0, Attribute::InReg))
+ Flags.setInReg();
+
+ // Propagate extension type if any
+ if (F->paramHasAttr(0, Attribute::SExt))
+ Flags.setSExt();
+ else if (F->paramHasAttr(0, Attribute::ZExt))
+ Flags.setZExt();
+
+ for (unsigned i = 0; i < NumParts; ++i)
+ Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
}
-
- unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
- EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
- SmallVector<SDValue, 4> Parts(NumParts);
- getCopyToParts(DAG, getCurDebugLoc(),
- SDValue(RetOp.getNode(), RetOp.getResNo() + j),
- &Parts[0], NumParts, PartVT, ExtendKind);
-
- // 'inreg' on function refers to return value
- ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy();
- if (F->paramHasAttr(0, Attribute::InReg))
- Flags.setInReg();
-
- // Propagate extension type if any
- if (F->paramHasAttr(0, Attribute::SExt))
- Flags.setSExt();
- else if (F->paramHasAttr(0, Attribute::ZExt))
- Flags.setZExt();
-
- for (unsigned i = 0; i < NumParts; ++i)
- Outs.push_back(ISD::OutputArg(Flags, Parts[i], /*isfixed=*/true));
}
}
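
When CanLowerReturn is false, the return value is "sret-demoted": instead of travelling back in registers it is stored through a hidden pointer argument supplied by the caller, which is what the stores through RetPtr above implement. At the source level this corresponds roughly to the rewrite below (an illustrative sketch, not generated code and not part of the patch):

// Illustrative only: what sret demotion amounts to at the source level.
#include <cstring>

struct Big { long v[8]; };                  // too large to return in registers

// Conceptually, "Big f()" is lowered as if it were written like this:
void f_demoted(Big *sret_out) {             // hidden pointer parameter
  Big tmp = {};
  tmp.v[0] = 42;
  std::memcpy(sret_out, &tmp, sizeof(Big)); // store through the sret slot
}

int main() {
  Big result;
  f_demoted(&result);                       // caller provides the storage
  return result.v[0] == 42 ? 0 : 1;
}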
@@ -1691,19 +1758,19 @@ bool SelectionDAGLowering::handleJTSwitchCase(CaseRec& CR,
Case& FrontCase = *CR.Range.first;
Case& BackCase = *(CR.Range.second-1);
- const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
- size_t TSize = 0;
+ APInt TSize(First.getBitWidth(), 0);
for (CaseItr I = CR.Range.first, E = CR.Range.second;
I!=E; ++I)
TSize += I->size();
- if (!areJTsAllowed(TLI) || TSize <= 3)
+ if (!areJTsAllowed(TLI) || TSize.ult(APInt(First.getBitWidth(), 4)))
return false;
APInt Range = ComputeRange(First, Last);
- double Density = (double)TSize / Range.roundToDouble();
+ double Density = TSize.roundToDouble() / Range.roundToDouble();
if (Density < 0.4)
return false;
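
The jump-table heuristic now counts cases in an APInt and only converts to double at the end, so very wide switch types cannot overflow an intermediate size_t. The decision itself is unchanged: emit a jump table only when there are at least 4 cases and they cover at least 40% of the numeric range. A small worked check of that rule (standalone sketch, not LLVM code):

// Standalone sketch of the density test: numCases cases spread over range values.
#include <cstdio>

static bool worthJumpTable(double numCases, double range) {
  if (numCases < 4) return false;          // too few cases for a table
  return (numCases / range) >= 0.4;        // at least 40% dense
}

int main() {
  std::printf("%d\n", worthJumpTable(10, 16));   // 1: dense enough
  std::printf("%d\n", worthJumpTable(10, 1000)); // 0: too sparse
  return 0;
}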
@@ -1797,32 +1864,34 @@ bool SelectionDAGLowering::handleBTSplitSwitchCase(CaseRec& CR,
// Size is the number of Cases represented by this range.
unsigned Size = CR.Range.second - CR.Range.first;
- const APInt& First = cast<ConstantInt>(FrontCase.Low)->getValue();
- const APInt& Last = cast<ConstantInt>(BackCase.High)->getValue();
+ const APInt &First = cast<ConstantInt>(FrontCase.Low)->getValue();
+ const APInt &Last = cast<ConstantInt>(BackCase.High)->getValue();
double FMetric = 0;
CaseItr Pivot = CR.Range.first + Size/2;
// Select optimal pivot, maximizing sum density of LHS and RHS. This will
// (heuristically) allow us to emit JumpTables later.
- size_t TSize = 0;
+ APInt TSize(First.getBitWidth(), 0);
for (CaseItr I = CR.Range.first, E = CR.Range.second;
I!=E; ++I)
TSize += I->size();
- size_t LSize = FrontCase.size();
- size_t RSize = TSize-LSize;
+ APInt LSize = FrontCase.size();
+ APInt RSize = TSize-LSize;
DEBUG(errs() << "Selecting best pivot: \n"
<< "First: " << First << ", Last: " << Last <<'\n'
<< "LSize: " << LSize << ", RSize: " << RSize << '\n');
for (CaseItr I = CR.Range.first, J=I+1, E = CR.Range.second;
J!=E; ++I, ++J) {
- const APInt& LEnd = cast<ConstantInt>(I->High)->getValue();
- const APInt& RBegin = cast<ConstantInt>(J->Low)->getValue();
+ const APInt &LEnd = cast<ConstantInt>(I->High)->getValue();
+ const APInt &RBegin = cast<ConstantInt>(J->Low)->getValue();
APInt Range = ComputeRange(LEnd, RBegin);
assert((Range - 2ULL).isNonNegative() &&
"Invalid case distance");
- double LDensity = (double)LSize / (LEnd - First + 1ULL).roundToDouble();
- double RDensity = (double)RSize / (Last - RBegin + 1ULL).roundToDouble();
+ double LDensity = (double)LSize.roundToDouble() /
+ (LEnd - First + 1ULL).roundToDouble();
+ double RDensity = (double)RSize.roundToDouble() /
+ (Last - RBegin + 1ULL).roundToDouble();
double Metric = Range.logBase2()*(LDensity+RDensity);
// Should always split in some non-trivial place
DEBUG(errs() <<"=>Step\n"
@@ -3842,112 +3911,12 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
I.getOperand(1), 0, I.getOperand(2), 0));
return 0;
}
- case Intrinsic::dbg_stoppoint: {
- DbgStopPointInst &SPI = cast<DbgStopPointInst>(I);
- if (isValidDebugInfoIntrinsic(SPI, CodeGenOpt::Default)) {
- MachineFunction &MF = DAG.getMachineFunction();
- DebugLoc Loc = ExtractDebugLocation(SPI, MF.getDebugLocInfo());
- setCurDebugLoc(Loc);
-
- if (OptLevel == CodeGenOpt::None)
- DAG.setRoot(DAG.getDbgStopPoint(Loc, getRoot(),
- SPI.getLine(),
- SPI.getColumn(),
- SPI.getContext()));
- }
+ case Intrinsic::dbg_stoppoint:
+ case Intrinsic::dbg_region_start:
+ case Intrinsic::dbg_region_end:
+ case Intrinsic::dbg_func_start:
+ // FIXME - Remove these instructions once the dust settles.
return 0;
- }
- case Intrinsic::dbg_region_start: {
- DwarfWriter *DW = DAG.getDwarfWriter();
- DbgRegionStartInst &RSI = cast<DbgRegionStartInst>(I);
- if (isValidDebugInfoIntrinsic(RSI, OptLevel) && DW
- && DW->ShouldEmitDwarfDebug()) {
- unsigned LabelID =
- DW->RecordRegionStart(RSI.getContext());
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), LabelID));
- }
- return 0;
- }
- case Intrinsic::dbg_region_end: {
- DwarfWriter *DW = DAG.getDwarfWriter();
- DbgRegionEndInst &REI = cast<DbgRegionEndInst>(I);
-
- if (!isValidDebugInfoIntrinsic(REI, OptLevel) || !DW
- || !DW->ShouldEmitDwarfDebug())
- return 0;
-
- MachineFunction &MF = DAG.getMachineFunction();
- DISubprogram Subprogram(REI.getContext());
-
- if (isInlinedFnEnd(REI, MF.getFunction())) {
- // This is end of inlined function. Debugging information for inlined
- // function is not handled yet (only supported by FastISel).
- if (OptLevel == CodeGenOpt::None) {
- unsigned ID = DW->RecordInlinedFnEnd(Subprogram);
- if (ID != 0)
- // Returned ID is 0 if this is unbalanced "end of inlined
- // scope". This could happen if optimizer eats dbg intrinsics or
- // "beginning of inlined scope" is not recoginized due to missing
- // location info. In such cases, do ignore this region.end.
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), ID));
- }
- return 0;
- }
-
- unsigned LabelID =
- DW->RecordRegionEnd(REI.getContext());
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), LabelID));
- return 0;
- }
- case Intrinsic::dbg_func_start: {
- DwarfWriter *DW = DAG.getDwarfWriter();
- DbgFuncStartInst &FSI = cast<DbgFuncStartInst>(I);
- if (!isValidDebugInfoIntrinsic(FSI, CodeGenOpt::None))
- return 0;
-
- MachineFunction &MF = DAG.getMachineFunction();
- // This is a beginning of an inlined function.
- if (isInlinedFnStart(FSI, MF.getFunction())) {
- if (OptLevel != CodeGenOpt::None)
- // FIXME: Debugging informaation for inlined function is only
- // supported at CodeGenOpt::Node.
- return 0;
-
- DebugLoc PrevLoc = CurDebugLoc;
- // If llvm.dbg.func.start is seen in a new block before any
- // llvm.dbg.stoppoint intrinsic then the location info is unknown.
- // FIXME : Why DebugLoc is reset at the beginning of each block ?
- if (PrevLoc.isUnknown())
- return 0;
-
- // Record the source line.
- setCurDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo()));
-
- if (!DW || !DW->ShouldEmitDwarfDebug())
- return 0;
- DebugLocTuple PrevLocTpl = MF.getDebugLocTuple(PrevLoc);
- DISubprogram SP(FSI.getSubprogram());
- DICompileUnit CU(PrevLocTpl.Scope);
- unsigned LabelID = DW->RecordInlinedFnStart(SP, CU,
- PrevLocTpl.Line,
- PrevLocTpl.Col);
- DAG.setRoot(DAG.getLabel(ISD::DBG_LABEL, getCurDebugLoc(),
- getRoot(), LabelID));
- return 0;
- }
-
- // This is a beginning of a new function.
- MF.setDefaultDebugLoc(ExtractDebugLocation(FSI, MF.getDebugLocInfo()));
-
- if (!DW || !DW->ShouldEmitDwarfDebug())
- return 0;
- // llvm.dbg.func_start also defines beginning of function scope.
- DW->RecordRegionStart(FSI.getSubprogram());
- return 0;
- }
case Intrinsic::dbg_declare: {
if (OptLevel != CodeGenOpt::None)
// FIXME: Variable debug info is not supported here.
@@ -3972,13 +3941,15 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
if (SI == FuncInfo.StaticAllocaMap.end())
return 0; // VLAs.
int FI = SI->second;
-#ifdef ATTACH_DEBUG_INFO_TO_AN_INSN
+
MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
- if (MMI)
- MMI->setVariableDbgInfo(Variable, FI);
-#else
- DW->RecordVariable(Variable, FI);
-#endif
+ if (MMI) {
+ MetadataContext &TheMetadata =
+ DI.getParent()->getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+ MDNode *Dbg = TheMetadata.getMD(MDDbgKind, &DI);
+ MMI->setVariableDbgInfo(Variable, FI, Dbg);
+ }
return 0;
}
case Intrinsic::eh_exception: {
@@ -4233,7 +4204,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
EVT Ty = Arg.getValueType();
if (CI->getZExtValue() < 2)
- setValue(&I, DAG.getConstant(-1U, Ty));
+ setValue(&I, DAG.getConstant(-1ULL, Ty));
else
setValue(&I, DAG.getConstant(0, Ty));
return 0;
@@ -4355,6 +4326,16 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
return implVisitBinaryAtomic(I, ISD::ATOMIC_LOAD_UMAX);
case Intrinsic::atomic_swap:
return implVisitBinaryAtomic(I, ISD::ATOMIC_SWAP);
+
+ case Intrinsic::invariant_start:
+ case Intrinsic::lifetime_start:
+ // Discard region information.
+ setValue(&I, DAG.getUNDEF(TLI.getPointerTy()));
+ return 0;
+ case Intrinsic::invariant_end:
+ case Intrinsic::lifetime_end:
+ // Discard region information.
+ return 0;
}
}
@@ -4368,7 +4349,7 @@ SelectionDAGLowering::visitIntrinsicCall(CallInst &I, unsigned Intrinsic) {
/// TargetLowering::IsEligibleForTailCallOptimization.
///
static bool
-isInTailCallPosition(const Instruction *I, Attributes RetAttr,
+isInTailCallPosition(const Instruction *I, Attributes CalleeRetAttr,
const TargetLowering &TLI) {
const BasicBlock *ExitBB = I->getParent();
const TerminatorInst *Term = ExitBB->getTerminator();
@@ -4395,9 +4376,14 @@ isInTailCallPosition(const Instruction *I, Attributes RetAttr,
// what the call's return type is.
if (!Ret || Ret->getNumOperands() == 0) return true;
+ // If the return value is undef, it doesn't matter what the call's
+ // return type is.
+ if (isa<UndefValue>(Ret->getOperand(0))) return true;
+
// Conservatively require the attributes of the call to match those of
- // the return.
- if (F->getAttributes().getRetAttributes() != RetAttr)
+ // the return. Ignore noalias because it doesn't affect the call sequence.
+ unsigned CallerRetAttr = F->getAttributes().getRetAttributes();
+ if ((CalleeRetAttr ^ CallerRetAttr) & ~Attribute::NoAlias)
return false;
// Otherwise, make sure the unmodified return value of I is the return value.
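
The attribute comparison here is relaxed from exact equality to "identical except for noalias", implemented as an XOR of the two return-attribute bitmasks with the NoAlias bit masked out. A minimal sketch of that bit trick with plain integers (the bit values below are made up for illustration, not the real Attribute constants):

// Sketch of the mask check: the XOR has a bit set wherever the attributes
// differ, and we deliberately ignore the NoAlias bit. Values are hypothetical.
#include <cstdio>

enum : unsigned { SExt = 1u << 0, ZExt = 1u << 1, NoAlias = 1u << 2 };

static bool retAttrsCompatible(unsigned callee, unsigned caller) {
  return ((callee ^ caller) & ~NoAlias) == 0;
}

int main() {
  std::printf("%d\n", retAttrsCompatible(SExt, SExt | NoAlias)); // 1: only noalias differs
  std::printf("%d\n", retAttrsCompatible(SExt, ZExt));           // 0: extension kind differs
  return 0;
}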
@@ -4431,15 +4417,52 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
MachineBasicBlock *LandingPad) {
const PointerType *PT = cast<PointerType>(CS.getCalledValue()->getType());
const FunctionType *FTy = cast<FunctionType>(PT->getElementType());
+ const Type *RetTy = FTy->getReturnType();
MachineModuleInfo *MMI = DAG.getMachineModuleInfo();
unsigned BeginLabel = 0, EndLabel = 0;
TargetLowering::ArgListTy Args;
TargetLowering::ArgListEntry Entry;
Args.reserve(CS.arg_size());
- unsigned j = 1;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<EVT, 4> OutVTs;
+ SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+ SmallVector<uint64_t, 4> Offsets;
+ getReturnInfo(RetTy, CS.getAttributes().getRetAttributes(),
+ OutVTs, OutsFlags, TLI, &Offsets);
+
+
+ bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(),
+ FTy->isVarArg(), OutVTs, OutsFlags, DAG);
+
+ SDValue DemoteStackSlot;
+
+ if (!CanLowerReturn) {
+ uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(
+ FTy->getReturnType());
+ unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(
+ FTy->getReturnType());
+ MachineFunction &MF = DAG.getMachineFunction();
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
+ const Type *StackSlotPtrType = PointerType::getUnqual(FTy->getReturnType());
+
+ DemoteStackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
+ Entry.Node = DemoteStackSlot;
+ Entry.Ty = StackSlotPtrType;
+ Entry.isSExt = false;
+ Entry.isZExt = false;
+ Entry.isInReg = false;
+ Entry.isSRet = true;
+ Entry.isNest = false;
+ Entry.isByVal = false;
+ Entry.Alignment = Align;
+ Args.push_back(Entry);
+ RetTy = Type::getVoidTy(FTy->getContext());
+ }
+
for (CallSite::arg_iterator i = CS.arg_begin(), e = CS.arg_end();
- i != e; ++i, ++j) {
+ i != e; ++i) {
SDValue ArgNode = getValue(*i);
Entry.Node = ArgNode; Entry.Ty = (*i)->getType();
@@ -4475,7 +4498,7 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
isTailCall = false;
std::pair<SDValue,SDValue> Result =
- TLI.LowerCallTo(getRoot(), CS.getType(),
+ TLI.LowerCallTo(getRoot(), RetTy,
CS.paramHasAttr(0, Attribute::SExt),
CS.paramHasAttr(0, Attribute::ZExt), FTy->isVarArg(),
CS.paramHasAttr(0, Attribute::InReg), FTy->getNumParams(),
@@ -4489,6 +4512,35 @@ void SelectionDAGLowering::LowerCallTo(CallSite CS, SDValue Callee,
"Null value expected with tail call!");
if (Result.first.getNode())
setValue(CS.getInstruction(), Result.first);
+ else if (!CanLowerReturn && Result.second.getNode()) {
+ // The instruction result is the result of loading from the
+ // hidden sret parameter.
+ SmallVector<EVT, 1> PVTs;
+ const Type *PtrRetTy = PointerType::getUnqual(FTy->getReturnType());
+
+ ComputeValueVTs(TLI, PtrRetTy, PVTs);
+ assert(PVTs.size() == 1 && "Pointers should fit in one register");
+ EVT PtrVT = PVTs[0];
+ unsigned NumValues = OutVTs.size();
+ SmallVector<SDValue, 4> Values(NumValues);
+ SmallVector<SDValue, 4> Chains(NumValues);
+
+ for (unsigned i = 0; i < NumValues; ++i) {
+ SDValue L = DAG.getLoad(OutVTs[i], getCurDebugLoc(), Result.second,
+ DAG.getNode(ISD::ADD, getCurDebugLoc(), PtrVT, DemoteStackSlot,
+ DAG.getConstant(Offsets[i], PtrVT)),
+ NULL, Offsets[i], false, 1);
+ Values[i] = L;
+ Chains[i] = L.getValue(1);
+ }
+ SDValue Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
+ MVT::Other, &Chains[0], NumValues);
+ PendingLoads.push_back(Chain);
+
+ setValue(CS.getInstruction(), DAG.getNode(ISD::MERGE_VALUES,
+ getCurDebugLoc(), DAG.getVTList(&OutVTs[0], NumValues),
+ &Values[0], NumValues));
+ }
// As a special case, a null chain means that a tail call has
// been emitted and the DAG root is already updated.
if (Result.second.getNode())
@@ -5229,7 +5281,7 @@ void SelectionDAGLowering::visitInlineAsm(CallSite CS) {
uint64_t TySize = TLI.getTargetData()->getTypeAllocSize(Ty);
unsigned Align = TLI.getTargetData()->getPrefTypeAlignment(Ty);
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(TySize, Align, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, TLI.getPointerTy());
Chain = DAG.getStore(Chain, getCurDebugLoc(),
OpInfo.CallOperand, StackSlot, NULL, 0);
@@ -5757,9 +5809,32 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
SDValue OldRoot = DAG.getRoot();
DebugLoc dl = SDL->getCurDebugLoc();
const TargetData *TD = TLI.getTargetData();
+ SmallVector<ISD::InputArg, 16> Ins;
+
+ // Check whether the function can return without sret-demotion.
+ SmallVector<EVT, 4> OutVTs;
+ SmallVector<ISD::ArgFlagsTy, 4> OutsFlags;
+ getReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(),
+ OutVTs, OutsFlags, TLI);
+ FunctionLoweringInfo &FLI = DAG.getFunctionLoweringInfo();
+
+ FLI.CanLowerReturn = TLI.CanLowerReturn(F.getCallingConv(), F.isVarArg(),
+ OutVTs, OutsFlags, DAG);
+ if (!FLI.CanLowerReturn) {
+ // Put in an sret pointer parameter before all the other parameters.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+
+ // NOTE: Assuming that a pointer will never break down to more than one VT
+ // or one register.
+ ISD::ArgFlagsTy Flags;
+ Flags.setSRet();
+ EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), ValueVTs[0]);
+ ISD::InputArg RetArg(Flags, RegisterVT, true);
+ Ins.push_back(RetArg);
+ }
// Set up the incoming argument description vector.
- SmallVector<ISD::InputArg, 16> Ins;
unsigned Idx = 1;
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end();
I != E; ++I, ++Idx) {
@@ -5837,6 +5912,28 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
// Set up the argument values.
unsigned i = 0;
Idx = 1;
+ if (!FLI.CanLowerReturn) {
+ // Create a virtual register for the sret pointer, and put in a copy
+ // from the sret argument into it.
+ SmallVector<EVT, 1> ValueVTs;
+ ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs);
+ EVT VT = ValueVTs[0];
+ EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
+ ISD::NodeType AssertOp = ISD::DELETED_NODE;
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT,
+ VT, AssertOp);
+
+ MachineFunction& MF = SDL->DAG.getMachineFunction();
+ MachineRegisterInfo& RegInfo = MF.getRegInfo();
+ unsigned SRetReg = RegInfo.createVirtualRegister(TLI.getRegClassFor(RegVT));
+ FLI.DemoteRegister = SRetReg;
+ NewRoot = SDL->DAG.getCopyToReg(NewRoot, SDL->getCurDebugLoc(), SRetReg, ArgValue);
+ DAG.setRoot(NewRoot);
+
+ // i indexes lowered arguments. Bump it past the hidden sret argument.
+ // Idx indexes LLVM arguments. Don't touch it.
+ ++i;
+ }
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E;
++I, ++Idx) {
SmallVector<SDValue, 4> ArgValues;
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
index a0ec7aa..10f256c 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuild.h
@@ -90,6 +90,14 @@ public:
MachineFunction *MF;
MachineRegisterInfo *RegInfo;
+ /// CanLowerReturn - true iff the function's return value can be lowered to
+ /// registers.
+ bool CanLowerReturn;
+
+ /// DemoteRegister - if CanLowerReturn is false, DemoteRegister is a vreg
+ /// allocated to hold a pointer to the hidden sret parameter.
+ unsigned DemoteRegister;
+
explicit FunctionLoweringInfo(TargetLowering &TLI);
/// set - Initialize this FunctionLoweringInfo with the given Function
@@ -193,9 +201,9 @@ class SelectionDAGLowering {
Case() : Low(0), High(0), BB(0) { }
Case(Constant* low, Constant* high, MachineBasicBlock* bb) :
Low(low), High(high), BB(bb) { }
- uint64_t size() const {
- uint64_t rHigh = cast<ConstantInt>(High)->getSExtValue();
- uint64_t rLow = cast<ConstantInt>(Low)->getSExtValue();
+ APInt size() const {
+ const APInt &rHigh = cast<ConstantInt>(High)->getValue();
+ const APInt &rLow = cast<ConstantInt>(Low)->getValue();
return (rHigh - rLow + 1ULL);
}
};
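
Case::size() now returns an APInt computed at the switch type's full bit width, because the old uint64_t arithmetic silently wraps for very wide case ranges. The standalone snippet below shows the failure mode the change avoids (illustration only, not part of the patch):

// Illustration of the overflow the APInt version avoids: a case range that
// covers the whole i64 space has 2^64 values, which a uint64_t cannot hold.
#include <cstdint>
#include <cstdio>

int main() {
  int64_t low = INT64_MIN, high = INT64_MAX;
  uint64_t size = (uint64_t)high - (uint64_t)low + 1;     // wraps around to 0
  std::printf("size = %llu\n", (unsigned long long)size); // prints 0
  return 0;
}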
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index b63d5bb..ab5f21e 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -68,7 +68,7 @@ static cl::opt<bool>
EnableFastISelAbort("fast-isel-abort", cl::Hidden,
cl::desc("Enable abort calls when \"fast\" instruction fails"));
static cl::opt<bool>
-SchedLiveInCopies("schedule-livein-copies",
+SchedLiveInCopies("schedule-livein-copies", cl::Hidden,
cl::desc("Schedule copies of livein registers"),
cl::init(false));
@@ -387,13 +387,14 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
if (MDDbgKind) {
// Update DebugLoc if debug information is attached with this
// instruction.
- if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
- DILocation DILoc(Dbg);
- DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
- SDL->setCurDebugLoc(Loc);
- if (MF->getDefaultDebugLoc().isUnknown())
- MF->setDefaultDebugLoc(Loc);
- }
+ if (!isa<DbgInfoIntrinsic>(I))
+ if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
+ SDL->setCurDebugLoc(Loc);
+ if (MF->getDefaultDebugLoc().isUnknown())
+ MF->setDefaultDebugLoc(Loc);
+ }
}
if (!isa<TerminatorInst>(I))
SDL->visit(*I);
@@ -750,14 +751,15 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
if (MDDbgKind) {
// Update DebugLoc if debug information is attached with this
// instruction.
- if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) {
- DILocation DILoc(Dbg);
- DebugLoc Loc = ExtractDebugLocation(DILoc,
- MF.getDebugLocInfo());
- FastIS->setCurDebugLoc(Loc);
- if (MF.getDefaultDebugLoc().isUnknown())
- MF.setDefaultDebugLoc(Loc);
- }
+ if (!isa<DbgInfoIntrinsic>(BI))
+ if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) {
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc,
+ MF.getDebugLocInfo());
+ FastIS->setCurDebugLoc(Loc);
+ if (MF.getDefaultDebugLoc().isUnknown())
+ MF.setDefaultDebugLoc(Loc);
+ }
}
// Just before the terminator instruction, insert instructions to
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 9f36b67..2ca52a4 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -22,7 +22,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/SelectionDAG.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
@@ -65,22 +64,27 @@ static void InitLibcallNames(const char **Names) {
Names[RTLIB::SRA_I32] = "__ashrsi3";
Names[RTLIB::SRA_I64] = "__ashrdi3";
Names[RTLIB::SRA_I128] = "__ashrti3";
+ Names[RTLIB::MUL_I8] = "__mulqi3";
Names[RTLIB::MUL_I16] = "__mulhi3";
Names[RTLIB::MUL_I32] = "__mulsi3";
Names[RTLIB::MUL_I64] = "__muldi3";
Names[RTLIB::MUL_I128] = "__multi3";
+ Names[RTLIB::SDIV_I8] = "__divqi3";
Names[RTLIB::SDIV_I16] = "__divhi3";
Names[RTLIB::SDIV_I32] = "__divsi3";
Names[RTLIB::SDIV_I64] = "__divdi3";
Names[RTLIB::SDIV_I128] = "__divti3";
+ Names[RTLIB::UDIV_I8] = "__udivqi3";
Names[RTLIB::UDIV_I16] = "__udivhi3";
Names[RTLIB::UDIV_I32] = "__udivsi3";
Names[RTLIB::UDIV_I64] = "__udivdi3";
Names[RTLIB::UDIV_I128] = "__udivti3";
+ Names[RTLIB::SREM_I8] = "__modqi3";
Names[RTLIB::SREM_I16] = "__modhi3";
Names[RTLIB::SREM_I32] = "__modsi3";
Names[RTLIB::SREM_I64] = "__moddi3";
Names[RTLIB::SREM_I128] = "__modti3";
+ Names[RTLIB::UREM_I8] = "__umodqi3";
Names[RTLIB::UREM_I16] = "__umodhi3";
Names[RTLIB::UREM_I32] = "__umodsi3";
Names[RTLIB::UREM_I64] = "__umoddi3";
@@ -2360,7 +2364,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
assert(*(Constraint.end()-1) == '}' && "Not a brace enclosed constraint?");
// Remove the braces from around the name.
- std::string RegName(Constraint.begin()+1, Constraint.end()-1);
+ StringRef RegName(Constraint.data()+1, Constraint.size()-2);
// Figure out which register class contains this reg.
const TargetRegisterInfo *RI = TM.getRegisterInfo();
@@ -2383,7 +2387,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint,
for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end();
I != E; ++I) {
- if (StringsEqualNoCase(RegName, RI->getName(*I)))
+ if (RegName.equals_lower(RI->getName(*I)))
return std::make_pair(*I, RC);
}
}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index b5d6b47..3909c56 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -709,7 +709,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
}
MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
- tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI);
+ tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_);
MachineInstr *NewMI = prior(MII);
if (checkForDeadDef) {
diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp
index f3ad0d1..f85384b 100644
--- a/lib/CodeGen/SlotIndexes.cpp
+++ b/lib/CodeGen/SlotIndexes.cpp
@@ -13,15 +13,43 @@
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/ManagedStatic.h"
using namespace llvm;
-std::auto_ptr<IndexListEntry> IndexListEntry::emptyKeyEntry,
- IndexListEntry::tombstoneKeyEntry;
+
+// Yep - these are thread safe. See the header for details.
+namespace {
+
+
+ class EmptyIndexListEntry : public IndexListEntry {
+ public:
+ EmptyIndexListEntry() : IndexListEntry(EMPTY_KEY) {}
+ };
+
+ class TombstoneIndexListEntry : public IndexListEntry {
+ public:
+ TombstoneIndexListEntry() : IndexListEntry(TOMBSTONE_KEY) {}
+ };
+
+ // The following statics are thread safe. They're read only, and you
+ // can't step from them to any other list entries.
+ ManagedStatic<EmptyIndexListEntry> IndexListEntryEmptyKey;
+ ManagedStatic<TombstoneIndexListEntry> IndexListEntryTombstoneKey;
+}
char SlotIndexes::ID = 0;
static RegisterPass<SlotIndexes> X("slotindexes", "Slot index numbering");
+IndexListEntry* IndexListEntry::getEmptyKeyEntry() {
+ return &*IndexListEntryEmptyKey;
+}
+
+IndexListEntry* IndexListEntry::getTombstoneKeyEntry() {
+ return &*IndexListEntryTombstoneKey;
+}
+
+
void SlotIndexes::getAnalysisUsage(AnalysisUsage &au) const {
au.setPreservesAll();
MachineFunctionPass::getAnalysisUsage(au);
@@ -51,8 +79,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
mf = &fn;
initList();
- const unsigned gap = 1;
-
// Check that the list contains only the sentinel.
assert(indexListHead->getNext() == 0 &&
"Index list non-empty at initial numbering?");
@@ -64,14 +90,6 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
"MachineInstr -> Index mapping non-empty at initial numbering?");
functionSize = 0;
- /*
- for (unsigned s = 0; s < SlotIndex::NUM; ++s) {
- indexList.push_back(createEntry(0, s));
- }
-
- unsigned index = gap * SlotIndex::NUM;
- */
-
unsigned index = 0;
// Iterate over the function.
@@ -83,7 +101,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(0, index));
SlotIndex blockStartIndex(back(), SlotIndex::LOAD);
- index += gap * SlotIndex::NUM;
+ index += SlotIndex::NUM;
for (MachineBasicBlock::iterator miItr = mbb->begin(), miEnd = mbb->end();
miItr != miEnd; ++miItr) {
@@ -93,7 +111,7 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
push_back(createEntry(0, index));
terminatorGaps.insert(
std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += gap * SlotIndex::NUM;
+ index += SlotIndex::NUM;
}
// Insert a store index for the instr.
@@ -109,14 +127,14 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
if (Slots == 0)
Slots = 1;
- index += (Slots + 1) * gap * SlotIndex::NUM;
+ index += (Slots + 1) * SlotIndex::NUM;
}
if (mbb->getFirstTerminator() == mbb->end()) {
push_back(createEntry(0, index));
terminatorGaps.insert(
std::make_pair(mbb, SlotIndex(back(), SlotIndex::PHI_BIT)));
- index += gap * SlotIndex::NUM;
+ index += SlotIndex::NUM;
}
SlotIndex blockEndIndex(back(), SlotIndex::STORE);
@@ -138,21 +156,36 @@ bool SlotIndexes::runOnMachineFunction(MachineFunction &fn) {
return false;
}
-void SlotIndexes::renumber() {
- assert(false && "SlotIndexes::runmuber is not fully implemented yet.");
+void SlotIndexes::renumberIndexes() {
- // Compute numbering as follows:
- // Grab an iterator to the start of the index list.
- // Iterate over all MBBs, and within each MBB all MIs, keeping the MI
- // iterator in lock-step (though skipping it over indexes which have
- // null pointers in the instruction field).
- // At each iteration assert that the instruction pointed to in the index
- // is the same one pointed to by the MI iterator. This
+ // Renumber updates the index of every element of the index list.
+ // If all instrs in the function have been allocated an index (which has been
+ // placed in the index list in the order of instruction iteration) then the
+ // resulting numbering will match what would have been generated by the
+ // pass during the initial numbering of the function if the new instructions
+ // had been present.
- // FIXME: This can be simplified. The mi2iMap_, Idx2MBBMap, etc. should
- // only need to be set up once - when the first numbering is computed.
+ functionSize = 0;
+ unsigned index = 0;
+
+ for (IndexListEntry *curEntry = front(); curEntry != getTail();
+ curEntry = curEntry->getNext()) {
- assert(false && "Renumbering not supported yet.");
+ curEntry->setIndex(index);
+
+ if (curEntry->getInstr() == 0) {
+ // MBB start entry or terminator gap. Just step index by 1.
+ index += SlotIndex::NUM;
+ }
+ else {
+ ++functionSize;
+ unsigned Slots = curEntry->getInstr()->getDesc().getNumDefs();
+ if (Slots == 0)
+ Slots = 1;
+
+ index += (Slots + 1) * SlotIndex::NUM;
+ }
+ }
}
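
renumberIndexes() walks the whole index list and reassigns indexes with the same spacing the initial numbering uses: one SlotIndex::NUM step for empty entries (block starts and terminator gaps) and (defs + 1) * SlotIndex::NUM for real instructions. A small standalone model of that spacing (sketch only; the names are made up and this is not the SlotIndexes API):

// Toy model of the renumbering spacing. NUM plays the role of SlotIndex::NUM.
#include <cstdio>
#include <vector>

struct Entry { unsigned numDefs; bool isInstr; unsigned index; };

static void renumber(std::vector<Entry> &list, unsigned NUM) {
  unsigned index = 0;
  for (Entry &e : list) {
    e.index = index;
    if (!e.isInstr) {
      index += NUM;                          // block start / terminator gap
    } else {
      unsigned slots = e.numDefs ? e.numDefs : 1;
      index += (slots + 1) * NUM;            // room for the instruction's defs
    }
  }
}

int main() {
  std::vector<Entry> list = {{0, false, 0}, {1, true, 0}, {2, true, 0}, {0, false, 0}};
  renumber(list, 4);
  for (const Entry &e : list) std::printf("%u ", e.index);  // prints: 0 4 12 24
  std::printf("\n");
  return 0;
}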
void SlotIndexes::dump() const {
@@ -167,7 +200,7 @@ void SlotIndexes::dump() const {
}
}
- for (MBB2IdxMap::iterator itr = mbb2IdxMap.begin();
+ for (MBB2IdxMap::const_iterator itr = mbb2IdxMap.begin();
itr != mbb2IdxMap.end(); ++itr) {
errs() << "MBB " << itr->first->getNumber() << " (" << itr->first << ") - ["
<< itr->second.first << ", " << itr->second.second << "]\n";
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 95e85be..9107325 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -52,16 +52,16 @@ protected:
/// Ensures there is space before the given machine instruction, returns the
/// instruction's new number.
SlotIndex makeSpaceBefore(MachineInstr *mi) {
- if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
+ //if (!lis->hasGapBeforeInstr(lis->getInstructionIndex(mi))) {
// FIXME: Should be updated to use rewrite-in-place methods when they're
// introduced. Currently broken.
//lis->scaleNumbering(2);
//ls->scaleNumbering(2);
- }
+ //}
SlotIndex miIdx = lis->getInstructionIndex(mi);
- assert(lis->hasGapBeforeInstr(miIdx));
+ //assert(lis->hasGapBeforeInstr(miIdx));
return miIdx;
}
@@ -69,16 +69,16 @@ protected:
/// Ensure there is space after the given machine instruction, returns the
/// instruction's new number.
SlotIndex makeSpaceAfter(MachineInstr *mi) {
- if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
+ //if (!lis->hasGapAfterInstr(lis->getInstructionIndex(mi))) {
// FIXME: Should be updated to use rewrite-in-place methods when they're
// introduced. Currently broken.
// lis->scaleNumbering(2);
// ls->scaleNumbering(2);
- }
+ //}
SlotIndex miIdx = lis->getInstructionIndex(mi);
- assert(lis->hasGapAfterInstr(miIdx));
+ //assert(lis->hasGapAfterInstr(miIdx));
return miIdx;
}
@@ -99,14 +99,8 @@ protected:
true, ss, trc);
MachineBasicBlock::iterator storeInstItr(next(mi));
MachineInstr *storeInst = &*storeInstItr;
- SlotIndex storeInstIdx = miIdx.getNextIndex();
-
- assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
- "Store inst index already in use.");
- lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
-
- return storeInstIdx;
+ return lis->InsertMachineInstrInMaps(storeInst);
}
/// Insert a store of the given vreg to the given stack slot immediately
@@ -120,14 +114,8 @@ protected:
tii->storeRegToStackSlot(*mi->getParent(), mi, vreg, true, ss, trc);
MachineBasicBlock::iterator storeInstItr(prior(mi));
MachineInstr *storeInst = &*storeInstItr;
- SlotIndex storeInstIdx = miIdx.getPrevIndex();
-
- assert(lis->getInstructionFromIndex(storeInstIdx) == 0 &&
- "Store inst index already in use.");
- lis->InsertMachineInstrInMaps(storeInst, storeInstIdx);
-
- return storeInstIdx;
+ return lis->InsertMachineInstrInMaps(storeInst);
}
void insertStoreAfterInstOnInterval(LiveInterval *li,
@@ -164,14 +152,8 @@ protected:
tii->loadRegFromStackSlot(*mi->getParent(), nextInstItr, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(next(mi));
MachineInstr *loadInst = &*loadInstItr;
- SlotIndex loadInstIdx = miIdx.getNextIndex();
-
- assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
- "Store inst index already in use.");
- lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
-
- return loadInstIdx;
+ return lis->InsertMachineInstrInMaps(loadInst);
}
/// Insert a load of the given vreg from the given stack slot immediately
@@ -186,14 +168,8 @@ protected:
tii->loadRegFromStackSlot(*mi->getParent(), mi, vreg, ss, trc);
MachineBasicBlock::iterator loadInstItr(prior(mi));
MachineInstr *loadInst = &*loadInstItr;
- SlotIndex loadInstIdx = miIdx.getPrevIndex();
-
- assert(lis->getInstructionFromIndex(loadInstIdx) == 0 &&
- "Load inst index already in use.");
-
- lis->InsertMachineInstrInMaps(loadInst, loadInstIdx);
- return loadInstIdx;
+ return lis->InsertMachineInstrInMaps(loadInst);
}
void insertLoadBeforeInstOnInterval(LiveInterval *li,
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index c646869..102e2a3 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -135,14 +135,52 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg,
unsigned SubIdx,
- const MachineInstr *Orig) const {
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
MachineOperand &MO = MI->getOperand(0);
- MO.setReg(DestReg);
- MO.setSubReg(SubIdx);
+ if (TargetRegisterInfo::isVirtualRegister(DestReg)) {
+ MO.setReg(DestReg);
+ MO.setSubReg(SubIdx);
+ } else if (SubIdx) {
+ MO.setReg(TRI->getSubReg(DestReg, SubIdx));
+ } else {
+ MO.setReg(DestReg);
+ }
MBB.insert(I, MI);
}
+bool
+TargetInstrInfoImpl::isIdentical(const MachineInstr *MI,
+ const MachineInstr *Other,
+ const MachineRegisterInfo *MRI) const {
+ if (MI->getOpcode() != Other->getOpcode() ||
+ MI->getNumOperands() != Other->getNumOperands())
+ return false;
+
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ const MachineOperand &OMO = Other->getOperand(i);
+ if (MO.isReg() && MO.isDef()) {
+ assert(OMO.isReg() && OMO.isDef());
+ unsigned Reg = MO.getReg();
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ if (Reg != OMO.getReg())
+ return false;
+ } else if (MRI->getRegClass(MO.getReg()) !=
+ MRI->getRegClass(OMO.getReg()))
+ return false;
+
+ continue;
+ }
+
+ if (!MO.isIdenticalTo(OMO))
+ return false;
+ }
+
+ return true;
+}
+
unsigned
TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
unsigned FnSize = 0;
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 0a6a0d7..84467ed 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -1033,7 +1033,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(errs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
- TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI);
+ TII->reMaterialize(*mbbi, mi, regA, regASubIdx, DefMI, TRI);
ReMatRegs.set(regB);
++NumReMats;
} else {
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index ce3eed1..c8c5d86 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -117,8 +117,8 @@ int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) {
assert(Virt2StackSlotMap[virtReg] == NO_STACK_SLOT &&
"attempt to assign stack slot to already spilled register");
const TargetRegisterClass* RC = MF->getRegInfo().getRegClass(virtReg);
- int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
- RC->getAlignment(), /*isSS*/true);
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
if (LowSpillSlot == NO_STACK_SLOT)
LowSpillSlot = SS;
if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
@@ -161,8 +161,8 @@ int VirtRegMap::getEmergencySpillSlot(const TargetRegisterClass *RC) {
EmergencySpillSlots.find(RC);
if (I != EmergencySpillSlots.end())
return I->second;
- int SS = MF->getFrameInfo()->CreateStackObject(RC->getSize(),
- RC->getAlignment(), /*isSS*/true);
+ int SS = MF->getFrameInfo()->CreateSpillStackObject(RC->getSize(),
+ RC->getAlignment());
if (LowSpillSlot == NO_STACK_SLOT)
LowSpillSlot = SS;
if (HighSpillSlot == NO_STACK_SLOT || SS > HighSpillSlot)
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index fd80f46..ec0abd1 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -483,19 +483,20 @@ static void InvalidateKills(MachineInstr &MI,
}
/// InvalidateRegDef - If the def operand of the specified def MI is now dead
-/// (since it's spill instruction is removed), mark it isDead. Also checks if
+/// (since its spill instruction is removed), mark it isDead. Also checks if
/// the def MI has other definition operands that are not dead. Returns it by
/// reference.
static bool InvalidateRegDef(MachineBasicBlock::iterator I,
MachineInstr &NewDef, unsigned Reg,
- bool &HasLiveDef) {
+ bool &HasLiveDef,
+ const TargetRegisterInfo *TRI) {
// Due to remat, it's possible this reg isn't being reused. That is,
// the def of this reg (by prev MI) is now dead.
MachineInstr *DefMI = I;
MachineOperand *DefOp = NULL;
for (unsigned i = 0, e = DefMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = DefMI->getOperand(i);
- if (!MO.isReg() || !MO.isUse() || !MO.isKill() || MO.isUndef())
+ if (!MO.isReg() || !MO.isDef() || !MO.isKill() || MO.isUndef())
continue;
if (MO.getReg() == Reg)
DefOp = &MO;
@@ -512,7 +513,8 @@ static bool InvalidateRegDef(MachineBasicBlock::iterator I,
MachineInstr *NMI = I;
for (unsigned j = 0, ee = NMI->getNumOperands(); j != ee; ++j) {
MachineOperand &MO = NMI->getOperand(j);
- if (!MO.isReg() || MO.getReg() != Reg)
+ if (!MO.isReg() || MO.getReg() == 0 ||
+ (MO.getReg() != Reg && !TRI->isSubRegister(Reg, MO.getReg())))
continue;
if (MO.isUse())
FoundUse = true;
@@ -556,11 +558,30 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
KillOps[*SR] = NULL;
RegKills.reset(*SR);
}
-
- if (!MI.isRegTiedToDefOperand(i))
- // Unless it's a two-address operand, this is the new kill.
- MO.setIsKill();
+ } else {
+ // Check for subreg kills as well.
+ // d4 =
+ // store d4, fi#0
+ // ...
+ // = s8<kill>
+ // ...
+ // = d4 <avoiding reload>
+ for (const unsigned *SR = TRI->getSubRegisters(Reg); *SR; ++SR) {
+ unsigned SReg = *SR;
+ if (RegKills[SReg] && KillOps[SReg]->getParent() != &MI) {
+ KillOps[SReg]->setIsKill(false);
+ unsigned KReg = KillOps[SReg]->getReg();
+ KillOps[KReg] = NULL;
+ RegKills.reset(KReg);
+
+ for (const unsigned *SSR = TRI->getSubRegisters(KReg); *SSR; ++SSR) {
+ KillOps[*SSR] = NULL;
+ RegKills.reset(*SSR);
+ }
+ }
+ }
}
+
if (MO.isKill()) {
RegKills.set(Reg);
KillOps[Reg] = &MO;
@@ -573,7 +594,7 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI.getOperand(i);
- if (!MO.isReg() || !MO.isDef())
+ if (!MO.isReg() || !MO.getReg() || !MO.isDef())
continue;
unsigned Reg = MO.getReg();
RegKills.reset(Reg);
@@ -583,6 +604,10 @@ static void UpdateKills(MachineInstr &MI, const TargetRegisterInfo* TRI,
RegKills.reset(*SR);
KillOps[*SR] = NULL;
}
+ for (const unsigned *SR = TRI->getSuperRegisters(Reg); *SR; ++SR) {
+ RegKills.reset(*SR);
+ KillOps[*SR] = NULL;
+ }
}
}
@@ -601,7 +626,7 @@ static void ReMaterialize(MachineBasicBlock &MBB,
"Don't know how to remat instructions that define > 1 values!");
#endif
TII->reMaterialize(MBB, MII, DestReg,
- ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI);
+ ReMatDefMI->getOperand(0).getSubReg(), ReMatDefMI, TRI);
MachineInstr *NewMI = prior(MII);
for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
MachineOperand &MO = NewMI->getOperand(i);
@@ -816,11 +841,8 @@ unsigned ReuseInfo::GetRegForReload(const TargetRegisterClass *RC,
"A reuse cannot be a virtual register");
if (PRRU != RealPhysRegUsed) {
// What was the sub-register index?
- unsigned SubReg;
- for (SubIdx = 1; (SubReg = TRI->getSubReg(PRRU, SubIdx)); SubIdx++)
- if (SubReg == RealPhysRegUsed)
- break;
- assert(SubReg == RealPhysRegUsed &&
+ SubIdx = TRI->getSubRegIndex(PRRU, RealPhysRegUsed);
+ assert(SubIdx &&
"Operand physreg is not a sub-register of PhysRegUsed");
}
@@ -1454,7 +1476,7 @@ private:
// being reused.
for (unsigned j = 0, ee = KillRegs.size(); j != ee; ++j) {
bool HasOtherDef = false;
- if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef)) {
+ if (InvalidateRegDef(PrevMII, *MII, KillRegs[j], HasOtherDef, TRI)) {
MachineInstr *DeadDef = PrevMII;
if (ReMatDefs.count(DeadDef) && !HasOtherDef) {
// FIXME: This assumes a remat def does not have side effects.
@@ -1704,6 +1726,7 @@ private:
// Mark is killed.
MachineInstr *CopyMI = prior(InsertLoc);
+ CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse);
MachineOperand *KillOpnd = CopyMI->findRegisterUseOperand(InReg);
KillOpnd->setIsKill();
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
@@ -1984,6 +2007,7 @@ private:
TII->copyRegToReg(MBB, InsertLoc, DesignatedReg, PhysReg, RC, RC);
MachineInstr *CopyMI = prior(InsertLoc);
+ CopyMI->setAsmPrinterFlag(AsmPrinter::ReloadReuse);
UpdateKills(*CopyMI, TRI, RegKills, KillOps);
// This invalidates DesignatedReg.
@@ -2112,6 +2136,7 @@ private:
// virtual or needing to clobber any values if it's physical).
NextMII = &MI;
--NextMII; // backtrack to the copy.
+ NextMII->setAsmPrinterFlag(AsmPrinter::ReloadReuse);
// Propagate the sub-register index over.
if (SubIdx) {
DefMO = NextMII->findRegisterDefOperand(DestReg);
diff --git a/lib/CompilerDriver/Action.cpp b/lib/CompilerDriver/Action.cpp
index 5fd63ee..7bcd30a 100644
--- a/lib/CompilerDriver/Action.cpp
+++ b/lib/CompilerDriver/Action.cpp
@@ -13,9 +13,13 @@
#include "llvm/CompilerDriver/Action.h"
#include "llvm/CompilerDriver/BuiltinOptions.h"
+
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Program.h"
+#include "llvm/System/TimeValue.h"
+
#include <stdexcept>
+#include <string>
using namespace llvm;
using namespace llvmc;
@@ -60,14 +64,31 @@ namespace {
}
}
+namespace llvmc {
+ void AppendToGlobalTimeLog(const std::string& cmd, double time);
+}
+
int llvmc::Action::Execute() const {
if (DryRun || VerboseMode) {
errs() << Command_ << " ";
std::for_each(Args_.begin(), Args_.end(), print_string);
errs() << '\n';
}
- if (DryRun)
- return 0;
- else
- return ExecuteProgram(Command_, Args_);
+ if (!DryRun) {
+ if (Time) {
+ sys::TimeValue now = sys::TimeValue::now();
+ int ret = ExecuteProgram(Command_, Args_);
+ sys::TimeValue now2 = sys::TimeValue::now();
+ now2 -= now;
+ double elapsed = now2.seconds() + now2.microseconds() / 1000000.0;
+ AppendToGlobalTimeLog(Command_, elapsed);
+
+ return ret;
+ }
+ else {
+ return ExecuteProgram(Command_, Args_);
+ }
+ }
+
+ return 0;
}
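
The new -time handling samples sys::TimeValue::now() on either side of ExecuteProgram and logs the elapsed seconds plus microseconds / 1e6. The same pattern with the standard library, as a rough standalone equivalent (sketch only; llvmc itself uses llvm::sys::TimeValue as shown above):

// Rough standalone equivalent of the timing wrapper added above.
#include <chrono>
#include <cstdio>
#include <thread>

static int runSomething() {
  std::this_thread::sleep_for(std::chrono::milliseconds(50)); // stand-in for ExecuteProgram
  return 0;
}

int main() {
  auto start = std::chrono::steady_clock::now();
  int ret = runSomething();
  std::chrono::duration<double> elapsed = std::chrono::steady_clock::now() - start;
  std::printf("# command %.2f\n", elapsed.count());           // mirrors the "# cmd time" log line
  return ret;
}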
diff --git a/lib/CompilerDriver/BuiltinOptions.cpp b/lib/CompilerDriver/BuiltinOptions.cpp
index d90c50d..d1ac8c9 100644
--- a/lib/CompilerDriver/BuiltinOptions.cpp
+++ b/lib/CompilerDriver/BuiltinOptions.cpp
@@ -30,8 +30,10 @@ cl::opt<std::string> TempDirname("temp-dir", cl::desc("Temp dir name"),
cl::list<std::string> Languages("x",
cl::desc("Specify the language of the following input files"),
cl::ZeroOrMore);
+
cl::opt<bool> DryRun("dry-run",
cl::desc("Only pretend to run commands"));
+cl::opt<bool> Time("time", cl::desc("Time individual commands"));
cl::opt<bool> VerboseMode("v",
cl::desc("Enable verbose mode"));
diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp
index c581809..3a3487a 100644
--- a/lib/CompilerDriver/Main.cpp
+++ b/lib/CompilerDriver/Main.cpp
@@ -19,6 +19,7 @@
#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Path.h"
+#include <sstream>
#include <stdexcept>
#include <string>
@@ -28,6 +29,8 @@ using namespace llvmc;
namespace {
+ std::stringstream* GlobalTimeLog;
+
sys::Path getTempDir() {
sys::Path tempDir;
@@ -81,6 +84,11 @@ namespace {
namespace llvmc {
+// Used to implement -time option. External linkage is intentional.
+void AppendToGlobalTimeLog(const std::string& cmd, double time) {
+ *GlobalTimeLog << "# " << cmd << ' ' << time << '\n';
+}
+
// Sometimes plugins want to condition on the value in argv[0].
const char* ProgramName;
@@ -122,7 +130,19 @@ int Main(int argc, char** argv) {
throw std::runtime_error("no input files");
}
- return BuildTargets(graph, langMap);
+ if (Time) {
+ GlobalTimeLog = new std::stringstream;
+ GlobalTimeLog->precision(2);
+ }
+
+ int ret = BuildTargets(graph, langMap);
+
+ if (Time) {
+ llvm::errs() << GlobalTimeLog->str();
+ delete GlobalTimeLog;
+ }
+
+ return ret;
}
catch(llvmc::error_code& ec) {
return ec.code();
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
index 5a32fd3..9f4ab49 100644
--- a/lib/CompilerDriver/Tool.cpp
+++ b/lib/CompilerDriver/Tool.cpp
@@ -20,11 +20,6 @@
using namespace llvm;
using namespace llvmc;
-// SplitString is used by derived Tool classes.
-typedef void (*SplitStringFunPtr)(const std::string&,
- std::vector<std::string>&, const char*);
-SplitStringFunPtr ForceLinkageSplitString = &llvm::SplitString;
-
namespace {
sys::Path MakeTempFile(const sys::Path& TempDir, const std::string& BaseName,
const std::string& Suffix) {
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 21499e5..cb30748 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -40,7 +40,8 @@ ExecutionEngine *(*ExecutionEngine::JITCtor)(ModuleProvider *MP,
std::string *ErrorStr,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
- bool GVsWithCode) = 0;
+ bool GVsWithCode,
+ CodeModel::Model CMM) = 0;
ExecutionEngine *(*ExecutionEngine::InterpCtor)(ModuleProvider *MP,
std::string *ErrorStr) = 0;
ExecutionEngine::EERegisterFn ExecutionEngine::ExceptionTableRegister = 0;
@@ -52,7 +53,6 @@ ExecutionEngine::ExecutionEngine(ModuleProvider *P)
CompilingLazily = false;
GVCompilationDisabled = false;
SymbolSearchingDisabled = false;
- DlsymStubsEnabled = false;
Modules.push_back(P);
assert(P && "ModuleProvider is null?");
}
@@ -445,7 +445,7 @@ ExecutionEngine *EngineBuilder::create() {
if (ExecutionEngine::JITCtor) {
ExecutionEngine *EE =
ExecutionEngine::JITCtor(MP, ErrorStr, JMM, OptLevel,
- AllocateGVsWithCode);
+ AllocateGVsWithCode, CMModel);
if (EE) return EE;
}
}
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index 01bd2c7..b59cfd1 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -882,16 +882,6 @@ void Interpreter::visitCallSite(CallSite CS) {
e = SF.Caller.arg_end(); i != e; ++i, ++pNum) {
Value *V = *i;
ArgVals.push_back(getOperandValue(V, SF));
- // Promote all integral types whose size is < sizeof(i32) into i32.
- // We do this by zero or sign extending the value as appropriate
- // according to the parameter attributes
- const Type *Ty = V->getType();
- if (Ty->isInteger() && (ArgVals.back().IntVal.getBitWidth() < 32)) {
- if (CS.paramHasAttr(pNum, Attribute::ZExt))
- ArgVals.back().IntVal = ArgVals.back().IntVal.zext(32);
- else if (CS.paramHasAttr(pNum, Attribute::SExt))
- ArgVals.back().IntVal = ArgVals.back().IntVal.sext(32);
- }
}
// To handle indirect calls, we must get the pointer value from the argument
diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
index 8c45a36..c02d84f 100644
--- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
+++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp
@@ -158,7 +158,7 @@ static void *ffiValueFor(const Type *Ty, const GenericValue &AV,
}
case Type::FloatTyID: {
float *FloatPtr = (float *) ArgDataPtr;
- *FloatPtr = AV.DoubleVal;
+ *FloatPtr = AV.FloatVal;
return ArgDataPtr;
}
case Type::DoubleTyID: {
@@ -284,6 +284,9 @@ GenericValue Interpreter::callExternalFunction(Function *F,
else
llvm_report_error("Tried to execute an unknown external function: " +
F->getType()->getDescription() + " " +F->getName());
+#ifndef USE_LIBFFI
+ errs() << "Recompiling LLVM with --enable-libffi might help.\n";
+#endif
return GenericValue();
}
@@ -419,83 +422,6 @@ GenericValue lle_X_printf(const FunctionType *FT,
return GV;
}
-static void ByteswapSCANFResults(LLVMContext &C,
- const char *Fmt, void *Arg0, void *Arg1,
- void *Arg2, void *Arg3, void *Arg4, void *Arg5,
- void *Arg6, void *Arg7, void *Arg8) {
- void *Args[] = { Arg0, Arg1, Arg2, Arg3, Arg4, Arg5, Arg6, Arg7, Arg8, 0 };
-
- // Loop over the format string, munging read values as appropriate (performs
- // byteswaps as necessary).
- unsigned ArgNo = 0;
- while (*Fmt) {
- if (*Fmt++ == '%') {
- // Read any flag characters that may be present...
- bool Suppress = false;
- bool Half = false;
- bool Long = false;
- bool LongLong = false; // long long or long double
-
- while (1) {
- switch (*Fmt++) {
- case '*': Suppress = true; break;
- case 'a': /*Allocate = true;*/ break; // We don't need to track this
- case 'h': Half = true; break;
- case 'l': Long = true; break;
- case 'q':
- case 'L': LongLong = true; break;
- default:
- if (Fmt[-1] > '9' || Fmt[-1] < '0') // Ignore field width specs
- goto Out;
- }
- }
- Out:
-
- // Read the conversion character
- if (!Suppress && Fmt[-1] != '%') { // Nothing to do?
- unsigned Size = 0;
- const Type *Ty = 0;
-
- switch (Fmt[-1]) {
- case 'i': case 'o': case 'u': case 'x': case 'X': case 'n': case 'p':
- case 'd':
- if (Long || LongLong) {
- Size = 8; Ty = Type::getInt64Ty(C);
- } else if (Half) {
- Size = 4; Ty = Type::getInt16Ty(C);
- } else {
- Size = 4; Ty = Type::getInt32Ty(C);
- }
- break;
-
- case 'e': case 'g': case 'E':
- case 'f':
- if (Long || LongLong) {
- Size = 8; Ty = Type::getDoubleTy(C);
- } else {
- Size = 4; Ty = Type::getFloatTy(C);
- }
- break;
-
- case 's': case 'c': case '[': // No byteswap needed
- Size = 1;
- Ty = Type::getInt8Ty(C);
- break;
-
- default: break;
- }
-
- if (Size) {
- GenericValue GV;
- void *Arg = Args[ArgNo++];
- memcpy(&GV, Arg, Size);
- TheInterpreter->StoreValueToMemory(GV, (GenericValue*)Arg, Ty);
- }
- }
- }
- }
-}
-
// int sscanf(const char *format, ...);
GenericValue lle_X_sscanf(const FunctionType *FT,
const std::vector<GenericValue> &args) {
@@ -508,9 +434,6 @@ GenericValue lle_X_sscanf(const FunctionType *FT,
GenericValue GV;
GV.IntVal = APInt(32, sscanf(Args[0], Args[1], Args[2], Args[3], Args[4],
Args[5], Args[6], Args[7], Args[8], Args[9]));
- ByteswapSCANFResults(FT->getContext(),
- Args[1], Args[2], Args[3], Args[4],
- Args[5], Args[6], Args[7], Args[8], Args[9], 0);
return GV;
}
@@ -526,9 +449,6 @@ GenericValue lle_X_scanf(const FunctionType *FT,
GenericValue GV;
GV.IntVal = APInt(32, scanf( Args[0], Args[1], Args[2], Args[3], Args[4],
Args[5], Args[6], Args[7], Args[8], Args[9]));
- ByteswapSCANFResults(FT->getContext(),
- Args[0], Args[1], Args[2], Args[3], Args[4],
- Args[5], Args[6], Args[7], Args[8], Args[9]);
return GV;
}
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index e21d760..6d781c7 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -198,15 +198,17 @@ ExecutionEngine *ExecutionEngine::createJIT(ModuleProvider *MP,
std::string *ErrorStr,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
- bool GVsWithCode) {
- return JIT::createJIT(MP, ErrorStr, JMM, OptLevel, GVsWithCode);
+ bool GVsWithCode,
+ CodeModel::Model CMM) {
+ return JIT::createJIT(MP, ErrorStr, JMM, OptLevel, GVsWithCode, CMM);
}
ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
std::string *ErrorStr,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
- bool GVsWithCode) {
+ bool GVsWithCode,
+ CodeModel::Model CMM) {
// Make sure we can resolve symbols in the program as well. The zero arg
// to the function tells DynamicLibrary to load the program, not a library.
if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
@@ -215,6 +217,7 @@ ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
// Pick a target either via -march or by guessing the native arch.
TargetMachine *TM = JIT::selectTarget(MP, ErrorStr);
if (!TM || (ErrorStr && ErrorStr->length() > 0)) return 0;
+ TM->setCodeModel(CMM);
// If the target supports JIT code generation, create a the JIT.
if (TargetJITInfo *TJ = TM->getJITInfo()) {
@@ -613,11 +616,6 @@ void JIT::runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked) {
// the stub with real address of the function.
updateFunctionStub(PF);
}
-
- // If the JIT is configured to emit info so that dlsym can be used to
- // rewrite stubs to external globals, do so now.
- if (areDlsymStubsEnabled() && !isCompilingLazily())
- updateDlsymStubTable();
}
/// getPointerToFunction - This method is used to get the address of the
@@ -660,8 +658,7 @@ void *JIT::getPointerToFunction(Function *F) {
}
if (F->isDeclaration() || F->hasAvailableExternallyLinkage()) {
- bool AbortOnFailure =
- !areDlsymStubsEnabled() && !F->hasExternalWeakLinkage();
+ bool AbortOnFailure = !F->hasExternalWeakLinkage();
void *Addr = getPointerToNamedFunction(F->getName(), AbortOnFailure);
addGlobalMapping(F, Addr);
return Addr;
@@ -690,7 +687,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
return (void*)&__dso_handle;
#endif
Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(GV->getName());
- if (Ptr == 0 && !areDlsymStubsEnabled()) {
+ if (Ptr == 0) {
llvm_report_error("Could not resolve external global address: "
+GV->getName());
}
diff --git a/lib/ExecutionEngine/JIT/JIT.h b/lib/ExecutionEngine/JIT/JIT.h
index fb3cb24..f165bd6 100644
--- a/lib/ExecutionEngine/JIT/JIT.h
+++ b/lib/ExecutionEngine/JIT/JIT.h
@@ -85,8 +85,10 @@ public:
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel =
CodeGenOpt::Default,
- bool GVsWithCode = true) {
- return ExecutionEngine::createJIT(MP, Err, JMM, OptLevel, GVsWithCode);
+ bool GVsWithCode = true,
+ CodeModel::Model CMM = CodeModel::Default) {
+ return ExecutionEngine::createJIT(MP, Err, JMM, OptLevel, GVsWithCode,
+ CMM);
}
virtual void addModuleProvider(ModuleProvider *MP);
@@ -175,7 +177,8 @@ public:
std::string *ErrorStr,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
- bool GVsWithCode);
+ bool GVsWithCode,
+ CodeModel::Model CMM);
// Run the JIT on F and return information about the generated code
void runJITOnFunction(Function *F, MachineCodeInfo *MCI = 0);
@@ -195,7 +198,6 @@ private:
TargetMachine &tm);
void runJITOnFunctionUnlocked(Function *F, const MutexGuard &locked);
void updateFunctionStub(Function *F);
- void updateDlsymStubTable();
protected:
diff --git a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
index 49faf64..565509c 100644
--- a/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
+++ b/lib/ExecutionEngine/JIT/JITDebugRegisterer.cpp
@@ -35,7 +35,7 @@ namespace llvm {
extern "C" {
// Debuggers put a breakpoint in this function.
- void DISABLE_INLINE __jit_debug_register_code() { }
+ DISABLE_INLINE void __jit_debug_register_code() { }
// We put information about the JITed function in this global, which the
// debugger reads. Make sure to specify the version statically, because the
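(Not part of the patch; background sketch.) DISABLE_INLINE abstracts over compiler-specific "never inline" annotations, and the MSVC spelling has to appear before the return type, so the macro is moved in front of void. A rough approximation of the definition (assumption; see llvm/Support/Compiler.h for the real one):

    #if defined(_MSC_VER)
    #define DISABLE_INLINE __declspec(noinline)
    #elif defined(__GNUC__)
    #define DISABLE_INLINE __attribute__((noinline))
    #else
    #define DISABLE_INLINE
    #endif

    // With the macro leading the declaration, both spellings parse correctly:
    DISABLE_INLINE void __jit_debug_register_code() { }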
diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp
index 79f1eb4..5f195ee 100644
--- a/lib/ExecutionEngine/JIT/JITEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp
@@ -63,6 +63,7 @@ static JIT *TheJIT = 0;
// JIT lazy compilation code.
//
namespace {
+ class JITEmitter;
class JITResolverState;
template<typename ValueTy>
@@ -213,16 +214,18 @@ namespace {
std::map<void*, unsigned> revGOTMap;
unsigned nextGOTIndex;
+ JITEmitter &JE;
+
static JITResolver *TheJITResolver;
public:
- explicit JITResolver(JIT &jit) : nextGOTIndex(0) {
+ explicit JITResolver(JIT &jit, JITEmitter &je) : nextGOTIndex(0), JE(je) {
TheJIT = &jit;
LazyResolverFn = jit.getJITInfo().getLazyResolverFunction(JITCompilerFn);
assert(TheJITResolver == 0 && "Multiple JIT resolvers?");
TheJITResolver = this;
}
-
+
~JITResolver() {
TheJITResolver = 0;
}
@@ -244,19 +247,9 @@ namespace {
/// specified GV address.
void *getGlobalValueIndirectSym(GlobalValue *V, void *GVAddress);
- /// AddCallbackAtLocation - If the target is capable of rewriting an
- /// instruction without the use of a stub, record the location of the use so
- /// we know which function is being used at the location.
- void *AddCallbackAtLocation(Function *F, void *Location) {
- MutexGuard locked(TheJIT->lock);
- /// Get the target-specific JIT resolver function.
- state.AddCallSite(locked, Location, F);
- return (void*)(intptr_t)LazyResolverFn;
- }
-
void getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
SmallVectorImpl<void*> &Ptrs);
-
+
GlobalValue *invalidateStub(void *Stub);
/// getGOTIndexForAddress - Return a new or existing index in the GOT for
@@ -269,6 +262,225 @@ namespace {
/// been compiled, this function compiles it first.
static void *JITCompilerFn(void *Stub);
};
+
+ /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
+ /// used to output functions to memory for execution.
+ class JITEmitter : public JITCodeEmitter {
+ JITMemoryManager *MemMgr;
+
+ // When outputting a function stub in the context of some other function, we
+ // save BufferBegin/BufferEnd/CurBufferPtr here.
+ uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
+
+ // When reattempting to JIT a function after running out of space, we store
+ // the estimated size of the function we're trying to JIT here, so we can
+ // ask the memory manager for at least this much space. When we
+ // successfully emit the function, we reset this back to zero.
+ uintptr_t SizeEstimate;
+
+ /// Relocations - These are the relocations that the function needs, as
+ /// emitted.
+ std::vector<MachineRelocation> Relocations;
+
+ /// MBBLocations - This vector is a mapping from MBB ID's to their address.
+ /// It is filled in by the StartMachineBasicBlock callback and queried by
+ /// the getMachineBasicBlockAddress callback.
+ std::vector<uintptr_t> MBBLocations;
+
+ /// ConstantPool - The constant pool for the current function.
+ ///
+ MachineConstantPool *ConstantPool;
+
+ /// ConstantPoolBase - A pointer to the first entry in the constant pool.
+ ///
+ void *ConstantPoolBase;
+
+ /// ConstPoolAddresses - Addresses of individual constant pool entries.
+ ///
+ SmallVector<uintptr_t, 8> ConstPoolAddresses;
+
+ /// JumpTable - The jump tables for the current function.
+ ///
+ MachineJumpTableInfo *JumpTable;
+
+ /// JumpTableBase - A pointer to the first entry in the jump table.
+ ///
+ void *JumpTableBase;
+
+ /// Resolver - This contains info about the currently resolved functions.
+ JITResolver Resolver;
+
+ /// DE - The dwarf emitter for the jit.
+ OwningPtr<JITDwarfEmitter> DE;
+
+ /// DR - The debug registerer for the jit.
+ OwningPtr<JITDebugRegisterer> DR;
+
+ /// LabelLocations - This vector is a mapping from Label ID's to their
+ /// address.
+ std::vector<uintptr_t> LabelLocations;
+
+    /// MMI - Machine module info for exception information
+ MachineModuleInfo* MMI;
+
+ // GVSet - a set to keep track of which globals have been seen
+ SmallPtrSet<const GlobalVariable*, 8> GVSet;
+
+ // CurFn - The llvm function being emitted. Only valid during
+ // finishFunction().
+ const Function *CurFn;
+
+ /// Information about emitted code, which is passed to the
+ /// JITEventListeners. This is reset in startFunction and used in
+ /// finishFunction.
+ JITEvent_EmittedFunctionDetails EmissionDetails;
+
+ struct EmittedCode {
+ void *FunctionBody; // Beginning of the function's allocation.
+ void *Code; // The address the function's code actually starts at.
+ void *ExceptionTable;
+ EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {}
+ };
+ struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
+ typedef JITEmitter *ExtraData;
+ static void onDelete(JITEmitter *, const Function*);
+ static void onRAUW(JITEmitter *, const Function*, const Function*);
+ };
+ ValueMap<const Function *, EmittedCode,
+ EmittedFunctionConfig> EmittedFunctions;
+
+ // CurFnStubUses - For a given Function, a vector of stubs that it
+ // references. This facilitates the JIT detecting that a stub is no
+ // longer used, so that it may be deallocated.
+ DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses;
+
+ // StubFnRefs - For a given pointer to a stub, a set of Functions which
+ // reference the stub. When the count of a stub's references drops to zero,
+ // the stub is unused.
+ DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs;
+
+ DebugLocTuple PrevDLT;
+
+ public:
+ JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
+ : SizeEstimate(0), Resolver(jit, *this), MMI(0), CurFn(0),
+ EmittedFunctions(this) {
+ MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
+ if (jit.getJITInfo().needsGOT()) {
+ MemMgr->AllocateGOT();
+ DEBUG(errs() << "JIT is managing a GOT\n");
+ }
+
+ if (DwarfExceptionHandling || JITEmitDebugInfo) {
+ DE.reset(new JITDwarfEmitter(jit));
+ }
+ if (JITEmitDebugInfo) {
+ DR.reset(new JITDebugRegisterer(TM));
+ }
+ }
+ ~JITEmitter() {
+ delete MemMgr;
+ }
+
+ /// classof - Methods for support type inquiry through isa, cast, and
+ /// dyn_cast:
+ ///
+ static inline bool classof(const JITEmitter*) { return true; }
+ static inline bool classof(const MachineCodeEmitter*) { return true; }
+
+ JITResolver &getJITResolver() { return Resolver; }
+
+ virtual void startFunction(MachineFunction &F);
+ virtual bool finishFunction(MachineFunction &F);
+
+ void emitConstantPool(MachineConstantPool *MCP);
+ void initJumpTableInfo(MachineJumpTableInfo *MJTI);
+ void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
+
+ virtual void startGVStub(const GlobalValue* GV, unsigned StubSize,
+ unsigned Alignment = 1);
+ virtual void startGVStub(const GlobalValue* GV, void *Buffer,
+ unsigned StubSize);
+ virtual void* finishGVStub(const GlobalValue *GV);
+
+ /// allocateSpace - Reserves space in the current block if any, or
+ /// allocate a new one of the given size.
+ virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
+
+ /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace,
+ /// this method does not allocate memory in the current output buffer,
+ /// because a global may live longer than the current function.
+ virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment);
+
+ virtual void addRelocation(const MachineRelocation &MR) {
+ Relocations.push_back(MR);
+ }
+
+ virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
+ if (MBBLocations.size() <= (unsigned)MBB->getNumber())
+ MBBLocations.resize((MBB->getNumber()+1)*2);
+ MBBLocations[MBB->getNumber()] = getCurrentPCValue();
+ DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
+ << (void*) getCurrentPCValue() << "]\n");
+ }
+
+ virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
+ virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
+
+ virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
+ assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
+ MBBLocations[MBB->getNumber()] && "MBB not emitted!");
+ return MBBLocations[MBB->getNumber()];
+ }
+
+ /// retryWithMoreMemory - Log a retry and deallocate all memory for the
+ /// given function. Increase the minimum allocation size so that we get
+ /// more memory next time.
+ void retryWithMoreMemory(MachineFunction &F);
+
+ /// deallocateMemForFunction - Deallocate all memory for the specified
+ /// function body.
+ void deallocateMemForFunction(const Function *F);
+
+ /// AddStubToCurrentFunction - Mark the current function being JIT'd as
+ /// using the stub at the specified address. Allows
+ /// deallocateMemForFunction to also remove stubs no longer referenced.
+ void AddStubToCurrentFunction(void *Stub);
+
+ virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
+
+ virtual void emitLabel(uint64_t LabelID) {
+ if (LabelLocations.size() <= LabelID)
+ LabelLocations.resize((LabelID+1)*2);
+ LabelLocations[LabelID] = getCurrentPCValue();
+ }
+
+ virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
+ assert(LabelLocations.size() > (unsigned)LabelID &&
+ LabelLocations[LabelID] && "Label not emitted!");
+ return LabelLocations[LabelID];
+ }
+
+ virtual void setModuleInfo(MachineModuleInfo* Info) {
+ MMI = Info;
+ if (DE.get()) DE->setModuleInfo(Info);
+ }
+
+ void setMemoryExecutable() {
+ MemMgr->setMemoryExecutable();
+ }
+
+ JITMemoryManager *getMemMgr() const { return MemMgr; }
+
+ private:
+ void *getPointerToGlobal(GlobalValue *GV, void *Reference,
+ bool MayNeedFarStub);
+ void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference);
+ unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size);
+ unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size);
+ unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size);
+ unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF);
+ };
}
JITResolver *JITResolver::TheJITResolver = 0;
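(Sketch, not part of the patch.) The class body added above is the old JITEmitter, hoisted in front of JITResolver's method definitions so the resolver can emit stubs through the JITEmitter reference it now receives at construction instead of reaching back through TheJIT->getCodeEmitter(). The resulting shape, in outline:

    namespace {
      class JITEmitter;                        // forward declaration breaks the cycle
      class JITResolver {
        JITEmitter &JE;                        // the emitter that owns this resolver
      public:
        JITResolver(JIT &jit, JITEmitter &je) : JE(je) { /* ... */ }
        void *getFunctionStub(Function *F);    // emits through JE below
      };
      class JITEmitter : public JITCodeEmitter {
        JITResolver Resolver;                  // constructed with *this
      public:
        JITEmitter(JIT &jit /* ... */) : Resolver(jit, *this) { /* ... */ }
      };
    }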
@@ -306,16 +518,13 @@ void *JITResolver::getFunctionStub(Function *F) {
Actual = TheJIT->getPointerToFunction(F);
// If we resolved the symbol to a null address (eg. a weak external)
- // don't emit a stub. Return a null pointer to the application. If dlsym
- // stubs are enabled, not being able to resolve the address is not
- // meaningful.
- if (!Actual && !TheJIT->areDlsymStubsEnabled()) return 0;
+ // don't emit a stub. Return a null pointer to the application.
+ if (!Actual) return 0;
}
// Codegen a new stub, calling the lazy resolver or the actual address of the
// external function, if it was resolved.
- Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual,
- *TheJIT->getCodeEmitter());
+ Stub = TheJIT->getJITInfo().emitFunctionStub(F, Actual, JE);
if (Actual != (void*)(intptr_t)LazyResolverFn) {
// If we are getting the stub for an external function, we really want the
@@ -352,9 +561,9 @@ void *JITResolver::getGlobalValueIndirectSym(GlobalValue *GV, void *GVAddress) {
// Otherwise, codegen a new indirect symbol.
IndirectSym = TheJIT->getJITInfo().emitGlobalValueIndirectSym(GV, GVAddress,
- *TheJIT->getCodeEmitter());
+ JE);
- DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym
+ DEBUG(errs() << "JIT: Indirect symbol emitted at [" << IndirectSym
<< "] for GV '" << GV->getName() << "'\n");
return IndirectSym;
@@ -367,8 +576,7 @@ void *JITResolver::getExternalFunctionStub(void *FnAddr) {
void *&Stub = ExternalFnToStubMap[FnAddr];
if (Stub) return Stub;
- Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr,
- *TheJIT->getCodeEmitter());
+ Stub = TheJIT->getJITInfo().emitFunctionStub(0, FnAddr, JE);
DEBUG(errs() << "JIT: Stub emitted at [" << Stub
<< "] for external function at '" << FnAddr << "'\n");
@@ -389,10 +597,10 @@ unsigned JITResolver::getGOTIndexForAddr(void* addr) {
void JITResolver::getRelocatableGVs(SmallVectorImpl<GlobalValue*> &GVs,
SmallVectorImpl<void*> &Ptrs) {
MutexGuard locked(TheJIT->lock);
-
+
const FunctionToStubMapTy &FM = state.getFunctionToStubMap(locked);
GlobalToIndirectSymMapTy &GM = state.getGlobalToIndirectSymMap(locked);
-
+
for (FunctionToStubMapTy::const_iterator i = FM.begin(), e = FM.end();
i != e; ++i){
Function *F = i->first;
@@ -428,7 +636,7 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) {
GM.erase(i);
return GV;
}
-
+
// Lastly, check to see if it's in the ExternalFnToStubMap.
for (std::map<void *, void *>::iterator i = ExternalFnToStubMap.begin(),
e = ExternalFnToStubMap.end(); i != e; ++i) {
@@ -437,7 +645,7 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) {
ExternalFnToStubMap.erase(i);
break;
}
-
+
return 0;
}
@@ -446,7 +654,7 @@ GlobalValue *JITResolver::invalidateStub(void *Stub) {
/// it if necessary, then returns the resultant function pointer.
void *JITResolver::JITCompilerFn(void *Stub) {
JITResolver &JR = *TheJITResolver;
-
+
Function* F = 0;
void* ActualPtr = 0;
@@ -466,16 +674,16 @@ void *JITResolver::JITCompilerFn(void *Stub) {
// If we have already code generated the function, just return the address.
void *Result = TheJIT->getPointerToGlobalIfAvailable(F);
-
+
if (!Result) {
// Otherwise we don't have it, do lazy compilation now.
-
+
// If lazy compilation is disabled, emit a useful error message and abort.
if (!TheJIT->isCompilingLazily()) {
llvm_report_error("LLVM JIT requested to do lazy compilation of function '"
+ F->getName() + "' when lazy compiles are disabled!");
}
-
+
DEBUG(errs() << "JIT: Lazily resolving function '" << F->getName()
<< "' In stub ptr = " << Stub << " actual ptr = "
<< ActualPtr << "\n");
@@ -508,237 +716,8 @@ void *JITResolver::JITCompilerFn(void *Stub) {
//===----------------------------------------------------------------------===//
// JITEmitter code.
//
-namespace {
- /// JITEmitter - The JIT implementation of the MachineCodeEmitter, which is
- /// used to output functions to memory for execution.
- class JITEmitter : public JITCodeEmitter {
- JITMemoryManager *MemMgr;
-
- // When outputting a function stub in the context of some other function, we
- // save BufferBegin/BufferEnd/CurBufferPtr here.
- uint8_t *SavedBufferBegin, *SavedBufferEnd, *SavedCurBufferPtr;
-
- // When reattempting to JIT a function after running out of space, we store
- // the estimated size of the function we're trying to JIT here, so we can
- // ask the memory manager for at least this much space. When we
- // successfully emit the function, we reset this back to zero.
- uintptr_t SizeEstimate;
-
- /// Relocations - These are the relocations that the function needs, as
- /// emitted.
- std::vector<MachineRelocation> Relocations;
-
- /// MBBLocations - This vector is a mapping from MBB ID's to their address.
- /// It is filled in by the StartMachineBasicBlock callback and queried by
- /// the getMachineBasicBlockAddress callback.
- std::vector<uintptr_t> MBBLocations;
-
- /// ConstantPool - The constant pool for the current function.
- ///
- MachineConstantPool *ConstantPool;
-
- /// ConstantPoolBase - A pointer to the first entry in the constant pool.
- ///
- void *ConstantPoolBase;
-
- /// ConstPoolAddresses - Addresses of individual constant pool entries.
- ///
- SmallVector<uintptr_t, 8> ConstPoolAddresses;
-
- /// JumpTable - The jump tables for the current function.
- ///
- MachineJumpTableInfo *JumpTable;
-
- /// JumpTableBase - A pointer to the first entry in the jump table.
- ///
- void *JumpTableBase;
-
- /// Resolver - This contains info about the currently resolved functions.
- JITResolver Resolver;
-
- /// DE - The dwarf emitter for the jit.
- OwningPtr<JITDwarfEmitter> DE;
-
- /// DR - The debug registerer for the jit.
- OwningPtr<JITDebugRegisterer> DR;
-
- /// LabelLocations - This vector is a mapping from Label ID's to their
- /// address.
- std::vector<uintptr_t> LabelLocations;
-
- /// MMI - Machine module info for exception informations
- MachineModuleInfo* MMI;
-
- // GVSet - a set to keep track of which globals have been seen
- SmallPtrSet<const GlobalVariable*, 8> GVSet;
-
- // CurFn - The llvm function being emitted. Only valid during
- // finishFunction().
- const Function *CurFn;
-
- /// Information about emitted code, which is passed to the
- /// JITEventListeners. This is reset in startFunction and used in
- /// finishFunction.
- JITEvent_EmittedFunctionDetails EmissionDetails;
-
- struct EmittedCode {
- void *FunctionBody; // Beginning of the function's allocation.
- void *Code; // The address the function's code actually starts at.
- void *ExceptionTable;
- EmittedCode() : FunctionBody(0), Code(0), ExceptionTable(0) {}
- };
- struct EmittedFunctionConfig : public ValueMapConfig<const Function*> {
- typedef JITEmitter *ExtraData;
- static void onDelete(JITEmitter *, const Function*);
- static void onRAUW(JITEmitter *, const Function*, const Function*);
- };
- ValueMap<const Function *, EmittedCode,
- EmittedFunctionConfig> EmittedFunctions;
-
- // CurFnStubUses - For a given Function, a vector of stubs that it
- // references. This facilitates the JIT detecting that a stub is no
- // longer used, so that it may be deallocated.
- DenseMap<AssertingVH<const Function>, SmallVector<void*, 1> > CurFnStubUses;
-
- // StubFnRefs - For a given pointer to a stub, a set of Functions which
- // reference the stub. When the count of a stub's references drops to zero,
- // the stub is unused.
- DenseMap<void *, SmallPtrSet<const Function*, 1> > StubFnRefs;
-
- // ExtFnStubs - A map of external function names to stubs which have entries
- // in the JITResolver's ExternalFnToStubMap.
- StringMap<void *> ExtFnStubs;
-
- DebugLocTuple PrevDLT;
-
- public:
- JITEmitter(JIT &jit, JITMemoryManager *JMM, TargetMachine &TM)
- : SizeEstimate(0), Resolver(jit), MMI(0), CurFn(0),
- EmittedFunctions(this) {
- MemMgr = JMM ? JMM : JITMemoryManager::CreateDefaultMemManager();
- if (jit.getJITInfo().needsGOT()) {
- MemMgr->AllocateGOT();
- DEBUG(errs() << "JIT is managing a GOT\n");
- }
-
- if (DwarfExceptionHandling || JITEmitDebugInfo) {
- DE.reset(new JITDwarfEmitter(jit));
- }
- if (JITEmitDebugInfo) {
- DR.reset(new JITDebugRegisterer(TM));
- }
- }
- ~JITEmitter() {
- delete MemMgr;
- }
-
- /// classof - Methods for support type inquiry through isa, cast, and
- /// dyn_cast:
- ///
- static inline bool classof(const JITEmitter*) { return true; }
- static inline bool classof(const MachineCodeEmitter*) { return true; }
-
- JITResolver &getJITResolver() { return Resolver; }
-
- virtual void startFunction(MachineFunction &F);
- virtual bool finishFunction(MachineFunction &F);
-
- void emitConstantPool(MachineConstantPool *MCP);
- void initJumpTableInfo(MachineJumpTableInfo *MJTI);
- void emitJumpTableInfo(MachineJumpTableInfo *MJTI);
-
- virtual void startGVStub(const GlobalValue* GV, unsigned StubSize,
- unsigned Alignment = 1);
- virtual void startGVStub(const GlobalValue* GV, void *Buffer,
- unsigned StubSize);
- virtual void* finishGVStub(const GlobalValue *GV);
-
- /// allocateSpace - Reserves space in the current block if any, or
- /// allocate a new one of the given size.
- virtual void *allocateSpace(uintptr_t Size, unsigned Alignment);
-
- /// allocateGlobal - Allocate memory for a global. Unlike allocateSpace,
- /// this method does not allocate memory in the current output buffer,
- /// because a global may live longer than the current function.
- virtual void *allocateGlobal(uintptr_t Size, unsigned Alignment);
-
- virtual void addRelocation(const MachineRelocation &MR) {
- Relocations.push_back(MR);
- }
-
- virtual void StartMachineBasicBlock(MachineBasicBlock *MBB) {
- if (MBBLocations.size() <= (unsigned)MBB->getNumber())
- MBBLocations.resize((MBB->getNumber()+1)*2);
- MBBLocations[MBB->getNumber()] = getCurrentPCValue();
- DEBUG(errs() << "JIT: Emitting BB" << MBB->getNumber() << " at ["
- << (void*) getCurrentPCValue() << "]\n");
- }
-
- virtual uintptr_t getConstantPoolEntryAddress(unsigned Entry) const;
- virtual uintptr_t getJumpTableEntryAddress(unsigned Entry) const;
-
- virtual uintptr_t getMachineBasicBlockAddress(MachineBasicBlock *MBB) const {
- assert(MBBLocations.size() > (unsigned)MBB->getNumber() &&
- MBBLocations[MBB->getNumber()] && "MBB not emitted!");
- return MBBLocations[MBB->getNumber()];
- }
-
- /// retryWithMoreMemory - Log a retry and deallocate all memory for the
- /// given function. Increase the minimum allocation size so that we get
- /// more memory next time.
- void retryWithMoreMemory(MachineFunction &F);
-
- /// deallocateMemForFunction - Deallocate all memory for the specified
- /// function body.
- void deallocateMemForFunction(const Function *F);
-
- /// AddStubToCurrentFunction - Mark the current function being JIT'd as
- /// using the stub at the specified address. Allows
- /// deallocateMemForFunction to also remove stubs no longer referenced.
- void AddStubToCurrentFunction(void *Stub);
-
- /// getExternalFnStubs - Accessor for the JIT to find stubs emitted for
- /// MachineRelocations that reference external functions by name.
- const StringMap<void*> &getExternalFnStubs() const { return ExtFnStubs; }
-
- virtual void processDebugLoc(DebugLoc DL, bool BeforePrintingInsn);
-
- virtual void emitLabel(uint64_t LabelID) {
- if (LabelLocations.size() <= LabelID)
- LabelLocations.resize((LabelID+1)*2);
- LabelLocations[LabelID] = getCurrentPCValue();
- }
-
- virtual uintptr_t getLabelAddress(uint64_t LabelID) const {
- assert(LabelLocations.size() > (unsigned)LabelID &&
- LabelLocations[LabelID] && "Label not emitted!");
- return LabelLocations[LabelID];
- }
-
- virtual void setModuleInfo(MachineModuleInfo* Info) {
- MMI = Info;
- if (DE.get()) DE->setModuleInfo(Info);
- }
-
- void setMemoryExecutable() {
- MemMgr->setMemoryExecutable();
- }
-
- JITMemoryManager *getMemMgr() const { return MemMgr; }
-
- private:
- void *getPointerToGlobal(GlobalValue *GV, void *Reference, bool NoNeedStub);
- void *getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
- bool NoNeedStub);
- unsigned addSizeOfGlobal(const GlobalVariable *GV, unsigned Size);
- unsigned addSizeOfGlobalsInConstantVal(const Constant *C, unsigned Size);
- unsigned addSizeOfGlobalsInInitializer(const Constant *Init, unsigned Size);
- unsigned GetSizeOfGlobalsInBytes(MachineFunction &MF);
- };
-}
-
void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
- bool DoesntNeedStub) {
+ bool MayNeedFarStub) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(V))
return TheJIT->getOrEmitGlobalVariable(GV);
@@ -747,31 +726,26 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
// If we have already compiled the function, return a pointer to its body.
Function *F = cast<Function>(V);
- void *ResultPtr;
- if (!DoesntNeedStub) {
- // Return the function stub if it's already created.
- ResultPtr = Resolver.getFunctionStubIfAvailable(F);
- if (ResultPtr)
- AddStubToCurrentFunction(ResultPtr);
- } else {
- ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
+
+ void *FnStub = Resolver.getFunctionStubIfAvailable(F);
+ if (FnStub) {
+ // Return the function stub if it's already created. We do this first
+ // so that we're returning the same address for the function as any
+ // previous call.
+ AddStubToCurrentFunction(FnStub);
+ return FnStub;
}
+
+ // Otherwise if we have code, go ahead and return that.
+ void *ResultPtr = TheJIT->getPointerToGlobalIfAvailable(F);
if (ResultPtr) return ResultPtr;
// If this is an external function pointer, we can force the JIT to
- // 'compile' it, which really just adds it to the map. In dlsym mode,
- // external functions are forced through a stub, regardless of reloc type.
+ // 'compile' it, which really just adds it to the map.
if (F->isDeclaration() && !F->hasNotBeenReadFromBitcode() &&
- DoesntNeedStub && !TheJIT->areDlsymStubsEnabled())
+ !MayNeedFarStub)
return TheJIT->getPointerToFunction(F);
- // Okay, the function has not been compiled yet, if the target callback
- // mechanism is capable of rewriting the instruction directly, prefer to do
- // that instead of emitting a stub. This uses the lazy resolver, so is not
- // legal if lazy compilation is disabled.
- if (DoesntNeedStub && TheJIT->isCompilingLazily())
- return Resolver.AddCallbackAtLocation(F, Reference);
-
// Otherwise, we have to emit a stub.
void *StubAddr = Resolver.getFunctionStub(F);
@@ -785,17 +759,16 @@ void *JITEmitter::getPointerToGlobal(GlobalValue *V, void *Reference,
return StubAddr;
}
-void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference,
- bool NoNeedStub) {
+void *JITEmitter::getPointerToGVIndirectSym(GlobalValue *V, void *Reference) {
// Make sure GV is emitted first, and create a stub containing the fully
// resolved address.
- void *GVAddress = getPointerToGlobal(V, Reference, true);
+ void *GVAddress = getPointerToGlobal(V, Reference, false);
void *StubAddr = Resolver.getGlobalValueIndirectSym(V, GVAddress);
-
+
// Add the stub to the current function's list of referenced stubs, so we can
// deallocate them if the current function is ever freed.
AddStubToCurrentFunction(StubAddr);
-
+
return StubAddr;
}
@@ -820,7 +793,7 @@ void JITEmitter::processDebugLoc(DebugLoc DL, bool BeforePrintingInsn) {
NextLine.Loc = DL;
EmissionDetails.LineStarts.push_back(NextLine);
}
-
+
PrevDLT = CurDLT;
}
}
@@ -845,7 +818,7 @@ static unsigned GetConstantPoolSizeInBytes(MachineConstantPool *MCP,
static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return 0;
-
+
unsigned NumEntries = 0;
for (unsigned i = 0, e = JT.size(); i != e; ++i)
NumEntries += JT[i].MBBs.size();
@@ -857,7 +830,7 @@ static unsigned GetJumpTableSizeInBytes(MachineJumpTableInfo *MJTI) {
static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
if (Alignment == 0) Alignment = 1;
- // Since we do not know where the buffer will be allocated, be pessimistic.
+ // Since we do not know where the buffer will be allocated, be pessimistic.
return Size + Alignment;
}
@@ -867,7 +840,7 @@ static uintptr_t RoundUpToAlign(uintptr_t Size, unsigned Alignment) {
unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
const Type *ElTy = GV->getType()->getElementType();
size_t GVSize = (size_t)TheJIT->getTargetData()->getTypeAllocSize(ElTy);
- size_t GVAlign =
+ size_t GVAlign =
(size_t)TheJIT->getTargetData()->getPreferredAlignment(GV);
DEBUG(errs() << "JIT: Adding in size " << GVSize << " alignment " << GVAlign);
DEBUG(GV->dump());
@@ -884,7 +857,7 @@ unsigned JITEmitter::addSizeOfGlobal(const GlobalVariable *GV, unsigned Size) {
/// but are referenced from the constant; put them in GVSet and add their
/// size into the running total Size.
-unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
+unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
unsigned Size) {
// If it's undefined, return the garbage.
if (isa<UndefValue>(C))
@@ -947,7 +920,7 @@ unsigned JITEmitter::addSizeOfGlobalsInConstantVal(const Constant *C,
/// addSizeOfGlobalsInInitializer - handle any globals that we haven't seen yet
/// but are referenced from the given initializer.
-unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init,
+unsigned JITEmitter::addSizeOfGlobalsInInitializer(const Constant *Init,
unsigned Size) {
if (!isa<UndefValue>(Init) &&
!isa<ConstantVector>(Init) &&
@@ -968,7 +941,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
unsigned Size = 0;
GVSet.clear();
- for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
+ for (MachineFunction::iterator MBB = MF.begin(), E = MF.end();
MBB != E; ++MBB) {
for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end();
I != E; ++I) {
@@ -1000,7 +973,7 @@ unsigned JITEmitter::GetSizeOfGlobalsInBytes(MachineFunction &MF) {
DEBUG(errs() << "JIT: About to look through initializers\n");
// Look for more globals that are referenced only from initializers.
// GVSet.end is computed each time because the set can grow as we go.
- for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin();
+ for (SmallPtrSet<const GlobalVariable *, 8>::iterator I = GVSet.begin();
I != GVSet.end(); I++) {
const GlobalVariable* GV = *I;
if (GV->hasInitializer())
@@ -1022,10 +995,10 @@ void JITEmitter::startFunction(MachineFunction &F) {
const TargetInstrInfo* TII = F.getTarget().getInstrInfo();
MachineJumpTableInfo *MJTI = F.getJumpTableInfo();
MachineConstantPool *MCP = F.getConstantPool();
-
+
// Ensure the constant pool/jump table info is at least 4-byte aligned.
ActualSize = RoundUpToAlign(ActualSize, 16);
-
+
// Add the alignment of the constant pool
ActualSize = RoundUpToAlign(ActualSize, MCP->getConstantPoolAlignment());
@@ -1037,7 +1010,7 @@ void JITEmitter::startFunction(MachineFunction &F) {
// Add the jump table size
ActualSize += GetJumpTableSizeInBytes(MJTI);
-
+
// Add the alignment for the function
ActualSize = RoundUpToAlign(ActualSize,
std::max(F.getFunction()->getAlignment(), 8U));
@@ -1110,29 +1083,19 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
ResultPtr = TheJIT->getPointerToNamedFunction(MR.getExternalSymbol(),
false);
DEBUG(errs() << "JIT: Map \'" << MR.getExternalSymbol() << "\' to ["
- << ResultPtr << "]\n");
+ << ResultPtr << "]\n");
// If the target REALLY wants a stub for this function, emit it now.
- if (!MR.doesntNeedStub()) {
- if (!TheJIT->areDlsymStubsEnabled()) {
- ResultPtr = Resolver.getExternalFunctionStub(ResultPtr);
- } else {
- void *&Stub = ExtFnStubs[MR.getExternalSymbol()];
- if (!Stub) {
- Stub = Resolver.getExternalFunctionStub((void *)&Stub);
- AddStubToCurrentFunction(Stub);
- }
- ResultPtr = Stub;
- }
+ if (MR.mayNeedFarStub()) {
+ ResultPtr = Resolver.getExternalFunctionStub(ResultPtr);
}
} else if (MR.isGlobalValue()) {
ResultPtr = getPointerToGlobal(MR.getGlobalValue(),
BufferBegin+MR.getMachineCodeOffset(),
- MR.doesntNeedStub());
+ MR.mayNeedFarStub());
} else if (MR.isIndirectSymbol()) {
- ResultPtr = getPointerToGVIndirectSym(MR.getGlobalValue(),
- BufferBegin+MR.getMachineCodeOffset(),
- MR.doesntNeedStub());
+ ResultPtr = getPointerToGVIndirectSym(
+ MR.getGlobalValue(), BufferBegin+MR.getMachineCodeOffset());
} else if (MR.isBasicBlock()) {
ResultPtr = (void*)getMachineBasicBlockAddress(MR.getBasicBlock());
} else if (MR.isConstantPoolIndex()) {
@@ -1278,7 +1241,7 @@ bool JITEmitter::finishFunction(MachineFunction &F) {
if (MMI)
MMI->EndFunction();
-
+
return false;
}
@@ -1316,20 +1279,20 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
// If the function did not reference any stubs, return.
if (CurFnStubUses.find(F) == CurFnStubUses.end())
return;
-
+
// For each referenced stub, erase the reference to this function, and then
// erase the list of referenced stubs.
SmallVectorImpl<void *> &StubList = CurFnStubUses[F];
for (unsigned i = 0, e = StubList.size(); i != e; ++i) {
void *Stub = StubList[i];
-
+
// If we already invalidated this stub for this function, continue.
if (StubFnRefs.count(Stub) == 0)
continue;
-
+
SmallPtrSet<const Function *, 1> &FnRefs = StubFnRefs[Stub];
FnRefs.erase(F);
-
+
// If this function was the last reference to the stub, invalidate the stub
// in the JITResolver. Were there a memory manager deallocateStub routine,
// we could call that at this point too.
@@ -1338,19 +1301,10 @@ void JITEmitter::deallocateMemForFunction(const Function *F) {
StubFnRefs.erase(Stub);
// Invalidate the stub. If it is a GV stub, update the JIT's global
- // mapping for that GV to zero, otherwise, search the string map of
- // external function names to stubs and remove the entry for this stub.
+ // mapping for that GV to zero.
GlobalValue *GV = Resolver.invalidateStub(Stub);
if (GV) {
TheJIT->updateGlobalMapping(GV, 0);
- } else {
- for (StringMapIterator<void*> i = ExtFnStubs.begin(),
- e = ExtFnStubs.end(); i != e; ++i) {
- if (i->second == Stub) {
- ExtFnStubs.erase(i);
- break;
- }
- }
}
}
}
@@ -1421,7 +1375,7 @@ void JITEmitter::initJumpTableInfo(MachineJumpTableInfo *MJTI) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty()) return;
-
+
unsigned NumEntries = 0;
for (unsigned i = 0, e = JT.size(); i != e; ++i)
NumEntries += JT[i].MBBs.size();
@@ -1441,7 +1395,7 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
if (JT.empty() || JumpTableBase == 0) return;
-
+
if (TargetMachine::getRelocationModel() == Reloc::PIC_) {
assert(MJTI->getEntrySize() == 4 && "Cross JIT'ing?");
// For each jump table, place the offset from the beginning of the table
@@ -1460,8 +1414,8 @@ void JITEmitter::emitJumpTableInfo(MachineJumpTableInfo *MJTI) {
}
} else {
assert(MJTI->getEntrySize() == sizeof(void*) && "Cross JIT'ing?");
-
- // For each jump table, map each target in the jump table to the address of
+
+ // For each jump table, map each target in the jump table to the address of
// an emitted MachineBasicBlock.
intptr_t *SlotPtr = (intptr_t*)JumpTableBase;
@@ -1480,7 +1434,7 @@ void JITEmitter::startGVStub(const GlobalValue* GV, unsigned StubSize,
SavedBufferBegin = BufferBegin;
SavedBufferEnd = BufferEnd;
SavedCurBufferPtr = CurBufferPtr;
-
+
BufferBegin = CurBufferPtr = MemMgr->allocateStub(GV, StubSize, Alignment);
BufferEnd = BufferBegin+StubSize+1;
}
@@ -1490,7 +1444,7 @@ void JITEmitter::startGVStub(const GlobalValue* GV, void *Buffer,
SavedBufferBegin = BufferBegin;
SavedBufferEnd = BufferEnd;
SavedCurBufferPtr = CurBufferPtr;
-
+
BufferBegin = CurBufferPtr = (uint8_t *)Buffer;
BufferEnd = BufferBegin+StubSize+1;
}
@@ -1519,15 +1473,15 @@ uintptr_t JITEmitter::getConstantPoolEntryAddress(unsigned ConstantNum) const {
uintptr_t JITEmitter::getJumpTableEntryAddress(unsigned Index) const {
const std::vector<MachineJumpTableEntry> &JT = JumpTable->getJumpTables();
assert(Index < JT.size() && "Invalid jump table index!");
-
+
unsigned Offset = 0;
unsigned EntrySize = JumpTable->getEntrySize();
-
+
for (unsigned i = 0; i < Index; ++i)
Offset += JT[i].MBBs.size();
-
+
Offset *= EntrySize;
-
+
return (uintptr_t)((char *)JumpTableBase + Offset);
}
@@ -1572,7 +1526,7 @@ void *JIT::getPointerToFunctionOrStub(Function *F) {
// If we have already code generated the function, just return the address.
if (void *Addr = getPointerToGlobalIfAvailable(F))
return Addr;
-
+
// Get a stub if the target supports it.
assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
@@ -1591,92 +1545,6 @@ void JIT::updateFunctionStub(Function *F) {
getJITInfo().emitFunctionStubAtAddr(F, Addr, Stub, *getCodeEmitter());
}
-/// updateDlsymStubTable - Emit the data necessary to relocate the stubs
-/// that were emitted during code generation.
-///
-void JIT::updateDlsymStubTable() {
- assert(isa<JITEmitter>(JCE) && "Unexpected MCE?");
- JITEmitter *JE = cast<JITEmitter>(getCodeEmitter());
-
- SmallVector<GlobalValue*, 8> GVs;
- SmallVector<void*, 8> Ptrs;
- const StringMap<void *> &ExtFns = JE->getExternalFnStubs();
-
- JE->getJITResolver().getRelocatableGVs(GVs, Ptrs);
-
- unsigned nStubs = GVs.size() + ExtFns.size();
-
- // If there are no relocatable stubs, return.
- if (nStubs == 0)
- return;
-
- // If there are no new relocatable stubs, return.
- void *CurTable = JE->getMemMgr()->getDlsymTable();
- if (CurTable && (*(unsigned *)CurTable == nStubs))
- return;
-
- // Calculate the size of the stub info
- unsigned offset = 4 + 4 * nStubs + sizeof(intptr_t) * nStubs;
-
- SmallVector<unsigned, 8> Offsets;
- for (unsigned i = 0; i != GVs.size(); ++i) {
- Offsets.push_back(offset);
- offset += GVs[i]->getName().size() + 1;
- }
- for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
- i != e; ++i) {
- Offsets.push_back(offset);
- offset += strlen(i->first()) + 1;
- }
-
- // Allocate space for the new "stub", which contains the dlsym table.
- JE->startGVStub(0, offset, 4);
-
- // Emit the number of records
- JE->emitInt32(nStubs);
-
- // Emit the string offsets
- for (unsigned i = 0; i != nStubs; ++i)
- JE->emitInt32(Offsets[i]);
-
- // Emit the pointers. Verify that they are at least 2-byte aligned, and set
- // the low bit to 0 == GV, 1 == Function, so that the client code doing the
- // relocation can write the relocated pointer at the appropriate place in
- // the stub.
- for (unsigned i = 0; i != GVs.size(); ++i) {
- intptr_t Ptr = (intptr_t)Ptrs[i];
- assert((Ptr & 1) == 0 && "Stub pointers must be at least 2-byte aligned!");
-
- if (isa<Function>(GVs[i]))
- Ptr |= (intptr_t)1;
-
- if (sizeof(Ptr) == 8)
- JE->emitInt64(Ptr);
- else
- JE->emitInt32(Ptr);
- }
- for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
- i != e; ++i) {
- intptr_t Ptr = (intptr_t)i->second | 1;
-
- if (sizeof(Ptr) == 8)
- JE->emitInt64(Ptr);
- else
- JE->emitInt32(Ptr);
- }
-
- // Emit the strings.
- for (unsigned i = 0; i != GVs.size(); ++i)
- JE->emitString(GVs[i]->getName());
- for (StringMapConstIterator<void*> i = ExtFns.begin(), e = ExtFns.end();
- i != e; ++i)
- JE->emitString(i->first());
-
- // Tell the JIT memory manager where it is. The JIT Memory Manager will
- // deallocate space for the old one, if one existed.
- JE->getMemMgr()->SetDlsymTable(JE->finishGVStub(0));
-}
-
/// freeMachineCodeForFunction - release machine code memory for given Function.
///
void JIT::freeMachineCodeForFunction(Function *F) {
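(Sketch, not part of the patch.) Together with the JIT.cpp and JITMemoryManager.cpp changes, this file drops the dlsym-stub bookkeeping (ExtFnStubs, updateDlsymStubTable, the memory manager's dlsym table) and replaces the old negative query doesntNeedStub() with mayNeedFarStub(). The simplified external-symbol path reduces to roughly the following, assuming the MachineRelocation and JITResolver interfaces shown above:

    // Hypothetical helper distilled from finishFunction(): resolve an external
    // symbol, routing it through a far-call stub only when the target asks.
    static void *resolveExternal(MachineRelocation &MR, JITResolver &Resolver,
                                 void *DirectAddr) {
      if (MR.mayNeedFarStub())
        return Resolver.getExternalFunctionStub(DirectAddr);
      return DirectAddr;
    }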
diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
index 3796624..80cb999 100644
--- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
+++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp
@@ -49,23 +49,23 @@ namespace {
/// ThisAllocated - This is true if this block is currently allocated. If
/// not, this can be converted to a FreeRangeHeader.
unsigned ThisAllocated : 1;
-
+
/// PrevAllocated - Keep track of whether the block immediately before us is
/// allocated. If not, the word immediately before this header is the size
/// of the previous block.
unsigned PrevAllocated : 1;
-
+
/// BlockSize - This is the size in bytes of this memory block,
/// including this header.
uintptr_t BlockSize : (sizeof(intptr_t)*CHAR_BIT - 2);
-
+
/// getBlockAfter - Return the memory block immediately after this one.
///
MemoryRangeHeader &getBlockAfter() const {
return *(MemoryRangeHeader*)((char*)this+BlockSize);
}
-
+
/// getFreeBlockBefore - If the block before this one is free, return it,
/// otherwise return null.
FreeRangeHeader *getFreeBlockBefore() const {
@@ -73,15 +73,15 @@ namespace {
intptr_t PrevSize = ((intptr_t *)this)[-1];
return (FreeRangeHeader*)((char*)this-PrevSize);
}
-
+
/// FreeBlock - Turn an allocated block into a free block, adjusting
/// bits in the object headers, and adding an end of region memory block.
FreeRangeHeader *FreeBlock(FreeRangeHeader *FreeList);
-
+
/// TrimAllocationToSize - If this allocated block is significantly larger
/// than NewSize, split it into two pieces (where the former is NewSize
/// bytes, including the header), and add the new block to the free list.
- FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList,
+ FreeRangeHeader *TrimAllocationToSize(FreeRangeHeader *FreeList,
uint64_t NewSize);
};
@@ -91,13 +91,13 @@ namespace {
struct FreeRangeHeader : public MemoryRangeHeader {
FreeRangeHeader *Prev;
FreeRangeHeader *Next;
-
+
/// getMinBlockSize - Get the minimum size for a memory block. Blocks
/// smaller than this size cannot be created.
static unsigned getMinBlockSize() {
return sizeof(FreeRangeHeader)+sizeof(intptr_t);
}
-
+
/// SetEndOfBlockSizeMarker - The word at the end of every free block is
/// known to be the size of the free block. Set it for this block.
void SetEndOfBlockSizeMarker() {
@@ -110,7 +110,7 @@ namespace {
Next->Prev = Prev;
return Prev->Next = Next;
}
-
+
void AddToFreeList(FreeRangeHeader *FreeList) {
Next = FreeList;
Prev = FreeList->Prev;
@@ -121,7 +121,7 @@ namespace {
/// GrowBlock - The block after this block just got deallocated. Merge it
/// into the current block.
void GrowBlock(uintptr_t NewSize);
-
+
/// AllocateBlock - Mark this entire block allocated, updating freelists
/// etc. This returns a pointer to the circular free-list.
FreeRangeHeader *AllocateBlock();
@@ -137,7 +137,7 @@ FreeRangeHeader *FreeRangeHeader::AllocateBlock() {
// Mark this block allocated.
ThisAllocated = 1;
getBlockAfter().PrevAllocated = 1;
-
+
// Remove it from the free list.
return RemoveFromFreeList();
}
@@ -150,9 +150,9 @@ FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
MemoryRangeHeader *FollowingBlock = &getBlockAfter();
assert(ThisAllocated && "This block is already free!");
assert(FollowingBlock->PrevAllocated && "Flags out of sync!");
-
+
FreeRangeHeader *FreeListToReturn = FreeList;
-
+
// If the block after this one is free, merge it into this block.
if (!FollowingBlock->ThisAllocated) {
FreeRangeHeader &FollowingFreeBlock = *(FreeRangeHeader *)FollowingBlock;
@@ -164,18 +164,18 @@ FreeRangeHeader *MemoryRangeHeader::FreeBlock(FreeRangeHeader *FreeList) {
assert(&FollowingFreeBlock != FreeList && "No tombstone block?");
}
FollowingFreeBlock.RemoveFromFreeList();
-
+
// Include the following block into this one.
BlockSize += FollowingFreeBlock.BlockSize;
FollowingBlock = &FollowingFreeBlock.getBlockAfter();
-
+
// Tell the block after the block we are coalescing that this block is
// allocated.
FollowingBlock->PrevAllocated = 1;
}
-
+
assert(FollowingBlock->ThisAllocated && "Missed coalescing?");
-
+
if (FreeRangeHeader *PrevFreeBlock = getFreeBlockBefore()) {
PrevFreeBlock->GrowBlock(PrevFreeBlock->BlockSize + BlockSize);
return FreeListToReturn ? FreeListToReturn : PrevFreeBlock;
@@ -218,24 +218,24 @@ TrimAllocationToSize(FreeRangeHeader *FreeList, uint64_t NewSize) {
// Round up size for alignment of header.
unsigned HeaderAlign = __alignof(FreeRangeHeader);
NewSize = (NewSize+ (HeaderAlign-1)) & ~(HeaderAlign-1);
-
+
// Size is now the size of the block we will remove from the start of the
// current block.
assert(NewSize <= BlockSize &&
"Allocating more space from this block than exists!");
-
+
// If splitting this block will cause the remainder to be too small, do not
// split the block.
if (BlockSize <= NewSize+FreeRangeHeader::getMinBlockSize())
return FreeList;
-
+
// Otherwise, we splice the required number of bytes out of this block, form
// a new block immediately after it, then mark this block allocated.
MemoryRangeHeader &FormerNextBlock = getBlockAfter();
-
+
// Change the size of this block.
BlockSize = NewSize;
-
+
// Get the new block we just sliced out and turn it into a free block.
FreeRangeHeader &NewNextBlock = (FreeRangeHeader &)getBlockAfter();
NewNextBlock.BlockSize = (char*)&FormerNextBlock - (char*)&NewNextBlock;
@@ -283,7 +283,7 @@ namespace {
sys::MemoryBlock LastSlab;
// Memory slabs allocated by the JIT. We refer to them as slabs so we don't
- // confuse them with the blocks of memory descibed above.
+ // confuse them with the blocks of memory described above.
std::vector<sys::MemoryBlock> CodeSlabs;
JITSlabAllocator BumpSlabAllocator;
BumpPtrAllocator StubAllocator;
@@ -296,7 +296,6 @@ namespace {
MemoryRangeHeader *CurBlock;
uint8_t *GOTBase; // Target Specific reserved memory
- void *DlsymTable; // Stub external symbol information
public:
DefaultJITMemoryManager();
~DefaultJITMemoryManager();
@@ -318,7 +317,6 @@ namespace {
static const size_t DefaultSizeThreshold;
void AllocateGOT();
- void SetDlsymTable(void *);
// Testing methods.
virtual bool CheckInvariants(std::string &ErrorStr);
@@ -349,7 +347,7 @@ namespace {
}
largest = largest - sizeof(MemoryRangeHeader);
-
+
// If this block isn't big enough for the allocation desired, allocate
// another block of memory and add it to the free list.
if (largest < ActualSize ||
@@ -445,34 +443,30 @@ namespace {
return (uint8_t*)DataAllocator.Allocate(Size, Alignment);
}
- /// startExceptionTable - Use startFunctionBody to allocate memory for the
+ /// startExceptionTable - Use startFunctionBody to allocate memory for the
/// function's exception table.
uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) {
return startFunctionBody(F, ActualSize);
}
- /// endExceptionTable - The exception table of F is now allocated,
+ /// endExceptionTable - The exception table of F is now allocated,
/// and takes the memory in the range [TableStart,TableEnd).
void endExceptionTable(const Function *F, uint8_t *TableStart,
uint8_t *TableEnd, uint8_t* FrameRegister) {
assert(TableEnd > TableStart);
assert(TableStart == (uint8_t *)(CurBlock+1) &&
"Mismatched table start/end!");
-
+
uintptr_t BlockSize = TableEnd - (uint8_t *)CurBlock;
// Release the memory at the end of this block that isn't needed.
FreeMemoryList =CurBlock->TrimAllocationToSize(FreeMemoryList, BlockSize);
}
-
+
uint8_t *getGOTBase() const {
return GOTBase;
}
-
- void *getDlsymTable() const {
- return DlsymTable;
- }
-
+
void deallocateBlock(void *Block) {
// Find the block that is allocated for this function.
MemoryRangeHeader *MemRange = static_cast<MemoryRangeHeader*>(Block) - 1;
@@ -561,16 +555,16 @@ DefaultJITMemoryManager::DefaultJITMemoryManager()
// END ]
//
// The last three blocks are never deallocated or touched.
-
+
// Add MemoryRangeHeader to the end of the memory region, indicating that
// the space after the block of memory is allocated. This is block #3.
MemoryRangeHeader *Mem3 = (MemoryRangeHeader*)(MemBase+MemBlock.size())-1;
Mem3->ThisAllocated = 1;
Mem3->PrevAllocated = 0;
Mem3->BlockSize = sizeof(MemoryRangeHeader);
-
+
/// Add a tiny free region so that the free list always has one entry.
- FreeRangeHeader *Mem2 =
+ FreeRangeHeader *Mem2 =
(FreeRangeHeader *)(((char*)Mem3)-FreeRangeHeader::getMinBlockSize());
Mem2->ThisAllocated = 0;
Mem2->PrevAllocated = 1;
@@ -584,7 +578,7 @@ DefaultJITMemoryManager::DefaultJITMemoryManager()
Mem1->ThisAllocated = 1;
Mem1->PrevAllocated = 0;
Mem1->BlockSize = sizeof(MemoryRangeHeader);
-
+
// Add a FreeRangeHeader to the start of the function body region, indicating
// that the space is free. Mark the previous block allocated so we never look
// at it.
@@ -594,12 +588,11 @@ DefaultJITMemoryManager::DefaultJITMemoryManager()
Mem0->BlockSize = (char*)Mem1-(char*)Mem0;
Mem0->SetEndOfBlockSizeMarker();
Mem0->AddToFreeList(Mem2);
-
+
// Start out with the freelist pointing to Mem0.
FreeMemoryList = Mem0;
GOTBase = NULL;
- DlsymTable = NULL;
}
void DefaultJITMemoryManager::AllocateGOT() {
@@ -608,10 +601,6 @@ void DefaultJITMemoryManager::AllocateGOT() {
HasGOT = true;
}
-void DefaultJITMemoryManager::SetDlsymTable(void *ptr) {
- DlsymTable = ptr;
-}
-
DefaultJITMemoryManager::~DefaultJITMemoryManager() {
for (unsigned i = 0, e = CodeSlabs.size(); i != e; ++i)
sys::Memory::ReleaseRWX(CodeSlabs[i]);
diff --git a/lib/Linker/LinkArchives.cpp b/lib/Linker/LinkArchives.cpp
index 76d81c2..365ec05 100644
--- a/lib/Linker/LinkArchives.cpp
+++ b/lib/Linker/LinkArchives.cpp
@@ -172,10 +172,9 @@ Linker::LinkInArchive(const sys::Path &Filename, bool &is_native) {
verbose(" Linking in module: " + aModule->getModuleIdentifier());
// Link it in
- if (LinkInModule(aModule, &moduleErrorMsg)) {
+ if (LinkInModule(aModule, &moduleErrorMsg))
return error("Cannot link in module '" +
aModule->getModuleIdentifier() + "': " + moduleErrorMsg);
- }
}
}
diff --git a/lib/Linker/LinkItems.cpp b/lib/Linker/LinkItems.cpp
index 61f3c26..2c22550 100644
--- a/lib/Linker/LinkItems.cpp
+++ b/lib/Linker/LinkItems.cpp
@@ -70,7 +70,7 @@ Linker::LinkInItems(const ItemList& Items, ItemList& NativeItems) {
/// LinkInLibrary - links one library into the HeadModule.
///
-bool Linker::LinkInLibrary(const StringRef &Lib, bool& is_native) {
+bool Linker::LinkInLibrary(StringRef Lib, bool& is_native) {
is_native = false;
// Determine where this library lives.
sys::Path Pathname = FindLib(Lib);
@@ -160,14 +160,17 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
// Check for a file of name "-", which means "read standard input"
if (File.str() == "-") {
std::auto_ptr<Module> M;
- if (MemoryBuffer *Buffer = MemoryBuffer::getSTDIN()) {
+ MemoryBuffer *Buffer = MemoryBuffer::getSTDIN();
+ if (!Buffer->getBufferSize()) {
+ delete Buffer;
+ Error = "standard input is empty";
+ } else {
M.reset(ParseBitcodeFile(Buffer, Context, &Error));
delete Buffer;
if (M.get())
if (!LinkInModule(M.get(), &Error))
return false;
- } else
- Error = "standard input is empty";
+ }
return error("Cannot link stdin: " + Error);
}
@@ -187,7 +190,6 @@ bool Linker::LinkInFile(const sys::Path &File, bool &is_native) {
case sys::Archive_FileType:
// A user may specify an ar archive without -l, perhaps because it
// is not installed as a library. Detect that and link the archive.
- verbose("Linking archive file '" + File.str() + "'");
if (LinkInArchive(File, is_native))
return true;
break;
diff --git a/lib/Linker/Linker.cpp b/lib/Linker/Linker.cpp
index aef79d0..32aa0f9 100644
--- a/lib/Linker/Linker.cpp
+++ b/lib/Linker/Linker.cpp
@@ -20,8 +20,8 @@
#include "llvm/Config/config.h"
using namespace llvm;
-Linker::Linker(const StringRef &progname, const StringRef &modname,
- LLVMContext& C, unsigned flags):
+Linker::Linker(StringRef progname, StringRef modname,
+ LLVMContext& C, unsigned flags):
Context(C),
Composite(new Module(modname, C)),
LibPaths(),
@@ -29,7 +29,7 @@ Linker::Linker(const StringRef &progname, const StringRef &modname,
Error(),
ProgramName(progname) { }
-Linker::Linker(const StringRef &progname, Module* aModule, unsigned flags) :
+Linker::Linker(StringRef progname, Module* aModule, unsigned flags) :
Context(aModule->getContext()),
Composite(aModule),
LibPaths(),
@@ -42,7 +42,7 @@ Linker::~Linker() {
}
bool
-Linker::error(const StringRef &message) {
+Linker::error(StringRef message) {
Error = message;
if (!(Flags&QuietErrors))
errs() << ProgramName << ": error: " << message << "\n";
@@ -50,7 +50,7 @@ Linker::error(const StringRef &message) {
}
bool
-Linker::warning(const StringRef &message) {
+Linker::warning(StringRef message) {
Error = message;
if (!(Flags&QuietWarnings))
errs() << ProgramName << ": warning: " << message << "\n";
@@ -58,7 +58,7 @@ Linker::warning(const StringRef &message) {
}
void
-Linker::verbose(const StringRef &message) {
+Linker::verbose(StringRef message) {
if (Flags&Verbose)
errs() << " " << message << "\n";
}
@@ -114,7 +114,7 @@ Linker::LoadObject(const sys::Path &FN) {
// IsLibrary - Determine if "Name" is a library in "Directory". Return
// a non-empty sys::Path if its found, an empty one otherwise.
-static inline sys::Path IsLibrary(const StringRef &Name,
+static inline sys::Path IsLibrary(StringRef Name,
const sys::Path &Directory) {
sys::Path FullPath(Directory);
@@ -153,7 +153,7 @@ static inline sys::Path IsLibrary(const StringRef &Name,
/// Path if no matching file can be found.
///
sys::Path
-Linker::FindLib(const StringRef &Filename) {
+Linker::FindLib(StringRef Filename) {
// Determine if the pathname can be found as it stands.
sys::Path FilePath(Filename);
if (FilePath.canRead() &&
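(Not part of the patch; style note with a sketch.) This file and the MC changes that follow switch parameters from const StringRef & to plain StringRef. StringRef is just a pointer-and-length pair, so passing it by value costs the same as passing a reference and removes one indirection at each use:

    #include "llvm/ADT/StringRef.h"

    // Hypothetical declarations contrasting the two styles.
    void reportByValue(llvm::StringRef Message);        // style adopted by this patch
    void reportByConstRef(const llvm::StringRef &Msg);  // style being phased out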
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index e939f37..b6ebb1a 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -58,7 +58,7 @@ public:
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
- virtual void EmitBytes(const StringRef &Data);
+ virtual void EmitBytes(StringRef Data);
virtual void EmitValue(const MCExpr *Value, unsigned Size);
@@ -186,7 +186,7 @@ void MCAsmStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
OS << '\n';
}
-void MCAsmStreamer::EmitBytes(const StringRef &Data) {
+void MCAsmStreamer::EmitBytes(StringRef Data) {
assert(CurSection && "Cannot emit contents before setting section!");
for (unsigned i = 0, e = Data.size(); i != e; ++i)
OS << ".byte " << (unsigned) (unsigned char) Data[i] << '\n';
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 4f39f1e..1f5b6f1 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -180,7 +180,7 @@ public:
OS << StringRef(Zeros, N % 16);
}
- void WriteString(const StringRef &Str, unsigned ZeroFillSize = 0) {
+ void WriteString(StringRef Str, unsigned ZeroFillSize = 0) {
OS << Str;
if (ZeroFillSize)
WriteZeros(ZeroFillSize - Str.size());
diff --git a/lib/MC/MCContext.cpp b/lib/MC/MCContext.cpp
index 09479c5..45d2c02 100644
--- a/lib/MC/MCContext.cpp
+++ b/lib/MC/MCContext.cpp
@@ -23,7 +23,7 @@ MCContext::~MCContext() {
// we don't need to free them here.
}
-MCSymbol *MCContext::CreateSymbol(const StringRef &Name) {
+MCSymbol *MCContext::CreateSymbol(StringRef Name) {
assert(Name[0] != '\0' && "Normal symbols cannot be unnamed!");
// Create and bind the symbol, and ensure that names are unique.
@@ -32,7 +32,7 @@ MCSymbol *MCContext::CreateSymbol(const StringRef &Name) {
return Entry = new (*this) MCSymbol(Name, false);
}
-MCSymbol *MCContext::GetOrCreateSymbol(const StringRef &Name) {
+MCSymbol *MCContext::GetOrCreateSymbol(StringRef Name) {
MCSymbol *&Entry = Symbols[Name];
if (Entry) return Entry;
@@ -46,7 +46,7 @@ MCSymbol *MCContext::GetOrCreateSymbol(const Twine &Name) {
}
-MCSymbol *MCContext::CreateTemporarySymbol(const StringRef &Name) {
+MCSymbol *MCContext::CreateTemporarySymbol(StringRef Name) {
// If unnamed, just create a symbol.
if (Name.empty())
new (*this) MCSymbol("", true);
@@ -57,6 +57,6 @@ MCSymbol *MCContext::CreateTemporarySymbol(const StringRef &Name) {
return Entry = new (*this) MCSymbol(Name, true);
}
-MCSymbol *MCContext::LookupSymbol(const StringRef &Name) const {
+MCSymbol *MCContext::LookupSymbol(StringRef Name) const {
return Symbols.lookup(Name);
}
diff --git a/lib/MC/MCExpr.cpp b/lib/MC/MCExpr.cpp
index c950ff2..a5a2256 100644
--- a/lib/MC/MCExpr.cpp
+++ b/lib/MC/MCExpr.cpp
@@ -133,8 +133,7 @@ const MCSymbolRefExpr *MCSymbolRefExpr::Create(const MCSymbol *Sym,
return new (Ctx) MCSymbolRefExpr(Sym);
}
-const MCSymbolRefExpr *MCSymbolRefExpr::Create(const StringRef &Name,
- MCContext &Ctx) {
+const MCSymbolRefExpr *MCSymbolRefExpr::Create(StringRef Name, MCContext &Ctx) {
return Create(Ctx.GetOrCreateSymbol(Name), Ctx);
}
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 189f072..828b92a 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -134,7 +134,7 @@ public:
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0);
- virtual void EmitBytes(const StringRef &Data);
+ virtual void EmitBytes(StringRef Data);
virtual void EmitValue(const MCExpr *Value, unsigned Size);
@@ -315,7 +315,7 @@ void MCMachOStreamer::EmitZerofill(const MCSection *Section, MCSymbol *Symbol,
SectData.setAlignment(ByteAlignment);
}
-void MCMachOStreamer::EmitBytes(const StringRef &Data) {
+void MCMachOStreamer::EmitBytes(StringRef Data) {
MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
if (!DF)
DF = new MCDataFragment(CurSectionData);
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 3cd22ca..ddc4e69 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -45,7 +45,7 @@ namespace {
virtual void EmitZerofill(const MCSection *Section, MCSymbol *Symbol = 0,
unsigned Size = 0, unsigned ByteAlignment = 0) {}
- virtual void EmitBytes(const StringRef &Data) {}
+ virtual void EmitBytes(StringRef Data) {}
virtual void EmitValue(const MCExpr *Value, unsigned Size) {}
diff --git a/lib/MC/MCSection.cpp b/lib/MC/MCSection.cpp
index 333a471..24c89ef 100644
--- a/lib/MC/MCSection.cpp
+++ b/lib/MC/MCSection.cpp
@@ -25,7 +25,7 @@ MCSection::~MCSection() {
//===----------------------------------------------------------------------===//
MCSectionCOFF *MCSectionCOFF::
-Create(const StringRef &Name, bool IsDirective, SectionKind K, MCContext &Ctx) {
+Create(StringRef Name, bool IsDirective, SectionKind K, MCContext &Ctx) {
return new (Ctx) MCSectionCOFF(Name, IsDirective, K);
}
diff --git a/lib/MC/MCSectionELF.cpp b/lib/MC/MCSectionELF.cpp
index 660a8c9..c6812ed 100644
--- a/lib/MC/MCSectionELF.cpp
+++ b/lib/MC/MCSectionELF.cpp
@@ -15,7 +15,7 @@
using namespace llvm;
MCSectionELF *MCSectionELF::
-Create(const StringRef &Section, unsigned Type, unsigned Flags,
+Create(StringRef Section, unsigned Type, unsigned Flags,
SectionKind K, bool isExplicit, MCContext &Ctx) {
return new (Ctx) MCSectionELF(Section, Type, Flags, K, isExplicit);
}
diff --git a/lib/MC/MCSectionMachO.cpp b/lib/MC/MCSectionMachO.cpp
index b3aeb9c..6cc67a2 100644
--- a/lib/MC/MCSectionMachO.cpp
+++ b/lib/MC/MCSectionMachO.cpp
@@ -66,7 +66,7 @@ ENTRY(0 /*FIXME*/, S_ATTR_LOC_RELOC)
MCSectionMachO *MCSectionMachO::
-Create(const StringRef &Segment, const StringRef &Section,
+Create(StringRef Segment, StringRef Section,
unsigned TypeAndAttributes, unsigned Reserved2,
SectionKind K, MCContext &Ctx) {
// S_SYMBOL_STUBS must be set for Reserved2 to be non-zero.
diff --git a/lib/MC/MCSymbol.cpp b/lib/MC/MCSymbol.cpp
index 86ff3f3..b145d07 100644
--- a/lib/MC/MCSymbol.cpp
+++ b/lib/MC/MCSymbol.cpp
@@ -35,7 +35,7 @@ static void MangleLetter(raw_ostream &OS, unsigned char C) {
/// NameNeedsEscaping - Return true if the identifier \arg Str needs quotes
/// for this assembler.
-static bool NameNeedsEscaping(const StringRef &Str, const MCAsmInfo &MAI) {
+static bool NameNeedsEscaping(StringRef Str, const MCAsmInfo &MAI) {
assert(!Str.empty() && "Cannot create an empty MCSymbol");
// If the first character is a number and the target does not allow this, we
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 626daa2..59340d4 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -17,6 +17,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/ManagedStatic.h"
@@ -765,6 +766,11 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
free(*i);
}
+ DEBUG(errs() << "\nArgs: ";
+ for (int i = 0; i < argc; ++i)
+ errs() << argv[i] << ' ';
+ );
+
// If we had an error processing our arguments, don't let the program execute
if (ErrorParsing) exit(1);
}
@@ -1147,9 +1153,12 @@ public:
#ifndef NDEBUG
OS << " with assertions";
#endif
+ std::string CPU = sys::getHostCPUName();
+ if (CPU == "generic") CPU = "(unknown)";
OS << ".\n"
<< " Built " << __DATE__ << " (" << __TIME__ << ").\n"
<< " Host: " << sys::getHostTriple() << '\n'
+ << " Host CPU: " << CPU << '\n'
<< '\n'
<< " Registered Targets:\n";
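(Editor's note: the DEBUG() block added above only produces output in an assertions-enabled build run with -debug. A hedged sketch of the same pattern in client code; the wrapper function is made up for illustration.)

  #include "llvm/Support/Debug.h"
  #include "llvm/Support/raw_ostream.h"

  // Dump the raw argument vector when -debug is given (compiles to nothing in
  // release builds).
  static void traceArgs(int argc, char **argv) {
    DEBUG(llvm::errs() << "\nArgs: ";
          for (int i = 0; i < argc; ++i)
            llvm::errs() << argv[i] << ' ';
          llvm::errs() << '\n');
  }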
diff --git a/lib/Support/ConstantRange.cpp b/lib/Support/ConstantRange.cpp
index 423e90d..e427f82 100644
--- a/lib/Support/ConstantRange.cpp
+++ b/lib/Support/ConstantRange.cpp
@@ -492,6 +492,30 @@ ConstantRange ConstantRange::truncate(uint32_t DstTySize) const {
return ConstantRange(L, U);
}
+/// zextOrTrunc - make this range have the bit width given by \p DstTySize. The
+/// value is zero extended, truncated, or left alone to make it that width.
+ConstantRange ConstantRange::zextOrTrunc(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ if (SrcTySize > DstTySize)
+ return truncate(DstTySize);
+ else if (SrcTySize < DstTySize)
+ return zeroExtend(DstTySize);
+ else
+ return *this;
+}
+
+/// sextOrTrunc - make this range have the bit width given by \p DstTySize. The
+/// value is sign extended, truncated, or left alone to make it that width.
+ConstantRange ConstantRange::sextOrTrunc(uint32_t DstTySize) const {
+ unsigned SrcTySize = getBitWidth();
+ if (SrcTySize > DstTySize)
+ return truncate(DstTySize);
+ else if (SrcTySize < DstTySize)
+ return signExtend(DstTySize);
+ else
+ return *this;
+}
+
ConstantRange
ConstantRange::add(const ConstantRange &Other) const {
if (isEmptySet() || Other.isEmptySet())
@@ -585,6 +609,43 @@ ConstantRange::udiv(const ConstantRange &RHS) const {
return ConstantRange(Lower, Upper);
}
+ConstantRange
+ConstantRange::shl(const ConstantRange &Amount) const {
+ if (isEmptySet())
+ return *this;
+
+ APInt min = getUnsignedMin() << Amount.getUnsignedMin();
+ APInt max = getUnsignedMax() << Amount.getUnsignedMax();
+
+ // there's no overflow!
+ APInt Zeros(getBitWidth(), getUnsignedMax().countLeadingZeros());
+ if (Zeros.uge(Amount.getUnsignedMax()))
+ return ConstantRange(min, max);
+
+ // FIXME: implement the other tricky cases
+ return ConstantRange(getBitWidth());
+}
+
+ConstantRange
+ConstantRange::ashr(const ConstantRange &Amount) const {
+ if (isEmptySet())
+ return *this;
+
+ APInt min = getUnsignedMax().ashr(Amount.getUnsignedMin());
+ APInt max = getUnsignedMin().ashr(Amount.getUnsignedMax());
+ return ConstantRange(min, max);
+}
+
+ConstantRange
+ConstantRange::lshr(const ConstantRange &Amount) const {
+ if (isEmptySet())
+ return *this;
+
+ APInt min = getUnsignedMax().lshr(Amount.getUnsignedMin());
+ APInt max = getUnsignedMin().lshr(Amount.getUnsignedMax());
+ return ConstantRange(min, max);
+}
+
/// print - Print out the bounds to a stream...
///
void ConstantRange::print(raw_ostream &OS) const {
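(Editor's note: a minimal sketch of the new ConstantRange helpers defined above, assuming the 2009-era header location llvm/Support/ConstantRange.h; the concrete bounds are invented for illustration.)

  #include "llvm/ADT/APInt.h"
  #include "llvm/Support/ConstantRange.h"
  using namespace llvm;

  void constantRangeSketch() {
    ConstantRange R(APInt(8, 4), APInt(8, 10));   // the i8 range [4, 10)

    ConstantRange Wide = R.zextOrTrunc(16);       // zero extended to i16
    ConstantRange Same = R.sextOrTrunc(8);        // same width: returned as-is
    ConstantRange Narrow = R.zextOrTrunc(4);      // truncated to i4

    // shl is conservative: when the shifted maximum might overflow, the
    // implementation above gives up and returns the full set for that width.
    ConstantRange Shifted = R.shl(ConstantRange(APInt(8, 1), APInt(8, 2)));
    (void)Wide; (void)Same; (void)Narrow; (void)Shifted;
  }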
diff --git a/lib/Support/Debug.cpp b/lib/Support/Debug.cpp
index d4954b6..50abe01 100644
--- a/lib/Support/Debug.cpp
+++ b/lib/Support/Debug.cpp
@@ -62,7 +62,7 @@ bool llvm::isCurrentDebugType(const char *DebugType) {
/// option were specified. Note that DebugFlag also needs to be set to true for
/// debug output to be produced.
///
-void SetCurrentDebugType(const char *Type) {
+void llvm::SetCurrentDebugType(const char *Type) {
CurrentDebugType = Type;
}
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 88e2050..b04864a 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -70,7 +70,7 @@ namespace {
class MemoryBufferMem : public MemoryBuffer {
std::string FileID;
public:
- MemoryBufferMem(const char *Start, const char *End, const char *FID,
+ MemoryBufferMem(const char *Start, const char *End, StringRef FID,
bool Copy = false)
: FileID(FID) {
if (!Copy)
@@ -107,7 +107,7 @@ MemoryBuffer *MemoryBuffer::getMemBufferCopy(const char *StartPtr,
/// initialize the memory allocated by this method. The memory is owned by
/// the MemoryBuffer object.
MemoryBuffer *MemoryBuffer::getNewUninitMemBuffer(size_t Size,
- const char *BufferName) {
+ StringRef BufferName) {
char *Buf = (char *)malloc((Size+1) * sizeof(char));
if (!Buf) return 0;
Buf[Size] = 0;
@@ -134,17 +134,12 @@ MemoryBuffer *MemoryBuffer::getNewMemBuffer(size_t Size,
/// if the Filename is "-". If an error occurs, this returns null and fills
/// in *ErrStr with a reason. If stdin is empty, this API (unlike getSTDIN)
/// returns an empty buffer.
-MemoryBuffer *MemoryBuffer::getFileOrSTDIN(const char *Filename,
+MemoryBuffer *MemoryBuffer::getFileOrSTDIN(StringRef Filename,
std::string *ErrStr,
int64_t FileSize) {
- if (Filename[0] != '-' || Filename[1] != 0)
- return getFile(Filename, ErrStr, FileSize);
- MemoryBuffer *M = getSTDIN();
- if (M) return M;
-
- // If stdin was empty, M is null. Cons up an empty memory buffer now.
- const char *EmptyStr = "";
- return MemoryBuffer::getMemBuffer(EmptyStr, EmptyStr, "<stdin>");
+ if (Filename == "-")
+ return getSTDIN();
+ return getFile(Filename, ErrStr, FileSize);
}
//===----------------------------------------------------------------------===//
@@ -158,7 +153,7 @@ namespace {
class MemoryBufferMMapFile : public MemoryBuffer {
std::string Filename;
public:
- MemoryBufferMMapFile(const char *filename, const char *Pages, uint64_t Size)
+ MemoryBufferMMapFile(StringRef filename, const char *Pages, uint64_t Size)
: Filename(filename) {
init(Pages, Pages+Size);
}
@@ -173,13 +168,13 @@ public:
};
}
-MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
+MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
int64_t FileSize) {
int OpenFlags = 0;
#ifdef O_BINARY
OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
#endif
- int FD = ::open(Filename, O_RDONLY|OpenFlags);
+ int FD = ::open(Filename.str().c_str(), O_RDONLY|OpenFlags);
if (FD == -1) {
if (ErrStr) *ErrStr = "could not open file";
return 0;
@@ -203,6 +198,8 @@ MemoryBuffer *MemoryBuffer::getFile(const char *Filename, std::string *ErrStr,
// for small files, because this can severely fragment our address space. Also
// don't try to map files that are exactly a multiple of the system page size,
// as the file would not have the required null terminator.
+ //
+ // FIXME: Can we just mmap an extra page in the latter case?
if (FileSize >= 4096*4 &&
(FileSize & (sys::Process::GetPageSize()-1)) != 0) {
if (const char *Pages = sys::Path::MapInFilePages(FD, FileSize)) {
@@ -262,6 +259,9 @@ MemoryBuffer *MemoryBuffer::getSTDIN() {
std::vector<char> FileData;
// Read in all of the data from stdin, we cannot mmap stdin.
+ //
+  // FIXME: That isn't necessarily true; we should try to mmap stdin and
+  // fall back if it fails.
sys::Program::ChangeStdinToBinary();
size_t ReadBytes;
do {
@@ -271,8 +271,6 @@ MemoryBuffer *MemoryBuffer::getSTDIN() {
FileData.push_back(0); // &FileData[Size] is invalid. So is &*FileData.end().
size_t Size = FileData.size();
- if (Size <= 1)
- return 0;
MemoryBuffer *B = new STDINBufferFile();
B->initCopyOf(&FileData[0], &FileData[Size-1]);
return B;
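(Editor's note: with the change above, getFileOrSTDIN() takes a StringRef and simply forwards "-" to getSTDIN(). A caller-side sketch; the wrapper and its name are illustrative, not part of the patch.)

  #include "llvm/ADT/StringRef.h"
  #include "llvm/Support/MemoryBuffer.h"
  #include <string>

  // Open a named file, or stdin when the name is "-".
  llvm::MemoryBuffer *openInput(llvm::StringRef Name, std::string &Err) {
    // Returns null on failure and fills Err with the reason.
    return llvm::MemoryBuffer::getFileOrSTDIN(Name, &Err);
  }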
diff --git a/lib/Support/StringExtras.cpp b/lib/Support/StringExtras.cpp
index c72f121..1b233ab 100644
--- a/lib/Support/StringExtras.cpp
+++ b/lib/Support/StringExtras.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringExtras.h"
+#include "llvm/ADT/SmallVector.h"
#include <cstring>
using namespace llvm;
@@ -56,3 +57,24 @@ void llvm::SplitString(const std::string &Source,
S2 = getToken(S, Delimiters);
}
}
+
+void llvm::StringRef::split(SmallVectorImpl<StringRef> &A,
+ StringRef Separators, int MaxSplit,
+ bool KeepEmpty) const {
+ StringRef rest = *this;
+
+ // rest.data() is used to distinguish cases like "a," that splits into
+ // "a" + "" and "a" that splits into "a" + 0.
+ for (int splits = 0;
+ rest.data() != NULL && (MaxSplit < 0 || splits < MaxSplit);
+ ++splits) {
+ std::pair<llvm::StringRef, llvm::StringRef> p = rest.split(Separators);
+
+ if (p.first.size() != 0 || KeepEmpty)
+ A.push_back(p.first);
+ rest = p.second;
+ }
+ // If we have a tail left, add it.
+ if (rest.data() != NULL && (rest.size() != 0 || KeepEmpty))
+ A.push_back(rest);
+}
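(Editor's note: a short sketch of how the new StringRef::split overload behaves; the sample string is invented for illustration.)

  #include "llvm/ADT/SmallVector.h"
  #include "llvm/ADT/StringRef.h"
  using namespace llvm;

  void splitSketch() {
    SmallVector<StringRef, 4> Parts;
    StringRef("a,b,,c").split(Parts, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/true);
    // Parts == {"a", "b", "", "c"}

    Parts.clear();
    StringRef("a,b,,c").split(Parts, ",", /*MaxSplit=*/-1, /*KeepEmpty=*/false);
    // Parts == {"a", "b", "c"}; empty pieces are dropped
  }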
diff --git a/lib/Support/StringMap.cpp b/lib/Support/StringMap.cpp
index a729d3d..6f28277 100644
--- a/lib/Support/StringMap.cpp
+++ b/lib/Support/StringMap.cpp
@@ -52,7 +52,7 @@ void StringMapImpl::init(unsigned InitSize) {
/// specified bucket will be non-null. Otherwise, it will be null. In either
/// case, the FullHashValue field of the bucket will be set to the hash value
/// of the string.
-unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) {
+unsigned StringMapImpl::LookupBucketFor(StringRef Name) {
unsigned HTSize = NumBuckets;
if (HTSize == 0) { // Hash table unallocated so far?
init(16);
@@ -110,7 +110,7 @@ unsigned StringMapImpl::LookupBucketFor(const StringRef &Name) {
/// FindKey - Look up the bucket that contains the specified key. If it exists
/// in the map, return the bucket number of the key. Otherwise return -1.
/// This does not modify the map.
-int StringMapImpl::FindKey(const StringRef &Key) const {
+int StringMapImpl::FindKey(StringRef Key) const {
unsigned HTSize = NumBuckets;
if (HTSize == 0) return -1; // Really empty table?
unsigned FullHashValue = HashString(Key);
@@ -161,7 +161,7 @@ void StringMapImpl::RemoveKey(StringMapEntryBase *V) {
/// RemoveKey - Remove the StringMapEntry for the specified key from the
/// table, returning it. If the key is not in the table, this returns null.
-StringMapEntryBase *StringMapImpl::RemoveKey(const StringRef &Key) {
+StringMapEntryBase *StringMapImpl::RemoveKey(StringRef Key) {
int Bucket = FindKey(Key);
if (Bucket == -1) return 0;
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index deaa19e..51e1100 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -15,6 +15,26 @@ using namespace llvm;
const size_t StringRef::npos;
#endif
+static char ascii_tolower(char x) {
+ if (x >= 'A' && x <= 'Z')
+ return x - 'A' + 'a';
+ return x;
+}
+
+/// compare_lower - Compare strings, ignoring case.
+int StringRef::compare_lower(StringRef RHS) const {
+ for (size_t I = 0, E = std::min(Length, RHS.Length); I != E; ++I) {
+ char LHC = ascii_tolower(Data[I]);
+ char RHC = ascii_tolower(RHS.Data[I]);
+ if (LHC != RHC)
+ return LHC < RHC ? -1 : 1;
+ }
+
+ if (Length == RHS.Length)
+ return 0;
+ return Length < RHS.Length ? -1 : 1;
+}
+
//===----------------------------------------------------------------------===//
// String Searching
//===----------------------------------------------------------------------===//
@@ -24,11 +44,11 @@ const size_t StringRef::npos;
///
/// \return - The index of the first occurrence of \arg Str, or npos if not
/// found.
-size_t StringRef::find(const StringRef &Str) const {
+size_t StringRef::find(StringRef Str, size_t From) const {
size_t N = Str.size();
if (N > Length)
return npos;
- for (size_t i = 0, e = Length - N + 1; i != e; ++i)
+ for (size_t e = Length - N + 1, i = std::min(From, e); i != e; ++i)
if (substr(i, N).equals(Str))
return i;
return npos;
@@ -38,7 +58,7 @@ size_t StringRef::find(const StringRef &Str) const {
///
/// \return - The index of the last occurrence of \arg Str, or npos if not
/// found.
-size_t StringRef::rfind(const StringRef &Str) const {
+size_t StringRef::rfind(StringRef Str) const {
size_t N = Str.size();
if (N > Length)
return npos;
@@ -50,19 +70,34 @@ size_t StringRef::rfind(const StringRef &Str) const {
return npos;
}
-/// find_first_of - Find the first character from the string 'Chars' in the
-/// current string or return npos if not in string.
-StringRef::size_type StringRef::find_first_of(StringRef Chars) const {
- for (size_type i = 0, e = Length; i != e; ++i)
+/// find_first_of - Find the first character in the string that is in \arg
+/// Chars, or npos if not found.
+///
+/// Note: O(size() * Chars.size())
+StringRef::size_type StringRef::find_first_of(StringRef Chars,
+ size_t From) const {
+ for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (Chars.find(Data[i]) != npos)
return i;
return npos;
}
/// find_first_not_of - Find the first character in the string that is not
-/// in the string 'Chars' or return npos if all are in string. Same as find.
-StringRef::size_type StringRef::find_first_not_of(StringRef Chars) const {
- for (size_type i = 0, e = Length; i != e; ++i)
+/// \arg C or npos if not found.
+StringRef::size_type StringRef::find_first_not_of(char C, size_t From) const {
+ for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
+ if (Data[i] != C)
+ return i;
+ return npos;
+}
+
+/// find_first_not_of - Find the first character in the string that is not
+/// in the string \arg Chars, or npos if not found.
+///
+/// Note: O(size() * Chars.size())
+StringRef::size_type StringRef::find_first_not_of(StringRef Chars,
+ size_t From) const {
+ for (size_type i = std::min(From, Length), e = Length; i != e; ++i)
if (Chars.find(Data[i]) == npos)
return i;
return npos;
@@ -75,7 +110,7 @@ StringRef::size_type StringRef::find_first_not_of(StringRef Chars) const {
/// count - Return the number of non-overlapped occurrences of \arg Str in
/// the string.
-size_t StringRef::count(const StringRef &Str) const {
+size_t StringRef::count(StringRef Str) const {
size_t Count = 0;
size_t N = Str.size();
if (N > Length)
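(Editor's note: a brief sketch of the case-insensitive compare and the new From offsets added above; the literals are invented for illustration.)

  #include "llvm/ADT/StringRef.h"
  using namespace llvm;

  void stringRefSketch() {
    // ASCII-only case-insensitive compare; returns -1, 0, or 1 like compare().
    int C = StringRef("Hello").compare_lower("hello");   // 0

    // The search routines now accept a starting offset.
    StringRef S("abcabc");
    size_t First = S.find("bc");                         // 1
    size_t Second = S.find("bc", 2);                     // 4
    size_t NotA = S.find_first_not_of('a', 0);           // 1
    (void)C; (void)First; (void)Second; (void)NotA;
  }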
diff --git a/lib/Support/Timer.cpp b/lib/Support/Timer.cpp
index dd58d1f..7d32ee6 100644
--- a/lib/Support/Timer.cpp
+++ b/lib/Support/Timer.cpp
@@ -66,7 +66,7 @@ static TimerGroup *getDefaultTimerGroup() {
}
llvm_release_global_lock();
}
-
+
return tmp;
}
@@ -145,7 +145,7 @@ static TimeRecord getTimeRecord(bool Start) {
static ManagedStatic<std::vector<Timer*> > ActiveTimers;
void Timer::startTimer() {
- sys::SmartScopedLock<true> L(Lock);
+ sys::SmartScopedLock<true> L(*TimerLock);
Started = true;
ActiveTimers->push_back(this);
TimeRecord TR = getTimeRecord(true);
@@ -157,7 +157,7 @@ void Timer::startTimer() {
}
void Timer::stopTimer() {
- sys::SmartScopedLock<true> L(Lock);
+ sys::SmartScopedLock<true> L(*TimerLock);
TimeRecord TR = getTimeRecord(false);
Elapsed += TR.Elapsed;
UserTime += TR.UserTime;
@@ -175,27 +175,11 @@ void Timer::stopTimer() {
}
void Timer::sum(const Timer &T) {
- if (&T < this) {
- T.Lock.acquire();
- Lock.acquire();
- } else {
- Lock.acquire();
- T.Lock.acquire();
- }
-
Elapsed += T.Elapsed;
UserTime += T.UserTime;
SystemTime += T.SystemTime;
MemUsed += T.MemUsed;
PeakMem += T.PeakMem;
-
- if (&T < this) {
- T.Lock.release();
- Lock.release();
- } else {
- Lock.release();
- T.Lock.release();
- }
}
/// addPeakMemoryMeasurement - This method should be called whenever memory
@@ -203,14 +187,12 @@ void Timer::sum(const Timer &T) {
/// currently active timers, which will be printed when the timer group prints
///
void Timer::addPeakMemoryMeasurement() {
+ sys::SmartScopedLock<true> L(*TimerLock);
size_t MemUsed = getMemUsage();
for (std::vector<Timer*>::iterator I = ActiveTimers->begin(),
- E = ActiveTimers->end(); I != E; ++I) {
- (*I)->Lock.acquire();
+ E = ActiveTimers->end(); I != E; ++I)
(*I)->PeakMem = std::max((*I)->PeakMem, MemUsed-(*I)->PeakMemBase);
- (*I)->Lock.release();
- }
}
//===----------------------------------------------------------------------===//
@@ -280,14 +262,7 @@ static void printVal(double Val, double Total, raw_ostream &OS) {
}
void Timer::print(const Timer &Total, raw_ostream &OS) {
- if (&Total < this) {
- Total.Lock.acquire();
- Lock.acquire();
- } else {
- Lock.acquire();
- Total.Lock.acquire();
- }
-
+ sys::SmartScopedLock<true> L(*TimerLock);
if (Total.UserTime)
printVal(UserTime, Total.UserTime, OS);
if (Total.SystemTime)
@@ -310,14 +285,6 @@ void Timer::print(const Timer &Total, raw_ostream &OS) {
OS << Name << "\n";
Started = false; // Once printed, don't print again
-
- if (&Total < this) {
- Total.Lock.release();
- Lock.release();
- } else {
- Lock.release();
- Total.Lock.release();
- }
}
// GetLibSupportInfoOutputFile - Return a file stream to print our output on...
@@ -329,13 +296,13 @@ llvm::GetLibSupportInfoOutputFile() {
if (LibSupportInfoOutputFilename == "-")
return &outs();
-
+
std::string Error;
raw_ostream *Result = new raw_fd_ostream(LibSupportInfoOutputFilename.c_str(),
Error, raw_fd_ostream::F_Append);
if (Error.empty())
return Result;
-
+
errs() << "Error opening info-output-file '"
<< LibSupportInfoOutputFilename << " for appending!\n";
delete Result;
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 26a1a4e..840fb98 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -94,6 +94,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
case MinGW64: return "mingw64";
case NetBSD: return "netbsd";
case OpenBSD: return "openbsd";
+ case Psp: return "psp";
case Solaris: return "solaris";
case Win32: return "win32";
case Haiku: return "haiku";
@@ -102,7 +103,7 @@ const char *Triple::getOSTypeName(OSType Kind) {
return "<invalid>";
}
-Triple::ArchType Triple::getArchTypeForLLVMName(const StringRef &Name) {
+Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
if (Name == "alpha")
return alpha;
if (Name == "arm")
@@ -141,7 +142,7 @@ Triple::ArchType Triple::getArchTypeForLLVMName(const StringRef &Name) {
return UnknownArch;
}
-Triple::ArchType Triple::getArchTypeForDarwinArchName(const StringRef &Str) {
+Triple::ArchType Triple::getArchTypeForDarwinArchName(StringRef Str) {
// See arch(3) and llvm-gcc's driver-driver.c. We don't implement support for
// archs which Darwin doesn't use.
@@ -178,6 +179,33 @@ Triple::ArchType Triple::getArchTypeForDarwinArchName(const StringRef &Str) {
return Triple::UnknownArch;
}
+// Returns the architecture name that is understood by the target assembler.
+const char *Triple::getArchNameForAssembler() {
+ if (getOS() != Triple::Darwin && getVendor() != Triple::Apple)
+ return NULL;
+
+ StringRef Str = getArchName();
+ if (Str == "i386")
+ return "i386";
+ if (Str == "x86_64")
+ return "x86_64";
+ if (Str == "powerpc")
+ return "ppc";
+ if (Str == "powerpc64")
+ return "ppc64";
+ if (Str == "arm")
+ return "arm";
+ if (Str == "armv4t" || Str == "thumbv4t")
+ return "armv4t";
+ if (Str == "armv5" || Str == "armv5e" || Str == "thumbv5" || Str == "thumbv5e")
+ return "armv5";
+ if (Str == "armv6" || Str == "thumbv6")
+ return "armv6";
+ if (Str == "armv7" || Str == "thumbv7")
+ return "armv7";
+ return NULL;
+}
+
//
void Triple::Parse() const {
@@ -273,6 +301,8 @@ void Triple::Parse() const {
OS = NetBSD;
else if (OSName.startswith("openbsd"))
OS = OpenBSD;
+ else if (OSName.startswith("psp"))
+ OS = Psp;
else if (OSName.startswith("solaris"))
OS = Solaris;
else if (OSName.startswith("win32"))
@@ -393,7 +423,7 @@ void Triple::setOS(OSType Kind) {
setOSName(getOSTypeName(Kind));
}
-void Triple::setArchName(const StringRef &Str) {
+void Triple::setArchName(StringRef Str) {
// Work around a miscompilation bug for Twines in gcc 4.0.3.
SmallString<64> Triple;
Triple += Str;
@@ -404,11 +434,11 @@ void Triple::setArchName(const StringRef &Str) {
setTriple(Triple.str());
}
-void Triple::setVendorName(const StringRef &Str) {
+void Triple::setVendorName(StringRef Str) {
setTriple(getArchName() + "-" + Str + "-" + getOSAndEnvironmentName());
}
-void Triple::setOSName(const StringRef &Str) {
+void Triple::setOSName(StringRef Str) {
if (hasEnvironment())
setTriple(getArchName() + "-" + getVendorName() + "-" + Str +
"-" + getEnvironmentName());
@@ -416,11 +446,11 @@ void Triple::setOSName(const StringRef &Str) {
setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
}
-void Triple::setEnvironmentName(const StringRef &Str) {
- setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
+void Triple::setEnvironmentName(StringRef Str) {
+ setTriple(getArchName() + "-" + getVendorName() + "-" + getOSName() +
"-" + Str);
}
-void Triple::setOSAndEnvironmentName(const StringRef &Str) {
+void Triple::setOSAndEnvironmentName(StringRef Str) {
setTriple(getArchName() + "-" + getVendorName() + "-" + Str);
}
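(Editor's note: a small sketch of the StringRef-based Triple setters and the new helpers; the triple strings are illustrative, and getArchNameForAssembler() is assumed to be declared in the matching Triple.h change, which lies outside this lib/ diff.)

  #include "llvm/ADT/Triple.h"
  using namespace llvm;

  void tripleSketch() {
    Triple T("armv6-apple-darwin9");                    // illustrative triple
    const char *AsmArch = T.getArchNameForAssembler();  // "armv6" on Darwin/Apple
    T.setOSName("psp");                                 // "psp" now parses as Triple::Psp
    (void)AsmArch;
  }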
diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp
index fd2d952..37591a5 100644
--- a/lib/System/Host.cpp
+++ b/lib/System/Host.cpp
@@ -13,6 +13,7 @@
#include "llvm/System/Host.h"
#include "llvm/Config/config.h"
+#include <string.h>
// Include the platform-specific parts of this class.
#ifdef LLVM_ON_UNIX
@@ -22,3 +23,276 @@
#include "Win32/Host.inc"
#endif
+//===----------------------------------------------------------------------===//
+//
+// Implementations of the CPU detection routines
+//
+//===----------------------------------------------------------------------===//
+
+using namespace llvm;
+
+#if defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)\
+ || defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+
+/// GetX86CpuIDAndInfo - Execute the specified cpuid and return the 4 values in the
+/// specified arguments. If we can't run cpuid on the host, return true.
+static bool GetX86CpuIDAndInfo(unsigned value, unsigned *rEAX,
+ unsigned *rEBX, unsigned *rECX, unsigned *rEDX) {
+#if defined(__x86_64__) || defined(_M_AMD64) || defined (_M_X64)
+ #if defined(__GNUC__)
+  // gcc doesn't know cpuid would clobber ebx/rbx. Preserve it manually.
+ asm ("movq\t%%rbx, %%rsi\n\t"
+ "cpuid\n\t"
+ "xchgq\t%%rbx, %%rsi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ int registers[4];
+ __cpuid(registers, value);
+ *rEAX = registers[0];
+ *rEBX = registers[1];
+ *rECX = registers[2];
+ *rEDX = registers[3];
+ return false;
+ #endif
+#elif defined(i386) || defined(__i386__) || defined(__x86__) || defined(_M_IX86)
+ #if defined(__GNUC__)
+ asm ("movl\t%%ebx, %%esi\n\t"
+ "cpuid\n\t"
+ "xchgl\t%%ebx, %%esi\n\t"
+ : "=a" (*rEAX),
+ "=S" (*rEBX),
+ "=c" (*rECX),
+ "=d" (*rEDX)
+ : "a" (value));
+ return false;
+ #elif defined(_MSC_VER)
+ __asm {
+ mov eax,value
+ cpuid
+ mov esi,rEAX
+ mov dword ptr [esi],eax
+ mov esi,rEBX
+ mov dword ptr [esi],ebx
+ mov esi,rECX
+ mov dword ptr [esi],ecx
+ mov esi,rEDX
+ mov dword ptr [esi],edx
+ }
+ return false;
+ #endif
+#endif
+ return true;
+}
+
+static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, unsigned &Model) {
+ Family = (EAX >> 8) & 0xf; // Bits 8 - 11
+ Model = (EAX >> 4) & 0xf; // Bits 4 - 7
+ if (Family == 6 || Family == 0xf) {
+ if (Family == 0xf)
+ // Examine extended family ID if family ID is F.
+ Family += (EAX >> 20) & 0xff; // Bits 20 - 27
+ // Examine extended model ID if family ID is 6 or F.
+ Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
+ }
+}
+#endif
+
+
+std::string sys::getHostCPUName() {
+#if defined(__x86_64__) || defined(__i386__)
+ unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
+ if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
+ return "generic";
+ unsigned Family = 0;
+ unsigned Model = 0;
+ DetectX86FamilyModel(EAX, Family, Model);
+
+ GetX86CpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
+ bool Em64T = (EDX >> 29) & 0x1;
+ bool HasSSE3 = (ECX & 0x1);
+
+ union {
+ unsigned u[3];
+ char c[12];
+ } text;
+
+ GetX86CpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
+ if (memcmp(text.c, "GenuineIntel", 12) == 0) {
+ switch (Family) {
+ case 3:
+ return "i386";
+ case 4:
+ switch (Model) {
+ case 0: // Intel486TM DX processors
+ case 1: // Intel486TM DX processors
+ case 2: // Intel486 SX processors
+ case 3: // Intel487TM processors, IntelDX2 OverDrive® processors,
+ // IntelDX2TM processors
+ case 4: // Intel486 SL processor
+ case 5: // IntelSX2TM processors
+ case 7: // Write-Back Enhanced IntelDX2 processors
+ case 8: // IntelDX4 OverDrive processors, IntelDX4TM processors
+ default: return "i486";
+ }
+ case 5:
+ switch (Model) {
+ case 1: // Pentium OverDrive processor for Pentium processor (60, 66),
+ // Pentium® processors (60, 66)
+ case 2: // Pentium OverDrive processor for Pentium processor (75, 90,
+ // 100, 120, 133), Pentium processors (75, 90, 100, 120, 133,
+ // 150, 166, 200)
+ case 3: // Pentium OverDrive processors for Intel486 processor-based
+ // systems
+ return "pentium";
+
+ case 4: // Pentium OverDrive processor with MMXTM technology for Pentium
+ // processor (75, 90, 100, 120, 133), Pentium processor with
+ // MMXTM technology (166, 200)
+ return "pentium-mmx";
+
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 1: // Pentium Pro processor
+ return "pentiumpro";
+
+ case 3: // Intel Pentium II OverDrive processor, Pentium II processor,
+ // model 03
+ case 5: // Pentium II processor, model 05, Pentium II Xeon processor,
+ // model 05, and Intel® Celeron® processor, model 05
+ case 6: // Celeron processor, model 06
+ return "pentium2";
+
+ case 7: // Pentium III processor, model 07, and Pentium III Xeon
+ // processor, model 07
+ case 8: // Pentium III processor, model 08, Pentium III Xeon processor,
+ // model 08, and Celeron processor, model 08
+ case 10: // Pentium III Xeon processor, model 0Ah
+ case 11: // Pentium III processor, model 0Bh
+ return "pentium3";
+
+ case 9: // Intel Pentium M processor, Intel Celeron M processor model 09.
+ case 13: // Intel Pentium M processor, Intel Celeron M processor, model
+ // 0Dh. All processors are manufactured using the 90 nm process.
+ return "pentium-m";
+
+ case 14: // Intel CoreTM Duo processor, Intel CoreTM Solo processor, model
+ // 0Eh. All processors are manufactured using the 65 nm process.
+ return "yonah";
+
+ case 15: // Intel CoreTM2 Duo processor, Intel CoreTM2 Duo mobile
+ // processor, Intel CoreTM2 Quad processor, Intel CoreTM2 Quad
+ // mobile processor, Intel CoreTM2 Extreme processor, Intel
+ // Pentium Dual-Core processor, Intel Xeon processor, model
+ // 0Fh. All processors are manufactured using the 65 nm process.
+ case 22: // Intel Celeron processor model 16h. All processors are
+ // manufactured using the 65 nm process
+ return "core2";
+
+ case 21: // Intel EP80579 Integrated Processor and Intel EP80579
+ // Integrated Processor with Intel QuickAssist Technology
+ return "i686"; // FIXME: ???
+
+ case 23: // Intel CoreTM2 Extreme processor, Intel Xeon processor, model
+ // 17h. All processors are manufactured using the 45 nm process.
+ //
+ // 45nm: Penryn , Wolfdale, Yorkfield (XE)
+ return "penryn";
+
+ case 26: // Intel Core i7 processor and Intel Xeon processor. All
+ // processors are manufactured using the 45 nm process.
+ case 29: // Intel Xeon processor MP. All processors are manufactured using
+ // the 45 nm process.
+ return "corei7";
+
+ case 28: // Intel Atom processor. All processors are manufactured using
+ // the 45 nm process
+ return "atom";
+
+ default: return "i686";
+ }
+ case 15: {
+ switch (Model) {
+ case 0: // Pentium 4 processor, Intel Xeon processor. All processors are
+ // model 00h and manufactured using the 0.18 micron process.
+ case 1: // Pentium 4 processor, Intel Xeon processor, Intel Xeon
+ // processor MP, and Intel Celeron processor. All processors are
+ // model 01h and manufactured using the 0.18 micron process.
+ case 2: // Pentium 4 processor, Mobile Intel Pentium 4 processor – M,
+ // Intel Xeon processor, Intel Xeon processor MP, Intel Celeron
+ // processor, and Mobile Intel Celeron processor. All processors
+ // are model 02h and manufactured using the 0.13 micron process.
+ return (Em64T) ? "x86-64" : "pentium4";
+
+ case 3: // Pentium 4 processor, Intel Xeon processor, Intel Celeron D
+ // processor. All processors are model 03h and manufactured using
+ // the 90 nm process.
+ case 4: // Pentium 4 processor, Pentium 4 processor Extreme Edition,
+ // Pentium D processor, Intel Xeon processor, Intel Xeon
+ // processor MP, Intel Celeron D processor. All processors are
+ // model 04h and manufactured using the 90 nm process.
+ case 6: // Pentium 4 processor, Pentium D processor, Pentium processor
+ // Extreme Edition, Intel Xeon processor, Intel Xeon processor
+ // MP, Intel Celeron D processor. All processors are model 06h
+ // and manufactured using the 65 nm process.
+ return (Em64T) ? "nocona" : "prescott";
+
+ default:
+ return (Em64T) ? "x86-64" : "pentium4";
+ }
+ }
+
+ default:
+ return "generic";
+ }
+ } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
+ // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
+ // appears to be no way to generate the wide variety of AMD-specific targets
+ // from the information returned from CPUID.
+ switch (Family) {
+ case 4:
+ return "i486";
+ case 5:
+ switch (Model) {
+ case 6:
+ case 7: return "k6";
+ case 8: return "k6-2";
+ case 9:
+ case 13: return "k6-3";
+ default: return "pentium";
+ }
+ case 6:
+ switch (Model) {
+ case 4: return "athlon-tbird";
+ case 6:
+ case 7:
+ case 8: return "athlon-mp";
+ case 10: return "athlon-xp";
+ default: return "athlon";
+ }
+ case 15:
+ if (HasSSE3) {
+ return "k8-sse3";
+ } else {
+ switch (Model) {
+ case 1: return "opteron";
+ case 5: return "athlon-fx"; // also opteron
+ default: return "athlon64";
+ }
+ }
+ case 16:
+ return "amdfam10";
+ default:
+ return "generic";
+ }
+ }
+#endif
+
+ return "generic";
+}
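(Editor's note: to make the family/model decoding above concrete, a worked example; the raw EAX value is an assumption chosen for illustration.)

  #include <cassert>

  // Decode family/model from a sample cpuid leaf-1 EAX signature.
  void familyModelExample() {
    unsigned EAX = 0x000106A5;            // assumed sample value
    unsigned Family = (EAX >> 8) & 0xf;   // 6
    unsigned Model  = (EAX >> 4) & 0xf;   // 0xA
    if (Family == 6 || Family == 0xf) {
      if (Family == 0xf)
        Family += (EAX >> 20) & 0xff;     // extended family (unused here)
      Model += ((EAX >> 16) & 0xf) << 4;  // 0xA + 0x10 = 26
    }
    assert(Family == 6 && Model == 26);   // the GenuineIntel table maps this to "corei7"
  }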
diff --git a/lib/System/Unix/Program.inc b/lib/System/Unix/Program.inc
index c52f3a8..43c3606 100644
--- a/lib/System/Unix/Program.inc
+++ b/lib/System/Unix/Program.inc
@@ -121,6 +121,9 @@ static bool RedirectIO(const Path *Path, int FD, std::string* ErrMsg) {
return false;
}
+static void TimeOutHandler(int Sig) {
+}
+
static void SetMemoryLimits (unsigned size)
{
#if HAVE_SYS_RESOURCE_H
@@ -231,11 +234,14 @@ Program::Wait(unsigned secondsToWait,
return -1;
}
- // Install a timeout handler.
+ // Install a timeout handler. The handler itself does nothing, but the simple
+ // fact of having a handler at all causes the wait below to return with EINTR,
+ // unlike if we used SIG_IGN.
if (secondsToWait) {
- memset(&Act, 0, sizeof(Act));
- Act.sa_handler = SIG_IGN;
+ Act.sa_sigaction = 0;
+ Act.sa_handler = TimeOutHandler;
sigemptyset(&Act.sa_mask);
+ Act.sa_flags = 0;
sigaction(SIGALRM, &Act, &Old);
alarm(secondsToWait);
}
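(Editor's note: a standalone POSIX sketch of the timeout idiom the hunk above switches to: a do-nothing handler, unlike SIG_IGN, makes the blocking wait fail with EINTR when the alarm fires. The wrapper and its arguments are illustrative.)

  #include <signal.h>
  #include <sys/wait.h>
  #include <unistd.h>

  static void TimeOutHandler(int) {}        // its mere presence interrupts waitpid()

  int waitWithTimeout(pid_t Child, unsigned Seconds) {
    struct sigaction Act, Old;
    Act.sa_handler = TimeOutHandler;        // SIG_IGN would let waitpid() keep blocking
    sigemptyset(&Act.sa_mask);
    Act.sa_flags = 0;                       // no SA_RESTART, so the wait is interrupted
    sigaction(SIGALRM, &Act, &Old);
    alarm(Seconds);

    int Status = 0;
    pid_t Res = waitpid(Child, &Status, 0); // -1 with errno == EINTR on timeout
    alarm(0);
    sigaction(SIGALRM, &Old, 0);
    return Res < 0 ? -1 : Status;
  }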
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index 76cc06e..ff1980d 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -103,11 +103,13 @@ FunctionPass *createARMObjectCodeEmitterPass(ARMBaseTargetMachine &TM,
ObjectCodeEmitter &OCE);
FunctionPass *createARMLoadStoreOptimizationPass(bool PreAlloc = false);
+FunctionPass *createARMExpandPseudoPass();
FunctionPass *createARMConstantIslandPass();
FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
+FunctionPass *createARMMaxStackAlignmentCalculatorPass();
extern Target TheARMTarget, TheThumbTarget;
diff --git a/lib/Target/ARM/ARMAddressingModes.h b/lib/Target/ARM/ARMAddressingModes.h
index c603708..ddeb1b9 100644
--- a/lib/Target/ARM/ARMAddressingModes.h
+++ b/lib/Target/ARM/ARMAddressingModes.h
@@ -520,8 +520,8 @@ namespace ARM_AM {
return ((AM5Opc >> 8) & 1) ? sub : add;
}
- /// getAM5Opc - This function encodes the addrmode5 opc field for FLDM and
- /// FSTM instructions.
+ /// getAM5Opc - This function encodes the addrmode5 opc field for VLDM and
+ /// VSTM instructions.
static inline unsigned getAM5Opc(AMSubMode SubMode, bool WB,
unsigned char Offset) {
assert((SubMode == ia || SubMode == db) &&
@@ -541,13 +541,15 @@ namespace ARM_AM {
//
// This is used for NEON load / store instructions.
//
- // addrmode6 := reg with optional writeback
+ // addrmode6 := reg with optional writeback and alignment
//
- // This is stored in three operands [regaddr, regupdate, opc]. The first is
- // the address register. The second register holds the value of a post-access
- // increment for writeback or reg0 if no writeback or if the writeback
- // increment is the size of the memory access. The third operand encodes
- // whether there is writeback to the address register.
+ // This is stored in four operands [regaddr, regupdate, opc, align]. The
+ // first is the address register. The second register holds the value of
+ // a post-access increment for writeback or reg0 if no writeback or if the
+ // writeback increment is the size of the memory access. The third
+ // operand encodes whether there is writeback to the address register. The
+ // fourth operand is the value of the alignment specifier to use or zero if
+ // no explicit alignment.
static inline unsigned getAM6Opc(bool WB = false) {
return (int)WB;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 7c5b0f0..b50b609 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -----------*- C++ -*-===//
+//===- ARMBaseInstrInfo.cpp - ARM Instruction Information -------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,11 +14,16 @@
#include "ARMBaseInstrInfo.h"
#include "ARM.h"
#include "ARMAddressingModes.h"
+#include "ARMConstantPoolValue.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
#include "ARMRegisterInfo.h"
+#include "llvm/Constants.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalValue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineJumpTableInfo.h"
@@ -504,9 +509,9 @@ ARMBaseInstrInfo::isMoveInstr(const MachineInstr &MI,
switch (MI.getOpcode()) {
default: break;
- case ARM::FCPYS:
- case ARM::FCPYD:
+ case ARM::VMOVS:
case ARM::VMOVD:
+ case ARM::VMOVDneon:
case ARM::VMOVQ: {
SrcReg = MI.getOperand(1).getReg();
DstReg = MI.getOperand(0).getReg();
@@ -556,8 +561,8 @@ ARMBaseInstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::FLDD:
- case ARM::FLDS:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
@@ -595,8 +600,8 @@ ARMBaseInstrInfo::isStoreToStackSlot(const MachineInstr *MI,
return MI->getOperand(0).getReg();
}
break;
- case ARM::FSTD:
- case ARM::FSTS:
+ case ARM::VSTRD:
+ case ARM::VSTRS:
if (MI->getOperand(1).isFI() &&
MI->getOperand(2).isImm() &&
MI->getOperand(2).getImm() == 0) {
@@ -632,17 +637,17 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::MOVr),
DestReg).addReg(SrcReg)));
} else if (DestRC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYS), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVS), DestReg)
.addReg(SrcReg));
} else if (DestRC == ARM::DPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FCPYD), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg)
.addReg(SrcReg));
} else if (DestRC == ARM::DPR_VFP2RegisterClass ||
DestRC == ARM::DPR_8RegisterClass ||
SrcRC == ARM::DPR_VFP2RegisterClass ||
SrcRC == ARM::DPR_8RegisterClass) {
// Always use neon reg-reg move if source or dest is NEON-only regclass.
- BuildMI(MBB, I, DL, get(ARM::VMOVD), DestReg).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(ARM::VMOVDneon), DestReg).addReg(SrcReg);
} else if (DestRC == ARM::QPRRegisterClass ||
DestRC == ARM::QPR_VFP2RegisterClass ||
DestRC == ARM::QPR_8RegisterClass) {
@@ -662,12 +667,13 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
MachineMemOperand::MOStore, 0,
MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ Align);
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
@@ -676,19 +682,27 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else if (RC == ARM::DPRRegisterClass ||
RC == ARM::DPR_VFP2RegisterClass ||
RC == ARM::DPR_8RegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTD))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRD))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FSTS))
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill))
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else {
assert((RC == ARM::QPRRegisterClass ||
RC == ARM::QPR_VFP2RegisterClass) && "Unknown regclass!");
// FIXME: Neon instructions should support predicates
- BuildMI(MBB, I, DL, get(ARM::VSTRQ)).addReg(SrcReg, getKillRegState(isKill))
- .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ if (Align >= 16
+ && (getRegisterInfo().needsStackRealignment(MF))) {
+ BuildMI(MBB, I, DL, get(ARM::VST1q64))
+ .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO)
+ .addReg(SrcReg, getKillRegState(isKill));
+ } else {
+ BuildMI(MBB, I, DL, get(ARM::VSTRQ)).
+ addReg(SrcReg, getKillRegState(isKill))
+ .addFrameIndex(FI).addImm(0).addMemOperand(MMO);
+ }
}
}
@@ -700,12 +714,13 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
if (I != MBB.end()) DL = I->getDebugLoc();
MachineFunction &MF = *MBB.getParent();
MachineFrameInfo &MFI = *MF.getFrameInfo();
+ unsigned Align = MFI.getObjectAlignment(FI);
MachineMemOperand *MMO =
MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FI),
MachineMemOperand::MOLoad, 0,
MFI.getObjectSize(FI),
- MFI.getObjectAlignment(FI));
+ Align);
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
@@ -713,18 +728,24 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
} else if (RC == ARM::DPRRegisterClass ||
RC == ARM::DPR_VFP2RegisterClass ||
RC == ARM::DPR_8RegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDD), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRD), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else if (RC == ARM::SPRRegisterClass) {
- AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::FLDS), DestReg)
+ AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLDRS), DestReg)
.addFrameIndex(FI).addImm(0).addMemOperand(MMO));
} else {
assert((RC == ARM::QPRRegisterClass ||
RC == ARM::QPR_VFP2RegisterClass ||
RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
// FIXME: Neon instructions should support predicates
- BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).
- addMemOperand(MMO);
+ if (Align >= 16
+ && (getRegisterInfo().needsStackRealignment(MF))) {
+ BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
+ .addFrameIndex(FI).addImm(0).addImm(0).addImm(128).addMemOperand(MMO);
+ } else {
+ BuildMI(MBB, I, DL, get(ARM::VLDRQ), DestReg).addFrameIndex(FI).addImm(0).
+ addMemOperand(MMO);
+ }
}
}
@@ -805,7 +826,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
DstSubReg)
.addFrameIndex(FI).addImm(0).addImm(ARMCC::AL).addReg(0);
}
- } else if (Opc == ARM::FCPYS) {
+ } else if (Opc == ARM::VMOVS) {
unsigned Pred = MI->getOperand(2).getImm();
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
@@ -813,7 +834,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned SrcSubReg = MI->getOperand(1).getSubReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTS))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRS))
.addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef),
SrcSubReg)
.addFrameIndex(FI)
@@ -823,7 +844,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned DstSubReg = MI->getOperand(0).getSubReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDS))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRS))
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
@@ -832,7 +853,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
.addFrameIndex(FI).addImm(0).addImm(Pred).addReg(PredReg);
}
}
- else if (Opc == ARM::FCPYD) {
+ else if (Opc == ARM::VMOVD) {
unsigned Pred = MI->getOperand(2).getImm();
unsigned PredReg = MI->getOperand(3).getReg();
if (OpNum == 0) { // move -> store
@@ -840,7 +861,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned SrcSubReg = MI->getOperand(1).getSubReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FSTD))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VSTRD))
.addReg(SrcReg,
getKillRegState(isKill) | getUndefRegState(isUndef),
SrcSubReg)
@@ -850,7 +871,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI,
unsigned DstSubReg = MI->getOperand(0).getSubReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::FLDD))
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(ARM::VLDRD))
.addReg(DstReg,
RegState::Define |
getDeadRegState(isDead) |
@@ -886,15 +907,114 @@ ARMBaseInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
Opc == ARM::tMOVtgpr2gpr ||
Opc == ARM::tMOVgpr2tgpr) {
return true;
- } else if (Opc == ARM::FCPYS || Opc == ARM::FCPYD) {
+ } else if (Opc == ARM::VMOVS || Opc == ARM::VMOVD) {
return true;
- } else if (Opc == ARM::VMOVD || Opc == ARM::VMOVQ) {
+ } else if (Opc == ARM::VMOVDneon || Opc == ARM::VMOVQ) {
return false; // FIXME
}
return false;
}
+void ARMBaseInstrInfo::
+reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
+ DebugLoc dl = Orig->getDebugLoc();
+
+ if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
+ DestReg = TRI->getSubReg(DestReg, SubIdx);
+ SubIdx = 0;
+ }
+
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default: {
+ MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
+ MI->getOperand(0).setReg(DestReg);
+ MBB.insert(I, MI);
+ break;
+ }
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ MachineFunction &MF = *MBB.getParent();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ MachineConstantPool *MCP = MF.getConstantPool();
+ unsigned CPI = Orig->getOperand(1).getIndex();
+ const MachineConstantPoolEntry &MCPE = MCP->getConstants()[CPI];
+ assert(MCPE.isMachineConstantPoolEntry() &&
+ "Expecting a machine constantpool entry!");
+ ARMConstantPoolValue *ACPV =
+ static_cast<ARMConstantPoolValue*>(MCPE.Val.MachineCPVal);
+ unsigned PCLabelId = AFI->createConstPoolEntryUId();
+ ARMConstantPoolValue *NewCPV = 0;
+ if (ACPV->isGlobalValue())
+ NewCPV = new ARMConstantPoolValue(ACPV->getGV(), PCLabelId,
+ ARMCP::CPValue, 4);
+ else if (ACPV->isExtSymbol())
+ NewCPV = new ARMConstantPoolValue(MF.getFunction()->getContext(),
+ ACPV->getSymbol(), PCLabelId, 4);
+ else if (ACPV->isBlockAddress())
+ NewCPV = new ARMConstantPoolValue(ACPV->getBlockAddress(), PCLabelId,
+ ARMCP::CPBlockAddress, 4);
+ else
+ llvm_unreachable("Unexpected ARM constantpool value type!!");
+ CPI = MCP->getConstantPoolIndex(NewCPV, MCPE.getAlignment());
+ MachineInstrBuilder MIB = BuildMI(MBB, I, Orig->getDebugLoc(), get(Opcode),
+ DestReg)
+ .addConstantPoolIndex(CPI).addImm(PCLabelId);
+ (*MIB).setMemRefs(Orig->memoperands_begin(), Orig->memoperands_end());
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = prior(I);
+ NewMI->getOperand(0).setSubReg(SubIdx);
+}
+
+bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
+ const MachineInstr *MI1,
+ const MachineRegisterInfo *MRI) const {
+ int Opcode = MI0->getOpcode();
+ if (Opcode == ARM::t2LDRpci_pic || Opcode == ARM::tLDRpci_pic) {
+ if (MI1->getOpcode() != Opcode)
+ return false;
+ if (MI0->getNumOperands() != MI1->getNumOperands())
+ return false;
+
+ const MachineOperand &MO0 = MI0->getOperand(1);
+ const MachineOperand &MO1 = MI1->getOperand(1);
+ if (MO0.getOffset() != MO1.getOffset())
+ return false;
+
+ const MachineFunction *MF = MI0->getParent()->getParent();
+ const MachineConstantPool *MCP = MF->getConstantPool();
+ int CPI0 = MO0.getIndex();
+ int CPI1 = MO1.getIndex();
+ const MachineConstantPoolEntry &MCPE0 = MCP->getConstants()[CPI0];
+ const MachineConstantPoolEntry &MCPE1 = MCP->getConstants()[CPI1];
+ ARMConstantPoolValue *ACPV0 =
+ static_cast<ARMConstantPoolValue*>(MCPE0.Val.MachineCPVal);
+ ARMConstantPoolValue *ACPV1 =
+ static_cast<ARMConstantPoolValue*>(MCPE1.Val.MachineCPVal);
+ return ACPV0->hasSameValue(ACPV1);
+ }
+
+ return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI);
+}
+
+unsigned ARMBaseInstrInfo::TailDuplicationLimit(const MachineBasicBlock &MBB,
+ unsigned DefaultLimit) const {
+ // If the target processor can predict indirect branches, it is highly
+ // desirable to duplicate them, since it can often make them predictable.
+ if (!MBB.empty() && isIndirectBranchOpcode(MBB.back().getOpcode()) &&
+ getSubtarget().hasBranchTargetBuffer())
+ return DefaultLimit + 2;
+ return DefaultLimit;
+}
+
/// getInstrPredicate - If instruction is predicated, returns its predicate
/// condition, otherwise returns AL. It also returns the condition code
/// register by reference.
@@ -1022,6 +1142,7 @@ bool llvm::rewriteARMFrameIndex(MachineInstr &MI, unsigned FrameRegIdx,
break;
}
case ARMII::AddrMode4:
+ case ARMII::AddrMode6:
// Can't fold any offset even if it's zero.
return false;
case ARMII::AddrMode5: {
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 2ba3774..73e854f 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -1,4 +1,4 @@
-//===- ARMBaseInstrInfo.h - ARM Base Instruction Information -------------*- C++ -*-===//
+//===- ARMBaseInstrInfo.h - ARM Base Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -261,9 +261,20 @@ public:
virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
- const SmallVectorImpl<unsigned> &Ops,
+ const SmallVectorImpl<unsigned> &Ops,
MachineInstr* LoadMI) const;
+ virtual void reMaterialize(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
+
+ virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other,
+ const MachineRegisterInfo *MRI) const;
+
+ virtual unsigned TailDuplicationLimit(const MachineBasicBlock &MBB,
+ unsigned DefaultLimit) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 70377f9e..19762ee 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -44,10 +44,6 @@ static cl::opt<bool>
ReuseFrameIndexVals("arm-reuse-frame-index-vals", cl::Hidden, cl::init(true),
cl::desc("Reuse repeated frame index values"));
-static cl::opt<bool>
-ARMDynamicStackAlign("arm-dynamic-stack-alignment", cl::Hidden, cl::init(false),
- cl::desc("Dynamically re-align the stack as needed"));
-
unsigned ARMBaseRegisterInfo::getRegisterNumbering(unsigned RegEnum,
bool *isSPVFP) {
if (isSPVFP)
@@ -476,11 +472,7 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
}
static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
- // FIXME: For now, force at least 128-bit alignment. This will push the
- // nightly tester harder for making sure things work correctly. When
- // we're ready to enable this for real, this goes back to starting at zero.
- unsigned MaxAlign = 16;
-// unsigned MaxAlign = 0;
+ unsigned MaxAlign = 0;
for (int i = FFI->getObjectIndexBegin(),
e = FFI->getObjectIndexEnd(); i != e; ++i) {
@@ -508,20 +500,12 @@ bool ARMBaseRegisterInfo::hasFP(const MachineFunction &MF) const {
bool ARMBaseRegisterInfo::
needsStackRealignment(const MachineFunction &MF) const {
- // Only do this for ARM if explicitly enabled
- // FIXME: Once it's passing all the tests, enable by default
- if (!ARMDynamicStackAlign)
- return false;
-
- // FIXME: To force more brutal testing, realign whether we need to or not.
- // Change this to be more selective when we turn it on for real, of course.
const MachineFrameInfo *MFI = MF.getFrameInfo();
const ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
-// unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
+ unsigned StackAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
return (RealignStack &&
!AFI->isThumb1OnlyFunction() &&
- AFI->hasStackFrame() &&
-// (MFI->getMaxAlignment() > StackAlign) &&
+ (MFI->getMaxAlignment() > StackAlign) &&
!MFI->hasVarSizedObjects());
}
@@ -529,7 +513,8 @@ bool ARMBaseRegisterInfo::cannotEliminateFrame(const MachineFunction &MF) const
const MachineFrameInfo *MFI = MF.getFrameInfo();
if (NoFramePointerElim && MFI->hasCalls())
return true;
- return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken();
+ return MFI->hasVarSizedObjects() || MFI->isFrameAddressTaken()
+ || needsStackRealignment(MF);
}
/// estimateStackSize - Estimate and return the size of the frame.
@@ -604,7 +589,7 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- if (ARMDynamicStackAlign) {
+ if (RealignStack) {
unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
calculateMaxStackAlignment(MFI));
MFI->setMaxAlignment(MaxAlign);
@@ -789,7 +774,8 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve a slot closest to SP or frame pointer.
const TargetRegisterClass *RC = ARM::GPRRegisterClass;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
}
}
@@ -806,7 +792,8 @@ unsigned ARMBaseRegisterInfo::getRARegister() const {
return ARM::LR;
}
-unsigned ARMBaseRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned
+ARMBaseRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
if (STI.isTargetDarwin() || hasFP(MF))
return FramePtr;
return ARM::SP;
@@ -1183,7 +1170,8 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// as much as possible above, handle the rest, providing a register that is
// SP+LargeImm.
assert((Offset ||
- (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4) &&
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode4 ||
+ (MI.getDesc().TSFlags & ARMII::AddrModeMask) == ARMII::AddrMode6) &&
"This code isn't needed if offset already handled!");
unsigned ScratchReg = 0;
@@ -1192,7 +1180,7 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
? ARMCC::AL : (ARMCC::CondCodes)MI.getOperand(PIdx).getImm();
unsigned PredReg = (PIdx == -1) ? 0 : MI.getOperand(PIdx+1).getReg();
if (Offset == 0)
- // Must be addrmode4.
+ // Must be addrmode4/6.
MI.getOperand(i).ChangeToRegister(FrameReg, false, false, false);
else {
ScratchReg = MF.getRegInfo().createVirtualRegister(ARM::GPRRegisterClass);
@@ -1346,7 +1334,7 @@ emitPrologue(MachineFunction &MF) const {
AFI->setGPRCalleeSavedArea2Offset(GPRCS2Offset);
AFI->setDPRCalleeSavedAreaOffset(DPRCSOffset);
- movePastCSLoadStoreOps(MBB, MBBI, ARM::FSTD, 0, 3, STI);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::VSTRD, 0, 3, STI);
NumBytes = DPRCSOffset;
if (NumBytes) {
// Adjust SP after all the callee-save spills.
@@ -1385,7 +1373,7 @@ static bool isCalleeSavedRegister(unsigned Reg, const unsigned *CSRegs) {
static bool isCSRestore(MachineInstr *MI,
const ARMBaseInstrInfo &TII,
const unsigned *CSRegs) {
- return ((MI->getOpcode() == (int)ARM::FLDD ||
+ return ((MI->getOpcode() == (int)ARM::VLDRD ||
MI->getOpcode() == (int)ARM::LDR ||
MI->getOpcode() == (int)ARM::t2LDRi12) &&
MI->getOperand(1).isFI() &&
@@ -1411,7 +1399,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
if (NumBytes != 0)
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
} else {
- // Unwind MBBI to point to first LDR / FLDD.
+ // Unwind MBBI to point to first LDR / VLDRD.
const unsigned *CSRegs = getCalleeSavedRegs();
if (MBBI != MBB.begin()) {
do
@@ -1459,7 +1447,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, NumBytes);
// Move SP to start of integer callee save spill area 2.
- movePastCSLoadStoreOps(MBB, MBBI, ARM::FLDD, 0, 3, STI);
+ movePastCSLoadStoreOps(MBB, MBBI, ARM::VLDRD, 0, 3, STI);
emitSPUpdate(isARM, MBB, MBBI, dl, TII, AFI->getDPRCalleeSavedAreaSize());
// Move SP to start of integer callee save spill area 1.
@@ -1475,4 +1463,48 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
}
+namespace {
+ struct MaximalStackAlignmentCalculator : public MachineFunctionPass {
+ static char ID;
+ MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+
+ // Calculate max stack alignment of all already allocated stack objects.
+ unsigned MaxAlign = calculateMaxStackAlignment(FFI);
+
+    // Be over-conservative: scan over all vreg defs and check whether vector
+    // registers are used. If so, there is a chance that a vector register
+    // will be spilled, and the stack then needs to be aligned properly.
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
+ RegNum < RI.getLastVirtReg(); ++RegNum)
+ MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
+
+ if (FFI->getMaxAlignment() == MaxAlign)
+ return false;
+
+ FFI->setMaxAlignment(MaxAlign);
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "ARM Stack Required Alignment Auto-Detector";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+
+ char MaximalStackAlignmentCalculator::ID = 0;
+}
+
+FunctionPass*
+llvm::createARMMaxStackAlignmentCalculatorPass() {
+ return new MaximalStackAlignmentCalculator();
+}
+
#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 029e468..4b267b0 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -105,7 +105,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/ARM/ARMCodeEmitter.cpp b/lib/Target/ARM/ARMCodeEmitter.cpp
index 13cf676..766acff 100644
--- a/lib/Target/ARM/ARMCodeEmitter.cpp
+++ b/lib/Target/ARM/ARMCodeEmitter.cpp
@@ -168,7 +168,8 @@ namespace {
/// Routines that handle operands which add machine relocations which are
/// fixed up by the relocation stage.
void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
- bool NeedStub, bool Indirect, intptr_t ACPV = 0);
+ bool MayNeedFarStub, bool Indirect,
+ intptr_t ACPV = 0);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
void emitConstPoolAddress(unsigned CPI, unsigned Reloc);
void emitJumpTableAddress(unsigned JTIndex, unsigned Reloc);
@@ -277,13 +278,13 @@ unsigned Emitter<CodeEmitter>::getMachineOpValue(const MachineInstr &MI,
///
template<class CodeEmitter>
void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
- bool NeedStub, bool Indirect,
+ bool MayNeedFarStub, bool Indirect,
intptr_t ACPV) {
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- GV, ACPV, NeedStub)
+ GV, ACPV, MayNeedFarStub)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, ACPV, NeedStub);
+ GV, ACPV, MayNeedFarStub);
MCE.addRelocation(MR);
}
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index 9819625..d22c43a 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -31,6 +31,7 @@
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/CommandLine.h"
#include <algorithm>
using namespace llvm;
@@ -42,6 +43,13 @@ STATISTIC(NumTBs, "Number of table branches generated");
STATISTIC(NumT2CPShrunk, "Number of Thumb2 constantpool instructions shrunk");
STATISTIC(NumT2BrShrunk, "Number of Thumb2 immediate branches shrunk");
STATISTIC(NumCBZ, "Number of CBZ / CBNZ formed");
+STATISTIC(NumJTMoved, "Number of jump table destination blocks moved");
+STATISTIC(NumJTInserted, "Number of jump table intermediate blocks inserted");
+
+
+static cl::opt<bool>
+AdjustJumpTableBlocks("arm-adjust-jump-tables", cl::Hidden, cl::init(true),
+ cl::desc("Adjust basic block layout to better use TB[BH]"));
namespace {
/// ARMConstantIslands - Due to limited PC-relative displacements, ARM
@@ -174,6 +182,7 @@ namespace {
void DoInitialPlacement(MachineFunction &MF,
std::vector<MachineInstr*> &CPEMIs);
CPEntry *findConstPoolEntry(unsigned CPI, const MachineInstr *CPEMI);
+ void JumpTableFunctionScan(MachineFunction &MF);
void InitialFunctionScan(MachineFunction &MF,
const std::vector<MachineInstr*> &CPEMIs);
MachineBasicBlock *SplitBlockBeforeInstr(MachineInstr *MI);
@@ -201,7 +210,10 @@ namespace {
bool UndoLRSpillRestore();
bool OptimizeThumb2Instructions(MachineFunction &MF);
bool OptimizeThumb2Branches(MachineFunction &MF);
+ bool ReorderThumb2JumpTables(MachineFunction &MF);
bool OptimizeThumb2JumpTables(MachineFunction &MF);
+ MachineBasicBlock *AdjustJTTargetBlockForward(MachineBasicBlock *BB,
+ MachineBasicBlock *JTBB);
unsigned GetOffsetOf(MachineInstr *MI) const;
void dumpBBs();
@@ -262,6 +274,18 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// the numbers agree with the position of the block in the function.
MF.RenumberBlocks();
+ // Try to reorder and otherwise adjust the block layout to make good use
+ // of the TB[BH] instructions.
+ bool MadeChange = false;
+ if (isThumb2 && AdjustJumpTableBlocks) {
+ JumpTableFunctionScan(MF);
+ MadeChange |= ReorderThumb2JumpTables(MF);
+ // Data is out of date, so clear it. It'll be re-computed later.
+ T2JumpTables.clear();
+ // Blocks may have shifted around. Keep the numbering up to date.
+ MF.RenumberBlocks();
+ }
+
// Thumb1 functions containing constant pools get 4-byte alignment.
// This is so we can keep exact track of where the alignment padding goes.
@@ -292,7 +316,6 @@ bool ARMConstantIslands::runOnMachineFunction(MachineFunction &MF) {
// Iteratively place constant pool entries and fix up branches until there
// is no change.
- bool MadeChange = false;
unsigned NoCPIters = 0, NoBRIters = 0;
while (true) {
bool CPChange = false;
@@ -409,6 +432,21 @@ ARMConstantIslands::CPEntry
return NULL;
}
+/// JumpTableFunctionScan - Do a scan of the function, building up
+/// information about the locations of all the jump table branch
+/// instructions.
+void ARMConstantIslands::JumpTableFunctionScan(MachineFunction &MF) {
+ for (MachineFunction::iterator MBBI = MF.begin(), E = MF.end();
+ MBBI != E; ++MBBI) {
+ MachineBasicBlock &MBB = *MBBI;
+
+ for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
+ I != E; ++I)
+ if (I->getDesc().isBranch() && I->getOpcode() == ARM::t2BR_JT)
+ T2JumpTables.push_back(I);
+ }
+}
+
/// InitialFunctionScan - Do the initial scan of the function, building up
/// information about the sizes of each block, the location of all the water,
/// and finding all of the constant pool users.
@@ -541,8 +579,8 @@ void ARMConstantIslands::InitialFunctionScan(MachineFunction &MF,
Scale = 4; // +(offset_8*4)
break;
- case ARM::FLDD:
- case ARM::FLDS:
+ case ARM::VLDRD:
+ case ARM::VLDRS:
Bits = 8;
Scale = 4; // +-(offset_8*4)
NegOk = true;
@@ -1552,7 +1590,6 @@ bool ARMConstantIslands::OptimizeThumb2Branches(MachineFunction &MF) {
return MadeChange;
}
-
/// OptimizeThumb2JumpTables - Use tbb / tbh instructions to generate smaller
/// jumptables when it's possible.
bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
@@ -1560,7 +1597,7 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
   // FIXME: After the tables are shrunk, can we get rid of some of the
   // constantpool tables?
- const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
MachineInstr *MI = T2JumpTables[i];
@@ -1660,3 +1697,99 @@ bool ARMConstantIslands::OptimizeThumb2JumpTables(MachineFunction &MF) {
return MadeChange;
}
+
+/// ReorderThumb2JumpTables - Adjust the function's block layout to ensure that
+/// jump tables always branch forwards, since that's what tbb and tbh need.
+bool ARMConstantIslands::ReorderThumb2JumpTables(MachineFunction &MF) {
+ bool MadeChange = false;
+
+ MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ for (unsigned i = 0, e = T2JumpTables.size(); i != e; ++i) {
+ MachineInstr *MI = T2JumpTables[i];
+ const TargetInstrDesc &TID = MI->getDesc();
+ unsigned NumOps = TID.getNumOperands();
+ unsigned JTOpIdx = NumOps - (TID.isPredicable() ? 3 : 2);
+ MachineOperand JTOP = MI->getOperand(JTOpIdx);
+ unsigned JTI = JTOP.getIndex();
+ assert(JTI < JT.size());
+
+    // We prefer the target blocks for the jump table to come after the jump
+    // instruction so we can use TB[BH]. Loop through the target blocks
+    // and try to adjust them so that this is the case.
+ int JTNumber = MI->getParent()->getNumber();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ for (unsigned j = 0, ee = JTBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *MBB = JTBBs[j];
+ int DTNumber = MBB->getNumber();
+
+ if (DTNumber < JTNumber) {
+ // The destination precedes the switch. Try to move the block forward
+ // so we have a positive offset.
+ MachineBasicBlock *NewBB =
+ AdjustJTTargetBlockForward(MBB, MI->getParent());
+ if (NewBB)
+ MJTI->ReplaceMBBInJumpTable(JTI, JTBBs[j], NewBB);
+ MadeChange = true;
+ }
+ }
+ }
+
+ return MadeChange;
+}
+
+MachineBasicBlock *ARMConstantIslands::
+AdjustJTTargetBlockForward(MachineBasicBlock *BB, MachineBasicBlock *JTBB)
+{
+ MachineFunction &MF = *BB->getParent();
+
+  // If the destination block is terminated by an unconditional branch,
+ // try to move it; otherwise, create a new block following the jump
+ // table that branches back to the actual target. This is a very simple
+ // heuristic. FIXME: We can definitely improve it.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ SmallVector<MachineOperand, 4> CondPrior;
+ MachineFunction::iterator BBi = BB;
+ MachineFunction::iterator OldPrior = prior(BBi);
+
+ // If the block terminator isn't analyzable, don't try to move the block
+ bool B = TII->AnalyzeBranch(*BB, TBB, FBB, Cond);
+
+ // If the block ends in an unconditional branch, move it. The prior block
+ // has to have an analyzable terminator for us to move this one. Be paranoid
+ // and make sure we're not trying to move the entry block of the function.
+ if (!B && Cond.empty() && BB != MF.begin() &&
+ !TII->AnalyzeBranch(*OldPrior, TBB, FBB, CondPrior)) {
+ BB->moveAfter(JTBB);
+ OldPrior->updateTerminator();
+ BB->updateTerminator();
+ // Update numbering to account for the block being moved.
+ MF.RenumberBlocks();
+ ++NumJTMoved;
+ return NULL;
+ }
+
+ // Create a new MBB for the code after the jump BB.
+ MachineBasicBlock *NewBB =
+ MF.CreateMachineBasicBlock(JTBB->getBasicBlock());
+ MachineFunction::iterator MBBI = JTBB; ++MBBI;
+ MF.insert(MBBI, NewBB);
+
+ // Add an unconditional branch from NewBB to BB.
+ // There doesn't seem to be meaningful DebugInfo available; this doesn't
+ // correspond directly to anything in the source.
+ assert (isThumb2 && "Adjusting for TB[BH] but not in Thumb2?");
+ BuildMI(NewBB, DebugLoc::getUnknownLoc(), TII->get(ARM::t2B)).addMBB(BB);
+
+ // Update internal data structures to account for the newly inserted MBB.
+ MF.RenumberBlocks(NewBB);
+
+ // Update the CFG.
+ NewBB->addSuccessor(BB);
+ JTBB->removeSuccessor(BB);
+ JTBB->addSuccessor(NewBB);
+
+ ++NumJTInserted;
+ return NewBB;
+}
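The reordering above exists because the Thumb-2 table-branch instructions only reach forwards: each TBB entry is an unsigned byte and each TBH entry is an unsigned halfword, scaled by two and added to the PC, so a jump-table target laid out before the branch simply cannot be encoded (the reach is roughly 510 bytes for TBB and about 128 KiB for TBH). The sketch below is a simplified view of the per-target decision, assuming block numbers follow layout order as they do after RenumberBlocks; the names are mine, and the real code above additionally requires the block before the moved one to have an analyzable terminator.

    // Illustrative classification of a jump-table target block, mirroring
    // ReorderThumb2JumpTables / AdjustJTTargetBlockForward above.
    enum TargetFix {
      FIX_NONE,              // target already follows the branch
      FIX_MOVE_BLOCK,        // move the target block to just after the jump table
      FIX_INSERT_TRAMPOLINE  // insert a forwarding block that branches back to it
    };

    TargetFix classifyJTTarget(int JumpBlockNum, int TargetBlockNum,
                               bool TargetEndsInUncondBranch) {
      if (TargetBlockNum >= JumpBlockNum)
        return FIX_NONE;               // forward branch, TB[BH] can encode it
      if (TargetEndsInUncondBranch)
        return FIX_MOVE_BLOCK;         // no fall-through to preserve, safe to move
      return FIX_INSERT_TRAMPOLINE;    // keep layout, pay one extra t2B
    }

Either fix keeps every table entry a non-negative offset, which is exactly what OptimizeThumb2JumpTables later relies on when shrinking the tables to byte or halfword entries.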
diff --git a/lib/Target/ARM/ARMConstantPoolValue.cpp b/lib/Target/ARM/ARMConstantPoolValue.cpp
index efa941a..90dd0c7 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.cpp
+++ b/lib/Target/ARM/ARMConstantPoolValue.cpp
@@ -62,9 +62,10 @@ int ARMConstantPoolValue::getExistingMachineCPValue(MachineConstantPool *CP,
ARMConstantPoolValue *CPV =
(ARMConstantPoolValue *)Constants[i].Val.MachineCPVal;
if (CPV->CVal == CVal &&
- CPV->S == S &&
CPV->LabelId == LabelId &&
- CPV->PCAdjust == PCAdjust)
+ CPV->PCAdjust == PCAdjust &&
+ (CPV->S == S || strcmp(CPV->S, S) == 0) &&
+ (CPV->Modifier == Modifier || strcmp(CPV->Modifier, Modifier) == 0))
return i;
}
}
@@ -84,6 +85,23 @@ ARMConstantPoolValue::AddSelectionDAGCSEId(FoldingSetNodeID &ID) {
ID.AddInteger(PCAdjust);
}
+bool
+ARMConstantPoolValue::hasSameValue(ARMConstantPoolValue *ACPV) {
+ if (ACPV->Kind == Kind &&
+ ACPV->CVal == CVal &&
+ ACPV->PCAdjust == PCAdjust &&
+ (ACPV->S == S || strcmp(ACPV->S, S) == 0) &&
+ (ACPV->Modifier == Modifier || strcmp(ACPV->Modifier, Modifier) == 0)) {
+ if (ACPV->LabelId == LabelId)
+ return true;
+    // Two PC-relative constpool entries containing the same GV address or
+    // external symbol. FIXME: What about blockaddress?
+ if (Kind == ARMCP::CPValue || Kind == ARMCP::CPExtSymbol)
+ return true;
+ }
+ return false;
+}
+
void ARMConstantPoolValue::dump() const {
errs() << " " << *this;
}
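hasSameValue is what lets two PC-relative constant-pool entries for the same global value or external symbol share a single pool slot even when their PIC label ids differ. Both it and getExistingMachineCPValue compare the optional symbol and modifier strings with the idiom "A == B || strcmp(A, B) == 0", where the pointer test exists so that identical pointers, in particular two null pointers (no symbol / no modifier), compare equal without calling strcmp. Written as a standalone helper it might look like the sketch below; the extra null guard is my addition, and the inline form above assumes the surrounding kind and value checks make it unnecessary.

    #include <cstring>

    // Two optional C strings match if they are the same pointer (including
    // both null) or have equal contents.
    static bool sameOptionalString(const char *A, const char *B) {
      if (A == B)
        return true;                   // covers the null/null case
      if (!A || !B)
        return false;                  // exactly one side missing
      return std::strcmp(A, B) == 0;   // both present: compare contents
    }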
diff --git a/lib/Target/ARM/ARMConstantPoolValue.h b/lib/Target/ARM/ARMConstantPoolValue.h
index 8fb3f92..741acde 100644
--- a/lib/Target/ARM/ARMConstantPoolValue.h
+++ b/lib/Target/ARM/ARMConstantPoolValue.h
@@ -81,6 +81,10 @@ public:
virtual void AddSelectionDAGCSEId(FoldingSetNodeID &ID);
+ /// hasSameValue - Return true if this ARM constpool value
+ /// can share the same constantpool entry as another ARM constpool value.
+ bool hasSameValue(ARMConstantPoolValue *ACPV);
+
void print(raw_ostream *O) const { if (O) print(*O); }
void print(raw_ostream &O) const;
void dump() const;
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
new file mode 100644
index 0000000..4d0f899
--- /dev/null
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -0,0 +1,115 @@
+//===-- ARMExpandPseudoInsts.cpp - Expand pseudo instructions -----*- C++ -*-=//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a pass that expands pseudo instructions into target
+// instructions to allow proper scheduling, if-conversion, and other late
+// optimizations. This pass should be run after register allocation but before
+// the post-regalloc scheduling pass.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "arm-pseudo"
+#include "ARM.h"
+#include "ARMBaseInstrInfo.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+
+using namespace llvm;
+
+namespace {
+ class ARMExpandPseudo : public MachineFunctionPass {
+ public:
+ static char ID;
+ ARMExpandPseudo() : MachineFunctionPass(&ID) {}
+
+ const TargetInstrInfo *TII;
+
+ virtual bool runOnMachineFunction(MachineFunction &Fn);
+
+ virtual const char *getPassName() const {
+ return "ARM pseudo instruction expansion pass";
+ }
+
+ private:
+ bool ExpandMBB(MachineBasicBlock &MBB);
+ };
+ char ARMExpandPseudo::ID = 0;
+}
+
+bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
+ bool Modified = false;
+
+ MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
+ while (MBBI != E) {
+ MachineInstr &MI = *MBBI;
+ MachineBasicBlock::iterator NMBBI = next(MBBI);
+
+ unsigned Opcode = MI.getOpcode();
+ switch (Opcode) {
+ default: break;
+ case ARM::tLDRpci_pic:
+ case ARM::t2LDRpci_pic: {
+ unsigned NewLdOpc = (Opcode == ARM::tLDRpci_pic)
+ ? ARM::tLDRpci : ARM::t2LDRpci;
+ unsigned DstReg = MI.getOperand(0).getReg();
+ if (!MI.getOperand(0).isDead()) {
+ MachineInstr *NewMI =
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(NewLdOpc), DstReg)
+ .addOperand(MI.getOperand(1)));
+ NewMI->setMemRefs(MI.memoperands_begin(), MI.memoperands_end());
+ BuildMI(MBB, MBBI, MI.getDebugLoc(), TII->get(ARM::tPICADD))
+ .addReg(DstReg, getDefRegState(true))
+ .addReg(DstReg)
+ .addOperand(MI.getOperand(2));
+ }
+ MI.eraseFromParent();
+ Modified = true;
+ break;
+ }
+ case ARM::t2MOVi32imm: {
+ unsigned DstReg = MI.getOperand(0).getReg();
+ unsigned Imm = MI.getOperand(1).getImm();
+ unsigned Lo16 = Imm & 0xffff;
+ unsigned Hi16 = (Imm >> 16) & 0xffff;
+ if (!MI.getOperand(0).isDead()) {
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::t2MOVi16), DstReg)
+ .addImm(Lo16));
+ AddDefaultPred(BuildMI(MBB, MBBI, MI.getDebugLoc(),
+ TII->get(ARM::t2MOVTi16))
+ .addReg(DstReg, getDefRegState(true))
+ .addReg(DstReg).addImm(Hi16));
+ }
+ MI.eraseFromParent();
+ Modified = true;
+ }
+ // FIXME: expand t2MOVi32imm
+ }
+ MBBI = NMBBI;
+ }
+
+ return Modified;
+}
+
+bool ARMExpandPseudo::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+
+ bool Modified = false;
+ for (MachineFunction::iterator MFI = MF.begin(), E = MF.end(); MFI != E;
+ ++MFI)
+ Modified |= ExpandMBB(*MFI);
+ return Modified;
+}
+
+/// createARMExpandPseudoPass - returns an instance of the pseudo instruction
+/// expansion pass.
+FunctionPass *llvm::createARMExpandPseudoPass() {
+ return new ARMExpandPseudo();
+}
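The t2MOVi32imm expansion above is the standard movw/movt pairing: the low 16 bits of the immediate go into a t2MOVi16 (movw), which also clears the upper half of the register, and the high 16 bits then go into a t2MOVTi16 (movt), which writes only the top half. A tiny standalone illustration of the split follows; the register name and printed syntax are just for the example.

    #include <cstdint>
    #include <cstdio>

    // Show the two 16-bit halves a 32-bit immediate is split into, e.g.
    // 0xDEADBEEF -> movw r0, #0xbeef ; movt r0, #0xdead
    void printMovwMovtPair(uint32_t Imm) {
      unsigned Lo16 = Imm & 0xffff;          // written first, zeroes bits 31:16
      unsigned Hi16 = (Imm >> 16) & 0xffff;  // written second, keeps bits 15:0
      std::printf("movw r0, #0x%04x\n", Lo16);
      std::printf("movt r0, #0x%04x\n", Hi16);
    }

The tLDRpci_pic/t2LDRpci_pic case follows the same pattern of replacing one pseudo with a short fixed sequence, here a constant-pool load followed by a tPICADD that adds in the PC value of the PIC label.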
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index 1489cab..9be7454 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -81,7 +81,7 @@ public:
bool SelectAddrMode5(SDValue Op, SDValue N, SDValue &Base,
SDValue &Offset);
bool SelectAddrMode6(SDValue Op, SDValue N, SDValue &Addr, SDValue &Update,
- SDValue &Opc);
+ SDValue &Opc, SDValue &Align);
bool SelectAddrModePC(SDValue Op, SDValue N, SDValue &Offset,
SDValue &Label);
@@ -187,8 +187,6 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
void ARMDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
-
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
}
@@ -491,11 +489,13 @@ bool ARMDAGToDAGISel::SelectAddrMode5(SDValue Op, SDValue N,
bool ARMDAGToDAGISel::SelectAddrMode6(SDValue Op, SDValue N,
SDValue &Addr, SDValue &Update,
- SDValue &Opc) {
+ SDValue &Opc, SDValue &Align) {
Addr = N;
// Default to no writeback.
Update = CurDAG->getRegister(0, MVT::i32);
Opc = CurDAG->getTargetConstant(ARM_AM::getAM6Opc(false), MVT::i32);
+ // Default to no alignment.
+ Align = CurDAG->getTargetConstant(0, MVT::i32);
return true;
}
@@ -1010,8 +1010,8 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ SDValue MemAddr, MemUpdate, MemOpc, Align;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1036,10 +1036,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
std::vector<EVT> ResTys(NumVecs, VT);
ResTys.push_back(MVT::Other);
- return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+ return CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1047,10 +1047,10 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Quad registers are directly supported for VLD2,
// loading 2 pairs of D regs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Chain };
+ const SDValue Ops[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
std::vector<EVT> ResTys(4, VT);
ResTys.push_back(MVT::Other);
- SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 4);
+ SDNode *VLd = CurDAG->getMachineNode(Opc, dl, ResTys, Ops, 5);
Chain = SDValue(VLd, 4);
// Combine the even and odd subregs to produce the result.
@@ -1071,14 +1071,15 @@ SDNode *ARMDAGToDAGISel::SelectVLD(SDValue Op, unsigned NumVecs,
// Load the even subregs.
unsigned Opc = QOpcodes0[OpcodeIndex];
- const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Chain };
- SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 4);
+ const SDValue OpsA[] = { MemAddr, MemUpdate, MemOpc, Align, Chain };
+ SDNode *VLdA = CurDAG->getMachineNode(Opc, dl, ResTys, OpsA, 5);
Chain = SDValue(VLdA, NumVecs+1);
// Load the odd subregs.
Opc = QOpcodes1[OpcodeIndex];
- const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc, Chain };
- SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 4);
+ const SDValue OpsB[] = { SDValue(VLdA, NumVecs), MemUpdate, MemOpc,
+ Align, Chain };
+ SDNode *VLdB = CurDAG->getMachineNode(Opc, dl, ResTys, OpsB, 5);
Chain = SDValue(VLdB, NumVecs+1);
// Combine the even and odd subregs to produce the result.
@@ -1098,8 +1099,8 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ SDValue MemAddr, MemUpdate, MemOpc, Align;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1126,13 +1127,14 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
Ops.push_back(MemOpc);
+ Ops.push_back(Align);
if (is64BitVector) {
unsigned Opc = DOpcodes[OpcodeIndex];
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
Ops.push_back(N->getOperand(Vec+3));
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+4);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), NumVecs+5);
}
EVT RegVT = GetNEONSubregVT(VT);
@@ -1147,7 +1149,7 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
N->getOperand(Vec+3)));
}
Ops.push_back(Chain);
- return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 8);
+ return CurDAG->getMachineNode(Opc, dl, MVT::Other, Ops.data(), 9);
}
// Otherwise, quad registers are stored with two separate instructions,
@@ -1163,18 +1165,18 @@ SDNode *ARMDAGToDAGISel::SelectVST(SDValue Op, unsigned NumVecs,
Ops.push_back(Chain);
unsigned Opc = QOpcodes0[OpcodeIndex];
SDNode *VStA = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+4);
+ MVT::Other, Ops.data(), NumVecs+5);
Chain = SDValue(VStA, 1);
// Store the odd subregs.
Ops[0] = SDValue(VStA, 0); // MemAddr
for (unsigned Vec = 0; Vec < NumVecs; ++Vec)
- Ops[Vec+3] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
+ Ops[Vec+4] = CurDAG->getTargetExtractSubreg(ARM::DSUBREG_1, dl, RegVT,
N->getOperand(Vec+3));
- Ops[NumVecs+3] = Chain;
+ Ops[NumVecs+4] = Chain;
Opc = QOpcodes1[OpcodeIndex];
SDNode *VStB = CurDAG->getMachineNode(Opc, dl, MemAddr.getValueType(),
- MVT::Other, Ops.data(), NumVecs+4);
+ MVT::Other, Ops.data(), NumVecs+5);
Chain = SDValue(VStB, 1);
ReplaceUses(SDValue(N, 0), Chain);
return NULL;
@@ -1188,8 +1190,8 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
SDNode *N = Op.getNode();
DebugLoc dl = N->getDebugLoc();
- SDValue MemAddr, MemUpdate, MemOpc;
- if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc))
+ SDValue MemAddr, MemUpdate, MemOpc, Align;
+ if (!SelectAddrMode6(Op, N->getOperand(2), MemAddr, MemUpdate, MemOpc, Align))
return NULL;
SDValue Chain = N->getOperand(0);
@@ -1226,6 +1228,7 @@ SDNode *ARMDAGToDAGISel::SelectVLDSTLane(SDValue Op, bool IsLoad,
Ops.push_back(MemAddr);
Ops.push_back(MemUpdate);
Ops.push_back(MemOpc);
+ Ops.push_back(Align);
unsigned Opc = 0;
if (is64BitVector) {
@@ -1463,8 +1466,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
}
break;
}
- case ARMISD::FMRRD:
- return CurDAG->getMachineNode(ARM::FMRRD, dl, MVT::i32, MVT::i32,
+ case ARMISD::VMOVRRD:
+ return CurDAG->getMachineNode(ARM::VMOVRRD, dl, MVT::i32, MVT::i32,
Op.getOperand(0), getAL(CurDAG),
CurDAG->getRegister(0, MVT::i32));
case ISD::UMUL_LOHI: {
@@ -1653,10 +1656,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
: ARM::MOVCCr;
break;
case MVT::f32:
- Opc = ARM::FCPYScc;
+ Opc = ARM::VMOVScc;
break;
case MVT::f64:
- Opc = ARM::FCPYDcc;
+ Opc = ARM::VMOVDcc;
break;
}
return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
@@ -1680,10 +1683,10 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) {
default: assert(false && "Illegal conditional move type!");
break;
case MVT::f32:
- Opc = ARM::FNEGScc;
+ Opc = ARM::VNEGScc;
break;
case MVT::f64:
- Opc = ARM::FNEGDcc;
+ Opc = ARM::VNEGDcc;
break;
}
return CurDAG->SelectNodeTo(Op.getNode(), Opc, VT, Ops, 5);
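The addrmode6 change earlier in this file threads a fourth value, Align, through SelectAddrMode6 and into every NEON VLD/VST machine node. For now it is always the constant 0 (no extra alignment information), but each operand array grows by one and the later operand indices shift accordingly, which is why the literal counts change from 4 to 5 (and NumVecs+4 to NumVecs+5) and why the store path now writes Ops[Vec+4] and Ops[NumVecs+4]. A rough sketch of the resulting layout, using placeholder names only and no real SelectionDAG types:

    #include <string>
    #include <vector>

    // Placeholder-name layout of a NEON store node's operands after the Align
    // operand is added.
    std::vector<std::string> vstOperandLayout(unsigned NumVecs) {
      std::vector<std::string> Ops;
      Ops.push_back("MemAddr");
      Ops.push_back("MemUpdate");             // writeback register, reg0 when unused
      Ops.push_back("MemOpc");                // addressing-mode opcode bits
      Ops.push_back("Align");                 // new operand, currently always 0
      for (unsigned i = 0; i != NumVecs; ++i)
        Ops.push_back("VecReg");              // the D/Q registers being stored
      Ops.push_back("Chain");
      return Ops;                             // size() == NumVecs + 5
    }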
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index b6ce5dd..c3af8e6 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -133,7 +133,7 @@ static TargetLoweringObjectFile *createTLOF(TargetMachine &TM) {
}
ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
- : TargetLowering(TM, createTLOF(TM)), ARMPCLabelIndex(0) {
+ : TargetLowering(TM, createTLOF(TM)) {
Subtarget = &TM.getSubtarget<ARMSubtarget>();
if (Subtarget->isTargetDarwin()) {
@@ -389,7 +389,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
if (!UseSoftFloat && Subtarget->hasVFP2() && !Subtarget->isThumb1Only())
- // Turn f64->i64 into FMRRD, i64 -> f64 to FMDRR iff target supports vfp2.
+ // Turn f64->i64 into VMOVRRD, i64 -> f64 to VMOVDRR iff target supports vfp2.
setOperationAction(ISD::BIT_CONVERT, MVT::i64, Custom);
// We want to custom lower some of our intrinsics.
@@ -434,7 +434,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
}
// We have target-specific dag combine patterns for the following nodes:
- // ARMISD::FMRRD - No need to call setTargetDAGCombine
+ // ARMISD::VMOVRRD - No need to call setTargetDAGCombine
setTargetDAGCombine(ISD::ADD);
setTargetDAGCombine(ISD::SUB);
@@ -493,8 +493,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::SRA_FLAG: return "ARMISD::SRA_FLAG";
case ARMISD::RRX: return "ARMISD::RRX";
- case ARMISD::FMRRD: return "ARMISD::FMRRD";
- case ARMISD::FMDRR: return "ARMISD::FMDRR";
+ case ARMISD::VMOVRRD: return "ARMISD::VMOVRRD";
+ case ARMISD::VMOVDRR: return "ARMISD::VMOVDRR";
case ARMISD::EH_SJLJ_SETJMP: return "ARMISD::EH_SJLJ_SETJMP";
case ARMISD::EH_SJLJ_LONGJMP:return "ARMISD::EH_SJLJ_LONGJMP";
@@ -790,7 +790,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
- Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
if (VA.getLocVT() == MVT::v2f64) {
SDValue Vec = DAG.getNode(ISD::UNDEF, dl, MVT::v2f64);
@@ -805,7 +805,7 @@ ARMTargetLowering::LowerCallResult(SDValue Chain, SDValue InFlag,
Hi = DAG.getCopyFromReg(Chain, dl, VA.getLocReg(), MVT::i32, InFlag);
Chain = Hi.getValue(1);
InFlag = Hi.getValue(2);
- Val = DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ Val = DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
Val = DAG.getNode(ISD::INSERT_VECTOR_ELT, dl, MVT::v2f64, Vec, Val,
DAG.getConstant(1, MVT::i32));
}
@@ -870,7 +870,7 @@ void ARMTargetLowering::PassF64ArgInRegs(DebugLoc dl, SelectionDAG &DAG,
SmallVector<SDValue, 8> &MemOpChains,
ISD::ArgFlagsTy Flags) {
- SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Arg);
RegsToPass.push_back(std::make_pair(VA.getLocReg(), fmrrd));
@@ -1004,6 +1004,8 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
bool isDirect = false;
bool isARMFunc = false;
bool isLocalARMFunc = false;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
GlobalValue *GV = G->getGlobal();
isDirect = true;
@@ -1015,6 +1017,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
isLocalARMFunc = !Subtarget->isThumb() && !isExt;
// tBX takes a register source operand.
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(GV,
ARMPCLabelIndex,
ARMCP::CPValue, 4);
@@ -1023,7 +1026,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else
@@ -1036,6 +1039,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// tBX takes a register source operand.
const char *Sym = S->getSymbol();
if (isARMFunc && Subtarget->isThumb1Only() && !Subtarget->hasV5TOps()) {
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(*DAG.getContext(),
Sym, ARMPCLabelIndex, 4);
SDValue CPAddr = DAG.getTargetConstantPool(CPV, getPointerTy(), 4);
@@ -1043,7 +1047,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getLoad(getPointerTy(), dl,
DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Callee = DAG.getNode(ARMISD::PIC_ADD, dl,
getPointerTy(), Callee, PICLabel);
} else
@@ -1145,7 +1149,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
// Extract the first half and return it in two registers.
SDValue Half = DAG.getNode(ISD::EXTRACT_VECTOR_ELT, dl, MVT::f64, Arg,
DAG.getConstant(0, MVT::i32));
- SDValue HalfGPRs = DAG.getNode(ARMISD::FMRRD, dl,
+ SDValue HalfGPRs = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), Half);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), HalfGPRs, Flag);
@@ -1162,7 +1166,7 @@ ARMTargetLowering::LowerReturn(SDValue Chain,
}
// Legalize ret f64 -> ret 2 x i32. We always have fmrrd if f64 is
// available.
- SDValue fmrrd = DAG.getNode(ARMISD::FMRRD, dl,
+ SDValue fmrrd = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), &Arg, 1);
Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(), fmrrd, Flag);
Flag = Chain.getValue(1);
@@ -1208,6 +1212,9 @@ static SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
}
SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
DebugLoc DL = Op.getDebugLoc();
EVT PtrVT = getPointerTy();
BlockAddress *BA = cast<BlockAddressSDNode>(Op)->getBlockAddress();
@@ -1217,6 +1224,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
CPAddr = DAG.getTargetConstantPool(BA, PtrVT, 4);
} else {
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
+ ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV = new ARMConstantPoolValue(BA, ARMPCLabelIndex,
ARMCP::CPBlockAddress,
PCAdj);
@@ -1227,7 +1235,7 @@ SDValue ARMTargetLowering::LowerBlockAddress(SDValue Op, SelectionDAG &DAG) {
PseudoSourceValue::getConstantPool(), 0);
if (RelocM == Reloc::Static)
return Result;
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, DL, PtrVT, Result, PICLabel);
}
@@ -1238,6 +1246,9 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
DebugLoc dl = GA->getDebugLoc();
EVT PtrVT = getPointerTy();
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
ARMCP::CPValue, PCAdj, "tlsgd", true);
@@ -1247,7 +1258,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
PseudoSourceValue::getConstantPool(), 0);
SDValue Chain = Argument.getValue(1);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Argument = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Argument, PICLabel);
// call __tls_get_addr.
@@ -1279,7 +1290,10 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
SDValue ThreadPointer = DAG.getNode(ARMISD::THREAD_POINTER, dl, PtrVT);
if (GV->isDeclaration()) {
- // initial exec model
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
+ // Initial exec model.
unsigned char PCAdj = Subtarget->isThumb() ? 4 : 8;
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GA->getGlobal(), ARMPCLabelIndex,
@@ -1290,7 +1304,7 @@ ARMTargetLowering::LowerToTLSExecModels(GlobalAddressSDNode *GA,
PseudoSourceValue::getConstantPool(), 0);
Chain = Offset.getValue(1);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Offset = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Offset, PICLabel);
Offset = DAG.getLoad(PtrVT, dl, Chain, Offset,
@@ -1355,6 +1369,9 @@ SDValue ARMTargetLowering::LowerGlobalAddressELF(SDValue Op,
SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = 0;
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
@@ -1363,6 +1380,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
if (RelocM == Reloc::Static)
CPAddr = DAG.getTargetConstantPool(GV, PtrVT, 4);
else {
+ ARMPCLabelIndex = AFI->createConstPoolEntryUId();
unsigned PCAdj = (RelocM != Reloc::PIC_) ? 0 : (Subtarget->isThumb()?4:8);
ARMConstantPoolValue *CPV =
new ARMConstantPoolValue(GV, ARMPCLabelIndex, ARMCP::CPValue, PCAdj);
@@ -1375,7 +1393,7 @@ SDValue ARMTargetLowering::LowerGlobalAddressDarwin(SDValue Op,
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
@@ -1390,6 +1408,9 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
SelectionDAG &DAG){
assert(Subtarget->isTargetELF() &&
"GLOBAL OFFSET TABLE not implemented for non-ELF targets");
+ MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
unsigned PCAdj = Subtarget->isThumb() ? 4 : 8;
@@ -1400,7 +1421,7 @@ SDValue ARMTargetLowering::LowerGLOBAL_OFFSET_TABLE(SDValue Op,
CPAddr = DAG.getNode(ARMISD::Wrapper, dl, MVT::i32, CPAddr);
SDValue Result = DAG.getLoad(PtrVT, dl, DAG.getEntryNode(), CPAddr,
PseudoSourceValue::getConstantPool(), 0);
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
return DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
@@ -1416,6 +1437,8 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
}
case Intrinsic::eh_sjlj_lsda: {
MachineFunction &MF = DAG.getMachineFunction();
+ ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
+ unsigned ARMPCLabelIndex = AFI->createConstPoolEntryUId();
EVT PtrVT = getPointerTy();
DebugLoc dl = Op.getDebugLoc();
Reloc::Model RelocM = getTargetMachine().getRelocationModel();
@@ -1433,7 +1456,7 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Result.getValue(1);
if (RelocM == Reloc::PIC_) {
- SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex++, MVT::i32);
+ SDValue PICLabel = DAG.getConstant(ARMPCLabelIndex, MVT::i32);
Result = DAG.getNode(ARMISD::PIC_ADD, dl, PtrVT, Result, PICLabel);
}
return Result;
@@ -1522,7 +1545,8 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
if (NextVA.isMemLoc()) {
unsigned ArgSize = NextVA.getLocVT().getSizeInBits()/8;
MachineFrameInfo *MFI = MF.getFrameInfo();
- int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(ArgSize, NextVA.getLocMemOffset(),
+ true, false);
// Create load node to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1533,7 +1557,7 @@ ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA,
ArgValue2 = DAG.getCopyFromReg(Root, dl, Reg, MVT::i32);
}
- return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, ArgValue, ArgValue2);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, ArgValue, ArgValue2);
}
SDValue
@@ -1636,7 +1660,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
assert(VA.getValVT() != MVT::i64 && "i64 should already be lowered");
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
+ true, false);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
@@ -1664,7 +1689,8 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
// the result of va_next.
AFI->setVarArgsRegSaveSize(VARegSaveSize);
VarArgsFrameIndex = MFI->CreateFixedObject(VARegSaveSize, ArgOffset +
- VARegSaveSize - VARegSize);
+ VARegSaveSize - VARegSize,
+ true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, getPointerTy());
SmallVector<SDValue, 4> MemOps;
@@ -1688,7 +1714,7 @@ ARMTargetLowering::LowerFormalArguments(SDValue Chain,
&MemOps[0], MemOps.size());
} else
// This will point to the next argument passed via stack.
- VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset);
+ VarArgsFrameIndex = MFI->CreateFixedObject(4, ArgOffset, true, false);
}
return Chain;
@@ -1710,46 +1736,41 @@ static bool isFloatingPointZero(SDValue Op) {
return false;
}
-static bool isLegalCmpImmediate(unsigned C, bool isThumb1Only) {
- return ( isThumb1Only && (C & ~255U) == 0) ||
- (!isThumb1Only && ARM_AM::getSOImmVal(C) != -1);
-}
-
/// Returns appropriate ARM CMP (cmp) and corresponding condition code for
/// the given operands.
-static SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
- SDValue &ARMCC, SelectionDAG &DAG, bool isThumb1Only,
- DebugLoc dl) {
+SDValue
+ARMTargetLowering::getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl) {
if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS.getNode())) {
unsigned C = RHSC->getZExtValue();
- if (!isLegalCmpImmediate(C, isThumb1Only)) {
+ if (!isLegalICmpImmediate(C)) {
// Constant does not fit, try adjusting it by one?
switch (CC) {
default: break;
case ISD::SETLT:
case ISD::SETGE:
- if (isLegalCmpImmediate(C-1, isThumb1Only)) {
+ if (isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETLT) ? ISD::SETLE : ISD::SETGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETULT:
case ISD::SETUGE:
- if (C > 0 && isLegalCmpImmediate(C-1, isThumb1Only)) {
+ if (C > 0 && isLegalICmpImmediate(C-1)) {
CC = (CC == ISD::SETULT) ? ISD::SETULE : ISD::SETUGT;
RHS = DAG.getConstant(C-1, MVT::i32);
}
break;
case ISD::SETLE:
case ISD::SETGT:
- if (isLegalCmpImmediate(C+1, isThumb1Only)) {
+ if (isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETLE) ? ISD::SETLT : ISD::SETGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
break;
case ISD::SETULE:
case ISD::SETUGT:
- if (C < 0xffffffff && isLegalCmpImmediate(C+1, isThumb1Only)) {
+ if (C < 0xffffffff && isLegalICmpImmediate(C+1)) {
CC = (CC == ISD::SETULE) ? ISD::SETULT : ISD::SETUGE;
RHS = DAG.getConstant(C+1, MVT::i32);
}
@@ -1785,8 +1806,7 @@ static SDValue getVFPCmp(SDValue LHS, SDValue RHS, SelectionDAG &DAG,
return DAG.getNode(ARMISD::FMSTAT, dl, MVT::Flag, Cmp);
}
-static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
EVT VT = Op.getValueType();
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -1798,7 +1818,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
if (LHS.getValueType() == MVT::i32) {
SDValue ARMCC;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
return DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC, CCR,Cmp);
}
@@ -1820,8 +1840,7 @@ static SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG,
return Result;
}
-static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = Op.getOperand(0);
ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(1))->get();
SDValue LHS = Op.getOperand(2);
@@ -1832,7 +1851,7 @@ static SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG,
if (LHS.getValueType() == MVT::i32) {
SDValue ARMCC;
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
- SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, ST->isThumb1Only(), dl);
+ SDValue Cmp = getARMCmp(LHS, RHS, CC, ARMCC, DAG, dl);
return DAG.getNode(ARMISD::BRCOND, dl, MVT::Other,
Chain, Dest, ARMCC, CCR,Cmp);
}
@@ -2049,16 +2068,16 @@ static SDValue ExpandBIT_CONVERT(SDNode *N, SelectionDAG &DAG) {
SDValue Op = N->getOperand(0);
DebugLoc dl = N->getDebugLoc();
if (N->getValueType(0) == MVT::f64) {
- // Turn i64->f64 into FMDRR.
+ // Turn i64->f64 into VMOVDRR.
SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(0, MVT::i32));
SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op,
DAG.getConstant(1, MVT::i32));
- return DAG.getNode(ARMISD::FMDRR, dl, MVT::f64, Lo, Hi);
+ return DAG.getNode(ARMISD::VMOVDRR, dl, MVT::f64, Lo, Hi);
}
- // Turn f64->i64 into FMRRD.
- SDValue Cvt = DAG.getNode(ARMISD::FMRRD, dl,
+ // Turn f64->i64 into VMOVRRD.
+ SDValue Cvt = DAG.getNode(ARMISD::VMOVRRD, dl,
DAG.getVTList(MVT::i32, MVT::i32), &Op, 1);
// Merge the pieces into a single i64 value.
@@ -2115,8 +2134,7 @@ static SDValue getOnesVector(EVT VT, SelectionDAG &DAG, DebugLoc dl) {
/// LowerShiftRightParts - Lower SRA_PARTS, which returns two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
-static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerShiftRightParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -2140,7 +2158,7 @@ static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, ST->isThumb1Only(), dl);
+ ARMCC, DAG, dl);
SDValue Hi = DAG.getNode(Opc, dl, VT, ShOpHi, ShAmt);
SDValue Lo = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, TrueVal, ARMCC,
CCR, Cmp);
@@ -2151,8 +2169,7 @@ static SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG,
/// LowerShiftLeftParts - Lower SHL_PARTS, which returns two
/// i32 values and take a 2 x i32 value to shift plus a shift amount.
-static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG,
- const ARMSubtarget *ST) {
+SDValue ARMTargetLowering::LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG) {
assert(Op.getNumOperands() == 3 && "Not a double-shift!");
EVT VT = Op.getValueType();
unsigned VTBits = VT.getSizeInBits();
@@ -2174,7 +2191,7 @@ static SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG,
SDValue FalseVal = DAG.getNode(ISD::OR, dl, VT, Tmp1, Tmp2);
SDValue CCR = DAG.getRegister(ARM::CPSR, MVT::i32);
SDValue Cmp = getARMCmp(ExtraShAmt, DAG.getConstant(0, MVT::i32), ISD::SETGE,
- ARMCC, DAG, ST->isThumb1Only(), dl);
+ ARMCC, DAG, dl);
SDValue Lo = DAG.getNode(ISD::SHL, dl, VT, ShOpLo, ShAmt);
SDValue Hi = DAG.getNode(ARMISD::CMOV, dl, VT, FalseVal, Tmp3, ARMCC,
CCR, Cmp);
@@ -2860,8 +2877,8 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
return Subtarget->isTargetDarwin() ? LowerGlobalAddressDarwin(Op, DAG) :
LowerGlobalAddressELF(Op, DAG);
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
- case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget);
- case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
@@ -2878,9 +2895,9 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SHL:
case ISD::SRL:
case ISD::SRA: return LowerShift(Op.getNode(), DAG, Subtarget);
- case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG, Subtarget);
+ case ISD::SHL_PARTS: return LowerShiftLeftParts(Op, DAG);
case ISD::SRL_PARTS:
- case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG, Subtarget);
+ case ISD::SRA_PARTS: return LowerShiftRightParts(Op, DAG);
case ISD::VSETCC: return LowerVSETCC(Op, DAG);
case ISD::BUILD_VECTOR: return LowerBUILD_VECTOR(Op, DAG);
case ISD::VECTOR_SHUFFLE: return LowerVECTOR_SHUFFLE(Op, DAG);
@@ -3155,12 +3172,12 @@ static SDValue PerformSUBCombine(SDNode *N,
return SDValue();
}
-/// PerformFMRRDCombine - Target-specific dag combine xforms for ARMISD::FMRRD.
-static SDValue PerformFMRRDCombine(SDNode *N,
+/// PerformVMOVRRDCombine - Target-specific dag combine xforms for ARMISD::VMOVRRD.
+static SDValue PerformVMOVRRDCombine(SDNode *N,
TargetLowering::DAGCombinerInfo &DCI) {
// fmrrd(fmdrr x, y) -> x,y
SDValue InDouble = N->getOperand(0);
- if (InDouble.getOpcode() == ARMISD::FMDRR)
+ if (InDouble.getOpcode() == ARMISD::VMOVDRR)
return DCI.CombineTo(N, InDouble.getOperand(0), InDouble.getOperand(1));
return SDValue();
}
@@ -3455,7 +3472,7 @@ SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
default: break;
case ISD::ADD: return PerformADDCombine(N, DCI);
case ISD::SUB: return PerformSUBCombine(N, DCI);
- case ARMISD::FMRRD: return PerformFMRRDCombine(N, DCI);
+ case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
case ISD::INTRINSIC_WO_CHAIN:
return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
@@ -3683,6 +3700,18 @@ bool ARMTargetLowering::isLegalAddressingMode(const AddrMode &AM,
return true;
}
+/// isLegalICmpImmediate - Return true if the specified immediate is legal
+/// icmp immediate, that is the target has icmp instructions which can compare
+/// a register against the immediate without having to materialize the
+/// immediate into a register.
+bool ARMTargetLowering::isLegalICmpImmediate(int64_t Imm) const {
+ if (!Subtarget->isThumb())
+ return ARM_AM::getSOImmVal(Imm) != -1;
+ if (Subtarget->isThumb2())
+ return ARM_AM::getT2SOImmVal(Imm) != -1;
+ return Imm >= 0 && Imm <= 255;
+}
+
static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
bool isSEXTLoad, SDValue &Base,
SDValue &Offset, bool &isInc,
@@ -3737,7 +3766,7 @@ static bool getARMIndexedAddressParts(SDNode *Ptr, EVT VT,
return true;
}
- // FIXME: Use FLDM / FSTM to emulate indexed FP load / store.
+ // FIXME: Use VLDM / VSTM to emulate indexed FP load / store.
return false;
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 9c7a91d..4f31f8a 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -62,8 +62,8 @@ namespace llvm {
SRA_FLAG, // V,Flag = sra_flag X -> sra X, 1 + save carry out.
RRX, // V = RRX X, Flag -> srl X, 1 + shift in carry flag.
- FMRRD, // double to two gprs.
- FMDRR, // Two gprs to double.
+ VMOVRRD, // double to two gprs.
+ VMOVDRR, // Two gprs to double.
EH_SJLJ_SETJMP, // SjLj exception handling setjmp.
EH_SJLJ_LONGJMP, // SjLj exception handling longjmp.
@@ -180,6 +180,12 @@ namespace llvm {
virtual bool isLegalAddressingMode(const AddrMode &AM, const Type *Ty)const;
bool isLegalT2ScaledAddressingMode(const AddrMode &AM, EVT VT) const;
+    /// isLegalICmpImmediate - Return true if the specified immediate is a
+    /// legal icmp immediate, that is, the target has icmp instructions which
+    /// can compare a register against the immediate without having to
+    /// materialize the immediate into a register.
+ virtual bool isLegalICmpImmediate(int64_t Imm) const;
+
/// getPreIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if the node's address
/// can be legally represented as pre-indexed load / store address.
@@ -278,8 +284,12 @@ namespace llvm {
SelectionDAG &DAG);
SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShiftRightParts(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerShiftLeftParts(SDValue Op, SelectionDAG &DAG);
SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl,
SDValue Chain,
@@ -315,6 +325,9 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG);
+
+ SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
+ SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl);
};
}
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 86bbe2a..87bb12b 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -80,22 +80,26 @@ bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
}
void ARMInstrInfo::
-reMaterialize(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator I,
- unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const {
+reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
DebugLoc dl = Orig->getDebugLoc();
- if (Orig->getOpcode() == ARM::MOVi2pieces) {
+ unsigned Opcode = Orig->getOpcode();
+ switch (Opcode) {
+ default:
+ break;
+ case ARM::MOVi2pieces: {
RI.emitLoadConstPool(MBB, I, dl,
DestReg, SubIdx,
Orig->getOperand(1).getImm(),
(ARMCC::CondCodes)Orig->getOperand(2).getImm(),
Orig->getOperand(3).getReg());
+ MachineInstr *NewMI = prior(I);
+ NewMI->getOperand(0).setSubReg(SubIdx);
return;
}
+ }
- MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig);
- MI->getOperand(0).setReg(DestReg);
- MBB.insert(I, MI);
+ return ARMBaseInstrInfo::reMaterialize(MBB, I, DestReg, SubIdx, Orig, TRI);
}
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 5d1678d..4319577 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -35,15 +35,16 @@ public:
// Return true if the block does not fall through.
bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
+ void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
+ unsigned DestReg, unsigned SubIdx,
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
+
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
///
const ARMRegisterInfo &getRegisterInfo() const { return RI; }
-
- void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
- unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const;
};
}
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index cbe80b4..3fe634e 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -340,9 +340,9 @@ def addrmode5 : Operand<i32>,
// addrmode6 := reg with optional writeback
//
def addrmode6 : Operand<i32>,
- ComplexPattern<i32, 3, "SelectAddrMode6", []> {
+ ComplexPattern<i32, 4, "SelectAddrMode6", []> {
let PrintMethod = "printAddrMode6Operand";
- let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm);
+ let MIOperandInfo = (ops GPR:$addr, GPR:$upd, i32imm, i32imm);
}
// addrmodepc := pc + reg
@@ -377,15 +377,13 @@ multiclass AsI1_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
let isCommutable = Commutable;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
}
@@ -396,24 +394,22 @@ let Defs = [CPSR] in {
multiclass AI1_bin_s_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0> {
def ri : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, opc, "s\t$dst, $a, $b",
+ IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def rr : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm,
- IIC_iALUr, opc, "s\t$dst, $a, $b",
+ IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]> {
let isCommutable = Commutable;
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
}
def rs : AI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, opc, "s\t$dst, $a, $b",
+ IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -435,7 +431,7 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
def rr : AI1<opcod, (outs), (ins GPR:$a, GPR:$b), DPFrm, IIC_iCMPr,
opc, "\t$a, $b",
[(opnode GPR:$a, GPR:$b)]> {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
let isCommutable = Commutable;
@@ -443,8 +439,6 @@ multiclass AI1_cmp_irs<bits<4> opcod, string opc, PatFrag opnode,
def rs : AI1<opcod, (outs), (ins GPR:$a, so_reg:$b), DPSoRegFrm, IIC_iCMPsr,
opc, "\t$a, $b",
[(opnode GPR:$a, so_reg:$b)]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -501,20 +495,22 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
let isCommutable = Commutable;
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
}
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUnused]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
- // Carry setting variants
+}
+// Carry setting variants
+let Defs = [CPSR] in {
+multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
- DPFrm, IIC_iALUi, !strconcat(opc, "s\t$dst, $a, $b"),
+ DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
@@ -522,26 +518,25 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
let Inst{25} = 1;
}
def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
- DPFrm, IIC_iALUr, !strconcat(opc, "s\t$dst, $a, $b"),
+ DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
}
def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
- DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "s\t$dst, $a, $b"),
+ DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
Requires<[IsARM, CarryDefIsUsed]> {
let Defs = [CPSR];
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
}
}
+}
//===----------------------------------------------------------------------===//
// Instructions
@@ -652,6 +647,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
let isReturn = 1, isTerminator = 1, isBarrier = 1 in
def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"bx", "\tlr", [(ARMretflag)]> {
+ let Inst{3-0} = 0b1110;
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
@@ -664,6 +660,7 @@ let isBranch = 1, isTerminator = 1, isBarrier = 1, isIndirectBranch = 1 in {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
+ let Inst{31-28} = 0b1110;
}
}
@@ -673,7 +670,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1, mayLoad = 1,
hasExtraDefRegAllocReq = 1 in
def LDM_RET : AXI4ld<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_Br, "ldm${p}${addr:submode}\t$addr, $wb",
+ LdStMulFrm, IIC_Br, "ldm${addr:submode}${p}\t$addr, $wb",
[]>;
// On non-Darwin platforms R9 is callee-saved.
@@ -762,6 +759,7 @@ let isBranch = 1, isTerminator = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
IIC_Br, "mov\tpc, $target \n$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
+ let Inst{15-12} = 0b1111;
let Inst{20} = 0; // S Bit
let Inst{24-21} = 0b1101;
let Inst{27-25} = 0b000;
@@ -771,6 +769,7 @@ let isBranch = 1, isTerminator = 1 in {
IIC_Br, "ldr\tpc, $target \n$jt",
[(ARMbrjt (i32 (load addrmode2:$target)), tjumptable:$jt,
imm:$id)]> {
+ let Inst{15-12} = 0b1111;
let Inst{20} = 1; // L bit
let Inst{21} = 0; // W bit
let Inst{22} = 0; // B bit
@@ -782,6 +781,7 @@ let isBranch = 1, isTerminator = 1 in {
IIC_Br, "add\tpc, $target, $idx \n$jt",
[(ARMbrjt (add GPR:$target, GPR:$idx), tjumptable:$jt,
imm:$id)]> {
+ let Inst{15-12} = 0b1111;
let Inst{20} = 0; // S bit
let Inst{24-21} = 0b0100;
let Inst{27-25} = 0b000;
@@ -813,26 +813,26 @@ def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
// Loads with zero extension
def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "h\t$dst, $addr",
+ IIC_iLoadr, "ldrh", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
- IIC_iLoadr, "ldr", "b\t$dst, $addr",
+ IIC_iLoadr, "ldrb", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
// Loads with sign extension
def LDRSH : AI3ldsh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "sh\t$dst, $addr",
+ IIC_iLoadr, "ldrsh", "\t$dst, $addr",
[(set GPR:$dst, (sextloadi16 addrmode3:$addr))]>;
def LDRSB : AI3ldsb<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "sb\t$dst, $addr",
+ IIC_iLoadr, "ldrsb", "\t$dst, $addr",
[(set GPR:$dst, (sextloadi8 addrmode3:$addr))]>;
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// Load doubleword
def LDRD : AI3ldd<(outs GPR:$dst1, GPR:$dst2), (ins addrmode3:$addr), LdMiscFrm,
- IIC_iLoadr, "ldr", "d\t$dst1, $addr",
+ IIC_iLoadr, "ldrd", "\t$dst1, $addr",
[]>, Requires<[IsARM, HasV5TE]>;
// Indexed loads
@@ -846,35 +846,35 @@ def LDR_POST : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
def LDRH_PRE : AI3ldhpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "h\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRH_POST : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "h\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRB_PRE : AI2ldbpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode2:$addr), LdFrm, IIC_iLoadru,
- "ldr", "b\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRB_POST : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
- "ldr", "b\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSH_PRE : AI3ldshpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "sh\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrsh", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSH_POST: AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "sh\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrsh", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
(ins addrmode3:$addr), LdMiscFrm, IIC_iLoadru,
- "ldr", "sb\t$dst, $addr!", "$addr.base = $base_wb", []>;
+ "ldrsb", "\t$dst, $addr!", "$addr.base = $base_wb", []>;
def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
- "ldr", "sb\t$dst, [$base], $offset", "$base = $base_wb", []>;
+ "ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
}
// Store
@@ -884,18 +884,18 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
// Stores with truncate
def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer,
- "str", "h\t$src, $addr",
+ "strh", "\t$src, $addr",
[(truncstorei16 GPR:$src, addrmode3:$addr)]>;
def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
- "str", "b\t$src, $addr",
+ "strb", "\t$src, $addr",
[(truncstorei8 GPR:$src, addrmode2:$addr)]>;
// Store doubleword
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
StMiscFrm, IIC_iStorer,
- "str", "d\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
+ "strd", "\t$src1, $addr", []>, Requires<[IsARM, HasV5TE]>;
// Indexed stores
def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
@@ -915,28 +915,28 @@ def STR_POST : AI2stwpo<(outs GPR:$base_wb),
def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
- "str", "h\t$src, [$base, $offset]!", "$base = $base_wb",
+ "strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
- "str", "h\t$src, [$base], $offset", "$base = $base_wb",
+ "strh", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti16 GPR:$src,
GPR:$base, am3offset:$offset))]>;
def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
- "str", "b\t$src, [$base, $offset]!", "$base = $base_wb",
+ "strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
(ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
- "str", "b\t$src, [$base], $offset", "$base = $base_wb",
+ "strb", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
@@ -947,13 +947,13 @@ def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def LDM : AXI4ld<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_iLoadm, "ldm${p}${addr:submode}\t$addr, $wb",
+ LdStMulFrm, IIC_iLoadm, "ldm${addr:submode}${p}\t$addr, $wb",
[]>;
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def STM : AXI4st<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- LdStMulFrm, IIC_iStorem, "stm${p}${addr:submode}\t$addr, $wb",
+ LdStMulFrm, IIC_iStorem, "stm${addr:submode}${p}\t$addr, $wb",
[]>;
//===----------------------------------------------------------------------===//
@@ -963,15 +963,13 @@ def STM : AXI4st<(outs),
let neverHasSideEffects = 1 in
def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
"mov", "\t$dst, $src", []>, UnaryDP {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
}
def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
"mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
@@ -1016,10 +1014,10 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
let Defs = [CPSR] in {
def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "mov", "s\t$dst, $src, lsr #1",
+ IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1",
[(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
- IIC_iMOVsi, "mov", "s\t$dst, $src, asr #1",
+ IIC_iMOVsi, "movs", "\t$dst, $src, asr #1",
[(set GPR:$dst, (ARMsra_flag GPR:$src))]>, UnaryDP;
}
@@ -1095,15 +1093,19 @@ defm SUB : AsI1_bin_irs<0b0010, "sub",
BinOpFrag<(sub node:$LHS, node:$RHS)>>;
// ADD and SUB with 's' bit set.
-defm ADDS : AI1_bin_s_irs<0b0100, "add",
- BinOpFrag<(addc node:$LHS, node:$RHS)>>;
-defm SUBS : AI1_bin_s_irs<0b0010, "sub",
+defm ADDS : AI1_bin_s_irs<0b0100, "adds",
+ BinOpFrag<(addc node:$LHS, node:$RHS)>, 1>;
+defm SUBS : AI1_bin_s_irs<0b0010, "subs",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
+ BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
+ BinOpFrag<(sube node:$LHS, node:$RHS)>>;
// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
@@ -1115,24 +1117,20 @@ def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
def RSBrs : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
IIC_iALUsr, "rsb", "\t$dst, $a, $b",
[(set GPR:$dst, (sub so_reg:$b, GPR:$a))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
// RSB with 's' bit set.
let Defs = [CPSR] in {
def RSBSri : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
- IIC_iALUi, "rsb", "s\t$dst, $a, $b",
+ IIC_iALUi, "rsbs", "\t$dst, $a, $b",
[(set GPR:$dst, (subc so_imm:$b, GPR:$a))]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
- IIC_iALUsr, "rsb", "s\t$dst, $a, $b",
+ IIC_iALUsr, "rsbs", "\t$dst, $a, $b",
[(set GPR:$dst, (subc so_reg:$b, GPR:$a))]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -1149,8 +1147,6 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
[(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
Requires<[IsARM, CarryDefIsUnused]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
}
@@ -1168,8 +1164,6 @@ def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b",
[(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
Requires<[IsARM, CarryDefIsUnused]> {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -1216,14 +1210,11 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
"mvn", "\t$dst, $src",
[(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
}
def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
IIC_iMOVsr, "mvn", "\t$dst, $src",
- [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP {
- let Inst{4} = 1;
- let Inst{7} = 0;
-}
+ [(set GPR:$dst, (not so_reg:$src))]>, UnaryDP;
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
IIC_iMOVi, "mvn", "\t$dst, $imm",
@@ -1536,7 +1527,7 @@ def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
IIC_iCMOVr, "mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP {
- let Inst{4} = 0;
+ let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
}
@@ -1545,8 +1536,6 @@ def MOVCCs : AI1<0b1101, (outs GPR:$dst),
"mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, so_reg:$true, imm:$cc, CCR:$ccr))*/]>,
RegConstraint<"$false = $dst">, UnaryDP {
- let Inst{4} = 1;
- let Inst{7} = 0;
let Inst{25} = 0;
}
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 25c4acd..e1353b7 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -102,6 +102,19 @@ def addrmode_neonldstm : Operand<i32>,
}
*/
+def h8imm : Operand<i8> {
+ let PrintMethod = "printHex8ImmOperand";
+}
+def h16imm : Operand<i16> {
+ let PrintMethod = "printHex16ImmOperand";
+}
+def h32imm : Operand<i32> {
+ let PrintMethod = "printHex32ImmOperand";
+}
+def h64imm : Operand<i64> {
+ let PrintMethod = "printHex64ImmOperand";
+}
+
//===----------------------------------------------------------------------===//
// NEON load / store instructions
//===----------------------------------------------------------------------===//
@@ -133,7 +146,7 @@ def VLDMS : NI<(outs),
// Use vldmia to load a Q register as a D register pair.
def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
IIC_fpLoadm,
- "vldmia $addr, ${dst:dregpair}",
+ "vldmia\t$addr, ${dst:dregpair}",
[(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
@@ -145,7 +158,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
// Use vstmia to store a Q register as a D register pair.
def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
IIC_fpStorem,
- "vstmia $addr, ${src:dregpair}",
+ "vstmia\t$addr, ${src:dregpair}",
[(store (v2f64 QPR:$src), addrmode4:$addr)]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
@@ -2282,7 +2295,7 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
// VMOV : Vector Move (Register)
-def VMOVD : N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
+def VMOVDneon: N3V<0, 0, 0b10, 0b0001, 0, 1, (outs DPR:$dst), (ins DPR:$src),
IIC_VMOVD, "vmov\t$dst, $src", "", []>;
def VMOVQ : N3V<0, 0, 0b10, 0b0001, 1, 1, (outs QPR:$dst), (ins QPR:$src),
IIC_VMOVD, "vmov\t$dst, $src", "", []>;
@@ -2325,38 +2338,38 @@ def vmovImm64 : PatLeaf<(build_vector), [{
// be encoded based on the immed values.
def VMOVv8i8 : N1ModImm<1, 0b000, 0b1110, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i8imm:$SIMM), IIC_VMOVImm,
+ (ins h8imm:$SIMM), IIC_VMOVImm,
"vmov.i8\t$dst, $SIMM", "",
[(set DPR:$dst, (v8i8 vmovImm8:$SIMM))]>;
def VMOVv16i8 : N1ModImm<1, 0b000, 0b1110, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i8imm:$SIMM), IIC_VMOVImm,
+ (ins h8imm:$SIMM), IIC_VMOVImm,
"vmov.i8\t$dst, $SIMM", "",
[(set QPR:$dst, (v16i8 vmovImm8:$SIMM))]>;
def VMOVv4i16 : N1ModImm<1, 0b000, 0b1000, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i16imm:$SIMM), IIC_VMOVImm,
+ (ins h16imm:$SIMM), IIC_VMOVImm,
"vmov.i16\t$dst, $SIMM", "",
[(set DPR:$dst, (v4i16 vmovImm16:$SIMM))]>;
def VMOVv8i16 : N1ModImm<1, 0b000, 0b1000, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i16imm:$SIMM), IIC_VMOVImm,
+ (ins h16imm:$SIMM), IIC_VMOVImm,
"vmov.i16\t$dst, $SIMM", "",
[(set QPR:$dst, (v8i16 vmovImm16:$SIMM))]>;
def VMOVv2i32 : N1ModImm<1, 0b000, 0b0000, 0, 0, 0, 1, (outs DPR:$dst),
- (ins i32imm:$SIMM), IIC_VMOVImm,
+ (ins h32imm:$SIMM), IIC_VMOVImm,
"vmov.i32\t$dst, $SIMM", "",
[(set DPR:$dst, (v2i32 vmovImm32:$SIMM))]>;
def VMOVv4i32 : N1ModImm<1, 0b000, 0b0000, 0, 1, 0, 1, (outs QPR:$dst),
- (ins i32imm:$SIMM), IIC_VMOVImm,
+ (ins h32imm:$SIMM), IIC_VMOVImm,
"vmov.i32\t$dst, $SIMM", "",
[(set QPR:$dst, (v4i32 vmovImm32:$SIMM))]>;
def VMOVv1i64 : N1ModImm<1, 0b000, 0b1110, 0, 0, 1, 1, (outs DPR:$dst),
- (ins i64imm:$SIMM), IIC_VMOVImm,
+ (ins h64imm:$SIMM), IIC_VMOVImm,
"vmov.i64\t$dst, $SIMM", "",
[(set DPR:$dst, (v1i64 vmovImm64:$SIMM))]>;
def VMOVv2i64 : N1ModImm<1, 0b000, 0b1110, 0, 1, 1, 1, (outs QPR:$dst),
- (ins i64imm:$SIMM), IIC_VMOVImm,
+ (ins h64imm:$SIMM), IIC_VMOVImm,
"vmov.i64\t$dst, $SIMM", "",
[(set QPR:$dst, (v2i64 vmovImm64:$SIMM))]>;
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 5d02925..2796364 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -740,3 +740,13 @@ def : T1Pat<(i32 thumb_immshifted:$src),
def : T1Pat<(i32 imm0_255_comp:$src),
(tMVN (tMOVi8 (imm_comp_XFORM imm:$src)))>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def tLDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary, "@ ldr.n\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb1Only]>;
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 5bfda37..1bb9bfd 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1179,3 +1179,13 @@ let isReMaterializable = 1 in
def t2MOVi32imm : T2Ix2<(outs GPR:$dst), (ins i32imm:$src), IIC_iMOVi,
"movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>;
+
+// Pseudo instruction that combines ldr from constpool and add pc. This should
+// be expanded into two instructions late to allow if-conversion and
+// scheduling.
+let isReMaterializable = 1 in
+def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
+ NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
+ [(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
+ imm:$cp))]>,
+ Requires<[IsThumb2]>;
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index 455c33b..ba341f4 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -17,7 +17,7 @@ def SDT_ITOF :
SDTypeProfile<1, 1, [SDTCisFP<0>, SDTCisVT<1, f32>]>;
def SDT_CMPFP0 :
SDTypeProfile<0, 1, [SDTCisFP<0>]>;
-def SDT_FMDRR :
+def SDT_VMOVDRR :
SDTypeProfile<1, 2, [SDTCisVT<0, f64>, SDTCisVT<1, i32>,
SDTCisSameAs<1, 2>]>;
@@ -28,7 +28,7 @@ def arm_uitof : SDNode<"ARMISD::UITOF", SDT_ITOF>;
def arm_fmstat : SDNode<"ARMISD::FMSTAT", SDTNone, [SDNPInFlag,SDNPOutFlag]>;
def arm_cmpfp : SDNode<"ARMISD::CMPFP", SDT_ARMCmp, [SDNPOutFlag]>;
def arm_cmpfp0 : SDNode<"ARMISD::CMPFPw0",SDT_CMPFP0, [SDNPOutFlag]>;
-def arm_fmdrr : SDNode<"ARMISD::FMDRR", SDT_FMDRR>;
+def arm_fmdrr : SDNode<"ARMISD::VMOVDRR", SDT_VMOVDRR>;
//===----------------------------------------------------------------------===//
// Operand Definitions.
@@ -55,21 +55,21 @@ def vfp_f64imm : Operand<f64>,
//
let canFoldAsLoad = 1 in {
-def FLDD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad64, "fldd", "\t$dst, $addr",
+def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
+ IIC_fpLoad64, "vldr", ".64\t$dst, $addr",
[(set DPR:$dst, (load addrmode5:$addr))]>;
-def FLDS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
- IIC_fpLoad32, "flds", "\t$dst, $addr",
+def VLDRS : ASI5<0b1101, 0b01, (outs SPR:$dst), (ins addrmode5:$addr),
+ IIC_fpLoad32, "vldr", ".32\t$dst, $addr",
[(set SPR:$dst, (load addrmode5:$addr))]>;
} // canFoldAsLoad
-def FSTD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
- IIC_fpStore64, "fstd", "\t$src, $addr",
+def VSTRD : ADI5<0b1101, 0b00, (outs), (ins DPR:$src, addrmode5:$addr),
+ IIC_fpStore64, "vstr", ".64\t$src, $addr",
[(store DPR:$src, addrmode5:$addr)]>;
-def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
- IIC_fpStore32, "fsts", "\t$src, $addr",
+def VSTRS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
+ IIC_fpStore32, "vstr", ".32\t$src, $addr",
[(store SPR:$src, addrmode5:$addr)]>;
//===----------------------------------------------------------------------===//
@@ -77,32 +77,32 @@ def FSTS : ASI5<0b1101, 0b00, (outs), (ins SPR:$src, addrmode5:$addr),
//
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
-def FLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VLDMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpLoadm,
- "fldm${addr:submode}d${p}\t${addr:base}, $wb",
+ "vldm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 1;
}
-def FLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VLDMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpLoadm,
- "fldm${addr:submode}s${p}\t${addr:base}, $wb",
+ "vldm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 1;
}
} // mayLoad, hasExtraDefRegAllocReq
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
-def FSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VSTMD : AXDI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpStorem,
- "fstm${addr:submode}d${p}\t${addr:base}, $wb",
+ "vstm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 0;
}
-def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
+def VSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
variable_ops), IIC_fpStorem,
- "fstm${addr:submode}s${p}\t${addr:base}, $wb",
+ "vstm${addr:submode}${p}\t${addr:base}, $wb",
[]> {
let Inst{20} = 0;
}
@@ -114,68 +114,68 @@ def FSTMS : AXSI5<(outs), (ins addrmode5:$addr, pred:$p, reglist:$wb,
// FP Binary Operations.
//
-def FADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "faddd", "\t$dst, $a, $b",
+def VADDD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpALU64, "vadd", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd DPR:$a, DPR:$b))]>;
-def FADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "fadds", "\t$dst, $a, $b",
+def VADDS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpALU32, "vadd", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fadd SPR:$a, SPR:$b))]>;
// These are encoded as unary instructions.
let Defs = [FPSCR] in {
-def FCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b),
- IIC_fpCMP64, "fcmped", "\t$a, $b",
+def VCMPED : ADuI<0b11101011, 0b0100, 0b1100, (outs), (ins DPR:$a, DPR:$b),
+ IIC_fpCMP64, "vcmpe", ".f64\t$a, $b",
[(arm_cmpfp DPR:$a, DPR:$b)]>;
-def FCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b),
- IIC_fpCMP32, "fcmpes", "\t$a, $b",
+def VCMPES : ASuI<0b11101011, 0b0100, 0b1100, (outs), (ins SPR:$a, SPR:$b),
+ IIC_fpCMP32, "vcmpe", ".f32\t$a, $b",
[(arm_cmpfp SPR:$a, SPR:$b)]>;
}
-def FDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpDIV64, "fdivd", "\t$dst, $a, $b",
+def VDIVD : ADbI<0b11101000, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpDIV64, "vdiv", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fdiv DPR:$a, DPR:$b))]>;
-def FDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpDIV32, "fdivs", "\t$dst, $a, $b",
+def VDIVS : ASbI<0b11101000, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpDIV32, "vdiv", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fdiv SPR:$a, SPR:$b))]>;
-def FMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "fmuld", "\t$dst, $a, $b",
+def VMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpMUL64, "vmul", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fmul DPR:$a, DPR:$b))]>;
-def FMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "fmuls", "\t$dst, $a, $b",
+def VMULS : ASbIn<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpMUL32, "vmul", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fmul SPR:$a, SPR:$b))]>;
-
-def FNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpMUL64, "fnmuld", "\t$dst, $a, $b",
+
+def VNMULD : ADbI<0b11100010, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpMUL64, "vnmul", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fneg (fmul DPR:$a, DPR:$b)))]> {
let Inst{6} = 1;
}
-def FNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpMUL32, "fnmuls", "\t$dst, $a, $b",
+def VNMULS : ASbI<0b11100010, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpMUL32, "vnmul", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fneg (fmul SPR:$a, SPR:$b)))]> {
let Inst{6} = 1;
}
// Match reassociated forms only if not sign dependent rounding.
def : Pat<(fmul (fneg DPR:$a), DPR:$b),
- (FNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+ (VNMULD DPR:$a, DPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
def : Pat<(fmul (fneg SPR:$a), SPR:$b),
- (FNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
+ (VNMULS SPR:$a, SPR:$b)>, Requires<[NoHonorSignDependentRounding]>;
-def FSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
- IIC_fpALU64, "fsubd", "\t$dst, $a, $b",
+def VSUBD : ADbI<0b11100011, (outs DPR:$dst), (ins DPR:$a, DPR:$b),
+ IIC_fpALU64, "vsub", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub DPR:$a, DPR:$b))]> {
let Inst{6} = 1;
}
-def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
- IIC_fpALU32, "fsubs", "\t$dst, $a, $b",
+def VSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
+ IIC_fpALU32, "vsub", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fsub SPR:$a, SPR:$b))]> {
let Inst{6} = 1;
}
@@ -184,31 +184,31 @@ def FSUBS : ASbIn<0b11100011, (outs SPR:$dst), (ins SPR:$a, SPR:$b),
// FP Unary Operations.
//
-def FABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fabsd", "\t$dst, $a",
+def VABSD : ADuI<0b11101011, 0b0000, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpUNA64, "vabs", ".f64\t$dst, $a",
[(set DPR:$dst, (fabs DPR:$a))]>;
-def FABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fabss", "\t$dst, $a",
+def VABSS : ASuIn<0b11101011, 0b0000, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "vabs", ".f32\t$dst, $a",
[(set SPR:$dst, (fabs SPR:$a))]>;
let Defs = [FPSCR] in {
-def FCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
- IIC_fpCMP64, "fcmpezd", "\t$a",
+def VCMPEZD : ADuI<0b11101011, 0b0101, 0b1100, (outs), (ins DPR:$a),
+ IIC_fpCMP64, "vcmpe", ".f64\t$a, #0",
[(arm_cmpfp0 DPR:$a)]>;
-def FCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a),
- IIC_fpCMP32, "fcmpezs", "\t$a",
+def VCMPEZS : ASuI<0b11101011, 0b0101, 0b1100, (outs), (ins SPR:$a),
+ IIC_fpCMP32, "vcmpe", ".f32\t$a, #0",
[(arm_cmpfp0 SPR:$a)]>;
}
-def FCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTDS, "fcvtds", "\t$dst, $a",
+def VCVTDS : ASuI<0b11101011, 0b0111, 0b1100, (outs DPR:$dst), (ins SPR:$a),
+ IIC_fpCVTDS, "vcvt", ".f64.f32\t$dst, $a",
[(set DPR:$dst, (fextend SPR:$a))]>;
// Special case encoding: bits 11-8 is 0b1011.
-def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
- IIC_fpCVTSD, "fcvtsd", "\t$dst, $a",
+def VCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
+ IIC_fpCVTSD, "vcvt", ".f32.f64\t$dst, $a",
[(set SPR:$dst, (fround DPR:$a))]> {
let Inst{27-23} = 0b11101;
let Inst{21-16} = 0b110111;
@@ -217,52 +217,52 @@ def FCVTSD : VFPAI<(outs SPR:$dst), (ins DPR:$a), VFPUnaryFrm,
}
let neverHasSideEffects = 1 in {
-def FCPYD : ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fcpyd", "\t$dst, $a", []>;
+def VMOVD: ADuI<0b11101011, 0b0000, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpUNA64, "vmov", ".f64\t$dst, $a", []>;
-def FCPYS : ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fcpys", "\t$dst, $a", []>;
+def VMOVS: ASuI<0b11101011, 0b0000, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "vmov", ".f32\t$dst, $a", []>;
} // neverHasSideEffects
-def FNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpUNA64, "fnegd", "\t$dst, $a",
+def VNEGD : ADuI<0b11101011, 0b0001, 0b0100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpUNA64, "vneg", ".f64\t$dst, $a",
[(set DPR:$dst, (fneg DPR:$a))]>;
-def FNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpUNA32, "fnegs", "\t$dst, $a",
+def VNEGS : ASuIn<0b11101011, 0b0001, 0b0100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpUNA32, "vneg", ".f32\t$dst, $a",
[(set SPR:$dst, (fneg SPR:$a))]>;
-def FSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
- IIC_fpSQRT64, "fsqrtd", "\t$dst, $a",
+def VSQRTD : ADuI<0b11101011, 0b0001, 0b1100, (outs DPR:$dst), (ins DPR:$a),
+ IIC_fpSQRT64, "vsqrt", ".f64\t$dst, $a",
[(set DPR:$dst, (fsqrt DPR:$a))]>;
-def FSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
- IIC_fpSQRT32, "fsqrts", "\t$dst, $a",
+def VSQRTS : ASuI<0b11101011, 0b0001, 0b1100, (outs SPR:$dst), (ins SPR:$a),
+ IIC_fpSQRT32, "vsqrt", ".f32\t$dst, $a",
[(set SPR:$dst, (fsqrt SPR:$a))]>;
//===----------------------------------------------------------------------===//
// FP <-> GPR Copies. Int <-> FP Conversions.
//
-def FMRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
- IIC_VMOVSI, "fmrs", "\t$dst, $src",
+def VMOVRS : AVConv2I<0b11100001, 0b1010, (outs GPR:$dst), (ins SPR:$src),
+ IIC_VMOVSI, "vmov", "\t$dst, $src",
[(set GPR:$dst, (bitconvert SPR:$src))]>;
-def FMSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
- IIC_VMOVIS, "fmsr", "\t$dst, $src",
+def VMOVSR : AVConv4I<0b11100000, 0b1010, (outs SPR:$dst), (ins GPR:$src),
+ IIC_VMOVIS, "vmov", "\t$dst, $src",
[(set SPR:$dst, (bitconvert GPR:$src))]>;
-def FMRRD : AVConv3I<0b11000101, 0b1011,
+def VMOVRRD : AVConv3I<0b11000101, 0b1011,
(outs GPR:$wb, GPR:$dst2), (ins DPR:$src),
- IIC_VMOVDI, "fmrrd", "\t$wb, $dst2, $src",
+ IIC_VMOVDI, "vmov", "\t$wb, $dst2, $src",
[/* FIXME: Can't write pattern for multiple result instr*/]>;
// FMDHR: GPR -> SPR
// FMDLR: GPR -> SPR
-def FMDRR : AVConv5I<0b11000100, 0b1011,
+def VMOVDRR : AVConv5I<0b11000100, 0b1011,
(outs DPR:$dst), (ins GPR:$src1, GPR:$src2),
- IIC_VMOVID, "fmdrr", "\t$dst, $src1, $src2",
+ IIC_VMOVID, "vmov", "\t$dst, $src1, $src2",
[(set DPR:$dst, (arm_fmdrr GPR:$src1, GPR:$src2))]>;
// FMRDH: SPR -> GPR
@@ -277,53 +277,53 @@ def FMDRR : AVConv5I<0b11000100, 0b1011,
// Int to FP:
-def FSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "fsitod", "\t$dst, $a",
+def VSITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+ IIC_fpCVTID, "vcvt", ".f64.s32\t$dst, $a",
[(set DPR:$dst, (arm_sitof SPR:$a))]> {
let Inst{7} = 1;
}
-def FSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
- IIC_fpCVTIS, "fsitos", "\t$dst, $a",
+def VSITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
+ IIC_fpCVTIS, "vcvt", ".f32.s32\t$dst, $a",
[(set SPR:$dst, (arm_sitof SPR:$a))]> {
let Inst{7} = 1;
}
-def FUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
- IIC_fpCVTID, "fuitod", "\t$dst, $a",
+def VUITOD : AVConv1I<0b11101011, 0b1000, 0b1011, (outs DPR:$dst), (ins SPR:$a),
+ IIC_fpCVTID, "vcvt", ".f64.u32\t$dst, $a",
[(set DPR:$dst, (arm_uitof SPR:$a))]>;
-def FUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
- IIC_fpCVTIS, "fuitos", "\t$dst, $a",
+def VUITOS : AVConv1In<0b11101011, 0b1000, 0b1010, (outs SPR:$dst),(ins SPR:$a),
+ IIC_fpCVTIS, "vcvt", ".f32.u32\t$dst, $a",
[(set SPR:$dst, (arm_uitof SPR:$a))]>;
// FP to Int:
// Always set Z bit in the instruction, i.e. "round towards zero" variants.
-def FTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011,
+def VTOSIZD : AVConv1I<0b11101011, 0b1101, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "ftosizd", "\t$dst, $a",
+ IIC_fpCVTDI, "vcvt", ".s32.f64\t$dst, $a",
[(set SPR:$dst, (arm_ftosi DPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010,
+def VTOSIZS : AVConv1In<0b11101011, 0b1101, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "ftosizs", "\t$dst, $a",
+ IIC_fpCVTSI, "vcvt", ".s32.f32\t$dst, $a",
[(set SPR:$dst, (arm_ftosi SPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011,
+def VTOUIZD : AVConv1I<0b11101011, 0b1100, 0b1011,
(outs SPR:$dst), (ins DPR:$a),
- IIC_fpCVTDI, "ftouizd", "\t$dst, $a",
+ IIC_fpCVTDI, "vcvt", ".u32.f64\t$dst, $a",
[(set SPR:$dst, (arm_ftoui DPR:$a))]> {
let Inst{7} = 1; // Z bit
}
-def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
+def VTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
(outs SPR:$dst), (ins SPR:$a),
- IIC_fpCVTSI, "ftouizs", "\t$dst, $a",
+ IIC_fpCVTSI, "vcvt", ".u32.f32\t$dst, $a",
[(set SPR:$dst, (arm_ftoui SPR:$a))]> {
let Inst{7} = 1; // Z bit
}
@@ -332,54 +332,54 @@ def FTOUIZS : AVConv1In<0b11101011, 0b1100, 0b1010,
// FP FMA Operations.
//
-def FMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fmacd", "\t$dst, $a, $b",
+def VMLAD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fmacs", "\t$dst, $a, $b",
+def VMLAS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vmla", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fadd (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fmscd", "\t$dst, $a, $b",
+def VNMLSD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vnmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fmul DPR:$a, DPR:$b), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fmscs", "\t$dst, $a, $b",
+def VNMLSS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vnmls", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fsub (fmul SPR:$a, SPR:$b), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst">;
-def FNMACD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fnmacd", "\t$dst, $a, $b",
+def VMLSD : ADbI<0b11100000, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vmls", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fadd (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
-def FNMACS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fnmacs", "\t$dst, $a, $b",
+def VMLSS : ASbIn<0b11100000, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vmls", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fadd (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
def : Pat<(fsub DPR:$dstin, (fmul DPR:$a, DPR:$b)),
- (FNMACD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
+ (VMLSD DPR:$dstin, DPR:$a, DPR:$b)>, Requires<[DontUseNEONForFP]>;
def : Pat<(fsub SPR:$dstin, (fmul SPR:$a, SPR:$b)),
- (FNMACS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
+ (VMLSS SPR:$dstin, SPR:$a, SPR:$b)>, Requires<[DontUseNEONForFP]>;
-def FNMSCD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
- IIC_fpMAC64, "fnmscd", "\t$dst, $a, $b",
+def VNMLAD : ADbI<0b11100001, (outs DPR:$dst), (ins DPR:$dstin, DPR:$a, DPR:$b),
+ IIC_fpMAC64, "vnmla", ".f64\t$dst, $a, $b",
[(set DPR:$dst, (fsub (fneg (fmul DPR:$a, DPR:$b)), DPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
}
-def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
- IIC_fpMAC32, "fnmscs", "\t$dst, $a, $b",
+def VNMLAS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
+ IIC_fpMAC32, "vnmla", ".f32\t$dst, $a, $b",
[(set SPR:$dst, (fsub (fneg (fmul SPR:$a, SPR:$b)), SPR:$dstin))]>,
RegConstraint<"$dstin = $dst"> {
let Inst{6} = 1;
@@ -389,27 +389,27 @@ def FNMSCS : ASbI<0b11100001, (outs SPR:$dst), (ins SPR:$dstin, SPR:$a, SPR:$b),
// FP Conditional moves.
//
-def FCPYDcc : ADuI<0b11101011, 0b0000, 0b0100,
+def VMOVDcc : ADuI<0b11101011, 0b0000, 0b0100,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "fcpyd", "\t$dst, $true",
+ IIC_fpUNA64, "vmov", ".f64\t$dst, $true",
[/*(set DPR:$dst, (ARMcmov DPR:$false, DPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-def FCPYScc : ASuI<0b11101011, 0b0000, 0b0100,
+def VMOVScc : ASuI<0b11101011, 0b0000, 0b0100,
(outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "fcpys", "\t$dst, $true",
+ IIC_fpUNA32, "vmov", ".f32\t$dst, $true",
[/*(set SPR:$dst, (ARMcmov SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-def FNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
+def VNEGDcc : ADuI<0b11101011, 0b0001, 0b0100,
(outs DPR:$dst), (ins DPR:$false, DPR:$true),
- IIC_fpUNA64, "fnegd", "\t$dst, $true",
+ IIC_fpUNA64, "vneg", ".f64\t$dst, $true",
[/*(set DPR:$dst, (ARMcneg DPR:$false, DPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
-def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
+def VNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
(outs SPR:$dst), (ins SPR:$false, SPR:$true),
- IIC_fpUNA32, "fnegs", "\t$dst, $true",
+ IIC_fpUNA32, "vneg", ".f32\t$dst, $true",
[/*(set SPR:$dst, (ARMcneg SPR:$false, SPR:$true, imm:$cc))*/]>,
RegConstraint<"$false = $dst">;
@@ -418,8 +418,11 @@ def FNEGScc : ASuI<0b11101011, 0b0001, 0b0100,
// Misc.
//
+// APSR is the application level alias of CPSR. This instruction copies the
+// FPSCR N, Z, C, V flags to APSR.
let Defs = [CPSR], Uses = [FPSCR] in
-def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "",
+def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "vmrs",
+ "\tapsr_nzcv, fpscr",
[(arm_fmstat)]> {
let Inst{27-20} = 0b11101111;
let Inst{19-16} = 0b0001;
@@ -431,26 +434,26 @@ def FMSTAT : VFPAI<(outs), (ins), VFPMiscFrm, IIC_fpSTAT, "fmstat", "",
// Materialize FP immediates. VFP3 only.
-let isReMaterializable = 1 in
-def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
+let isReMaterializable = 1 in {
+def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
VFPMiscFrm, IIC_VMOVImm,
- "fconsts", "\t$dst, $imm",
- [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
+ "fconstd", "\t$dst, $imm",
+ [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
let Inst{11-9} = 0b101;
- let Inst{8} = 0;
+ let Inst{8} = 1;
let Inst{7-4} = 0b0000;
}
-let isReMaterializable = 1 in
-def FCONSTD : VFPAI<(outs DPR:$dst), (ins vfp_f64imm:$imm),
+def FCONSTS : VFPAI<(outs SPR:$dst), (ins vfp_f32imm:$imm),
VFPMiscFrm, IIC_VMOVImm,
- "fconstd", "\t$dst, $imm",
- [(set DPR:$dst, vfp_f64imm:$imm)]>, Requires<[HasVFP3]> {
+ "fconsts", "\t$dst, $imm",
+ [(set SPR:$dst, vfp_f32imm:$imm)]>, Requires<[HasVFP3]> {
let Inst{27-23} = 0b11101;
let Inst{21-20} = 0b11;
let Inst{11-9} = 0b101;
- let Inst{8} = 1;
+ let Inst{8} = 0;
let Inst{7-4} = 0b0000;
}
+}
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 7e1783b..304d0ef 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -41,8 +41,8 @@ using namespace llvm;
STATISTIC(NumLDMGened , "Number of ldm instructions generated");
STATISTIC(NumSTMGened , "Number of stm instructions generated");
-STATISTIC(NumFLDMGened, "Number of fldm instructions generated");
-STATISTIC(NumFSTMGened, "Number of fstm instructions generated");
+STATISTIC(NumVLDMGened, "Number of vldm instructions generated");
+STATISTIC(NumVSTMGened, "Number of vstm instructions generated");
STATISTIC(NumLdStMoved, "Number of load / store instructions moved");
STATISTIC(NumLDRDFormed,"Number of ldrd created before allocation");
STATISTIC(NumSTRDFormed,"Number of strd created before allocation");
@@ -127,18 +127,18 @@ static int getLoadStoreMultipleOpcode(int Opcode) {
case ARM::t2STRi12:
NumSTMGened++;
return ARM::t2STM;
- case ARM::FLDS:
- NumFLDMGened++;
- return ARM::FLDMS;
- case ARM::FSTS:
- NumFSTMGened++;
- return ARM::FSTMS;
- case ARM::FLDD:
- NumFLDMGened++;
- return ARM::FLDMD;
- case ARM::FSTD:
- NumFSTMGened++;
- return ARM::FSTMD;
+ case ARM::VLDRS:
+ NumVLDMGened++;
+ return ARM::VLDMS;
+ case ARM::VSTRS:
+ NumVSTMGened++;
+ return ARM::VSTMS;
+ case ARM::VLDRD:
+ NumVLDMGened++;
+ return ARM::VLDMD;
+ case ARM::VSTRD:
+ NumVSTMGened++;
+ return ARM::VSTMD;
default: llvm_unreachable("Unhandled opcode!");
}
return 0;
@@ -229,8 +229,8 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB,
    BaseKill = true; // New base is always killed right after its use.
}
- bool isDPR = Opcode == ARM::FLDD || Opcode == ARM::FSTD;
- bool isDef = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ bool isDPR = Opcode == ARM::VLDRD || Opcode == ARM::VSTRD;
+ bool isDef = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
Opcode = getLoadStoreMultipleOpcode(Opcode);
MachineInstrBuilder MIB = (isAM4)
? BuildMI(MBB, MBBI, dl, TII->get(Opcode))
@@ -373,27 +373,27 @@ static inline unsigned getLSMultipleTransferSize(MachineInstr *MI) {
case ARM::t2LDRi12:
case ARM::t2STRi8:
case ARM::t2STRi12:
- case ARM::FLDS:
- case ARM::FSTS:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
return 4;
- case ARM::FLDD:
- case ARM::FSTD:
+ case ARM::VLDRD:
+ case ARM::VSTRD:
return 8;
case ARM::LDM:
case ARM::STM:
case ARM::t2LDM:
case ARM::t2STM:
return (MI->getNumOperands() - 5) * 4;
- case ARM::FLDMS:
- case ARM::FSTMS:
- case ARM::FLDMD:
- case ARM::FSTMD:
+ case ARM::VLDMS:
+ case ARM::VSTMS:
+ case ARM::VLDMD:
+ case ARM::VSTMD:
return ARM_AM::getAM5Offset(MI->getOperand(1).getImm()) * 4;
}
}
/// MergeBaseUpdateLSMultiple - Fold proceeding/trailing inc/dec of base
-/// register into the LDM/STM/FLDM{D|S}/FSTM{D|S} op when possible:
+/// register into the LDM/STM/VLDM{D|S}/VSTM{D|S} op when possible:
///
/// stmia rn, <ra, rb, rc>
/// rn := rn + 4 * 3;
@@ -475,7 +475,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
}
} else {
- // FLDM{D|S}, FSTM{D|S} addressing mode 5 ops.
+ // VLDM{D|S}, VSTM{D|S} addressing mode 5 ops.
if (ARM_AM::getAM5WBFlag(MI->getOperand(1).getImm()))
return false;
@@ -517,10 +517,10 @@ static unsigned getPreIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
case ARM::LDR: return ARM::LDR_PRE;
case ARM::STR: return ARM::STR_PRE;
- case ARM::FLDS: return ARM::FLDMS;
- case ARM::FLDD: return ARM::FLDMD;
- case ARM::FSTS: return ARM::FSTMS;
- case ARM::FSTD: return ARM::FSTMD;
+ case ARM::VLDRS: return ARM::VLDMS;
+ case ARM::VLDRD: return ARM::VLDMD;
+ case ARM::VSTRS: return ARM::VSTMS;
+ case ARM::VSTRD: return ARM::VSTMD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_PRE;
@@ -536,10 +536,10 @@ static unsigned getPostIndexedLoadStoreOpcode(unsigned Opc) {
switch (Opc) {
case ARM::LDR: return ARM::LDR_POST;
case ARM::STR: return ARM::STR_POST;
- case ARM::FLDS: return ARM::FLDMS;
- case ARM::FLDD: return ARM::FLDMD;
- case ARM::FSTS: return ARM::FSTMS;
- case ARM::FSTD: return ARM::FSTMD;
+ case ARM::VLDRS: return ARM::VLDMS;
+ case ARM::VLDRD: return ARM::VLDMD;
+ case ARM::VSTRS: return ARM::VSTMS;
+ case ARM::VSTRD: return ARM::VSTMD;
case ARM::t2LDRi8:
case ARM::t2LDRi12:
return ARM::t2LDR_POST;
@@ -564,8 +564,8 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
unsigned Bytes = getLSMultipleTransferSize(MI);
int Opcode = MI->getOpcode();
DebugLoc dl = MI->getDebugLoc();
- bool isAM5 = Opcode == ARM::FLDD || Opcode == ARM::FLDS ||
- Opcode == ARM::FSTD || Opcode == ARM::FSTS;
+ bool isAM5 = Opcode == ARM::VLDRD || Opcode == ARM::VLDRS ||
+ Opcode == ARM::VSTRD || Opcode == ARM::VSTRS;
bool isAM2 = Opcode == ARM::LDR || Opcode == ARM::STR;
if (isAM2 && ARM_AM::getAM2Offset(MI->getOperand(3).getImm()) != 0)
return false;
@@ -575,7 +575,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (MI->getOperand(2).getImm() != 0)
return false;
- bool isLd = isi32Load(Opcode) || Opcode == ARM::FLDS || Opcode == ARM::FLDD;
+ bool isLd = isi32Load(Opcode) || Opcode == ARM::VLDRS || Opcode == ARM::VLDRD;
// Can't do the merge if the destination register is the same as the would-be
// writeback register.
if (isLd && MI->getOperand(0).getReg() == Base)
@@ -626,7 +626,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
if (!DoMerge)
return false;
- bool isDPR = NewOpc == ARM::FLDMD || NewOpc == ARM::FSTMD;
+ bool isDPR = NewOpc == ARM::VLDMD || NewOpc == ARM::VSTMD;
unsigned Offset = 0;
if (isAM5)
Offset = ARM_AM::getAM5Opc((AddSub == ARM_AM::sub)
@@ -638,7 +638,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
Offset = AddSub == ARM_AM::sub ? -Bytes : Bytes;
if (isLd) {
if (isAM5)
- // FLDMS, FLDMD
+ // VLDMS, VLDMD
BuildMI(MBB, MBBI, dl, TII->get(NewOpc))
.addReg(Base, getKillRegState(BaseKill))
.addImm(Offset).addImm(Pred).addReg(PredReg)
@@ -657,7 +657,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
} else {
MachineOperand &MO = MI->getOperand(0);
if (isAM5)
- // FSTMS, FSTMD
+ // VSTMS, VSTMD
BuildMI(MBB, MBBI, dl, TII->get(NewOpc)).addReg(Base).addImm(Offset)
.addImm(Pred).addReg(PredReg)
.addReg(Base, getDefRegState(true)) // WB base register
@@ -687,11 +687,11 @@ static bool isMemoryOp(const MachineInstr *MI) {
case ARM::LDR:
case ARM::STR:
return MI->getOperand(1).isReg() && MI->getOperand(2).getReg() == 0;
- case ARM::FLDS:
- case ARM::FSTS:
+ case ARM::VLDRS:
+ case ARM::VSTRS:
return MI->getOperand(1).isReg();
- case ARM::FLDD:
- case ARM::FSTD:
+ case ARM::VLDRD:
+ case ARM::VSTRD:
return MI->getOperand(1).isReg();
case ARM::t2LDRi8:
case ARM::t2LDRi12:
@@ -866,6 +866,13 @@ bool ARMLoadStoreOpt::FixInvalidRegPairOp(MachineBasicBlock &MBB,
BaseReg, BaseKill, BaseUndef, OffReg, OffKill, OffUndef,
Pred, PredReg, TII, isT2);
} else {
+ if (OddReg == EvenReg && EvenDeadKill) {
+ // If the two source operands are the same, the kill marker is probably
+ // on the first one. e.g.
+ // t2STRDi8 %R5<kill>, %R5, %R9<kill>, 0, 14, %reg0
+ EvenDeadKill = false;
+ OddDeadKill = true;
+ }
InsertLDR_STR(MBB, MBBI, OffImm, isLd, dl, NewOpc,
EvenReg, EvenDeadKill, EvenUndef,
BaseReg, false, BaseUndef, OffReg, false, OffUndef,
@@ -1214,7 +1221,7 @@ ARMPreAllocLoadStoreOpt::CanFormLdStDWord(MachineInstr *Op0, MachineInstr *Op1,
if (!STI->hasV5TEOps())
return false;
- // FIXME: FLDS / FSTS -> FLDD / FSTD
+ // FIXME: VLDRS / VSTRS -> VLDRD / VSTRD
unsigned Scale = 1;
unsigned Opcode = Op0->getOpcode();
if (Opcode == ARM::LDR)
@@ -1456,7 +1463,7 @@ ARMPreAllocLoadStoreOpt::RescheduleLoadStoreInstrs(MachineBasicBlock *MBB) {
continue;
int Opc = MI->getOpcode();
- bool isLd = isi32Load(Opc) || Opc == ARM::FLDS || Opc == ARM::FLDD;
+ bool isLd = isi32Load(Opc) || Opc == ARM::VLDRS || Opc == ARM::VLDRD;
unsigned Base = MI->getOperand(1).getReg();
int Offset = getMemoryOpOffset(MI);
diff --git a/lib/Target/ARM/ARMSubtarget.cpp b/lib/Target/ARM/ARMSubtarget.cpp
index 5af95c3..432ed78 100644
--- a/lib/Target/ARM/ARMSubtarget.cpp
+++ b/lib/Target/ARM/ARMSubtarget.cpp
@@ -16,6 +16,7 @@
#include "llvm/GlobalValue.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Support/CommandLine.h"
+#include "llvm/ADT/SmallVector.h"
using namespace llvm;
static cl::opt<bool>
@@ -108,6 +109,8 @@ ARMSubtarget::ARMSubtarget(const std::string &TT, const std::string &FS,
if (UseNEONFP.getPosition() == 0)
UseNEONForSinglePrecisionFP = true;
}
+ HasBranchTargetBuffer = (CPUString == "cortex-a8" ||
+ CPUString == "cortex-a9");
}
/// GVIsIndirectSymbol - true if the GV will be accessed via an indirect symbol.
@@ -159,3 +162,13 @@ ARMSubtarget::GVIsIndirectSymbol(GlobalValue *GV, Reloc::Model RelocM) const {
return false;
}
+
+bool ARMSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ CriticalPathRCs.push_back(&ARM::GPRRegClass);
+ return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
+}
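
// Illustrative sketch (not part of the patch): how a post-RA scheduling pass
// might consume the widened hook added above. The wrapper function, its name,
// and the SmallVector size are assumptions; only the hook's signature and the
// ARM behaviour come from the hunks in this file.
static bool wantsPostRASched(const ARMSubtarget &ST, CodeGenOpt::Level OptLevel) {
  TargetSubtarget::AntiDepBreakMode Mode;
  SmallVector<TargetRegisterClass*, 2> CriticalPathRCs;
  // For ARM this sets Mode to ANTIDEP_CRITICAL and pushes GPRRegClass, and it
  // returns true only when PostRAScheduler is set and the optimization level
  // is CodeGenOpt::Default or above.
  return ST.enablePostRAScheduler(OptLevel, Mode, CriticalPathRCs);
}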
diff --git a/lib/Target/ARM/ARMSubtarget.h b/lib/Target/ARM/ARMSubtarget.h
index e721a7f..3d0e01e 100644
--- a/lib/Target/ARM/ARMSubtarget.h
+++ b/lib/Target/ARM/ARMSubtarget.h
@@ -17,6 +17,7 @@
#include "llvm/Target/TargetInstrItineraries.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetSubtarget.h"
+#include "ARMBaseRegisterInfo.h"
#include <string>
namespace llvm {
@@ -49,6 +50,9 @@ protected:
/// determine if NEON should actually be used.
bool UseNEONForSinglePrecisionFP;
+ /// HasBranchTargetBuffer - True if processor can predict indirect branches.
+ bool HasBranchTargetBuffer;
+
/// IsThumb - True if we are in thumb mode, false if in ARM mode.
bool IsThumb;
@@ -122,17 +126,16 @@ protected:
bool isThumb2() const { return IsThumb && (ThumbMode == Thumb2); }
bool hasThumb2() const { return ThumbMode >= Thumb2; }
+ bool hasBranchTargetBuffer() const { return HasBranchTargetBuffer; }
+
bool isR9Reserved() const { return IsR9Reserved; }
const std::string & getCPUString() const { return CPUString; }
- /// enablePostRAScheduler - True at 'More' optimization except
- /// for Thumb1.
+ /// enablePostRAScheduler - True at 'More' optimization.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& mode) const {
- mode = TargetSubtarget::ANTIDEP_CRITICAL;
- return PostRAScheduler && OptLevel >= CodeGenOpt::Default;
- }
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
/// getInstrItins - Return the instruction itineraies based on subtarget
/// selection.
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index b4ce1d7..2564ed9 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -21,8 +21,7 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static const MCAsmInfo *createMCAsmInfo(const Target &T,
- const StringRef &TT) {
+static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
case Triple::Darwin:
@@ -61,8 +60,8 @@ ARMTargetMachine::ARMTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: ARMBaseTargetMachine(T, TT, FS, false), InstrInfo(Subtarget),
DataLayout(Subtarget.isAPCS_ABI() ?
- std::string("e-p:32:32-f64:32:32-i64:32:32") :
- std::string("e-p:32:32-f64:64:64-i64:64:64")),
+ std::string("e-p:32:32-f64:32:32-i64:32:32-n32") :
+ std::string("e-p:32:32-f64:64:64-i64:64:64-n32")),
TLInfo(*this) {
}
@@ -74,9 +73,9 @@ ThumbTargetMachine::ThumbTargetMachine(const Target &T, const std::string &TT,
: ((ARMBaseInstrInfo*)new Thumb1InstrInfo(Subtarget))),
DataLayout(Subtarget.isAPCS_ABI() ?
std::string("e-p:32:32-f64:32:32-i64:32:32-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32") :
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32") :
std::string("e-p:32:32-f64:64:64-i64:64:64-"
- "i16:16:32-i8:8:32-i1:8:32-a:0:32")),
+ "i16:16:32-i8:8:32-i1:8:32-a:0:32-n32")),
TLInfo(*this) {
}
@@ -94,6 +93,10 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
if (Subtarget.hasNEON())
PM.add(createNEONPreAllocPass());
+ // Calculate and set max stack object alignment early, so we can decide
+ // whether we will need stack realignment (and thus FP).
+ PM.add(createARMMaxStackAlignmentCalculatorPass());
+
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass(true));
@@ -106,6 +109,10 @@ bool ARMBaseTargetMachine::addPreSched2(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
PM.add(createARMLoadStoreOptimizationPass());
+ // Expand some pseudo instructions into multiple instructions to allow
+ // proper scheduling.
+ PM.add(createARMExpandPseudoPass());
+
return true;
}
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 6cb3e9e4..0352503 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -43,6 +43,7 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
#include "llvm/ADT/StringSet.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -138,6 +139,19 @@ namespace {
void printVFPf32ImmOperand(const MachineInstr *MI, int OpNum);
void printVFPf64ImmOperand(const MachineInstr *MI, int OpNum);
+ void printHex8ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xff);
+ }
+ void printHex16ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffff);
+ }
+ void printHex32ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm() & 0xffffffff);
+ }
+ void printHex64ImmOperand(const MachineInstr *MI, int OpNum) {
+ O << "#0x" << utohexstr(MI->getOperand(OpNum).getImm());
+ }
+
virtual bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
unsigned AsmVariant, const char *ExtraCode);
virtual bool PrintAsmMemoryOperand(const MachineInstr *MI, unsigned OpNum,
@@ -199,7 +213,7 @@ namespace {
if (ACPV->hasModifier()) O << "(" << ACPV->getModifier() << ")";
if (ACPV->getPCAdjustment() != 0) {
O << "-(" << MAI->getPrivateGlobalPrefix() << "PC"
- << ACPV->getLabelId()
+ << getFunctionNumber() << "_" << ACPV->getLabelId()
<< "+" << (unsigned)ACPV->getPCAdjustment();
if (ACPV->mustAddCurrentAddress())
O << "-.";
@@ -333,6 +347,7 @@ void ARMAsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
&ARM::DPR_VFP2RegClass);
O << getRegisterName(DReg) << '[' << (RegNum & 1) << ']';
} else {
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
O << getRegisterName(Reg);
}
break;
@@ -594,12 +609,7 @@ void ARMAsmPrinter::printAddrMode5Operand(const MachineInstr *MI, int Op,
if (Modifier && strcmp(Modifier, "submode") == 0) {
ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- if (MO1.getReg() == ARM::SP) {
- bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
- MI->getOpcode() == ARM::FLDMS);
- O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
- } else
- O << ARM_AM::getAMSubModeStr(Mode);
+ O << ARM_AM::getAMSubModeStr(Mode);
return;
} else if (Modifier && strcmp(Modifier, "base") == 0) {
    // Used for FSTM{D|S} and FLDM{D|S} operations.
@@ -623,9 +633,14 @@ void ARMAsmPrinter::printAddrMode6Operand(const MachineInstr *MI, int Op) {
const MachineOperand &MO1 = MI->getOperand(Op);
const MachineOperand &MO2 = MI->getOperand(Op+1);
const MachineOperand &MO3 = MI->getOperand(Op+2);
+ const MachineOperand &MO4 = MI->getOperand(Op+3);
- // FIXME: No support yet for specifying alignment.
- O << "[" << getRegisterName(MO1.getReg()) << "]";
+ O << "[" << getRegisterName(MO1.getReg());
+ if (MO4.getImm()) {
+ // FIXME: Both darwin as and GNU as violate ARM docs here.
+ O << ", :" << MO4.getImm();
+ }
+ O << "]";
if (ARM_AM::getAM6WBFlag(MO3.getImm())) {
if (MO2.getReg() == 0)
@@ -697,11 +712,8 @@ ARMAsmPrinter::printThumbAddrModeRI5Operand(const MachineInstr *MI, int Op,
O << "[" << getRegisterName(MO1.getReg());
if (MO3.getReg())
O << ", " << getRegisterName(MO3.getReg());
- else if (unsigned ImmOffs = MO2.getImm()) {
- O << ", #" << ImmOffs;
- if (Scale > 1)
- O << " * " << Scale;
- }
+ else if (unsigned ImmOffs = MO2.getImm())
+ O << ", #" << ImmOffs * Scale;
O << "]";
}
@@ -844,7 +856,8 @@ void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){
void ARMAsmPrinter::printPCLabel(const MachineInstr *MI, int OpNum) {
int Id = (int)MI->getOperand(OpNum).getImm();
- O << MAI->getPrivateGlobalPrefix() << "PC" << Id;
+ O << MAI->getPrivateGlobalPrefix()
+ << "PC" << getFunctionNumber() << "_" << Id;
}
void ARMAsmPrinter::printRegisterList(const MachineInstr *MI, int OpNum) {
@@ -1070,7 +1083,7 @@ void ARMAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
printInstruction(MI);
}
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
processDebugLoc(MI, false);
@@ -1107,9 +1120,8 @@ void ARMAsmPrinter::EmitStartOfAsmFile(Module &M) {
}
}
- // Use unified assembler syntax mode for Thumb.
- if (Subtarget->isThumb())
- O << "\t.syntax unified\n";
+ // Use unified assembler syntax.
+ O << "\t.syntax unified\n";
// Emit ARM Build Attributes
if (Subtarget->isTargetELF()) {
@@ -1349,7 +1361,6 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
printKill(MI);
return;
case TargetInstrInfo::INLINEASM:
- O << '\t';
printInlineAsm(MI);
return;
case TargetInstrInfo::IMPLICIT_DEF:
@@ -1365,7 +1376,8 @@ void ARMAsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
// FIXME: MOVE TO SHARED PLACE.
unsigned Id = (unsigned)MI->getOperand(2).getImm();
const char *Prefix = MAI->getPrivateGlobalPrefix();
- MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)+"PC"+Twine(Id));
+ MCSymbol *Label =OutContext.GetOrCreateSymbol(Twine(Prefix)
+ + "PC" + Twine(getFunctionNumber()) + "_" + Twine(Id));
OutStreamer.EmitLabel(Label);
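
// Illustrative sketch (not part of the patch): the pic PC label emitted above
// now embeds the function number, so the same constant-pool label id used in
// two different functions no longer produces the same symbol. The stand-alone
// helper below is hypothetical; it only mirrors the string the printer builds.
static std::string pcLabelName(const char *Prefix, unsigned FnNum, unsigned Id) {
  return std::string(Prefix) + "PC" + llvm::utostr(FnNum) + "_" + llvm::utostr(Id);
}
// pcLabelName("L", 3, 0) yields "LPC3_0"; before this change the result was
// "LPC0" in every function, which could collide when several functions in one
// module use the same label id.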
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index f422798..0047925 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -259,12 +259,7 @@ void ARMInstPrinter::printAddrMode5Operand(const MCInst *MI, unsigned OpNum,
if (Modifier && strcmp(Modifier, "submode") == 0) {
ARM_AM::AMSubMode Mode = ARM_AM::getAM5SubMode(MO2.getImm());
- if (MO1.getReg() == ARM::SP) {
- bool isFLDM = (MI->getOpcode() == ARM::FLDMD ||
- MI->getOpcode() == ARM::FLDMS);
- O << ARM_AM::getAMSubModeAltStr(Mode, isFLDM);
- } else
- O << ARM_AM::getAMSubModeStr(Mode);
+ O << ARM_AM::getAMSubModeStr(Mode);
return;
} else if (Modifier && strcmp(Modifier, "base") == 0) {
// Used for FSTM{D|S} and LSTM{D|S} operations.
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index 5bf966b..9e7f8d5 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -80,6 +80,10 @@ public:
void printNoHashImmediate(const MCInst *MI, unsigned OpNum);
void printVFPf32ImmOperand(const MCInst *MI, int OpNum) {}
void printVFPf64ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex8ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex16ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex32ImmOperand(const MCInst *MI, int OpNum) {}
+ void printHex64ImmOperand(const MCInst *MI, int OpNum) {}
void printPCLabel(const MCInst *MI, unsigned OpNum);
// FIXME: Implement.
diff --git a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
index 8686961..c49fee3 100644
--- a/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMMCInstLower.cpp
@@ -137,6 +137,7 @@ void ARMMCInstLower::Lower(const MachineInstr *MI, MCInst &OutMI) const {
case MachineOperand::MO_Register:
// Ignore all implicit register operands.
if (MO.isImplicit()) continue;
+ assert(!MO.getSubReg() && "Subregs should be eliminated!");
MCOp = MCOperand::CreateReg(MO.getReg());
break;
case MachineOperand::MO_Immediate:
diff --git a/lib/Target/ARM/CMakeLists.txt b/lib/Target/ARM/CMakeLists.txt
index e071b61..964551f 100644
--- a/lib/Target/ARM/CMakeLists.txt
+++ b/lib/Target/ARM/CMakeLists.txt
@@ -17,6 +17,7 @@ add_llvm_target(ARMCodeGen
ARMCodeEmitter.cpp
ARMConstantIslandPass.cpp
ARMConstantPoolValue.cpp
+ ARMExpandPseudoInsts.cpp
ARMISelDAGToDAG.cpp
ARMISelLowering.cpp
ARMInstrInfo.cpp
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index f307e3b..7d767ec 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -54,10 +54,10 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
NextMII = next(MII);
MachineInstr *MI = &*MII;
- if (MI->getOpcode() == ARM::FCPYD &&
+ if (MI->getOpcode() == ARM::VMOVD &&
!TII->isPredicated(MI)) {
unsigned SrcReg = MI->getOperand(1).getReg();
- // If we do not found an instruction defining the reg, this means the
+ // If we do not find an instruction defining the reg, this means the
// register should be live-in for this BB. It's always better to use
// NEON reg-reg moves.
unsigned Domain = ARMII::DomainNEON;
@@ -71,7 +71,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
}
if (Domain & ARMII::DomainNEON) {
- // Convert FCPYD to VMOVD.
+ // Convert VMOVD to VMOVDneon.
unsigned DestReg = MI->getOperand(0).getReg();
DEBUG({errs() << "vmov convert: "; MI->dump();});
@@ -82,7 +82,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
// - The imp-defs / imp-uses are superregs only, we don't care about
// them.
BuildMI(MBB, *MI, MI->getDebugLoc(),
- TII->get(ARM::VMOVD), DestReg).addReg(SrcReg);
+ TII->get(ARM::VMOVDneon), DestReg).addReg(SrcReg);
MBB.erase(MI);
MachineBasicBlock::iterator I = prior(NextMII);
MI = &*I;
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 8b2bcd0..206677b 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -177,20 +177,20 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST2LNd8:
case ARM::VST2LNd16:
case ARM::VST2LNd32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 2;
return true;
case ARM::VST2q8:
case ARM::VST2q16:
case ARM::VST2q32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
return true;
case ARM::VST2LNq16a:
case ARM::VST2LNq32a:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 2;
Offset = 0;
Stride = 2;
@@ -198,7 +198,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST2LNq16b:
case ARM::VST2LNq32b:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 2;
Offset = 1;
Stride = 2;
@@ -211,14 +211,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNd8:
case ARM::VST3LNd16:
case ARM::VST3LNd32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 3;
return true;
case ARM::VST3q8a:
case ARM::VST3q16a:
case ARM::VST3q32a:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 3;
Offset = 0;
Stride = 2;
@@ -227,7 +227,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3q8b:
case ARM::VST3q16b:
case ARM::VST3q32b:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 3;
Offset = 1;
Stride = 2;
@@ -235,7 +235,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNq16a:
case ARM::VST3LNq32a:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 3;
Offset = 0;
Stride = 2;
@@ -243,7 +243,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST3LNq16b:
case ARM::VST3LNq32b:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 3;
Offset = 1;
Stride = 2;
@@ -256,14 +256,14 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNd8:
case ARM::VST4LNd16:
case ARM::VST4LNd32:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
return true;
case ARM::VST4q8a:
case ARM::VST4q16a:
case ARM::VST4q32a:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 4;
Offset = 0;
Stride = 2;
@@ -272,7 +272,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4q8b:
case ARM::VST4q16b:
case ARM::VST4q32b:
- FirstOpnd = 4;
+ FirstOpnd = 5;
NumRegs = 4;
Offset = 1;
Stride = 2;
@@ -280,7 +280,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNq16a:
case ARM::VST4LNq32a:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
Offset = 0;
Stride = 2;
@@ -288,7 +288,7 @@ static bool isNEONMultiRegOp(int Opcode, unsigned &FirstOpnd, unsigned &NumRegs,
case ARM::VST4LNq16b:
case ARM::VST4LNq32b:
- FirstOpnd = 3;
+ FirstOpnd = 4;
NumRegs = 4;
Offset = 1;
Stride = 2;
diff --git a/lib/Target/ARM/README-Thumb.txt b/lib/Target/ARM/README-Thumb.txt
index e7770b2..6b605bb 100644
--- a/lib/Target/ARM/README-Thumb.txt
+++ b/lib/Target/ARM/README-Thumb.txt
@@ -37,7 +37,7 @@ LPCRELL0:
mov r1, #PCRELV0
add r1, pc
ldr r0, [r0, r1]
- cpy pc, r0
+ mov pc, r0
.align 2
LJTI1_0_0:
.long LBB1_3
@@ -51,7 +51,7 @@ We should be able to generate:
LPCRELL0:
add r1, LJTI1_0_0
ldr r0, [r0, r1]
- cpy pc, r0
+ mov pc, r0
.align 2
LJTI1_0_0:
.long LBB1_3
@@ -206,8 +206,8 @@ LPC0:
add r5, pc
ldr r6, LCPI1_1
ldr r2, LCPI1_2
- cpy r3, r6
- cpy lr, pc
+ mov r3, r6
+ mov lr, pc
bx r5
//===---------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index fb64d9f..11c48ad 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -321,7 +321,7 @@ time.
4) Once we added support for multiple result patterns, write indexed loads
patterns instead of C++ instruction selection code.
-5) Use FLDM / FSTM to emulate indexed FP load / store.
+5) Use VLDM / VSTM to emulate indexed FP load / store.
//===---------------------------------------------------------------------===//
@@ -591,3 +591,8 @@ http://lists.cs.uiuc.edu/pipermail/llvmdev/2009-June/022763.html
//===---------------------------------------------------------------------===//
Make use of the "rbit" instruction.
+
+//===---------------------------------------------------------------------===//
+
+Take a look at test/CodeGen/Thumb2/machine-licm.ll. ARM should be taught how
+to licm and cse the unnecessary load from cp#1.
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index b6dd56c..7602b6d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information --------*- C++ -*-===//
+//===- Thumb1InstrInfo.cpp - Thumb-1 Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,7 +11,7 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMInstrInfo.h"
+#include "Thumb1InstrInfo.h"
#include "ARM.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index 13cc578..b28229d 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ----------*- C++ -*-===//
+//===- Thumb1InstrInfo.h - Thumb-1 Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 5aaaf9c..37adf37 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information -------*- C++ -*-===//
+//===- Thumb1RegisterInfo.cpp - Thumb-1 Register Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo class.
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
//
//===----------------------------------------------------------------------===//
@@ -794,7 +795,7 @@ void Thumb1RegisterInfo::emitEpilogue(MachineFunction &MF,
if (NumBytes != 0)
emitSPUpdate(MBB, MBBI, TII, dl, *this, NumBytes);
} else {
- // Unwind MBBI to point to first LDR / FLDD.
+ // Unwind MBBI to point to first LDR / VLDRD.
const unsigned *CSRegs = getCalleeSavedRegs();
if (MBBI != MBB.begin()) {
do
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.h b/lib/Target/ARM/Thumb1RegisterInfo.h
index 241f1cc..37ad388 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.h
+++ b/lib/Target/ARM/Thumb1RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl ----*- C++ -*-===//
+//===- Thumb1RegisterInfo.h - Thumb-1 Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-1 implementation of the TargetRegisterInfo class.
+// This file contains the Thumb-1 implementation of the TargetRegisterInfo
+// class.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/ARM/Thumb2ITBlockPass.cpp b/lib/Target/ARM/Thumb2ITBlockPass.cpp
index 462844b..f5ba155 100644
--- a/lib/Target/ARM/Thumb2ITBlockPass.cpp
+++ b/lib/Target/ARM/Thumb2ITBlockPass.cpp
@@ -1,4 +1,4 @@
-//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks -----------*- C++ -*-===//
+//===-- Thumb2ITBlockPass.cpp - Insert Thumb IT blocks ----------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -34,10 +34,6 @@ namespace {
}
private:
- MachineBasicBlock::iterator
- SplitT2MOV32imm(MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI,
- MachineInstr *MI, DebugLoc dl,
- unsigned PredReg, ARMCC::CondCodes CC);
bool InsertITBlocks(MachineBasicBlock &MBB);
};
char Thumb2ITBlockPass::ID = 0;
@@ -50,34 +46,6 @@ static ARMCC::CondCodes getPredicate(const MachineInstr *MI, unsigned &PredReg){
return llvm::getInstrPredicate(MI, PredReg);
}
-MachineBasicBlock::iterator
-Thumb2ITBlockPass::SplitT2MOV32imm(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator MBBI,
- MachineInstr *MI,
- DebugLoc dl, unsigned PredReg,
- ARMCC::CondCodes CC) {
- // Splitting t2MOVi32imm into a pair of t2MOVi16 + t2MOVTi16 here.
- // The only reason it was a single instruction was so it could be
- // re-materialized. We want to split it before this and the thumb2
- // size reduction pass to make sure the IT mask is correct and expose
- // width reduction opportunities. It doesn't make sense to do this in a
- // separate pass so here it is.
- unsigned DstReg = MI->getOperand(0).getReg();
- bool DstDead = MI->getOperand(0).isDead(); // Is this possible?
- unsigned Imm = MI->getOperand(1).getImm();
- unsigned Lo16 = Imm & 0xffff;
- unsigned Hi16 = (Imm >> 16) & 0xffff;
- BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVi16), DstReg)
- .addImm(Lo16).addImm(CC).addReg(PredReg);
- BuildMI(MBB, MBBI, dl, TII->get(ARM::t2MOVTi16))
- .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstDead))
- .addReg(DstReg).addImm(Hi16).addImm(CC).addReg(PredReg);
- --MBBI;
- --MBBI;
- MI->eraseFromParent();
- return MBBI;
-}
-
bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
bool Modified = false;
@@ -88,11 +56,6 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
unsigned PredReg = 0;
ARMCC::CondCodes CC = getPredicate(MI, PredReg);
- if (MI->getOpcode() == ARM::t2MOVi32imm) {
- MBBI = SplitT2MOV32imm(MBB, MBBI, MI, dl, PredReg, CC);
- continue;
- }
-
if (CC == ARMCC::AL) {
++MBBI;
continue;
@@ -115,11 +78,6 @@ bool Thumb2ITBlockPass::InsertITBlocks(MachineBasicBlock &MBB) {
DebugLoc ndl = NMI->getDebugLoc();
unsigned NPredReg = 0;
ARMCC::CondCodes NCC = getPredicate(NMI, NPredReg);
- if (NMI->getOpcode() == ARM::t2MOVi32imm) {
- MBBI = SplitT2MOV32imm(MBB, MBBI, NMI, ndl, NPredReg, NCC);
- continue;
- }
-
if (NCC == OCC) {
Mask |= (1 << Pos);
} else if (NCC != CC)
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 21fff51..16c1e6f 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information --------*- C++ -*-===//
+//===- Thumb2InstrInfo.cpp - Thumb-2 Instruction Information ----*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -11,8 +11,9 @@
//
//===----------------------------------------------------------------------===//
-#include "ARMInstrInfo.h"
+#include "Thumb2InstrInfo.h"
#include "ARM.h"
+#include "ARMConstantPoolValue.h"
#include "ARMAddressingModes.h"
#include "ARMGenInstrInfo.inc"
#include "ARMMachineFunctionInfo.h"
@@ -132,7 +133,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
ARMBaseInstrInfo::loadRegFromStackSlot(MBB, I, DestReg, FI, RC);
}
-
void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB,
MachineBasicBlock::iterator &MBBI, DebugLoc dl,
unsigned DestReg, unsigned BaseReg, int NumBytes,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index f3688c0..663a60b 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ----------*- C++ -*-===//
+//===- Thumb2InstrInfo.h - Thumb-2 Instruction Information ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
diff --git a/lib/Target/ARM/Thumb2RegisterInfo.h b/lib/Target/ARM/Thumb2RegisterInfo.h
index a295630..b3cf2e5 100644
--- a/lib/Target/ARM/Thumb2RegisterInfo.h
+++ b/lib/Target/ARM/Thumb2RegisterInfo.h
@@ -1,4 +1,4 @@
-//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl ----*- C++ -*-===//
+//===- Thumb2RegisterInfo.h - Thumb-2 Register Information Impl -*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -7,7 +7,8 @@
//
//===----------------------------------------------------------------------===//
//
-// This file contains the Thumb-2 implementation of the TargetRegisterInfo class.
+// This file contains the Thumb-2 implementation of the TargetRegisterInfo
+// class.
//
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index e3587fb..5b0a89d 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -225,8 +225,6 @@ SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() {
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void AlphaDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
-
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
diff --git a/lib/Target/Alpha/AlphaISelLowering.cpp b/lib/Target/Alpha/AlphaISelLowering.cpp
index cb03a6f..9217522 100644
--- a/lib/Target/Alpha/AlphaISelLowering.cpp
+++ b/lib/Target/Alpha/AlphaISelLowering.cpp
@@ -426,7 +426,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
}
} else { //more args
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6));
+ int FI = MFI->CreateFixedObject(8, 8 * (ArgNo - 6), true, false);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -444,7 +444,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
if (TargetRegisterInfo::isPhysicalRegister(args_int[i]))
args_int[i] = AddLiveIn(MF, args_int[i], &Alpha::GPRCRegClass);
SDValue argt = DAG.getCopyFromReg(Chain, dl, args_int[i], MVT::i64);
- int FI = MFI->CreateFixedObject(8, -8 * (6 - i));
+ int FI = MFI->CreateFixedObject(8, -8 * (6 - i), true, false);
if (i == 0) VarArgsBase = FI;
SDValue SDFI = DAG.getFrameIndex(FI, MVT::i64);
LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0));
@@ -452,7 +452,7 @@ AlphaTargetLowering::LowerFormalArguments(SDValue Chain,
if (TargetRegisterInfo::isPhysicalRegister(args_float[i]))
args_float[i] = AddLiveIn(MF, args_float[i], &Alpha::F8RCRegClass);
argt = DAG.getCopyFromReg(Chain, dl, args_float[i], MVT::f64);
- FI = MFI->CreateFixedObject(8, - 8 * (12 - i));
+ FI = MFI->CreateFixedObject(8, - 8 * (12 - i), true, false);
SDFI = DAG.getFrameIndex(FI, MVT::i64);
LS.push_back(DAG.getStore(Chain, dl, argt, SDFI, NULL, 0));
}
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 81e1fb7..8917e86 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -391,7 +391,7 @@ def : Pat<(setune GPRC:$X, GPRC:$Y), (CMPEQi (CMPEQ GPRC:$X, GPRC:$Y), 0)>;
def : Pat<(setune GPRC:$X, immUExt8:$Y), (CMPEQi (CMPEQ GPRC:$X, immUExt8:$Y), 0)>;
-let isReturn = 1, isTerminator = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
+let isReturn = 1, isTerminator = 1, isBarrier = 1, Ra = 31, Rb = 26, disp = 1, Uses = [R26] in {
def RETDAG : MbrForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", s_jsr>; //Return from subroutine
def RETDAGp : MbrpForm< 0x1A, 0x02, (ops), "ret $$31,($$26),1", [(retflag)], s_jsr>; //Return from subroutine
}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.cpp b/lib/Target/Alpha/AlphaRegisterInfo.cpp
index 98e9730..64bdd62 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.cpp
+++ b/lib/Target/Alpha/AlphaRegisterInfo.cpp
@@ -314,7 +314,7 @@ unsigned AlphaRegisterInfo::getRARegister() const {
return 0;
}
-unsigned AlphaRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned AlphaRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? Alpha::R15 : Alpha::R30;
}
diff --git a/lib/Target/Alpha/AlphaRegisterInfo.h b/lib/Target/Alpha/AlphaRegisterInfo.h
index 66f0898..a971e21 100644
--- a/lib/Target/Alpha/AlphaRegisterInfo.h
+++ b/lib/Target/Alpha/AlphaRegisterInfo.h
@@ -52,7 +52,7 @@ struct AlphaRegisterInfo : public AlphaGenRegisterInfo {
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/Alpha/AlphaTargetMachine.cpp b/lib/Target/Alpha/AlphaTargetMachine.cpp
index b8bc13b..d0d5a43 100644
--- a/lib/Target/Alpha/AlphaTargetMachine.cpp
+++ b/lib/Target/Alpha/AlphaTargetMachine.cpp
@@ -28,7 +28,7 @@ extern "C" void LLVMInitializeAlphaTarget() {
AlphaTargetMachine::AlphaTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: LLVMTargetMachine(T, TT),
- DataLayout("e-f128:128:128"),
+ DataLayout("e-f128:128:128-n64"),
FrameInfo(TargetFrameInfo::StackGrowsDown, 16, 0),
JITInfo(*this),
Subtarget(TT, FS),
diff --git a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
index 209a5bf..338057b 100644
--- a/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
+++ b/lib/Target/Alpha/AsmPrinter/AlphaAsmPrinter.cpp
@@ -178,7 +178,7 @@ bool AlphaAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
processDebugLoc(II, true);
printInstruction(II);
- if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*II);
O << '\n';
processDebugLoc(II, false);
diff --git a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
index 1900d00..917f7f5 100644
--- a/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
+++ b/lib/Target/Blackfin/AsmPrinter/BlackfinAsmPrinter.cpp
@@ -31,8 +31,8 @@
#include "llvm/Target/TargetRegistry.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
-
using namespace llvm;
STATISTIC(EmittedInsts, "Number of machine instrs printed");
@@ -143,7 +143,7 @@ bool BlackfinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
processDebugLoc(II, true);
printInstruction(II);
- if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*II);
O << '\n';
diff --git a/lib/Target/Blackfin/BlackfinISelLowering.cpp b/lib/Target/Blackfin/BlackfinISelLowering.cpp
index 4b321ec..c5c96f8 100644
--- a/lib/Target/Blackfin/BlackfinISelLowering.cpp
+++ b/lib/Target/Blackfin/BlackfinISelLowering.cpp
@@ -25,7 +25,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/ADT/VectorExtras.h"
#include "llvm/Support/Debug.h"
-
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -207,7 +207,8 @@ BlackfinTargetLowering::LowerFormalArguments(SDValue Chain,
} else {
assert(VA.isMemLoc() && "CCValAssign must be RegLoc or MemLoc");
unsigned ObjSize = VA.getLocVT().getStoreSize();
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(),
+ true, false);
SDValue FIN = DAG.getFrameIndex(FI, MVT::i32);
InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0));
}
diff --git a/lib/Target/Blackfin/BlackfinInstrInfo.td b/lib/Target/Blackfin/BlackfinInstrInfo.td
index c952af1..88ff85f 100644
--- a/lib/Target/Blackfin/BlackfinInstrInfo.td
+++ b/lib/Target/Blackfin/BlackfinInstrInfo.td
@@ -174,6 +174,7 @@ def CALLp: F1<(outs), (ins P:$func, variable_ops),
let isReturn = 1,
isTerminator = 1,
+ isBarrier = 1,
Uses = [RETS] in
def RTS: F1<(outs), (ins), "rts;", [(BfinRet)]>;
diff --git a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
index c8c5925..ea9480d 100644
--- a/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinIntrinsicInfo.cpp
@@ -62,7 +62,6 @@ BlackfinIntrinsicInfo::lookupName(const char *Name, unsigned Len) const {
bool BlackfinIntrinsicInfo::isOverloaded(unsigned IntrID) const {
// Overload Table
const bool OTable[] = {
- false, // illegal intrinsic
#define GET_INTRINSIC_OVERLOAD_TABLE
#include "BlackfinGenIntrinsics.inc"
#undef GET_INTRINSIC_OVERLOAD_TABLE
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
index 8c0a58a..224165b 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.cpp
@@ -368,7 +368,8 @@ processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (requiresRegisterScavenging(MF)) {
// Reserve a slot close to SP or frame pointer.
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
}
@@ -449,7 +450,8 @@ unsigned BlackfinRegisterInfo::getRARegister() const {
return BF::RETS;
}
-unsigned BlackfinRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned
+BlackfinRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? BF::FP : BF::SP;
}
diff --git a/lib/Target/Blackfin/BlackfinRegisterInfo.h b/lib/Target/Blackfin/BlackfinRegisterInfo.h
index 501f504..68ef08a 100644
--- a/lib/Target/Blackfin/BlackfinRegisterInfo.h
+++ b/lib/Target/Blackfin/BlackfinRegisterInfo.h
@@ -76,7 +76,7 @@ namespace llvm {
void emitPrologue(MachineFunction &MF) const;
void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
unsigned getRARegister() const;
// Exception handling queries.
diff --git a/lib/Target/Blackfin/BlackfinTargetMachine.cpp b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
index 47ba2fe..45d7c35 100644
--- a/lib/Target/Blackfin/BlackfinTargetMachine.cpp
+++ b/lib/Target/Blackfin/BlackfinTargetMachine.cpp
@@ -28,7 +28,7 @@ BlackfinTargetMachine::BlackfinTargetMachine(const Target &T,
const std::string &TT,
const std::string &FS)
: LLVMTargetMachine(T, TT),
- DataLayout("e-p:32:32-i64:32-f64:32"),
+ DataLayout("e-p:32:32-i64:32-f64:32-n32"),
Subtarget(TT, FS),
TLInfo(*this),
InstrInfo(Subtarget),
diff --git a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
index 007fe8f..dc9f81c4 100644
--- a/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
+++ b/lib/Target/CellSPU/AsmPrinter/SPUAsmPrinter.cpp
@@ -406,7 +406,7 @@ void SPUAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
++EmittedInsts;
processDebugLoc(MI, true);
printInstruction(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
processDebugLoc(MI, false);
O << '\n';
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 1f9e5fc..c69a751 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -417,8 +417,6 @@ namespace {
void
SPUDAGToDAGISel::InstructionSelect()
{
- DEBUG(BB->dump());
-
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index aaf0783..4dd82a6 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -1090,7 +1090,7 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// We need to load the argument to a virtual register if we determined
// above that we ran out of physical registers of the appropriate type
// or we're forced to do vararg
- int FI = MFI->CreateFixedObject(ObjSize, ArgOffset);
+ int FI = MFI->CreateFixedObject(ObjSize, ArgOffset, true, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
ArgOffset += StackSlotSize;
@@ -1110,7 +1110,8 @@ SPUTargetLowering::LowerFormalArguments(SDValue Chain,
// Create the frame slot
for (; ArgRegIdx != NumArgRegs; ++ArgRegIdx) {
- VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset);
+ VarArgsFrameIndex = MFI->CreateFixedObject(StackSlotSize, ArgOffset,
+ true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
SDValue ArgVal = DAG.getRegister(ArgRegs[ArgRegIdx], MVT::v16i8);
SDValue Store = DAG.getStore(Chain, dl, ArgVal, FIN, NULL, 0);
diff --git a/lib/Target/CellSPU/SPUInstrInfo.td b/lib/Target/CellSPU/SPUInstrInfo.td
index 09849da..d3b575a 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.td
+++ b/lib/Target/CellSPU/SPUInstrInfo.td
@@ -3601,21 +3601,23 @@ def : Pat<(SPUcall (SPUaform texternalsym:$func, 0)),
(BRASL texternalsym:$func)>;
// Unconditional branches:
-let isBranch = 1, isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
- def BR :
- UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
- "br\t$dest",
- [(br bb:$dest)]>;
-
- // Unconditional, absolute address branch
- def BRA:
- UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
- "bra\t$dest",
- [/* no pattern */]>;
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ let isBarrier = 1 in {
+ def BR :
+ UncondBranch<0b001001100, (outs), (ins brtarget:$dest),
+ "br\t$dest",
+ [(br bb:$dest)]>;
+
+ // Unconditional, absolute address branch
+ def BRA:
+ UncondBranch<0b001100000, (outs), (ins brtarget:$dest),
+ "bra\t$dest",
+ [/* no pattern */]>;
- // Indirect branch
- def BI:
- BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ // Indirect branch
+ def BI:
+ BIForm<0b00010101100, "bi\t$func", [(brind R32C:$func)]>;
+ }
// Conditional branches:
class BRNZInst<dag IOL, list<dag> pattern>:
diff --git a/lib/Target/CellSPU/SPURegisterInfo.cpp b/lib/Target/CellSPU/SPURegisterInfo.cpp
index 8412006..af94e67 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.cpp
+++ b/lib/Target/CellSPU/SPURegisterInfo.cpp
@@ -596,7 +596,7 @@ SPURegisterInfo::getRARegister() const
}
unsigned
-SPURegisterInfo::getFrameRegister(MachineFunction &MF) const
+SPURegisterInfo::getFrameRegister(const MachineFunction &MF) const
{
return SPU::R1;
}
diff --git a/lib/Target/CellSPU/SPURegisterInfo.h b/lib/Target/CellSPU/SPURegisterInfo.h
index 1d9d07e..9691cb6 100644
--- a/lib/Target/CellSPU/SPURegisterInfo.h
+++ b/lib/Target/CellSPU/SPURegisterInfo.h
@@ -78,7 +78,7 @@ namespace llvm {
//! Get return address register (LR, aka R0)
unsigned getRARegister() const;
//! Get the stack frame register (SP, aka R1)
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
//! Perform target-specific stack frame setup.
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
diff --git a/lib/Target/CellSPU/SPUSubtarget.h b/lib/Target/CellSPU/SPUSubtarget.h
index 94ac73c..88201c6 100644
--- a/lib/Target/CellSPU/SPUSubtarget.h
+++ b/lib/Target/CellSPU/SPUSubtarget.h
@@ -82,7 +82,7 @@ namespace llvm {
const char *getTargetDataString() const {
return "E-p:32:32:128-f64:64:128-f32:32:128-i64:32:128-i32:32:128"
"-i16:16:128-i8:8:128-i1:8:128-a:0:128-v64:128:128-v128:128:128"
- "-s:128:128";
+ "-s:128:128-n32:64";
}
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
index 11ac931..145359f 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430AsmPrinter.cpp
@@ -22,6 +22,7 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Module.h"
+#include "llvm/Assembly/Writer.h"
#include "llvm/CodeGen/AsmPrinter.h"
#include "llvm/CodeGen/DwarfWriter.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
@@ -70,9 +71,6 @@ namespace {
void printSrcMemOperand(const MachineInstr *MI, int OpNum,
const char* Modifier = 0);
void printCCOperand(const MachineInstr *MI, int OpNum);
- void printInstruction(const MachineInstr *MI); // autogenerated.
- static const char *getRegisterName(unsigned RegNo);
-
void printMachineInstruction(const MachineInstr * MI);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant,
@@ -82,13 +80,10 @@ namespace {
const char *ExtraCode);
void printInstructionThroughMCStreamer(const MachineInstr *MI);
+ void PrintGlobalVariable(const GlobalVariable* GVar);
void emitFunctionHeader(const MachineFunction &MF);
bool runOnMachineFunction(MachineFunction &F);
- virtual void PrintGlobalVariable(const GlobalVariable *GV) {
- // FIXME: No support for global variables?
- }
-
void getAnalysisUsage(AnalysisUsage &AU) const {
AsmPrinter::getAnalysisUsage(AU);
AU.setPreservesAll();
@@ -96,8 +91,89 @@ namespace {
};
} // end of anonymous namespace
-#include "MSP430GenAsmWriter.inc"
+void MSP430AsmPrinter::PrintGlobalVariable(const GlobalVariable* GVar) {
+ if (!GVar->hasInitializer())
+ return; // External globals require no code
+
+ // Check to see if this is a special global used by LLVM, if so, emit it.
+ if (EmitSpecialLLVMGlobal(GVar))
+ return;
+
+ const TargetData *TD = TM.getTargetData();
+
+ std::string name = Mang->getMangledName(GVar);
+ Constant *C = GVar->getInitializer();
+ unsigned Size = TD->getTypeAllocSize(C->getType());
+ unsigned Align = TD->getPreferredAlignmentLog(GVar);
+
+ printVisibility(name, GVar->getVisibility());
+
+ O << "\t.type\t" << name << ",@object\n";
+
+ OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(GVar, Mang,
+ TM));
+
+ if (C->isNullValue() && !GVar->hasSection() &&
+ !GVar->isThreadLocal() &&
+ (GVar->hasLocalLinkage() || GVar->isWeakForLinker())) {
+
+ if (Size == 0) Size = 1; // .comm Foo, 0 is undefined, avoid it.
+
+ if (GVar->hasLocalLinkage())
+ O << "\t.local\t" << name << '\n';
+
+ O << MAI->getCOMMDirective() << name << ',' << Size;
+ if (MAI->getCOMMDirectiveTakesAlignment())
+ O << ',' << (MAI->getAlignmentIsInBytes() ? (1 << Align) : Align);
+
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+ return;
+ }
+
+ switch (GVar->getLinkage()) {
+ case GlobalValue::CommonLinkage:
+ case GlobalValue::LinkOnceAnyLinkage:
+ case GlobalValue::LinkOnceODRLinkage:
+ case GlobalValue::WeakAnyLinkage:
+ case GlobalValue::WeakODRLinkage:
+ O << "\t.weak\t" << name << '\n';
+ break;
+ case GlobalValue::DLLExportLinkage:
+ case GlobalValue::AppendingLinkage:
+ // FIXME: appending linkage variables should go into a section of
+ // their name or something. For now, just emit them as external.
+ case GlobalValue::ExternalLinkage:
+ // If external or appending, declare as a global symbol
+ O << "\t.globl " << name << '\n';
+ // FALL THROUGH
+ case GlobalValue::PrivateLinkage:
+ case GlobalValue::LinkerPrivateLinkage:
+ case GlobalValue::InternalLinkage:
+ break;
+ default:
+ assert(0 && "Unknown linkage type!");
+ }
+
+ // Use 16-bit alignment by default to simplify a bunch of stuff
+ EmitAlignment(Align, GVar);
+ O << name << ":";
+ if (VerboseAsm) {
+ O.PadToColumn(MAI->getCommentColumn());
+ O << MAI->getCommentString() << ' ';
+ WriteAsOperand(O, GVar, /*PrintType=*/false, GVar->getParent());
+ }
+ O << '\n';
+ EmitGlobalConstant(C);
+
+ if (MAI->hasDotTypeDotSizeDirective())
+ O << "\t.size\t" << name << ", " << Size << '\n';
+}
void MSP430AsmPrinter::emitFunctionHeader(const MachineFunction &MF) {
const Function *F = MF.getFunction();
@@ -161,14 +237,9 @@ void MSP430AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
processDebugLoc(MI, true);
- // Call the autogenerated instruction printer routines.
- if (EnableMCInst) {
- printInstructionThroughMCStreamer(MI);
- } else {
- printInstruction(MI);
- }
+ printInstructionThroughMCStreamer(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
@@ -180,7 +251,7 @@ void MSP430AsmPrinter::printOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &MO = MI->getOperand(OpNum);
switch (MO.getType()) {
case MachineOperand::MO_Register:
- O << getRegisterName(MO.getReg());
+ O << MSP430InstPrinter::getRegisterName(MO.getReg());
return;
case MachineOperand::MO_Immediate:
if (!Modifier || strcmp(Modifier, "nohash"))
@@ -224,22 +295,23 @@ void MSP430AsmPrinter::printSrcMemOperand(const MachineInstr *MI, int OpNum,
const MachineOperand &Base = MI->getOperand(OpNum);
const MachineOperand &Disp = MI->getOperand(OpNum+1);
- if (Base.isGlobal())
- printOperand(MI, OpNum, "mem");
- else if (Disp.isImm() && !Base.getReg())
+ // Print displacement first
+ if (!Disp.isImm()) {
+ printOperand(MI, OpNum+1, "mem");
+ } else {
+ if (!Base.getReg())
+ O << '&';
+
+ printOperand(MI, OpNum+1, "nohash");
+ }
+
+
+ // Print register base field
+ if (Base.getReg()) {
+ O << '(';
printOperand(MI, OpNum);
- else if (Base.getReg()) {
- if (Disp.getImm()) {
- printOperand(MI, OpNum + 1, "nohash");
- O << '(';
- printOperand(MI, OpNum);
- O << ')';
- } else {
- O << '@';
- printOperand(MI, OpNum);
- }
- } else
- llvm_unreachable("Unsupported memory operand");
+ O << ')';
+ }
}
void MSP430AsmPrinter::printCCOperand(const MachineInstr *MI, int OpNum) {
@@ -294,8 +366,7 @@ bool MSP430AsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
}
//===----------------------------------------------------------------------===//
-void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI)
-{
+void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI){
MSP430MCInstLower MCInstLowering(OutContext, *Mang, *this);
@@ -309,7 +380,6 @@ void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI)
printKill(MI);
return;
case TargetInstrInfo::INLINEASM:
- O << '\t';
printInlineAsm(MI);
return;
case TargetInstrInfo::IMPLICIT_DEF:
@@ -324,7 +394,18 @@ void MSP430AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI)
printMCInst(&TmpInst);
}
+static MCInstPrinter *createMSP430MCInstPrinter(const Target &T,
+ unsigned SyntaxVariant,
+ const MCAsmInfo &MAI,
+ raw_ostream &O) {
+ if (SyntaxVariant == 0)
+ return new MSP430InstPrinter(O, MAI);
+ return 0;
+}
+
// Force static initialization.
extern "C" void LLVMInitializeMSP430AsmPrinter() {
RegisterAsmPrinter<MSP430AsmPrinter> X(TheMSP430Target);
+ TargetRegistry::RegisterMCInstPrinter(TheMSP430Target,
+ createMSP430MCInstPrinter);
}
diff --git a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
index a3ecc67..0a403c4 100644
--- a/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
+++ b/lib/Target/MSP430/AsmPrinter/MSP430InstPrinter.cpp
@@ -25,11 +25,9 @@ using namespace llvm;
// Include the auto-generated portion of the assembly writer.
#define MachineInstr MCInst
-#define MSP430AsmPrinter MSP430InstPrinter // FIXME: REMOVE.
#define NO_ASM_WRITER_BOILERPLATE
#include "MSP430GenAsmWriter.inc"
#undef MachineInstr
-#undef MSP430AsmPrinter
void MSP430InstPrinter::printInst(const MCInst *MI) {
printInstruction(MI);
@@ -65,25 +63,22 @@ void MSP430InstPrinter::printSrcMemOperand(const MCInst *MI, unsigned OpNo,
const MCOperand &Base = MI->getOperand(OpNo);
const MCOperand &Disp = MI->getOperand(OpNo+1);
- // FIXME: move global to displacement field!
- if (Base.isExpr()) {
+ // Print displacement first
+ if (Disp.isExpr()) {
O << '&';
- Base.getExpr()->print(O, &MAI);
- } else if (Disp.isImm() && !Base.isReg())
- printOperand(MI, OpNo);
- else if (Base.isReg()) {
- if (Disp.getImm()) {
- O << Disp.getImm() << '(';
- printOperand(MI, OpNo);
- O << ')';
- } else {
- O << '@';
- printOperand(MI, OpNo);
- }
+ Disp.getExpr()->print(O, &MAI);
} else {
- Base.dump();
- Disp.dump();
- llvm_unreachable("Unsupported memory operand");
+ assert(Disp.isImm() && "Expected immediate in displacement field");
+ if (!Base.getReg())
+ O << '&';
+
+ O << Disp.getImm();
+ }
+
+
+ // Print register base field
+ if (Base.getReg()) {
+ O << '(' << getRegisterName(Base.getReg()) << ')';
}
}
diff --git a/lib/Target/MSP430/MSP430.td b/lib/Target/MSP430/MSP430.td
index 89313ab..870a3df 100644
--- a/lib/Target/MSP430/MSP430.td
+++ b/lib/Target/MSP430/MSP430.td
@@ -50,11 +50,17 @@ include "MSP430InstrInfo.td"
def MSP430InstrInfo : InstrInfo {}
+
+def MSP430InstPrinter : AsmWriter {
+ string AsmWriterClassName = "InstPrinter";
+}
+
//===----------------------------------------------------------------------===//
// Target Declaration
//===----------------------------------------------------------------------===//
def MSP430 : Target {
let InstructionSet = MSP430InstrInfo;
+ let AssemblyWriters = [MSP430InstPrinter];
}
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index b7d9282..c0084be 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -45,6 +45,70 @@ static const bool ViewRMWDAGs = false;
STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
+
+namespace {
+ struct MSP430ISelAddressMode {
+ enum {
+ RegBase,
+ FrameIndexBase
+ } BaseType;
+
+ struct { // This is really a union, discriminated by BaseType!
+ SDValue Reg;
+ int FrameIndex;
+ } Base;
+
+ int16_t Disp;
+ GlobalValue *GV;
+ Constant *CP;
+ BlockAddress *BlockAddr;
+ const char *ES;
+ int JT;
+ unsigned Align; // CP alignment.
+
+ MSP430ISelAddressMode()
+ : BaseType(RegBase), Disp(0), GV(0), CP(0), BlockAddr(0),
+ ES(0), JT(-1), Align(0) {
+ }
+
+ bool hasSymbolicDisplacement() const {
+ return GV != 0 || CP != 0 || ES != 0 || JT != -1;
+ }
+
+ bool hasBaseReg() const {
+ return Base.Reg.getNode() != 0;
+ }
+
+ void setBaseReg(SDValue Reg) {
+ BaseType = RegBase;
+ Base.Reg = Reg;
+ }
+
+ void dump() {
+ errs() << "MSP430ISelAddressMode " << this << '\n';
+ if (Base.Reg.getNode() != 0) {
+ errs() << "Base.Reg ";
+ Base.Reg.getNode()->dump();
+ } else {
+ errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
+ }
+ errs() << " Disp " << Disp << '\n';
+ if (GV) {
+ errs() << "GV ";
+ GV->dump();
+ } else if (CP) {
+ errs() << " CP ";
+ CP->dump();
+ errs() << " Align" << Align << '\n';
+ } else if (ES) {
+ errs() << "ES ";
+ errs() << ES << '\n';
+ } else if (JT != -1)
+ errs() << " JT" << JT << " Align" << Align << '\n';
+ }
+ };
+}
+
/// MSP430DAGToDAGISel - MSP430 specific code to select MSP430 machine
/// instructions for SelectionDAG operations.
///
@@ -65,6 +129,10 @@ namespace {
return "MSP430 DAG->DAG Pattern Instruction Selection";
}
+ bool MatchAddress(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
+ bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
+
bool IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
SDNode *Root) const;
@@ -79,6 +147,10 @@ namespace {
DenseMap<SDNode*, SDNode*> RMWStores;
void PreprocessForRMW();
SDNode *Select(SDValue Op);
+ SDNode *SelectIndexedLoad(SDValue Op);
+ SDNode *SelectIndexedBinOp(SDValue Op, SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16);
+
bool SelectAddr(SDValue Op, SDValue Addr, SDValue &Base, SDValue &Disp);
#ifndef NDEBUG
@@ -95,50 +167,155 @@ FunctionPass *llvm::createMSP430ISelDag(MSP430TargetMachine &TM,
return new MSP430DAGToDAGISel(TM, OptLevel);
}
-// FIXME: This is pretty dummy routine and needs to be rewritten in the future.
-bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue Addr,
- SDValue &Base, SDValue &Disp) {
- // Try to match frame address first.
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16);
- Disp = CurDAG->getTargetConstant(0, MVT::i16);
+
+/// MatchWrapper - Try to match an MSP430ISD::Wrapper node into an addressing mode.
+/// These wrap things that will resolve down into a symbol reference. If no
+/// match is possible, this returns true, otherwise it returns false.
+bool MSP430DAGToDAGISel::MatchWrapper(SDValue N, MSP430ISelAddressMode &AM) {
+ // If the addressing mode already has a symbol as the displacement, we can
+ // never match another symbol.
+ if (AM.hasSymbolicDisplacement())
return true;
+
+ SDValue N0 = N.getOperand(0);
+
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
+ AM.GV = G->getGlobal();
+ AM.Disp += G->getOffset();
+ //AM.SymbolFlags = G->getTargetFlags();
+ } else if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(N0)) {
+ AM.CP = CP->getConstVal();
+ AM.Align = CP->getAlignment();
+ AM.Disp += CP->getOffset();
+ //AM.SymbolFlags = CP->getTargetFlags();
+ } else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(N0)) {
+ AM.ES = S->getSymbol();
+ //AM.SymbolFlags = S->getTargetFlags();
+ } else if (JumpTableSDNode *J = dyn_cast<JumpTableSDNode>(N0)) {
+ AM.JT = J->getIndex();
+ //AM.SymbolFlags = J->getTargetFlags();
+ } else {
+ AM.BlockAddr = cast<BlockAddressSDNode>(N0)->getBlockAddress();
+ //AM.SymbolFlags = cast<BlockAddressSDNode>(N0)->getTargetFlags();
}
+ return false;
+}
- switch (Addr.getOpcode()) {
- case ISD::ADD:
- // Operand is a result from ADD with constant operand which fits into i16.
- if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
- uint64_t CVal = CN->getZExtValue();
- // Offset should fit into 16 bits.
- if (((CVal << 48) >> 48) == CVal) {
- SDValue N0 = Addr.getOperand(0);
- if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(N0))
- Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i16);
- else
- Base = N0;
-
- Disp = CurDAG->getTargetConstant(CVal, MVT::i16);
- return true;
- }
+/// MatchAddressBase - Helper for MatchAddress. Add the specified node to the
+/// specified addressing mode without any further recursion.
+bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM) {
+ // Is the base register already occupied?
+ if (AM.BaseType != MSP430ISelAddressMode::RegBase || AM.Base.Reg.getNode()) {
+ // If so, we cannot select it.
+ return true;
+ }
+
+ // Default, generate it as a register.
+ AM.BaseType = MSP430ISelAddressMode::RegBase;
+ AM.Base.Reg = N;
+ return false;
+}
+
+bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
+ DebugLoc dl = N.getDebugLoc();
+ DEBUG({
+ errs() << "MatchAddress: ";
+ AM.dump();
+ });
+
+ switch (N.getOpcode()) {
+ default: break;
+ case ISD::Constant: {
+ uint64_t Val = cast<ConstantSDNode>(N)->getSExtValue();
+ AM.Disp += Val;
+ return false;
+ }
+
+ case MSP430ISD::Wrapper:
+ if (!MatchWrapper(N, AM))
+ return false;
+ break;
+
+ case ISD::FrameIndex:
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase
+ && AM.Base.Reg.getNode() == 0) {
+ AM.BaseType = MSP430ISelAddressMode::FrameIndexBase;
+ AM.Base.FrameIndex = cast<FrameIndexSDNode>(N)->getIndex();
+ return false;
}
break;
- case MSP430ISD::Wrapper:
- SDValue N0 = Addr.getOperand(0);
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(N0)) {
- Base = CurDAG->getTargetGlobalAddress(G->getGlobal(),
- MVT::i16, G->getOffset());
- Disp = CurDAG->getTargetConstant(0, MVT::i16);
- return true;
- } else if (ExternalSymbolSDNode *E = dyn_cast<ExternalSymbolSDNode>(N0)) {
- Base = CurDAG->getTargetExternalSymbol(E->getSymbol(), MVT::i16);
- Disp = CurDAG->getTargetConstant(0, MVT::i16);
+
+ case ISD::ADD: {
+ MSP430ISelAddressMode Backup = AM;
+ if (!MatchAddress(N.getNode()->getOperand(0), AM) &&
+ !MatchAddress(N.getNode()->getOperand(1), AM))
+ return false;
+ AM = Backup;
+ if (!MatchAddress(N.getNode()->getOperand(1), AM) &&
+ !MatchAddress(N.getNode()->getOperand(0), AM))
+ return false;
+ AM = Backup;
+
+ break;
+ }
+
+ case ISD::OR:
+ // Handle "X | C" as "X + C" iff X is known to have C bits clear.
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N.getOperand(1))) {
+ MSP430ISelAddressMode Backup = AM;
+ uint64_t Offset = CN->getSExtValue();
+ // Start with the LHS as an addr mode.
+ if (!MatchAddress(N.getOperand(0), AM) &&
+ // Address could not have picked a GV address for the displacement.
+ AM.GV == NULL &&
+ // Check to see if the LHS & C is zero.
+ CurDAG->MaskedValueIsZero(N.getOperand(0), CN->getAPIntValue())) {
+ AM.Disp += Offset;
+ return false;
+ }
+ AM = Backup;
}
break;
- };
+ }
+
+ return MatchAddressBase(N, AM);
+}
+
+/// SelectAddr - returns true if it is able to pattern match an addressing mode.
+/// It returns the operands which make up the maximal addressing mode it can
+/// match by reference.
+bool MSP430DAGToDAGISel::SelectAddr(SDValue Op, SDValue N,
+ SDValue &Base, SDValue &Disp) {
+ MSP430ISelAddressMode AM;
+
+ if (MatchAddress(N, AM))
+ return false;
+
+ EVT VT = N.getValueType();
+ if (AM.BaseType == MSP430ISelAddressMode::RegBase) {
+ if (!AM.Base.Reg.getNode())
+ AM.Base.Reg = CurDAG->getRegister(0, VT);
+ }
- Base = Addr;
- Disp = CurDAG->getTargetConstant(0, MVT::i16);
+ Base = (AM.BaseType == MSP430ISelAddressMode::FrameIndexBase) ?
+ CurDAG->getTargetFrameIndex(AM.Base.FrameIndex, TLI.getPointerTy()) :
+ AM.Base.Reg;
+
+ if (AM.GV)
+ Disp = CurDAG->getTargetGlobalAddress(AM.GV, MVT::i16, AM.Disp,
+ 0/*AM.SymbolFlags*/);
+ else if (AM.CP)
+ Disp = CurDAG->getTargetConstantPool(AM.CP, MVT::i16,
+ AM.Align, AM.Disp, 0/*AM.SymbolFlags*/);
+ else if (AM.ES)
+ Disp = CurDAG->getTargetExternalSymbol(AM.ES, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.JT != -1)
+ Disp = CurDAG->getTargetJumpTable(AM.JT, MVT::i16, 0/*AM.SymbolFlags*/);
+ else if (AM.BlockAddr)
+ Disp = CurDAG->getBlockAddress(AM.BlockAddr, DebugLoc()/*MVT::i32*/,
+ true /*AM.SymbolFlags*/);
+ else
+ Disp = CurDAG->getTargetConstant(AM.Disp, MVT::i16);
return true;
}
@@ -187,7 +364,7 @@ bool MSP430DAGToDAGISel::IsLegalAndProfitableToFold(SDNode *N, SDNode *U,
/// TokenFactor by PreprocessForRMW. Query the map Store => Load1 (created
/// during preprocessing) to determine whether it's legal to introduce such
/// "cycle" for a moment.
- DenseMap<SDNode*, SDNode*>::iterator I = RMWStores.find(Root);
+ DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root);
if (I != RMWStores.end() && I->second == N)
return true;
@@ -423,6 +600,89 @@ void MSP430DAGToDAGISel::PreprocessForRMW() {
}
}
+
+static bool isValidIndexedLoad(const LoadSDNode *LD) {
+ ISD::MemIndexedMode AM = LD->getAddressingMode();
+ if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+
+ switch (VT.getSimpleVT().SimpleTy) {
+ case MVT::i8:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 1)
+ return false;
+
+ break;
+ case MVT::i16:
+ // Sanity check
+ if (cast<ConstantSDNode>(LD->getOffset())->getZExtValue() != 2)
+ return false;
+
+ break;
+ default:
+ return false;
+ }
+
+ return true;
+}
+
+SDNode *MSP430DAGToDAGISel::SelectIndexedLoad(SDValue Op) {
+ LoadSDNode *LD = cast<LoadSDNode>(Op);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+
+ unsigned Opcode = 0;
+ switch (VT.SimpleTy) {
+ case MVT::i8:
+ Opcode = MSP430::MOV8rm_POST;
+ break;
+ case MVT::i16:
+ Opcode = MSP430::MOV16rm_POST;
+ break;
+ default:
+ return NULL;
+ }
+
+ return CurDAG->getMachineNode(Opcode, Op.getDebugLoc(),
+ VT, MVT::i16, MVT::Other,
+ LD->getBasePtr(), LD->getChain());
+}
+
+SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDValue Op,
+ SDValue N1, SDValue N2,
+ unsigned Opc8, unsigned Opc16) {
+ if (N1.getOpcode() == ISD::LOAD &&
+ N1.hasOneUse() &&
+ IsLegalAndProfitableToFold(N1.getNode(), Op.getNode(), Op.getNode())) {
+ LoadSDNode *LD = cast<LoadSDNode>(N1);
+ if (!isValidIndexedLoad(LD))
+ return NULL;
+
+ MVT VT = LD->getMemoryVT().getSimpleVT();
+ unsigned Opc = (VT == MVT::i16 ? Opc16 : Opc8);
+ MachineSDNode::mmo_iterator MemRefs0 = MF->allocateMemRefsArray(1);
+ MemRefs0[0] = cast<MemSDNode>(N1)->getMemOperand();
+ SDValue Ops0[] = { N2, LD->getBasePtr(), LD->getChain() };
+ SDNode *ResNode =
+ CurDAG->SelectNodeTo(Op.getNode(), Opc,
+ VT, MVT::i16, MVT::Other,
+ Ops0, 3);
+ cast<MachineSDNode>(ResNode)->setMemRefs(MemRefs0, MemRefs0 + 1);
+ // Transfer chain.
+ ReplaceUses(SDValue(N1.getNode(), 2), SDValue(ResNode, 2));
+ // Transfer writeback.
+ ReplaceUses(SDValue(N1.getNode(), 1), SDValue(ResNode, 1));
+ return ResNode;
+ }
+
+ return NULL;
+}
+
+
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void MSP430DAGToDAGISel::InstructionSelect() {
@@ -438,8 +698,6 @@ void MSP430DAGToDAGISel::InstructionSelect() {
DEBUG(errs() << "Selection DAG after RMW preprocessing:\n");
DEBUG(CurDAG->dump());
- DEBUG(BB->dump());
-
// Codegen the basic block.
DEBUG(errs() << "===== Instruction selection begins:\n");
DEBUG(Indent = 0);
@@ -482,6 +740,72 @@ SDNode *MSP430DAGToDAGISel::Select(SDValue Op) {
return CurDAG->getMachineNode(MSP430::ADD16ri, dl, MVT::i16,
TFI, CurDAG->getTargetConstant(0, MVT::i16));
}
+ case ISD::LOAD:
+ if (SDNode *ResNode = SelectIndexedLoad(Op))
+ return ResNode;
+ // Other cases are autogenerated.
+ break;
+ case ISD::ADD:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Op,
+ Op.getOperand(0), Op.getOperand(1),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ MSP430::ADD8rm_POST, MSP430::ADD16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::SUB:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Op,
+ Op.getOperand(0), Op.getOperand(1),
+ MSP430::SUB8rm_POST, MSP430::SUB16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::AND:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Op,
+ Op.getOperand(0), Op.getOperand(1),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ MSP430::AND8rm_POST, MSP430::AND16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::OR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Op,
+ Op.getOperand(0), Op.getOperand(1),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ MSP430::OR8rm_POST, MSP430::OR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
+ case ISD::XOR:
+ if (SDNode *ResNode =
+ SelectIndexedBinOp(Op,
+ Op.getOperand(0), Op.getOperand(1),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+ else if (SDNode *ResNode =
+ SelectIndexedBinOp(Op, Op.getOperand(1), Op.getOperand(0),
+ MSP430::XOR8rm_POST, MSP430::XOR16rm_POST))
+ return ResNode;
+
+ // Other cases are autogenerated.
+ break;
}
// Select the default instruction
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 34e6d2c..5a925f5 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -62,10 +62,14 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setBooleanContents(ZeroOrOneBooleanContent);
setSchedulingPreference(SchedulingForLatency);
- setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
- setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
+ // We have post-incremented loads / stores.
+ setIndexedLoadAction(ISD::POST_INC, MVT::i8, Legal);
+ setIndexedLoadAction(ISD::POST_INC, MVT::i16, Legal);
+
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i8, Expand);
setLoadExtAction(ISD::SEXTLOAD, MVT::i16, Expand);
// We don't have any truncstores
@@ -115,12 +119,23 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
// FIXME: Implement efficiently multiplication by a constant
+ setOperationAction(ISD::MUL, MVT::i8, Expand);
+ setOperationAction(ISD::MULHS, MVT::i8, Expand);
+ setOperationAction(ISD::MULHU, MVT::i8, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i8, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i8, Expand);
setOperationAction(ISD::MUL, MVT::i16, Expand);
setOperationAction(ISD::MULHS, MVT::i16, Expand);
setOperationAction(ISD::MULHU, MVT::i16, Expand);
setOperationAction(ISD::SMUL_LOHI, MVT::i16, Expand);
setOperationAction(ISD::UMUL_LOHI, MVT::i16, Expand);
+ setOperationAction(ISD::UDIV, MVT::i8, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::UREM, MVT::i8, Expand);
+ setOperationAction(ISD::SDIV, MVT::i8, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i8, Expand);
+ setOperationAction(ISD::SREM, MVT::i8, Expand);
setOperationAction(ISD::UDIV, MVT::i16, Expand);
setOperationAction(ISD::UDIVREM, MVT::i16, Expand);
setOperationAction(ISD::UREM, MVT::i16, Expand);
@@ -303,7 +318,7 @@ MSP430TargetLowering::LowerCCCArguments(SDValue Chain,
<< "\n";
}
// Create the frame index object for this incoming parameter...
- int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(ObjSize, VA.getLocMemOffset(), true, false);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -659,6 +674,42 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
DAG.getValueType(Val.getValueType()));
}
+/// getPostIndexedAddressParts - returns true by value, base pointer and
+/// offset pointer and addressing mode by reference if this node can be
+/// combined with a load / store to form a post-indexed load / store.
+bool MSP430TargetLowering::getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const {
+
+ LoadSDNode *LD = cast<LoadSDNode>(N);
+ if (LD->getExtensionType() != ISD::NON_EXTLOAD)
+ return false;
+
+ EVT VT = LD->getMemoryVT();
+ if (VT != MVT::i8 && VT != MVT::i16)
+ return false;
+
+ if (Op->getOpcode() != ISD::ADD)
+ return false;
+
+ if (ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(Op->getOperand(1))) {
+ uint64_t RHSC = RHS->getZExtValue();
+ if ((VT == MVT::i16 && RHSC != 2) ||
+ (VT == MVT::i8 && RHSC != 1))
+ return false;
+
+ Base = Op->getOperand(0);
+ Offset = DAG.getConstant(RHSC, VT);
+ AM = ISD::POST_INC;
+ return true;
+ }
+
+ return false;
+}
+
+
const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return NULL;
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index fdbc384..d413ccb 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -136,6 +136,12 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG);
+ virtual bool getPostIndexedAddressParts(SDNode *N, SDNode *Op,
+ SDValue &Base,
+ SDValue &Offset,
+ ISD::MemIndexedMode &AM,
+ SelectionDAG &DAG) const;
+
const MSP430Subtarget &Subtarget;
const MSP430TargetMachine &TM;
};
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index a6d9638..b2f09c7 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -35,15 +35,23 @@ void MSP430InstrInfo::storeRegToStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOStore, 0,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
if (RC == &MSP430::GR16RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV16mr))
.addFrameIndex(FrameIdx).addImm(0)
- .addReg(SrcReg, getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else if (RC == &MSP430::GR8RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV8mr))
.addFrameIndex(FrameIdx).addImm(0)
- .addReg(SrcReg, getKillRegState(isKill));
+ .addReg(SrcReg, getKillRegState(isKill)).addMemOperand(MMO);
else
llvm_unreachable("Cannot store this register to stack slot!");
}
@@ -54,13 +62,21 @@ void MSP430InstrInfo::loadRegFromStackSlot(MachineBasicBlock &MBB,
const TargetRegisterClass *RC) const{
DebugLoc DL = DebugLoc::getUnknownLoc();
if (MI != MBB.end()) DL = MI->getDebugLoc();
+ MachineFunction &MF = *MBB.getParent();
+ MachineFrameInfo &MFI = *MF.getFrameInfo();
+
+ MachineMemOperand *MMO =
+ MF.getMachineMemOperand(PseudoSourceValue::getFixedStack(FrameIdx),
+ MachineMemOperand::MOLoad, 0,
+ MFI.getObjectSize(FrameIdx),
+ MFI.getObjectAlignment(FrameIdx));
if (RC == &MSP430::GR16RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV16rm))
- .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
else if (RC == &MSP430::GR8RegClass)
BuildMI(MBB, MI, DL, get(MSP430::MOV8rm))
- .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0);
+ .addReg(DestReg).addFrameIndex(FrameIdx).addImm(0).addMemOperand(MMO);
else
llvm_unreachable("Cannot store this register to stack slot!");
}
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 2b50669..c3bbfe8 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -127,7 +127,7 @@ def NOP : Pseudo<(outs), (ins), "nop", []>;
//
// FIXME: Provide proper encoding!
-let isReturn = 1, isTerminator = 1 in {
+let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>;
}
@@ -142,7 +142,7 @@ let isBarrier = 1 in
// Conditional branches
let Uses = [SRW] in
def JCC : Pseudo<(outs), (ins brtarget:$dst, cc:$cc),
- "j$cc $dst",
+ "j$cc\t$dst",
[(MSP430brcc bb:$dst, imm:$cc)]>;
} // isBranch, isTerminator
@@ -215,6 +215,13 @@ def MOVZX16rm8 : Pseudo<(outs GR16:$dst), (ins memsrc:$src),
"mov.b\t{$src, $dst}",
[(set GR16:$dst, (zextloadi16i8 addr:$src))]>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1, Constraints = "$base = $base_wb" in {
+def MOV8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.b\t{@$base+, $dst}", []>;
+def MOV16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$base),
+ "mov.w\t{@$base+, $dst}", []>;
+}
+
// Any instruction that defines a 8-bit result leaves the high half of the
// register. Truncate can be lowered to EXTRACT_SUBREG, and CopyFromReg may
// be copying from a truncate, but any other 8-bit operation will zero-extend
@@ -280,6 +287,15 @@ def ADD16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
[(set GR16:$dst, (add GR16:$src1, (load addr:$src2))),
(implicit SRW)]>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src1 = $dst" in {
+def ADD8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base),
+ "add.b\t{@$base+, $dst}", []>;
+def ADD16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base),
+ "add.w\t{@$base+, $dst}", []>;
+}
+
+
def ADD8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
"add.b\t{$src2, $dst}",
[(set GR8:$dst, (add GR8:$src1, imm:$src2)),
@@ -409,6 +425,14 @@ def AND16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
[(set GR16:$dst, (and GR16:$src1, (load addr:$src2))),
(implicit SRW)]>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src1 = $dst" in {
+def AND8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base),
+ "and.b\t{@$base+, $dst}", []>;
+def AND16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base),
+ "and.w\t{@$base+, $dst}", []>;
+}
+
let isTwoAddress = 0 in {
def AND8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
"and.b\t{$src, $dst}",
@@ -438,6 +462,92 @@ def AND16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
(implicit SRW)]>;
}
+let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
+def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
+def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>;
+}
+
+def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
+def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>;
+
+def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "bis.b\t{$src2, $dst}",
+ [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
+def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "bis.w\t{$src2, $dst}",
+ [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>;
+
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src1 = $dst" in {
+def OR8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base),
+ "bis.b\t{@$base+, $dst}", []>;
+def OR16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base),
+ "bis.w\t{@$base+, $dst}", []>;
+}
+
+let isTwoAddress = 0 in {
+def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
+def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
+
+def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
+def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
+
+def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bis.b\t{$src, $dst}",
+ [(store (or (i8 (load addr:$dst)),
+ (i8 (load addr:$src))), addr:$dst)]>;
+def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bis.w\t{$src, $dst}",
+ [(store (or (i16 (load addr:$dst)),
+ (i16 (load addr:$src))), addr:$dst)]>;
+}
+
+// bic does not modify condition codes
+def BIC8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src1, (not GR8:$src2)))]>;
+def BIC16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src1, (not GR16:$src2)))]>;
+
+def BIC8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
+ "bic.b\t{$src2, $dst}",
+ [(set GR8:$dst, (and GR8:$src1, (not (i8 (load addr:$src2)))))]>;
+def BIC16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
+ "bic.w\t{$src2, $dst}",
+ [(set GR16:$dst, (and GR16:$src1, (not (i16 (load addr:$src2)))))]>;
+
+let isTwoAddress = 0 in {
+def BIC8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR8:$src)), addr:$dst)]>;
+def BIC16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not GR16:$src)), addr:$dst)]>;
+
+def BIC8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bic.b\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not (i8 (load addr:$src)))), addr:$dst)]>;
+def BIC16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
+ "bic.w\t{$src, $dst}",
+ [(store (and (load addr:$dst), (not (i16 (load addr:$src)))), addr:$dst)]>;
+}
let isCommutable = 1 in { // X = XOR Y, Z == X = XOR Z, Y
def XOR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
@@ -468,6 +578,14 @@ def XOR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
[(set GR16:$dst, (xor GR16:$src1, (load addr:$src2))),
(implicit SRW)]>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src1 = $dst" in {
+def XOR8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base),
+ "xor.b\t{@$base+, $dst}", []>;
+def XOR16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base),
+ "xor.w\t{@$base+, $dst}", []>;
+}
+
let isTwoAddress = 0 in {
def XOR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
"xor.b\t{$src, $dst}",
@@ -525,6 +643,14 @@ def SUB16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
[(set GR16:$dst, (sub GR16:$src1, (load addr:$src2))),
(implicit SRW)]>;
+let mayLoad = 1, hasExtraDefRegAllocReq = 1,
+Constraints = "$base = $base_wb, $src1 = $dst" in {
+def SUB8rm_POST : Pseudo<(outs GR8:$dst, GR16:$base_wb), (ins GR8:$src1, GR16:$base),
+ "sub.b\t{@$base+, $dst}", []>;
+def SUB16rm_POST : Pseudo<(outs GR16:$dst, GR16:$base_wb), (ins GR16:$src1, GR16:$base),
+ "sub.w\t{@$base+, $dst}", []>;
+}
+
let isTwoAddress = 0 in {
def SUB8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
"sub.b\t{$src, $dst}",
@@ -650,58 +776,14 @@ def SEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
} // Defs = [SRW]
+def ZEXT16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
+ "mov.b\t{$src, $dst}",
+ [(set GR16:$dst, (zext (trunc GR16:$src)))]>;
+
def SWPB16r : Pseudo<(outs GR16:$dst), (ins GR16:$src),
"swpb\t$dst",
[(set GR16:$dst, (bswap GR16:$src))]>;
-let isCommutable = 1 in { // X = OR Y, Z == X = OR Z, Y
-def OR8rr : Pseudo<(outs GR8:$dst), (ins GR8:$src1, GR8:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, GR8:$src2))]>;
-def OR16rr : Pseudo<(outs GR16:$dst), (ins GR16:$src1, GR16:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, GR16:$src2))]>;
-}
-
-def OR8ri : Pseudo<(outs GR8:$dst), (ins GR8:$src1, i8imm:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, imm:$src2))]>;
-def OR16ri : Pseudo<(outs GR16:$dst), (ins GR16:$src1, i16imm:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, imm:$src2))]>;
-
-def OR8rm : Pseudo<(outs GR8:$dst), (ins GR8:$src1, memsrc:$src2),
- "bis.b\t{$src2, $dst}",
- [(set GR8:$dst, (or GR8:$src1, (load addr:$src2)))]>;
-def OR16rm : Pseudo<(outs GR16:$dst), (ins GR16:$src1, memsrc:$src2),
- "bis.w\t{$src2, $dst}",
- [(set GR16:$dst, (or GR16:$src1, (load addr:$src2)))]>;
-
-let isTwoAddress = 0 in {
-def OR8mr : Pseudo<(outs), (ins memdst:$dst, GR8:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), GR8:$src), addr:$dst)]>;
-def OR16mr : Pseudo<(outs), (ins memdst:$dst, GR16:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), GR16:$src), addr:$dst)]>;
-
-def OR8mi : Pseudo<(outs), (ins memdst:$dst, i8imm:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (load addr:$dst), (i8 imm:$src)), addr:$dst)]>;
-def OR16mi : Pseudo<(outs), (ins memdst:$dst, i16imm:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (load addr:$dst), (i16 imm:$src)), addr:$dst)]>;
-
-def OR8mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
- "bis.b\t{$src, $dst}",
- [(store (or (i8 (load addr:$dst)),
- (i8 (load addr:$src))), addr:$dst)]>;
-def OR16mm : Pseudo<(outs), (ins memdst:$dst, memsrc:$src),
- "bis.w\t{$src, $dst}",
- [(store (or (i16 (load addr:$dst)),
- (i16 (load addr:$src))), addr:$dst)]>;
-}
-
} // isTwoAddress = 1
// Integer comparisons
@@ -851,3 +933,6 @@ def : Pat<(store (subc (load addr:$dst), GR8:$src), addr:$dst),
(SUB8mr addr:$dst, GR8:$src)>;
def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
(SUB8mm addr:$dst, addr:$src)>;
+
+// peephole patterns
+def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
diff --git a/lib/Target/MSP430/MSP430MCAsmInfo.cpp b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
index 069313e..4e3a8d0 100644
--- a/lib/Target/MSP430/MSP430MCAsmInfo.cpp
+++ b/lib/Target/MSP430/MSP430MCAsmInfo.cpp
@@ -15,6 +15,12 @@
using namespace llvm;
MSP430MCAsmInfo::MSP430MCAsmInfo(const Target &T, const StringRef &TT) {
+ PrivateGlobalPrefix = ".L";
+ WeakRefDirective ="\t.weak\t";
+ SetDirective = "\t.set\t";
+ PCSymbol=".";
+
AlignmentIsInBytes = false;
AllowNameToStartWithDigit = true;
+ UsesELFSectionDirectiveForBSS = true;
}
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 1a5893e..92baad9 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -212,7 +212,7 @@ MSP430RegisterInfo::processFunctionBeforeFrameFinalized(MachineFunction &MF)
const {
// Create a frame entry for the FPW register that must be saved.
if (hasFP(MF)) {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(2, -4, true, false);
assert(FrameIdx == MF.getFrameInfo()->getObjectIndexBegin() &&
"Slot for FPW register must be last in order to be found!");
FrameIdx = 0;
@@ -355,7 +355,7 @@ unsigned MSP430RegisterInfo::getRARegister() const {
return MSP430::PCW;
}
-unsigned MSP430RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned MSP430RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? MSP430::FPW : MSP430::SPW;
}
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.h b/lib/Target/MSP430/MSP430RegisterInfo.h
index 5f3a216..aa08787 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.h
+++ b/lib/Target/MSP430/MSP430RegisterInfo.h
@@ -60,7 +60,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
//! Get DWARF debugging register number
int getDwarfRegNum(unsigned RegNum, bool isEH) const;
diff --git a/lib/Target/MSP430/MSP430TargetMachine.cpp b/lib/Target/MSP430/MSP430TargetMachine.cpp
index da54507..14db406 100644
--- a/lib/Target/MSP430/MSP430TargetMachine.cpp
+++ b/lib/Target/MSP430/MSP430TargetMachine.cpp
@@ -32,7 +32,7 @@ MSP430TargetMachine::MSP430TargetMachine(const Target &T,
LLVMTargetMachine(T, TT),
Subtarget(TT, FS),
// FIXME: Check TargetData string.
- DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32"),
+ DataLayout("e-p:16:16:16-i8:8:8-i16:16:16-i32:16:32-n8:16"),
InstrInfo(*this), TLInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsDown, 2, -2) { }
diff --git a/lib/Target/MSP430/README.txt b/lib/Target/MSP430/README.txt
index b14e93d..5b9634b 100644
--- a/lib/Target/MSP430/README.txt
+++ b/lib/Target/MSP430/README.txt
@@ -11,8 +11,6 @@ available pretty soon.
Some things are incomplete / not implemented yet (this list surely is not
complete as well):
-0. Implement asmprinting for variables :)
-
1. Verify, how stuff is handling implicit zext with 8 bit operands (this might
be modelled currently in improper way - should we need to mark the superreg as
def for every 8 bit instruction?).
diff --git a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
index 66ade89..4898fae 100644
--- a/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
+++ b/lib/Target/Mips/AsmPrinter/MipsAsmPrinter.cpp
@@ -282,7 +282,7 @@ bool MipsAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// Print the assembly for the instruction.
printInstruction(II);
- if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*II);
O << '\n';
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index 810dce1..cbcedb8 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -108,7 +108,6 @@ private:
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void MipsDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
// Codegen the basic block.
DEBUG(errs() << "===== Instruction selection begins:\n");
DEBUG(Indent = 0);
@@ -171,6 +170,27 @@ SelectAddr(SDValue Op, SDValue Addr, SDValue &Offset, SDValue &Base)
return true;
}
}
+
+ // When loading from constant pools, load the lower address part in
+ // the instruction itself. Instead of:
+ // lui $2, %hi($CPI1_0)
+ // addiu $2, $2, %lo($CPI1_0)
+ // lwc1 $f0, 0($2)
+ // Generate:
+ // lui $2, %hi($CPI1_0)
+ // lwc1 $f0, %lo($CPI1_0)($2)
+ if (Addr.getOperand(0).getOpcode() == MipsISD::Hi &&
+ Addr.getOperand(1).getOpcode() == MipsISD::Lo) {
+ SDValue LoVal = Addr.getOperand(1);
+ if (ConstantPoolSDNode *CP = dyn_cast<ConstantPoolSDNode>(
+ LoVal.getOperand(0))) {
+ if (!CP->getOffset()) {
+ Base = Addr.getOperand(0);
+ Offset = LoVal.getOperand(0);
+ return true;
+ }
+ }
+ }
}
Base = Addr;
@@ -315,6 +335,16 @@ SDNode* MipsDAGToDAGISel::Select(SDValue N) {
case ISD::GLOBAL_OFFSET_TABLE:
return getGlobalBaseReg();
+ case ISD::ConstantFP: {
+ ConstantFPSDNode *CN = dyn_cast<ConstantFPSDNode>(N);
+ if (N.getValueType() == MVT::f64 && CN->isExactlyValue(+0.0)) {
+ SDValue Zero = CurDAG->getRegister(Mips::ZERO, MVT::i32);
+ ReplaceUses(N, Zero);
+ return Zero.getNode();
+ }
+ break;
+ }
+
/// Handle direct and indirect calls when using PIC. On PIC, when
/// GOT is smaller than about 64k (small code) the GA target is
/// loaded with only one instruction. Otherwise GA's target must
diff --git a/lib/Target/Mips/MipsISelLowering.cpp b/lib/Target/Mips/MipsISelLowering.cpp
index 61da8f8..c9a43b4 100644
--- a/lib/Target/Mips/MipsISelLowering.cpp
+++ b/lib/Target/Mips/MipsISelLowering.cpp
@@ -568,7 +568,7 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
Constant *C = N->getConstVal();
SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
- MipsII::MO_ABS_HILO);
+ N->getOffset(), MipsII::MO_ABS_HILO);
// FIXME there isn't actually debug info here
DebugLoc dl = Op.getDebugLoc();
@@ -704,7 +704,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// the stack (even if less than 4 are used as arguments)
if (Subtarget->isABI_O32()) {
int VTsize = EVT(MVT::i32).getSizeInBits()/8;
- MFI->CreateFixedObject(VTsize, (VTsize*3));
+ MFI->CreateFixedObject(VTsize, (VTsize*3), true, false);
CCInfo.AnalyzeCallOperands(Outs, CC_MipsO32);
} else
CCInfo.AnalyzeCallOperands(Outs, CC_Mips);
@@ -773,7 +773,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// if O32 ABI is used. For EABI the first address is zero.
LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
- LastArgStackLoc);
+ LastArgStackLoc, true, false);
SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
@@ -849,7 +849,7 @@ MipsTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create the frame index only once. SPOffset here can be anything
// (this will be fixed on processFunctionBeforeFrameFinalized)
if (MipsFI->getGPStackOffset() == -1) {
- FI = MFI->CreateFixedObject(4, 0);
+ FI = MFI->CreateFixedObject(4, 0, true, false);
MipsFI->setGPFI(FI);
}
MipsFI->setGPStackOffset(LastArgStackLoc);
@@ -1002,7 +1002,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// be used on emitPrologue) to avoid mis-calc of the first stack
// offset on PEI::calculateFrameObjectOffsets.
// Arguments are always 32-bit.
- int FI = MFI->CreateFixedObject(4, 0);
+ int FI = MFI->CreateFixedObject(4, 0, true, false);
MipsFI->recordStoreVarArgsFI(FI, -(4+(i*4)));
SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
@@ -1025,7 +1025,7 @@ MipsTargetLowering::LowerFormalArguments(SDValue Chain,
// offset on PEI::calculateFrameObjectOffsets.
// Arguments are always 32-bit.
unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
- int FI = MFI->CreateFixedObject(ArgSize, 0);
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
MipsFI->recordLoadArgsFI(FI, -(ArgSize+
(FirstStackArgLoc + VA.getLocMemOffset())));
diff --git a/lib/Target/Mips/MipsInstrFPU.td b/lib/Target/Mips/MipsInstrFPU.td
index bd61738..ce89cfd 100644
--- a/lib/Target/Mips/MipsInstrFPU.td
+++ b/lib/Target/Mips/MipsInstrFPU.td
@@ -48,6 +48,7 @@ let PrintMethod = "printFCCOperand" in
def In32BitMode : Predicate<"!Subtarget.isFP64bit()">;
def IsSingleFloat : Predicate<"Subtarget.isSingleFloat()">;
def IsNotSingleFloat : Predicate<"!Subtarget.isSingleFloat()">;
+def IsNotMipsI : Predicate<"!Subtarget.isMips1()">;
//===----------------------------------------------------------------------===//
// Instruction Class Templates
@@ -173,7 +174,7 @@ let fd = 0 in {
}
/// Floating Point Memory Instructions
-let Predicates = [IsNotSingleFloat] in {
+let Predicates = [IsNotSingleFloat, IsNotMipsI] in {
def LDC1 : FFI<0b110101, (outs AFGR64:$ft), (ins mem:$addr),
"ldc1 $ft, $addr", [(set AFGR64:$ft, (load addr:$addr))]>;
@@ -284,7 +285,12 @@ def fpimm0 : PatLeaf<(fpimm), [{
return N->isExactlyValue(+0.0);
}]>;
+def fpimm0neg : PatLeaf<(fpimm), [{
+ return N->isExactlyValue(-0.0);
+}]>;
+
def : Pat<(f32 fpimm0), (MTC1 ZERO)>;
+def : Pat<(f32 fpimm0neg), (FNEG_S32 (MTC1 ZERO))>;
def : Pat<(f32 (sint_to_fp CPURegs:$src)), (CVTS_W32 (MTC1 CPURegs:$src))>;
def : Pat<(f64 (sint_to_fp CPURegs:$src)), (CVTD_W32 (MTC1 CPURegs:$src))>;
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 9159904..af64c9f 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -134,6 +134,9 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
const TargetRegisterClass *DestRC,
const TargetRegisterClass *SrcRC) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
+ const MachineFunction *MF = MBB.getParent();
+ const TargetRegisterInfo *TRI = MF->getTarget().getRegisterInfo();
+
if (I != MBB.end()) DL = I->getDebugLoc();
if (DestRC != SrcRC) {
@@ -153,6 +156,13 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
else if ((DestRC == Mips::FGR32RegisterClass) &&
(SrcRC == Mips::CPURegsRegisterClass))
BuildMI(MBB, I, DL, get(Mips::MTC1), DestReg).addReg(SrcReg);
+ else if ((DestRC == Mips::AFGR64RegisterClass) &&
+ (SrcRC == Mips::CPURegsRegisterClass) &&
+ (SrcReg == Mips::ZERO)) {
+ const unsigned *AliasSet = TRI->getAliasSet(DestReg);
+ BuildMI(MBB, I, DL, get(Mips::MTC1), AliasSet[0]).addReg(SrcReg);
+ BuildMI(MBB, I, DL, get(Mips::MTC1), AliasSet[1]).addReg(SrcReg);
+ }
// Move from/to Hi/Lo registers
else if ((DestRC == Mips::HILORegisterClass) &&
@@ -163,9 +173,8 @@ copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
(DestRC == Mips::CPURegsRegisterClass)) {
unsigned Opc = (SrcReg == Mips::HI) ? Mips::MFHI : Mips::MFLO;
BuildMI(MBB, I, DL, get(Opc), DestReg);
-
- // Can't copy this register
- } else
+ } else
+ // Can't copy this register
return false;
return true;
diff --git a/lib/Target/Mips/MipsMachineFunction.h b/lib/Target/Mips/MipsMachineFunction.h
index 949c78a..a300f49 100644
--- a/lib/Target/Mips/MipsMachineFunction.h
+++ b/lib/Target/Mips/MipsMachineFunction.h
@@ -103,6 +103,7 @@ public:
int getGPFI() const { return GPHolder.FI; }
void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
void setGPFI(int FI) { GPHolder.FI = FI; }
+ bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
bool hasLoadArgs() const { return HasLoadArgs; }
bool hasStoreVarArgs() const { return HasStoreVarArgs; }
diff --git a/lib/Target/Mips/MipsRegisterInfo.cpp b/lib/Target/Mips/MipsRegisterInfo.cpp
index d2289e9..ad326db 100644
--- a/lib/Target/Mips/MipsRegisterInfo.cpp
+++ b/lib/Target/Mips/MipsRegisterInfo.cpp
@@ -287,7 +287,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
}
if (hasFP(MF)) {
- MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize),
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
StackOffset);
MipsFI->setFPStackOffset(StackOffset);
TopCPUSavedRegOff = StackOffset;
@@ -295,7 +295,7 @@ void MipsRegisterInfo::adjustMipsStackFrame(MachineFunction &MF) const
}
if (MFI->hasCalls()) {
- MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize),
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
StackOffset);
MipsFI->setRAStackOffset(StackOffset);
TopCPUSavedRegOff = StackOffset;
@@ -438,11 +438,10 @@ emitPrologue(MachineFunction &MF) const
.addReg(Mips::SP).addReg(Mips::ZERO);
}
- // PIC speficic function prologue
- if ((isPIC) && (MFI->hasCalls())) {
+ // Restore GP from the saved stack location
+ if (MipsFI->needGPSaveRestore())
BuildMI(MBB, MBBI, dl, TII.get(Mips::CPRESTORE))
.addImm(MipsFI->getGPStackOffset());
- }
}
void MipsRegisterInfo::
@@ -489,13 +488,11 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const
void MipsRegisterInfo::
processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
- // Set the SPOffset on the FI where GP must be saved/loaded.
+ // Set the stack offset where GP must be saved/loaded from.
MachineFrameInfo *MFI = MF.getFrameInfo();
- bool isPIC = (MF.getTarget().getRelocationModel() == Reloc::PIC_);
- if (MFI->hasCalls() && isPIC) {
- MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ MipsFunctionInfo *MipsFI = MF.getInfo<MipsFunctionInfo>();
+ if (MipsFI->needGPSaveRestore())
MFI->setObjectOffset(MipsFI->getGPFI(), MipsFI->getGPStackOffset());
- }
}
unsigned MipsRegisterInfo::
@@ -504,7 +501,7 @@ getRARegister() const {
}
unsigned MipsRegisterInfo::
-getFrameRegister(MachineFunction &MF) const {
+getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? Mips::FP : Mips::SP;
}
diff --git a/lib/Target/Mips/MipsRegisterInfo.h b/lib/Target/Mips/MipsRegisterInfo.h
index 122f786..5b45921 100644
--- a/lib/Target/Mips/MipsRegisterInfo.h
+++ b/lib/Target/Mips/MipsRegisterInfo.h
@@ -65,7 +65,7 @@ struct MipsRegisterInfo : public MipsGenRegisterInfo {
/// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
/// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/Mips/MipsTargetMachine.cpp b/lib/Target/Mips/MipsTargetMachine.cpp
index 4fa5450..b3c2313 100644
--- a/lib/Target/Mips/MipsTargetMachine.cpp
+++ b/lib/Target/Mips/MipsTargetMachine.cpp
@@ -38,8 +38,8 @@ MipsTargetMachine(const Target &T, const std::string &TT, const std::string &FS,
bool isLittle=false):
LLVMTargetMachine(T, TT),
Subtarget(TT, FS, isLittle),
- DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32") :
- std::string("E-p:32:32:32-i8:8:32-i16:16:32")),
+ DataLayout(isLittle ? std::string("e-p:32:32:32-i8:8:32-i16:16:32-n32") :
+ std::string("E-p:32:32:32-i8:8:32-i16:16:32-n32")),
InstrInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
TLInfo(*this) {
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
index b2a4c11..e1f2587 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
@@ -46,7 +46,7 @@ PIC16AsmPrinter::PIC16AsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
bool PIC16AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
processDebugLoc(MI, true);
printInstruction(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
processDebugLoc(MI, false);
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
index cc57d12..e13e6cd 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
@@ -30,7 +30,6 @@ FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) {
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void PIC16DAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
}
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index 635befe..71c3d37 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -1070,7 +1070,7 @@ SDValue PIC16TargetLowering::ConvertToMemOperand(SDValue Op,
// Put the value on stack.
// Get a stack slot index and convert to es.
- int FI = MF.getFrameInfo()->CreateStackObject(1, 1);
+ int FI = MF.getFrameInfo()->CreateStackObject(1, 1, false);
const char *tmpName = createESName(PAN::getTempdataLabel(FuncName));
SDValue ES = DAG.getTargetExternalSymbol(tmpName, MVT::i8);
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.cpp b/lib/Target/PIC16/PIC16RegisterInfo.cpp
index 47087ab..8ba9a1d 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.cpp
+++ b/lib/Target/PIC16/PIC16RegisterInfo.cpp
@@ -72,7 +72,7 @@ getDwarfRegNum(unsigned RegNum, bool isEH) const {
return -1;
}
-unsigned PIC16RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned PIC16RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
llvm_unreachable("PIC16 Does not have any frame register");
return 0;
}
diff --git a/lib/Target/PIC16/PIC16RegisterInfo.h b/lib/Target/PIC16/PIC16RegisterInfo.h
index 8aa5a10..1d5dbbf 100644
--- a/lib/Target/PIC16/PIC16RegisterInfo.h
+++ b/lib/Target/PIC16/PIC16RegisterInfo.h
@@ -59,7 +59,7 @@ class PIC16RegisterInfo : public PIC16GenRegisterInfo {
virtual void emitPrologue(MachineFunction &MF) const;
virtual void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
virtual int getDwarfRegNum(unsigned RegNum, bool isEH) const;
- virtual unsigned getFrameRegister(MachineFunction &MF) const;
+ virtual unsigned getFrameRegister(const MachineFunction &MF) const;
virtual unsigned getRARegister() const;
};
diff --git a/lib/Target/PIC16/PIC16TargetMachine.cpp b/lib/Target/PIC16/PIC16TargetMachine.cpp
index 08307e7..e2acb85 100644
--- a/lib/Target/PIC16/PIC16TargetMachine.cpp
+++ b/lib/Target/PIC16/PIC16TargetMachine.cpp
@@ -34,7 +34,7 @@ PIC16TargetMachine::PIC16TargetMachine(const Target &T, const std::string &TT,
const std::string &FS, bool Trad)
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS, Trad),
- DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8"),
+ DataLayout("e-p:16:8:8-i8:8:8-i16:8:8-i32:8:8-n8"),
InstrInfo(*this), TLInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0) { }
diff --git a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
index 2dac18f..aae4607 100644
--- a/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
+++ b/lib/Target/PowerPC/AsmPrinter/PPCAsmPrinter.cpp
@@ -594,7 +594,7 @@ void PPCAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
printInstruction(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
@@ -672,14 +672,14 @@ bool PPCLinuxAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
O << "\t.size\t" << CurrentFnName << ",.-" << CurrentFnName << '\n';
- // Print out jump tables referenced by the function.
- EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
-
OutStreamer.SwitchSection(getObjFileLowering().SectionForGlobal(F, Mang, TM));
// Emit post-function debug information.
DW->EndFunction(&MF);
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
// We didn't modify anything.
return false;
}
@@ -853,12 +853,12 @@ bool PPCDarwinAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
}
}
- // Print out jump tables referenced by the function.
- EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
-
// Emit post-function debug information.
DW->EndFunction(&MF);
+ // Print out jump tables referenced by the function.
+ EmitJumpTableInfo(MF.getJumpTableInfo(), MF);
+
// We didn't modify anything.
return false;
}
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index b866240..fb9a240 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -187,8 +187,6 @@ private:
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void PPCDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
-
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
CurDAG->RemoveDeadNodes();
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index 7f48ef0..099fcb5 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -637,7 +637,7 @@ bool PPC::isAllNegativeZeroVector(SDNode *N) {
unsigned BitSize;
bool HasAnyUndefs;
- if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32))
+ if (BV->isConstantSplat(APVal, APUndef, BitSize, HasAnyUndefs, 32, true))
if (ConstantFPSDNode *CFP = dyn_cast<ConstantFPSDNode>(N->getOperand(0)))
return CFP->getValueAPF().isNegZero();
@@ -1625,7 +1625,7 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
unsigned ArgSize = VA.getLocVT().getSizeInBits() / 8;
int FI = MFI->CreateFixedObject(ArgSize, VA.getLocMemOffset(),
- isImmutable);
+ isImmutable, false);
// Create load nodes to retrieve arguments from the stack.
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
@@ -1690,9 +1690,10 @@ PPCTargetLowering::LowerFormalArguments_SVR4(
NumFPArgRegs * EVT(MVT::f64).getSizeInBits()/8;
VarArgsStackOffset = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- CCInfo.getNextStackOffset());
+ CCInfo.getNextStackOffset(),
+ true, false);
- VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8);
+ VarArgsFrameIndex = MFI->CreateStackObject(Depth, 8, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
// The fixed integer arguments of a variadic function are
@@ -1895,7 +1896,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
CurArgOffset = CurArgOffset + (4 - ObjSize);
}
// The value of the object is its address.
- int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset);
+ int FI = MFI->CreateFixedObject(ObjSize, CurArgOffset, true, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
InVals.push_back(FIN);
if (ObjSize==1 || ObjSize==2) {
@@ -1918,7 +1919,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
// the object.
if (GPR_idx != Num_GPR_Regs) {
unsigned VReg = MF.addLiveIn(GPR[GPR_idx], &PPC::GPRCRegClass);
- int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset);
+ int FI = MFI->CreateFixedObject(PtrByteSize, ArgOffset, true, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
SDValue Val = DAG.getCopyFromReg(Chain, dl, VReg, PtrVT);
SDValue Store = DAG.getStore(Val.getValue(1), dl, Val, FIN, NULL, 0);
@@ -2043,7 +2044,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
if (needsLoad) {
int FI = MFI->CreateFixedObject(ObjSize,
CurArgOffset + (ArgSize - ObjSize),
- isImmutable);
+ isImmutable, false);
SDValue FIN = DAG.getFrameIndex(FI, PtrVT);
ArgVal = DAG.getLoad(ObjectVT, dl, Chain, FIN, NULL, 0);
}
@@ -2076,7 +2077,7 @@ PPCTargetLowering::LowerFormalArguments_Darwin(
int Depth = ArgOffset;
VarArgsFrameIndex = MFI->CreateFixedObject(PtrVT.getSizeInBits()/8,
- Depth);
+ Depth, true, false);
SDValue FIN = DAG.getFrameIndex(VarArgsFrameIndex, PtrVT);
// If this function is vararg, store any remaining integer argument regs
@@ -2289,7 +2290,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
int NewRetAddrLoc = SPDiff + PPCFrameInfo::getReturnSaveOffset(isPPC64,
isDarwinABI);
int NewRetAddr = MF.getFrameInfo()->CreateFixedObject(SlotSize,
- NewRetAddrLoc);
+ NewRetAddrLoc,
+ true, false);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewRetAddr, VT);
Chain = DAG.getStore(Chain, dl, OldRetAddr, NewRetAddrFrIdx,
@@ -2300,7 +2302,8 @@ static SDValue EmitTailCallStoreFPAndRetAddr(SelectionDAG &DAG,
if (isDarwinABI) {
int NewFPLoc =
SPDiff + PPCFrameInfo::getFramePointerSaveOffset(isPPC64, isDarwinABI);
- int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc);
+ int NewFPIdx = MF.getFrameInfo()->CreateFixedObject(SlotSize, NewFPLoc,
+ true, false);
SDValue NewFramePtrIdx = DAG.getFrameIndex(NewFPIdx, VT);
Chain = DAG.getStore(Chain, dl, OldFP, NewFramePtrIdx,
PseudoSourceValue::getFixedStack(NewFPIdx), 0);
@@ -2317,7 +2320,7 @@ CalculateTailCallArgDest(SelectionDAG &DAG, MachineFunction &MF, bool isPPC64,
SmallVector<TailCallArgumentInfo, 8>& TailCallArguments) {
int Offset = ArgOffset + SPDiff;
uint32_t OpSize = (Arg.getValueType().getSizeInBits()+7)/8;
- int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ int FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true,false);
EVT VT = isPPC64 ? MVT::i64 : MVT::i32;
SDValue FIN = DAG.getFrameIndex(FI, VT);
TailCallArgumentInfo Info;
@@ -3224,7 +3227,8 @@ PPCTargetLowering::getReturnAddrFrameIndex(SelectionDAG & DAG) const {
// Find out what the fix offset of the frame pointer save area.
int LROffset = PPCFrameInfo::getReturnSaveOffset(IsPPC64, isDarwinABI);
// Allocate the frame index for frame pointer save area.
- RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset);
+ RASI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, LROffset,
+ true, false);
// Save the result.
FI->setReturnAddrSaveIndex(RASI);
}
@@ -3250,7 +3254,8 @@ PPCTargetLowering::getFramePointerFrameIndex(SelectionDAG & DAG) const {
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+ FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset,
+ true, false);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -3411,7 +3416,7 @@ SDValue PPCTargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
// then lfd it and fcfid it.
MachineFunction &MF = DAG.getMachineFunction();
MachineFrameInfo *FrameInfo = MF.getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(8, 8);
+ int FrameIdx = FrameInfo->CreateStackObject(8, 8, false);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
@@ -3469,7 +3474,7 @@ SDValue PPCTargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
SDValue Chain = DAG.getNode(PPCISD::MFFS, dl, NodeTys, &InFlag, 0);
// Save FP register to stack slot
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, PtrVT);
SDValue Store = DAG.getStore(DAG.getEntryNode(), dl, Chain,
StackSlot, NULL, 0);
@@ -3667,7 +3672,7 @@ SDValue PPCTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
unsigned SplatBitSize;
bool HasAnyUndefs;
if (! BVN->isConstantSplat(APSplatBits, APSplatUndef, SplatBitSize,
- HasAnyUndefs) || SplatBitSize > 32)
+ HasAnyUndefs, 0, true) || SplatBitSize > 32)
return SDValue();
unsigned SplatBits = APSplatBits.getZExtValue();
@@ -4137,7 +4142,7 @@ SDValue PPCTargetLowering::LowerSCALAR_TO_VECTOR(SDValue Op,
DebugLoc dl = Op.getDebugLoc();
// Create a stack slot that is 16-byte aligned.
MachineFrameInfo *FrameInfo = DAG.getMachineFunction().getFrameInfo();
- int FrameIdx = FrameInfo->CreateStackObject(16, 16);
+ int FrameIdx = FrameInfo->CreateStackObject(16, 16, false);
EVT PtrVT = DAG.getTargetLoweringInfo().getPointerTy();
SDValue FIdx = DAG.getFrameIndex(FrameIdx, PtrVT);
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.cpp b/lib/Target/PowerPC/PPCRegisterInfo.cpp
index cf5c7c0..e65e644 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.cpp
+++ b/lib/Target/PowerPC/PPCRegisterInfo.cpp
@@ -1043,7 +1043,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FPOffset = PPCFrameInfo::getFramePointerSaveOffset(IsPPC64,
isDarwinABI);
// Allocate the frame index for frame pointer save area.
- FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset);
+ FPSI = MF.getFrameInfo()->CreateFixedObject(IsPPC64? 8 : 4, FPOffset,
+ true, false);
// Save the result.
FI->setFramePointerSaveIndex(FPSI);
}
@@ -1051,7 +1052,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Reserve stack space to move the linkage area to in case of a tail call.
int TCSPDelta = 0;
if (PerformTailCallOpt && (TCSPDelta = FI->getTailCallSPDelta()) < 0) {
- MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta);
+ MF.getFrameInfo()->CreateFixedObject(-1 * TCSPDelta, TCSPDelta,
+ true, false);
}
// Reserve a slot closest to SP or frame pointer if we have a dynalloc or
@@ -1067,7 +1069,8 @@ PPCRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
const TargetRegisterClass *G8RC = &PPC::G8RCRegClass;
const TargetRegisterClass *RC = IsPPC64 ? G8RC : GPRC;
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
}
@@ -1356,12 +1359,6 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
unsigned TargetAlign = MF.getTarget().getFrameInfo()->getStackAlignment();
unsigned MaxAlign = MFI->getMaxAlignment();
- if (needsFrameMoves) {
- // Mark effective beginning of when frame pointer becomes valid.
- FrameLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId);
- }
-
// Adjust stack pointer: r1 += NegFrameSize.
// If there is a preferred stack alignment, align R1 now
if (!IsPPC64) {
@@ -1431,12 +1428,18 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X0);
}
}
+
+ std::vector<MachineMove> &Moves = MMI->getFrameMoves();
+ // Add the "machine moves" for the instructions we generated above, but in
+ // reverse order.
if (needsFrameMoves) {
- std::vector<MachineMove> &Moves = MMI->getFrameMoves();
-
+ // Mark effective beginning of when frame pointer becomes valid.
+ FrameLabelId = MMI->NextLabelID();
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(FrameLabelId);
+
+ // Show update of SP.
if (NegFrameSize) {
- // Show update of SP.
MachineLocation SPDst(MachineLocation::VirtualFP);
MachineLocation SPSrc(MachineLocation::VirtualFP, NegFrameSize);
Moves.push_back(MachineMove(FrameLabelId, SPDst, SPSrc));
@@ -1451,31 +1454,15 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
Moves.push_back(MachineMove(FrameLabelId, FPDst, FPSrc));
}
- // Add callee saved registers to move list.
- const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
- for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
- int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
- unsigned Reg = CSI[I].getReg();
- if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
- MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
- MachineLocation CSSrc(Reg);
- Moves.push_back(MachineMove(FrameLabelId, CSDst, CSSrc));
+ if (MustSaveLR) {
+ MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
+ MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR);
+ Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc));
}
-
- MachineLocation LRDst(MachineLocation::VirtualFP, LROffset);
- MachineLocation LRSrc(IsPPC64 ? PPC::LR8 : PPC::LR);
- Moves.push_back(MachineMove(FrameLabelId, LRDst, LRSrc));
-
- // Mark effective beginning of when frame pointer is ready.
- unsigned ReadyLabelId = MMI->NextLabelID();
- BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId);
-
- MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) :
- (IsPPC64 ? PPC::X1 : PPC::R1));
- MachineLocation FPSrc(MachineLocation::VirtualFP);
- Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
}
+ unsigned ReadyLabelId = 0;
+
// If there is a frame pointer, copy R1 into R31
if (HasFP) {
if (!IsPPC64) {
@@ -1487,6 +1474,33 @@ PPCRegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(PPC::X1)
.addReg(PPC::X1);
}
+
+ if (needsFrameMoves) {
+ ReadyLabelId = MMI->NextLabelID();
+
+ // Mark effective beginning of when frame pointer is ready.
+ BuildMI(MBB, MBBI, dl, TII.get(PPC::DBG_LABEL)).addImm(ReadyLabelId);
+
+ MachineLocation FPDst(HasFP ? (IsPPC64 ? PPC::X31 : PPC::R31) :
+ (IsPPC64 ? PPC::X1 : PPC::R1));
+ MachineLocation FPSrc(MachineLocation::VirtualFP);
+ Moves.push_back(MachineMove(ReadyLabelId, FPDst, FPSrc));
+ }
+ }
+
+ if (needsFrameMoves) {
+ unsigned LabelId = HasFP ? ReadyLabelId : FrameLabelId;
+
+ // Add callee saved registers to move list.
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned I = 0, E = CSI.size(); I != E; ++I) {
+ int Offset = MFI->getObjectOffset(CSI[I].getFrameIdx());
+ unsigned Reg = CSI[I].getReg();
+ if (Reg == PPC::LR || Reg == PPC::LR8 || Reg == PPC::RM) continue;
+ MachineLocation CSDst(MachineLocation::VirtualFP, Offset);
+ MachineLocation CSSrc(Reg);
+ Moves.push_back(MachineMove(LabelId, CSDst, CSSrc));
+ }
}
}
@@ -1700,7 +1714,7 @@ unsigned PPCRegisterInfo::getRARegister() const {
return !Subtarget.isPPC64() ? PPC::LR : PPC::LR8;
}
-unsigned PPCRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned PPCRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
if (!Subtarget.isPPC64())
return hasFP(MF) ? PPC::R31 : PPC::R1;
else
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.h b/lib/Target/PowerPC/PPCRegisterInfo.h
index 1689bc2..3aeed80 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.h
+++ b/lib/Target/PowerPC/PPCRegisterInfo.h
@@ -83,7 +83,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
// Exception handling queries.
diff --git a/lib/Target/PowerPC/PPCSubtarget.h b/lib/Target/PowerPC/PPCSubtarget.h
index 02c8ad7..75fcf62 100644
--- a/lib/Target/PowerPC/PPCSubtarget.h
+++ b/lib/Target/PowerPC/PPCSubtarget.h
@@ -101,8 +101,8 @@ public:
const char *getTargetDataString() const {
// Note, the alignment values for f64 and i64 on ppc64 in Darwin
// documentation are wrong; these are correct (i.e. "what gcc does").
- return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128"
- : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128";
+ return isPPC64() ? "E-p:64:64-f64:64:64-i64:64:64-f128:64:128-n32:64"
+ : "E-p:32:32-f64:32:64-i64:32:64-f128:64:128-n32";
}
/// isPPC64 - Return true if we are generating code for 64-bit pointer mode.
diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp
index 3371954..8079c6e 100644
--- a/lib/Target/PowerPC/PPCTargetMachine.cpp
+++ b/lib/Target/PowerPC/PPCTargetMachine.cpp
@@ -20,8 +20,7 @@
#include "llvm/Support/FormattedStream.h"
using namespace llvm;
-static const MCAsmInfo *createMCAsmInfo(const Target &T,
- const StringRef &TT) {
+static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
bool isPPC64 = TheTriple.getArch() == Triple::ppc64;
if (TheTriple.getOS() == Triple::Darwin)
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index a345d3d..aad621f 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -339,6 +339,8 @@ we don't have whole-function selection dags. On x86, this means we use one
extra register for the function when effective_addr2 is declared as U64 than
when it is declared U32.
+PHI Slicing could be extended to do this.
+
//===---------------------------------------------------------------------===//
LSR should know what GPR types a target has. This code:
@@ -406,22 +408,6 @@ return: ; preds = %then.1, %else.0, %then.0
//===---------------------------------------------------------------------===//
-Tail recursion elimination is not transforming this function, because it is
-returning n, which fails the isDynamicConstant check in the accumulator
-recursion checks.
-
-long long fib(const long long n) {
- switch(n) {
- case 0:
- case 1:
- return n;
- default:
- return fib(n-1) + fib(n-2);
- }
-}
-
-//===---------------------------------------------------------------------===//
-
Tail recursion elimination should handle:
int pow2m1(int n) {
@@ -1229,6 +1215,40 @@ GCC PR33344 is a similar case.
//===---------------------------------------------------------------------===//
+[PHI TRANSLATE INDEXED GEPs] PR5313
+
+Load redundancy elimination for simple loop. This loop:
+
+void append_text(const char* text,unsigned char * const io) {
+ while(*text)
+ *io=*text++;
+}
+
+Compiles to have a fully redundant load in the loop (%2):
+
+define void @append_text(i8* nocapture %text, i8* nocapture %io) nounwind {
+entry:
+ %0 = load i8* %text, align 1 ; <i8> [#uses=1]
+ %1 = icmp eq i8 %0, 0 ; <i1> [#uses=1]
+ br i1 %1, label %return, label %bb
+
+bb: ; preds = %bb, %entry
+ %indvar = phi i32 [ 0, %entry ], [ %tmp, %bb ] ; <i32> [#uses=2]
+ %text_addr.04 = getelementptr i8* %text, i32 %indvar ; <i8*> [#uses=1]
+ %2 = load i8* %text_addr.04, align 1 ; <i8> [#uses=1]
+ store i8 %2, i8* %io, align 1
+ %tmp = add i32 %indvar, 1 ; <i32> [#uses=2]
+ %scevgep = getelementptr i8* %text, i32 %tmp ; <i8*> [#uses=1]
+ %3 = load i8* %scevgep, align 1 ; <i8> [#uses=1]
+ %4 = icmp eq i8 %3, 0 ; <i1> [#uses=1]
+ br i1 %4, label %return, label %bb
+
+return: ; preds = %bb, %entry
+ ret void
+}
+
+//===---------------------------------------------------------------------===//
+
There are many load PRE testcases in testsuite/gcc.dg/tree-ssa/loadpre* in the
GCC testsuite. There are many pre testcases as ssa-pre-*.c
@@ -1594,12 +1614,6 @@ int int_char(char m) {if(m>7) return 0; return m;}
//===---------------------------------------------------------------------===//
-IPSCCP is propagating elements of first class aggregates, but is not propagating
-the entire aggregate itself. This leads it to miss opportunities, for example
-in test/Transforms/SCCP/ipsccp-basic.ll:test5b.
-
-//===---------------------------------------------------------------------===//
-
int func(int a, int b) { if (a & 0x80) b |= 0x80; else b &= ~0x80; return b; }
Generates this:
@@ -1668,3 +1682,55 @@ entry:
}
//===---------------------------------------------------------------------===//
+
+IPSCCP does not currently propagate argument dependent constants through
+functions where it does not not all of the callers. This includes functions
+with normal external linkage as well as templates, C99 inline functions etc.
+Specifically, it does nothing to:
+
+define i32 @test(i32 %x, i32 %y, i32 %z) nounwind {
+entry:
+ %0 = add nsw i32 %y, %z
+ %1 = mul i32 %0, %x
+ %2 = mul i32 %y, %z
+ %3 = add nsw i32 %1, %2
+ ret i32 %3
+}
+
+define i32 @test2() nounwind {
+entry:
+ %0 = call i32 @test(i32 1, i32 2, i32 4) nounwind
+ ret i32 %0
+}
+
+It would be interesting to extend IPSCCP to be able to handle simple cases like
+this, where all of the arguments to a call are constant. Because IPSCCP runs
+before inlining, trivial templates and inline functions are not yet inlined.
+The results for a function + set of constant arguments should be memoized in a
+map.
+
+//===---------------------------------------------------------------------===//
+
+The libcall constant folding stuff should be moved out of SimplifyLibcalls into
+libanalysis' constantfolding logic. This would allow IPSCCP to be able to
+handle simple things like this:
+
+static int foo(const char *X) { return strlen(X); }
+int bar() { return foo("abcd"); }
+
+//===---------------------------------------------------------------------===//
+
+InstCombine should use SimplifyDemandedBits to remove the or instruction:
+
+define i1 @test(i8 %x, i8 %y) {
+ %A = or i8 %x, 1
+ %B = icmp ugt i8 %A, 3
+ ret i1 %B
+}
+
+Currently instcombine calls SimplifyDemandedBits with either all bits or just
+the sign bit, if the comparison is obviously a sign test. In this case, we only
+need all but the bottom two bits from %A, and if we gave that mask to SDB it
+would delete the or instruction for us.
+
+//===---------------------------------------------------------------------===//
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index 452b46f..cd85dd4 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -126,7 +126,7 @@ bool SparcAsmPrinter::runOnMachineFunction(MachineFunction &MF) {
processDebugLoc(II, true);
printInstruction(II);
- if (VerboseAsm && !II->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*II);
O << '\n';
processDebugLoc(II, false);
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index a1a4a8e..b41917e 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -75,7 +75,6 @@ private:
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void SparcDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
CurBB = BB;
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
diff --git a/lib/Target/Sparc/SparcISelLowering.cpp b/lib/Target/Sparc/SparcISelLowering.cpp
index 164770d..133f828 100644
--- a/lib/Target/Sparc/SparcISelLowering.cpp
+++ b/lib/Target/Sparc/SparcISelLowering.cpp
@@ -129,7 +129,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
}
InVals.push_back(Arg);
} else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Load;
if (ObjectVT == MVT::i32) {
@@ -163,7 +164,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
Arg = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::f32, Arg);
InVals.push_back(Arg);
} else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
SDValue Load = DAG.getLoad(MVT::f32, dl, Chain, FIPtr, NULL, 0);
InVals.push_back(Load);
@@ -184,7 +186,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
MF.getRegInfo().addLiveIn(*CurArgReg++, VRegHi);
HiVal = DAG.getCopyFromReg(Chain, dl, VRegHi, MVT::i32);
} else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
HiVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
}
@@ -195,7 +198,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
MF.getRegInfo().addLiveIn(*CurArgReg++, VRegLo);
LoVal = DAG.getCopyFromReg(Chain, dl, VRegLo, MVT::i32);
} else {
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset+4,
+ true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
LoVal = DAG.getLoad(MVT::i32, dl, Chain, FIPtr, NULL, 0);
}
@@ -227,7 +231,8 @@ SparcTargetLowering::LowerFormalArguments(SDValue Chain,
MF.getRegInfo().addLiveIn(*CurArgReg, VReg);
SDValue Arg = DAG.getCopyFromReg(DAG.getRoot(), dl, VReg, MVT::i32);
- int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset);
+ int FrameIdx = MF.getFrameInfo()->CreateFixedObject(4, ArgOffset,
+ true, false);
SDValue FIPtr = DAG.getFrameIndex(FrameIdx, MVT::i32);
OutChains.push_back(DAG.getStore(DAG.getRoot(), dl, Arg, FIPtr, NULL, 0));
diff --git a/lib/Target/Sparc/SparcInstrInfo.td b/lib/Target/Sparc/SparcInstrInfo.td
index f2f1b96..d88d508 100644
--- a/lib/Target/Sparc/SparcInstrInfo.td
+++ b/lib/Target/Sparc/SparcInstrInfo.td
@@ -277,7 +277,7 @@ let usesCustomInserter = 1 in { // Expanded after instruction selection.
// Section A.3 - Synthetic Instructions, p. 85
// special cases of JMPL:
-let isReturn = 1, isTerminator = 1, hasDelaySlot = 1 in {
+let isReturn = 1, isTerminator = 1, hasDelaySlot = 1, isBarrier = 1 in {
let rd = O7.Num, rs1 = G0.Num, simm13 = 8 in
def RETL: F3_2<2, 0b111000, (outs), (ins), "retl", [(retflag)]>;
}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.cpp b/lib/Target/Sparc/SparcRegisterInfo.cpp
index 7883260..6f6183e 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.cpp
+++ b/lib/Target/Sparc/SparcRegisterInfo.cpp
@@ -175,7 +175,7 @@ unsigned SparcRegisterInfo::getRARegister() const {
return SP::I7;
}
-unsigned SparcRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned SparcRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return SP::I6;
}
diff --git a/lib/Target/Sparc/SparcRegisterInfo.h b/lib/Target/Sparc/SparcRegisterInfo.h
index 753b1c0..8889ea6 100644
--- a/lib/Target/Sparc/SparcRegisterInfo.h
+++ b/lib/Target/Sparc/SparcRegisterInfo.h
@@ -54,7 +54,7 @@ struct SparcRegisterInfo : public SparcGenRegisterInfo {
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/Sparc/SparcTargetMachine.cpp b/lib/Target/Sparc/SparcTargetMachine.cpp
index 3a38115..1eec112 100644
--- a/lib/Target/Sparc/SparcTargetMachine.cpp
+++ b/lib/Target/Sparc/SparcTargetMachine.cpp
@@ -29,7 +29,7 @@ extern "C" void LLVMInitializeSparcTarget() {
SparcTargetMachine::SparcTargetMachine(const Target &T, const std::string &TT,
const std::string &FS)
: LLVMTargetMachine(T, TT),
- DataLayout("E-p:32:32-f128:128:128"),
+ DataLayout("E-p:32:32-f128:128:128-n32"),
Subtarget(TT, FS), TLInfo(*this), InstrInfo(Subtarget),
FrameInfo(TargetFrameInfo::StackGrowsDown, 8, 0) {
}
diff --git a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
index a4a8d6a..e97e7ca 100644
--- a/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
+++ b/lib/Target/SystemZ/AsmPrinter/SystemZAsmPrinter.cpp
@@ -33,6 +33,7 @@
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Target/TargetRegistry.h"
#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
@@ -154,7 +155,7 @@ void SystemZAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
// Call the autogenerated instruction printer routines.
printInstruction(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 028ee89..d64611d 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -603,8 +603,6 @@ bool SystemZDAGToDAGISel::TryFoldLoad(SDValue P, SDValue N,
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
void SystemZDAGToDAGISel::InstructionSelect() {
- DEBUG(BB->dump());
-
// Codegen the basic block.
DEBUG(errs() << "===== Instruction selection begins:\n");
DEBUG(Indent = 0);
diff --git a/lib/Target/SystemZ/SystemZISelLowering.cpp b/lib/Target/SystemZ/SystemZISelLowering.cpp
index 5c8cae0..d6b476e 100644
--- a/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -34,6 +34,7 @@
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/VectorExtras.h"
using namespace llvm;
@@ -328,7 +329,7 @@ SystemZTargetLowering::LowerCCCArguments(SDValue Chain,
// Create the nodes corresponding to a load from this parameter slot.
// Create the frame index object for this incoming parameter...
int FI = MFI->CreateFixedObject(LocVT.getSizeInBits()/8,
- VA.getLocMemOffset());
+ VA.getLocMemOffset(), true, false);
// Create the SelectionDAG nodes corresponding to a load
// from this parameter
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index 236711c..d82d928 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -22,7 +22,7 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
-
+#include "llvm/Support/ErrorHandling.h"
using namespace llvm;
SystemZInstrInfo::SystemZInstrInfo(SystemZTargetMachine &tm)
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 38460a6..4d1c01f 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -320,7 +320,8 @@ unsigned SystemZRegisterInfo::getRARegister() const {
return 0;
}
-unsigned SystemZRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned
+SystemZRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
assert(0 && "What is the frame register");
return 0;
}
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.h b/lib/Target/SystemZ/SystemZRegisterInfo.h
index b22b05d..93f6aee 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -68,7 +68,7 @@ struct SystemZRegisterInfo : public SystemZGenRegisterInfo {
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
// Exception handling queries.
unsigned getEHExceptionRegister() const;
diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp
index 990e003..dfa26a1 100644
--- a/lib/Target/SystemZ/SystemZTargetMachine.cpp
+++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp
@@ -28,7 +28,7 @@ SystemZTargetMachine::SystemZTargetMachine(const Target &T,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS),
DataLayout("E-p:64:64:64-i8:8:16-i16:16:16-i32:32:32-i64:64:64-f32:32:32"
- "-f64:64:64-f128:128:128-a0:16:16"),
+ "-f64:64:64-f128:128:128-a0:16:16-n32:64"),
InstrInfo(*this), TLInfo(*this),
FrameInfo(TargetFrameInfo::StackGrowsDown, 8, -160) {
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 5bcd658..fc71bc3 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -17,16 +17,16 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetData.h"
-#include "llvm/Module.h"
-#include "llvm/DerivedTypes.h"
#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/ManagedStatic.h"
#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
#include "llvm/System/Mutex.h"
#include "llvm/ADT/DenseMap.h"
-#include "llvm/ADT/StringExtras.h"
#include <algorithm>
#include <cstdlib>
using namespace llvm;
@@ -132,50 +132,18 @@ const TargetAlignElem TargetData::InvalidAlignmentElem =
// TargetData Class Implementation
//===----------------------------------------------------------------------===//
-/*!
- A TargetDescription string consists of a sequence of hyphen-delimited
- specifiers for target endianness, pointer size and alignments, and various
- primitive type sizes and alignments. A typical string looks something like:
- <br><br>
- "E-p:32:32:32-i1:8:8-i8:8:8-i32:32:32-i64:32:64-f32:32:32-f64:32:64"
- <br><br>
- (note: this string is not fully specified and is only an example.)
- \p
- Alignments come in two flavors: ABI and preferred. ABI alignment (abi_align,
- below) dictates how a type will be aligned within an aggregate and when used
- as an argument. Preferred alignment (pref_align, below) determines a type's
- alignment when emitted as a global.
- \p
- Specifier string details:
- <br><br>
- <i>[E|e]</i>: Endianness. "E" specifies a big-endian target data model, "e"
- specifies a little-endian target data model.
- <br><br>
- <i>p:@verbatim<size>:<abi_align>:<pref_align>@endverbatim</i>: Pointer size,
- ABI and preferred alignment.
- <br><br>
- <i>@verbatim<type><size>:<abi_align>:<pref_align>@endverbatim</i>: Numeric type
- alignment. Type is
- one of <i>i|f|v|a</i>, corresponding to integer, floating point, vector, or
- aggregate. Size indicates the size, e.g., 32 or 64 bits.
- \p
- The default string, fully specified, is:
- <br><br>
- "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64"
- "-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64"
- "-v64:64:64-v128:128:128"
- <br><br>
- Note that in the case of aggregates, 0 is the default ABI and preferred
- alignment. This is a special case, where the aggregate's computed worst-case
- alignment will be used.
- */
-void TargetData::init(const std::string &TargetDescription) {
- std::string temp = TargetDescription;
-
+/// getInt - Get an integer ignoring errors.
+static unsigned getInt(StringRef R) {
+ unsigned Result = 0;
+ R.getAsInteger(10, Result);
+ return Result;
+}
+
+void TargetData::init(StringRef Desc) {
LayoutMap = 0;
LittleEndian = false;
PointerMemSize = 8;
- PointerABIAlign = 8;
+ PointerABIAlign = 8;
PointerPrefAlign = PointerABIAlign;
// Default alignments
@@ -190,11 +158,21 @@ void TargetData::init(const std::string &TargetDescription) {
setAlignment(VECTOR_ALIGN, 16, 16, 128); // v16i8, v8i16, v4i32, ...
setAlignment(AGGREGATE_ALIGN, 0, 8, 0); // struct
- while (!temp.empty()) {
- std::string token = getToken(temp, "-");
- std::string arg0 = getToken(token, ":");
- const char *p = arg0.c_str();
- switch(*p) {
+ while (!Desc.empty()) {
+ std::pair<StringRef, StringRef> Split = Desc.split('-');
+ StringRef Token = Split.first;
+ Desc = Split.second;
+
+ if (Token.empty())
+ continue;
+
+ Split = Token.split(':');
+ StringRef Specifier = Split.first;
+ Token = Split.second;
+
+ assert(!Specifier.empty() && "Can't be empty here");
+
+ switch (Specifier[0]) {
case 'E':
LittleEndian = false;
break;
@@ -202,9 +180,12 @@ void TargetData::init(const std::string &TargetDescription) {
LittleEndian = true;
break;
case 'p':
- PointerMemSize = atoi(getToken(token,":").c_str()) / 8;
- PointerABIAlign = atoi(getToken(token,":").c_str()) / 8;
- PointerPrefAlign = atoi(getToken(token,":").c_str()) / 8;
+ Split = Token.split(':');
+ PointerMemSize = getInt(Split.first) / 8;
+ Split = Split.second.split(':');
+ PointerABIAlign = getInt(Split.first) / 8;
+ Split = Split.second.split(':');
+ PointerPrefAlign = getInt(Split.first) / 8;
if (PointerPrefAlign == 0)
PointerPrefAlign = PointerABIAlign;
break;
@@ -213,28 +194,52 @@ void TargetData::init(const std::string &TargetDescription) {
case 'f':
case 'a':
case 's': {
- AlignTypeEnum align_type = STACK_ALIGN; // Dummy init, silence warning
- switch(*p) {
- case 'i': align_type = INTEGER_ALIGN; break;
- case 'v': align_type = VECTOR_ALIGN; break;
- case 'f': align_type = FLOAT_ALIGN; break;
- case 'a': align_type = AGGREGATE_ALIGN; break;
- case 's': align_type = STACK_ALIGN; break;
+ AlignTypeEnum AlignType;
+ switch (Specifier[0]) {
+ default:
+ case 'i': AlignType = INTEGER_ALIGN; break;
+ case 'v': AlignType = VECTOR_ALIGN; break;
+ case 'f': AlignType = FLOAT_ALIGN; break;
+ case 'a': AlignType = AGGREGATE_ALIGN; break;
+ case 's': AlignType = STACK_ALIGN; break;
}
- uint32_t size = (uint32_t) atoi(++p);
- unsigned char abi_align = atoi(getToken(token, ":").c_str()) / 8;
- unsigned char pref_align = atoi(getToken(token, ":").c_str()) / 8;
- if (pref_align == 0)
- pref_align = abi_align;
- setAlignment(align_type, abi_align, pref_align, size);
+ unsigned Size = getInt(Specifier.substr(1));
+ Split = Token.split(':');
+ unsigned char ABIAlign = getInt(Split.first) / 8;
+
+ Split = Split.second.split(':');
+ unsigned char PrefAlign = getInt(Split.first) / 8;
+ if (PrefAlign == 0)
+ PrefAlign = ABIAlign;
+ setAlignment(AlignType, ABIAlign, PrefAlign, Size);
break;
}
+ case 'n': // Native integer types.
+ Specifier = Specifier.substr(1);
+ do {
+ if (unsigned Width = getInt(Specifier))
+ LegalIntWidths.push_back(Width);
+ Split = Token.split(':');
+ Specifier = Split.first;
+ Token = Split.second;
+ } while (!Specifier.empty() || !Token.empty());
+ break;
+
default:
break;
}
}
}
+/// Default ctor.
+///
+/// @note This has to exist, because this is a pass, but it should never be
+/// used.
+TargetData::TargetData() : ImmutablePass(&ID) {
+ llvm_report_error("Bad TargetData ctor used. "
+ "Tool did not specify a TargetData to use?");
+}
+
TargetData::TargetData(const Module *M)
: ImmutablePass(&ID) {
init(M->getDataLayout());
@@ -318,37 +323,130 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
: Alignments[BestMatchIdx].PrefAlign;
}
-typedef DenseMap<const StructType*, StructLayout*>LayoutInfoTy;
+typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
-TargetData::~TargetData() {
- if (!LayoutMap)
- return;
-
- // Remove any layouts for this TD.
- LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap);
- for (LayoutInfoTy::iterator I = TheMap.begin(), E = TheMap.end(); I != E; ) {
- I->second->~StructLayout();
- free(I->second);
- TheMap.erase(I++);
+namespace llvm {
+
+class StructLayoutMap : public AbstractTypeUser {
+ LayoutInfoTy LayoutInfo;
+
+ /// refineAbstractType - The callback method invoked when an abstract type is
+ /// resolved to another type. An object must override this method to update
+ /// its internal state to reference NewType instead of OldType.
+ ///
+ virtual void refineAbstractType(const DerivedType *OldTy,
+ const Type *) {
+ const StructType *STy = dyn_cast<const StructType>(OldTy);
+ if (!STy) {
+ OldTy->removeAbstractTypeUser(this);
+ return;
+ }
+
+ StructLayout *SL = LayoutInfo[STy];
+ if (SL) {
+ SL->~StructLayout();
+ free(SL);
+ LayoutInfo[STy] = NULL;
+ }
+
+ OldTy->removeAbstractTypeUser(this);
}
-
- delete static_cast<LayoutInfoTy*>(LayoutMap);
+
+ /// typeBecameConcrete - The other case which AbstractTypeUsers must be aware
+ /// of is when a type makes the transition from being abstract (where it has
+ /// clients on its AbstractTypeUsers list) to concrete (where it does not).
+ /// This method notifies ATU's when this occurs for a type.
+ ///
+ virtual void typeBecameConcrete(const DerivedType *AbsTy) {
+ const StructType *STy = dyn_cast<const StructType>(AbsTy);
+ if (!STy) {
+ AbsTy->removeAbstractTypeUser(this);
+ return;
+ }
+
+ StructLayout *SL = LayoutInfo[STy];
+ if (SL) {
+ SL->~StructLayout();
+ free(SL);
+ LayoutInfo[STy] = NULL;
+ }
+
+ AbsTy->removeAbstractTypeUser(this);
+ }
+
+ bool insert(const Type *Ty) {
+ if (Ty->isAbstract())
+ Ty->addAbstractTypeUser(this);
+ return true;
+ }
+
+public:
+ virtual ~StructLayoutMap() {
+ // Remove any layouts.
+ for (LayoutInfoTy::iterator
+ I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I)
+ if (StructLayout *SL = I->second) {
+ SL->~StructLayout();
+ free(SL);
+ }
+ }
+
+ inline LayoutInfoTy::iterator begin() {
+ return LayoutInfo.begin();
+ }
+ inline LayoutInfoTy::iterator end() {
+ return LayoutInfo.end();
+ }
+ inline LayoutInfoTy::const_iterator begin() const {
+ return LayoutInfo.begin();
+ }
+ inline LayoutInfoTy::const_iterator end() const {
+ return LayoutInfo.end();
+ }
+
+ LayoutInfoTy::iterator find(const StructType *&Val) {
+ return LayoutInfo.find(Val);
+ }
+ LayoutInfoTy::const_iterator find(const StructType *&Val) const {
+ return LayoutInfo.find(Val);
+ }
+
+ bool erase(const StructType *&Val) {
+ return LayoutInfo.erase(Val);
+ }
+ bool erase(LayoutInfoTy::iterator I) {
+ return LayoutInfo.erase(I);
+ }
+
+ StructLayout *&operator[](const Type *Key) {
+ const StructType *STy = dyn_cast<const StructType>(Key);
+ assert(STy && "Trying to access the struct layout map with a non-struct!");
+ insert(STy);
+ return LayoutInfo[STy];
+ }
+
+ // for debugging...
+ virtual void dump() const {}
+};
+
+} // end namespace llvm
+
+TargetData::~TargetData() {
+ delete LayoutMap;
}
const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
if (!LayoutMap)
- LayoutMap = static_cast<void*>(new LayoutInfoTy());
-
- LayoutInfoTy &TheMap = *static_cast<LayoutInfoTy*>(LayoutMap);
+ LayoutMap = new StructLayoutMap();
- StructLayout *&SL = TheMap[Ty];
+ StructLayout *&SL = (*LayoutMap)[Ty];
if (SL) return SL;
// Otherwise, create the struct layout. Because it is variable length, we
// malloc it, then use placement new.
int NumElts = Ty->getNumElements();
StructLayout *L =
- (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1)*sizeof(uint64_t));
+ (StructLayout *)malloc(sizeof(StructLayout)+(NumElts-1) * sizeof(uint64_t));
// Set SL before calling StructLayout's ctor. The ctor could cause other
// entries to be added to TheMap, invalidating our reference.
@@ -365,31 +463,35 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
if (!LayoutMap) return; // No cache.
- LayoutInfoTy* LayoutInfo = static_cast<LayoutInfoTy*>(LayoutMap);
- LayoutInfoTy::iterator I = LayoutInfo->find(Ty);
- if (I == LayoutInfo->end()) return;
+ DenseMap<const StructType*, StructLayout*>::iterator I = LayoutMap->find(Ty);
+ if (I == LayoutMap->end()) return;
I->second->~StructLayout();
free(I->second);
- LayoutInfo->erase(I);
+ LayoutMap->erase(I);
}
std::string TargetData::getStringRepresentation() const {
- std::string repr;
- repr.append(LittleEndian ? "e" : "E");
- repr.append("-p:").append(itostr((int64_t) (PointerMemSize * 8))).
- append(":").append(itostr((int64_t) (PointerABIAlign * 8))).
- append(":").append(itostr((int64_t) (PointerPrefAlign * 8)));
- for (align_const_iterator I = Alignments.begin();
- I != Alignments.end();
- ++I) {
- repr.append("-").append(1, (char) I->AlignType).
- append(utostr((int64_t) I->TypeBitWidth)).
- append(":").append(utostr((uint64_t) (I->ABIAlign * 8))).
- append(":").append(utostr((uint64_t) (I->PrefAlign * 8)));
+ std::string Result;
+ raw_string_ostream OS(Result);
+
+ OS << (LittleEndian ? "e" : "E")
+ << "-p:" << PointerMemSize*8 << ':' << PointerABIAlign*8
+ << ':' << PointerPrefAlign*8;
+ for (unsigned i = 0, e = Alignments.size(); i != e; ++i) {
+ const TargetAlignElem &AI = Alignments[i];
+ OS << '-' << (char)AI.AlignType << AI.TypeBitWidth << ':'
+ << AI.ABIAlign*8 << ':' << AI.PrefAlign*8;
+ }
+
+ if (!LegalIntWidths.empty()) {
+ OS << "-n" << (unsigned)LegalIntWidths[0];
+
+ for (unsigned i = 1, e = LegalIntWidths.size(); i != e; ++i)
+ OS << ':' << (unsigned)LegalIntWidths[i];
}
- return repr;
+ return OS.str();
}
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index c1aab99..f887523 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -24,6 +24,7 @@
#include "llvm/Target/TargetData.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
@@ -151,7 +152,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
// relocation, then we may have to drop this into a writable data section
// even though it is marked const.
switch (C->getRelocationInfo()) {
- default: llvm_unreachable("unknown relocation info kind");
+ default: assert(0 && "unknown relocation info kind");
case Constant::NoRelocation:
// If initializer is a null-terminated string, put it in a "cstring"
// section of the right width.
@@ -219,7 +220,7 @@ SectionKind TargetLoweringObjectFile::getKindForGlobal(const GlobalValue *GV,
return SectionKind::getDataNoRel();
switch (C->getRelocationInfo()) {
- default: llvm_unreachable("unknown relocation info kind");
+ default: assert(0 && "unknown relocation info kind");
case Constant::NoRelocation:
return SectionKind::getDataNoRel();
case Constant::LocalRelocation:
@@ -671,7 +672,7 @@ TargetLoweringObjectFileMachO::~TargetLoweringObjectFileMachO() {
const MCSectionMachO *TargetLoweringObjectFileMachO::
-getMachOSection(const StringRef &Segment, const StringRef &Section,
+getMachOSection(StringRef Segment, StringRef Section,
unsigned TypeAndAttributes,
unsigned Reserved2, SectionKind Kind) const {
// We unique sections by their segment/section pair. The returned section
diff --git a/lib/Target/TargetSubtarget.cpp b/lib/Target/TargetSubtarget.cpp
index 95c92ca..edb76f9 100644
--- a/lib/Target/TargetSubtarget.cpp
+++ b/lib/Target/TargetSubtarget.cpp
@@ -12,6 +12,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/ADT/SmallVector.h"
using namespace llvm;
//---------------------------------------------------------------------------
@@ -20,3 +21,13 @@ using namespace llvm;
TargetSubtarget::TargetSubtarget() {}
TargetSubtarget::~TargetSubtarget() {}
+
+bool TargetSubtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = ANTIDEP_NONE;
+ CriticalPathRCs.clear();
+ return false;
+}
+
diff --git a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
index ae8e6d3..b88063f 100644
--- a/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86AsmPrinter.cpp
@@ -651,7 +651,7 @@ void X86AsmPrinter::printMachineInstruction(const MachineInstr *MI) {
printInstructionThroughMCStreamer(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
diff --git a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
index 821cca4..be9f4b2 100644
--- a/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
+++ b/lib/Target/X86/AsmPrinter/X86MCInstLower.cpp
@@ -21,6 +21,7 @@
#include "llvm/MC/MCExpr.h"
#include "llvm/MC/MCInst.h"
#include "llvm/MC/MCStreamer.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/FormattedStream.h"
#include "llvm/Support/Mangler.h"
#include "llvm/ADT/SmallString.h"
@@ -405,7 +406,6 @@ void X86AsmPrinter::printInstructionThroughMCStreamer(const MachineInstr *MI) {
printLabel(MI);
return;
case TargetInstrInfo::INLINEASM:
- O << '\t';
printInlineAsm(MI);
return;
case TargetInstrInfo::IMPLICIT_DEF:
diff --git a/lib/Target/X86/X86CodeEmitter.cpp b/lib/Target/X86/X86CodeEmitter.cpp
index a0bded3..4497931 100644
--- a/lib/Target/X86/X86CodeEmitter.cpp
+++ b/lib/Target/X86/X86CodeEmitter.cpp
@@ -82,7 +82,7 @@ namespace {
void emitPCRelativeBlockAddress(MachineBasicBlock *MBB);
void emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp = 0, intptr_t PCAdj = 0,
- bool NeedStub = false, bool Indirect = false);
+ bool Indirect = false);
void emitExternalSymbolAddress(const char *ES, unsigned Reloc);
void emitConstPoolAddress(unsigned CPI, unsigned Reloc, intptr_t Disp = 0,
intptr_t PCAdj = 0);
@@ -176,7 +176,6 @@ template<class CodeEmitter>
void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
intptr_t Disp /* = 0 */,
intptr_t PCAdj /* = 0 */,
- bool NeedStub /* = false */,
bool Indirect /* = false */) {
intptr_t RelocCST = Disp;
if (Reloc == X86::reloc_picrel_word)
@@ -185,9 +184,9 @@ void Emitter<CodeEmitter>::emitGlobalAddress(GlobalValue *GV, unsigned Reloc,
RelocCST = PCAdj;
MachineRelocation MR = Indirect
? MachineRelocation::getIndirectSymbol(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, NeedStub)
+ GV, RelocCST, false)
: MachineRelocation::getGV(MCE.getCurrentPCOffset(), Reloc,
- GV, RelocCST, NeedStub);
+ GV, RelocCST, false);
MCE.addRelocation(MR);
// The relocated value will be added to the displacement
if (Reloc == X86::reloc_absolute_dword)
@@ -333,10 +332,9 @@ void Emitter<CodeEmitter>::emitDisplacementField(const MachineOperand *RelocOp,
// do it, otherwise fallback to absolute (this is determined by IsPCRel).
// 89 05 00 00 00 00 mov %eax,0(%rip) # PC-relative
// 89 04 25 00 00 00 00 mov %eax,0x0 # Absolute
- bool NeedStub = isa<Function>(RelocOp->getGlobal());
bool Indirect = gvNeedsNonLazyPtr(*RelocOp, TM);
emitGlobalAddress(RelocOp->getGlobal(), RelocType, RelocOp->getOffset(),
- Adj, NeedStub, Indirect);
+ Adj, Indirect);
} else if (RelocOp->isSymbol()) {
emitExternalSymbolAddress(RelocOp->getSymbolName(), RelocType);
} else if (RelocOp->isCPI()) {
@@ -633,14 +631,8 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
}
if (MO.isGlobal()) {
- // Assume undefined functions may be outside the Small codespace.
- bool NeedStub =
- (Is64BitMode &&
- (TM.getCodeModel() == CodeModel::Large ||
- TM.getSubtarget<X86Subtarget>().isTargetDarwin())) ||
- Opcode == X86::TAILJMPd;
emitGlobalAddress(MO.getGlobal(), X86::reloc_pcrel_word,
- MO.getOffset(), 0, NeedStub);
+ MO.getOffset(), 0);
break;
}
@@ -681,10 +673,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64ri)
rt = X86::reloc_absolute_dword; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
- bool NeedStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, Indirect);
+ Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@@ -790,10 +781,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64ri32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO1.isGlobal()) {
- bool NeedStub = isa<Function>(MO1.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO1, TM);
emitGlobalAddress(MO1.getGlobal(), rt, MO1.getOffset(), 0,
- NeedStub, Indirect);
+ Indirect);
} else if (MO1.isSymbol())
emitExternalSymbolAddress(MO1.getSymbolName(), rt);
else if (MO1.isCPI())
@@ -831,10 +821,9 @@ void Emitter<CodeEmitter>::emitInstruction(const MachineInstr &MI,
if (Opcode == X86::MOV64mi32)
rt = X86::reloc_absolute_word_sext; // FIXME: add X86II flag?
if (MO.isGlobal()) {
- bool NeedStub = isa<Function>(MO.getGlobal());
bool Indirect = gvNeedsNonLazyPtr(MO, TM);
emitGlobalAddress(MO.getGlobal(), rt, MO.getOffset(), 0,
- NeedStub, Indirect);
+ Indirect);
} else if (MO.isSymbol())
emitExternalSymbolAddress(MO.getSymbolName(), rt);
else if (MO.isCPI())
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 3401df0..431c120 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1493,7 +1493,7 @@ bool X86FastISel::X86SelectCall(Instruction *I) {
EVT ResVT = RVLocs[0].getValVT();
unsigned Opc = ResVT == MVT::f32 ? X86::ST_Fp80m32 : X86::ST_Fp80m64;
unsigned MemSize = ResVT.getSizeInBits()/8;
- int FI = MFI.CreateStackObject(MemSize, MemSize);
+ int FI = MFI.CreateStackObject(MemSize, MemSize, false);
addFrameReference(BuildMI(MBB, DL, TII.get(Opc)), FI).addReg(ResultReg);
DstRC = ResVT == MVT::f32
? X86::FR32RegisterClass : X86::FR64RegisterClass;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 122f515..6a3577a 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -12,6 +12,15 @@
//
//===----------------------------------------------------------------------===//
+// Force NDEBUG on in any optimized build on Darwin.
+//
+// FIXME: This is a huge hack, to work around ridiculously awful compile times
+// on this file with gcc-4.2 on Darwin, in Release mode.
+#if (!defined(__llvm__) && defined(__APPLE__) && \
+ defined(__OPTIMIZE__) && !defined(NDEBUG))
+#define NDEBUG
+#endif
+
#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
@@ -661,7 +670,6 @@ void X86DAGToDAGISel::InstructionSelect() {
const Function *F = MF->getFunction();
OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
- DEBUG(BB->dump());
if (OptLevel != CodeGenOpt::None)
PreprocessForRMW();
@@ -1950,14 +1958,12 @@ SDNode *X86DAGToDAGISel::Select(SDValue N) {
0);
// We just did a 32-bit clear, insert it into a 64-bit register to
// clear the whole 64-bit reg.
- SDValue Undef =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::IMPLICIT_DEF,
- dl, MVT::i64), 0);
+ SDValue Zero = CurDAG->getTargetConstant(0, MVT::i64);
SDValue SubRegNo =
CurDAG->getTargetConstant(X86::SUBREG_32BIT, MVT::i32);
ClrNode =
- SDValue(CurDAG->getMachineNode(TargetInstrInfo::INSERT_SUBREG, dl,
- MVT::i64, Undef, ClrNode, SubRegNo),
+ SDValue(CurDAG->getMachineNode(TargetInstrInfo::SUBREG_TO_REG, dl,
+ MVT::i64, Zero, ClrNode, SubRegNo),
0);
} else {
ClrNode = SDValue(CurDAG->getMachineNode(ClrOpcode, dl, NVT), 0);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 86ec9f2..6018cf5 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -1087,6 +1087,17 @@ unsigned X86TargetLowering::getFunctionAlignment(const Function *F) const {
#include "X86GenCallingConv.inc"
+bool
+X86TargetLowering::CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_X86);
+}
+
SDValue
X86TargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
@@ -1370,7 +1381,7 @@ X86TargetLowering::LowerMemArgument(SDValue Chain,
// In case of tail call optimization mark all arguments mutable. Since they
// could be overwritten by lowering of arguments in case of a tail call.
int FI = MFI->CreateFixedObject(ValVT.getSizeInBits()/8,
- VA.getLocMemOffset(), isImmutable);
+ VA.getLocMemOffset(), isImmutable, false);
SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal())
return FIN;
@@ -1499,7 +1510,7 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
// the start of the first vararg value... for expansion of llvm.va_start.
if (isVarArg) {
if (Is64Bit || CallConv != CallingConv::X86_FastCall) {
- VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize);
+ VarArgsFrameIndex = MFI->CreateFixedObject(1, StackSize, true, false);
}
if (Is64Bit) {
unsigned TotalNumIntRegs = 0, TotalNumXMMRegs = 0;
@@ -1550,7 +1561,8 @@ X86TargetLowering::LowerFormalArguments(SDValue Chain,
VarArgsGPOffset = NumIntRegs * 8;
VarArgsFPOffset = TotalNumIntRegs * 8 + NumXMMRegs * 16;
RegSaveFrameIndex = MFI->CreateStackObject(TotalNumIntRegs * 8 +
- TotalNumXMMRegs * 16, 16);
+ TotalNumXMMRegs * 16, 16,
+ false);
// Store the integer parameter registers.
SmallVector<SDValue, 8> MemOps;
@@ -1671,7 +1683,8 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize);
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize,
+ true, false);
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
@@ -1884,7 +1897,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// Create frame index.
int32_t Offset = VA.getLocMemOffset()+FPDiff;
uint32_t OpSize = (VA.getLocVT().getSizeInBits()+7)/8;
- FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset);
+ FI = MF.getFrameInfo()->CreateFixedObject(OpSize, Offset, true, false);
FIN = DAG.getFrameIndex(FI, getPointerTy());
if (Flags.isByVal()) {
@@ -1924,9 +1937,19 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
FPDiff, dl);
}
- // If the callee is a GlobalAddress node (quite common, every direct call is)
- // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
- if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ bool WasGlobalOrExternal = false;
+ if (getTargetMachine().getCodeModel() == CodeModel::Large) {
+ assert(Is64Bit && "Large code model is only legal in 64-bit mode.");
+ // In the 64-bit large code model, we have to make all calls
+ // through a register, since the call instruction's 32-bit
+ // pc-relative offset may not be large enough to hold the whole
+ // address.
+ } else if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
+ WasGlobalOrExternal = true;
+ // If the callee is a GlobalAddress node (quite common, every direct call
+ // is) turn it into a TargetGlobalAddress node so that legalize doesn't hack
+ // it.
+
// We should use extra load for direct calls to dllimported functions in
// non-JIT mode.
GlobalValue *GV = G->getGlobal();
@@ -1954,6 +1977,7 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
G->getOffset(), OpFlags);
}
} else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee)) {
+ WasGlobalOrExternal = true;
unsigned char OpFlags = 0;
// On ELF targets, in either X86-64 or X86-32 mode, direct calls to external
@@ -1971,7 +1995,9 @@ X86TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
Callee = DAG.getTargetExternalSymbol(S->getSymbol(), getPointerTy(),
OpFlags);
- } else if (isTailCall) {
+ }
+
+ if (isTailCall && !WasGlobalOrExternal) {
unsigned Opc = Is64Bit ? X86::R11 : X86::EAX;
Chain = DAG.getCopyToReg(Chain, dl,
@@ -2169,7 +2195,8 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
if (ReturnAddrIndex == 0) {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
- ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize);
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ true, false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -2517,6 +2544,21 @@ bool X86::isMOVHLPSMask(ShuffleVectorSDNode *N) {
isUndefOrEqual(N->getMaskElt(3), 3);
}
+/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
+/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
+/// <2, 3, 2, 3>
+bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
+ unsigned NumElems = N->getValueType(0).getVectorNumElements();
+
+ if (NumElems != 4)
+ return false;
+
+ return isUndefOrEqual(N->getMaskElt(0), 2) &&
+ isUndefOrEqual(N->getMaskElt(1), 3) &&
+ isUndefOrEqual(N->getMaskElt(2), 2) &&
+ isUndefOrEqual(N->getMaskElt(3), 3);
+}
+
/// isMOVLPMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLP{S|D}.
bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
@@ -2536,10 +2578,9 @@ bool X86::isMOVLPMask(ShuffleVectorSDNode *N) {
return true;
}
-/// isMOVHPMask - Return true if the specified VECTOR_SHUFFLE operand
-/// specifies a shuffle of elements that is suitable for input to MOVHP{S|D}
-/// and MOVLHPS.
-bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
+/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
+/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
+bool X86::isMOVLHPSMask(ShuffleVectorSDNode *N) {
unsigned NumElems = N->getValueType(0).getVectorNumElements();
if (NumElems != 2 && NumElems != 4)
@@ -2556,21 +2597,6 @@ bool X86::isMOVHPMask(ShuffleVectorSDNode *N) {
return true;
}
-/// isMOVHLPS_v_undef_Mask - Special case of isMOVHLPSMask for canonical form
-/// of vector_shuffle v, v, <2, 3, 2, 3>, i.e. vector_shuffle v, undef,
-/// <2, 3, 2, 3>
-bool X86::isMOVHLPS_v_undef_Mask(ShuffleVectorSDNode *N) {
- unsigned NumElems = N->getValueType(0).getVectorNumElements();
-
- if (NumElems != 4)
- return false;
-
- return isUndefOrEqual(N->getMaskElt(0), 2) &&
- isUndefOrEqual(N->getMaskElt(1), 3) &&
- isUndefOrEqual(N->getMaskElt(2), 2) &&
- isUndefOrEqual(N->getMaskElt(3), 3);
-}
-
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
static bool isUNPCKLMask(const SmallVectorImpl<int> &Mask, EVT VT,
@@ -4264,7 +4290,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
if (!isMMX && (X86::isMOVSHDUPMask(SVOp) ||
X86::isMOVSLDUPMask(SVOp) ||
X86::isMOVHLPSMask(SVOp) ||
- X86::isMOVHPMask(SVOp) ||
+ X86::isMOVLHPSMask(SVOp) ||
X86::isMOVLPMask(SVOp)))
return Op;
@@ -4961,7 +4987,7 @@ SDValue X86TargetLowering::LowerSINT_TO_FP(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
unsigned Size = SrcVT.getSizeInBits()/8;
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(Size, Size, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getStore(DAG.getEntryNode(), dl, Op.getOperand(0),
StackSlot,
@@ -4995,7 +5021,7 @@ SDValue X86TargetLowering::BuildFILD(SDValue Op, EVT SrcVT, SDValue Chain,
// shouldn't be necessary except that RFP cannot be live across
// multiple blocks. When stackifier is fixed, they can be uncoupled.
MachineFunction &MF = DAG.getMachineFunction();
- int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(8, 8, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
Tys = DAG.getVTList(MVT::Other);
SmallVector<SDValue, 8> Ops;
@@ -5205,7 +5231,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
// stack slot.
MachineFunction &MF = DAG.getMachineFunction();
unsigned MemSize = DstTy.getSizeInBits()/8;
- int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
unsigned Opc;
@@ -5228,7 +5254,7 @@ FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG, bool IsSigned) {
};
Value = DAG.getNode(X86ISD::FLD, dl, Tys, Ops, 3);
Chain = Value.getValue(1);
- SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize);
+ SSFI = MF.getFrameInfo()->CreateStackObject(MemSize, MemSize, false);
StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
}
@@ -6752,7 +6778,7 @@ SDValue X86TargetLowering::LowerFLT_ROUNDS_(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
// Save FP Control Word to stack slot
- int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment);
+ int SSFI = MF.getFrameInfo()->CreateStackObject(2, StackAlignment, false);
SDValue StackSlot = DAG.getFrameIndex(SSFI, getPointerTy());
SDValue Chain = DAG.getNode(X86ISD::FNSTCW16m, dl, MVT::Other,
@@ -7977,7 +8003,7 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
// Change the floating point control register to use "round towards zero"
// mode when truncating to an integer value.
MachineFunction *F = BB->getParent();
- int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2);
+ int CWFrameIdx = F->getFrameInfo()->CreateStackObject(2, 2, false);
addFrameReference(BuildMI(BB, DL, TII->get(X86::FNSTCW16m)), CWFrameIdx);
// Load the old value of the high byte of the control word...
@@ -9585,14 +9611,14 @@ X86TargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
}
// GCC allows "st(0)" to be called just plain "st".
- if (StringsEqualNoCase("{st}", Constraint)) {
+ if (StringRef("{st}").equals_lower(Constraint)) {
Res.first = X86::ST0;
Res.second = X86::RFP80RegisterClass;
return Res;
}
// flags -> EFLAGS
- if (StringsEqualNoCase("{flags}", Constraint)) {
+ if (StringRef("{flags}").equals_lower(Constraint)) {
Res.first = X86::EFLAGS;
Res.second = X86::CCRRegisterClass;
return Res;
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7b59b81..7b4ab62 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -286,7 +286,7 @@ namespace llvm {
/// isMOVLHPSMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to MOVLHPS.
- bool isMOVHPMask(ShuffleVectorSDNode *N);
+ bool isMOVLHPSMask(ShuffleVectorSDNode *N);
/// isUNPCKLMask - Return true if the specified VECTOR_SHUFFLE operand
/// specifies a shuffle of elements that is suitable for input to UNPCKL.
@@ -699,6 +699,12 @@ namespace llvm {
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG);
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG);
+
void ReplaceATOMIC_BINARY_64(SDNode *N, SmallVectorImpl<SDValue> &Results,
SelectionDAG &DAG, unsigned NewOp);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 3edced7..a01534b 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -309,7 +309,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
[(set GR64:$dst, i64immSExt32:$src)]>;
}
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (load addr:$src))]>;
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index 87bc10d..1ddceb1 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -26,11 +26,15 @@
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/LiveVariables.h"
+#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/MC/MCAsmInfo.h"
+
+#include <limits>
+
using namespace llvm;
static cl::opt<bool>
@@ -707,9 +711,23 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
}
}
-unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
+/// isFrameOperand - Return true and the FrameIndex if the specified
+/// operand and the following operands form a reference to the stack frame.
+bool X86InstrInfo::isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const {
+ if (MI->getOperand(Op).isFI() && MI->getOperand(Op+1).isImm() &&
+ MI->getOperand(Op+2).isReg() && MI->getOperand(Op+3).isImm() &&
+ MI->getOperand(Op+1).getImm() == 1 &&
+ MI->getOperand(Op+2).getReg() == 0 &&
+ MI->getOperand(Op+3).getImm() == 0) {
+ FrameIndex = MI->getOperand(Op).getIndex();
+ return true;
+ }
+ return false;
+}
+
+static bool isFrameLoadOpcode(int Opcode) {
+ switch (Opcode) {
default: break;
case X86::MOV8rm:
case X86::MOV16rm:
@@ -723,22 +741,14 @@ unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
case X86::MMX_MOVQ64rm:
- if (MI->getOperand(1).isFI() && MI->getOperand(2).isImm() &&
- MI->getOperand(3).isReg() && MI->getOperand(4).isImm() &&
- MI->getOperand(2).getImm() == 1 &&
- MI->getOperand(3).getReg() == 0 &&
- MI->getOperand(4).getImm() == 0) {
- FrameIndex = MI->getOperand(1).getIndex();
- return MI->getOperand(0).getReg();
- }
+ return true;
break;
}
- return 0;
+ return false;
}
-unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
- int &FrameIndex) const {
- switch (MI->getOpcode()) {
+static bool isFrameStoreOpcode(int Opcode) {
+ switch (Opcode) {
default: break;
case X86::MOV8mr:
case X86::MOV16mr:
@@ -753,19 +763,83 @@ unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
case X86::MMX_MOVD64mr:
case X86::MMX_MOVQ64mr:
case X86::MMX_MOVNTQmr:
- if (MI->getOperand(0).isFI() && MI->getOperand(1).isImm() &&
- MI->getOperand(2).isReg() && MI->getOperand(3).isImm() &&
- MI->getOperand(1).getImm() == 1 &&
- MI->getOperand(2).getReg() == 0 &&
- MI->getOperand(3).getImm() == 0) {
- FrameIndex = MI->getOperand(0).getIndex();
+ return true;
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode()))
+ if (isFrameOperand(MI, 1, FrameIndex))
+ return MI->getOperand(0).getReg();
+ return 0;
+}
+
+unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameLoadOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ return hasLoadFromStackSlot(MI, FrameIndex);
+ }
+ return 0;
+}
+
+bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isLoad() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ return true;
+ }
+ }
+ return false;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode()))
+ if (isFrameOperand(MI, 0, FrameIndex))
return MI->getOperand(X86AddrNumOperands).getReg();
- }
- break;
+ return 0;
+}
+
+unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const {
+ if (isFrameStoreOpcode(MI->getOpcode())) {
+ unsigned Reg;
+ if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
+ return Reg;
+ // Check for post-frame index elimination operations
+ return hasStoreToStackSlot(MI, FrameIndex);
}
return 0;
}
+bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const {
+ for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
+ oe = MI->memoperands_end();
+ o != oe;
+ ++o) {
+ if ((*o)->isStore() && (*o)->getValue())
+ if (const FixedStackPseudoSourceValue *Value =
+ dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
+ FrameIndex = Value->getFrameIndex();
+ return true;
+ }
+ }
+ return false;
+}
+
/// regIsPICBase - Return true if register is PIC base (e.g. defined by
/// X86::MOVPC32r.
static bool regIsPICBase(unsigned BaseReg, const MachineRegisterInfo &MRI) {
@@ -794,10 +868,14 @@ X86InstrInfo::isReallyTriviallyReMaterializable(const MachineInstr *MI,
case X86::MOVSSrm:
case X86::MOVSDrm:
case X86::MOVAPSrm:
+ case X86::MOVUPSrm:
+ case X86::MOVUPSrm_Int:
case X86::MOVAPDrm:
case X86::MOVDQArm:
case X86::MMX_MOVD64rm:
- case X86::MMX_MOVQ64rm: {
+ case X86::MMX_MOVQ64rm:
+ case X86::FsMOVAPSrm:
+ case X86::FsMOVAPDrm: {
// Loads from constant pools are trivially rematerializable.
if (MI->getOperand(1).isReg() &&
MI->getOperand(2).isImm() &&
@@ -917,12 +995,13 @@ static bool isSafeToClobberEFLAGS(MachineBasicBlock &MBB,
void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const {
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const {
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
if (SubIdx && TargetRegisterInfo::isPhysicalRegister(DestReg)) {
- DestReg = RI.getSubReg(DestReg, SubIdx);
+ DestReg = TRI->getSubReg(DestReg, SubIdx);
SubIdx = 0;
}
@@ -1891,8 +1970,7 @@ void X86InstrInfo::storeRegToAddr(MachineFunction &MF, unsigned SrcReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ bool isAligned = (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getStoreRegOpcode(SrcReg, RC, isAligned, TM);
DebugLoc DL = DebugLoc::getUnknownLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc));
@@ -1985,8 +2063,7 @@ void X86InstrInfo::loadRegFromAddr(MachineFunction &MF, unsigned DestReg,
MachineInstr::mmo_iterator MMOBegin,
MachineInstr::mmo_iterator MMOEnd,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ bool isAligned = (*MMOBegin)->getAlignment() >= 16;
unsigned Opc = getLoadRegOpcode(DestReg, RC, isAligned, TM);
DebugLoc DL = DebugLoc::getUnknownLoc();
MachineInstrBuilder MIB = BuildMI(MF, DL, get(Opc), DestReg);
@@ -2170,7 +2247,7 @@ X86InstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
// If table selected...
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
OpcodeTablePtr->find((unsigned*)MI->getOpcode());
if (I != OpcodeTablePtr->end()) {
unsigned Opcode = I->second.first;
@@ -2402,7 +2479,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
if (OpcodeTablePtr) {
// Find the Opcode to fuse
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
OpcodeTablePtr->find((unsigned*)Opc);
if (I != OpcodeTablePtr->end())
return true;
@@ -2413,7 +2490,7 @@ bool X86InstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
bool X86InstrInfo::unfoldMemoryOperand(MachineFunction &MF, MachineInstr *MI,
unsigned Reg, bool UnfoldLoad, bool UnfoldStore,
SmallVectorImpl<MachineInstr*> &NewMIs) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)MI->getOpcode());
if (I == MemOp2RegOpTable.end())
return false;
@@ -2530,7 +2607,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
if (!N->isMachineOpcode())
return false;
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)N->getMachineOpcode());
if (I == MemOp2RegOpTable.end())
return false;
@@ -2563,17 +2640,16 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
MachineFunction &MF = DAG.getMachineFunction();
if (FoldedLoad) {
EVT VT = *RC->vt_begin();
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ bool isAligned = (*MMOs.first)->getAlignment() >= 16;
Load = DAG.getMachineNode(getLoadRegOpcode(0, RC, isAligned, TM), dl,
VT, MVT::Other, &AddrOps[0], AddrOps.size());
NewNodes.push_back(Load);
// Preserve memory reference information.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractLoadMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
}
@@ -2601,8 +2677,11 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
AddrOps.pop_back();
AddrOps.push_back(SDValue(NewNode, 0));
AddrOps.push_back(Chain);
- bool isAligned = (RI.getStackAlignment() >= 16) ||
- RI.needsStackRealignment(MF);
+ std::pair<MachineInstr::mmo_iterator,
+ MachineInstr::mmo_iterator> MMOs =
+ MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
+ cast<MachineSDNode>(N)->memoperands_end());
+ bool isAligned = (*MMOs.first)->getAlignment() >= 16;
SDNode *Store = DAG.getMachineNode(getStoreRegOpcode(0, DstRC,
isAligned, TM),
dl, MVT::Other,
@@ -2610,10 +2689,6 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
NewNodes.push_back(Store);
// Preserve memory reference information.
- std::pair<MachineInstr::mmo_iterator,
- MachineInstr::mmo_iterator> MMOs =
- MF.extractStoreMemRefs(cast<MachineSDNode>(N)->memoperands_begin(),
- cast<MachineSDNode>(N)->memoperands_end());
cast<MachineSDNode>(Load)->setMemRefs(MMOs.first, MMOs.second);
}
@@ -2623,7 +2698,7 @@ X86InstrInfo::unfoldMemoryOperand(SelectionDAG &DAG, SDNode *N,
unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex) const {
- DenseMap<unsigned*, std::pair<unsigned,unsigned> >::iterator I =
+ DenseMap<unsigned*, std::pair<unsigned,unsigned> >::const_iterator I =
MemOp2RegOpTable.find((unsigned*)Opc);
if (I == MemOp2RegOpTable.end())
return 0;
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index 6eb07d5..c6daa25 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -449,13 +449,41 @@ public:
unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
unsigned isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isLoadFromStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isLoadFromStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// hasLoadFromStackSlot - If the specified machine instruction has
+ /// a load from a stack slot, return true along with the FrameIndex
+ /// of the loaded stack slot. If not, return false. Unlike
+ /// isLoadFromStackSlot, this returns true for any instruction that
+ /// loads from the stack. This is a hint only and may not catch all
+ /// cases.
+ bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
+ /// stack locations as well. This uses a heuristic so it isn't
+ /// reliable for correctness.
+ unsigned isStoreToStackSlotPostFE(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// hasStoreToStackSlot - If the specified machine instruction has a
+ /// store to a stack slot, return true along with the FrameIndex of
+ /// the stored stack slot. If not, return false. Unlike
+ /// isStoreToStackSlot, this returns true for any instruction that
+ /// stores to the stack. This is a hint only and may not catch all
+ /// cases.
+ bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const;
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
- const MachineInstr *Orig) const;
+ const MachineInstr *Orig,
+ const TargetRegisterInfo *TRI) const;
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
@@ -610,6 +638,11 @@ private:
unsigned OpNum,
const SmallVectorImpl<MachineOperand> &MOs,
unsigned Size, unsigned Alignment) const;
+
+ /// isFrameOperand - Return true and the FrameIndex if the specified
+ /// operand and the following operands form a reference to the stack frame.
+ bool isFrameOperand(const MachineInstr *MI, unsigned int Op,
+ int &FrameIndex) const;
};
} // End llvm namespace
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 9b82e1e..a79f262 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -543,7 +543,7 @@ let neverHasSideEffects = 1 in {
}
// Trap
-def INT3 : I<0xcc, RawFrm, (outs), (ins), "int 3", []>;
+def INT3 : I<0xcc, RawFrm, (outs), (ins), "int\t3", []>;
def INT : I<0xcd, RawFrm, (outs), (ins i8imm:$trap), "int\t$trap", []>;
// PIC base construction. This expands to code that looks like this:
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index be242a0..ee63d56 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -225,9 +225,9 @@ def movhlps_undef : PatFrag<(ops node:$lhs, node:$rhs),
return X86::isMOVHLPS_v_undef_Mask(cast<ShuffleVectorSDNode>(N));
}]>;
-def movhp : PatFrag<(ops node:$lhs, node:$rhs),
- (vector_shuffle node:$lhs, node:$rhs), [{
- return X86::isMOVHPMask(cast<ShuffleVectorSDNode>(N));
+def movlhps : PatFrag<(ops node:$lhs, node:$rhs),
+ (vector_shuffle node:$lhs, node:$rhs), [{
+ return X86::isMOVLHPSMask(cast<ShuffleVectorSDNode>(N));
}]>;
def movlp : PatFrag<(ops node:$lhs, node:$rhs),
@@ -497,7 +497,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
// disregarded.
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
@@ -706,7 +706,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
let neverHasSideEffects = 1 in
def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (loadv4f32 addr:$src))]>;
@@ -715,7 +715,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v4f32 VR128:$src), addr:$dst)]>;
// Intrinsic forms of MOVUPS load and store
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
@@ -735,7 +735,7 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (movhp VR128:$src1,
+ (movlhps VR128:$src1,
(bc_v4f32 (v2f64 (scalar_to_vector (loadf64 addr:$src2))))))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@@ -760,7 +760,7 @@ def MOVLHPSrr : PSI<0x16, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
"movlhps\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v4f32 (movhp VR128:$src1, VR128:$src2)))]>;
+ (v4f32 (movlhps VR128:$src1, VR128:$src2)))]>;
def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
(ins VR128:$src1, VR128:$src2),
@@ -1256,7 +1256,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
// disregarded.
-let canFoldAsLoad = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
@@ -1494,7 +1494,7 @@ let Constraints = "$src1 = $dst" in {
(outs VR128:$dst), (ins VR128:$src1, f64mem:$src2),
"movhpd\t{$src2, $dst|$dst, $src2}",
[(set VR128:$dst,
- (v2f64 (movhp VR128:$src1,
+ (v2f64 (movlhps VR128:$src1,
(scalar_to_vector (loadf64 addr:$src2)))))]>;
} // AddedComplexity
} // Constraints = "$src1 = $dst"
@@ -2085,7 +2085,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
[(set VR128:$dst, (v4i32 (pshufd:$src2
(bc_v4i32(memopv2i64 addr:$src1)),
(undef))))]>;
-}
+}
// SSE2 with ImmT == Imm8 and XS prefix.
def PSHUFHWri : Ii8<0x70, MRMSrcReg,
@@ -2874,7 +2874,7 @@ def : Pat<(v16i8 (palign:$src3 VR128:$src1, VR128:$src2)),
(PALIGNR128rr VR128:$src2, VR128:$src1,
(SHUFFLE_get_palign_imm VR128:$src3))>,
Requires<[HasSSSE3]>;
-}
+}
def : Pat<(X86pshufb VR128:$src, VR128:$mask),
(PSHUFBrr128 VR128:$src, VR128:$mask)>, Requires<[HasSSSE3]>;
@@ -3035,7 +3035,7 @@ def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
let AddedComplexity = 20 in {
// vector_shuffle v1, v2 <0, 1, 4, 5> using MOVLHPS
-def : Pat<(v4i32 (movhp VR128:$src1, VR128:$src2)),
+def : Pat<(v4i32 (movlhps VR128:$src1, VR128:$src2)),
(MOVLHPSrr VR128:$src1, VR128:$src2)>;
// vector_shuffle v1, v2 <6, 7, 2, 3> using MOVHLPS
@@ -3051,48 +3051,26 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
-// vector_shuffle v1, (load v2) <0, 1, 4, 5> using MOVHPS
def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4f32 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2f64 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-
def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
(MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(v4i32 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(v2i64 (movhp VR128:$src1, (load addr:$src2))),
- (MOVHPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
}
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
-// (store (vector_shuffle (load addr), v2, <0, 1, 4, 5>), addr) using MOVHPS
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(store (v4f32 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2f64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
addr:$src1),
(MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
(MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-def : Pat<(store (v4i32 (movhp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
- addr:$src1),
- (MOVHPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
-def : Pat<(store (v2i64 (movhp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVHPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
-
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
diff --git a/lib/Target/X86/X86JITInfo.cpp b/lib/Target/X86/X86JITInfo.cpp
index 62ca47f..0792bdd 100644
--- a/lib/Target/X86/X86JITInfo.cpp
+++ b/lib/Target/X86/X86JITInfo.cpp
@@ -367,8 +367,9 @@ X86CompilationCallback2(intptr_t *StackPtr, intptr_t RetAddr) {
// Rewrite the call target... so that we don't end up here every time we
// execute the call.
#if defined (X86_64_JIT)
- if (!isStub)
- *(intptr_t *)(RetAddr - 0xa) = NewVal;
+ assert(isStub &&
+ "X86-64 doesn't support rewriting non-stub lazy compilation calls:"
+ " the call instruction varies too much.");
#else
*(intptr_t *)RetAddr = (intptr_t)(NewVal-RetAddr-4);
#endif
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index c5ff525..f577fcf 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -392,6 +392,11 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
Reserved.set(X86::SP);
Reserved.set(X86::SPL);
+ // Set the instruction pointer register and its aliases as reserved.
+ Reserved.set(X86::RIP);
+ Reserved.set(X86::EIP);
+ Reserved.set(X86::IP);
+
// Set the frame-pointer register and its aliases as reserved if needed.
if (hasFP(MF)) {
Reserved.set(X86::RBP);
@@ -450,12 +455,17 @@ bool X86RegisterInfo::hasFP(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ bool requiresRealignment =
+ RealignStack && (MFI->getMaxAlignment() > StackAlign);
// FIXME: Currently we don't support stack realignment for functions with
- // variable-sized allocas
- return (RealignStack &&
- (MFI->getMaxAlignment() > StackAlign &&
- !MFI->hasVarSizedObjects()));
+ // variable-sized allocas.
+ // FIXME: Temporarily disable the error - it seems to be too conservative.
+ if (0 && requiresRealignment && MFI->hasVarSizedObjects())
+ llvm_report_error(
+ "Stack realignment in presense of dynamic allocas is not supported");
+
+ return (requiresRealignment && !MFI->hasVarSizedObjects());
}
bool X86RegisterInfo::hasReservedCallFrame(MachineFunction &MF) const {
@@ -610,8 +620,8 @@ X86RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// Offset is a 32-bit integer.
int Offset = getFrameIndexOffset(MF, FrameIndex) +
(int)(MI.getOperand(i + 3).getImm());
-
- MI.getOperand(i + 3).ChangeToImmediate(Offset);
+
+ MI.getOperand(i + 3).ChangeToImmediate(Offset);
} else {
// Offset is symbolic. This is extremely rare.
uint64_t Offset = getFrameIndexOffset(MF, FrameIndex) +
@@ -647,7 +657,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// }
// [EBP]
MFI->CreateFixedObject(-TailCallReturnAddrDelta,
- (-1U*SlotSize)+TailCallReturnAddrDelta);
+ (-1U*SlotSize)+TailCallReturnAddrDelta,
+ true, false);
}
if (hasFP(MF)) {
@@ -659,7 +670,8 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx = MFI->CreateFixedObject(SlotSize,
-(int)SlotSize +
TFI.getOffsetOfLocalArea() +
- TailCallReturnAddrDelta);
+ TailCallReturnAddrDelta,
+ true, false);
assert(FrameIdx == MFI->getObjectIndexBegin() &&
"Slot for EBP register must be last in order to be found!");
FrameIdx = 0;
@@ -1271,7 +1283,7 @@ unsigned X86RegisterInfo::getRARegister() const {
: X86::EIP; // Should have dwarf #8.
}
-unsigned X86RegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned X86RegisterInfo::getFrameRegister(const MachineFunction &MF) const {
return hasFP(MF) ? FramePtr : StackPtr;
}
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index f635707..f281a3c 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -153,7 +153,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
int getFrameIndexOffset(MachineFunction &MF, int FI) const;
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 9525f04..b901c14 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -18,8 +18,10 @@
#include "llvm/GlobalValue.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/System/Host.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetOptions.h"
+#include "llvm/ADT/SmallVector.h"
using namespace llvm;
#if defined(_MSC_VER)
@@ -257,118 +259,6 @@ void X86Subtarget::AutoDetectSubtargetFeatures() {
}
}
-static const char *GetCurrentX86CPU() {
- unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
- if (GetCpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
- return "generic";
- unsigned Family = 0;
- unsigned Model = 0;
- DetectFamilyModel(EAX, Family, Model);
-
- GetCpuIDAndInfo(0x80000001, &EAX, &EBX, &ECX, &EDX);
- bool Em64T = (EDX >> 29) & 0x1;
- bool HasSSE3 = (ECX & 0x1);
-
- union {
- unsigned u[3];
- char c[12];
- } text;
-
- GetCpuIDAndInfo(0, &EAX, text.u+0, text.u+2, text.u+1);
- if (memcmp(text.c, "GenuineIntel", 12) == 0) {
- switch (Family) {
- case 3:
- return "i386";
- case 4:
- return "i486";
- case 5:
- switch (Model) {
- case 4: return "pentium-mmx";
- default: return "pentium";
- }
- case 6:
- switch (Model) {
- case 1: return "pentiumpro";
- case 3:
- case 5:
- case 6: return "pentium2";
- case 7:
- case 8:
- case 10:
- case 11: return "pentium3";
- case 9:
- case 13: return "pentium-m";
- case 14: return "yonah";
- case 15:
- case 22: // Celeron M 540
- return "core2";
- case 23: // 45nm: Penryn , Wolfdale, Yorkfield (XE)
- return "penryn";
- default: return "i686";
- }
- case 15: {
- switch (Model) {
- case 3:
- case 4:
- case 6: // same as 4, but 65nm
- return (Em64T) ? "nocona" : "prescott";
- case 26:
- return "corei7";
- case 28:
- return "atom";
- default:
- return (Em64T) ? "x86-64" : "pentium4";
- }
- }
-
- default:
- return "generic";
- }
- } else if (memcmp(text.c, "AuthenticAMD", 12) == 0) {
- // FIXME: this poorly matches the generated SubtargetFeatureKV table. There
- // appears to be no way to generate the wide variety of AMD-specific targets
- // from the information returned from CPUID.
- switch (Family) {
- case 4:
- return "i486";
- case 5:
- switch (Model) {
- case 6:
- case 7: return "k6";
- case 8: return "k6-2";
- case 9:
- case 13: return "k6-3";
- default: return "pentium";
- }
- case 6:
- switch (Model) {
- case 4: return "athlon-tbird";
- case 6:
- case 7:
- case 8: return "athlon-mp";
- case 10: return "athlon-xp";
- default: return "athlon";
- }
- case 15:
- if (HasSSE3) {
- return "k8-sse3";
- } else {
- switch (Model) {
- case 1: return "opteron";
- case 5: return "athlon-fx"; // also opteron
- default: return "athlon64";
- }
- }
- case 16:
- return "amdfam10";
- default:
- return "generic";
- }
- } else {
- return "generic";
- }
-}
-
X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
bool is64Bit)
: PICStyle(PICStyles::None)
@@ -395,7 +285,7 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
// Determine default and user specified characteristics
if (!FS.empty()) {
// If feature string is not empty, parse features string.
- std::string CPU = GetCurrentX86CPU();
+ std::string CPU = sys::getHostCPUName();
ParseSubtargetFeatures(FS, CPU);
// All X86-64 CPUs also have SSE2, however user might request no SSE via
// -mattr, so don't force SSELevel here.
@@ -455,3 +345,12 @@ X86Subtarget::X86Subtarget(const std::string &TT, const std::string &FS,
if (StackAlignment)
stackAlignment = StackAlignment;
}
+
+bool X86Subtarget::enablePostRAScheduler(
+ CodeGenOpt::Level OptLevel,
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const {
+ Mode = TargetSubtarget::ANTIDEP_CRITICAL;
+ CriticalPathRCs.clear();
+ return OptLevel >= CodeGenOpt::Default;
+}
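
For reference, a hedged sketch (not part of the change) of the host-CPU probe that
replaces the deleted GetCurrentX86CPU() above; it is the generic query declared in
the newly included llvm/System/Host.h:

    #include "llvm/System/Host.h"

    // Returns a target CPU name such as "core2", or "generic" when unknown.
    std::string CPU = llvm::sys::getHostCPUName();
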
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index e64b854..23f2841 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -166,11 +166,11 @@ public:
std::string getDataLayout() const {
const char *p;
if (is64Bit())
- p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128";
+ p = "e-p:64:64-s:64-f64:64:64-i64:64:64-f80:128:128-n8:16:32:64";
else if (isTargetDarwin())
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128";
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:128:128-n8:16:32";
else
- p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32";
+ p = "e-p:32:32-f64:32:64-i64:32:64-f80:32:32-n8:16:32";
return std::string(p);
}
@@ -219,10 +219,8 @@ public:
/// enablePostRAScheduler - X86 target is enabling post-alloc scheduling
/// at 'More' optimization level.
bool enablePostRAScheduler(CodeGenOpt::Level OptLevel,
- TargetSubtarget::AntiDepBreakMode& mode) const {
- mode = TargetSubtarget::ANTIDEP_CRITICAL;
- return OptLevel >= CodeGenOpt::Default;
- }
+ TargetSubtarget::AntiDepBreakMode& Mode,
+ RegClassVector& CriticalPathRCs) const;
};
} // End llvm namespace
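
A hedged sketch of how another backend could adopt the widened hook signature; the
names MyTargetSubtarget and MyTarget::GPRRegClass are hypothetical, and the X86
override above deliberately leaves CriticalPathRCs empty:

    bool MyTargetSubtarget::enablePostRAScheduler(
        CodeGenOpt::Level OptLevel,
        TargetSubtarget::AntiDepBreakMode &Mode,
        RegClassVector &CriticalPathRCs) const {
      // Only break anti-dependences on the critical path.
      Mode = TargetSubtarget::ANTIDEP_CRITICAL;
      CriticalPathRCs.clear();
      CriticalPathRCs.push_back(&MyTarget::GPRRegClass); // hypothetical class
      return OptLevel >= CodeGenOpt::Default;
    }
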
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index a61de1c..0cda8bc 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -22,8 +22,7 @@
#include "llvm/Target/TargetRegistry.h"
using namespace llvm;
-static const MCAsmInfo *createMCAsmInfo(const Target &T,
- const StringRef &TT) {
+static const MCAsmInfo *createMCAsmInfo(const Target &T, StringRef TT) {
Triple TheTriple(TT);
switch (TheTriple.getOS()) {
case Triple::Darwin:
@@ -186,14 +185,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
// 64-bit JIT places everything in the same buffer except external functions.
- // On Darwin, use small code model but hack the call instruction for
- // externals. Elsewhere, do not assume globals are in the lower 4G.
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTargetDarwin())
- setCodeModel(CodeModel::Small);
- else
+ if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
- }
PM.add(createX86CodeEmitterPass(*this, MCE));
@@ -212,14 +205,8 @@ bool X86TargetMachine::addCodeEmitter(PassManagerBase &PM,
}
// 64-bit JIT places everything in the same buffer except external functions.
- // On Darwin, use small code model but hack the call instruction for
- // externals. Elsewhere, do not assume globals are in the lower 4G.
- if (Subtarget.is64Bit()) {
- if (Subtarget.isTargetDarwin())
- setCodeModel(CodeModel::Small);
- else
+ if (Subtarget.is64Bit())
setCodeModel(CodeModel::Large);
- }
PM.add(createX86JITCodeEmitterPass(*this, JCE));
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index bc1bbc3..d7106a0 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -361,7 +361,7 @@ void XCoreAsmPrinter::printMachineInstruction(const MachineInstr *MI) {
return;
}
printInstruction(MI);
- if (VerboseAsm && !MI->getDebugLoc().isUnknown())
+ if (VerboseAsm)
EmitComments(*MI);
O << '\n';
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 860b72f..da2fb04 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -149,10 +149,7 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDValue Op, SDValue Addr,
/// InstructionSelect - This callback is invoked by
/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void XCoreDAGToDAGISel::
-InstructionSelect() {
- DEBUG(BB->dump());
-
+void XCoreDAGToDAGISel::InstructionSelect() {
// Select target instructions for the DAG.
SelectRoot(*CurDAG);
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 5ef56c9..16e68fe 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -860,7 +860,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
}
// Create the frame index object for this incoming parameter...
int FI = MFI->CreateFixedObject(ObjSize,
- LRSaveSize + VA.getLocMemOffset());
+ LRSaveSize + VA.getLocMemOffset(),
+ true, false);
// Create the SelectionDAG nodes corresponding to a load
//from this parameter
@@ -884,7 +885,7 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// address
for (unsigned i = array_lengthof(ArgRegs) - 1; i >= FirstVAReg; --i) {
// Create a stack slot
- int FI = MFI->CreateFixedObject(4, offset);
+ int FI = MFI->CreateFixedObject(4, offset, true, false);
if (i == FirstVAReg) {
XFI->setVarArgsFrameIndex(FI);
}
@@ -905,7 +906,8 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
} else {
// This will point to the next argument passed via stack.
XFI->setVarArgsFrameIndex(
- MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset()));
+ MFI->CreateFixedObject(4, LRSaveSize + CCInfo.getNextStackOffset(),
+ true, false));
}
}
@@ -916,6 +918,17 @@ XCoreTargetLowering::LowerCCCArguments(SDValue Chain,
// Return Value Calling Convention Implementation
//===----------------------------------------------------------------------===//
+bool XCoreTargetLowering::
+CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG) {
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+ return CCInfo.CheckReturn(OutTys, ArgsFlags, RetCC_XCore);
+}
+
SDValue
XCoreTargetLowering::LowerReturn(SDValue Chain,
CallingConv::ID CallConv, bool isVarArg,
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index ef8555e..10631af 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -159,6 +159,12 @@ namespace llvm {
CallingConv::ID CallConv, bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &Outs,
DebugLoc dl, SelectionDAG &DAG);
+
+ virtual bool
+ CanLowerReturn(CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<EVT> &OutTys,
+ const SmallVectorImpl<ISD::ArgFlagsTy> &ArgsFlags,
+ SelectionDAG &DAG);
};
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 68e69a2..4ed4ed4 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -617,7 +617,7 @@ defm EXTSP : FU6_LU6_np<"extsp">;
let mayStore = 1 in
defm ENTSP : FU6_LU6_np<"entsp">;
-let isReturn = 1, isTerminator = 1, mayLoad = 1 in {
+let isReturn = 1, isTerminator = 1, mayLoad = 1, isBarrier = 1 in {
defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
}
}
diff --git a/lib/Target/XCore/XCoreRegisterInfo.cpp b/lib/Target/XCore/XCoreRegisterInfo.cpp
index 136a035..c7c8c7b 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.cpp
+++ b/lib/Target/XCore/XCoreRegisterInfo.cpp
@@ -330,9 +330,10 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
int FrameIdx;
if (! isVarArg) {
// A fixed offset of 0 allows us to save / restore LR using entsp / retsp.
- FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0);
+ FrameIdx = MFI->CreateFixedObject(RC->getSize(), 0, true, false);
} else {
- FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment());
+ FrameIdx = MFI->CreateStackObject(RC->getSize(), RC->getAlignment(),
+ false);
}
XFI->setUsesLR(FrameIdx);
XFI->setLRSpillSlot(FrameIdx);
@@ -340,13 +341,15 @@ XCoreRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
if (requiresRegisterScavenging(MF)) {
// Reserve a slot close to SP or frame pointer.
RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
if (hasFP(MF)) {
// A callee save register is used to hold the FP.
// This needs saving / restoring in the epilogue / prologue.
XFI->setFPSpillSlot(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment()));
+ RC->getAlignment(),
+ false));
}
}
@@ -593,7 +596,7 @@ int XCoreRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
return XCoreGenRegisterInfo::getDwarfRegNumFull(RegNum, 0);
}
-unsigned XCoreRegisterInfo::getFrameRegister(MachineFunction &MF) const {
+unsigned XCoreRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
bool FP = hasFP(MF);
return FP ? XCore::R10 : XCore::SP;
diff --git a/lib/Target/XCore/XCoreRegisterInfo.h b/lib/Target/XCore/XCoreRegisterInfo.h
index a7df510..8ab1750 100644
--- a/lib/Target/XCore/XCoreRegisterInfo.h
+++ b/lib/Target/XCore/XCoreRegisterInfo.h
@@ -60,7 +60,7 @@ public:
unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
int SPAdj, int *Value = NULL,
RegScavenger *RS = NULL) const;
-
+
void processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
RegScavenger *RS = NULL) const;
@@ -71,7 +71,7 @@ public:
// Debug information queries.
unsigned getRARegister() const;
- unsigned getFrameRegister(MachineFunction &MF) const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
void getInitialFrameState(std::vector<MachineMove> &Moves) const;
//! Return the array of argument passing registers
diff --git a/lib/Target/XCore/XCoreTargetMachine.cpp b/lib/Target/XCore/XCoreTargetMachine.cpp
index 75f2055..267f46a 100644
--- a/lib/Target/XCore/XCoreTargetMachine.cpp
+++ b/lib/Target/XCore/XCoreTargetMachine.cpp
@@ -25,7 +25,7 @@ XCoreTargetMachine::XCoreTargetMachine(const Target &T, const std::string &TT,
: LLVMTargetMachine(T, TT),
Subtarget(TT, FS),
DataLayout("e-p:32:32:32-a0:0:32-f32:32:32-f64:32:32-i1:8:32-i8:8:32-"
- "i16:16:32-i32:32:32-i64:32:32"),
+ "i16:16:32-i32:32:32-i64:32:32-n32"),
InstrInfo(),
FrameInfo(*this),
TLInfo(*this) {
diff --git a/lib/Transforms/Hello/CMakeLists.txt b/lib/Transforms/Hello/CMakeLists.txt
index b80d15b..917b745 100644
--- a/lib/Transforms/Hello/CMakeLists.txt
+++ b/lib/Transforms/Hello/CMakeLists.txt
@@ -1,3 +1,3 @@
-add_llvm_library( LLVMHello
+add_llvm_loadable_module( LLVMHello
Hello.cpp
)
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 234d0ec..442f2fb 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -20,7 +20,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -245,8 +244,7 @@ static bool AnalyzeGlobal(Value *V, GlobalStatus &GS,
return false;
}
-static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx,
- LLVMContext &Context) {
+static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx) {
ConstantInt *CI = dyn_cast<ConstantInt>(Idx);
if (!CI) return 0;
unsigned IdxV = CI->getZExtValue();
@@ -282,8 +280,7 @@ static Constant *getAggregateConstantElement(Constant *Agg, Constant *Idx,
/// users of the global, cleaning up the obvious ones. This is largely just a
/// quick scan over the use list to clean up the easy and obvious cruft. This
/// returns true if it made a change.
-static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
- LLVMContext &Context) {
+static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;) {
User *U = *UI++;
@@ -304,11 +301,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = 0;
if (Init)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
- Changed |= CleanupConstantGlobalUsers(CE, SubInit, Context);
+ Changed |= CleanupConstantGlobalUsers(CE, SubInit);
} else if (CE->getOpcode() == Instruction::BitCast &&
isa<PointerType>(CE->getType())) {
// Pointer cast, delete any stores and memsets to the global.
- Changed |= CleanupConstantGlobalUsers(CE, 0, Context);
+ Changed |= CleanupConstantGlobalUsers(CE, 0);
}
if (CE->use_empty()) {
@@ -322,11 +319,11 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
Constant *SubInit = 0;
if (!isa<ConstantExpr>(GEP->getOperand(0))) {
ConstantExpr *CE =
- dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP, Context));
+ dyn_cast_or_null<ConstantExpr>(ConstantFoldInstruction(GEP));
if (Init && CE && CE->getOpcode() == Instruction::GetElementPtr)
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
}
- Changed |= CleanupConstantGlobalUsers(GEP, SubInit, Context);
+ Changed |= CleanupConstantGlobalUsers(GEP, SubInit);
if (GEP->use_empty()) {
GEP->eraseFromParent();
@@ -344,7 +341,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init,
if (SafeToDestroyConstant(C)) {
C->destroyConstant();
// This could have invalidated UI, start over from scratch.
- CleanupConstantGlobalUsers(V, Init, Context);
+ CleanupConstantGlobalUsers(V, Init);
return true;
}
}
@@ -469,8 +466,7 @@ static bool GlobalUsersSafeToSRA(GlobalValue *GV) {
/// behavior of the program in a more fine-grained way. We have determined that
/// this transformation is safe already. We return the first global variable we
/// insert so that the caller can reprocess it.
-static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
- LLVMContext &Context) {
+static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
// Make sure this global only has simple uses that we can SRA.
if (!GlobalUsersSafeToSRA(GV))
return 0;
@@ -492,11 +488,9 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
const StructLayout &Layout = *TD.getStructLayout(STy);
for (unsigned i = 0, e = STy->getNumElements(); i != e; ++i) {
Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::getInt32Ty(Context), i),
- Context);
+ ConstantInt::get(Type::getInt32Ty(STy->getContext()), i));
assert(In && "Couldn't get element of initializer?");
- GlobalVariable *NGV = new GlobalVariable(Context,
- STy->getElementType(i), false,
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(i), false,
GlobalVariable::InternalLinkage,
In, GV->getName()+"."+Twine(i),
GV->isThreadLocal(),
@@ -527,12 +521,10 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
unsigned EltAlign = TD.getABITypeAlignment(STy->getElementType());
for (unsigned i = 0, e = NumElements; i != e; ++i) {
Constant *In = getAggregateConstantElement(Init,
- ConstantInt::get(Type::getInt32Ty(Context), i),
- Context);
+ ConstantInt::get(Type::getInt32Ty(Init->getContext()), i));
assert(In && "Couldn't get element of initializer?");
- GlobalVariable *NGV = new GlobalVariable(Context,
- STy->getElementType(), false,
+ GlobalVariable *NGV = new GlobalVariable(STy->getElementType(), false,
GlobalVariable::InternalLinkage,
In, GV->getName()+"."+Twine(i),
GV->isThreadLocal(),
@@ -554,7 +546,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD,
DEBUG(errs() << "PERFORMING GLOBAL SRA ON: " << *GV);
- Constant *NullInt = Constant::getNullValue(Type::getInt32Ty(Context));
+ Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
// Loop over all of the uses of the global, replacing the constantexpr geps,
// with smaller constantexpr geps or direct references.
@@ -678,8 +670,7 @@ static bool AllUsesOfLoadedValueWillTrapIfNull(GlobalVariable *GV) {
return true;
}
-static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
- LLVMContext &Context) {
+static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV) {
bool Changed = false;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ) {
Instruction *I = cast<Instruction>(*UI++);
@@ -712,7 +703,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
} else if (CastInst *CI = dyn_cast<CastInst>(I)) {
Changed |= OptimizeAwayTrappingUsesOfValue(CI,
ConstantExpr::getCast(CI->getOpcode(),
- NewV, CI->getType()), Context);
+ NewV, CI->getType()));
if (CI->use_empty()) {
Changed = true;
CI->eraseFromParent();
@@ -730,7 +721,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
if (Idxs.size() == GEPI->getNumOperands()-1)
Changed |= OptimizeAwayTrappingUsesOfValue(GEPI,
ConstantExpr::getGetElementPtr(NewV, &Idxs[0],
- Idxs.size()), Context);
+ Idxs.size()));
if (GEPI->use_empty()) {
Changed = true;
GEPI->eraseFromParent();
@@ -746,8 +737,7 @@ static bool OptimizeAwayTrappingUsesOfValue(Value *V, Constant *NewV,
/// value stored into it. If there are uses of the loaded value that would trap
/// if the loaded value is dynamically null, then we know that they cannot be
/// reachable with a null optimize away the load.
-static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
- LLVMContext &Context) {
+static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV) {
bool Changed = false;
// Keep track of whether we are able to remove all the uses of the global
@@ -758,7 +748,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
for (Value::use_iterator GUI = GV->use_begin(), E = GV->use_end(); GUI != E;){
User *GlobalUser = *GUI++;
if (LoadInst *LI = dyn_cast<LoadInst>(GlobalUser)) {
- Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV, Context);
+ Changed |= OptimizeAwayTrappingUsesOfValue(LI, LV);
// If we were able to delete all uses of the loads
if (LI->use_empty()) {
LI->eraseFromParent();
@@ -789,7 +779,7 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
// nor is the global.
if (AllNonStoreUsesGone) {
DEBUG(errs() << " *** GLOBAL NOW DEAD!\n");
- CleanupConstantGlobalUsers(GV, 0, Context);
+ CleanupConstantGlobalUsers(GV, 0);
if (GV->use_empty()) {
GV->eraseFromParent();
++NumDeleted;
@@ -801,10 +791,10 @@ static bool OptimizeAwayTrappingUsesOfLoads(GlobalVariable *GV, Constant *LV,
/// ConstantPropUsersOf - Walk the use list of V, constant folding all of the
/// instructions that are foldable.
-static void ConstantPropUsersOf(Value *V, LLVMContext &Context) {
+static void ConstantPropUsersOf(Value *V) {
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; )
if (Instruction *I = dyn_cast<Instruction>(*UI++))
- if (Constant *NewC = ConstantFoldInstruction(I, Context)) {
+ if (Constant *NewC = ConstantFoldInstruction(I)) {
I->replaceAllUsesWith(NewC);
// Advance UI to the next non-I use to avoid invalidating it!
@@ -824,11 +814,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
const Type *AllocTy,
Value* NElems,
- LLVMContext &Context,
TargetData* TD) {
DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
- const Type *IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(GV->getContext());
// CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have
// returned NULL and we would not be here).
@@ -883,10 +872,10 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// If there is a comparison against null, we will insert a global bool to
// keep track of whether the global was initialized yet or not.
GlobalVariable *InitBool =
- new GlobalVariable(Context, Type::getInt1Ty(Context), false,
+ new GlobalVariable(Type::getInt1Ty(GV->getContext()), false,
GlobalValue::InternalLinkage,
- ConstantInt::getFalse(Context), GV->getName()+".init",
- GV->isThreadLocal());
+ ConstantInt::getFalse(GV->getContext()),
+ GV->getName()+".init", GV->isThreadLocal());
bool InitBoolUsed = false;
// Loop over all uses of GV, processing them in turn.
@@ -905,8 +894,8 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
switch (ICI->getPredicate()) {
default: llvm_unreachable("Unknown ICmp Predicate!");
case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT:
- LV = ConstantInt::getFalse(Context); // X < null -> always false
+ case ICmpInst::ICMP_SLT: // X < null -> always false
+ LV = ConstantInt::getFalse(GV->getContext());
break;
case ICmpInst::ICMP_ULE:
case ICmpInst::ICMP_SLE:
@@ -928,7 +917,7 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
} else {
StoreInst *SI = cast<StoreInst>(GV->use_back());
// The global is initialized when the store to it occurs.
- new StoreInst(ConstantInt::getTrue(Context), InitBool, SI);
+ new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
SI->eraseFromParent();
}
@@ -949,9 +938,9 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
// To further other optimizations, loop over all users of NewGV and try to
// constant prop them. This will promote GEP instructions with constant
// indices into GEP constant-exprs, which will allow global-opt to hack on it.
- ConstantPropUsersOf(NewGV, Context);
+ ConstantPropUsersOf(NewGV);
if (RepValue != NewGV)
- ConstantPropUsersOf(RepValue, Context);
+ ConstantPropUsersOf(RepValue);
return NewGV;
}
@@ -1153,8 +1142,7 @@ static bool AllGlobalLoadUsesSimpleEnoughForHeapSRA(GlobalVariable *GV,
static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
- LLVMContext &Context) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
std::vector<Value*> &FieldVals = InsertedScalarizedValues[V];
if (FieldNo >= FieldVals.size())
@@ -1172,7 +1160,7 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
// a new Load of the scalarized global.
Result = new LoadInst(GetHeapSROAValue(LI->getOperand(0), FieldNo,
InsertedScalarizedValues,
- PHIsToRewrite, Context),
+ PHIsToRewrite),
LI->getName()+".f"+Twine(FieldNo), LI);
} else if (PHINode *PN = dyn_cast<PHINode>(V)) {
// PN's type is pointer to struct. Make a new PHI of pointer to struct
@@ -1196,16 +1184,14 @@ static Value *GetHeapSROAValue(Value *V, unsigned FieldNo,
/// the load, rewrite the derived value to use the HeapSRoA'd load.
static void RewriteHeapSROALoadUser(Instruction *LoadUser,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
- LLVMContext &Context) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
// If this is a comparison against null, handle it.
if (ICmpInst *SCI = dyn_cast<ICmpInst>(LoadUser)) {
assert(isa<ConstantPointerNull>(SCI->getOperand(1)));
// If we have a setcc of the loaded pointer, we can use a setcc of any
// field.
Value *NPtr = GetHeapSROAValue(SCI->getOperand(0), 0,
- InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ InsertedScalarizedValues, PHIsToRewrite);
Value *New = new ICmpInst(SCI, SCI->getPredicate(), NPtr,
Constant::getNullValue(NPtr->getType()),
@@ -1223,8 +1209,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// Load the pointer for this field.
unsigned FieldNo = cast<ConstantInt>(GEPI->getOperand(2))->getZExtValue();
Value *NewPtr = GetHeapSROAValue(GEPI->getOperand(0), FieldNo,
- InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ InsertedScalarizedValues, PHIsToRewrite);
// Create the new GEP idx vector.
SmallVector<Value*, 8> GEPIdx;
@@ -1256,8 +1241,7 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
// users.
for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end(); UI != E; ) {
Instruction *User = cast<Instruction>(*UI++);
- RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
}
@@ -1267,13 +1251,11 @@ static void RewriteHeapSROALoadUser(Instruction *LoadUser,
/// AllGlobalLoadUsesSimpleEnoughForHeapSRA.
static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
DenseMap<Value*, std::vector<Value*> > &InsertedScalarizedValues,
- std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite,
- LLVMContext &Context) {
+ std::vector<std::pair<PHINode*, unsigned> > &PHIsToRewrite) {
for (Value::use_iterator UI = Load->use_begin(), E = Load->use_end();
UI != E; ) {
Instruction *User = cast<Instruction>(*UI++);
- RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ RewriteHeapSROALoadUser(User, InsertedScalarizedValues, PHIsToRewrite);
}
if (Load->use_empty()) {
@@ -1285,8 +1267,7 @@ static void RewriteUsesOfLoadForHeapSRoA(LoadInst *Load,
/// PerformHeapAllocSRoA - CI is an allocation of an array of structures. Break
/// it up into multiple allocations of arrays of the fields.
static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
- Value* NElems, LLVMContext &Context,
- TargetData *TD) {
+ Value* NElems, TargetData *TD) {
DEBUG(errs() << "SROA HEAP ALLOC: " << *GV << " MALLOC = " << *CI << '\n');
const Type* MAT = getMallocAllocatedType(CI);
const StructType *STy = cast<StructType>(MAT);
@@ -1315,14 +1296,16 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
FieldGlobals.push_back(NGV);
unsigned TypeSize = TD->getTypeAllocSize(FieldTy);
- if (const StructType* ST = dyn_cast<StructType>(FieldTy))
+ if (const StructType *ST = dyn_cast<StructType>(FieldTy))
TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- const Type* IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
Value *NMI = CallInst::CreateMalloc(CI, IntPtrTy, FieldTy,
ConstantInt::get(IntPtrTy, TypeSize),
NElems,
CI->getName() + ".f" + Twine(FieldNo));
- FieldMallocs.push_back(NMI);
+ CallInst *NCI = dyn_cast<BitCastInst>(NMI) ?
+ extractMallocCallFromBitCast(NMI) : cast<CallInst>(NMI);
+ FieldMallocs.push_back(NCI);
new StoreInst(NMI, NGV, CI);
}
@@ -1338,15 +1321,15 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// if (F1) { free(F1); F1 = 0; }
// if (F2) { free(F2); F2 = 0; }
// }
- Value *RunningOr = 0;
+ // The malloc can also fail if its argument is too large.
+ Constant *ConstantZero = ConstantInt::get(CI->getOperand(1)->getType(), 0);
+ Value *RunningOr = new ICmpInst(CI, ICmpInst::ICMP_SLT, CI->getOperand(1),
+ ConstantZero, "isneg");
for (unsigned i = 0, e = FieldMallocs.size(); i != e; ++i) {
Value *Cond = new ICmpInst(CI, ICmpInst::ICMP_EQ, FieldMallocs[i],
Constant::getNullValue(FieldMallocs[i]->getType()),
"isnull");
- if (!RunningOr)
- RunningOr = Cond; // First seteq
- else
- RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
+ RunningOr = BinaryOperator::CreateOr(RunningOr, Cond, "tmp", CI);
}
// Split the basic block at the old malloc.
@@ -1355,7 +1338,8 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
// Create the block to check the first condition. Put all these blocks at the
// end of the function as they are unlikely to be executed.
- BasicBlock *NullPtrBlock = BasicBlock::Create(Context, "malloc_ret_null",
+ BasicBlock *NullPtrBlock = BasicBlock::Create(OrigBB->getContext(),
+ "malloc_ret_null",
OrigBB->getParent());
// Remove the uncond branch from OrigBB to ContBB, turning it into a cond
@@ -1370,9 +1354,9 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Value *Cmp = new ICmpInst(*NullPtrBlock, ICmpInst::ICMP_NE, GVVal,
Constant::getNullValue(GVVal->getType()),
"tmp");
- BasicBlock *FreeBlock = BasicBlock::Create(Context, "free_it",
+ BasicBlock *FreeBlock = BasicBlock::Create(Cmp->getContext(), "free_it",
OrigBB->getParent());
- BasicBlock *NextBlock = BasicBlock::Create(Context, "next",
+ BasicBlock *NextBlock = BasicBlock::Create(Cmp->getContext(), "next",
OrigBB->getParent());
Instruction *BI = BranchInst::Create(FreeBlock, NextBlock,
Cmp, NullPtrBlock);
@@ -1406,8 +1390,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
Instruction *User = cast<Instruction>(*UI++);
if (LoadInst *LI = dyn_cast<LoadInst>(User)) {
- RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite,
- Context);
+ RewriteUsesOfLoadForHeapSRoA(LI, InsertedScalarizedValues, PHIsToRewrite);
continue;
}
@@ -1438,7 +1421,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *InVal = PN->getIncomingValue(i);
InVal = GetHeapSROAValue(InVal, FieldNo, InsertedScalarizedValues,
- PHIsToRewrite, Context);
+ PHIsToRewrite);
FieldPN->addIncoming(InVal, PN->getIncomingBlock(i));
}
}
@@ -1477,8 +1460,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CallInst *CI,
const Type *AllocTy,
Module::global_iterator &GVI,
- TargetData *TD,
- LLVMContext &Context) {
+ TargetData *TD) {
// If this is a malloc of an abstract type, don't touch it.
if (!AllocTy->isSized())
return false;
@@ -1508,15 +1490,14 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// This eliminates dynamic allocation, avoids an indirection accessing the
// data, and exposes the resultant global to further GlobalOpt.
// We cannot optimize the malloc if we cannot determine malloc array size.
- if (Value *NElems = getMallocArraySize(CI, Context, TD)) {
+ if (Value *NElems = getMallocArraySize(CI, TD, true)) {
if (ConstantInt *NElements = dyn_cast<ConstantInt>(NElems))
// Restrict this transformation to only working on small allocations
// (2048 bytes currently), as we don't want to introduce a 16M global or
// something.
if (TD &&
NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems,
- Context, TD);
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD);
return true;
}
@@ -1540,7 +1521,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// structs. malloc [100 x struct],1 -> malloc struct, 100
if (const ArrayType *AT =
dyn_cast<ArrayType>(getMallocAllocatedType(CI))) {
- const Type *IntPtrTy = TD->getIntPtrType(Context);
+ const Type *IntPtrTy = TD->getIntPtrType(CI->getContext());
unsigned TypeSize = TD->getStructLayout(AllocSTy)->getSizeInBytes();
Value *AllocSize = ConstantInt::get(IntPtrTy, TypeSize);
Value *NumElements = ConstantInt::get(IntPtrTy, AT->getNumElements());
@@ -1551,12 +1532,10 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
CI->replaceAllUsesWith(Cast);
CI->eraseFromParent();
CI = dyn_cast<BitCastInst>(Malloc) ?
- extractMallocCallFromBitCast(Malloc):
- cast<CallInst>(Malloc);
+ extractMallocCallFromBitCast(Malloc) : cast<CallInst>(Malloc);
}
- GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, Context, TD),
- Context, TD);
+ GVI = PerformHeapAllocSRoA(GV, CI, getMallocArraySize(CI, TD, true),TD);
return true;
}
}
@@ -1569,7 +1548,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// that only one value (besides its initializer) is ever stored to the global.
static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
Module::global_iterator &GVI,
- TargetData *TD, LLVMContext &Context) {
+ TargetData *TD) {
// Ignore no-op GEPs and bitcasts.
StoredOnceVal = StoredOnceVal->stripPointerCasts();
@@ -1585,12 +1564,12 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
ConstantExpr::getBitCast(SOVC, GV->getInitializer()->getType());
// Optimize away any trapping uses of the loaded value.
- if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC, Context))
+ if (OptimizeAwayTrappingUsesOfLoads(GV, SOVC))
return true;
} else if (CallInst *CI = extractMallocCall(StoredOnceVal)) {
const Type* MallocType = getMallocAllocatedType(CI);
if (MallocType && TryToOptimizeStoreOfMallocToGlobal(GV, CI, MallocType,
- GVI, TD, Context))
+ GVI, TD))
return true;
}
}
@@ -1602,8 +1581,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
/// two values ever stored into GV are its initializer and OtherVal. See if we
/// can shrink the global into a boolean and select between the two values
/// whenever it is used. This exposes the values to other scalar optimizations.
-static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
- LLVMContext &Context) {
+static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
const Type *GVElType = GV->getType()->getElementType();
// If GVElType is already i1, it is already shrunk. If the type of the GV is
@@ -1611,7 +1589,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
// between them is very expensive and unlikely to lead to later
// simplification. In these cases, we typically end up with "cond ? v1 : v2"
// where v1 and v2 both require constant pool loads, a big loss.
- if (GVElType == Type::getInt1Ty(Context) || GVElType->isFloatingPoint() ||
+ if (GVElType == Type::getInt1Ty(GV->getContext()) ||
+ GVElType->isFloatingPoint() ||
isa<PointerType>(GVElType) || isa<VectorType>(GVElType))
return false;
@@ -1624,15 +1603,16 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
DEBUG(errs() << " *** SHRINKING TO BOOL: " << *GV);
// Create the new global, initializing it to false.
- GlobalVariable *NewGV = new GlobalVariable(Context,
- Type::getInt1Ty(Context), false,
- GlobalValue::InternalLinkage, ConstantInt::getFalse(Context),
+ GlobalVariable *NewGV = new GlobalVariable(Type::getInt1Ty(GV->getContext()),
+ false,
+ GlobalValue::InternalLinkage,
+ ConstantInt::getFalse(GV->getContext()),
GV->getName()+".b",
GV->isThreadLocal());
GV->getParent()->getGlobalList().insert(GV, NewGV);
Constant *InitVal = GV->getInitializer();
- assert(InitVal->getType() != Type::getInt1Ty(Context) &&
+ assert(InitVal->getType() != Type::getInt1Ty(GV->getContext()) &&
"No reason to shrink to bool!");
// If initialized to zero and storing one into the global, we can use a cast
@@ -1649,7 +1629,8 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal,
// Only do this if we weren't storing a loaded value.
Value *StoreVal;
if (StoringOther || SI->getOperand(0) == InitVal)
- StoreVal = ConstantInt::get(Type::getInt1Ty(Context), StoringOther);
+ StoreVal = ConstantInt::get(Type::getInt1Ty(GV->getContext()),
+ StoringOther);
else {
// Otherwise, we are storing a previously loaded copy. To do this,
// change the copy from copying the original value to just copying the
@@ -1708,24 +1689,26 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
if (!AnalyzeGlobal(GV, GS, PHIUsers)) {
#if 0
- cerr << "Global: " << *GV;
- cerr << " isLoaded = " << GS.isLoaded << "\n";
- cerr << " StoredType = ";
+ DEBUG(errs() << "Global: " << *GV);
+ DEBUG(errs() << " isLoaded = " << GS.isLoaded << "\n");
+ DEBUG(errs() << " StoredType = ");
switch (GS.StoredType) {
- case GlobalStatus::NotStored: cerr << "NEVER STORED\n"; break;
- case GlobalStatus::isInitializerStored: cerr << "INIT STORED\n"; break;
- case GlobalStatus::isStoredOnce: cerr << "STORED ONCE\n"; break;
- case GlobalStatus::isStored: cerr << "stored\n"; break;
+ case GlobalStatus::NotStored: DEBUG(errs() << "NEVER STORED\n"); break;
+ case GlobalStatus::isInitializerStored: DEBUG(errs() << "INIT STORED\n");
+ break;
+ case GlobalStatus::isStoredOnce: DEBUG(errs() << "STORED ONCE\n"); break;
+ case GlobalStatus::isStored: DEBUG(errs() << "stored\n"); break;
}
if (GS.StoredType == GlobalStatus::isStoredOnce && GS.StoredOnceValue)
- cerr << " StoredOnceValue = " << *GS.StoredOnceValue << "\n";
+ DEBUG(errs() << " StoredOnceValue = " << *GS.StoredOnceValue << "\n");
if (GS.AccessingFunction && !GS.HasMultipleAccessingFunctions)
- cerr << " AccessingFunction = " << GS.AccessingFunction->getName()
- << "\n";
- cerr << " HasMultipleAccessingFunctions = "
- << GS.HasMultipleAccessingFunctions << "\n";
- cerr << " HasNonInstructionUser = " << GS.HasNonInstructionUser<<"\n";
- cerr << "\n";
+ DEBUG(errs() << " AccessingFunction = " << GS.AccessingFunction->getName()
+ << "\n");
+ DEBUG(errs() << " HasMultipleAccessingFunctions = "
+ << GS.HasMultipleAccessingFunctions << "\n");
+ DEBUG(errs() << " HasNonInstructionUser = "
+ << GS.HasNonInstructionUser<<"\n");
+ DEBUG(errs() << "\n");
#endif
// If this is a first class global and has only one accessing function
@@ -1764,8 +1747,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Delete any stores we can find to the global. We may not be able to
// make it completely dead though.
- bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer(),
- GV->getContext());
+ bool Changed = CleanupConstantGlobalUsers(GV, GV->getInitializer());
// If the global is dead now, delete it.
if (GV->use_empty()) {
@@ -1780,7 +1762,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setConstant(true);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(), GV->getContext());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
// If the global is dead now, just nuke it.
if (GV->use_empty()) {
@@ -1794,8 +1776,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
return true;
} else if (!GV->getInitializer()->getType()->isSingleValueType()) {
if (TargetData *TD = getAnalysisIfAvailable<TargetData>())
- if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD,
- GV->getContext())) {
+ if (GlobalVariable *FirstNewGV = SRAGlobal(GV, *TD)) {
GVI = FirstNewGV; // Don't skip the newly produced globals!
return true;
}
@@ -1810,8 +1791,7 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
GV->setInitializer(SOVConstant);
// Clean up any obviously simplifiable users now.
- CleanupConstantGlobalUsers(GV, GV->getInitializer(),
- GV->getContext());
+ CleanupConstantGlobalUsers(GV, GV->getInitializer());
if (GV->use_empty()) {
DEBUG(errs() << " *** Substituting initializer allowed us to "
@@ -1828,14 +1808,13 @@ bool GlobalOpt::ProcessInternalGlobal(GlobalVariable *GV,
// Try to optimize globals based on the knowledge that only one value
// (besides its initializer) is ever stored to the global.
if (OptimizeOnceStoredGlobal(GV, GS.StoredOnceValue, GVI,
- getAnalysisIfAvailable<TargetData>(),
- GV->getContext()))
+ getAnalysisIfAvailable<TargetData>()))
return true;
// Otherwise, if the global was not a boolean, we can shrink it to be a
// boolean.
if (Constant *SOVConstant = dyn_cast<Constant>(GS.StoredOnceValue))
- if (TryToShrinkGlobalToBoolean(GV, SOVConstant, GV->getContext())) {
+ if (TryToShrinkGlobalToBoolean(GV, SOVConstant)) {
++NumShrunkToBool;
return true;
}
@@ -1987,11 +1966,10 @@ static std::vector<Function*> ParseGlobalCtors(GlobalVariable *GV) {
/// InstallGlobalCtors - Given a specified llvm.global_ctors list, install the
/// specified array, returning the new global to use.
static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
- const std::vector<Function*> &Ctors,
- LLVMContext &Context) {
+ const std::vector<Function*> &Ctors) {
// If we made a change, reassemble the initializer list.
std::vector<Constant*> CSVals;
- CSVals.push_back(ConstantInt::get(Type::getInt32Ty(Context), 65535));
+ CSVals.push_back(ConstantInt::get(Type::getInt32Ty(GCL->getContext()),65535));
CSVals.push_back(0);
// Create the new init list.
@@ -2000,12 +1978,14 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
if (Ctors[i]) {
CSVals[1] = Ctors[i];
} else {
- const Type *FTy = FunctionType::get(Type::getVoidTy(Context), false);
+ const Type *FTy = FunctionType::get(Type::getVoidTy(GCL->getContext()),
+ false);
const PointerType *PFTy = PointerType::getUnqual(FTy);
CSVals[1] = Constant::getNullValue(PFTy);
- CSVals[0] = ConstantInt::get(Type::getInt32Ty(Context), 2147483647);
+ CSVals[0] = ConstantInt::get(Type::getInt32Ty(GCL->getContext()),
+ 2147483647);
}
- CAList.push_back(ConstantStruct::get(Context, CSVals, false));
+ CAList.push_back(ConstantStruct::get(GCL->getContext(), CSVals, false));
}
// Create the array initializer.
@@ -2021,8 +2001,7 @@ static GlobalVariable *InstallGlobalCtors(GlobalVariable *GCL,
}
// Create the new global and insert it next to the existing list.
- GlobalVariable *NGV = new GlobalVariable(Context, CA->getType(),
- GCL->isConstant(),
+ GlobalVariable *NGV = new GlobalVariable(CA->getType(), GCL->isConstant(),
GCL->getLinkage(), CA, "",
GCL->isThreadLocal());
GCL->getParent()->getGlobalList().insert(GCL, NGV);
@@ -2056,7 +2035,7 @@ static Constant *getVal(DenseMap<Value*, Constant*> &ComputedValues,
/// enough for us to understand. In particular, if it is a cast of something,
/// we punt. We basically just support direct accesses to globals and GEP's of
/// globals. This should be kept up to date with CommitValueTo.
-static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) {
+static bool isSimpleEnoughPointerToCommit(Constant *C) {
// Conservatively, avoid aggregate types. This is because we don't
// want to worry about them partially overlapping other stores.
if (!cast<PointerType>(C->getType())->getElementType()->isSingleValueType())
@@ -2096,8 +2075,7 @@ static bool isSimpleEnoughPointerToCommit(Constant *C, LLVMContext &Context) {
/// initializer. This returns 'Init' modified to reflect 'Val' stored into it.
/// At this point, the GEP operands of Addr [0, OpNo) have been stepped into.
static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
- ConstantExpr *Addr, unsigned OpNo,
- LLVMContext &Context) {
+ ConstantExpr *Addr, unsigned OpNo) {
// Base case of the recursion.
if (OpNo == Addr->getNumOperands()) {
assert(Val->getType() == Init->getType() && "Type mismatch!");
@@ -2126,10 +2104,11 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
ConstantInt *CU = cast<ConstantInt>(Addr->getOperand(OpNo));
unsigned Idx = CU->getZExtValue();
assert(Idx < STy->getNumElements() && "Struct index out of range!");
- Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1, Context);
+ Elts[Idx] = EvaluateStoreInto(Elts[Idx], Val, Addr, OpNo+1);
// Return the modified struct.
- return ConstantStruct::get(Context, &Elts[0], Elts.size(), STy->isPacked());
+ return ConstantStruct::get(Init->getContext(), &Elts[0], Elts.size(),
+ STy->isPacked());
} else {
ConstantInt *CI = cast<ConstantInt>(Addr->getOperand(OpNo));
const ArrayType *ATy = cast<ArrayType>(Init->getType());
@@ -2152,15 +2131,14 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
assert(CI->getZExtValue() < ATy->getNumElements());
Elts[CI->getZExtValue()] =
- EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1, Context);
+ EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
return ConstantArray::get(ATy, Elts);
}
}
/// CommitValueTo - We have decided that Addr (which satisfies the predicate
/// isSimpleEnoughPointerToCommit) should get Val as its value. Make it happen.
-static void CommitValueTo(Constant *Val, Constant *Addr,
- LLVMContext &Context) {
+static void CommitValueTo(Constant *Val, Constant *Addr) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Addr)) {
assert(GV->hasInitializer());
GV->setInitializer(Val);
@@ -2171,7 +2149,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr,
GlobalVariable *GV = cast<GlobalVariable>(CE->getOperand(0));
Constant *Init = GV->getInitializer();
- Init = EvaluateStoreInto(Init, Val, CE, 2, Context);
+ Init = EvaluateStoreInto(Init, Val, CE, 2);
GV->setInitializer(Init);
}
@@ -2179,8 +2157,7 @@ static void CommitValueTo(Constant *Val, Constant *Addr,
/// P after the stores reflected by 'memory' have been performed. If we can't
/// decide, return null.
static Constant *ComputeLoadResult(Constant *P,
- const DenseMap<Constant*, Constant*> &Memory,
- LLVMContext &Context) {
+ const DenseMap<Constant*, Constant*> &Memory) {
// If this memory location has been recently stored, use the stored value: it
// is the most up-to-date.
DenseMap<Constant*, Constant*>::const_iterator I = Memory.find(P);
@@ -2218,8 +2195,6 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (std::find(CallStack.begin(), CallStack.end(), F) != CallStack.end())
return false;
- LLVMContext &Context = F->getContext();
-
CallStack.push_back(F);
/// Values - As we compute SSA register values, we store their contents here.
@@ -2246,7 +2221,7 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
if (StoreInst *SI = dyn_cast<StoreInst>(CurInst)) {
if (SI->isVolatile()) return false; // no volatile accesses.
Constant *Ptr = getVal(Values, SI->getOperand(1));
- if (!isSimpleEnoughPointerToCommit(Ptr, Context))
+ if (!isSimpleEnoughPointerToCommit(Ptr))
// If this is too complex for us to commit, reject it.
return false;
Constant *Val = getVal(Values, SI->getOperand(0));
@@ -2280,12 +2255,12 @@ static bool EvaluateFunction(Function *F, Constant *&RetVal,
} else if (LoadInst *LI = dyn_cast<LoadInst>(CurInst)) {
if (LI->isVolatile()) return false; // no volatile accesses.
InstResult = ComputeLoadResult(getVal(Values, LI->getOperand(0)),
- MutatedMemory, Context);
+ MutatedMemory);
if (InstResult == 0) return false; // Could not evaluate load.
} else if (AllocaInst *AI = dyn_cast<AllocaInst>(CurInst)) {
if (AI->isArrayAllocation()) return false; // Cannot handle array allocs.
const Type *Ty = AI->getType()->getElementType();
- AllocaTmps.push_back(new GlobalVariable(Context, Ty, false,
+ AllocaTmps.push_back(new GlobalVariable(Ty, false,
GlobalValue::InternalLinkage,
UndefValue::get(Ty),
AI->getName()));
@@ -2423,7 +2398,7 @@ static bool EvaluateStaticConstructor(Function *F) {
<< " stores.\n");
for (DenseMap<Constant*, Constant*>::iterator I = MutatedMemory.begin(),
E = MutatedMemory.end(); I != E; ++I)
- CommitValueTo(I->second, I->first, F->getContext());
+ CommitValueTo(I->second, I->first);
}
// At this point, we are done interpreting. If we created any 'alloca'
@@ -2480,7 +2455,7 @@ bool GlobalOpt::OptimizeGlobalCtorsList(GlobalVariable *&GCL) {
if (!MadeChange) return false;
- GCL = InstallGlobalCtors(GCL, Ctors, GCL->getContext());
+ GCL = InstallGlobalCtors(GCL, Ctors);
return true;
}
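
The GlobalOpt hunks above all drop an explicit LLVMContext parameter; the context is instead recovered from an IR object already in hand. A minimal sketch of the pattern, using only calls that appear in the hunks and assuming the includes already present in GlobalOpt.cpp (GCL is the llvm.global_ctors GlobalVariable):

    // Derive the context from the value instead of threading it as an argument.
    LLVMContext &Ctx = GCL->getContext();
    const Type *FTy = FunctionType::get(Type::getVoidTy(Ctx), false);
    Constant *Priority = ConstantInt::get(Type::getInt32Ty(Ctx), 2147483647);
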
diff --git a/lib/Transforms/IPO/Inliner.cpp b/lib/Transforms/IPO/Inliner.cpp
index ea47366..6918fe8 100644
--- a/lib/Transforms/IPO/Inliner.cpp
+++ b/lib/Transforms/IPO/Inliner.cpp
@@ -19,10 +19,11 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/Analysis/CallGraph.h"
#include "llvm/Analysis/InlineCost.h"
-#include "llvm/Support/CallSite.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/IPO/InlinerPass.h"
#include "llvm/Transforms/Utils/Cloning.h"
+#include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Support/CallSite.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
@@ -32,6 +33,7 @@
using namespace llvm;
STATISTIC(NumInlined, "Number of functions inlined");
+STATISTIC(NumCallsDeleted, "Number of call sites deleted, not inlined");
STATISTIC(NumDeleted, "Number of functions deleted because all callers found");
STATISTIC(NumMergedAllocas, "Number of allocas merged together");
@@ -336,23 +338,38 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
for (unsigned CSi = 0; CSi != CallSites.size(); ++CSi) {
CallSite CS = CallSites[CSi];
+ Function *Caller = CS.getCaller();
Function *Callee = CS.getCalledFunction();
- // We can only inline direct calls to non-declarations.
- if (Callee == 0 || Callee->isDeclaration()) continue;
+
+ // If this call site is dead and it is to a readonly function, we should
+ // just delete the call instead of trying to inline it, regardless of
+ // size. This happens because IPSCCP propagates the result out of the
+ // call and then we're left with the dead call.
+ if (isInstructionTriviallyDead(CS.getInstruction())) {
+ DEBUG(errs() << " -> Deleting dead call: "
+ << *CS.getInstruction() << "\n");
+ // Update the call graph by deleting the edge from Callee to Caller.
+ CG[Caller]->removeCallEdgeFor(CS);
+ CS.getInstruction()->eraseFromParent();
+ ++NumCallsDeleted;
+ } else {
+ // We can only inline direct calls to non-declarations.
+ if (Callee == 0 || Callee->isDeclaration()) continue;
- // If the policy determines that we should inline this function,
- // try to do so.
- if (!shouldInline(CS))
- continue;
+ // If the policy determines that we should inline this function,
+ // try to do so.
+ if (!shouldInline(CS))
+ continue;
- Function *Caller = CS.getCaller();
- // Attempt to inline the function...
- if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
- continue;
+ // Attempt to inline the function...
+ if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))
+ continue;
+ ++NumInlined;
+ }
- // If we inlined the last possible call site to the function, delete the
- // function body now.
- if (Callee->use_empty() && Callee->hasLocalLinkage() &&
+ // If we inlined or deleted the last possible call site to the function,
+ // delete the function body now.
+ if (Callee && Callee->use_empty() && Callee->hasLocalLinkage() &&
// TODO: Can remove if in SCC now.
!SCCFunctions.count(Callee) &&
@@ -391,7 +408,6 @@ bool Inliner::runOnSCC(std::vector<CallGraphNode*> &SCC) {
}
--CSi;
- ++NumInlined;
Changed = true;
LocalChange = true;
}
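
Taken together, the Inliner hunks split per-call-site handling into two arms. A condensed sketch of the control flow, reusing the runOnSCC locals from the hunk above (CS, CG, TD, Caller, Callee, InlinedArrayAllocas) rather than the exact pass code:

    if (isInstructionTriviallyDead(CS.getInstruction())) {
      // Dead result from a readonly callee: delete the call outright.
      CG[Caller]->removeCallEdgeFor(CS);        // keep the call graph in sync
      CS.getInstruction()->eraseFromParent();
      ++NumCallsDeleted;
    } else {
      if (Callee == 0 || Callee->isDeclaration()) continue;  // only direct defs
      if (!shouldInline(CS)) continue;                       // cost model says no
      if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas)) continue;
      ++NumInlined;
    }

Either arm can leave Callee with no remaining uses, which is why the dead-function cleanup after the branch now guards on 'Callee &&' before testing use_empty().
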
diff --git a/lib/Transforms/IPO/LoopExtractor.cpp b/lib/Transforms/IPO/LoopExtractor.cpp
index fd69aeb..cb81330 100644
--- a/lib/Transforms/IPO/LoopExtractor.cpp
+++ b/lib/Transforms/IPO/LoopExtractor.cpp
@@ -75,6 +75,10 @@ bool LoopExtractor::runOnLoop(Loop *L, LPPassManager &LPM) {
if (L->getParentLoop())
return false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
DominatorTree &DT = getAnalysis<DominatorTree>();
bool Changed = false;
diff --git a/lib/Transforms/IPO/StripSymbols.cpp b/lib/Transforms/IPO/StripSymbols.cpp
index 4f6369e..0b5e007 100644
--- a/lib/Transforms/IPO/StripSymbols.cpp
+++ b/lib/Transforms/IPO/StripSymbols.cpp
@@ -202,53 +202,35 @@ static bool StripSymbolNames(Module &M, bool PreserveDbgInfo) {
// llvm.dbg.region.end calls, and any globals they point to if now dead.
static bool StripDebugInfo(Module &M) {
+ bool Changed = false;
+
// Remove all of the calls to the debugger intrinsics, and remove them from
// the module.
- Function *FuncStart = M.getFunction("llvm.dbg.func.start");
- Function *StopPoint = M.getFunction("llvm.dbg.stoppoint");
- Function *RegionStart = M.getFunction("llvm.dbg.region.start");
- Function *RegionEnd = M.getFunction("llvm.dbg.region.end");
- Function *Declare = M.getFunction("llvm.dbg.declare");
-
- if (FuncStart) {
- while (!FuncStart->use_empty()) {
- CallInst *CI = cast<CallInst>(FuncStart->use_back());
- CI->eraseFromParent();
- }
- FuncStart->eraseFromParent();
- }
- if (StopPoint) {
- while (!StopPoint->use_empty()) {
- CallInst *CI = cast<CallInst>(StopPoint->use_back());
- CI->eraseFromParent();
- }
- StopPoint->eraseFromParent();
- }
- if (RegionStart) {
- while (!RegionStart->use_empty()) {
- CallInst *CI = cast<CallInst>(RegionStart->use_back());
- CI->eraseFromParent();
- }
- RegionStart->eraseFromParent();
- }
- if (RegionEnd) {
- while (!RegionEnd->use_empty()) {
- CallInst *CI = cast<CallInst>(RegionEnd->use_back());
- CI->eraseFromParent();
- }
- RegionEnd->eraseFromParent();
- }
- if (Declare) {
+ if (Function *Declare = M.getFunction("llvm.dbg.declare")) {
while (!Declare->use_empty()) {
CallInst *CI = cast<CallInst>(Declare->use_back());
CI->eraseFromParent();
}
Declare->eraseFromParent();
+ Changed = true;
}
NamedMDNode *NMD = M.getNamedMetadata("llvm.dbg.gv");
- if (NMD)
+ if (NMD) {
+ Changed = true;
NMD->eraseFromParent();
+ }
+ MetadataContext &TheMetadata = M.getContext().getMetadata();
+ unsigned MDDbgKind = TheMetadata.getMDKind("dbg");
+ if (!MDDbgKind)
+ return Changed;
+
+ for (Module::iterator MI = M.begin(), ME = M.end(); MI != ME; ++MI)
+ for (Function::iterator FI = MI->begin(), FE = MI->end(); FI != FE;
+ ++FI)
+ for (BasicBlock::iterator BI = FI->begin(), BE = FI->end(); BI != BE;
+ ++BI)
+ TheMetadata.removeMD(MDDbgKind, BI);
return true;
}
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index c8541d7..e58fa63 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -412,7 +412,9 @@ class ABCD : public FunctionPass {
/// If PN_op1 and PN_op2 are different from NULL, create a constraint
/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
/// with the respective V_op#, if V_op# is a ConstantInt.
- void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2, APInt value);
+ void createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
+ ConstantInt *V_op1, ConstantInt *V_op2,
+ APInt value);
/// Returns the sigma representing the Instruction I in BasicBlock BB.
/// Returns NULL in case there is no sigma for this Instruction in this
@@ -735,25 +737,27 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
APInt Zero = APInt::getNullValue(width);
CmpInst::Predicate Pred = ICI->getPredicate();
+ ConstantInt *CI1 = dyn_cast<ConstantInt>(V_op1);
+ ConstantInt *CI2 = dyn_cast<ConstantInt>(V_op2);
switch (Pred) {
case CmpInst::ICMP_SGT: // signed greater than
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, MinusOne);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, Zero);
+ createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, MinusOne);
+ createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, Zero);
break;
case CmpInst::ICMP_SGE: // signed greater or equal
- createConstraintSigSig(SIG_op2_t, SIG_op1_t, Zero);
- createConstraintSigSig(SIG_op1_f, SIG_op2_f, MinusOne);
+ createConstraintSigSig(SIG_op2_t, SIG_op1_t, CI2, CI1, Zero);
+ createConstraintSigSig(SIG_op1_f, SIG_op2_f, CI1, CI2, MinusOne);
break;
case CmpInst::ICMP_SLT: // signed less than
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, MinusOne);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, Zero);
+ createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, MinusOne);
+ createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, Zero);
break;
case CmpInst::ICMP_SLE: // signed less or equal
- createConstraintSigSig(SIG_op1_t, SIG_op2_t, Zero);
- createConstraintSigSig(SIG_op2_f, SIG_op1_f, MinusOne);
+ createConstraintSigSig(SIG_op1_t, SIG_op2_t, CI1, CI2, Zero);
+ createConstraintSigSig(SIG_op2_f, SIG_op1_f, CI2, CI1, MinusOne);
break;
default:
@@ -772,6 +776,10 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
/// b->a and c->a with weight 0 in the lower bound graph, and the edges
/// a->b and a->c with weight 0 in the upper bound graph.
void ABCD::createConstraintPHINode(PHINode *PN) {
+ // FIXME: We really want to disallow sigma nodes, but I don't know the best
+ // way to detect them other than this.
+ if (PN->getNumOperands() == 2) return;
+
int32_t width = cast<IntegerType>(PN->getType())->getBitWidth();
for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
Value *V = PN->getIncomingValue(i);
@@ -796,13 +804,11 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
int32_t width = cast<IntegerType>((*SIG_op_t)->getType())->getBitWidth();
inequality_graph.addEdge(I_op, *SIG_op_t, APInt(width, 0), true);
inequality_graph.addEdge(*SIG_op_t, I_op, APInt(width, 0), false);
- created.insert(*SIG_op_t);
}
if (*SIG_op_f) {
int32_t width = cast<IntegerType>((*SIG_op_f)->getType())->getBitWidth();
inequality_graph.addEdge(I_op, *SIG_op_f, APInt(width, 0), true);
inequality_graph.addEdge(*SIG_op_f, I_op, APInt(width, 0), false);
- created.insert(*SIG_op_f);
}
}
@@ -810,10 +816,17 @@ void ABCD::createConstraintSigInst(Instruction *I_op, BasicBlock *BB_succ_t,
/// PN_op2 -> PN_op1 with value. In case any of them is NULL, replace
/// with the respective V_op#, if V_op# is a ConstantInt.
void ABCD::createConstraintSigSig(PHINode *SIG_op1, PHINode *SIG_op2,
+ ConstantInt *V_op1, ConstantInt *V_op2,
APInt value) {
if (SIG_op1 && SIG_op2) {
inequality_graph.addEdge(SIG_op2, SIG_op1, value, true);
inequality_graph.addEdge(SIG_op1, SIG_op2, -value, false);
+ } else if (SIG_op1 && V_op2) {
+ inequality_graph.addEdge(V_op2, SIG_op1, value, true);
+ inequality_graph.addEdge(SIG_op1, V_op2, -value, false);
+ } else if (SIG_op2 && V_op1) {
+ inequality_graph.addEdge(SIG_op2, V_op1, value, true);
+ inequality_graph.addEdge(V_op1, SIG_op2, -value, false);
}
}
@@ -1036,7 +1049,7 @@ void ABCD::InequalityGraph::printHeader(raw_ostream &OS, Function &F) const {
/// Prints the body of the dot file
void ABCD::InequalityGraph::printBody(raw_ostream &OS) const {
- DenseMap<Value *, SmallPtrSet<Edge *, 16> >::iterator begin =
+ DenseMap<Value *, SmallPtrSet<Edge *, 16> >::const_iterator begin =
graph.begin(), end = graph.end();
for (; begin != end ; ++begin) {
diff --git a/lib/Transforms/Scalar/CMakeLists.txt b/lib/Transforms/Scalar/CMakeLists.txt
index e048518..5a92399 100644
--- a/lib/Transforms/Scalar/CMakeLists.txt
+++ b/lib/Transforms/Scalar/CMakeLists.txt
@@ -3,7 +3,6 @@ add_llvm_library(LLVMScalarOpts
ADCE.cpp
BasicBlockPlacement.cpp
CodeGenPrepare.cpp
- CondPropagate.cpp
ConstantProp.cpp
DCE.cpp
DeadStoreElimination.cpp
diff --git a/lib/Transforms/Scalar/CondPropagate.cpp b/lib/Transforms/Scalar/CondPropagate.cpp
deleted file mode 100644
index 8a6c556..0000000
--- a/lib/Transforms/Scalar/CondPropagate.cpp
+++ /dev/null
@@ -1,289 +0,0 @@
-//===-- CondPropagate.cpp - Propagate Conditional Expressions -------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass propagates information about conditional expressions through the
-// program, allowing it to eliminate conditional branches in some cases.
-//
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "condprop"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Instructions.h"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Pass.h"
-#include "llvm/Type.h"
-#include "llvm/Transforms/Utils/BasicBlockUtils.h"
-#include "llvm/Transforms/Utils/Local.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SmallVector.h"
-using namespace llvm;
-
-STATISTIC(NumBrThread, "Number of CFG edges threaded through branches");
-STATISTIC(NumSwThread, "Number of CFG edges threaded through switches");
-
-namespace {
- struct CondProp : public FunctionPass {
- static char ID; // Pass identification, replacement for typeid
- CondProp() : FunctionPass(&ID) {}
-
- virtual bool runOnFunction(Function &F);
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(BreakCriticalEdgesID);
- //AU.addRequired<DominanceFrontier>();
- }
-
- private:
- bool MadeChange;
- SmallVector<BasicBlock *, 4> DeadBlocks;
- void SimplifyBlock(BasicBlock *BB);
- void SimplifyPredecessors(BranchInst *BI);
- void SimplifyPredecessors(SwitchInst *SI);
- void RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB);
- bool RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI);
- };
-}
-
-char CondProp::ID = 0;
-static RegisterPass<CondProp> X("condprop", "Conditional Propagation");
-
-FunctionPass *llvm::createCondPropagationPass() {
- return new CondProp();
-}
-
-bool CondProp::runOnFunction(Function &F) {
- bool EverMadeChange = false;
- DeadBlocks.clear();
-
- // While we are simplifying blocks, keep iterating.
- do {
- MadeChange = false;
- for (Function::iterator BB = F.begin(), E = F.end(); BB != E;)
- SimplifyBlock(BB++);
- EverMadeChange = EverMadeChange || MadeChange;
- } while (MadeChange);
-
- if (EverMadeChange) {
- while (!DeadBlocks.empty()) {
- BasicBlock *BB = DeadBlocks.back(); DeadBlocks.pop_back();
- DeleteDeadBlock(BB);
- }
- }
- return EverMadeChange;
-}
-
-void CondProp::SimplifyBlock(BasicBlock *BB) {
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
- // If this is a conditional branch based on a phi node that is defined in
- // this block, see if we can simplify predecessors of this block.
- if (BI->isConditional() && isa<PHINode>(BI->getCondition()) &&
- cast<PHINode>(BI->getCondition())->getParent() == BB)
- SimplifyPredecessors(BI);
-
- } else if (SwitchInst *SI = dyn_cast<SwitchInst>(BB->getTerminator())) {
- if (isa<PHINode>(SI->getCondition()) &&
- cast<PHINode>(SI->getCondition())->getParent() == BB)
- SimplifyPredecessors(SI);
- }
-
- // If possible, simplify the terminator of this block.
- if (ConstantFoldTerminator(BB))
- MadeChange = true;
-
- // If this block ends with an unconditional branch and the only successor has
- // only this block as a predecessor, merge the two blocks together.
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- if (BI->isUnconditional() && BI->getSuccessor(0)->getSinglePredecessor() &&
- BB != BI->getSuccessor(0)) {
- BasicBlock *Succ = BI->getSuccessor(0);
-
- // If Succ has any PHI nodes, they are all single-entry PHI's. Eliminate
- // them.
- FoldSingleEntryPHINodes(Succ);
-
- // Remove BI.
- BI->eraseFromParent();
-
- // Move over all of the instructions.
- BB->getInstList().splice(BB->end(), Succ->getInstList());
-
- // Any phi nodes that had entries for Succ now have entries from BB.
- Succ->replaceAllUsesWith(BB);
-
- // Succ is now dead, but we cannot delete it without potentially
- // invalidating iterators elsewhere. Just insert an unreachable
- // instruction in it and delete this block later on.
- new UnreachableInst(BB->getContext(), Succ);
- DeadBlocks.push_back(Succ);
- MadeChange = true;
- }
-}
-
-// SimplifyPredecessors(branches) - We know that BI is a conditional branch
-// based on a PHI node defined in this block. If the phi node contains constant
-// operands, then the blocks corresponding to those operands can be modified to
-// jump directly to the destination instead of going through this block.
-void CondProp::SimplifyPredecessors(BranchInst *BI) {
- // TODO: We currently only handle the most trival case, where the PHI node has
- // one use (the branch), and is the only instruction besides the branch and dbg
- // intrinsics in the block.
- PHINode *PN = cast<PHINode>(BI->getCondition());
-
- if (PN->getNumIncomingValues() == 1) {
- // Eliminate single-entry PHI nodes.
- FoldSingleEntryPHINodes(PN->getParent());
- return;
- }
-
-
- if (!PN->hasOneUse()) return;
-
- BasicBlock *BB = BI->getParent();
- if (&*BB->begin() != PN)
- return;
- BasicBlock::iterator BBI = BB->begin();
- BasicBlock::iterator BBE = BB->end();
- while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
- if (&*BBI != BI)
- return;
-
- // Ok, we have this really simple case, walk the PHI operands, looking for
- // constants. Walk from the end to remove operands from the end when
- // possible, and to avoid invalidating "i".
- for (unsigned i = PN->getNumIncomingValues(); i != 0; --i) {
- Value *InVal = PN->getIncomingValue(i-1);
- if (!RevectorBlockTo(PN->getIncomingBlock(i-1), InVal, BI))
- continue;
-
- ++NumBrThread;
-
- // If there were two predecessors before this simplification, or if the
- // PHI node contained all the same value except for the one we just
- // substituted, the PHI node may be deleted. Don't iterate through it the
- // last time.
- if (BI->getCondition() != PN) return;
- }
-}
-
-// SimplifyPredecessors(switch) - We know that SI is switch based on a PHI node
-// defined in this block. If the phi node contains constant operands, then the
-// blocks corresponding to those operands can be modified to jump directly to
-// the destination instead of going through this block.
-void CondProp::SimplifyPredecessors(SwitchInst *SI) {
- // TODO: We currently only handle the most trival case, where the PHI node has
- // one use (the branch), and is the only instruction besides the branch and
- // dbg intrinsics in the block.
- PHINode *PN = cast<PHINode>(SI->getCondition());
- if (!PN->hasOneUse()) return;
-
- BasicBlock *BB = SI->getParent();
- if (&*BB->begin() != PN)
- return;
- BasicBlock::iterator BBI = BB->begin();
- BasicBlock::iterator BBE = BB->end();
- while (BBI != BBE && isa<DbgInfoIntrinsic>(++BBI)) /* empty */;
- if (&*BBI != SI)
- return;
-
- // Ok, we have this really simple case, walk the PHI operands, looking for
- // constants. Walk from the end to remove operands from the end when
- // possible, and to avoid invalidating "i".
- for (unsigned i = PN->getNumIncomingValues(); i != 0; --i)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PN->getIncomingValue(i-1))) {
- BasicBlock *PredBB = PN->getIncomingBlock(i-1);
- if (isa<BranchInst>(PredBB->getTerminator())) {
- // If we have a constant, forward the edge from its current to its
- // ultimate destination.
- unsigned DestCase = SI->findCaseValue(CI);
- RevectorBlockTo(PredBB, SI->getSuccessor(DestCase));
- ++NumSwThread;
-
- // If there were two predecessors before this simplification, or if the
- // PHI node contained all the same value except for the one we just
- // substituted, the PHI node may be deleted. Don't iterate through it the
- // last time.
- if (SI->getCondition() != PN) return;
- }
- }
-}
-
-
-// RevectorBlockTo - Revector the unconditional branch at the end of FromBB to
-// the ToBB block, which is one of the successors of its current successor.
-void CondProp::RevectorBlockTo(BasicBlock *FromBB, BasicBlock *ToBB) {
- BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator());
- assert(FromBr->isUnconditional() && "FromBB should end with uncond br!");
-
- // Get the old block we are threading through.
- BasicBlock *OldSucc = FromBr->getSuccessor(0);
-
- // OldSucc had multiple successors. If ToBB has multiple predecessors, then
- // the edge between them would be critical, which we already took care of.
- // If ToBB has single operand PHI node then take care of it here.
- FoldSingleEntryPHINodes(ToBB);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Change FromBr to branch to the new destination.
- FromBr->setSuccessor(0, ToBB);
-
- MadeChange = true;
-}
-
-bool CondProp::RevectorBlockTo(BasicBlock *FromBB, Value *Cond, BranchInst *BI){
- BranchInst *FromBr = cast<BranchInst>(FromBB->getTerminator());
- if (!FromBr->isUnconditional())
- return false;
-
- // Get the old block we are threading through.
- BasicBlock *OldSucc = FromBr->getSuccessor(0);
-
- // If the condition is a constant, simply revector the unconditional branch at
- // the end of FromBB to one of the successors of its current successor.
- if (ConstantInt *CB = dyn_cast<ConstantInt>(Cond)) {
- BasicBlock *ToBB = BI->getSuccessor(CB->isZero());
-
- // OldSucc had multiple successors. If ToBB has multiple predecessors, then
- // the edge between them would be critical, which we already took care of.
- // If ToBB has single operand PHI node then take care of it here.
- FoldSingleEntryPHINodes(ToBB);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Change FromBr to branch to the new destination.
- FromBr->setSuccessor(0, ToBB);
- } else {
- BasicBlock *Succ0 = BI->getSuccessor(0);
- // Do not perform transform if the new destination has PHI nodes. The
- // transform will add new preds to the PHI's.
- if (isa<PHINode>(Succ0->begin()))
- return false;
-
- BasicBlock *Succ1 = BI->getSuccessor(1);
- if (isa<PHINode>(Succ1->begin()))
- return false;
-
- // Insert the new conditional branch.
- BranchInst::Create(Succ0, Succ1, Cond, FromBr);
-
- FoldSingleEntryPHINodes(Succ0);
- FoldSingleEntryPHINodes(Succ1);
-
- // Update PHI nodes in OldSucc to know that FromBB no longer branches to it.
- OldSucc->removePredecessor(FromBB);
-
- // Delete the old branch.
- FromBr->eraseFromParent();
- }
-
- MadeChange = true;
- return true;
-}
diff --git a/lib/Transforms/Scalar/ConstantProp.cpp b/lib/Transforms/Scalar/ConstantProp.cpp
index 4fee327..ea20813 100644
--- a/lib/Transforms/Scalar/ConstantProp.cpp
+++ b/lib/Transforms/Scalar/ConstantProp.cpp
@@ -66,7 +66,7 @@ bool ConstantPropagation::runOnFunction(Function &F) {
WorkList.erase(WorkList.begin()); // Get an element from the worklist...
if (!I->use_empty()) // Don't muck with dead instructions...
- if (Constant *C = ConstantFoldInstruction(I, F.getContext())) {
+ if (Constant *C = ConstantFoldInstruction(I)) {
// Add all of the users of this instruction to the worklist, they might
// be constant propagatable now...
for (Value::use_iterator UI = I->use_begin(), UE = I->use_end();
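diff context continues with the worklist bookkeeping; with the LLVMContext argument gone, the core step the loop performs once it pops I reduces to folding in place. A hedged sketch of just that step (not the full pass body, which also pushes I's users back onto the worklist as shown above):

    // Fold I if possible; the folder now picks up the context from I itself.
    if (Constant *C = ConstantFoldInstruction(I)) {
      I->replaceAllUsesWith(C);   // users now see the folded constant
      I->eraseFromParent();       // I itself is dead
    }
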
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index 90436f4..b0988b5 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -78,19 +78,96 @@ static RegisterPass<DSE> X("dse", "Dead Store Elimination");
FunctionPass *llvm::createDeadStoreEliminationPass() { return new DSE(); }
-/// isValueAtLeastAsBigAs - Return true if V1 is greater than or equal to the
-/// stored size of V2. This returns false if we don't know.
+/// doesClobberMemory - Does this instruction clobber (write without reading)
+/// some memory?
+static bool doesClobberMemory(Instruction *I) {
+ if (isa<StoreInst>(I))
+ return true;
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
+ switch (II->getIntrinsicID()) {
+ default: return false;
+ case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy:
+ case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true;
+ }
+ }
+ return false;
+}
+
+/// isElidable - If the value of this instruction and the memory it writes to is
+/// unused, may we delete this instrtction?
+static bool isElidable(Instruction *I) {
+ assert(doesClobberMemory(I));
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I))
+ return II->getIntrinsicID() != Intrinsic::lifetime_end;
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return !SI->isVolatile();
+ return true;
+}
+
+/// getPointerOperand - Return the pointer that is being clobbered.
+static Value *getPointerOperand(Instruction *I) {
+ assert(doesClobberMemory(I));
+ if (StoreInst *SI = dyn_cast<StoreInst>(I))
+ return SI->getPointerOperand();
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
+ return MI->getOperand(1);
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return II->getOperand(1);
+ case Intrinsic::lifetime_end:
+ return II->getOperand(2);
+ }
+}
+
+/// getStoreSize - Return the length in bytes of the write by the clobbering
+/// instruction. If variable or unknown, returns -1.
+static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
+ assert(doesClobberMemory(I));
+ if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
+ if (!TD) return -1u;
+ return TD->getTypeStoreSize(SI->getOperand(0)->getType());
+ }
+
+ Value *Len;
+ if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
+ Len = MI->getLength();
+ } else {
+ IntrinsicInst *II = cast<IntrinsicInst>(I);
+ switch (II->getIntrinsicID()) {
+ default:
+ assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return -1u;
+ case Intrinsic::lifetime_end:
+ Len = II->getOperand(1);
+ break;
+ }
+ }
+ if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
+ if (!LenCI->isAllOnesValue())
+ return LenCI->getZExtValue();
+ return -1u;
+}
+
+/// isStoreAtLeastAsWideAs - Return true if the size of the store in I1 is
+/// greater than or equal to the size of the store in I2. This returns false
+/// if we don't know.
///
-static bool isValueAtLeastAsBigAs(Value *V1, Value *V2, const TargetData *TD) {
- const Type *V1Ty = V1->getType(), *V2Ty = V2->getType();
+static bool isStoreAtLeastAsWideAs(Instruction *I1, Instruction *I2,
+ const TargetData *TD) {
+ const Type *I1Ty = getPointerOperand(I1)->getType();
+ const Type *I2Ty = getPointerOperand(I2)->getType();
// Exactly the same type, must have exactly the same size.
- if (V1Ty == V2Ty) return true;
+ if (I1Ty == I2Ty) return true;
- // If we don't have target data, we don't know.
- if (TD == 0) return false;
+ int I1Size = getStoreSize(I1, TD);
+ int I2Size = getStoreSize(I2, TD);
- return TD->getTypeStoreSize(V1Ty) >= TD->getTypeStoreSize(V2Ty);
+ return I1Size != -1 && I2Size != -1 && I1Size >= I2Size;
}
bool DSE::runOnBasicBlock(BasicBlock &BB) {
@@ -104,14 +181,9 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
Instruction *Inst = BBI++;
// If we find a store or a free, get its memory dependence.
- if (!isa<StoreInst>(Inst) && !isFreeCall(Inst))
+ if (!doesClobberMemory(Inst) && !isFreeCall(Inst))
continue;
- // Don't molest volatile stores or do queries that will return "clobber".
- if (StoreInst *SI = dyn_cast<StoreInst>(Inst))
- if (SI->isVolatile())
- continue;
-
MemDepResult InstDep = MD.getDependency(Inst);
// Ignore non-local stores.
@@ -124,16 +196,16 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
continue;
}
- StoreInst *SI = cast<StoreInst>(Inst);
-
// If not a definite must-alias dependency, ignore it.
if (!InstDep.isDef())
continue;
// If this is a store-store dependence, then the previous store is dead so
// long as this store is at least as big as it.
- if (StoreInst *DepStore = dyn_cast<StoreInst>(InstDep.getInst()))
- if (isValueAtLeastAsBigAs(SI->getOperand(0), DepStore->getOperand(0),TD)){
+ if (doesClobberMemory(InstDep.getInst())) {
+ Instruction *DepStore = InstDep.getInst();
+ if (isStoreAtLeastAsWideAs(Inst, DepStore, TD) &&
+ isElidable(DepStore)) {
// Delete the store and now-dead instructions that feed it.
DeleteDeadInstruction(DepStore);
NumFastStores++;
@@ -146,37 +218,43 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
--BBI;
continue;
}
+ }
+
+ if (!isElidable(Inst))
+ continue;
// If we're storing the same value back to a pointer that we just
// loaded from, then the store can be removed.
- if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
- if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
- SI->getOperand(0) == DepLoad) {
- // DeleteDeadInstruction can delete the current instruction. Save BBI
- // in case we need it.
- WeakVH NextInst(BBI);
-
- DeleteDeadInstruction(SI);
-
- if (NextInst == 0) // Next instruction deleted.
- BBI = BB.begin();
- else if (BBI != BB.begin()) // Revisit this instruction if possible.
- --BBI;
- NumFastStores++;
- MadeChange = true;
- continue;
+ if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
+ if (LoadInst *DepLoad = dyn_cast<LoadInst>(InstDep.getInst())) {
+ if (SI->getPointerOperand() == DepLoad->getPointerOperand() &&
+ SI->getOperand(0) == DepLoad) {
+ // DeleteDeadInstruction can delete the current instruction. Save BBI
+ // in case we need it.
+ WeakVH NextInst(BBI);
+
+ DeleteDeadInstruction(SI);
+
+ if (NextInst == 0) // Next instruction deleted.
+ BBI = BB.begin();
+ else if (BBI != BB.begin()) // Revisit this instruction if possible.
+ --BBI;
+ NumFastStores++;
+ MadeChange = true;
+ continue;
+ }
}
}
// If this is a lifetime end marker, we can throw away the store.
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(InstDep.getInst())) {
if (II->getIntrinsicID() == Intrinsic::lifetime_end) {
// Delete the store and now-dead instructions that feed it.
// DeleteDeadInstruction can delete the current instruction. Save BBI
// in case we need it.
WeakVH NextInst(BBI);
- DeleteDeadInstruction(SI);
+ DeleteDeadInstruction(Inst);
if (NextInst == 0) // Next instruction deleted.
BBI = BB.begin();
@@ -202,11 +280,11 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) {
bool DSE::handleFreeWithNonTrivialDependency(Instruction *F, MemDepResult Dep) {
AliasAnalysis &AA = getAnalysis<AliasAnalysis>();
- StoreInst *Dependency = dyn_cast_or_null<StoreInst>(Dep.getInst());
- if (!Dependency || Dependency->isVolatile())
+ Instruction *Dependency = Dep.getInst();
+ if (!Dependency || !doesClobberMemory(Dependency) || !isElidable(Dependency))
return false;
- Value *DepPointer = Dependency->getPointerOperand()->getUnderlyingObject();
+ Value *DepPointer = getPointerOperand(Dependency)->getUnderlyingObject();
// Check for aliasing.
if (AA.alias(F->getOperand(1), 1, DepPointer, 1) !=
@@ -251,39 +329,28 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
--BBI;
// If we find a store whose pointer is dead.
- if (StoreInst* S = dyn_cast<StoreInst>(BBI)) {
- if (!S->isVolatile()) {
+ if (doesClobberMemory(BBI)) {
+ if (isElidable(BBI)) {
// See through pointer-to-pointer bitcasts
- Value* pointerOperand = S->getPointerOperand()->getUnderlyingObject();
+ Value *pointerOperand = getPointerOperand(BBI)->getUnderlyingObject();
// Alloca'd pointers or byval arguments (which are functionally like
// alloca's) are valid candidates for removal.
if (deadPointers.count(pointerOperand)) {
// DCE instructions only used to calculate that store.
+ Instruction *Dead = BBI;
BBI++;
- DeleteDeadInstruction(S, &deadPointers);
+ DeleteDeadInstruction(Dead, &deadPointers);
NumFastStores++;
MadeChange = true;
+ continue;
}
}
- continue;
- }
-
- // We can also remove memcpy's to local variables at the end of a function.
- if (MemCpyInst *M = dyn_cast<MemCpyInst>(BBI)) {
- Value *dest = M->getDest()->getUnderlyingObject();
-
- if (deadPointers.count(dest)) {
- BBI++;
- DeleteDeadInstruction(M, &deadPointers);
- NumFastOther++;
- MadeChange = true;
+ // Because a memcpy or memmove is also a load, we can't skip it if we
+ // didn't remove it.
+ if (!isa<MemTransferInst>(BBI))
continue;
- }
-
- // Because a memcpy is also a load, we can't skip it if we didn't remove
- // it.
}
Value* killPointer = 0;
@@ -304,11 +371,11 @@ bool DSE::handleEndBlock(BasicBlock &BB) {
killPointer = L->getPointerOperand();
} else if (VAArgInst* V = dyn_cast<VAArgInst>(BBI)) {
killPointer = V->getOperand(0);
- } else if (isa<MemCpyInst>(BBI) &&
- isa<ConstantInt>(cast<MemCpyInst>(BBI)->getLength())) {
- killPointer = cast<MemCpyInst>(BBI)->getSource();
+ } else if (isa<MemTransferInst>(BBI) &&
+ isa<ConstantInt>(cast<MemTransferInst>(BBI)->getLength())) {
+ killPointer = cast<MemTransferInst>(BBI)->getSource();
killPointerSize = cast<ConstantInt>(
- cast<MemCpyInst>(BBI)->getLength())->getZExtValue();
+ cast<MemTransferInst>(BBI)->getLength())->getZExtValue();
} else if (AllocaInst* A = dyn_cast<AllocaInst>(BBI)) {
deadPointers.erase(A);
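
The size comparison that replaces isValueAtLeastAsBigAs boils down to a small predicate over the two byte counts returned by getStoreSize, where -1 encodes an unknown or variable length. A standalone sketch of just that check (hypothetical helper name, not part of the patch):

    // The later store kills the earlier one only when both widths are known
    // and the later write covers at least as many bytes.
    static bool laterStoreCoversEarlier(int LaterSize, int EarlierSize) {
      return LaterSize != -1 && EarlierSize != -1 && LaterSize >= EarlierSize;
    }

Identical pointer operand types still short-circuit to true before any sizes are consulted, so stores of the same type continue to be handled without TargetData.
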
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 0e3f750..a8f39c1 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -443,6 +443,11 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
valueNumbering[C] = e;
return e;
}
+ if (!MD) {
+ e = nextValueNumber++;
+ valueNumbering[C] = e;
+ return e;
+ }
MemDepResult local_dep = MD->getDependency(C);
@@ -624,7 +629,7 @@ uint32_t ValueTable::lookup_or_add(Value *V) {
/// lookup - Returns the value number of the specified value. Fails if
/// the value has not yet been numbered.
uint32_t ValueTable::lookup(Value *V) const {
- DenseMap<Value*, uint32_t>::iterator VI = valueNumbering.find(V);
+ DenseMap<Value*, uint32_t>::const_iterator VI = valueNumbering.find(V);
assert(VI != valueNumbering.end() && "Value not numbered?");
return VI->second;
}
@@ -644,7 +649,7 @@ void ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
- for (DenseMap<Value*, uint32_t>::iterator
+ for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
}
@@ -669,10 +674,12 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- GVN(bool nopre = false) : FunctionPass(&ID), NoPRE(nopre) { }
+ explicit GVN(bool nopre = false, bool noloads = false)
+ : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }
private:
bool NoPRE;
+ bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
@@ -682,7 +689,8 @@ namespace {
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
- AU.addRequired<MemoryDependenceAnalysis>();
+ if (!NoLoads)
+ AU.addRequired<MemoryDependenceAnalysis>();
AU.addRequired<AliasAnalysis>();
AU.addPreserved<DominatorTree>();
@@ -711,7 +719,9 @@ namespace {
}
// createGVNPass - The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoPRE) { return new GVN(NoPRE); }
+FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
+ return new GVN(NoPRE, NoLoads);
+}
static RegisterPass<GVN> X("gvn",
"Global Value Numbering");
@@ -1476,6 +1486,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
/// processLoad - Attempt to eliminate a load, first by eliminating it
/// locally, and then attempting non-local elimination if that fails.
bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
+ if (!MD)
+ return false;
+
if (L->isVolatile())
return false;
@@ -1686,7 +1699,7 @@ bool GVN::processInstruction(Instruction *I,
if (constVal) {
p->replaceAllUsesWith(constVal);
- if (isa<PointerType>(constVal->getType()))
+ if (MD && isa<PointerType>(constVal->getType()))
MD->invalidateCachedPointerInfo(constVal);
VN.erase(p);
@@ -1707,7 +1720,7 @@ bool GVN::processInstruction(Instruction *I,
// Remove it!
VN.erase(I);
I->replaceAllUsesWith(repl);
- if (isa<PointerType>(repl->getType()))
+ if (MD && isa<PointerType>(repl->getType()))
MD->invalidateCachedPointerInfo(repl);
toErase.push_back(I);
return true;
@@ -1721,7 +1734,8 @@ bool GVN::processInstruction(Instruction *I,
/// runOnFunction - This is the main transformation entry point for a function.
bool GVN::runOnFunction(Function& F) {
- MD = &getAnalysis<MemoryDependenceAnalysis>();
+ if (!NoLoads)
+ MD = &getAnalysis<MemoryDependenceAnalysis>();
DT = &getAnalysis<DominatorTree>();
VN.setAliasAnalysis(&getAnalysis<AliasAnalysis>());
VN.setMemDep(MD);
@@ -1793,7 +1807,7 @@ bool GVN::processBlock(BasicBlock *BB) {
for (SmallVector<Instruction*, 4>::iterator I = toErase.begin(),
E = toErase.end(); I != E; ++I) {
DEBUG(errs() << "GVN removed: " << **I << '\n');
- MD->removeInstruction(*I);
+ if (MD) MD->removeInstruction(*I);
(*I)->eraseFromParent();
DEBUG(verifyRemoved(*I));
}
@@ -1946,12 +1960,12 @@ bool GVN::performPRE(Function &F) {
localAvail[CurrentBlock]->table[ValNo] = Phi;
CurInst->replaceAllUsesWith(Phi);
- if (isa<PointerType>(Phi->getType()))
+ if (MD && isa<PointerType>(Phi->getType()))
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
DEBUG(errs() << "GVN PRE removed: " << *CurInst << '\n');
- MD->removeInstruction(CurInst);
+ if (MD) MD->removeInstruction(CurInst);
CurInst->eraseFromParent();
DEBUG(verifyRemoved(CurInst));
Changed = true;
@@ -2011,12 +2025,12 @@ void GVN::verifyRemoved(const Instruction *Inst) const {
// Walk through the value number scope to make sure the instruction isn't
// ferreted away in it.
- for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
+ for (DenseMap<BasicBlock*, ValueNumberScope*>::const_iterator
I = localAvail.begin(), E = localAvail.end(); I != E; ++I) {
const ValueNumberScope *VNS = I->second;
while (VNS) {
- for (DenseMap<uint32_t, Value*>::iterator
+ for (DenseMap<uint32_t, Value*>::const_iterator
II = VNS->table.begin(), IE = VNS->table.end(); II != IE; ++II) {
assert(II->second != Inst && "Inst still in value numbering scope!");
}
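
The new NoLoads flag lets a client run GVN purely on scalar value numbering without requiring MemoryDependenceAnalysis. A hedged usage sketch (hypothetical driver function; assumes the classic PassManager from this era):

    #include "llvm/Module.h"
    #include "llvm/PassManager.h"
    #include "llvm/Transforms/Scalar.h"   // declares createGVNPass

    void runScalarOnlyGVN(llvm::Module &M) {
      llvm::PassManager PM;
      // Second argument is the NoLoads flag added in this commit.
      PM.add(llvm::createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true));
      PM.run(M);
    }

With NoLoads set, MD stays null, so processLoad bails out immediately and the remaining MD-> calls are guarded by 'if (MD)' as shown in the hunks above.
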
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index b0bc70c..2912421 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -536,8 +536,10 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
BasicBlock *ExitBlock = L->getExitBlock();
if (!ExitBlock) return;
- Instruction *InsertPt = ExitBlock->getFirstNonPHI();
BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) return;
+
+ Instruction *InsertPt = ExitBlock->getFirstNonPHI();
BasicBlock::iterator I = Preheader->getTerminator();
while (I != Preheader->begin()) {
--I;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index 7e75cfb..1c48366 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -42,6 +42,7 @@
#include "llvm/GlobalVariable.h"
#include "llvm/Operator.h"
#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/Target/TargetData.h"
@@ -283,6 +284,8 @@ namespace {
Instruction *visitSelectInstWithICmp(SelectInst &SI, ICmpInst *ICI);
Instruction *visitCallInst(CallInst &CI);
Instruction *visitInvokeInst(InvokeInst &II);
+
+ Instruction *SliceUpIllegalIntegerPHI(PHINode &PN);
Instruction *visitPHINode(PHINode &PN);
Instruction *visitGetElementPtrInst(GetElementPtrInst &GEP);
Instruction *visitAllocaInst(AllocaInst &AI);
@@ -380,10 +383,6 @@ namespace {
/// commutative operators.
bool SimplifyCommutative(BinaryOperator &I);
- /// SimplifyCompare - This reorders the operands of a CmpInst to get them in
- /// most-complex to least-complex order.
- bool SimplifyCompare(CmpInst &I);
-
/// SimplifyDemandedUseBits - Attempts to replace V with a simpler value
/// based on the demanded bits.
Value *SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
@@ -478,6 +477,34 @@ static const Type *getPromotedType(const Type *Ty) {
return Ty;
}
+/// ShouldChangeType - Return true if it is desirable to convert a computation
+/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
+/// type for example, or from a smaller to a larger illegal type.
+static bool ShouldChangeType(const Type *From, const Type *To,
+ const TargetData *TD) {
+ assert(isa<IntegerType>(From) && isa<IntegerType>(To));
+
+ // If we don't have TD, we don't know if the source/dest are legal.
+ if (!TD) return false;
+
+ unsigned FromWidth = From->getPrimitiveSizeInBits();
+ unsigned ToWidth = To->getPrimitiveSizeInBits();
+ bool FromLegal = TD->isLegalInteger(FromWidth);
+ bool ToLegal = TD->isLegalInteger(ToWidth);
+
+ // If the source is a legal integer type and the result would be an illegal
+ // type, don't do the transformation.
+ if (FromLegal && !ToLegal)
+ return false;
+
+ // Otherwise, if both are illegal, do not increase the size of the result. We
+ // do allow things like i160 -> i64, but not i64 -> i160.
+ if (!FromLegal && !ToLegal && ToWidth > FromWidth)
+ return false;
+
+ return true;
+}
+
/// getBitCastOperand - If the specified operand is a CastInst, a constant
/// expression bitcast, or a GetElementPtrInst with all zero indices, return the
/// operand value, otherwise return null.
@@ -584,17 +611,6 @@ bool InstCombiner::SimplifyCommutative(BinaryOperator &I) {
return Changed;
}
-/// SimplifyCompare - For a CmpInst this function just orders the operands
-/// so that theyare listed from right (least complex) to left (most complex).
-/// This puts constants before unary operators before binary operators.
-bool InstCombiner::SimplifyCompare(CmpInst &I) {
- if (getComplexity(I.getOperand(0)) >= getComplexity(I.getOperand(1)))
- return false;
- I.swapOperands();
- // Compare instructions are not associative so there's nothing else we can do.
- return true;
-}
-
// dyn_castNegVal - Given a 'sub' instruction, return the RHS of the instruction
// if the LHS is a constant zero (which is the 'negate' form).
//
@@ -4304,25 +4320,15 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) // X & undef -> 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
-
- // and X, X = X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Op1);
+ if (Value *V = SimplifyAndInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (isa<VectorType>(I.getType())) {
- if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X & <-1,-1> -> X
- return ReplaceInstUsesWith(I, I.getOperand(0));
- } else if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op1); // X & <0,0> -> <0,0>
- }
- }
+
if (ConstantInt *AndRHS = dyn_cast<ConstantInt>(Op1)) {
const APInt &AndRHSMask = AndRHS->getValue();
@@ -4443,42 +4449,29 @@ Instruction *InstCombiner::visitAnd(BinaryOperator &I) {
return NV;
}
- Value *Op0NotVal = dyn_castNotVal(Op0);
- Value *Op1NotVal = dyn_castNotVal(Op1);
-
- if (Op0NotVal == Op1 || Op1NotVal == Op0) // A & ~A == ~A & A == 0
- return ReplaceInstUsesWith(I, Constant::getNullValue(I.getType()));
// (~A & ~B) == (~(A | B)) - De Morgan's Law
- if (Op0NotVal && Op1NotVal && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
- I.getName()+".demorgan");
- return BinaryOperator::CreateNot(Or);
- }
-
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *Or = Builder->CreateOr(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(Or);
+ }
+
{
Value *A = 0, *B = 0, *C = 0, *D = 0;
- if (match(Op0, m_Or(m_Value(A), m_Value(B)))) {
- if (A == Op1 || B == Op1) // (A | ?) & A --> A
- return ReplaceInstUsesWith(I, Op1);
-
- // (A|B) & ~(A&B) -> A^B
- if (match(Op1, m_Not(m_And(m_Value(C), m_Value(D))))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- }
+ // (A|B) & ~(A&B) -> A^B
+ if (match(Op0, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op1, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
- if (match(Op1, m_Or(m_Value(A), m_Value(B)))) {
- if (A == Op0 || B == Op0) // A & (A | ?) --> A
- return ReplaceInstUsesWith(I, Op0);
-
- // ~(A&B) & (A|B) -> A^B
- if (match(Op0, m_Not(m_And(m_Value(C), m_Value(D))))) {
- if ((A == C && B == D) || (A == D && B == C))
- return BinaryOperator::CreateXor(A, B);
- }
- }
+ // ~(A&B) & (A|B) -> A^B
+ if (match(Op1, m_Or(m_Value(A), m_Value(B))) &&
+ match(Op0, m_Not(m_And(m_Value(C), m_Value(D)))) &&
+ ((A == C && B == D) || (A == D && B == C)))
+ return BinaryOperator::CreateXor(A, B);
if (Op0->hasOneUse() &&
match(Op0, m_Xor(m_Value(A), m_Value(B)))) {
@@ -5010,27 +5003,15 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
bool Changed = SimplifyCommutative(I);
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- if (isa<UndefValue>(Op1)) // X | undef -> -1
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- // or X, X = X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, Op0);
-
+ if (Value *V = SimplifyOrInst(Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (isa<VectorType>(I.getType())) {
- if (isa<ConstantAggregateZero>(Op1)) {
- return ReplaceInstUsesWith(I, Op0); // X | <0,0> -> X
- } else if (ConstantVector *CP = dyn_cast<ConstantVector>(Op1)) {
- if (CP->isAllOnesValue()) // X | <-1,-1> -> <-1,-1>
- return ReplaceInstUsesWith(I, I.getOperand(1));
- }
- }
- // or X, -1 == -1
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
@@ -5063,13 +5044,6 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
Value *A = 0, *B = 0;
ConstantInt *C1 = 0, *C2 = 0;
- if (match(Op0, m_And(m_Value(A), m_Value(B))))
- if (A == Op1 || B == Op1) // (A & ?) | A --> A
- return ReplaceInstUsesWith(I, Op1);
- if (match(Op1, m_And(m_Value(A), m_Value(B))))
- if (A == Op0 || B == Op0) // A | (A & ?) --> A
- return ReplaceInstUsesWith(I, Op0);
-
// (A | B) | C and A | (B | C) -> bswap if possible.
// (A >> B) | (C << D) and (A << B) | (B >> C) -> bswap if possible.
if (match(Op0, m_Or(m_Value(), m_Value())) ||
@@ -5203,23 +5177,14 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Ret) return Ret;
}
- if ((A = dyn_castNotVal(Op0))) { // ~A | Op1
- if (A == Op1) // ~A | A == -1
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
- } else {
- A = 0;
- }
- // Note, A is still live here!
- if ((B = dyn_castNotVal(Op1))) { // Op0 | ~B
- if (Op0 == B)
- return ReplaceInstUsesWith(I, Constant::getAllOnesValue(I.getType()));
-
- // (~A | ~B) == (~(A & B)) - De Morgan's Law
- if (A && isOnlyUse(Op0) && isOnlyUse(Op1)) {
- Value *And = Builder->CreateAnd(A, B, I.getName()+".demorgan");
- return BinaryOperator::CreateNot(And);
- }
- }
+ // (~A | ~B) == (~(A & B)) - De Morgan's Law
+ if (Value *Op0NotVal = dyn_castNotVal(Op0))
+ if (Value *Op1NotVal = dyn_castNotVal(Op1))
+ if (Op0->hasOneUse() && Op1->hasOneUse()) {
+ Value *And = Builder->CreateAnd(Op0NotVal, Op1NotVal,
+ I.getName()+".demorgan");
+ return BinaryOperator::CreateNot(And);
+ }
// (icmp1 A, B) | (icmp2 A, B) --> (icmp3 A, B)
if (ICmpInst *RHS = dyn_cast<ICmpInst>(I.getOperand(1))) {
@@ -5942,28 +5907,25 @@ Instruction *InstCombiner::FoldFCmp_IntToFP_Cst(FCmpInst &I,
}
Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
- bool Changed = SimplifyCompare(I);
- Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
- // Fold trivial predicates.
- if (I.getPredicate() == FCmpInst::FCMP_FALSE)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
- if (I.getPredicate() == FCmpInst::FCMP_TRUE)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
+ Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
+ if (Value *V = SimplifyFCmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
// Simplify 'fcmp pred X, X'
if (Op0 == Op1) {
switch (I.getPredicate()) {
default: llvm_unreachable("Unknown predicate!");
- case FCmpInst::FCMP_UEQ: // True if unordered or equal
- case FCmpInst::FCMP_UGE: // True if unordered, greater than, or equal
- case FCmpInst::FCMP_ULE: // True if unordered, less than, or equal
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 1));
- case FCmpInst::FCMP_OGT: // True if ordered and greater than
- case FCmpInst::FCMP_OLT: // True if ordered and less than
- case FCmpInst::FCMP_ONE: // True if ordered and operands are unequal
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(), 0));
-
case FCmpInst::FCMP_UNO: // True if unordered: isnan(X) | isnan(Y)
case FCmpInst::FCMP_ULT: // True if unordered or less than
case FCmpInst::FCMP_UGT: // True if unordered or greater than
@@ -5984,23 +5946,8 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
}
- if (isa<UndefValue>(Op1)) // fcmp pred X, undef -> undef
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
-
// Handle fcmp with constant RHS
if (Constant *RHSC = dyn_cast<Constant>(Op1)) {
- // If the constant is a nan, see if we can fold the comparison based on it.
- if (ConstantFP *CFP = dyn_cast<ConstantFP>(RHSC)) {
- if (CFP->getValueAPF().isNaN()) {
- if (FCmpInst::isOrdered(I.getPredicate())) // True if ordered and...
- return ReplaceInstUsesWith(I, ConstantInt::getFalse(*Context));
- assert(FCmpInst::isUnordered(I.getPredicate()) &&
- "Comparison must be either ordered or unordered!");
- // True if unordered.
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
- }
- }
-
if (Instruction *LHSI = dyn_cast<Instruction>(Op0))
switch (LHSI->getOpcode()) {
case Instruction::PHI:
@@ -6047,26 +5994,22 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
}
Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
- bool Changed = SimplifyCompare(I);
+ bool Changed = false;
+
+ /// Orders the operands of the compare so that they are listed from most
+ /// complex to least complex. This puts constants before unary operators,
+ /// before binary operators.
+ if (getComplexity(I.getOperand(0)) < getComplexity(I.getOperand(1))) {
+ I.swapOperands();
+ Changed = true;
+ }
+
Value *Op0 = I.getOperand(0), *Op1 = I.getOperand(1);
- const Type *Ty = Op0->getType();
-
- // icmp X, X
- if (Op0 == Op1)
- return ReplaceInstUsesWith(I, ConstantInt::get(I.getType(),
- I.isTrueWhenEqual()));
-
- if (isa<UndefValue>(Op1)) // X icmp undef -> undef
- return ReplaceInstUsesWith(I, UndefValue::get(I.getType()));
- // icmp <global/alloca*/null>, <global/alloca*/null> - Global/Stack value
- // addresses never equal each other! We already know that Op0 != Op1.
- if ((isa<GlobalValue>(Op0) || isa<AllocaInst>(Op0) ||
- isa<ConstantPointerNull>(Op0)) &&
- (isa<GlobalValue>(Op1) || isa<AllocaInst>(Op1) ||
- isa<ConstantPointerNull>(Op1)))
- return ReplaceInstUsesWith(I, ConstantInt::get(Type::getInt1Ty(*Context),
- !I.isTrueWhenEqual()));
+ if (Value *V = SimplifyICmpInst(I.getPredicate(), Op0, Op1, TD))
+ return ReplaceInstUsesWith(I, V);
+
+ const Type *Ty = Op0->getType();
// icmp's with boolean values can always be turned into bitwise operations
if (Ty == Type::getInt1Ty(*Context)) {
@@ -6131,27 +6074,24 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// If we have an icmp le or icmp ge instruction, turn it into the
// appropriate icmp lt or icmp gt instruction. This allows us to rely on
- // them being folded in the code below.
+ // them being folded in the code below. The SimplifyICmpInst code has
+ // already handled the edge cases for us, so we just assert on them.
switch (I.getPredicate()) {
default: break;
case ICmpInst::ICMP_ULE:
- if (CI->isMaxValue(false)) // A <=u MAX -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMaxValue(false)); // A <=u MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_ULT, Op0,
AddOne(CI));
case ICmpInst::ICMP_SLE:
- if (CI->isMaxValue(true)) // A <=s MAX -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMaxValue(true)); // A <=s MAX -> TRUE
return new ICmpInst(ICmpInst::ICMP_SLT, Op0,
AddOne(CI));
case ICmpInst::ICMP_UGE:
- if (CI->isMinValue(false)) // A >=u MIN -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMinValue(false)); // A >=u MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_UGT, Op0,
SubOne(CI));
case ICmpInst::ICMP_SGE:
- if (CI->isMinValue(true)) // A >=s MIN -> TRUE
- return ReplaceInstUsesWith(I, ConstantInt::getTrue(*Context));
+ assert(!CI->isMinValue(true)); // A >=s MIN -> TRUE
return new ICmpInst(ICmpInst::ICMP_SGT, Op0,
SubOne(CI));
}
@@ -8083,8 +8023,7 @@ bool InstCombiner::CanEvaluateInDifferentType(Value *V, const Type *Ty,
Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
bool isSigned) {
if (Constant *C = dyn_cast<Constant>(V))
- return ConstantExpr::getIntegerCast(C, Ty,
- isSigned /*Sext or ZExt*/);
+ return ConstantExpr::getIntegerCast(C, Ty, isSigned /*Sext or ZExt*/);
// Otherwise, it must be an instruction.
Instruction *I = cast<Instruction>(V);
@@ -8117,8 +8056,7 @@ Value *InstCombiner::EvaluateInDifferentType(Value *V, const Type *Ty,
return I->getOperand(0);
// Otherwise, must be the same type of cast, so just reinsert a new one.
- Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),
- Ty);
+ Res = CastInst::Create(cast<CastInst>(I)->getOpcode(), I->getOperand(0),Ty);
break;
case Instruction::Select: {
Value *True = EvaluateInDifferentType(I->getOperand(1), Ty, isSigned);
@@ -8167,9 +8105,15 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
return NV;
// If we are casting a PHI then fold the cast into the PHI
- if (isa<PHINode>(Src))
- if (Instruction *NV = FoldOpIntoPhi(CI))
- return NV;
+ if (isa<PHINode>(Src)) {
+ // Don't do this if it would create a PHI node with an illegal type when
+ // the current type is legal.
+ if (!isa<IntegerType>(Src->getType()) ||
+ !isa<IntegerType>(CI.getType()) ||
+ ShouldChangeType(CI.getType(), Src->getType(), TD))
+ if (Instruction *NV = FoldOpIntoPhi(CI))
+ return NV;
+ }
return 0;
}
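
The PHI guard above leans on ShouldChangeType to keep integer rewrites within target-legal widths. A rough, hypothetical model of that policy, using plain bit widths instead of LLVM types and TargetData (the real helper has further rules, e.g. for illegal-to-illegal conversions):

    #include <set>

    // Approximation only: never turn a target-legal integer width into an
    // illegal one, so an i32 PHI is not rewritten into something like i1293.
    static bool should_change_type(unsigned FromBits, unsigned ToBits,
                                   const std::set<unsigned> &LegalWidths) {
      if (FromBits == ToBits)
        return false; // nothing to change
      bool FromLegal = LegalWidths.count(FromBits) != 0;
      bool ToLegal = LegalWidths.count(ToBits) != 0;
      return ToLegal || !FromLegal; // reject legal -> illegal
    }
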
@@ -8289,23 +8233,6 @@ Instruction *InstCombiner::commonPointerCastTransforms(CastInst &CI) {
return commonCastTransforms(CI);
}
-/// isSafeIntegerType - Return true if this is a basic integer type, not a crazy
-/// type like i42. We don't want to introduce operations on random non-legal
-/// integer types where they don't already exist in the code. In the future,
-/// we should consider making this based off target-data, so that 32-bit targets
-/// won't get i64 operations etc.
-static bool isSafeIntegerType(const Type *Ty) {
- switch (Ty->getPrimitiveSizeInBits()) {
- case 8:
- case 16:
- case 32:
- case 64:
- return true;
- default:
- return false;
- }
-}
-
/// commonIntCastTransforms - This function implements the common transforms
/// for trunc, zext, and sext.
Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
@@ -8334,8 +8261,8 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
// Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isSafeIntegerType(DestTy->getScalarType()) ||
- !isSafeIntegerType(SrcI->getType()->getScalarType())) &&
+ if ((isa<VectorType>(DestTy) ||
+ ShouldChangeType(SrcI->getType(), DestTy, TD)) &&
CanEvaluateInDifferentType(SrcI, DestTy,
CI.getOpcode(), NumCastsRemoved)) {
// If this cast is a truncate, evaluating in a different type always
@@ -8356,6 +8283,7 @@ Instruction *InstCombiner::commonIntCastTransforms(CastInst &CI) {
break;
case Instruction::ZExt: {
DoXForm = NumCastsRemoved >= 1;
+
if (!DoXForm && 0) {
// If it's unnecessary to issue an AND to clear the high bits, it's
// always profitable to do this xform.
@@ -8522,7 +8450,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
return BinaryOperator::CreateLShr(V1, V2);
}
}
-
+
return 0;
}
@@ -10880,9 +10808,10 @@ Instruction *InstCombiner::FoldPHIArgLoadIntoPHI(PHINode &PN) {
}
-// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
-// operator and they all are only used by the PHI, PHI together their
-// inputs, and do the operation once, to the result of the PHI.
+
+/// FoldPHIArgOpIntoPHI - If all operands to a PHI node are the same "unary"
+/// operator and they all are only used by the PHI, PHI together their
+/// inputs, and do the operation once, to the result of the PHI.
Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
Instruction *FirstInst = cast<Instruction>(PN.getIncomingValue(0));
@@ -10900,6 +10829,13 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
if (isa<CastInst>(FirstInst)) {
CastSrcTy = FirstInst->getOperand(0)->getType();
+
+ // Be careful about transforming integer PHIs. We don't want to pessimize
+ // the code by turning an i32 into an i1293.
+ if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) {
+ if (!ShouldChangeType(PN.getType(), CastSrcTy, TD))
+ return 0;
+ }
} else if (isa<BinaryOperator>(FirstInst) || isa<CmpInst>(FirstInst)) {
// Can fold binop, compare or shift here if the RHS is a constant,
// otherwise call FoldPHIArgBinOpIntoPHI.
@@ -11012,6 +10948,222 @@ static bool PHIsEqualValue(PHINode *PN, Value *NonPhiInVal,
}
+namespace {
+struct PHIUsageRecord {
+ unsigned PHIId; // The ID # of the PHI (something deterministic to sort on)
+ unsigned Shift; // The amount shifted.
+ Instruction *Inst; // The trunc instruction.
+
+ PHIUsageRecord(unsigned pn, unsigned Sh, Instruction *User)
+ : PHIId(pn), Shift(Sh), Inst(User) {}
+
+ bool operator<(const PHIUsageRecord &RHS) const {
+ if (PHIId < RHS.PHIId) return true;
+ if (PHIId > RHS.PHIId) return false;
+ if (Shift < RHS.Shift) return true;
+ if (Shift > RHS.Shift) return false;
+ return Inst->getType()->getPrimitiveSizeInBits() <
+ RHS.Inst->getType()->getPrimitiveSizeInBits();
+ }
+};
+
+struct LoweredPHIRecord {
+ PHINode *PN; // The PHI that was lowered.
+ unsigned Shift; // The amount shifted.
+ unsigned Width; // The width extracted.
+
+ LoweredPHIRecord(PHINode *pn, unsigned Sh, const Type *Ty)
+ : PN(pn), Shift(Sh), Width(Ty->getPrimitiveSizeInBits()) {}
+
+ // Ctor form used by DenseMap.
+ LoweredPHIRecord(PHINode *pn, unsigned Sh)
+ : PN(pn), Shift(Sh), Width(0) {}
+};
+}
+
+namespace llvm {
+ template<>
+ struct DenseMapInfo<LoweredPHIRecord> {
+ static inline LoweredPHIRecord getEmptyKey() {
+ return LoweredPHIRecord(0, 0);
+ }
+ static inline LoweredPHIRecord getTombstoneKey() {
+ return LoweredPHIRecord(0, 1);
+ }
+ static unsigned getHashValue(const LoweredPHIRecord &Val) {
+ return DenseMapInfo<PHINode*>::getHashValue(Val.PN) ^ (Val.Shift>>3) ^
+ (Val.Width>>3);
+ }
+ static bool isEqual(const LoweredPHIRecord &LHS,
+ const LoweredPHIRecord &RHS) {
+ return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
+ LHS.Width == RHS.Width;
+ }
+ static bool isPod() { return true; }
+ };
+}
+
+
+/// SliceUpIllegalIntegerPHI - This is an integer PHI and we know that it has an
+/// illegal type: see if it is only used by trunc or trunc(lshr) operations. If
+/// so, we split the PHI into the various pieces being extracted. This sort of
+/// thing is introduced when SROA promotes an aggregate to large integer values.
+///
+/// TODO: The user of the trunc may be a bitcast to float/double/vector or an
+/// inttoptr. We should produce new PHIs in the right type.
+///
+Instruction *InstCombiner::SliceUpIllegalIntegerPHI(PHINode &FirstPhi) {
+ // PHIUsers - Keep track of all of the truncated values extracted from a set
+ // of PHIs, along with their offset. These are the things we want to rewrite.
+ SmallVector<PHIUsageRecord, 16> PHIUsers;
+
+ // PHIs are often mutually cyclic, so we keep track of a whole set of PHI
+ // nodes which are extracted from. PHIsToSlice is an ordered list of the PHIs
+ // whose uses we still need to check (to ensure they are all extracts);
+ // PHIsInspected is a set we use to avoid revisiting PHIs.
+ SmallVector<PHINode*, 8> PHIsToSlice;
+ SmallPtrSet<PHINode*, 8> PHIsInspected;
+
+ PHIsToSlice.push_back(&FirstPhi);
+ PHIsInspected.insert(&FirstPhi);
+
+ for (unsigned PHIId = 0; PHIId != PHIsToSlice.size(); ++PHIId) {
+ PHINode *PN = PHIsToSlice[PHIId];
+
+ for (Value::use_iterator UI = PN->use_begin(), E = PN->use_end();
+ UI != E; ++UI) {
+ Instruction *User = cast<Instruction>(*UI);
+
+ // If the user is a PHI, inspect its uses recursively.
+ if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
+ if (PHIsInspected.insert(UserPN))
+ PHIsToSlice.push_back(UserPN);
+ continue;
+ }
+
+ // Truncates are always ok.
+ if (isa<TruncInst>(User)) {
+ PHIUsers.push_back(PHIUsageRecord(PHIId, 0, User));
+ continue;
+ }
+
+ // Otherwise it must be a lshr which can only be used by one trunc.
+ if (User->getOpcode() != Instruction::LShr ||
+ !User->hasOneUse() || !isa<TruncInst>(User->use_back()) ||
+ !isa<ConstantInt>(User->getOperand(1)))
+ return 0;
+
+ unsigned Shift = cast<ConstantInt>(User->getOperand(1))->getZExtValue();
+ PHIUsers.push_back(PHIUsageRecord(PHIId, Shift, User->use_back()));
+ }
+ }
+
+ // If we have no users, they must all be self uses; just nuke the PHI.
+ if (PHIUsers.empty())
+ return ReplaceInstUsesWith(FirstPhi, UndefValue::get(FirstPhi.getType()));
+
+ // If this phi node is transformable, create new PHIs for all the pieces
+ // extracted out of it. First, sort the users by their offset and size.
+ array_pod_sort(PHIUsers.begin(), PHIUsers.end());
+
+ DEBUG(errs() << "SLICING UP PHI: " << FirstPhi << '\n';
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ errs() << "AND USER PHI #" << i << ": " << *PHIsToSlice[i] <<'\n';
+ );
+
+ // PredValues - This is a temporary used when rewriting PHI nodes. It is
+ // hoisted out here to avoid construction/destruction thrashing.
+ DenseMap<BasicBlock*, Value*> PredValues;
+
+ // ExtractedVals - Each new PHI we introduce is saved here so we don't
+ // introduce redundant PHIs.
+ DenseMap<LoweredPHIRecord, PHINode*> ExtractedVals;
+
+ for (unsigned UserI = 0, UserE = PHIUsers.size(); UserI != UserE; ++UserI) {
+ unsigned PHIId = PHIUsers[UserI].PHIId;
+ PHINode *PN = PHIsToSlice[PHIId];
+ unsigned Offset = PHIUsers[UserI].Shift;
+ const Type *Ty = PHIUsers[UserI].Inst->getType();
+
+ PHINode *EltPHI;
+
+ // If we've already lowered a user like this, reuse the previously lowered
+ // value.
+ if ((EltPHI = ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)]) == 0) {
+
+ // Otherwise, create the new PHI node for this user.
+ EltPHI = PHINode::Create(Ty, PN->getName()+".off"+Twine(Offset), PN);
+ assert(EltPHI->getType() != PN->getType() &&
+ "Truncate didn't shrink phi?");
+
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *Pred = PN->getIncomingBlock(i);
+ Value *&PredVal = PredValues[Pred];
+
+ // If we already have a value for this predecessor, reuse it.
+ if (PredVal) {
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+
+ // Handle the PHI self-reuse case.
+ Value *InVal = PN->getIncomingValue(i);
+ if (InVal == PN) {
+ PredVal = EltPHI;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ } else if (PHINode *InPHI = dyn_cast<PHINode>(InVal)) {
+ // If the incoming value was a PHI, and if it was one of the PHIs we
+ // already rewrote, just use the lowered value.
+ if (Value *Res = ExtractedVals[LoweredPHIRecord(InPHI, Offset, Ty)]) {
+ PredVal = Res;
+ EltPHI->addIncoming(PredVal, Pred);
+ continue;
+ }
+ }
+
+ // Otherwise, do an extract in the predecessor.
+ Builder->SetInsertPoint(Pred, Pred->getTerminator());
+ Value *Res = InVal;
+ if (Offset)
+ Res = Builder->CreateLShr(Res, ConstantInt::get(InVal->getType(),
+ Offset), "extract");
+ Res = Builder->CreateTrunc(Res, Ty, "extract.t");
+ PredVal = Res;
+ EltPHI->addIncoming(Res, Pred);
+
+ // If the incoming value was a PHI, and if it was one of the PHIs we are
+ // rewriting, we will ultimately delete the code we inserted. This
+ // means we need to revisit that PHI to make sure we extract out the
+ // needed piece.
+ if (PHINode *OldInVal = dyn_cast<PHINode>(PN->getIncomingValue(i)))
+ if (PHIsInspected.count(OldInVal)) {
+ unsigned RefPHIId = std::find(PHIsToSlice.begin(),PHIsToSlice.end(),
+ OldInVal)-PHIsToSlice.begin();
+ PHIUsers.push_back(PHIUsageRecord(RefPHIId, Offset,
+ cast<Instruction>(Res)));
+ ++UserE;
+ }
+ }
+ PredValues.clear();
+
+ DEBUG(errs() << " Made element PHI for offset " << Offset << ": "
+ << *EltPHI << '\n');
+ ExtractedVals[LoweredPHIRecord(PN, Offset, Ty)] = EltPHI;
+ }
+
+ // Replace the use of this piece with the PHI node.
+ ReplaceInstUsesWith(*PHIUsers[UserI].Inst, EltPHI);
+ }
+
+ // Replace all the remaining uses of the PHI nodes (self uses and the lshrs)
+ // with undefs.
+ Value *Undef = UndefValue::get(FirstPhi.getType());
+ for (unsigned i = 1, e = PHIsToSlice.size(); i != e; ++i)
+ ReplaceInstUsesWith(*PHIsToSlice[i], Undef);
+ return ReplaceInstUsesWith(FirstPhi, Undef);
+}
+
// PHINode simplification
//
Instruction *InstCombiner::visitPHINode(PHINode &PN) {
@@ -11117,6 +11269,15 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
}
}
+ // If this is an integer PHI and we know that it has an illegal type, see if
+ // it is only used by trunc or trunc(lshr) operations. If so, we split the
+ // PHI into the various pieces being extracted. This sort of thing is
+ // introduced when SROA promotes an aggregate to a single large integer type.
+ if (isa<IntegerType>(PN.getType()) && TD &&
+ !TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
+ if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
+ return Res;
+
return 0;
}
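
To make the slicing transform called above concrete: each trunc(lshr(PHI, Off)) user of an illegal-width PHI becomes a narrow value recomputed in every predecessor. A minimal arithmetic sketch of the per-predecessor extraction the Builder emits (hypothetical function, fixed 16-bit slice for illustration):

    #include <cstdint>

    // Models CreateLShr followed by CreateTrunc from the patch: shift the wide
    // incoming value right by the use's offset and keep only the use's width.
    // Extracting bits [32,48) of an i64 incoming value is extract_piece(v, 32).
    static uint16_t extract_piece(uint64_t Incoming, unsigned Offset) {
      return static_cast<uint16_t>(Incoming >> Offset);
    }
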
@@ -12210,6 +12371,47 @@ Instruction *InstCombiner::visitExtractValueInst(ExtractValueInst &EV) {
return ExtractValueInst::Create(IV->getInsertedValueOperand(),
exti, exte);
}
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Agg)) {
+ // We're extracting from an intrinsic; see if we're the only user, which
+ // allows us to simplify multiple-result intrinsics to simpler things that
+ // just get one value.
+ if (II->hasOneUse()) {
+ // Check if we're grabbing the overflow bit or the result of a 'with
+ // overflow' intrinsic. If it's the latter we can remove the intrinsic
+ // and replace it with a traditional binary instruction.
+ switch (II->getIntrinsicID()) {
+ case Intrinsic::uadd_with_overflow:
+ case Intrinsic::sadd_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateAdd(LHS, RHS);
+ }
+ break;
+ case Intrinsic::usub_with_overflow:
+ case Intrinsic::ssub_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateSub(LHS, RHS);
+ }
+ break;
+ case Intrinsic::umul_with_overflow:
+ case Intrinsic::smul_with_overflow:
+ if (*EV.idx_begin() == 0) { // Normal result.
+ Value *LHS = II->getOperand(1), *RHS = II->getOperand(2);
+ II->replaceAllUsesWith(UndefValue::get(II->getType()));
+ EraseInstFromFunction(*II);
+ return BinaryOperator::CreateMul(LHS, RHS);
+ }
+ break;
+ default:
+ break;
+ }
+ }
+ }
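
The rewrite above is safe because element 0 of a *.with.overflow result is just the wrapping binary operation; the overflow bit only matters when element 1 is extracted. A small standalone sketch in plain C++ (illustrative, not the LLVM API):

    #include <cstdint>
    #include <utility>

    // The {result, overflow} pair an add-with-overflow style operation yields.
    static std::pair<uint32_t, bool> uadd_with_overflow(uint32_t A, uint32_t B) {
      uint32_t R = A + B; // wraps, exactly like the intrinsic's result slot
      return std::make_pair(R, R < A); // second element is the overflow bit
    }

    // When only the normal result is used, the pair collapses to a plain add,
    // which is what the extractvalue is rewritten to (CreateAdd above).
    static uint32_t only_normal_result(uint32_t A, uint32_t B) {
      return uadd_with_overflow(A, B).first; // == A + B
    }
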
// Can't simplify extracts from other values. Note that nested extracts are
// already simplified implicitly by the above (extract ( extract (insert) )
// will be translated into extract ( insert ( extract ) ) first and then just
@@ -12715,29 +12917,33 @@ Instruction *InstCombiner::visitShuffleVectorInst(ShuffleVectorInst &SVI) {
if (isa<UndefValue>(RHS)) {
std::vector<unsigned> LHSMask = getShuffleMask(LHSSVI);
- std::vector<unsigned> NewMask;
- for (unsigned i = 0, e = Mask.size(); i != e; ++i)
- if (Mask[i] >= 2*e)
- NewMask.push_back(2*e);
- else
- NewMask.push_back(LHSMask[Mask[i]]);
+ if (LHSMask.size() == Mask.size()) {
+ std::vector<unsigned> NewMask;
+ for (unsigned i = 0, e = Mask.size(); i != e; ++i)
+ if (Mask[i] >= 2*e)
+ NewMask.push_back(2*e);
+ else
+ NewMask.push_back(LHSMask[Mask[i]]);
- // If the result mask is equal to the src shuffle or this shuffle mask, do
- // the replacement.
- if (NewMask == LHSMask || NewMask == Mask) {
- unsigned LHSInNElts =
- cast<VectorType>(LHSSVI->getOperand(0)->getType())->getNumElements();
- std::vector<Constant*> Elts;
- for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
- if (NewMask[i] >= LHSInNElts*2) {
- Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
- } else {
- Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context), NewMask[i]));
+ // If the result mask is equal to the src shuffle or this
+ // shuffle mask, do the replacement.
+ if (NewMask == LHSMask || NewMask == Mask) {
+ unsigned LHSInNElts =
+ cast<VectorType>(LHSSVI->getOperand(0)->getType())->
+ getNumElements();
+ std::vector<Constant*> Elts;
+ for (unsigned i = 0, e = NewMask.size(); i != e; ++i) {
+ if (NewMask[i] >= LHSInNElts*2) {
+ Elts.push_back(UndefValue::get(Type::getInt32Ty(*Context)));
+ } else {
+ Elts.push_back(ConstantInt::get(Type::getInt32Ty(*Context),
+ NewMask[i]));
+ }
}
+ return new ShuffleVectorInst(LHSSVI->getOperand(0),
+ LHSSVI->getOperand(1),
+ ConstantVector::get(Elts));
}
- return new ShuffleVectorInst(LHSSVI->getOperand(0),
- LHSSVI->getOperand(1),
- ConstantVector::get(Elts));
}
}
}
@@ -12824,7 +13030,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
// ConstantProp instruction if trivially constant.
if (!Inst->use_empty() && isa<Constant>(Inst->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
+ if (Constant *C = ConstantFoldInstruction(Inst, TD)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: "
<< *Inst << '\n');
Inst->replaceAllUsesWith(C);
@@ -12846,8 +13052,7 @@ static bool AddReachableCodeToWorklist(BasicBlock *BB,
if (!FoldedConstants.insert(CE))
continue;
- Constant *NewC =
- ConstantFoldConstantExpression(CE, BB->getContext(), TD);
+ Constant *NewC = ConstantFoldConstantExpression(CE, TD);
if (NewC && NewC != CE) {
*i = NewC;
MadeIRChange = true;
@@ -12954,7 +13159,7 @@ bool InstCombiner::DoOneIteration(Function &F, unsigned Iteration) {
// Instruction isn't dead, see if we can constant propagate it.
if (!I->use_empty() && isa<Constant>(I->getOperand(0)))
- if (Constant *C = ConstantFoldInstruction(I, F.getContext(), TD)) {
+ if (Constant *C = ConstantFoldInstruction(I, TD)) {
DEBUG(errs() << "IC: ConstFold to: " << *C << " from: " << *I << '\n');
// Add operands to the worklist.
@@ -13065,7 +13270,7 @@ bool InstCombiner::runOnFunction(Function &F) {
/// Builder - This is an IRBuilder that automatically inserts new
/// instructions into the worklist when they are created.
IRBuilder<true, TargetFolder, InstCombineIRInserter>
- TheBuilder(F.getContext(), TargetFolder(TD, F.getContext()),
+ TheBuilder(F.getContext(), TargetFolder(TD),
InstCombineIRInserter(Worklist));
Builder = &TheBuilder;
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 10c9ec6..5864113 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -16,7 +16,8 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Analysis/LazyValueInfo.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/SSAUpdater.h"
@@ -40,6 +41,12 @@ Threshold("jump-threading-threshold",
cl::desc("Max block size to duplicate for jump threading"),
cl::init(6), cl::Hidden);
+// Turn on use of LazyValueInfo.
+static cl::opt<bool>
+EnableLVI("enable-jump-threading-lvi", cl::ReallyHidden);
+
+
+
namespace {
/// This pass performs 'jump threading', which looks at blocks that have
/// multiple predecessors and multiple successors. If one or more of the
@@ -59,6 +66,7 @@ namespace {
///
class JumpThreading : public FunctionPass {
TargetData *TD;
+ LazyValueInfo *LVI;
#ifdef NDEBUG
SmallPtrSet<BasicBlock*, 16> LoopHeaders;
#else
@@ -69,20 +77,31 @@ namespace {
JumpThreading() : FunctionPass(&ID) {}
bool runOnFunction(Function &F);
- void FindLoopHeaders(Function &F);
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ if (EnableLVI)
+ AU.addRequired<LazyValueInfo>();
+ }
+
+ void FindLoopHeaders(Function &F);
bool ProcessBlock(BasicBlock *BB);
- bool ThreadEdge(BasicBlock *BB, BasicBlock *PredBB, BasicBlock *SuccBB);
+ bool ThreadEdge(BasicBlock *BB, const SmallVectorImpl<BasicBlock*> &PredBBs,
+ BasicBlock *SuccBB);
bool DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
BasicBlock *PredBB);
-
- BasicBlock *FactorCommonPHIPreds(PHINode *PN, Value *Val);
+
+ typedef SmallVectorImpl<std::pair<ConstantInt*,
+ BasicBlock*> > PredValueInfo;
+
+ bool ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,
+ PredValueInfo &Result);
+ bool ProcessThreadableEdges(Value *Cond, BasicBlock *BB);
+
+
bool ProcessBranchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessSwitchOnDuplicateCond(BasicBlock *PredBB, BasicBlock *DestBB);
bool ProcessJumpOnPHI(PHINode *PN);
- bool ProcessBranchOnLogical(Value *V, BasicBlock *BB, bool isAnd);
- bool ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB);
bool SimplifyPartiallyRedundantLoad(LoadInst *LI);
};
@@ -100,6 +119,7 @@ FunctionPass *llvm::createJumpThreadingPass() { return new JumpThreading(); }
bool JumpThreading::runOnFunction(Function &F) {
DEBUG(errs() << "Jump threading on function '" << F.getName() << "'\n");
TD = getAnalysisIfAvailable<TargetData>();
+ LVI = EnableLVI ? &getAnalysis<LazyValueInfo>() : 0;
FindLoopHeaders(F);
@@ -109,6 +129,7 @@ bool JumpThreading::runOnFunction(Function &F) {
bool Changed = false;
for (Function::iterator I = F.begin(), E = F.end(); I != E;) {
BasicBlock *BB = I;
+ // Thread all of the branches we can over this block.
while (ProcessBlock(BB))
Changed = true;
@@ -123,6 +144,29 @@ bool JumpThreading::runOnFunction(Function &F) {
LoopHeaders.erase(BB);
DeleteDeadBlock(BB);
Changed = true;
+ } else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
+ // Can't thread an unconditional jump, but if the block is "almost
+ // empty", we can replace uses of it with uses of the successor and make
+ // this dead.
+ if (BI->isUnconditional() &&
+ BB != &BB->getParent()->getEntryBlock()) {
+ BasicBlock::iterator BBI = BB->getFirstNonPHI();
+ // Ignore dbg intrinsics.
+ while (isa<DbgInfoIntrinsic>(BBI))
+ ++BBI;
+ // If the terminator is the only non-phi instruction, try to nuke it.
+ if (BBI->isTerminator()) {
+ // Since TryToSimplifyUncondBranchFromEmptyBlock may delete the
+ // block, we have to make sure it isn't in the LoopHeaders set. We
+ // reinsert afterward in the rare case when the block isn't deleted.
+ bool ErasedFromLoopHeaders = LoopHeaders.erase(BB);
+
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
+ Changed = true;
+ else if (ErasedFromLoopHeaders)
+ LoopHeaders.insert(BB);
+ }
+ }
}
}
AnotherIteration = Changed;
@@ -139,6 +183,10 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
/// Ignore PHI nodes, these will be flattened when duplication happens.
BasicBlock::const_iterator I = BB->getFirstNonPHI();
+ // FIXME: THREADING will delete values that are just used to compute the
+ // branch, so they shouldn't count against the duplication cost.
+
+
// Sum up the cost of each instruction until we get to the terminator. Don't
// include the terminator because the copy won't include it.
unsigned Size = 0;
@@ -173,8 +221,6 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
return Size;
}
-
-
/// FindLoopHeaders - We do not want jump threading to turn proper loop
/// structures into irreducible loops. Doing this breaks up the loop nesting
/// hierarchy and pessimizes later transformations. To prevent this from
@@ -198,29 +244,181 @@ void JumpThreading::FindLoopHeaders(Function &F) {
LoopHeaders.insert(const_cast<BasicBlock*>(Edges[i].second));
}
-
-/// FactorCommonPHIPreds - If there are multiple preds with the same incoming
-/// value for the PHI, factor them together so we get one block to thread for
-/// the whole group.
-/// This is important for things like "phi i1 [true, true, false, true, x]"
-/// where we only need to clone the block for the true blocks once.
+/// ComputeValueKnownInPredecessors - Given a basic block BB and a value V, see
+/// if we can infer that the value is a known ConstantInt in any of our
+/// predecessors. If so, return the known list of value and pred BB in the
+/// result vector. If a value is known to be undef, it is returned as null.
+///
+/// This returns true if there were any known values.
///
-BasicBlock *JumpThreading::FactorCommonPHIPreds(PHINode *PN, Value *Val) {
- SmallVector<BasicBlock*, 16> CommonPreds;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == Val)
- CommonPreds.push_back(PN->getIncomingBlock(i));
-
- if (CommonPreds.size() == 1)
- return CommonPreds[0];
+bool JumpThreading::
+ComputeValueKnownInPredecessors(Value *V, BasicBlock *BB,PredValueInfo &Result){
+ // If V is a constantint, then it is known in all predecessors.
+ if (isa<ConstantInt>(V) || isa<UndefValue>(V)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(V);
- DEBUG(errs() << " Factoring out " << CommonPreds.size()
- << " common predecessors.\n");
- return SplitBlockPredecessors(PN->getParent(),
- &CommonPreds[0], CommonPreds.size(),
- ".thr_comm", this);
-}
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI)
+ Result.push_back(std::make_pair(CI, *PI));
+ return true;
+ }
+
+ // If V is a non-instruction value, or an instruction in a different block,
+ // then it can't be derived from a PHI.
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0 || I->getParent() != BB) {
+
+ // Okay, if this is a live-in value, see if it has a known value at the end
+ // of any of our predecessors.
+ //
+ // FIXME: This should be an edge property, not a block end property.
+ /// TODO: Per PR2563, we could infer value range information about a
+ /// predecessor based on its terminator.
+ //
+ if (LVI) {
+ // FIXME: change this to use the more-rich 'getPredicateOnEdge' method if
+ // "I" is a non-local compare-with-a-constant instruction. This would be
+ // able to handle value inequalities better, for example if the compare is
+ // "X < 4" and "X < 3" is known true but "X < 4" itself is not available.
+ // Perhaps getConstantOnEdge should be smart enough to do this?
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ Constant *PredCst = LVI->getConstantOnEdge(V, *PI, BB);
+ if (PredCst == 0 ||
+ (!isa<ConstantInt>(PredCst) && !isa<UndefValue>(PredCst)))
+ continue;
+
+ Result.push_back(std::make_pair(dyn_cast<ConstantInt>(PredCst), *PI));
+ }
+
+ return !Result.empty();
+ }
+
+ return false;
+ }
+
+ /// If I is a PHI node, then we know the incoming values for any constants.
+ if (PHINode *PN = dyn_cast<PHINode>(I)) {
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ Value *InVal = PN->getIncomingValue(i);
+ if (isa<ConstantInt>(InVal) || isa<UndefValue>(InVal)) {
+ ConstantInt *CI = dyn_cast<ConstantInt>(InVal);
+ Result.push_back(std::make_pair(CI, PN->getIncomingBlock(i)));
+ }
+ }
+ return !Result.empty();
+ }
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> LHSVals, RHSVals;
+
+ // Handle some boolean conditions.
+ if (I->getType()->getPrimitiveSizeInBits() == 1) {
+ // X | true -> true
+ // X & false -> false
+ if (I->getOpcode() == Instruction::Or ||
+ I->getOpcode() == Instruction::And) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, LHSVals);
+ ComputeValueKnownInPredecessors(I->getOperand(1), BB, RHSVals);
+
+ if (LHSVals.empty() && RHSVals.empty())
+ return false;
+
+ ConstantInt *InterestingVal;
+ if (I->getOpcode() == Instruction::Or)
+ InterestingVal = ConstantInt::getTrue(I->getContext());
+ else
+ InterestingVal = ConstantInt::getFalse(I->getContext());
+
+ // Scan for the sentinel.
+ for (unsigned i = 0, e = LHSVals.size(); i != e; ++i)
+ if (LHSVals[i].first == InterestingVal || LHSVals[i].first == 0)
+ Result.push_back(LHSVals[i]);
+ for (unsigned i = 0, e = RHSVals.size(); i != e; ++i)
+ if (RHSVals[i].first == InterestingVal || RHSVals[i].first == 0)
+ Result.push_back(RHSVals[i]);
+ return !Result.empty();
+ }
+
+ // Handle the NOT form of XOR.
+ if (I->getOpcode() == Instruction::Xor &&
+ isa<ConstantInt>(I->getOperand(1)) &&
+ cast<ConstantInt>(I->getOperand(1))->isOne()) {
+ ComputeValueKnownInPredecessors(I->getOperand(0), BB, Result);
+ if (Result.empty())
+ return false;
+
+ // Invert the known values.
+ for (unsigned i = 0, e = Result.size(); i != e; ++i)
+ if (Result[i].first)
+ Result[i].first =
+ cast<ConstantInt>(ConstantExpr::getNot(Result[i].first));
+ return true;
+ }
+ }
+ // Handle compare with phi operand, where the PHI is defined in this block.
+ if (CmpInst *Cmp = dyn_cast<CmpInst>(I)) {
+ PHINode *PN = dyn_cast<PHINode>(Cmp->getOperand(0));
+ if (PN && PN->getParent() == BB) {
+ // We can do this simplification if any comparisons fold to true or false.
+ // See if any do.
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
+ BasicBlock *PredBB = PN->getIncomingBlock(i);
+ Value *LHS = PN->getIncomingValue(i);
+ Value *RHS = Cmp->getOperand(1)->DoPHITranslation(BB, PredBB);
+
+ Value *Res = SimplifyCmpInst(Cmp->getPredicate(), LHS, RHS, TD);
+ if (Res == 0) {
+ if (!LVI || !isa<Constant>(RHS))
+ continue;
+
+ LazyValueInfo::Tristate
+ ResT = LVI->getPredicateOnEdge(Cmp->getPredicate(), LHS,
+ cast<Constant>(RHS), PredBB, BB);
+ if (ResT == LazyValueInfo::Unknown)
+ continue;
+ Res = ConstantInt::get(Type::getInt1Ty(LHS->getContext()), ResT);
+ }
+
+ if (isa<UndefValue>(Res))
+ Result.push_back(std::make_pair((ConstantInt*)0, PredBB));
+ else if (ConstantInt *CI = dyn_cast<ConstantInt>(Res))
+ Result.push_back(std::make_pair(CI, PredBB));
+ }
+
+ return !Result.empty();
+ }
+
+
+ // If comparing a live-in value against a constant, see if we know the
+ // live-in value on any predecessors.
+ if (LVI && isa<Constant>(Cmp->getOperand(1)) &&
+ Cmp->getType()->isInteger() && // Not vector compare.
+ (!isa<Instruction>(Cmp->getOperand(0)) ||
+ cast<Instruction>(Cmp->getOperand(0))->getParent() != BB)) {
+ Constant *RHSCst = cast<Constant>(Cmp->getOperand(1));
+
+ for (pred_iterator PI = pred_begin(BB), E = pred_end(BB); PI != E; ++PI) {
+ // If the value is known by LazyValueInfo to be a constant in a
+ // predecessor, use that information to try to thread this block.
+ LazyValueInfo::Tristate
+ Res = LVI->getPredicateOnEdge(Cmp->getPredicate(), Cmp->getOperand(0),
+ RHSCst, *PI, BB);
+ if (Res == LazyValueInfo::Unknown)
+ continue;
+
+ Constant *ResC = ConstantInt::get(Cmp->getType(), Res);
+ Result.push_back(std::make_pair(cast<ConstantInt>(ResC), *PI));
+ }
+
+ return !Result.empty();
+ }
+ }
+ return false;
+}
+
+
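
The boolean case in the function above keys on the absorbing value of the operator: for 'or', any predecessor where either operand is known true (or undef) decides the whole condition, and dually for 'and' with false. A rough standalone model (hypothetical names; duplicates are left in since the caller deduplicates predecessors later):

    #include <map>
    #include <vector>

    // Predecessor id -> known boolean value of one operand in that predecessor.
    typedef std::map<int, bool> KnownPerPred;

    // For an 'or', collect predecessors where the condition is already decided
    // because one side is known to be true; an 'and' would scan for false.
    static std::vector<int> preds_where_or_is_true(const KnownPerPred &LHS,
                                                   const KnownPerPred &RHS) {
      std::vector<int> Result;
      for (KnownPerPred::const_iterator I = LHS.begin(); I != LHS.end(); ++I)
        if (I->second) Result.push_back(I->first); // X | true -> true
      for (KnownPerPred::const_iterator I = RHS.begin(); I != RHS.end(); ++I)
        if (I->second) Result.push_back(I->first);
      return Result;
    }
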
/// GetBestDestForBranchOnUndef - If we determine that the specified block ends
/// in an undefined jump, decide which block is best to revector to.
@@ -251,7 +449,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// successor, merge the blocks. This encourages recursive jump threading
// because now the condition in this block can be threaded through
// predecessors of our predecessor block.
- if (BasicBlock *SinglePred = BB->getSinglePredecessor())
+ if (BasicBlock *SinglePred = BB->getSinglePredecessor()) {
if (SinglePred->getTerminator()->getNumSuccessors() == 1 &&
SinglePred != BB) {
// If SinglePred was a loop header, BB becomes one.
@@ -267,10 +465,10 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
BB->moveBefore(&BB->getParent()->getEntryBlock());
return true;
}
-
- // See if this block ends with a branch or switch. If so, see if the
- // condition is a phi node. If so, and if an entry of the phi node is a
- // constant, we can thread the block.
+ }
+
+ // Look to see if the terminator is a branch or switch; if not, we can't
+ // thread it.
Value *Condition;
if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator())) {
// Can't thread an unconditional jump.
@@ -301,7 +499,7 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
TerminatorInst *BBTerm = BB->getTerminator();
for (unsigned i = 0, e = BBTerm->getNumSuccessors(); i != e; ++i) {
if (i == BestSucc) continue;
- BBTerm->getSuccessor(i)->removePredecessor(BB);
+ RemovePredecessorAndSimplify(BBTerm->getSuccessor(i), BB, TD);
}
DEBUG(errs() << " In block '" << BB->getName()
@@ -318,7 +516,8 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
// br COND, BBX, BBY
// BBX:
// br COND, BBZ, BBW
- if (!Condition->hasOneUse() && // Multiple uses.
+ if (!LVI &&
+ !Condition->hasOneUse() && // Multiple uses.
(CondInst == 0 || CondInst->getParent() != BB)) { // Non-local definition.
pred_iterator PI = pred_begin(BB), E = pred_end(BB);
if (isa<BranchInst>(BB->getTerminator())) {
@@ -338,52 +537,40 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
}
// All the rest of our checks depend on the condition being an instruction.
- if (CondInst == 0)
+ if (CondInst == 0) {
+ // FIXME: Unify this with code below.
+ if (LVI && ProcessThreadableEdges(Condition, BB))
+ return true;
return false;
+ }
+
// See if this is a phi node in the current block.
if (PHINode *PN = dyn_cast<PHINode>(CondInst))
if (PN->getParent() == BB)
return ProcessJumpOnPHI(PN);
- // If this is a conditional branch whose condition is and/or of a phi, try to
- // simplify it.
- if ((CondInst->getOpcode() == Instruction::And ||
- CondInst->getOpcode() == Instruction::Or) &&
- isa<BranchInst>(BB->getTerminator()) &&
- ProcessBranchOnLogical(CondInst, BB,
- CondInst->getOpcode() == Instruction::And))
- return true;
-
if (CmpInst *CondCmp = dyn_cast<CmpInst>(CondInst)) {
- if (isa<PHINode>(CondCmp->getOperand(0))) {
- // If we have "br (phi != 42)" and the phi node has any constant values
- // as operands, we can thread through this block.
- //
- // If we have "br (cmp phi, x)" and the phi node contains x such that the
- // comparison uniquely identifies the branch target, we can thread
- // through this block.
-
- if (ProcessBranchOnCompare(CondCmp, BB))
- return true;
- }
-
- // If we have a comparison, loop over the predecessors to see if there is
- // a condition with the same value.
- pred_iterator PI = pred_begin(BB), E = pred_end(BB);
- for (; PI != E; ++PI)
- if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
- if (PBI->isConditional() && *PI != BB) {
- if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
- if (CI->getOperand(0) == CondCmp->getOperand(0) &&
- CI->getOperand(1) == CondCmp->getOperand(1) &&
- CI->getPredicate() == CondCmp->getPredicate()) {
- // TODO: Could handle things like (x != 4) --> (x == 17)
- if (ProcessBranchOnDuplicateCond(*PI, BB))
- return true;
+ if (!LVI &&
+ (!isa<PHINode>(CondCmp->getOperand(0)) ||
+ cast<PHINode>(CondCmp->getOperand(0))->getParent() != BB)) {
+ // If we have a comparison, loop over the predecessors to see if there is
+ // a condition with a lexically identical value.
+ pred_iterator PI = pred_begin(BB), E = pred_end(BB);
+ for (; PI != E; ++PI)
+ if (BranchInst *PBI = dyn_cast<BranchInst>((*PI)->getTerminator()))
+ if (PBI->isConditional() && *PI != BB) {
+ if (CmpInst *CI = dyn_cast<CmpInst>(PBI->getCondition())) {
+ if (CI->getOperand(0) == CondCmp->getOperand(0) &&
+ CI->getOperand(1) == CondCmp->getOperand(1) &&
+ CI->getPredicate() == CondCmp->getPredicate()) {
+ // TODO: Could handle things like (x != 4) --> (x == 17)
+ if (ProcessBranchOnDuplicateCond(*PI, BB))
+ return true;
+ }
}
}
- }
+ }
}
// Check for some cases that are worth simplifying. Right now we want to look
@@ -398,10 +585,21 @@ bool JumpThreading::ProcessBlock(BasicBlock *BB) {
if (isa<Constant>(CondCmp->getOperand(1)))
SimplifyValue = CondCmp->getOperand(0);
+ // TODO: There are other places where load PRE would be profitable, such as
+ // more complex comparisons.
if (LoadInst *LI = dyn_cast<LoadInst>(SimplifyValue))
if (SimplifyPartiallyRedundantLoad(LI))
return true;
+
+ // Handle a variety of cases where we are branching on something derived from
+ // a PHI node in the current block. If we can prove that any predecessors
+ // compute a predictable value based on a PHI node, thread those predecessors.
+ //
+ if (ProcessThreadableEdges(CondInst, BB))
+ return true;
+
+
// TODO: If we have: "br (X > 0)" and we have a predecessor where we know
// "(X == 4)" thread through this block.
@@ -459,8 +657,11 @@ bool JumpThreading::ProcessBranchOnDuplicateCond(BasicBlock *PredBB,
// Next, figure out which successor we are threading to.
BasicBlock *SuccBB = DestBI->getSuccessor(!BranchDir);
+ SmallVector<BasicBlock*, 2> Preds;
+ Preds.push_back(PredBB);
+
// Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ return ThreadEdge(BB, Preds, SuccBB);
}
/// ProcessSwitchOnDuplicateCond - We found a block and a predecessor of that
@@ -553,7 +754,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
Value *LoadedPtr = LI->getOperand(0);
// If the loaded operand is defined in the LoadBB, it can't be available.
- // FIXME: Could do PHI translation, that would be fun :)
+ // TODO: Could do simple PHI translation, that would be fun :)
if (Instruction *PtrOp = dyn_cast<Instruction>(LoadedPtr))
if (PtrOp->getParent() == LoadBB)
return false;
@@ -562,8 +763,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// the entry to its block.
BasicBlock::iterator BBIt = LI;
- if (Value *AvailableVal = FindAvailableLoadedValue(LoadedPtr, LoadBB,
- BBIt, 6)) {
+ if (Value *AvailableVal =
+ FindAvailableLoadedValue(LoadedPtr, LoadBB, BBIt, 6)) {
// If the value if the load is locally available within the block, just use
// it. This frequently occurs for reg2mem'd allocas.
//cerr << "LOAD ELIMINATED:\n" << *BBIt << *LI << "\n";
@@ -646,7 +847,7 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
// Split them out to their own block.
UnavailablePred =
SplitBlockPredecessors(LoadBB, &PredsToSplit[0], PredsToSplit.size(),
- "thread-split", this);
+ "thread-pre-split", this);
}
// If the value isn't available in all predecessors, then there will be
@@ -655,7 +856,8 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
if (UnavailablePred) {
assert(UnavailablePred->getTerminator()->getNumSuccessors() == 1 &&
"Can't handle critical edge here!");
- Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr",
+ Value *NewVal = new LoadInst(LoadedPtr, LI->getName()+".pr", false,
+ LI->getAlignment(),
UnavailablePred->getTerminator());
AvailablePreds.push_back(std::make_pair(UnavailablePred, NewVal));
}
@@ -690,55 +892,183 @@ bool JumpThreading::SimplifyPartiallyRedundantLoad(LoadInst *LI) {
return true;
}
-
-/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
-/// the current block. See if there are any simplifications we can do based on
-/// inputs to the phi node.
-///
-bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
- BasicBlock *BB = PN->getParent();
+/// FindMostPopularDest - The specified list contains multiple possible
+/// threadable destinations. Pick the one that occurs the most frequently in
+/// the list.
+static BasicBlock *
+FindMostPopularDest(BasicBlock *BB,
+ const SmallVectorImpl<std::pair<BasicBlock*,
+ BasicBlock*> > &PredToDestList) {
+ assert(!PredToDestList.empty());
+
+ // Determine popularity. If there are multiple possible destinations, we
+ // explicitly choose to ignore 'undef' destinations. We prefer to thread
+ // blocks with known and real destinations to threading undef. We'll handle
+ // them later if interesting.
+ DenseMap<BasicBlock*, unsigned> DestPopularity;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second)
+ DestPopularity[PredToDestList[i].second]++;
+
+ // Find the most popular dest.
+ DenseMap<BasicBlock*, unsigned>::iterator DPI = DestPopularity.begin();
+ BasicBlock *MostPopularDest = DPI->first;
+ unsigned Popularity = DPI->second;
+ SmallVector<BasicBlock*, 4> SamePopularity;
+
+ for (++DPI; DPI != DestPopularity.end(); ++DPI) {
+ // If the popularity of this entry isn't higher than the popularity we've
+ // seen so far, ignore it.
+ if (DPI->second < Popularity)
+ ; // ignore.
+ else if (DPI->second == Popularity) {
+ // If it is the same as what we've seen so far, keep track of it.
+ SamePopularity.push_back(DPI->first);
+ } else {
+ // If it is more popular, remember it.
+ SamePopularity.clear();
+ MostPopularDest = DPI->first;
+ Popularity = DPI->second;
+ }
+ }
- // See if the phi node has any constant integer or undef values. If so, we
- // can determine where the corresponding predecessor will branch.
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- Value *PredVal = PN->getIncomingValue(i);
-
- // Check to see if this input is a constant integer. If so, the direction
- // of the branch is predictable.
- if (ConstantInt *CI = dyn_cast<ConstantInt>(PredVal)) {
- // Merge any common predecessors that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, CI);
+ // Okay, now we know the most popular destination. If there is more than
+ // destination, we need to determine one. This is arbitrary, but we need
+ // one destination, we need to determine one. This is arbitrary, but we need
+ // successor list.
+ if (!SamePopularity.empty()) {
+ SamePopularity.push_back(MostPopularDest);
+ TerminatorInst *TI = BB->getTerminator();
+ for (unsigned i = 0; ; ++i) {
+ assert(i != TI->getNumSuccessors() && "Didn't find any successor!");
- BasicBlock *SuccBB;
- if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
- SuccBB = BI->getSuccessor(CI->isZero());
- else {
- SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
- SuccBB = SI->getSuccessor(SI->findCaseValue(CI));
- }
+ if (std::find(SamePopularity.begin(), SamePopularity.end(),
+ TI->getSuccessor(i)) == SamePopularity.end())
+ continue;
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ MostPopularDest = TI->getSuccessor(i);
+ break;
}
+ }
+
+ // Okay, we have finally picked the most popular destination.
+ return MostPopularDest;
+}
+
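
A standalone illustration of the vote FindMostPopularDest performs, with integer ids standing in for BasicBlock pointers (hypothetical helper): count each non-undef destination, then break ties by the first match in successor order so the choice stays deterministic.

    #include <map>
    #include <vector>

    // DestPerPred uses -1 for the "undef" (null) destination, which gets no
    // vote. SuccOrder is the terminator's successor list, used only to break
    // ties deterministically, as in the loop above.
    static int most_popular_dest(const std::vector<int> &DestPerPred,
                                 const std::vector<int> &SuccOrder) {
      std::map<int, unsigned> Popularity;
      for (size_t i = 0; i != DestPerPred.size(); ++i)
        if (DestPerPred[i] != -1)
          ++Popularity[DestPerPred[i]];

      unsigned Best = 0;
      for (std::map<int, unsigned>::const_iterator I = Popularity.begin();
           I != Popularity.end(); ++I)
        if (I->second > Best)
          Best = I->second;

      for (size_t i = 0; i != SuccOrder.size(); ++i) {
        std::map<int, unsigned>::const_iterator I = Popularity.find(SuccOrder[i]);
        if (I != Popularity.end() && I->second == Best)
          return SuccOrder[i];
      }
      return -1; // no real destinations at all
    }
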
+bool JumpThreading::ProcessThreadableEdges(Value *Cond, BasicBlock *BB) {
+ // If threading this would thread across a loop header, don't even try to
+ // thread the edge.
+ if (LoopHeaders.count(BB))
+ return false;
+
+ SmallVector<std::pair<ConstantInt*, BasicBlock*>, 8> PredValues;
+ if (!ComputeValueKnownInPredecessors(Cond, BB, PredValues))
+ return false;
+ assert(!PredValues.empty() &&
+ "ComputeValueKnownInPredecessors returned true with no values");
+
+ DEBUG(errs() << "IN BB: " << *BB;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ errs() << " BB '" << BB->getName() << "': FOUND condition = ";
+ if (PredValues[i].first)
+ errs() << *PredValues[i].first;
+ else
+ errs() << "UNDEF";
+ errs() << " for pred '" << PredValues[i].second->getName()
+ << "'.\n";
+ });
+
+ // Decide what we want to thread through. Convert our list of known values to
+ // a list of known destinations for each pred. This also discards duplicate
+ // predecessors and keeps track of the undefined inputs (which are represented
+ // as a null dest in the PredToDestList).
+ SmallPtrSet<BasicBlock*, 16> SeenPreds;
+ SmallVector<std::pair<BasicBlock*, BasicBlock*>, 16> PredToDestList;
+
+ BasicBlock *OnlyDest = 0;
+ BasicBlock *MultipleDestSentinel = (BasicBlock*)(intptr_t)~0ULL;
+
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i) {
+ BasicBlock *Pred = PredValues[i].second;
+ if (!SeenPreds.insert(Pred))
+ continue; // Duplicate predecessor entry.
- // If the input is an undef, then it doesn't matter which way it will go.
- // Pick an arbitrary dest and thread the edge.
- if (UndefValue *UV = dyn_cast<UndefValue>(PredVal)) {
- // Merge any common predecessors that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, UV);
- BasicBlock *SuccBB =
- BB->getTerminator()->getSuccessor(GetBestDestForJumpOnUndef(BB));
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
+ // If the predecessor ends with an indirect goto, we can't change its
+ // destination.
+ if (isa<IndirectBrInst>(Pred->getTerminator()))
+ continue;
+
+ ConstantInt *Val = PredValues[i].first;
+
+ BasicBlock *DestBB;
+ if (Val == 0) // Undef.
+ DestBB = 0;
+ else if (BranchInst *BI = dyn_cast<BranchInst>(BB->getTerminator()))
+ DestBB = BI->getSuccessor(Val->isZero());
+ else {
+ SwitchInst *SI = cast<SwitchInst>(BB->getTerminator());
+ DestBB = SI->getSuccessor(SI->findCaseValue(Val));
}
+
+ // If we have exactly one destination, remember it for efficiency below.
+ if (i == 0)
+ OnlyDest = DestBB;
+ else if (OnlyDest != DestBB)
+ OnlyDest = MultipleDestSentinel;
+
+ PredToDestList.push_back(std::make_pair(Pred, DestBB));
}
- // If the incoming values are all variables, we don't know the destination of
- // any predecessors. However, if any of the predecessor blocks end in an
- // unconditional branch, we can *duplicate* the jump into that block in order
- // to further encourage jump threading and to eliminate cases where we have
- // branch on a phi of an icmp (branch on icmp is much better).
+ // If all edges were unthreadable, we fail.
+ if (PredToDestList.empty())
+ return false;
+
+ // Determine which is the most common successor. If we have many inputs and
+ // this block is a switch, we want to start by threading the batch that goes
+ // to the most popular destination first. If we only know about one
+ // threadable destination (the common case) we can avoid this.
+ BasicBlock *MostPopularDest = OnlyDest;
+
+ if (MostPopularDest == MultipleDestSentinel)
+ MostPopularDest = FindMostPopularDest(BB, PredToDestList);
+
+ // Now that we know what the most popular destination is, factor all
+ // predecessors that will jump to it into a single predecessor.
+ SmallVector<BasicBlock*, 16> PredsToFactor;
+ for (unsigned i = 0, e = PredToDestList.size(); i != e; ++i)
+ if (PredToDestList[i].second == MostPopularDest) {
+ BasicBlock *Pred = PredToDestList[i].first;
+
+ // This predecessor may be a switch or something else that has multiple
+ // edges to the block. Factor each of these edges by listing them
+ // according to # occurrences in PredsToFactor.
+ TerminatorInst *PredTI = Pred->getTerminator();
+ for (unsigned i = 0, e = PredTI->getNumSuccessors(); i != e; ++i)
+ if (PredTI->getSuccessor(i) == BB)
+ PredsToFactor.push_back(Pred);
+ }
+
+ // If the threadable edges are branching on an undefined value, we get to pick
+ // the destination that these predecessors should get to.
+ if (MostPopularDest == 0)
+ MostPopularDest = BB->getTerminator()->
+ getSuccessor(GetBestDestForJumpOnUndef(BB));
+
+ // Ok, try to thread it!
+ return ThreadEdge(BB, PredsToFactor, MostPopularDest);
+}
+
+/// ProcessJumpOnPHI - We have a conditional branch or switch on a PHI node in
+/// the current block. See if there are any simplifications we can do based on
+/// inputs to the phi node.
+///
+bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
+ BasicBlock *BB = PN->getParent();
+
+ // If any of the predecessor blocks end in an unconditional branch, we can
+ // *duplicate* the jump into that block in order to further encourage jump
+ // threading and to eliminate cases where we have branch on a phi of an icmp
+ // (branch on icmp is much better).
// We don't want to do this tranformation for switches, because we don't
// really want to duplicate a switch.
@@ -759,137 +1089,6 @@ bool JumpThreading::ProcessJumpOnPHI(PHINode *PN) {
}
-/// ProcessJumpOnLogicalPHI - PN's basic block contains a conditional branch
-/// whose condition is an AND/OR where one side is PN. If PN has constant
-/// operands that permit us to evaluate the condition for some operand, thread
-/// through the block. For example with:
-/// br (and X, phi(Y, Z, false))
-/// the predecessor corresponding to the 'false' will always jump to the false
-/// destination of the branch.
-///
-bool JumpThreading::ProcessBranchOnLogical(Value *V, BasicBlock *BB,
- bool isAnd) {
- // If this is a binary operator tree of the same AND/OR opcode, check the
- // LHS/RHS.
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(V))
- if ((isAnd && BO->getOpcode() == Instruction::And) ||
- (!isAnd && BO->getOpcode() == Instruction::Or)) {
- if (ProcessBranchOnLogical(BO->getOperand(0), BB, isAnd))
- return true;
- if (ProcessBranchOnLogical(BO->getOperand(1), BB, isAnd))
- return true;
- }
-
- // If this isn't a PHI node, we can't handle it.
- PHINode *PN = dyn_cast<PHINode>(V);
- if (!PN || PN->getParent() != BB) return false;
-
- // We can only do the simplification for phi nodes of 'false' with AND or
- // 'true' with OR. See if we have any entries in the phi for this.
- unsigned PredNo = ~0U;
- ConstantInt *PredCst = ConstantInt::get(Type::getInt1Ty(BB->getContext()),
- !isAnd);
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- if (PN->getIncomingValue(i) == PredCst) {
- PredNo = i;
- break;
- }
- }
-
- // If no match, bail out.
- if (PredNo == ~0U)
- return false;
-
- // If so, we can actually do this threading. Merge any common predecessors
- // that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredCst);
-
- // Next, figure out which successor we are threading to. If this was an AND,
- // the constant must be FALSE, and we must be targeting the 'false' block.
- // If this is an OR, the constant must be TRUE, and we must be targeting the
- // 'true' block.
- BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(isAnd);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
-}
-
-/// GetResultOfComparison - Given an icmp/fcmp predicate and the left and right
-/// hand sides of the compare instruction, try to determine the result. If the
-/// result can not be determined, a null pointer is returned.
-static Constant *GetResultOfComparison(CmpInst::Predicate pred,
- Value *LHS, Value *RHS,
- LLVMContext &Context) {
- if (Constant *CLHS = dyn_cast<Constant>(LHS))
- if (Constant *CRHS = dyn_cast<Constant>(RHS))
- return ConstantExpr::getCompare(pred, CLHS, CRHS);
-
- if (LHS == RHS)
- if (isa<IntegerType>(LHS->getType()) || isa<PointerType>(LHS->getType()))
- return ICmpInst::isTrueWhenEqual(pred) ?
- ConstantInt::getTrue(Context) : ConstantInt::getFalse(Context);
-
- return 0;
-}
-
-/// ProcessBranchOnCompare - We found a branch on a comparison between a phi
-/// node and a value. If we can identify when the comparison is true between
-/// the phi inputs and the value, we can fold the compare for that edge and
-/// thread through it.
-bool JumpThreading::ProcessBranchOnCompare(CmpInst *Cmp, BasicBlock *BB) {
- PHINode *PN = cast<PHINode>(Cmp->getOperand(0));
- Value *RHS = Cmp->getOperand(1);
-
- // If the phi isn't in the current block, an incoming edge to this block
- // doesn't control the destination.
- if (PN->getParent() != BB)
- return false;
-
- // We can do this simplification if any comparisons fold to true or false.
- // See if any do.
- Value *PredVal = 0;
- bool TrueDirection = false;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) {
- PredVal = PN->getIncomingValue(i);
-
- Constant *Res = GetResultOfComparison(Cmp->getPredicate(), PredVal,
- RHS, Cmp->getContext());
- if (!Res) {
- PredVal = 0;
- continue;
- }
-
- // If this folded to a constant expr, we can't do anything.
- if (ConstantInt *ResC = dyn_cast<ConstantInt>(Res)) {
- TrueDirection = ResC->getZExtValue();
- break;
- }
- // If this folded to undef, just go the false way.
- if (isa<UndefValue>(Res)) {
- TrueDirection = false;
- break;
- }
-
- // Otherwise, we can't fold this input.
- PredVal = 0;
- }
-
- // If no match, bail out.
- if (PredVal == 0)
- return false;
-
- // If so, we can actually do this threading. Merge any common predecessors
- // that will act the same.
- BasicBlock *PredBB = FactorCommonPHIPreds(PN, PredVal);
-
- // Next, get our successor.
- BasicBlock *SuccBB = BB->getTerminator()->getSuccessor(!TrueDirection);
-
- // Ok, try to thread it!
- return ThreadEdge(BB, PredBB, SuccBB);
-}
-
-
/// AddPHINodeEntriesForMappedBlock - We're adding 'NewPred' as a new
/// predecessor to the PHIBB block. If it has PHI nodes, add entries for
/// NewPred using the entries from OldPred (suitably mapped).
@@ -914,10 +1113,11 @@ static void AddPHINodeEntriesForMappedBlock(BasicBlock *PHIBB,
}
}
-/// ThreadEdge - We have decided that it is safe and profitable to thread an
-/// edge from PredBB to SuccBB across BB. Transform the IR to reflect this
-/// change.
-bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
+/// ThreadEdge - We have decided that it is safe and profitable to factor the
+/// blocks in PredBBs to one predecessor, then thread an edge from it to SuccBB
+/// across BB. Transform the IR to reflect this change.
+bool JumpThreading::ThreadEdge(BasicBlock *BB,
+ const SmallVectorImpl<BasicBlock*> &PredBBs,
BasicBlock *SuccBB) {
// If threading to the same block as we come from, we would infinite loop.
if (SuccBB == BB) {
@@ -929,8 +1129,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
// If threading this would thread across a loop header, don't thread the edge.
// See the comments above FindLoopHeaders for justifications and caveats.
if (LoopHeaders.count(BB)) {
- DEBUG(errs() << " Not threading from '" << PredBB->getName()
- << "' across loop header BB '" << BB->getName()
+ DEBUG(errs() << " Not threading across loop header BB '" << BB->getName()
<< "' to dest BB '" << SuccBB->getName()
<< "' - it might create an irreducible loop!\n");
return false;
@@ -943,6 +1142,17 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
return false;
}
+ // And finally, do it! Start by factoring the predecessors if needed.
+ BasicBlock *PredBB;
+ if (PredBBs.size() == 1)
+ PredBB = PredBBs[0];
+ else {
+ DEBUG(errs() << " Factoring out " << PredBBs.size()
+ << " common predecessors.\n");
+ PredBB = SplitBlockPredecessors(BB, &PredBBs[0], PredBBs.size(),
+ ".thr_comm", this);
+ }
+
// And finally, do it!
DEBUG(errs() << " Threading edge from '" << PredBB->getName() << "' to '"
<< SuccBB->getName() << "' with cost: " << JumpThreadCost
@@ -1034,7 +1244,7 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
TerminatorInst *PredTerm = PredBB->getTerminator();
for (unsigned i = 0, e = PredTerm->getNumSuccessors(); i != e; ++i)
if (PredTerm->getSuccessor(i) == BB) {
- BB->removePredecessor(PredBB);
+ RemovePredecessorAndSimplify(BB, PredBB, TD);
PredTerm->setSuccessor(i, NewBB);
}
@@ -1044,9 +1254,12 @@ bool JumpThreading::ThreadEdge(BasicBlock *BB, BasicBlock *PredBB,
BI = NewBB->begin();
for (BasicBlock::iterator E = NewBB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Constant *C = ConstantFoldInstruction(Inst, BB->getContext(), TD)) {
- Inst->replaceAllUsesWith(C);
- Inst->eraseFromParent();
+
+ if (Value *V = SimplifyInstruction(Inst, TD)) {
+ WeakVH BIHandle(BI);
+ ReplaceAndSimplifyAllUses(Inst, V, TD);
+ if (BIHandle == 0)
+ BI = NewBB->begin();
continue;
}
@@ -1164,7 +1377,7 @@ bool JumpThreading::DuplicateCondBranchOnPHIIntoPred(BasicBlock *BB,
// PredBB no longer jumps to BB, remove entries in the PHI node for the edge
// that we nuked.
- BB->removePredecessor(PredBB);
+ RemovePredecessorAndSimplify(BB, PredBB, TD);
// Remove the unconditional branch at the end of the PredBB block.
OldPredBranch->eraseFromParent();
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 756fbf3..104c873 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -263,7 +263,6 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// Get the preheader block to move instructions into...
Preheader = L->getLoopPreheader();
- assert(Preheader&&"Preheader insertion pass guarantees we have a preheader!");
// Loop over the body of this loop, looking for calls, invokes, and stores.
// Because subloops have already been incorporated into AST, we skip blocks in
@@ -286,12 +285,14 @@ bool LICM::runOnLoop(Loop *L, LPPassManager &LPM) {
// us to sink instructions in one pass, without iteration. After sinking
// instructions, we perform another pass to hoist them out of the loop.
//
- SinkRegion(DT->getNode(L->getHeader()));
- HoistRegion(DT->getNode(L->getHeader()));
+ if (L->hasDedicatedExits())
+ SinkRegion(DT->getNode(L->getHeader()));
+ if (Preheader)
+ HoistRegion(DT->getNode(L->getHeader()));
// Now that all loop invariants have been removed from the loop, promote any
// memory references to scalars that we can...
- if (!DisablePromotion)
+ if (!DisablePromotion && Preheader && L->hasDedicatedExits())
PromoteValuesInLoop();
// Clear out loops state information for the next iteration
diff --git a/lib/Transforms/Scalar/LoopDeletion.cpp b/lib/Transforms/Scalar/LoopDeletion.cpp
index 866d8b4..48817ab 100644
--- a/lib/Transforms/Scalar/LoopDeletion.cpp
+++ b/lib/Transforms/Scalar/LoopDeletion.cpp
@@ -115,6 +115,10 @@ bool LoopDeletion::runOnLoop(Loop* L, LPPassManager& LPM) {
if (!preheader)
return false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->hasDedicatedExits())
+ return false;
+
// We can't remove loops that contain subloops. If the subloops were dead,
// they would already have been removed in earlier executions of this pass.
if (L->begin() != L->end())
diff --git a/lib/Transforms/Scalar/LoopIndexSplit.cpp b/lib/Transforms/Scalar/LoopIndexSplit.cpp
index 920d85c..8b6a233 100644
--- a/lib/Transforms/Scalar/LoopIndexSplit.cpp
+++ b/lib/Transforms/Scalar/LoopIndexSplit.cpp
@@ -209,6 +209,10 @@ bool LoopIndexSplit::runOnLoop(Loop *IncomingLoop, LPPassManager &LPM_Ref) {
L = IncomingLoop;
LPM = &LPM_Ref;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->isLoopSimplifyForm())
+ return false;
+
// FIXME - Nested loops make dominator info updates tricky.
if (!L->getSubLoops().empty())
return false;
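The guards added to LICM, LoopDeletion and LoopIndexSplit above all test for pieces of LoopSimplify's canonical form instead of asserting it. As a rough summary (the descriptions are paraphrases of the LoopInfo API, not text from the patch), for a Loop *L:

    L->getLoopPreheader();   // single out-of-loop block branching only to the
                             // header; null when no such block exists
    L->getLoopLatch();       // single in-loop block with a backedge to the
                             // header; null when there are several
    L->hasDedicatedExits();  // every exit block is reached only from inside L
    L->isLoopSimplifyForm(); // preheader, single latch and dedicated exits

Where the old code asserted these properties (for example the removed Preheader assert in LICM), the new code bails out of, or skips, the transformations that depend on them.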
diff --git a/lib/Transforms/Scalar/LoopRotation.cpp b/lib/Transforms/Scalar/LoopRotation.cpp
index 7a4bb35..5004483 100644
--- a/lib/Transforms/Scalar/LoopRotation.cpp
+++ b/lib/Transforms/Scalar/LoopRotation.cpp
@@ -15,7 +15,6 @@
#include "llvm/Transforms/Scalar.h"
#include "llvm/Function.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/ScalarEvolution.h"
@@ -49,6 +48,7 @@ namespace {
AU.addRequiredID(LCSSAID);
AU.addPreservedID(LCSSAID);
AU.addPreserved<ScalarEvolution>();
+ AU.addRequired<LoopInfo>();
AU.addPreserved<LoopInfo>();
AU.addPreserved<DominatorTree>();
AU.addPreserved<DominanceFrontier>();
@@ -104,17 +104,18 @@ bool LoopRotate::runOnLoop(Loop *Lp, LPPassManager &LPM) {
bool LoopRotate::rotateLoop(Loop *Lp, LPPassManager &LPM) {
L = Lp;
- OrigHeader = L->getHeader();
OrigPreHeader = L->getLoopPreheader();
+ if (!OrigPreHeader) return false;
+
OrigLatch = L->getLoopLatch();
+ if (!OrigLatch) return false;
+
+ OrigHeader = L->getHeader();
// If the loop has only one block then there is not much to rotate.
if (L->getBlocks().size() == 1)
return false;
- assert(OrigHeader && OrigLatch && OrigPreHeader &&
- "Loop is not in canonical form");
-
// If the loop header is not one of the loop exiting blocks then
// either this loop is already rotated or it is not
// suitable for loop rotation transformations.
@@ -287,7 +288,7 @@ void LoopRotate::preserveCanonicalLoopForm(LPPassManager &LPM) {
"bb.nph",
OrigHeader->getParent(),
NewHeader);
- LoopInfo &LI = LPM.getAnalysis<LoopInfo>();
+ LoopInfo &LI = getAnalysis<LoopInfo>();
if (Loop *PL = LI.getLoopFor(OrigPreHeader))
PL->addBasicBlockToLoop(NewPreHeader, LI.getBase());
BranchInst::Create(NewHeader, NewPreHeader);
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index e20fb16..564c7ac 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -51,6 +51,7 @@ STATISTIC(NumEliminated, "Number of strides eliminated");
STATISTIC(NumShadow, "Number of Shadow IVs optimized");
STATISTIC(NumImmSunk, "Number of common expr immediates sunk into uses");
STATISTIC(NumLoopCond, "Number of loop terminating conds optimized");
+STATISTIC(NumCountZero, "Number of count iv optimized to count toward zero");
static cl::opt<bool> EnableFullLSRMode("enable-full-lsr",
cl::init(false),
@@ -107,7 +108,7 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
- explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
+ explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
LoopPass(&ID), TLI(tli) {
}
@@ -131,12 +132,10 @@ namespace {
}
private:
- ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV *const * &CondStride);
-
void OptimizeIndvars(Loop *L);
- void OptimizeLoopCountIV(Loop *L);
+
+ /// OptimizeLoopTermCond - Change loop terminating condition to use the
+ /// postinc iv when possible.
void OptimizeLoopTermCond(Loop *L);
/// OptimizeShadowIV - If IV is used in a int-to-float cast
@@ -148,8 +147,28 @@ namespace {
ICmpInst *OptimizeMax(Loop *L, ICmpInst *Cond,
IVStrideUse* &CondUse);
+ /// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for
+ /// deciding when to exit the loop is used only for that purpose, try to
+ /// rearrange things so it counts down to a test against zero.
+ bool OptimizeLoopCountIV(Loop *L);
+ bool OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse, Loop *L);
+
+ /// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a
+ /// single stride of IV. All of the users may have different starting
+ /// values, and this may not be the only stride.
+ void StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L);
+ void StrengthReduceIVUsers(Loop *L);
+
+ ICmpInst *ChangeCompareStride(Loop *L, ICmpInst *Cond,
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass = false);
+
bool FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV *const * &CondStride);
+ const SCEV* &CondStride);
bool RequiresTypeConversion(const Type *Ty, const Type *NewTy);
const SCEV *CheckForIVReuse(bool, bool, bool, const SCEV *const&,
IVExpr&, const Type*,
@@ -164,6 +183,7 @@ namespace {
bool &AllUsesAreAddresses,
bool &AllUsesAreOutsideLoop,
std::vector<BasedUser> &UsersToProcess);
+ bool StrideMightBeShared(const SCEV *Stride, Loop *L, bool CheckPreInc);
bool ShouldUseFullStrengthReductionMode(
const std::vector<BasedUser> &UsersToProcess,
const Loop *L,
@@ -188,9 +208,7 @@ namespace {
Instruction *IVIncInsertPt,
const Loop *L,
SCEVExpander &PreheaderRewriter);
- void StrengthReduceStridedIVUsers(const SCEV *const &Stride,
- IVUsersOfOneStride &Uses,
- Loop *L);
+
void DeleteTriviallyDeadInstructions();
};
}
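The new OptimizeLoopTermCond comment above ("use the postinc iv when possible") is easiest to see at the source level. The sketch below is an illustration written for this note, not code from the patch: both functions behave identically, but in the second one the exit compare reads only the already-incremented counter, so the pre-increment value does not have to stay live across the backedge and the IV's live ranges can be coalesced into one register.

    void f_preinc(int n, int *a) {
      int i = 0;
      while (i < n) {          // exit test reads i: i and i+1 are both live
        a[i] = 0;              // at the bottom of the loop
        i = i + 1;
      }
    }

    void f_postinc(int n, int *a) {
      if (n <= 0) return;      // guard hoisted out, as loop rotation would do
      int i = 0;
      do {
        a[i] = 0;
        i = i + 1;
      } while (i < n);         // exit test reads the incremented i only
    }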
@@ -208,11 +226,11 @@ Pass *llvm::createLoopStrengthReducePass(const TargetLowering *TLI) {
/// their operands subsequently dead.
void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
if (DeadInsts.empty()) return;
-
+
while (!DeadInsts.empty()) {
Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
DeadInsts.pop_back();
-
+
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
@@ -223,14 +241,14 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
DeadInsts.push_back(U);
}
}
-
+
I->eraseFromParent();
Changed = true;
}
}
-/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
-/// subexpression that is an AddRec from a loop other than L. An outer loop
+/// containsAddRecFromDifferentLoop - Determine whether expression S involves a
+/// subexpression that is an AddRec from a loop other than L. An outer loop
/// of L is OK, but not an inner loop nor a disjoint loop.
static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
// This is very common, put it first.
@@ -256,7 +274,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
containsAddRecFromDifferentLoop(DE->getRHS(), L);
#if 0
- // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
+ // SCEVSDivExpr has been backed out temporarily, but will be back; we'll
// need this when it is.
if (const SCEVSDivExpr *DE = dyn_cast<SCEVSDivExpr>(S))
return containsAddRecFromDifferentLoop(DE->getLHS(), L) ||
@@ -328,7 +346,7 @@ namespace {
/// field to the Imm field (below). BasedUser values are sorted by this
/// field.
const SCEV *Base;
-
+
/// Inst - The instruction using the induction variable.
Instruction *Inst;
@@ -352,11 +370,11 @@ namespace {
// instruction for a loop and uses outside the loop that are dominated by
// the loop.
bool isUseOfPostIncrementedValue;
-
+
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
: SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- Imm(SE->getIntegerSCEV(0, Base->getType())),
+ Imm(SE->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
// Once we rewrite the code to insert the new IVs we want, update the
@@ -367,8 +385,8 @@ namespace {
SCEVExpander &Rewriter, Loop *L, Pass *P,
LoopInfo &LI,
SmallVectorImpl<WeakVH> &DeadInsts);
-
- Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+
+ Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -383,7 +401,7 @@ void BasedUser::dump() const {
errs() << " Inst: " << *Inst;
}
-Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
+Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
Instruction *IP, Loop *L,
@@ -393,10 +411,10 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
// want to insert this expression before the user, we'd rather pull it out as
// many loops as possible.
Instruction *BaseInsertPt = IP;
-
+
// Figure out the most-nested loop that IP is in.
Loop *InsertLoop = LI.getLoopFor(IP->getParent());
-
+
// If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
// the preheader of the outer-most loop where NewBase is not loop invariant.
if (L->contains(IP->getParent()))
@@ -404,7 +422,7 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
InsertLoop = InsertLoop->getParentLoop();
}
-
+
Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
const SCEV *NewValSCEV = SE->getUnknown(Base);
@@ -430,7 +448,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
-
+
// However, if the Operand is itself an instruction, the (potentially
// complex) inserted code may be shared by many users. Because of this, we
// want to emit code for the computation of the operand right before its old
@@ -442,7 +460,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
//
// If this is a use outside the loop (which means after, since it is based
// on a loop indvar) we use the post-incremented value, so that we don't
- // artificially make the preinc value live out the bottom of the loop.
+ // artificially make the preinc value live out the bottom of the loop.
if (!isUseOfPostIncrementedValue && L->contains(Inst->getParent())) {
if (NewBasePt && isa<PHINode>(OperandValToReplace)) {
InsertPt = NewBasePt;
@@ -477,7 +495,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
if (PN->getIncomingValue(i) == OperandValToReplace) {
// If the original expression is outside the loop, put the replacement
// code in the same place as the original expression,
- // which need not be an immediate predecessor of this PHI. This way we
+ // which need not be an immediate predecessor of this PHI. This way we
// need only one copy of it even if it is referenced multiple times in
// the PHI. We don't do this when the original expression is inside the
// loop because multiple copies sometimes do useful sinking of code in
@@ -490,6 +508,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
// is the canonical backedge for this loop, as this can make some
// inserted code be in an illegal position.
if (e != 1 && PHIPred->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(PHIPred->getTerminator()) &&
(PN->getParent() != L->getHeader() || !L->contains(PHIPred))) {
// First step, split the critical edge.
@@ -572,11 +591,11 @@ static bool fitsInAddressMode(const SCEV *const &V, const Type *AccessTy,
static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
Loop *L, ScalarEvolution *SE) {
if (Val->isLoopInvariant(L)) return; // Nothing to do.
-
+
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i)
if (!SAE->getOperand(i)->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
@@ -594,7 +613,7 @@ static void MoveLoopVariantsToImmediateField(const SCEV *&Val, const SCEV *&Imm,
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveLoopVariantsToImmediateField(Start, Imm, L, SE);
-
+
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
Val = SE->getAddRecExpr(Ops, SARE->getLoop());
@@ -617,11 +636,11 @@ static void MoveImmediateValues(const TargetLowering *TLI,
if (const SCEVAddExpr *SAE = dyn_cast<SCEVAddExpr>(Val)) {
SmallVector<const SCEV *, 4> NewOps;
NewOps.reserve(SAE->getNumOperands());
-
+
for (unsigned i = 0; i != SAE->getNumOperands(); ++i) {
const SCEV *NewOp = SAE->getOperand(i);
MoveImmediateValues(TLI, AccessTy, NewOp, Imm, isAddress, L, SE);
-
+
if (!NewOp->isLoopInvariant(L)) {
// If this is a loop-variant expression, it must stay in the immediate
// field of the expression.
@@ -640,7 +659,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
// Try to pull immediates out of the start value of nested addrec's.
const SCEV *Start = SARE->getStart();
MoveImmediateValues(TLI, AccessTy, Start, Imm, isAddress, L, SE);
-
+
if (Start != SARE->getStart()) {
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Start;
@@ -656,8 +675,8 @@ static void MoveImmediateValues(const TargetLowering *TLI,
const SCEV *SubImm = SE->getIntegerSCEV(0, Val->getType());
const SCEV *NewOp = SME->getOperand(1);
MoveImmediateValues(TLI, AccessTy, NewOp, SubImm, isAddress, L, SE);
-
- // If we extracted something out of the subexpressions, see if we can
+
+ // If we extracted something out of the subexpressions, see if we can
// simplify this!
if (NewOp != SME->getOperand(1)) {
// Scale SubImm up by "8". If the result is a target constant, we are
@@ -666,7 +685,7 @@ static void MoveImmediateValues(const TargetLowering *TLI,
if (fitsInAddressMode(SubImm, AccessTy, TLI, false)) {
// Accumulate the immediate.
Imm = SE->getAddExpr(Imm, SubImm);
-
+
// Update what is left of 'Val'.
Val = SE->getMulExpr(SME->getOperand(0), NewOp);
return;
@@ -714,7 +733,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
SmallVector<const SCEV *, 4> Ops(SARE->op_begin(), SARE->op_end());
Ops[0] = Zero; // Start with zero base.
SubExprs.push_back(SE->getAddRecExpr(Ops, SARE->getLoop()));
-
+
SeparateSubExprs(SubExprs, SARE->getOperand(0), SE);
}
@@ -724,7 +743,7 @@ static void SeparateSubExprs(SmallVector<const SCEV *, 16> &SubExprs,
}
}
-// This is logically local to the following function, but C++ says we have
+// This is logically local to the following function, but C++ says we have
// to make it file scope.
struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
@@ -762,7 +781,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// an addressing mode "for free"; such expressions are left within the loop.
// struct SubExprUseData { unsigned Count; bool notAllUsesAreFree; };
std::map<const SCEV *, SubExprUseData> SubExpressionUseData;
-
+
// UniqueSubExprs - Keep track of all of the subexpressions we see in the
// order we see them.
SmallVector<const SCEV *, 16> UniqueSubExprs;
@@ -779,7 +798,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
if (!L->contains(Uses[i].Inst->getParent()))
continue;
NumUsesInsideLoop++;
-
+
// If the base is zero (which is common), return zero now, there are no
// CSEs we can find.
if (Uses[i].Base == Zero) return Zero;
@@ -811,13 +830,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// Now that we know how many times each is used, build Result. Iterate over
// UniqueSubexprs so that we have a stable ordering.
for (unsigned i = 0, e = UniqueSubExprs.size(); i != e; ++i) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(UniqueSubExprs[i]);
assert(I != SubExpressionUseData.end() && "Entry not found?");
- if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
+ if (I->second.Count == NumUsesInsideLoop) { // Found CSE!
if (I->second.notAllUsesAreFree)
Result = SE->getAddExpr(Result, I->first);
- else
+ else
FreeResult = SE->getAddExpr(FreeResult, I->first);
} else
// Remove non-cse's from SubExpressionUseData.
@@ -849,13 +868,13 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
// If we found no CSE's, return now.
if (Result == Zero) return Result;
-
+
// If we still have a FreeResult, remove its subexpressions from
// SubExpressionUseData. This means they will remain in the use Bases.
if (FreeResult != Zero) {
SeparateSubExprs(SubExprs, FreeResult, SE);
for (unsigned j = 0, e = SubExprs.size(); j != e; ++j) {
- std::map<const SCEV *, SubExprUseData>::iterator I =
+ std::map<const SCEV *, SubExprUseData>::iterator I =
SubExpressionUseData.find(SubExprs[j]);
SubExpressionUseData.erase(I);
}
@@ -882,7 +901,7 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
SubExprs.erase(SubExprs.begin()+j);
--j; --e;
}
-
+
// Finally, add the non-shared expressions together.
if (SubExprs.empty())
Uses[i].Base = Zero;
@@ -890,11 +909,11 @@ RemoveCommonExpressionsFromUseBases(std::vector<BasedUser> &Uses,
Uses[i].Base = SE->getAddExpr(SubExprs);
SubExprs.clear();
}
-
+
return Result;
}
-/// ValidScale - Check whether the given Scale is valid for all loads and
+/// ValidScale - Check whether the given Scale is valid for all loads and
/// stores in UsersToProcess.
///
bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
@@ -911,7 +930,7 @@ bool LoopStrengthReduce::ValidScale(bool HasBaseReg, int64_t Scale,
AccessTy = getAccessType(UsersToProcess[i].Inst);
else if (isa<PHINode>(UsersToProcess[i].Inst))
continue;
-
+
TargetLowering::AddrMode AM;
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(UsersToProcess[i].Imm))
AM.BaseOffs = SC->getValue()->getSExtValue();
@@ -983,13 +1002,13 @@ bool LoopStrengthReduce::RequiresTypeConversion(const Type *Ty1,
/// reuse is possible. Factors can be negative on some targets, e.g. ARM.
///
/// If all uses are outside the loop, we don't require that all multiplies
-/// be folded into the addressing mode, nor even that the factor be constant;
-/// a multiply (executed once) outside the loop is better than another IV
+/// be folded into the addressing mode, nor even that the factor be constant;
+/// a multiply (executed once) outside the loop is better than another IV
/// within. Well, usually.
const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
bool AllUsesAreAddresses,
bool AllUsesAreOutsideLoop,
- const SCEV *const &Stride,
+ const SCEV *const &Stride,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
if (StrideNoReuse.count(Stride))
@@ -999,11 +1018,16 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
StrideNoReuse.count(SI->first))
continue;
+ // The other stride has no uses, don't reuse it.
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI =
+ IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
+ if (UI->second->Users.empty())
+ continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SI->first != Stride &&
(unsigned(abs64(SInt)) < SSInt || (SInt % SSInt) != 0))
@@ -1052,7 +1076,7 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// an existing IV if we can.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
@@ -1072,9 +1096,9 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
// -1*old.
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
- std::map<const SCEV *, IVsOfOneStride>::iterator SI =
+ std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end())
+ if (SI == IVsByStride.end())
continue;
if (const SCEVMulExpr *ME = dyn_cast<SCEVMulExpr>(SI->first))
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(ME->getOperand(0)))
@@ -1104,18 +1128,18 @@ static bool PartitionByIsUseOfPostIncrementedValue(const BasedUser &Val) {
static bool isNonConstantNegative(const SCEV *const &Expr) {
const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(Expr);
if (!Mul) return false;
-
+
// If there is a constant factor, it will be first.
const SCEVConstant *SC = dyn_cast<SCEVConstant>(Mul->getOperand(0));
if (!SC) return false;
-
+
// Return true if the value is negative, this matches things like (-42 * V).
return SC->getValue()->getValue().isNegative();
}
/// CollectIVUsers - Transform our list of users and offsets to a bit more
-/// complex table. In this new vector, each 'BasedUser' contains 'Base', the base
-/// of the strided accesses, as well as the old information from Uses. We
+/// complex table. In this new vector, each 'BasedUser' contains 'Base', the
+/// base of the strided accesses, as well as the old information from Uses. We
/// progressively move information from the Base field to the Imm field, until
/// we eventually have the full access expression to rewrite the use.
const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
@@ -1145,7 +1169,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
// We now have a whole bunch of uses of like-strided induction variables, but
// they might all have different bases. We want to emit one PHI node for this
// stride which we fold as many common expressions (between the IVs) into as
- // possible. Start by identifying the common expressions in the base values
+ // possible. Start by identifying the common expressions in the base values
// for the strides (e.g. if we have "A+C+B" and "A+B+D" as our bases, find
// "A+B"), emit it to the preheader, then remove the expression from the
// UsersToProcess base values.
@@ -1165,11 +1189,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
if (!L->contains(UsersToProcess[i].Inst->getParent())) {
UsersToProcess[i].Imm = SE->getAddExpr(UsersToProcess[i].Imm,
UsersToProcess[i].Base);
- UsersToProcess[i].Base =
+ UsersToProcess[i].Base =
SE->getIntegerSCEV(0, UsersToProcess[i].Base->getType());
} else {
// Not all uses are outside the loop.
- AllUsesAreOutsideLoop = false;
+ AllUsesAreOutsideLoop = false;
// Addressing modes can be folded into loads and stores. Be careful that
// the store is through the expression, not of the expression though.
@@ -1183,11 +1207,11 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
if (isAddress)
HasAddress = true;
-
+
// If this use isn't an address, then not all uses are addresses.
if (!isAddress && !isPHI)
AllUsesAreAddresses = false;
-
+
MoveImmediateValues(TLI, UsersToProcess[i].Inst, UsersToProcess[i].Base,
UsersToProcess[i].Imm, isAddress, L, SE);
}
@@ -1198,7 +1222,7 @@ const SCEV *LoopStrengthReduce::CollectIVUsers(const SCEV *const &Stride,
// for one fewer iv.
if (NumPHI > 1)
AllUsesAreAddresses = false;
-
+
// There are no in-loop address uses.
if (AllUsesAreAddresses && (!HasAddress && !AllUsesAreOutsideLoop))
AllUsesAreAddresses = false;
@@ -1491,12 +1515,13 @@ static bool IsImmFoldedIntoAddrMode(GlobalValue *GV, int64_t Offset,
return true;
}
-/// StrengthReduceStridedIVUsers - Strength reduce all of the users of a single
+/// StrengthReduceIVUsersOfStride - Strength reduce all of the users of a single
/// stride of IV. All of the users may have different starting values, and this
/// may not be the only stride.
-void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
- IVUsersOfOneStride &Uses,
- Loop *L) {
+void
+LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
+ IVUsersOfOneStride &Uses,
+ Loop *L) {
// If all the users are moved to another stride, then there is nothing to do.
if (Uses.Users.empty())
return;
@@ -1518,8 +1543,8 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// have the full access expression to rewrite the use.
std::vector<BasedUser> UsersToProcess;
const SCEV *CommonExprs = CollectIVUsers(Stride, Uses, L, AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
// Sort the UsersToProcess array so that users with common bases are
// next to each other.
@@ -1588,12 +1613,12 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
const SCEV *RewriteFactor = SE->getIntegerSCEV(0, ReplacedTy);
IVExpr ReuseIV(SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
- SE->getIntegerSCEV(0,
+ SE->getIntegerSCEV(0,
Type::getInt32Ty(Preheader->getContext())),
0);
- /// Choose a strength-reduction strategy and prepare for it by creating
- /// the necessary PHIs and adjusting the bookkeeping.
+ // Choose a strength-reduction strategy and prepare for it by creating
+ // the necessary PHIs and adjusting the bookkeeping.
if (ShouldUseFullStrengthReductionMode(UsersToProcess, L,
AllUsesAreAddresses, Stride)) {
PrepareToStrengthReduceFully(UsersToProcess, Stride, CommonExprs, L,
@@ -1606,7 +1631,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// If all uses are addresses, check if it is possible to reuse an IV. The
// new IV must have a stride that is a multiple of the old stride; the
// multiple must be a number that can be encoded in the scale field of the
- // target addressing mode; and we must have a valid instruction after this
+ // target addressing mode; and we must have a valid instruction after this
// substitution, including the immediate field, if any.
RewriteFactor = CheckForIVReuse(HaveCommonExprs, AllUsesAreAddresses,
AllUsesAreOutsideLoop,
@@ -1649,7 +1674,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// We want this constant emitted into the preheader! This is just
// using cast as a copy so BitCast (no-op cast) is appropriate
BaseV = new BitCastInst(BaseV, BaseV->getType(), "preheaderinsert",
- PreInsertPt);
+ PreInsertPt);
}
}
@@ -1723,7 +1748,7 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
assert(SE->getTypeSizeInBits(RewriteExpr->getType()) <
SE->getTypeSizeInBits(ReuseIV.Base->getType()) &&
"Unexpected lengthening conversion!");
- typedBase = SE->getTruncateExpr(ReuseIV.Base,
+ typedBase = SE->getTruncateExpr(ReuseIV.Base,
RewriteExpr->getType());
}
RewriteExpr = SE->getMinusSCEV(RewriteExpr, typedBase);
@@ -1775,11 +1800,29 @@ void LoopStrengthReduce::StrengthReduceStridedIVUsers(const SCEV *const &Stride,
// different starting values, into different PHIs.
}
+void LoopStrengthReduce::StrengthReduceIVUsers(Loop *L) {
+ // Note: this processes each stride/type pair individually. All users
+ // passed into StrengthReduceIVUsersOfStride have the same type AND stride.
+ // Also, note that we iterate over IVUsesByStride indirectly by using
+ // StrideOrder. This extra layer of indirection makes the ordering of
+ // strides deterministic - not dependent on map order.
+ for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e; ++Stride) {
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ StrengthReduceIVUsersOfStride(SI->first, *SI->second, L);
+ }
+}
+
/// FindIVUserForCond - If Cond has an operand that is an expression of an IV,
/// set the IV user and stride information and return true, otherwise return
/// false.
-bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse,
- const SCEV *const * &CondStride) {
+bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond,
+ IVStrideUse *&CondUse,
+ const SCEV* &CondStride) {
for (unsigned Stride = 0, e = IU->StrideOrder.size();
Stride != e && !CondUse; ++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
@@ -1793,12 +1836,12 @@ bool LoopStrengthReduce::FindIVUserForCond(ICmpInst *Cond, IVStrideUse *&CondUse
// InstCombine does it as well for simple uses, it's not clear that it
// occurs enough in real life to handle.
CondUse = UI;
- CondStride = &SI->first;
+ CondStride = SI->first;
return true;
}
}
return false;
-}
+}
namespace {
// Constant strides come first which in turn are sorted by their absolute
@@ -1851,8 +1894,9 @@ namespace {
/// v1 = v1 + 3
/// if (v1 < 30) goto loop
ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
- IVStrideUse* &CondUse,
- const SCEV *const* &CondStride) {
+ IVStrideUse* &CondUse,
+ const SCEV* &CondStride,
+ bool PostPass) {
// If there's only one stride in the loop, there's nothing to do here.
if (IU->StrideOrder.size() < 2)
return Cond;
@@ -1860,23 +1904,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
// trying to change the condition because the stride will still
// remain.
std::map<const SCEV *, IVUsersOfOneStride *>::iterator I =
- IU->IVUsesByStride.find(*CondStride);
- if (I == IU->IVUsesByStride.end() ||
- I->second->Users.size() != 1)
+ IU->IVUsesByStride.find(CondStride);
+ if (I == IU->IVUsesByStride.end())
return Cond;
+ if (I->second->Users.size() > 1) {
+ for (ilist<IVStrideUse>::iterator II = I->second->Users.begin(),
+ EE = I->second->Users.end(); II != EE; ++II) {
+ if (II->getUser() == Cond)
+ continue;
+ if (!isInstructionTriviallyDead(II->getUser()))
+ return Cond;
+ }
+ }
// Only handle constant strides for now.
- const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride);
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(CondStride);
if (!SC) return Cond;
ICmpInst::Predicate Predicate = Cond->getPredicate();
int64_t CmpSSInt = SC->getValue()->getSExtValue();
- unsigned BitWidth = SE->getTypeSizeInBits((*CondStride)->getType());
+ unsigned BitWidth = SE->getTypeSizeInBits(CondStride->getType());
uint64_t SignBit = 1ULL << (BitWidth-1);
const Type *CmpTy = Cond->getOperand(0)->getType();
const Type *NewCmpTy = NULL;
unsigned TyBits = SE->getTypeSizeInBits(CmpTy);
unsigned NewTyBits = 0;
- const SCEV **NewStride = NULL;
+ const SCEV *NewStride = NULL;
Value *NewCmpLHS = NULL;
Value *NewCmpRHS = NULL;
int64_t Scale = 1;
@@ -1885,16 +1937,31 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (ConstantInt *C = dyn_cast<ConstantInt>(Cond->getOperand(1))) {
int64_t CmpVal = C->getValue().getSExtValue();
+ // Check the relevant induction variable for conformance to
+ // the pattern.
+ const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine())
+ return Cond;
+
+ const SCEVConstant *StartC = dyn_cast<SCEVConstant>(AR->getStart());
// Check stride constant and the comparison constant signs to detect
// overflow.
- if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
- return Cond;
+ if (StartC) {
+ if ((StartC->getValue()->getSExtValue() < CmpVal && CmpSSInt < 0) ||
+ (StartC->getValue()->getSExtValue() > CmpVal && CmpSSInt > 0))
+ return Cond;
+ } else {
+ // More restrictive check for the other cases.
+ if ((CmpVal & SignBit) != (CmpSSInt & SignBit))
+ return Cond;
+ }
// Look for a suitable stride / iv as replacement.
for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
IU->IVUsesByStride.find(IU->StrideOrder[i]);
- if (!isa<SCEVConstant>(SI->first))
+ if (!isa<SCEVConstant>(SI->first) || SI->second->Users.empty())
continue;
int64_t SSInt = cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
if (SSInt == CmpSSInt ||
@@ -1904,6 +1971,14 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Scale = SSInt / CmpSSInt;
int64_t NewCmpVal = CmpVal * Scale;
+
+ // If the old icmp value fits in the icmp immediate field but the new one
+ // doesn't, try something else.
+ if (TLI &&
+ TLI->isLegalICmpImmediate(CmpVal) &&
+ !TLI->isLegalICmpImmediate(NewCmpVal))
+ continue;
+
APInt Mul = APInt(BitWidth*2, CmpVal, true);
Mul = Mul * APInt(BitWidth*2, Scale, true);
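The isLegalICmpImmediate check just added is a TargetLowering query: it asks whether an integer compare against the given constant can encode that constant directly in the instruction (targets such as ARM accept only a limited range of compare immediates). Read this way, the new code declines to trade a compare whose constant is already encodable for one whose scaled constant is not; that reading is inferred from the hook's name and its use here, not from its definition:

    if (TLI &&
        TLI->isLegalICmpImmediate(CmpVal) &&    // old constant encodes directly
        !TLI->isLegalICmpImmediate(NewCmpVal))  // scaled constant would not
      continue;                                 // keep looking for another iv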
// Check for overflow.
@@ -1918,8 +1993,6 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
(CmpVal & SignBit) != (NewCmpVal & SignBit))
continue;
- if (NewCmpVal == CmpVal)
- continue;
// Pick the best iv to use trying to avoid a cast.
NewCmpLHS = NULL;
for (ilist<IVStrideUse>::iterator UI = SI->second->Users.begin(),
@@ -1969,19 +2042,21 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (NewTyBits != TyBits && !isa<SCEVConstant>(CondUse->getOffset()))
continue;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- continue;
+ if (!PostPass) {
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ // Avoid rewriting the compare instruction with an iv of new stride
+ // if it's likely the new stride uses will be rewritten using the
+ // stride of the compare instruction.
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
+ continue;
+ }
// Avoid rewriting the compare instruction with an iv which has
// implicit extension or truncation built into it.
@@ -1994,7 +2069,7 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
if (Scale < 0 && !Cond->isEquality())
Predicate = ICmpInst::getSwappedPredicate(Predicate);
- NewStride = &IU->StrideOrder[i];
+ NewStride = IU->StrideOrder[i];
if (!isa<PointerType>(NewCmpTy))
NewCmpRHS = ConstantInt::get(NewCmpTy, NewCmpVal);
else {
@@ -2031,13 +2106,16 @@ ICmpInst *LoopStrengthReduce::ChangeCompareStride(Loop *L, ICmpInst *Cond,
Cond = new ICmpInst(OldCond, Predicate, NewCmpLHS, NewCmpRHS,
L->getHeader()->getName() + ".termcond");
+ DEBUG(errs() << " Change compare stride in Inst " << *OldCond);
+ DEBUG(errs() << " to " << *Cond << '\n');
+
// Remove the old compare instruction. The old indvar is probably dead too.
DeadInsts.push_back(CondUse->getOperandValToReplace());
OldCond->replaceAllUsesWith(Cond);
OldCond->eraseFromParent();
- IU->IVUsesByStride[*NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
- CondUse = &IU->IVUsesByStride[*NewStride]->Users.back();
+ IU->IVUsesByStride[NewStride]->addUser(NewOffset, Cond, NewCmpLHS);
+ CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
CondStride = NewStride;
++NumEliminated;
Changed = true;
@@ -2180,7 +2258,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
return;
-
+
for (unsigned Stride = 0, e = IU->StrideOrder.size(); Stride != e;
++Stride) {
std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
@@ -2199,13 +2277,13 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
/* If shadow use is an int->float cast then insert a second IV
to eliminate this cast.
- for (unsigned i = 0; i < n; ++i)
+ for (unsigned i = 0; i < n; ++i)
foo((double)i);
is transformed into
double d = 0.0;
- for (unsigned i = 0; i < n; ++i, ++d)
+ for (unsigned i = 0; i < n; ++i, ++d)
foo(d);
*/
if (UIToFPInst *UCast = dyn_cast<UIToFPInst>(CandidateUI->getUser()))
@@ -2227,7 +2305,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
const Type *SrcTy = PH->getType();
int Mantissa = DestTy->getFPMantissaWidth();
- if (Mantissa == -1) continue;
+ if (Mantissa == -1) continue;
if ((int)SE->getTypeSizeInBits(SrcTy) > Mantissa)
continue;
@@ -2239,12 +2317,12 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
Entry = 1;
Latch = 0;
}
-
+
ConstantInt *Init = dyn_cast<ConstantInt>(PH->getIncomingValue(Entry));
if (!Init) continue;
Constant *NewInit = ConstantFP::get(DestTy, Init->getZExtValue());
- BinaryOperator *Incr =
+ BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(Latch));
if (!Incr) continue;
if (Incr->getOpcode() != Instruction::Add
@@ -2271,7 +2349,7 @@ void LoopStrengthReduce::OptimizeShadowIV(Loop *L) {
/* create new increment. '++d' in above example. */
Constant *CFP = ConstantFP::get(DestTy, C->getZExtValue());
- BinaryOperator *NewIncr =
+ BinaryOperator *NewIncr =
BinaryOperator::Create(Incr->getOpcode() == Instruction::Add ?
Instruction::FAdd : Instruction::FSub,
NewPH, CFP, "IV.S.next.", Incr);
@@ -2297,237 +2375,385 @@ void LoopStrengthReduce::OptimizeIndvars(Loop *L) {
OptimizeShadowIV(L);
}
-/// OptimizeLoopTermCond - Change loop terminating condition to use the
+bool LoopStrengthReduce::StrideMightBeShared(const SCEV* Stride, Loop *L,
+ bool CheckPreInc) {
+ int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(IU->StrideOrder[i]);
+ const SCEV *Share = SI->first;
+ if (!isa<SCEVConstant>(SI->first) || Share == Stride)
+ continue;
+ int64_t SSInt = cast<SCEVConstant>(Share)->getValue()->getSExtValue();
+ if (SSInt == SInt)
+ return true; // This can definitely be reused.
+ if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
+ continue;
+ int64_t Scale = SSInt / SInt;
+ bool AllUsesAreAddresses = true;
+ bool AllUsesAreOutsideLoop = true;
+ std::vector<BasedUser> UsersToProcess;
+ const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
+ AllUsesAreAddresses,
+ AllUsesAreOutsideLoop,
+ UsersToProcess);
+ if (AllUsesAreAddresses &&
+ ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess)) {
+ if (!CheckPreInc)
+ return true;
+ // Any pre-inc iv use?
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[Share];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (!I->isUseOfPostIncrementedValue())
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
+/// isUsedByExitBranch - Return true if the icmp is used by a loop terminating
+/// conditional branch, or is and'ed / or'ed with other conditions before being
+/// used as that branch's condition.
+static bool isUsedByExitBranch(ICmpInst *Cond, Loop *L) {
+ BasicBlock *CondBB = Cond->getParent();
+ if (!L->isLoopExiting(CondBB))
+ return false;
+ BranchInst *TermBr = dyn_cast<BranchInst>(CondBB->getTerminator());
+ if (!TermBr || !TermBr->isConditional())
+ return false;
+
+ Value *User = *Cond->use_begin();
+ Instruction *UserInst = dyn_cast<Instruction>(User);
+ while (UserInst &&
+ (UserInst->getOpcode() == Instruction::And ||
+ UserInst->getOpcode() == Instruction::Or)) {
+ if (!UserInst->hasOneUse() || UserInst->getParent() != CondBB)
+ return false;
+ User = *User->use_begin();
+ UserInst = dyn_cast<Instruction>(User);
+ }
+ return User == TermBr;
+}
+
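A source-level shape of what isUsedByExitBranch accepts, written as C++ for illustration only (the pass itself inspects LLVM IR): the compare may reach the loop-exiting branch through a chain of single-use and/or values that all live in the exiting block.

    bool keep_going(int i, int n, bool other) {
      bool c = (i != n);      // the compare fed by the induction variable
      bool e = c && other;    // an 'and' link in the chain, with one use
      return e;               // the value the exiting conditional branch tests
    }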
+static bool ShouldCountToZero(ICmpInst *Cond, IVStrideUse* &CondUse,
+ ScalarEvolution *SE, Loop *L,
+ const TargetLowering *TLI = 0) {
+ if (!L->contains(Cond->getParent()))
+ return false;
+
+ if (!isa<SCEVConstant>(CondUse->getOffset()))
+ return false;
+
+ // Handle only tests for equality for the moment.
+ if (!Cond->isEquality() || !Cond->hasOneUse())
+ return false;
+ if (!isUsedByExitBranch(Cond, L))
+ return false;
+
+ Value *CondOp0 = Cond->getOperand(0);
+ const SCEV *IV = SE->getSCEV(CondOp0);
+ const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
+ if (!AR || !AR->isAffine())
+ return false;
+
+ const SCEVConstant *SC = dyn_cast<SCEVConstant>(AR->getStepRecurrence(*SE));
+ if (!SC || SC->getValue()->getSExtValue() < 0)
+ // If it's already counting down, don't do anything.
+ return false;
+
+ // If the RHS of the comparison is not loop invariant, the rewrite
+ // cannot be done. Also bail out if it's already comparing against zero.
+ // If we are checking this before the cmp stride optimization, also bail out
+ // if it's comparing against an already legal immediate.
+ Value *RHS = Cond->getOperand(1);
+ ConstantInt *RHSC = dyn_cast<ConstantInt>(RHS);
+ if (!L->isLoopInvariant(RHS) ||
+ (RHSC && RHSC->isZero()) ||
+ (RHSC && TLI && TLI->isLegalICmpImmediate(RHSC->getSExtValue())))
+ return false;
+
+ // Make sure the IV is only used for counting. Value may be preinc or
+ // postinc; 2 uses in either case.
+ if (!CondOp0->hasNUses(2))
+ return false;
+
+ return true;
+}
+
+/// OptimizeLoopTermCond - Change loop terminating condition to use the
/// postinc iv when possible.
void LoopStrengthReduce::OptimizeLoopTermCond(Loop *L) {
- // Finally, get the terminating condition for the loop if possible. If we
- // can, we want to change it to use a post-incremented version of its
- // induction variable, to allow coalescing the live ranges for the IV into
- // one register value.
BasicBlock *LatchBlock = L->getLoopLatch();
- BasicBlock *ExitingBlock = L->getExitingBlock();
-
- if (!ExitingBlock)
- // Multiple exits, just look at the exit in the latch block if there is one.
- ExitingBlock = LatchBlock;
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (!TermBr)
- return;
- if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
- return;
+ bool LatchExit = L->isLoopExiting(LatchBlock);
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
- // Search IVUsesByStride to find Cond's IVUse if there is one.
- IVStrideUse *CondUse = 0;
- const SCEV *const *CondStride = 0;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
- if (!FindIVUserForCond(Cond, CondUse, CondStride))
- return; // setcc doesn't use the IV.
-
- if (ExitingBlock != LatchBlock) {
- if (!Cond->hasOneUse())
- // See below, we don't want the condition to be cloned.
- return;
-
- // If exiting block is the latch block, we know it's safe and profitable to
- // transform the icmp to use post-inc iv. Otherwise do so only if it would
- // not reuse another iv and its iv would be reused by other uses. We are
- // optimizing for the case where the icmp is the only use of the iv.
- IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[*CondStride];
- for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
- E = StrideUses.Users.end(); I != E; ++I) {
- if (I->getUser() == Cond)
- continue;
- if (!I->isUseOfPostIncrementedValue())
- return;
- }
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
+ BasicBlock *ExitingBlock = ExitingBlocks[i];
- // FIXME: This is expensive, and worse still ChangeCompareStride does a
- // similar check. Can we perform all the icmp related transformations after
- // StrengthReduceStridedIVUsers?
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(*CondStride)) {
- int64_t SInt = SC->getValue()->getSExtValue();
- for (unsigned NewStride = 0, ee = IU->StrideOrder.size(); NewStride != ee;
- ++NewStride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[NewStride]);
- if (!isa<SCEVConstant>(SI->first) || SI->first == *CondStride)
- continue;
- int64_t SSInt =
- cast<SCEVConstant>(SI->first)->getValue()->getSExtValue();
- if (SSInt == SInt)
- return; // This can definitely be reused.
- if (unsigned(abs64(SSInt)) < SInt || (SSInt % SInt) != 0)
- continue;
- int64_t Scale = SSInt / SInt;
- bool AllUsesAreAddresses = true;
- bool AllUsesAreOutsideLoop = true;
- std::vector<BasedUser> UsersToProcess;
- const SCEV *CommonExprs = CollectIVUsers(SI->first, *SI->second, L,
- AllUsesAreAddresses,
- AllUsesAreOutsideLoop,
- UsersToProcess);
- // Avoid rewriting the compare instruction with an iv of new stride
- // if it's likely the new stride uses will be rewritten using the
- // stride of the compare instruction.
- if (AllUsesAreAddresses &&
- ValidScale(!CommonExprs->isZero(), Scale, UsersToProcess))
- return;
- }
- }
+ // Finally, get the terminating condition for the loop if possible. If we
+ // can, we want to change it to use a post-incremented version of its
+ // induction variable, to allow coalescing the live ranges for the IV into
+ // one register value.
- StrideNoReuse.insert(*CondStride);
- }
+ BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
+ if (!TermBr)
+ continue;
+ // FIXME: Overly conservative, termination condition could be an 'or' etc..
+ if (TermBr->isUnconditional() || !isa<ICmpInst>(TermBr->getCondition()))
+ continue;
- // If the trip count is computed in terms of a max (due to ScalarEvolution
- // being unable to find a sufficient guard, for example), change the loop
- // comparison to use SLT or ULT instead of NE.
- Cond = OptimizeMax(L, Cond, CondUse);
-
- // If possible, change stride and operands of the compare instruction to
- // eliminate one stride.
- if (ExitingBlock == LatchBlock)
- Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
-
- // It's possible for the setcc instruction to be anywhere in the loop, and
- // possible for it to have multiple users. If it is not immediately before
- // the latch block branch, move it.
- if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
- if (Cond->hasOneUse()) { // Condition has a single use, just move it.
- Cond->moveBefore(TermBr);
- } else {
- // Otherwise, clone the terminating condition and insert into the loopend.
- Cond = cast<ICmpInst>(Cond->clone());
- Cond->setName(L->getHeader()->getName() + ".termcond");
- LatchBlock->getInstList().insert(TermBr, Cond);
-
- // Clone the IVUse, as the old use still exists!
- IU->IVUsesByStride[*CondStride]->addUser(CondUse->getOffset(), Cond,
- CondUse->getOperandValToReplace());
- CondUse = &IU->IVUsesByStride[*CondStride]->Users.back();
+ // Search IVUsesByStride to find Cond's IVUse if there is one.
+ IVStrideUse *CondUse = 0;
+ const SCEV *CondStride = 0;
+ ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+ if (!FindIVUserForCond(Cond, CondUse, CondStride))
+ continue;
+
+ // If the latch block is exiting and it's not a single block loop, it's
+ // not safe to use postinc iv in other exiting blocks. FIXME: overly
+ // conservative? How about icmp stride optimization?
+ bool UsePostInc = !(e > 1 && LatchExit && ExitingBlock != LatchBlock);
+ if (UsePostInc && ExitingBlock != LatchBlock) {
+ if (!Cond->hasOneUse())
+ // See below, we don't want the condition to be cloned.
+ UsePostInc = false;
+ else {
+ // If exiting block is the latch block, we know it's safe and profitable
+ // to transform the icmp to use post-inc iv. Otherwise do so only if it
+ // would not reuse another iv and its iv would be reused by other uses.
+ // We are optimizing for the case where the icmp is the only use of the
+ // iv.
+ IVUsersOfOneStride &StrideUses = *IU->IVUsesByStride[CondStride];
+ for (ilist<IVStrideUse>::iterator I = StrideUses.Users.begin(),
+ E = StrideUses.Users.end(); I != E; ++I) {
+ if (I->getUser() == Cond)
+ continue;
+ if (!I->isUseOfPostIncrementedValue()) {
+ UsePostInc = false;
+ break;
+ }
+ }
+ }
+
+ // If the iv for the stride might be shared and any of its users might use
+ // the pre-inc iv, then it's not safe to use the post-inc iv.
+ if (UsePostInc &&
+ isa<SCEVConstant>(CondStride) &&
+ StrideMightBeShared(CondStride, L, true))
+ UsePostInc = false;
}
- }
- // If we get to here, we know that we can transform the setcc instruction to
- // use the post-incremented version of the IV, allowing us to coalesce the
- // live ranges for the IV correctly.
- CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), *CondStride));
- CondUse->setIsUseOfPostIncrementedValue(true);
- Changed = true;
+ // If the trip count is computed in terms of a max (due to ScalarEvolution
+ // being unable to find a sufficient guard, for example), change the loop
+ // comparison to use SLT or ULT instead of NE.
+ Cond = OptimizeMax(L, Cond, CondUse);
+
+ // If possible, change stride and operands of the compare instruction to
+ // eliminate one stride. However, avoid rewriting the compare instruction
+ // with an iv of new stride if it's likely the new stride uses will be
+ // rewritten using the stride of the compare instruction.
+ if (ExitingBlock == LatchBlock && isa<SCEVConstant>(CondStride)) {
+ // If the condition stride is a constant and it's the only use, we might
+ // want to optimize it first by turning it to count toward zero.
+ if (!StrideMightBeShared(CondStride, L, false) &&
+ !ShouldCountToZero(Cond, CondUse, SE, L, TLI))
+ Cond = ChangeCompareStride(L, Cond, CondUse, CondStride);
+ }
- ++NumLoopCond;
-}
+ if (!UsePostInc)
+ continue;
-/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
-/// when to exit the loop is used only for that purpose, try to rearrange things
-/// so it counts down to a test against zero.
-void LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+ DEBUG(errs() << " Change loop exiting icmp to use postinc iv: "
+ << *Cond << '\n');
- // If the number of times the loop is executed isn't computable, give up.
- const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(L);
- if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- return;
+ // It's possible for the setcc instruction to be anywhere in the loop, and
+ // possible for it to have multiple users. If it is not immediately before
+ // the exiting block branch, move it.
+ if (&*++BasicBlock::iterator(Cond) != (Instruction*)TermBr) {
+ if (Cond->hasOneUse()) { // Condition has a single use, just move it.
+ Cond->moveBefore(TermBr);
+ } else {
+ // Otherwise, clone the terminating condition and insert into the
+ // loopend.
+ Cond = cast<ICmpInst>(Cond->clone());
+ Cond->setName(L->getHeader()->getName() + ".termcond");
+ ExitingBlock->getInstList().insert(TermBr, Cond);
+
+ // Clone the IVUse, as the old use still exists!
+ IU->IVUsesByStride[CondStride]->addUser(CondUse->getOffset(), Cond,
+ CondUse->getOperandValToReplace());
+ CondUse = &IU->IVUsesByStride[CondStride]->Users.back();
+ }
+ }
- // Get the terminating condition for the loop if possible (this isn't
- // necessarily in the latch, or a block that's a predecessor of the header).
- if (!L->getExitBlock())
- return; // More than one loop exit blocks.
+ // If we get to here, we know that we can transform the setcc instruction to
+ // use the post-incremented version of the IV, allowing us to coalesce the
+ // live ranges for the IV correctly.
+ CondUse->setOffset(SE->getMinusSCEV(CondUse->getOffset(), CondStride));
+ CondUse->setIsUseOfPostIncrementedValue(true);
+ Changed = true;
- // Okay, there is one exit block. Try to find the condition that causes the
- // loop to be exited.
- BasicBlock *ExitingBlock = L->getExitingBlock();
- if (!ExitingBlock)
- return; // More than one block exiting!
+ ++NumLoopCond;
+ }
+}
- // Okay, we've computed the exiting block. See what condition causes us to
- // exit.
- //
- // FIXME: we should be able to handle switch instructions (with a single exit)
- BranchInst *TermBr = dyn_cast<BranchInst>(ExitingBlock->getTerminator());
- if (TermBr == 0) return;
- assert(TermBr->isConditional() && "If unconditional, it can't be in loop!");
- if (!isa<ICmpInst>(TermBr->getCondition()))
- return;
- ICmpInst *Cond = cast<ICmpInst>(TermBr->getCondition());
+bool LoopStrengthReduce::OptimizeLoopCountIVOfStride(const SCEV* &Stride,
+ IVStrideUse* &CondUse,
+ Loop *L) {
+ // If the only use is an icmp used by a loop-exiting conditional branch, then
+ // attempt the optimization.
+ BasedUser User = BasedUser(*CondUse, SE);
+ assert(isa<ICmpInst>(User.Inst) && "Expecting an ICMPInst!");
+ ICmpInst *Cond = cast<ICmpInst>(User.Inst);
+
+ // Less strict check now that compare stride optimization is done.
+ if (!ShouldCountToZero(Cond, CondUse, SE, L))
+ return false;
- // Handle only tests for equality for the moment, and only stride 1.
- if (Cond->getPredicate() != CmpInst::ICMP_EQ)
- return;
- const SCEV *IV = SE->getSCEV(Cond->getOperand(0));
- const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(IV);
- const SCEV *One = SE->getIntegerSCEV(1, BackedgeTakenCount->getType());
- if (!AR || !AR->isAffine() || AR->getStepRecurrence(*SE) != One)
- return;
- // If the RHS of the comparison is defined inside the loop, the rewrite
- // cannot be done.
- if (Instruction *CR = dyn_cast<Instruction>(Cond->getOperand(1)))
- if (L->contains(CR->getParent()))
- return;
+ Value *CondOp0 = Cond->getOperand(0);
+ PHINode *PHIExpr = dyn_cast<PHINode>(CondOp0);
+ Instruction *Incr;
+ if (!PHIExpr) {
+ // Value tested is postinc. Find the phi node.
+ Incr = dyn_cast<BinaryOperator>(CondOp0);
+ // FIXME: Just use User.OperandValToReplace here?
+ if (!Incr || Incr->getOpcode() != Instruction::Add)
+ return false;
- // Make sure the IV is only used for counting. Value may be preinc or
- // postinc; 2 uses in either case.
- if (!Cond->getOperand(0)->hasNUses(2))
- return;
- PHINode *phi = dyn_cast<PHINode>(Cond->getOperand(0));
- Instruction *incr;
- if (phi && phi->getParent()==L->getHeader()) {
- // value tested is preinc. Find the increment.
- // A CmpInst is not a BinaryOperator; we depend on this.
- Instruction::use_iterator UI = phi->use_begin();
- incr = dyn_cast<BinaryOperator>(UI);
- if (!incr)
- incr = dyn_cast<BinaryOperator>(++UI);
- // 1 use for postinc value, the phi. Unnecessarily conservative?
- if (!incr || !incr->hasOneUse() || incr->getOpcode()!=Instruction::Add)
- return;
- } else {
- // Value tested is postinc. Find the phi node.
- incr = dyn_cast<BinaryOperator>(Cond->getOperand(0));
- if (!incr || incr->getOpcode()!=Instruction::Add)
- return;
-
- Instruction::use_iterator UI = Cond->getOperand(0)->use_begin();
- phi = dyn_cast<PHINode>(UI);
- if (!phi)
- phi = dyn_cast<PHINode>(++UI);
+ PHIExpr = dyn_cast<PHINode>(Incr->getOperand(0));
+ if (!PHIExpr)
+ return false;
// 1 use for preinc value, the increment.
- if (!phi || phi->getParent()!=L->getHeader() || !phi->hasOneUse())
- return;
+ if (!PHIExpr->hasOneUse())
+ return false;
+ } else {
+ assert(isa<PHINode>(CondOp0) &&
+ "Unexpected loop exiting counting instruction sequence!");
+ PHIExpr = cast<PHINode>(CondOp0);
+ // Value tested is preinc. Find the increment.
+ // A CmpInst is not a BinaryOperator; we depend on this.
+ Instruction::use_iterator UI = PHIExpr->use_begin();
+ Incr = dyn_cast<BinaryOperator>(UI);
+ if (!Incr)
+ Incr = dyn_cast<BinaryOperator>(++UI);
+ // One use for postinc value, the phi. Unnecessarily conservative?
+ if (!Incr || !Incr->hasOneUse() || Incr->getOpcode() != Instruction::Add)
+ return false;
}
// Replace the increment with a decrement.
- BinaryOperator *decr =
- BinaryOperator::Create(Instruction::Sub, incr->getOperand(0),
- incr->getOperand(1), "tmp", incr);
- incr->replaceAllUsesWith(decr);
- incr->eraseFromParent();
+ DEBUG(errs() << "LSR: Examining use ");
+ DEBUG(WriteAsOperand(errs(), CondOp0, /*PrintType=*/false));
+ DEBUG(errs() << " in Inst: " << *Cond << '\n');
+ BinaryOperator *Decr = BinaryOperator::Create(Instruction::Sub,
+ Incr->getOperand(0), Incr->getOperand(1), "tmp", Incr);
+ Incr->replaceAllUsesWith(Decr);
+ Incr->eraseFromParent();
// Substitute endval-startval for the original startval, and 0 for the
- // original endval. Since we're only testing for equality this is OK even
+ // original endval. Since we're only testing for equality this is OK even
// if the computation wraps around.
BasicBlock *Preheader = L->getLoopPreheader();
Instruction *PreInsertPt = Preheader->getTerminator();
- int inBlock = L->contains(phi->getIncomingBlock(0)) ? 1 : 0;
- Value *startVal = phi->getIncomingValue(inBlock);
- Value *endVal = Cond->getOperand(1);
- // FIXME check for case where both are constant
+ unsigned InBlock = L->contains(PHIExpr->getIncomingBlock(0)) ? 1 : 0;
+ Value *StartVal = PHIExpr->getIncomingValue(InBlock);
+ Value *EndVal = Cond->getOperand(1);
+ DEBUG(errs() << " Optimize loop counting iv to count down ["
+ << *EndVal << " .. " << *StartVal << "]\n");
+
+ // FIXME: check for case where both are constant.
Constant* Zero = ConstantInt::get(Cond->getOperand(1)->getType(), 0);
- BinaryOperator *NewStartVal =
- BinaryOperator::Create(Instruction::Sub, endVal, startVal,
- "tmp", PreInsertPt);
- phi->setIncomingValue(inBlock, NewStartVal);
+ BinaryOperator *NewStartVal = BinaryOperator::Create(Instruction::Sub,
+ EndVal, StartVal, "tmp", PreInsertPt);
+ PHIExpr->setIncomingValue(InBlock, NewStartVal);
Cond->setOperand(1, Zero);
+ DEBUG(errs() << " New icmp: " << *Cond << "\n");
+
+ int64_t SInt = cast<SCEVConstant>(Stride)->getValue()->getSExtValue();
+ const SCEV *NewStride = 0;
+ bool Found = false;
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ const SCEV *OldStride = IU->StrideOrder[i];
+ if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(OldStride))
+ if (SC->getValue()->getSExtValue() == -SInt) {
+ Found = true;
+ NewStride = OldStride;
+ break;
+ }
+ }
+
+ if (!Found)
+ NewStride = SE->getIntegerSCEV(-SInt, Stride->getType());
+ IU->AddUser(NewStride, CondUse->getOffset(), Cond, Cond->getOperand(0));
+ IU->IVUsesByStride[Stride]->removeUser(CondUse);
+
+ CondUse = &IU->IVUsesByStride[NewStride]->Users.back();
+ Stride = NewStride;
- Changed = true;
+ ++NumCountZero;
+
+ return true;
}
-bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
+/// OptimizeLoopCountIV - If, after all sharing of IVs, the IV used for deciding
+/// when to exit the loop is used only for that purpose, try to rearrange things
+/// so it counts down to a test against zero.
+bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
+ bool ThisChanged = false;
+ for (unsigned i = 0, e = IU->StrideOrder.size(); i != e; ++i) {
+ const SCEV *Stride = IU->StrideOrder[i];
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
+ IU->IVUsesByStride.find(Stride);
+ assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SI->first->isLoopInvariant(L))
+ continue;
+ // If stride is a constant and it has an icmpinst use, check if we can
+ // optimize the loop to count down.
+ if (isa<SCEVConstant>(Stride) && SI->second->Users.size() == 1) {
+ Instruction *User = SI->second->Users.begin()->getUser();
+ if (!isa<ICmpInst>(User))
+ continue;
+ const SCEV *CondStride = Stride;
+ IVStrideUse *Use = &*SI->second->Users.begin();
+ if (!OptimizeLoopCountIVOfStride(CondStride, Use, L))
+ continue;
+ ThisChanged = true;
+ // Now check if it's possible to reuse this iv for other stride uses.
+ for (unsigned j = 0, ee = IU->StrideOrder.size(); j != ee; ++j) {
+ const SCEV *SStride = IU->StrideOrder[j];
+ if (SStride == CondStride)
+ continue;
+ std::map<const SCEV *, IVUsersOfOneStride *>::iterator SII =
+ IU->IVUsesByStride.find(SStride);
+ assert(SII != IU->IVUsesByStride.end() && "Stride doesn't exist!");
+ // FIXME: Generalize to non-affine IV's.
+ if (!SII->first->isLoopInvariant(L))
+ continue;
+ // FIXME: Rewrite other stride using CondStride.
+ }
+ }
+ }
+
+ Changed |= ThisChanged;
+ return ThisChanged;
+}
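As a rough source-level picture of the rewrite OptimizeLoopCountIV performs (illustrative only; the names are invented and the pass itself operates on LLVM IR, not C++):

void work();  // stand-in for the loop body

// Before: the counting IV is compared against a loop-invariant end value.
void countUp(int Start, int End) {
  for (int I = Start; I != End; ++I)
    work();
}

// After: the IV starts at End - Start and counts down to a test against
// zero, the cheaper loop-termination check the pass prefers.
void countDown(int Start, int End) {
  for (int N = End - Start; N != 0; --N)
    work();
}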
+
+bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
IU = &getAnalysis<IVUsers>();
LI = &getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
Changed = false;
+ // If LoopSimplify form is not available, stay out of trouble.
+ if (!L->getLoopPreheader() || !L->getLoopLatch())
+ return false;
+
if (!IU->IVUsesByStride.empty()) {
DEBUG(errs() << "\nLSR on \"" << L->getHeader()->getParent()->getName()
<< "\" ";
@@ -2545,7 +2771,7 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// Change loop terminating condition to use the postinc iv when possible
// and optimize loop terminating compare. FIXME: Move this after
- // StrengthReduceStridedIVUsers?
+ // StrengthReduceIVUsersOfStride?
OptimizeLoopTermCond(L);
// FIXME: We can shrink overlarge IV's here. e.g. if the code has
@@ -2561,26 +2787,12 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// IVsByStride keeps IVs for one particular loop.
assert(IVsByStride.empty() && "Stale entries in IVsByStride?");
- // Note: this processes each stride/type pair individually. All users
- // passed into StrengthReduceStridedIVUsers have the same type AND stride.
- // Also, note that we iterate over IVUsesByStride indirectly by using
- // StrideOrder. This extra layer of indirection makes the ordering of
- // strides deterministic - not dependent on map order.
- for (unsigned Stride = 0, e = IU->StrideOrder.size();
- Stride != e; ++Stride) {
- std::map<const SCEV *, IVUsersOfOneStride *>::iterator SI =
- IU->IVUsesByStride.find(IU->StrideOrder[Stride]);
- assert(SI != IU->IVUsesByStride.end() && "Stride doesn't exist!");
- // FIXME: Generalize to non-affine IV's.
- if (!SI->first->isLoopInvariant(L))
- continue;
- StrengthReduceStridedIVUsers(SI->first, *SI->second, L);
- }
- }
+ StrengthReduceIVUsers(L);
- // After all sharing is done, see if we can adjust the loop to test against
- // zero instead of counting up to a maximum. This is usually faster.
- OptimizeLoopCountIV(L);
+ // After all sharing is done, see if we can adjust the loop to test against
+ // zero instead of counting up to a maximum. This is usually faster.
+ OptimizeLoopCountIV(L);
+ }
// We're done analyzing this loop; release all the state we built up for it.
IVsByStride.clear();
diff --git a/lib/Transforms/Scalar/LoopUnroll.cpp b/lib/Transforms/Scalar/LoopUnroll.cpp
deleted file mode 100644
index 837ec59..0000000
--- a/lib/Transforms/Scalar/LoopUnroll.cpp
+++ /dev/null
@@ -1,177 +0,0 @@
-//===-- LoopUnroll.cpp - Loop unroller pass -------------------------------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This pass implements a simple loop unroller. It works best when loops have
-// been canonicalized by the -indvars pass, allowing it to determine the trip
-// counts of loops easily.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "loop-unroll"
-#include "llvm/IntrinsicInst.h"
-#include "llvm/Transforms/Scalar.h"
-#include "llvm/Analysis/LoopInfo.h"
-#include "llvm/Analysis/LoopPass.h"
-#include "llvm/Support/CommandLine.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Transforms/Utils/UnrollLoop.h"
-#include <climits>
-
-using namespace llvm;
-
-static cl::opt<unsigned>
-UnrollThreshold("unroll-threshold", cl::init(100), cl::Hidden,
- cl::desc("The cut-off point for automatic loop unrolling"));
-
-static cl::opt<unsigned>
-UnrollCount("unroll-count", cl::init(0), cl::Hidden,
- cl::desc("Use this unroll count for all loops, for testing purposes"));
-
-static cl::opt<bool>
-UnrollAllowPartial("unroll-allow-partial", cl::init(false), cl::Hidden,
- cl::desc("Allows loops to be partially unrolled until "
- "-unroll-threshold loop size is reached."));
-
-namespace {
- class LoopUnroll : public LoopPass {
- public:
- static char ID; // Pass ID, replacement for typeid
- LoopUnroll() : LoopPass(&ID) {}
-
- /// A magic value for use with the Threshold parameter to indicate
- /// that the loop unroll should be performed regardless of how much
- /// code expansion would result.
- static const unsigned NoThreshold = UINT_MAX;
-
- bool runOnLoop(Loop *L, LPPassManager &LPM);
-
- /// This transformation requires natural loop information & requires that
- /// loop preheaders be inserted into the CFG...
- ///
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.addRequiredID(LoopSimplifyID);
- AU.addRequiredID(LCSSAID);
- AU.addRequired<LoopInfo>();
- AU.addPreservedID(LCSSAID);
- AU.addPreserved<LoopInfo>();
- // FIXME: Loop unroll requires LCSSA. And LCSSA requires dom info.
- // If loop unroll does not preserve dom info then LCSSA pass on next
- // loop will receive invalid dom info.
- // For now, recreate dom info, if loop is unrolled.
- AU.addPreserved<DominatorTree>();
- AU.addPreserved<DominanceFrontier>();
- }
- };
-}
-
-char LoopUnroll::ID = 0;
-static RegisterPass<LoopUnroll> X("loop-unroll", "Unroll loops");
-
-Pass *llvm::createLoopUnrollPass() { return new LoopUnroll(); }
-
-/// ApproximateLoopSize - Approximate the size of the loop.
-static unsigned ApproximateLoopSize(const Loop *L) {
- unsigned Size = 0;
- for (Loop::block_iterator I = L->block_begin(), E = L->block_end();
- I != E; ++I) {
- BasicBlock *BB = *I;
- Instruction *Term = BB->getTerminator();
- for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
- if (isa<PHINode>(I) && BB == L->getHeader()) {
- // Ignore PHI nodes in the header.
- } else if (I->hasOneUse() && I->use_back() == Term) {
- // Ignore instructions only used by the loop terminator.
- } else if (isa<DbgInfoIntrinsic>(I)) {
- // Ignore debug instructions
- } else if (isa<GetElementPtrInst>(I) && I->hasOneUse()) {
- // Ignore GEP as they generally are subsumed into a load or store.
- } else if (isa<CallInst>(I)) {
- // Estimate size overhead introduced by call instructions which
- // is higher than other instructions. Here 3 and 10 are magic
- // numbers that help one isolated test case from PR2067 without
- // negatively impacting measured benchmarks.
- Size += isa<IntrinsicInst>(I) ? 3 : 10;
- } else {
- ++Size;
- }
-
- // TODO: Ignore expressions derived from PHI and constants if inval of phi
- // is a constant, or if operation is associative. This will get induction
- // variables.
- }
- }
-
- return Size;
-}
-
-bool LoopUnroll::runOnLoop(Loop *L, LPPassManager &LPM) {
- assert(L->isLCSSAForm());
- LoopInfo *LI = &getAnalysis<LoopInfo>();
-
- BasicBlock *Header = L->getHeader();
- DEBUG(errs() << "Loop Unroll: F[" << Header->getParent()->getName()
- << "] Loop %" << Header->getName() << "\n");
- (void)Header;
-
- // Find trip count
- unsigned TripCount = L->getSmallConstantTripCount();
- unsigned Count = UnrollCount;
-
- // Automatically select an unroll count.
- if (Count == 0) {
- // Conservative heuristic: if we know the trip count, see if we can
- // completely unroll (subject to the threshold, checked below); otherwise
- // try to find greatest modulo of the trip count which is still under
- // threshold value.
- if (TripCount == 0)
- return false;
- Count = TripCount;
- }
-
- // Enforce the threshold.
- if (UnrollThreshold != NoThreshold) {
- unsigned LoopSize = ApproximateLoopSize(L);
- DEBUG(errs() << " Loop Size = " << LoopSize << "\n");
- uint64_t Size = (uint64_t)LoopSize*Count;
- if (TripCount != 1 && Size > UnrollThreshold) {
- DEBUG(errs() << " Too large to fully unroll with count: " << Count
- << " because size: " << Size << ">" << UnrollThreshold << "\n");
- if (!UnrollAllowPartial) {
- DEBUG(errs() << " will not try to unroll partially because "
- << "-unroll-allow-partial not given\n");
- return false;
- }
- // Reduce unroll count to be modulo of TripCount for partial unrolling
- Count = UnrollThreshold / LoopSize;
- while (Count != 0 && TripCount%Count != 0) {
- Count--;
- }
- if (Count < 2) {
- DEBUG(errs() << " could not unroll partially\n");
- return false;
- }
- DEBUG(errs() << " partially unrolling with count: " << Count << "\n");
- }
- }
-
- // Unroll the loop.
- Function *F = L->getHeader()->getParent();
- if (!UnrollLoop(L, Count, LI, &LPM))
- return false;
-
- // FIXME: Reconstruct dom info, because it is not preserved properly.
- DominatorTree *DT = getAnalysisIfAvailable<DominatorTree>();
- if (DT) {
- DT->runOnFunction(*F);
- DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>();
- if (DF)
- DF->runOnFunction(*F);
- }
- return true;
-}
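For reference, the partial-unroll count selection in the file removed above can be sketched as a standalone helper (hypothetical code; it assumes LoopSize > 0 and that the caller has already rejected the full-unroll case):

// Pick the largest unroll count that fits the size threshold and divides the
// trip count evenly; a return value of 0 means "do not unroll partially".
unsigned pickPartialUnrollCount(unsigned Threshold, unsigned LoopSize,
                                unsigned TripCount) {
  unsigned Count = Threshold / LoopSize;
  while (Count != 0 && TripCount % Count != 0)
    --Count;
  return Count < 2 ? 0 : Count;
}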
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index c7b00da..38d267a 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -32,7 +32,6 @@
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
#include "llvm/Instructions.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/InlineCost.h"
#include "llvm/Analysis/LoopInfo.h"
@@ -407,6 +406,10 @@ bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
initLoopData();
Function *F = loopHeader->getParent();
+ // If LoopSimplify was unable to form a preheader, don't do any unswitching.
+ if (!loopPreheader)
+ return false;
+
// If the condition is trivial, always unswitch. There is no code growth for
// this case.
if (!IsTrivialUnswitchCondition(LoopCond)) {
@@ -957,7 +960,7 @@ void LoopUnswitch::SimplifyCode(std::vector<Instruction*> &Worklist, Loop *L) {
Worklist.pop_back();
// Simple constant folding.
- if (Constant *C = ConstantFoldInstruction(I, I->getContext())) {
+ if (Constant *C = ConstantFoldInstruction(I)) {
ReplaceUsesOfWith(I, C, Worklist, L, LPM);
continue;
}
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index af29f97..8466918 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -27,7 +27,6 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Assembly/Writer.h"
#include "llvm/Support/CFG.h"
@@ -198,8 +197,7 @@ static BinaryOperator *isReassociableOp(Value *V, unsigned Opcode) {
/// LowerNegateToMultiply - Replace 0-X with X*-1.
///
static Instruction *LowerNegateToMultiply(Instruction *Neg,
- std::map<AssertingVH<>, unsigned> &ValueRankMap,
- LLVMContext &Context) {
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
Constant *Cst = Constant::getAllOnesValue(Neg->getType());
Instruction *Res = BinaryOperator::CreateMul(Neg->getOperand(1), Cst, "",Neg);
@@ -255,7 +253,6 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
std::vector<ValueEntry> &Ops) {
Value *LHS = I->getOperand(0), *RHS = I->getOperand(1);
unsigned Opcode = I->getOpcode();
- LLVMContext &Context = I->getContext();
// First step, linearize the expression if it is in ((A+B)+(C+D)) form.
BinaryOperator *LHSBO = isReassociableOp(LHS, Opcode);
@@ -265,13 +262,11 @@ void Reassociate::LinearizeExprTree(BinaryOperator *I,
// transform them into multiplies by -1 so they can be reassociated.
if (I->getOpcode() == Instruction::Mul) {
if (!LHSBO && LHS->hasOneUse() && BinaryOperator::isNeg(LHS)) {
- LHS = LowerNegateToMultiply(cast<Instruction>(LHS),
- ValueRankMap, Context);
+ LHS = LowerNegateToMultiply(cast<Instruction>(LHS), ValueRankMap);
LHSBO = isReassociableOp(LHS, Opcode);
}
if (!RHSBO && RHS->hasOneUse() && BinaryOperator::isNeg(RHS)) {
- RHS = LowerNegateToMultiply(cast<Instruction>(RHS),
- ValueRankMap, Context);
+ RHS = LowerNegateToMultiply(cast<Instruction>(RHS), ValueRankMap);
RHSBO = isReassociableOp(RHS, Opcode);
}
}
@@ -373,7 +368,7 @@ void Reassociate::RewriteExprTree(BinaryOperator *I,
// version of the value is returned, and BI is left pointing at the instruction
// that should be processed next by the reassociation pass.
//
-static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
+static Value *NegateValue(Value *V, Instruction *BI) {
// We are trying to expose opportunity for reassociation. One of the things
// that we want to do to achieve this is to push a negation as deep into an
// expression chain as possible, to expose the add instructions. In practice,
@@ -386,8 +381,8 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
if (Instruction *I = dyn_cast<Instruction>(V))
if (I->getOpcode() == Instruction::Add && I->hasOneUse()) {
// Push the negates through the add.
- I->setOperand(0, NegateValue(Context, I->getOperand(0), BI));
- I->setOperand(1, NegateValue(Context, I->getOperand(1), BI));
+ I->setOperand(0, NegateValue(I->getOperand(0), BI));
+ I->setOperand(1, NegateValue(I->getOperand(1), BI));
// We must move the add instruction here, because the neg instructions do
// not dominate the old add instruction in general. By moving it, we are
@@ -407,7 +402,7 @@ static Value *NegateValue(LLVMContext &Context, Value *V, Instruction *BI) {
/// ShouldBreakUpSubtract - Return true if we should break up this subtract of
/// X-Y into (X + -Y).
-static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
+static bool ShouldBreakUpSubtract(Instruction *Sub) {
// If this is a negation, we can't split it up!
if (BinaryOperator::isNeg(Sub))
return false;
@@ -431,7 +426,7 @@ static bool ShouldBreakUpSubtract(LLVMContext &Context, Instruction *Sub) {
/// BreakUpSubtract - If we have (X-Y), and if either X is an add, or if this is
/// only used by an add, transform this into (X+(0-Y)) to promote better
/// reassociation.
-static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
+static Instruction *BreakUpSubtract(Instruction *Sub,
std::map<AssertingVH<>, unsigned> &ValueRankMap) {
// Convert a subtract into an add and a neg instruction... so that sub
// instructions can be commuted with other add instructions...
@@ -439,7 +434,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
// Calculate the negative value of Operand 1 of the sub instruction...
// and set it as the RHS of the add instruction we just made...
//
- Value *NegVal = NegateValue(Context, Sub->getOperand(1), Sub);
+ Value *NegVal = NegateValue(Sub->getOperand(1), Sub);
Instruction *New =
BinaryOperator::CreateAdd(Sub->getOperand(0), NegVal, "", Sub);
New->takeName(Sub);
@@ -457,8 +452,7 @@ static Instruction *BreakUpSubtract(LLVMContext &Context, Instruction *Sub,
/// by one, change this into a multiply by a constant to assist with further
/// reassociation.
static Instruction *ConvertShiftToMul(Instruction *Shl,
- std::map<AssertingVH<>, unsigned> &ValueRankMap,
- LLVMContext &Context) {
+ std::map<AssertingVH<>, unsigned> &ValueRankMap) {
// If an operand of this shift is a reassociable multiply, or if the shift
// is used by a reassociable multiply or add, turn into a multiply.
if (isReassociableOp(Shl->getOperand(0), Instruction::Mul) ||
@@ -781,13 +775,11 @@ Value *Reassociate::OptimizeExpression(BinaryOperator *I,
/// ReassociateBB - Inspect all of the instructions in this basic block,
/// reassociating them as we go.
void Reassociate::ReassociateBB(BasicBlock *BB) {
- LLVMContext &Context = BB->getContext();
-
for (BasicBlock::iterator BBI = BB->begin(); BBI != BB->end(); ) {
Instruction *BI = BBI++;
if (BI->getOpcode() == Instruction::Shl &&
isa<ConstantInt>(BI->getOperand(1)))
- if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap, Context)) {
+ if (Instruction *NI = ConvertShiftToMul(BI, ValueRankMap)) {
MadeChange = true;
BI = NI;
}
@@ -800,8 +792,8 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
// If this is a subtract instruction which is not already in negate form,
// see if we can convert it to X+-Y.
if (BI->getOpcode() == Instruction::Sub) {
- if (ShouldBreakUpSubtract(Context, BI)) {
- BI = BreakUpSubtract(Context, BI, ValueRankMap);
+ if (ShouldBreakUpSubtract(BI)) {
+ BI = BreakUpSubtract(BI, ValueRankMap);
MadeChange = true;
} else if (BinaryOperator::isNeg(BI)) {
// Otherwise, this is a negation. See if the operand is a multiply tree
@@ -809,7 +801,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
if (isReassociableOp(BI->getOperand(1), Instruction::Mul) &&
(!BI->hasOneUse() ||
!isReassociableOp(BI->use_back(), Instruction::Mul))) {
- BI = LowerNegateToMultiply(BI, ValueRankMap, Context);
+ BI = LowerNegateToMultiply(BI, ValueRankMap);
MadeChange = true;
}
}
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 509a6db..c202a2c 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -795,9 +795,14 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
return markOverdefined(&EVI);
Value *AggVal = EVI.getAggregateOperand();
- unsigned i = *EVI.idx_begin();
- LatticeVal EltVal = getStructValueState(AggVal, i);
- mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ if (isa<StructType>(AggVal->getType())) {
+ unsigned i = *EVI.idx_begin();
+ LatticeVal EltVal = getStructValueState(AggVal, i);
+ mergeInValue(getValueState(&EVI), &EVI, EltVal);
+ } else {
+ // Otherwise, must be extracting from an array.
+ return markOverdefined(&EVI);
+ }
}
void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
index c047fca..001267a 100644
--- a/lib/Transforms/Scalar/SCCVN.cpp
+++ b/lib/Transforms/Scalar/SCCVN.cpp
@@ -507,7 +507,7 @@ void ValueTable::erase(Value *V) {
/// verifyRemoved - Verify that the value is removed from all internal data
/// structures.
void ValueTable::verifyRemoved(const Value *V) const {
- for (DenseMap<Value*, uint32_t>::iterator
+ for (DenseMap<Value*, uint32_t>::const_iterator
I = valueNumbering.begin(), E = valueNumbering.end(); I != E; ++I) {
assert(I->first != V && "Inst still occurs in value numbering map!");
}
@@ -629,9 +629,6 @@ bool SCCVN::runOnFunction(Function& F) {
}
}
- // FIXME: This code is commented out for now, because it can lead to the
- // insertion of a lot of redundant PHIs being inserted by SSAUpdater.
-#if 0
// Perform a forward data-flow to compute availability at all points on
// the CFG.
do {
@@ -709,7 +706,6 @@ bool SCCVN::runOnFunction(Function& F) {
CurInst->eraseFromParent();
}
}
-#endif
VT.clear();
for (DenseMap<BasicBlock*, ValueNumberScope*>::iterator
diff --git a/lib/Transforms/Scalar/Scalar.cpp b/lib/Transforms/Scalar/Scalar.cpp
index 5669da0..b54565c 100644
--- a/lib/Transforms/Scalar/Scalar.cpp
+++ b/lib/Transforms/Scalar/Scalar.cpp
@@ -26,10 +26,6 @@ void LLVMAddCFGSimplificationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createCFGSimplificationPass());
}
-void LLVMAddCondPropagationPass(LLVMPassManagerRef PM) {
- unwrap(PM)->add(createCondPropagationPass());
-}
-
void LLVMAddDeadStoreEliminationPass(LLVMPassManagerRef PM) {
unwrap(PM)->add(createDeadStoreEliminationPass());
}
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 575c93b..611505e 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -100,7 +100,7 @@ public:
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
- void EmitPutChar(Value *Char, IRBuilder<> &B);
+ Value *EmitPutChar(Value *Char, IRBuilder<> &B);
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
/// some pointer.
@@ -252,18 +252,20 @@ Value *LibCallOptimization::EmitUnaryFloatFnCall(Value *Op, const char *Name,
/// EmitPutChar - Emit a call to the putchar function. This assumes that Char
/// is an integer.
-void LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
+Value *LibCallOptimization::EmitPutChar(Value *Char, IRBuilder<> &B) {
Module *M = Caller->getParent();
Value *PutChar = M->getOrInsertFunction("putchar", Type::getInt32Ty(*Context),
Type::getInt32Ty(*Context), NULL);
CallInst *CI = B.CreateCall(PutChar,
B.CreateIntCast(Char,
Type::getInt32Ty(*Context),
+ /*isSigned*/true,
"chari"),
"putchar");
if (const Function *F = dyn_cast<Function>(PutChar->stripPointerCasts()))
CI->setCallingConv(F->getCallingConv());
+ return CI;
}
/// EmitPutS - Emit a call to the puts function. This assumes that Str is
@@ -302,7 +304,8 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
Type::getInt32Ty(*Context),
Type::getInt32Ty(*Context),
File->getType(), NULL);
- Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), "chari");
+ Char = B.CreateIntCast(Char, Type::getInt32Ty(*Context), /*isSigned*/true,
+ "chari");
CallInst *CI = B.CreateCall2(F, Char, File, "fputc");
if (const Function *Fn = dyn_cast<Function>(F->stripPointerCasts()))
@@ -955,6 +958,17 @@ struct MemCmpOpt : public LibCallOptimization {
return B.CreateZExt(B.CreateXor(LHSV, RHSV, "shortdiff"), CI->getType());
}
+ // Constant folding: memcmp(x, y, l) -> cnst (all arguments are constant)
+ std::string LHSStr, RHSStr;
+ if (GetConstantStringInfo(LHS, LHSStr) &&
+ GetConstantStringInfo(RHS, RHSStr)) {
+ // Make sure we're not reading out-of-bounds memory.
+ if (Len > LHSStr.length() || Len > RHSStr.length())
+ return 0;
+ uint64_t Ret = memcmp(LHSStr.data(), RHSStr.data(), Len);
+ return ConstantInt::get(CI->getType(), Ret);
+ }
+
return 0;
}
};
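The new constant-folding case can be pictured outside the pass as a small helper (a sketch, not the optimizer code; the length check mirrors the out-of-bounds guard above):

#include <cstdint>
#include <cstring>
#include <string>

// Fold memcmp(LHS, RHS, Len) to a constant when both operands are constant
// strings and Len stays within both of them.
bool foldConstantMemCmp(const std::string &LHSStr, const std::string &RHSStr,
                        uint64_t Len, int &Result) {
  if (Len > LHSStr.length() || Len > RHSStr.length())
    return false;  // would read out-of-bounds memory, so leave the call alone
  Result = std::memcmp(LHSStr.data(), RHSStr.data(), Len);
  return true;
}

For example, memcmp("abc", "abd", 2) folds to 0, while the same call with a length of 3 folds to a negative constant.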
@@ -1314,11 +1328,13 @@ struct PrintFOpt : public LibCallOptimization {
return CI->use_empty() ? (Value*)CI :
ConstantInt::get(CI->getType(), 0);
- // printf("x") -> putchar('x'), even for '%'.
+ // printf("x") -> putchar('x'), even for '%'. Return the result of putchar
+ // in case there is an error writing to stdout.
if (FormatStr.size() == 1) {
- EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context), FormatStr[0]), B);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 1);
+ Value *Res = EmitPutChar(ConstantInt::get(Type::getInt32Ty(*Context),
+ FormatStr[0]), B);
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
}
// printf("foo\n") --> puts("foo")
@@ -1339,9 +1355,10 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%c", chr) --> putchar(*(i8*)dst)
if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
isa<IntegerType>(CI->getOperand(2)->getType())) {
- EmitPutChar(CI->getOperand(2), B);
- return CI->use_empty() ? (Value*)CI :
- ConstantInt::get(CI->getType(), 1);
+ Value *Res = EmitPutChar(CI->getOperand(2), B);
+
+ if (CI->use_empty()) return CI;
+ return B.CreateIntCast(Res, CI->getType(), true);
}
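At the source level the strengthened printf transforms behave roughly like this (illustrative only): the putchar result is cast to the printf return type and propagated instead of being replaced by the constant 1, so an error writing to stdout is preserved.

#include <cstdio>

int before() { return std::printf("x"); }

// Equivalent form after the rewrite: putchar returns the character written,
// or EOF on error, and that value becomes the printf result.
int after() { return static_cast<int>(std::putchar('x')); }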
// printf("%s\n", str) --> puts(str)
@@ -2479,10 +2496,6 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// lround, lroundf, lroundl:
// * lround(cnst) -> cnst'
//
-// memcmp:
-// * memcmp(x,y,l) -> cnst
-// (if all arguments are constant and strlen(x) <= l and strlen(y) <= l)
-//
// pow, powf, powl:
// * pow(exp(x),y) -> exp(x*y)
// * pow(sqrt(x),y) -> pow(x,y*0.5)
diff --git a/lib/Transforms/Scalar/TailDuplication.cpp b/lib/Transforms/Scalar/TailDuplication.cpp
index 4864e23..b06ae3d 100644
--- a/lib/Transforms/Scalar/TailDuplication.cpp
+++ b/lib/Transforms/Scalar/TailDuplication.cpp
@@ -359,8 +359,7 @@ void TailDup::eliminateUnconditionalBranch(BranchInst *Branch) {
Instruction *Inst = BI++;
if (isInstructionTriviallyDead(Inst))
Inst->eraseFromParent();
- else if (Constant *C = ConstantFoldInstruction(Inst,
- Inst->getContext())) {
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
Inst->replaceAllUsesWith(C);
Inst->eraseFromParent();
}
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index b56e170..4119cb9 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -25,7 +25,7 @@
// unlikely, that the return returns something else (like constant 0), and
// can still be TRE'd. It can be TRE'd if ALL OTHER return instructions in
// the function return the exact same value.
-// 4. If it can prove that callees do not access theier caller stack frame,
+// 4. If it can prove that callees do not access their caller stack frame,
// they are marked as eligible for tail call elimination (by the code
// generator).
//
@@ -58,6 +58,7 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/Pass.h"
+#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Support/CFG.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
@@ -75,7 +76,7 @@ namespace {
private:
bool ProcessReturningBlock(ReturnInst *RI, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- std::vector<PHINode*> &ArgumentPHIs,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail);
bool CanMoveAboveCall(Instruction *I, CallInst *CI);
Value *CanTransformAccumulatorRecursion(Instruction *I, CallInst *CI);
@@ -90,7 +91,6 @@ FunctionPass *llvm::createTailCallEliminationPass() {
return new TailCallElim();
}
-
/// AllocaMightEscapeToCalls - Return true if this alloca may be accessed by
/// callees of this function. We only do very simple analysis right now, this
/// could be expanded in the future to use mod/ref information for particular
@@ -100,7 +100,7 @@ static bool AllocaMightEscapeToCalls(AllocaInst *AI) {
return true;
}
-/// FunctionContainsAllocas - Scan the specified basic block for alloca
+/// CheckForEscapingAllocas - Scan the specified basic block for alloca
/// instructions. If it contains any that might be accessed by calls, return
/// true.
static bool CheckForEscapingAllocas(BasicBlock *BB,
@@ -127,7 +127,7 @@ bool TailCallElim::runOnFunction(Function &F) {
BasicBlock *OldEntry = 0;
bool TailCallsAreMarkedTail = false;
- std::vector<PHINode*> ArgumentPHIs;
+ SmallVector<PHINode*, 8> ArgumentPHIs;
bool MadeChange = false;
bool FunctionContainsEscapingAllocas = false;
@@ -154,7 +154,6 @@ bool TailCallElim::runOnFunction(Function &F) {
/// happen. This bug is PR962.
if (FunctionContainsEscapingAllocas)
return false;
-
// Second pass, change any tail calls to loops.
for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB)
@@ -204,7 +203,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
if (I->mayHaveSideEffects()) // This also handles volatile loads.
return false;
- if (LoadInst* L = dyn_cast<LoadInst>(I)) {
+ if (LoadInst *L = dyn_cast<LoadInst>(I)) {
// Loads may always be moved above calls without side effects.
if (CI->mayHaveSideEffects()) {
// Non-volatile loads may be moved above a call with side effects if it
@@ -235,7 +234,7 @@ bool TailCallElim::CanMoveAboveCall(Instruction *I, CallInst *CI) {
// We currently handle static constants and arguments that are not modified as
// part of the recursion.
//
-static bool isDynamicConstant(Value *V, CallInst *CI) {
+static bool isDynamicConstant(Value *V, CallInst *CI, ReturnInst *RI) {
if (isa<Constant>(V)) return true; // Static constants are always dyn consts
// Check to see if this is an immutable argument, if so, the value
@@ -253,6 +252,15 @@ static bool isDynamicConstant(Value *V, CallInst *CI) {
if (CI->getOperand(ArgNo+1) == Arg)
return true;
}
+
+ // Switch cases are always constant integers. If the value is being switched
+ // on and the return is only reachable from one of its cases, it's
+ // effectively constant.
+ if (BasicBlock *UniquePred = RI->getParent()->getUniquePredecessor())
+ if (SwitchInst *SI = dyn_cast<SwitchInst>(UniquePred->getTerminator()))
+ if (SI->getCondition() == V)
+ return SI->getDefaultDest() != RI->getParent();
+
// Not a constant or immutable argument, we can't safely transform.
return false;
}
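A made-up example of the pattern the new check accepts: the return is reachable only through one case of a switch on the value, so the returned value is effectively that case's constant and no longer blocks tail-recursion elimination.

// Hypothetical tail-recursive function; assumes X >= 1 on entry. For the
// 'return X' in case 1, X is known to be 1, so it acts as a constant when
// the other return paths are examined.
int countDownToOne(int X) {
  switch (X) {
  case 1:
    return X;                      // only reachable when X == 1
  default:
    return countDownToOne(X - 1);  // candidate for tail-call elimination
  }
}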
@@ -265,10 +273,6 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
Function *F = TheRI->getParent()->getParent();
Value *ReturnedValue = 0;
- // TODO: Handle multiple value ret instructions;
- if (isa<StructType>(F->getReturnType()))
- return 0;
-
for (Function::iterator BBI = F->begin(), E = F->end(); BBI != E; ++BBI)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BBI->getTerminator()))
if (RI != TheRI) {
@@ -278,7 +282,7 @@ static Value *getCommonReturnValue(ReturnInst *TheRI, CallInst *CI) {
// evaluatable at the start of the initial invocation of the function,
// instead of at the end of the evaluation.
//
- if (!isDynamicConstant(RetOp, CI))
+ if (!isDynamicConstant(RetOp, CI, RI))
return 0;
if (ReturnedValue && RetOp != ReturnedValue)
@@ -315,7 +319,7 @@ Value *TailCallElim::CanTransformAccumulatorRecursion(Instruction *I,
bool TailCallElim::ProcessReturningBlock(ReturnInst *Ret, BasicBlock *&OldEntry,
bool &TailCallsAreMarkedTail,
- std::vector<PHINode*> &ArgumentPHIs,
+ SmallVector<PHINode*, 8> &ArgumentPHIs,
bool CannotTailCallElimCallsMarkedTail) {
BasicBlock *BB = Ret->getParent();
Function *F = BB->getParent();
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index c728c0b..2974592 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -275,8 +275,6 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
/// SplitEdge - Split the edge connecting specified block. Pass P must
/// not be NULL.
BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
- assert(!isa<IndirectBrInst>(BB->getTerminator()) &&
- "Cannot split an edge from an IndirectBrInst");
TerminatorInst *LatchTerm = BB->getTerminator();
unsigned SuccNum = 0;
#ifndef NDEBUG
@@ -386,6 +384,12 @@ BasicBlock *llvm::SplitBlockPredecessors(BasicBlock *BB,
bool IsLoopEntry = !!L;
bool SplitMakesNewLoopHeader = false;
for (unsigned i = 0; i != NumPreds; ++i) {
+ // This is slightly more strict than necessary; the minimum requirement
+ // is that there be no more than one indirectbr branching to BB. And
+ // all BlockAddress uses would need to be updated.
+ assert(!isa<IndirectBrInst>(Preds[i]->getTerminator()) &&
+ "Cannot split an edge from an IndirectBrInst");
+
Preds[i]->getTerminator()->replaceUsesOfWith(BB, NewBB);
if (LI) {
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index fd8862c..162d7b3 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -20,6 +20,7 @@
#include "llvm/IntrinsicInst.h"
#include "llvm/GlobalVariable.h"
#include "llvm/Function.h"
+#include "llvm/LLVMContext.h"
#include "llvm/Support/CFG.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include "llvm/Analysis/ConstantFolding.h"
@@ -322,8 +323,6 @@ void PruningFunctionCloner::CloneBlock(const BasicBlock *BB,
/// mapping its operands through ValueMap if they are available.
Constant *PruningFunctionCloner::
ConstantFoldMappedInstruction(const Instruction *I) {
- LLVMContext &Context = I->getContext();
-
SmallVector<Constant*, 8> Ops;
for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
if (Constant *Op = dyn_cast_or_null<Constant>(MapValue(I->getOperand(i),
@@ -333,9 +332,8 @@ ConstantFoldMappedInstruction(const Instruction *I) {
return 0; // All operands not constant!
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
- return ConstantFoldCompareInstOperands(CI->getPredicate(),
- &Ops[0], Ops.size(),
- Context, TD);
+ return ConstantFoldCompareInstOperands(CI->getPredicate(), Ops[0], Ops[1],
+ TD);
if (const LoadInst *LI = dyn_cast<LoadInst>(I))
if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
@@ -346,7 +344,28 @@ ConstantFoldMappedInstruction(const Instruction *I) {
CE);
return ConstantFoldInstOperands(I->getOpcode(), I->getType(), &Ops[0],
- Ops.size(), Context, TD);
+ Ops.size(), TD);
+}
+
+static MDNode *UpdateInlinedAtInfo(MDNode *InsnMD, MDNode *TheCallMD,
+ LLVMContext &Context) {
+ DILocation ILoc(InsnMD);
+ if (ILoc.isNull()) return InsnMD;
+
+ DILocation CallLoc(TheCallMD);
+ if (CallLoc.isNull()) return InsnMD;
+
+ DILocation OrigLocation = ILoc.getOrigLocation();
+ MDNode *NewLoc = TheCallMD;
+ if (!OrigLocation.isNull())
+ NewLoc = UpdateInlinedAtInfo(OrigLocation.getNode(), TheCallMD, Context);
+
+ SmallVector<Value *, 4> MDVs;
+ MDVs.push_back(InsnMD->getElement(0)); // Line
+ MDVs.push_back(InsnMD->getElement(1)); // Col
+ MDVs.push_back(InsnMD->getElement(2)); // Scope
+ MDVs.push_back(NewLoc);
+ return MDNode::get(Context, MDVs.data(), MDVs.size());
}
/// CloneAndPruneFunctionInto - This works exactly like CloneFunctionInto,
@@ -361,7 +380,8 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
SmallVectorImpl<ReturnInst*> &Returns,
const char *NameSuffix,
ClonedCodeInfo *CodeInfo,
- const TargetData *TD) {
+ const TargetData *TD,
+ Instruction *TheCall) {
assert(NameSuffix && "NameSuffix cannot be null!");
#ifndef NDEBUG
@@ -400,19 +420,52 @@ void llvm::CloneAndPruneFunctionInto(Function *NewFunc, const Function *OldFunc,
// references as we go. This uses ValueMap to do all the hard work.
//
BasicBlock::iterator I = NewBB->begin();
+
+ LLVMContext &Context = OldFunc->getContext();
+ unsigned DbgKind = Context.getMetadata().getMDKind("dbg");
+ MDNode *TheCallMD = NULL;
+ SmallVector<Value *, 4> MDVs;
+ if (TheCall && TheCall->hasMetadata())
+ TheCallMD = Context.getMetadata().getMD(DbgKind, TheCall);
// Handle PHI nodes specially, as we have to remove references to dead
// blocks.
if (PHINode *PN = dyn_cast<PHINode>(I)) {
// Skip over all PHI nodes, remembering them for later.
BasicBlock::const_iterator OldI = BI->begin();
- for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI)
+ for (; (PN = dyn_cast<PHINode>(I)); ++I, ++OldI) {
+ if (I->hasMetadata()) {
+ if (TheCallMD) {
+ if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) {
+ MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context);
+ Context.getMetadata().addMD(DbgKind, NewMD, I);
+ }
+ } else {
+ // The cloned instruction has dbg info but the call instruction
+ // does not have dbg info. Remove dbg info from cloned instruction.
+ Context.getMetadata().removeMD(DbgKind, I);
+ }
+ }
PHIToResolve.push_back(cast<PHINode>(OldI));
+ }
}
// Otherwise, remap the rest of the instructions normally.
- for (; I != NewBB->end(); ++I)
+ for (; I != NewBB->end(); ++I) {
+ if (I->hasMetadata()) {
+ if (TheCallMD) {
+ if (MDNode *IMD = Context.getMetadata().getMD(DbgKind, I)) {
+ MDNode *NewMD = UpdateInlinedAtInfo(IMD, TheCallMD, Context);
+ Context.getMetadata().addMD(DbgKind, NewMD, I);
+ }
+ } else {
+ // The cloned instruction has dbg info but the call instruction
+ // does not have dbg info. Remove dbg info from cloned instruction.
+ Context.getMetadata().removeMD(DbgKind, I);
+ }
+ }
RemapInstruction(I, ValueMap);
+ }
}
// Defer PHI resolution until rest of function is resolved, PHI resolution
diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp
index 20f5a4a..043046c 100644
--- a/lib/Transforms/Utils/InlineFunction.cpp
+++ b/lib/Transforms/Utils/InlineFunction.cpp
@@ -386,7 +386,7 @@ bool llvm::InlineFunction(CallSite CS, CallGraph *CG, const TargetData *TD,
// (which can happen, e.g., because an argument was constant), but we'll be
// happy with whatever the cloner can do.
CloneAndPruneFunctionInto(Caller, CalledFunc, ValueMap, Returns, ".i",
- &InlinedFunctionInfo, TD);
+ &InlinedFunctionInfo, TD, TheCall);
// Remember the first block that is newly cloned over.
FirstNewBlock = LastBlock; ++FirstNewBlock;
diff --git a/lib/Transforms/Utils/LCSSA.cpp b/lib/Transforms/Utils/LCSSA.cpp
index 56e662e..590d667 100644
--- a/lib/Transforms/Utils/LCSSA.cpp
+++ b/lib/Transforms/Utils/LCSSA.cpp
@@ -50,7 +50,6 @@ namespace {
LCSSA() : LoopPass(&ID) {}
// Cached analysis information for the current function.
- LoopInfo *LI;
DominatorTree *DT;
std::vector<BasicBlock*> LoopBlocks;
PredIteratorCache PredCache;
@@ -64,6 +63,9 @@ namespace {
///
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesCFG();
+
+ // LCSSA doesn't actually require LoopSimplify, but the PassManager
+ // doesn't know how to schedule LoopSimplify by itself.
AU.addRequiredID(LoopSimplifyID);
AU.addPreservedID(LoopSimplifyID);
AU.addRequiredTransitive<LoopInfo>();
@@ -121,7 +123,6 @@ static bool BlockDominatesAnExit(BasicBlock *BB,
bool LCSSA::runOnLoop(Loop *TheLoop, LPPassManager &LPM) {
L = TheLoop;
- LI = &LPM.getAnalysis<LoopInfo>();
DT = &getAnalysis<DominatorTree>();
// Get the set of exiting blocks.
@@ -216,7 +217,7 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
SSAUpdate.Initialize(Inst);
// Insert the LCSSA phi's into all of the exit blocks dominated by the
- // value., and add them to the Phi's map.
+ // value, and add them to the Phi's map.
for (SmallVectorImpl<BasicBlock*>::const_iterator BBI = ExitBlocks.begin(),
BBE = ExitBlocks.end(); BBI != BBE; ++BBI) {
BasicBlock *ExitBB = *BBI;
@@ -230,8 +231,17 @@ bool LCSSA::ProcessInstruction(Instruction *Inst,
PN->reserveOperandSpace(PredCache.GetNumPreds(ExitBB));
// Add inputs from inside the loop for this PHI.
- for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI)
+ for (BasicBlock **PI = PredCache.GetPreds(ExitBB); *PI; ++PI) {
PN->addIncoming(Inst, *PI);
+
+ // If the exit block has a predecessor not within the loop, arrange for
+ // the incoming value use corresponding to that predecessor to be
+ // rewritten in terms of a different LCSSA PHI.
+ if (!inLoop(*PI))
+ UsesToRewrite.push_back(
+ &PN->getOperandUse(
+ PN->getOperandNumForIncomingValue(PN->getNumIncomingValues()-1)));
+ }
// Remember that this phi makes the value alive in this block.
SSAUpdate.AddAvailableValue(ExitBB, PN);
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 543ddf1..aef0f5f 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -24,10 +24,14 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Analysis/ConstantFolding.h"
#include "llvm/Analysis/DebugInfo.h"
+#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -236,7 +240,7 @@ bool llvm::ConstantFoldTerminator(BasicBlock *BB) {
//===----------------------------------------------------------------------===//
-// Local dead code elimination...
+// Local dead code elimination.
//
/// isInstructionTriviallyDead - Return true if the result produced by the
@@ -248,6 +252,9 @@ bool llvm::isInstructionTriviallyDead(Instruction *I) {
// We don't want debug info removed by anything this general.
if (isa<DbgInfoIntrinsic>(I)) return false;
+ // Likewise for memory use markers.
+ if (isa<MemoryUseIntrinsic>(I)) return false;
+
if (!I->mayHaveSideEffects()) return true;
// Special case intrinsics that "may have side effects" but can be deleted
@@ -323,9 +330,53 @@ llvm::RecursivelyDeleteDeadPHINode(PHINode *PN) {
}
//===----------------------------------------------------------------------===//
-// Control Flow Graph Restructuring...
+// Control Flow Graph Restructuring.
//
+
+/// RemovePredecessorAndSimplify - Like BasicBlock::removePredecessor, this
+/// method is called when we're about to delete Pred as a predecessor of BB. If
+/// BB contains any PHI nodes, this drops the entries in the PHI nodes for Pred.
+///
+/// Unlike the removePredecessor method, this attempts to simplify uses of PHI
+/// nodes that collapse into identity values. For example, if we have:
+/// x = phi(1, 0, 0, 0)
+/// y = and x, z
+///
+/// .. and delete the predecessor corresponding to the '1', this will attempt to
+/// recursively fold the and to 0.
+void llvm::RemovePredecessorAndSimplify(BasicBlock *BB, BasicBlock *Pred,
+ TargetData *TD) {
+ // This only adjusts blocks with PHI nodes.
+ if (!isa<PHINode>(BB->begin()))
+ return;
+
+ // Remove the entries for Pred from the PHI nodes in BB, but do not simplify
+ // them down. This will leave us with single entry phi nodes and other phis
+ // that can be removed.
+ BB->removePredecessor(Pred, true);
+
+ WeakVH PhiIt = &BB->front();
+ while (PHINode *PN = dyn_cast<PHINode>(PhiIt)) {
+ PhiIt = &*++BasicBlock::iterator(cast<Instruction>(PhiIt));
+
+ Value *PNV = PN->hasConstantValue();
+ if (PNV == 0) continue;
+
+ // If we're able to simplify the phi to a single value, substitute the new
+ // value into all of its uses.
+ assert(PNV != PN && "hasConstantValue broken");
+
+ ReplaceAndSimplifyAllUses(PN, PNV, TD);
+
+ // If recursive simplification ended up deleting the next PHI node we would
+ // iterate to, then our iterator is invalid, restart scanning from the top
+ // of the block.
+ if (PhiIt == 0) PhiIt = &BB->front();
+ }
+}
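A hypothetical source-level analogue of the collapse described in the comment above: once the predecessor that supplied the '1' is removed, the phi carries only zeros and the 'and' folds to zero.

// Before the edge is deleted, X is phi(1, 0, 0, 0) at the join point.
int joined(bool FromDeletedPred, int Z) {
  int X = FromDeletedPred ? 1 : 0;
  return X & Z;  // after the '1' predecessor is gone, this simplifies to 0
}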
+
+
/// MergeBasicBlockIntoOnlyPred - DestBB is a block with one predecessor and its
/// predecessor is known to have one successor (DestBB!). Eliminate the edge
/// between them, moving the instructions in the predecessor into DestBB and
@@ -362,6 +413,174 @@ void llvm::MergeBasicBlockIntoOnlyPred(BasicBlock *DestBB, Pass *P) {
PredBB->eraseFromParent();
}
+/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
+/// almost-empty BB ending in an unconditional branch to Succ, into Succ.
+///
+/// Assumption: Succ is the single successor for BB.
+///
+static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
+ assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
+
+ DEBUG(errs() << "Looking to fold " << BB->getName() << " into "
+ << Succ->getName() << "\n");
+ // Shortcut: if there is only a single predecessor, it must be BB and
+ // merging is always safe.
+ if (Succ->getSinglePredecessor()) return true;
+
+ // Make a list of the predecessors of BB
+ typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
+ BlockSet BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Use that list to make another list of common predecessors of BB and Succ
+ BlockSet CommonPreds;
+ for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
+ PI != PE; ++PI)
+ if (BBPreds.count(*PI))
+ CommonPreds.insert(*PI);
+
+ // Shortcut: if there are no common predecessors, merging is always safe.
+ if (CommonPreds.empty())
+ return true;
+
+ // Look at all the phi nodes in Succ, to see if they present a conflict when
+ // merging these blocks
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+
+ // If the incoming value from BB is again a PHINode in
+ // BB which has the same incoming value for *PI as PN does, we can
+ // merge the phi nodes and then the blocks can still be merged
+ PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
+ if (BBPN && BBPN->getParent() == BB) {
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ if (BBPN->getIncomingValueForBlock(*PI)
+ != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with "
+ << BBPN->getName() << " with regard to common predecessor "
+ << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ } else {
+ Value* Val = PN->getIncomingValueForBlock(BB);
+ for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
+ PI != PE; PI++) {
+ // See if the incoming value for the common predecessor is equal to the
+ // one for BB, in which case this phi node will not prevent the merging
+ // of the block.
+ if (Val != PN->getIncomingValueForBlock(*PI)) {
+ DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
+ << Succ->getName() << " is conflicting with regard to common "
+ << "predecessor " << (*PI)->getName() << "\n");
+ return false;
+ }
+ }
+ }
+ }
+
+ return true;
+}
+
+/// TryToSimplifyUncondBranchFromEmptyBlock - BB is known to contain an
+/// unconditional branch, and contains no instructions other than PHI nodes,
+/// potential debug intrinsics and the branch. If possible, eliminate BB by
+/// rewriting all the predecessors to branch to the successor block and return
+/// true. If we can't transform, return false.
+bool llvm::TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB) {
+ // We can't eliminate infinite loops.
+ BasicBlock *Succ = cast<BranchInst>(BB->getTerminator())->getSuccessor(0);
+ if (BB == Succ) return false;
+
+ // Check to see if merging these blocks would cause conflicts for any of the
+ // phi nodes in BB or Succ. If not, we can safely merge.
+ if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
+
+ // Check for cases where Succ has multiple predecessors and a PHI node in BB
+ // has uses which will not disappear when the PHI nodes are merged. It is
+ // possible to handle such cases, but difficult: it requires checking whether
+ // BB dominates Succ, which is non-trivial to calculate in the case where
+ // Succ has multiple predecessors. Also, it requires checking whether
+ // constructing the necessary self-referential PHI node doesn't introduce any
+ // conflicts; this isn't too difficult, but the previous code for doing this
+ // was incorrect.
+ //
+ // Note that if this check finds a live use, BB dominates Succ, so BB is
+ // something like a loop pre-header (or rarely, a part of an irreducible CFG);
+ // folding the branch isn't profitable in that case anyway.
+ if (!Succ->getSinglePredecessor()) {
+ BasicBlock::iterator BBI = BB->begin();
+ while (isa<PHINode>(*BBI)) {
+ for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
+ UI != E; ++UI) {
+ if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
+ if (PN->getIncomingBlock(UI) != BB)
+ return false;
+ } else {
+ return false;
+ }
+ }
+ ++BBI;
+ }
+ }
+
+ DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
+
+ if (isa<PHINode>(Succ->begin())) {
+ // If there is more than one pred of succ, and there are PHI nodes in
+ // the successor, then we need to add incoming edges for the PHI nodes
+ //
+ const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
+
+ // Loop over all of the PHI nodes in the successor of BB.
+ for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
+ PHINode *PN = cast<PHINode>(I);
+ Value *OldVal = PN->removeIncomingValue(BB, false);
+ assert(OldVal && "No entry in PHI for Pred BB!");
+
+ // If this incoming value is one of the PHI nodes in BB, the new entries
+ // in the PHI node are the entries from the old PHI.
+ if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
+ PHINode *OldValPN = cast<PHINode>(OldVal);
+ for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
+ // Note that, since we are merging phi nodes and BB and Succ might
+ // have common predecessors, we could end up with a phi node with
+ // identical incoming branches. This will be cleaned up later (and
+ // will trigger asserts if we try to clean it up now, without also
+ // simplifying the corresponding conditional branch).
+ PN->addIncoming(OldValPN->getIncomingValue(i),
+ OldValPN->getIncomingBlock(i));
+ } else {
+ // Add an incoming value for each of the new incoming values.
+ for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
+ PN->addIncoming(OldVal, BBPreds[i]);
+ }
+ }
+ }
+
+ while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
+ if (Succ->getSinglePredecessor()) {
+ // BB is the only predecessor of Succ, so Succ will end up with exactly
+ // the same predecessors BB had.
+ Succ->getInstList().splice(Succ->begin(),
+ BB->getInstList(), BB->begin());
+ } else {
+ // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
+ assert(PN->use_empty() && "There shouldn't be any uses here!");
+ PN->eraseFromParent();
+ }
+ }
+
+ // Everything that jumped to BB now goes to Succ.
+ BB->replaceAllUsesWith(Succ);
+ if (!Succ->hasName()) Succ->takeName(BB);
+ BB->eraseFromParent(); // Delete the old basic block.
+ return true;
+}
+
+
+
/// OnlyUsedByDbgIntrinsics - Return true if the instruction I is only used
/// by DbgIntrinsics. If DbgInUses is specified then the vector is filled
/// with the DbgInfoIntrinsic that use the instruction I.
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index cd8d952..2ab0972 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -23,6 +23,11 @@
//
// This pass also guarantees that loops will have exactly one backedge.
//
+// Indirectbr instructions introduce several complications. If the loop
+// contains or is entered by an indirectbr instruction, it may not be possible
+// to transform the loop and make these guarantees. Client code should check
+// that these conditions are true before relying on them.
+//
// Note that the simplifycfg pass will clean up blocks which are split out but
// end up being unnecessary, so usage of this pass should not pessimize
// generated code.
@@ -81,17 +86,15 @@ namespace {
AU.addPreservedID(BreakCriticalEdgesID); // No critical edges added.
}
- /// verifyAnalysis() - Verify loop nest.
- void verifyAnalysis() const {
- assert(L->isLoopSimplifyForm() && "LoopSimplify form not preserved!");
- }
+ /// verifyAnalysis() - Verify LoopSimplifyForm's guarantees.
+ void verifyAnalysis() const;
private:
bool ProcessLoop(Loop *L, LPPassManager &LPM);
BasicBlock *RewriteLoopExitBlock(Loop *L, BasicBlock *Exit);
BasicBlock *InsertPreheaderForLoop(Loop *L);
Loop *SeparateNestedLoop(Loop *L, LPPassManager &LPM);
- void InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
+ BasicBlock *InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader);
void PlaceSplitBlockCarefully(BasicBlock *NewBB,
SmallVectorImpl<BasicBlock*> &SplitPreds,
Loop *L);
@@ -160,8 +163,10 @@ ReprocessLoop:
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
Preheader = InsertPreheaderForLoop(L);
- NumInserted++;
- Changed = true;
+ if (Preheader) {
+ NumInserted++;
+ Changed = true;
+ }
}
// Next, check to make sure that all exit nodes of the loop only have
@@ -180,21 +185,22 @@ ReprocessLoop:
// Must be exactly this loop: no subloops, parent loops, or non-loop preds
// allowed.
if (!L->contains(*PI)) {
- RewriteLoopExitBlock(L, ExitBlock);
- NumInserted++;
- Changed = true;
+ if (RewriteLoopExitBlock(L, ExitBlock)) {
+ NumInserted++;
+ Changed = true;
+ }
break;
}
}
// If the header has more than two predecessors at this point (from the
// preheader and from multiple backedges), we must adjust the loop.
- unsigned NumBackedges = L->getNumBackEdges();
- if (NumBackedges != 1) {
+ BasicBlock *LoopLatch = L->getLoopLatch();
+ if (!LoopLatch) {
// If this is really a nested loop, rip it out into a child loop. Don't do
// this for loops with a giant number of backedges, just factor them into a
// common backedge instead.
- if (NumBackedges < 8) {
+ if (L->getNumBackEdges() < 8) {
if (SeparateNestedLoop(L, LPM)) {
++NumNested;
// This is a big restructuring change, reprocess the whole loop.
@@ -207,9 +213,11 @@ ReprocessLoop:
// If we either couldn't, or didn't want to, identify nesting of the loops,
// insert a new block that all backedges target, then make it jump to the
// loop header.
- InsertUniqueBackedgeBlock(L, Preheader);
- NumInserted++;
- Changed = true;
+ LoopLatch = InsertUniqueBackedgeBlock(L, Preheader);
+ if (LoopLatch) {
+ NumInserted++;
+ Changed = true;
+ }
}
// Scan over the PHI nodes in the loop header. Since they now have only two
@@ -233,7 +241,14 @@ ReprocessLoop:
// loop-invariant instructions out of the way to open up more
// opportunities, and the disadvantage of having the responsibility
// to preserve dominator information.
- if (ExitBlocks.size() > 1 && L->getUniqueExitBlock()) {
+ bool UniqueExit = true;
+ if (!ExitBlocks.empty())
+ for (unsigned i = 1, e = ExitBlocks.size(); i != e; ++i)
+ if (ExitBlocks[i] != ExitBlocks[0]) {
+ UniqueExit = false;
+ break;
+ }
+ if (UniqueExit) {
SmallVector<BasicBlock*, 8> ExitingBlocks;
L->getExitingBlocks(ExitingBlocks);
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
@@ -251,7 +266,8 @@ ReprocessLoop:
Instruction *Inst = I++;
if (Inst == CI)
continue;
- if (!L->makeLoopInvariant(Inst, Changed, Preheader->getTerminator())) {
+ if (!L->makeLoopInvariant(Inst, Changed,
+ Preheader ? Preheader->getTerminator() : 0)) {
AllInvariant = false;
break;
}
@@ -303,8 +319,15 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
SmallVector<BasicBlock*, 8> OutsideBlocks;
for (pred_iterator PI = pred_begin(Header), PE = pred_end(Header);
PI != PE; ++PI)
- if (!L->contains(*PI)) // Coming in from outside the loop?
- OutsideBlocks.push_back(*PI); // Keep track of it...
+ if (!L->contains(*PI)) { // Coming in from outside the loop?
+ // If the loop is branched to from an indirect branch, we won't
+ // be able to fully transform the loop, because it prohibits
+ // edge splitting.
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) return 0;
+
+ // Keep track of it.
+ OutsideBlocks.push_back(*PI);
+ }
// Split out the loop pre-header.
BasicBlock *NewBB =
@@ -324,8 +347,12 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
SmallVector<BasicBlock*, 8> LoopBlocks;
for (pred_iterator I = pred_begin(Exit), E = pred_end(Exit); I != E; ++I)
- if (L->contains(*I))
+ if (L->contains(*I)) {
+ // Don't do this if the loop is exited via an indirect branch.
+ if (isa<IndirectBrInst>((*I)->getTerminator())) return 0;
+
LoopBlocks.push_back(*I);
+ }
assert(!LoopBlocks.empty() && "No edges coming in from outside the loop?");
BasicBlock *NewBB = SplitBlockPredecessors(Exit, &LoopBlocks[0],
@@ -519,13 +546,18 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
/// backedges to target a new basic block and have that block branch to the loop
/// header. This ensures that loops have exactly one backedge.
///
-void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
+BasicBlock *
+LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
assert(L->getNumBackEdges() > 1 && "Must have > 1 backedge!");
// Get information about the loop
BasicBlock *Header = L->getHeader();
Function *F = Header->getParent();
+ // Unique backedge insertion currently depends on having a preheader.
+ if (!Preheader)
+ return 0;
+
// Figure out which basic blocks contain back-edges to the loop header.
std::vector<BasicBlock*> BackedgeBlocks;
for (pred_iterator I = pred_begin(Header), E = pred_end(Header); I != E; ++I)
@@ -612,4 +644,40 @@ void LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
DT->splitBlock(BEBlock);
if (DominanceFrontier *DF = getAnalysisIfAvailable<DominanceFrontier>())
DF->splitBlock(BEBlock);
+
+ return BEBlock;
+}
+
+void LoopSimplify::verifyAnalysis() const {
+ // It used to be possible to just assert L->isLoopSimplifyForm(), however
+ // with the introduction of indirectbr, there are now cases where it's
+ // not possible to transform a loop as necessary. We can at least check
+  // that an indirectbr is nearby whenever there's trouble.
+
+ // Indirectbr can interfere with preheader and unique backedge insertion.
+ if (!L->getLoopPreheader() || !L->getLoopLatch()) {
+ bool HasIndBrPred = false;
+ for (pred_iterator PI = pred_begin(L->getHeader()),
+ PE = pred_end(L->getHeader()); PI != PE; ++PI)
+ if (isa<IndirectBrInst>((*PI)->getTerminator())) {
+ HasIndBrPred = true;
+ break;
+ }
+ assert(HasIndBrPred &&
+ "LoopSimplify has no excuse for missing loop header info!");
+ }
+
+ // Indirectbr can interfere with exit block canonicalization.
+ if (!L->hasDedicatedExits()) {
+ bool HasIndBrExiting = false;
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i)
+ if (isa<IndirectBrInst>((ExitingBlocks[i])->getTerminator())) {
+ HasIndBrExiting = true;
+ break;
+ }
+ assert(HasIndBrExiting &&
+ "LoopSimplify has no excuse for missing exit block info!");
+ }
}
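
Note on the exit-block hunk earlier in this file: the rewritten check computes "unique exit" by comparing every entry of ExitBlocks against the first one, instead of calling getUniqueExitBlock(). A standalone sketch of that test in plain C++, with no LLVM types assumed and not part of the patch:

// Standalone illustration of the "unique exit" test in ReprocessLoop:
// a loop is treated as having a unique exit block when every entry in
// ExitBlocks equals the first one (an empty list also passes, matching
// the patch).
#include <vector>

template <typename BlockT>
static bool hasUniqueExitBlock(const std::vector<BlockT *> &ExitBlocks) {
  for (size_t i = 1; i < ExitBlocks.size(); ++i)
    if (ExitBlocks[i] != ExitBlocks[0])
      return false;          // found two distinct exit blocks
  return true;               // empty or all identical
}
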
diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp
index d68427a..6232f32 100644
--- a/lib/Transforms/Utils/LoopUnroll.cpp
+++ b/lib/Transforms/Utils/LoopUnroll.cpp
@@ -108,8 +108,19 @@ static BasicBlock *FoldBlockIntoPredecessor(BasicBlock *BB, LoopInfo* LI) {
bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM) {
assert(L->isLCSSAForm());
- BasicBlock *Header = L->getHeader();
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) {
+ DEBUG(errs() << " Can't unroll; loop preheader-insertion failed.\n");
+ return false;
+ }
+
BasicBlock *LatchBlock = L->getLoopLatch();
+ if (!LatchBlock) {
+ DEBUG(errs() << " Can't unroll; loop exit-block-insertion failed.\n");
+ return false;
+ }
+
+ BasicBlock *Header = L->getHeader();
BranchInst *BI = dyn_cast<BranchInst>(LatchBlock->getTerminator());
if (!BI || BI->isUnconditional()) {
@@ -351,8 +362,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, LoopInfo* LI, LPPassManager* LPM)
if (isInstructionTriviallyDead(Inst))
(*BB)->getInstList().erase(Inst);
- else if (Constant *C = ConstantFoldInstruction(Inst,
- Header->getContext())) {
+ else if (Constant *C = ConstantFoldInstruction(Inst)) {
Inst->replaceAllUsesWith(C);
(*BB)->getInstList().erase(Inst);
}
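
The two early returns added above encode a pattern that every client of loop-simplify form now needs: with indirectbr involved, the preheader or latch may simply not exist. A minimal sketch of that guard, assuming only the 2.6-era Loop API (getLoopPreheader, getLoopLatch); the wrapper function itself is hypothetical:

#include "llvm/Analysis/LoopInfo.h"

// Hypothetical helper: bail out instead of asserting when the loop lacks
// the canonical blocks that loop-simplify normally provides.
static bool transformIfSimplified(llvm::Loop *L) {
  llvm::BasicBlock *Preheader = L->getLoopPreheader();
  llvm::BasicBlock *Latch = L->getLoopLatch();
  if (!Preheader || !Latch)
    return false;   // not in loop-simplify form (e.g. indirectbr involved)
  // ... the actual transformation would go here ...
  return true;
}
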
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 8e1fb98..8dbc808 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -78,166 +78,6 @@ static void AddPredecessorToBlock(BasicBlock *Succ, BasicBlock *NewPred,
PN->addIncoming(PN->getIncomingValueForBlock(ExistPred), NewPred);
}
-/// CanPropagatePredecessorsForPHIs - Return true if we can fold BB, an
-/// almost-empty BB ending in an unconditional branch to Succ, into succ.
-///
-/// Assumption: Succ is the single successor for BB.
-///
-static bool CanPropagatePredecessorsForPHIs(BasicBlock *BB, BasicBlock *Succ) {
- assert(*succ_begin(BB) == Succ && "Succ is not successor of BB!");
-
- DEBUG(errs() << "Looking to fold " << BB->getName() << " into "
- << Succ->getName() << "\n");
- // Shortcut, if there is only a single predecessor it must be BB and merging
- // is always safe
- if (Succ->getSinglePredecessor()) return true;
-
- // Make a list of the predecessors of BB
- typedef SmallPtrSet<BasicBlock*, 16> BlockSet;
- BlockSet BBPreds(pred_begin(BB), pred_end(BB));
-
- // Use that list to make another list of common predecessors of BB and Succ
- BlockSet CommonPreds;
- for (pred_iterator PI = pred_begin(Succ), PE = pred_end(Succ);
- PI != PE; ++PI)
- if (BBPreds.count(*PI))
- CommonPreds.insert(*PI);
-
- // Shortcut, if there are no common predecessors, merging is always safe
- if (CommonPreds.empty())
- return true;
-
- // Look at all the phi nodes in Succ, to see if they present a conflict when
- // merging these blocks
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
-
- // If the incoming value from BB is again a PHINode in
- // BB which has the same incoming value for *PI as PN does, we can
- // merge the phi nodes and then the blocks can still be merged
- PHINode *BBPN = dyn_cast<PHINode>(PN->getIncomingValueForBlock(BB));
- if (BBPN && BBPN->getParent() == BB) {
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- if (BBPN->getIncomingValueForBlock(*PI)
- != PN->getIncomingValueForBlock(*PI)) {
- DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with "
- << BBPN->getName() << " with regard to common predecessor "
- << (*PI)->getName() << "\n");
- return false;
- }
- }
- } else {
- Value* Val = PN->getIncomingValueForBlock(BB);
- for (BlockSet::iterator PI = CommonPreds.begin(), PE = CommonPreds.end();
- PI != PE; PI++) {
- // See if the incoming value for the common predecessor is equal to the
- // one for BB, in which case this phi node will not prevent the merging
- // of the block.
- if (Val != PN->getIncomingValueForBlock(*PI)) {
- DEBUG(errs() << "Can't fold, phi node " << PN->getName() << " in "
- << Succ->getName() << " is conflicting with regard to common "
- << "predecessor " << (*PI)->getName() << "\n");
- return false;
- }
- }
- }
- }
-
- return true;
-}
-
-/// TryToSimplifyUncondBranchFromEmptyBlock - BB contains an unconditional
-/// branch to Succ, and contains no instructions other than PHI nodes and the
-/// branch. If possible, eliminate BB.
-static bool TryToSimplifyUncondBranchFromEmptyBlock(BasicBlock *BB,
- BasicBlock *Succ) {
- // Check to see if merging these blocks would cause conflicts for any of the
- // phi nodes in BB or Succ. If not, we can safely merge.
- if (!CanPropagatePredecessorsForPHIs(BB, Succ)) return false;
-
- // Check for cases where Succ has multiple predecessors and a PHI node in BB
- // has uses which will not disappear when the PHI nodes are merged. It is
- // possible to handle such cases, but difficult: it requires checking whether
- // BB dominates Succ, which is non-trivial to calculate in the case where
- // Succ has multiple predecessors. Also, it requires checking whether
-  // constructing the necessary self-referential PHI node doesn't introduce any
- // conflicts; this isn't too difficult, but the previous code for doing this
- // was incorrect.
- //
- // Note that if this check finds a live use, BB dominates Succ, so BB is
- // something like a loop pre-header (or rarely, a part of an irreducible CFG);
- // folding the branch isn't profitable in that case anyway.
- if (!Succ->getSinglePredecessor()) {
- BasicBlock::iterator BBI = BB->begin();
- while (isa<PHINode>(*BBI)) {
- for (Value::use_iterator UI = BBI->use_begin(), E = BBI->use_end();
- UI != E; ++UI) {
- if (PHINode* PN = dyn_cast<PHINode>(*UI)) {
- if (PN->getIncomingBlock(UI) != BB)
- return false;
- } else {
- return false;
- }
- }
- ++BBI;
- }
- }
-
- DEBUG(errs() << "Killing Trivial BB: \n" << *BB);
-
- if (isa<PHINode>(Succ->begin())) {
- // If there is more than one pred of succ, and there are PHI nodes in
- // the successor, then we need to add incoming edges for the PHI nodes
- //
- const SmallVector<BasicBlock*, 16> BBPreds(pred_begin(BB), pred_end(BB));
-
- // Loop over all of the PHI nodes in the successor of BB.
- for (BasicBlock::iterator I = Succ->begin(); isa<PHINode>(I); ++I) {
- PHINode *PN = cast<PHINode>(I);
- Value *OldVal = PN->removeIncomingValue(BB, false);
- assert(OldVal && "No entry in PHI for Pred BB!");
-
- // If this incoming value is one of the PHI nodes in BB, the new entries
- // in the PHI node are the entries from the old PHI.
- if (isa<PHINode>(OldVal) && cast<PHINode>(OldVal)->getParent() == BB) {
- PHINode *OldValPN = cast<PHINode>(OldVal);
- for (unsigned i = 0, e = OldValPN->getNumIncomingValues(); i != e; ++i)
- // Note that, since we are merging phi nodes and BB and Succ might
- // have common predecessors, we could end up with a phi node with
- // identical incoming branches. This will be cleaned up later (and
- // will trigger asserts if we try to clean it up now, without also
- // simplifying the corresponding conditional branch).
- PN->addIncoming(OldValPN->getIncomingValue(i),
- OldValPN->getIncomingBlock(i));
- } else {
- // Add an incoming value for each of the new incoming values.
- for (unsigned i = 0, e = BBPreds.size(); i != e; ++i)
- PN->addIncoming(OldVal, BBPreds[i]);
- }
- }
- }
-
- while (PHINode *PN = dyn_cast<PHINode>(&BB->front())) {
- if (Succ->getSinglePredecessor()) {
- // BB is the only predecessor of Succ, so Succ will end up with exactly
- // the same predecessors BB had.
- Succ->getInstList().splice(Succ->begin(),
- BB->getInstList(), BB->begin());
- } else {
- // We explicitly check for such uses in CanPropagatePredecessorsForPHIs.
- assert(PN->use_empty() && "There shouldn't be any uses here!");
- PN->eraseFromParent();
- }
- }
-
- // Everything that jumped to BB now goes to Succ.
- BB->replaceAllUsesWith(Succ);
- if (!Succ->hasName()) Succ->takeName(BB);
- BB->eraseFromParent(); // Delete the old basic block.
- return true;
-}
/// GetIfCondition - Given a basic block (BB) with two predecessors (and
/// presumably PHI nodes in it), check to see if the merge at this block is due
@@ -1217,7 +1057,7 @@ static bool FoldCondBranchOnPHI(BranchInst *BI) {
}
// Check for trivial simplification.
- if (Constant *C = ConstantFoldInstruction(N, BB->getContext())) {
+ if (Constant *C = ConstantFoldInstruction(N)) {
TranslateMap[BBI] = C;
delete N; // Constant folded away, don't need actual inst
} else {
@@ -1983,13 +1823,11 @@ bool llvm::SimplifyCFG(BasicBlock *BB) {
if (BI->isUnconditional()) {
BasicBlock::iterator BBI = BB->getFirstNonPHI();
- BasicBlock *Succ = BI->getSuccessor(0);
// Ignore dbg intrinsics.
while (isa<DbgInfoIntrinsic>(BBI))
++BBI;
- if (BBI->isTerminator() && // Terminator is the only non-phi instruction!
- Succ != BB) // Don't hurt infinite loops!
- if (TryToSimplifyUncondBranchFromEmptyBlock(BB, Succ))
+ if (BBI->isTerminator()) // Terminator is the only non-phi instruction!
+ if (TryToSimplifyUncondBranchFromEmptyBlock(BB))
return true;
} else { // Conditional branch
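
The comments in the block removed above explain why folding an almost-empty block into its successor is only safe when the PHI nodes agree for every predecessor the two blocks share. A standalone illustration of the first step of that check (whether the predecessor sets intersect at all), in plain C++ with integers standing in for BasicBlock pointers; this is a sketch of the idea, not the removed routine:

#include <set>

typedef int BlockId;   // stand-in for BasicBlock*

// If BB and Succ share no predecessors, no PHI node in Succ can receive
// conflicting values after the merge, so folding is always safe.
static bool predecessorsIntersect(const std::set<BlockId> &PredsOfBB,
                                  const std::set<BlockId> &PredsOfSucc) {
  for (std::set<BlockId>::const_iterator I = PredsOfSucc.begin(),
                                         E = PredsOfSucc.end();
       I != E; ++I)
    if (PredsOfBB.count(*I))
      return true;    // common predecessor: incoming values must be checked
  return false;       // disjoint sets: merging cannot create a PHI conflict
}
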
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 9a803a1..82d7914 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -1238,7 +1238,8 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
return;
}
- if (V->getValueID() == Value::PseudoSourceValueVal) {
+ if (V->getValueID() == Value::PseudoSourceValueVal ||
+ V->getValueID() == Value::FixedStackPseudoSourceValueVal) {
V->print(Out);
return;
}
@@ -1497,8 +1498,8 @@ static void PrintLinkage(GlobalValue::LinkageTypes LT,
case GlobalValue::AvailableExternallyLinkage:
Out << "available_externally ";
break;
- case GlobalValue::GhostLinkage:
- llvm_unreachable("GhostLinkage not allowed in AsmWriter!");
+ // This is invalid syntax and just a debugging aid.
+ case GlobalValue::GhostLinkage: Out << "ghost "; break;
}
}
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 000a063..c622558 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -318,7 +318,7 @@ Constant* ConstantInt::get(const Type* Ty, const APInt& V) {
return C;
}
-ConstantInt* ConstantInt::get(const IntegerType* Ty, const StringRef& Str,
+ConstantInt* ConstantInt::get(const IntegerType* Ty, StringRef Str,
uint8_t radix) {
return get(Ty->getContext(), APInt(Ty->getBitWidth(), Str, radix));
}
@@ -362,7 +362,7 @@ Constant* ConstantFP::get(const Type* Ty, double V) {
}
-Constant* ConstantFP::get(const Type* Ty, const StringRef& Str) {
+Constant* ConstantFP::get(const Type* Ty, StringRef Str) {
LLVMContext &Context = Ty->getContext();
APFloat FV(*TypeToFloatSemantics(Ty->getScalarType()), Str);
@@ -508,7 +508,7 @@ Constant* ConstantArray::get(const ArrayType* T, Constant* const* Vals,
/// Otherwise, the length parameter specifies how much of the string to use
/// and it won't be null terminated.
///
-Constant* ConstantArray::get(LLVMContext &Context, const StringRef &Str,
+Constant* ConstantArray::get(LLVMContext &Context, StringRef Str,
bool AddNull) {
std::vector<Constant*> ElementVals;
for (unsigned i = 0; i < Str.size(); ++i)
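
These signature changes (repeated in several files below) switch string parameters from const StringRef& to by-value StringRef; since StringRef is a small pointer/length pair, passing it by value is the cheaper convention, and call sites are unchanged. A small usage sketch against the two signatures shown above; the surrounding function and the literal values are illustrative only:

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"

static void stringRefCallSites(llvm::LLVMContext &Ctx) {
  const llvm::IntegerType *I32 = llvm::Type::getInt32Ty(Ctx);
  // String literals still convert implicitly to the by-value StringRef.
  llvm::ConstantInt *C = llvm::ConstantInt::get(I32, "42", 10);      // radix 10
  llvm::Constant *S = llvm::ConstantArray::get(Ctx, "hello", true);  // add NUL
  (void)C; (void)S;
}
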
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index 1a34180..78cd4dc 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -1860,8 +1860,9 @@ LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
}
LLVMValueRef LLVMBuildIntCast(LLVMBuilderRef B, LLVMValueRef Val,
- LLVMTypeRef DestTy, const char *Name) {
- return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), Name));
+ LLVMTypeRef DestTy, int isSigned,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateIntCast(unwrap(Val), unwrap(DestTy), isSigned, Name));
}
LLVMValueRef LLVMBuildFPCast(LLVMBuilderRef B, LLVMValueRef Val,
@@ -1987,13 +1988,15 @@ int LLVMCreateMemoryBufferWithContentsOfFile(const char *Path,
int LLVMCreateMemoryBufferWithSTDIN(LLVMMemoryBufferRef *OutMemBuf,
char **OutMessage) {
- if (MemoryBuffer *MB = MemoryBuffer::getSTDIN()) {
- *OutMemBuf = wrap(MB);
- return 0;
+ MemoryBuffer *MB = MemoryBuffer::getSTDIN();
+ if (!MB->getBufferSize()) {
+ delete MB;
+ *OutMessage = strdup("stdin is empty.");
+ return 1;
}
-
- *OutMessage = strdup("stdin is empty.");
- return 1;
+
+ *OutMemBuf = wrap(MB);
+ return 0;
}
void LLVMDisposeMemoryBuffer(LLVMMemoryBufferRef MemBuf) {
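
The LLVMBuildIntCast binding above now takes an explicit isSigned flag between the destination type and the value name. A minimal usage sketch of the new signature (callable from C or C++); the helper function and the 64-bit destination type are illustrative assumptions:

#include "llvm-c/Core.h"

// Widen an integer value to i64, treating it as signed.
static LLVMValueRef widenToI64(LLVMBuilderRef Builder, LLVMValueRef V) {
  return LLVMBuildIntCast(Builder, V, LLVMInt64Type(), /*isSigned=*/1,
                          "widened");
}
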
diff --git a/lib/VMCore/Globals.cpp b/lib/VMCore/Globals.cpp
index 03ceecb..94bf3de 100644
--- a/lib/VMCore/Globals.cpp
+++ b/lib/VMCore/Globals.cpp
@@ -16,7 +16,6 @@
#include "llvm/GlobalVariable.h"
#include "llvm/GlobalAlias.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Module.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/Support/ErrorHandling.h"
@@ -95,8 +94,7 @@ void GlobalValue::copyAttributesFrom(const GlobalValue *Src) {
// GlobalVariable Implementation
//===----------------------------------------------------------------------===//
-GlobalVariable::GlobalVariable(LLVMContext &Context, const Type *Ty,
- bool constant, LinkageTypes Link,
+GlobalVariable::GlobalVariable(const Type *Ty, bool constant, LinkageTypes Link,
Constant *InitVal, const Twine &Name,
bool ThreadLocal, unsigned AddressSpace)
: GlobalValue(PointerType::get(Ty, AddressSpace),
@@ -173,6 +171,21 @@ void GlobalVariable::replaceUsesOfWithOnConstant(Value *From, Value *To,
this->setOperand(0, cast<Constant>(To));
}
+void GlobalVariable::setInitializer(Constant *InitVal) {
+ if (InitVal == 0) {
+ if (hasInitializer()) {
+ Op<0>().set(0);
+ NumOperands = 0;
+ }
+ } else {
+ assert(InitVal->getType() == getType()->getElementType() &&
+ "Initializer type must match GlobalVariable type");
+ if (!hasInitializer())
+ NumOperands = 1;
+ Op<0>().set(InitVal);
+ }
+}
+
/// copyAttributesFrom - copy all additional attributes (those not needed to
/// create a GlobalVariable) from the GlobalVariable Src to this one.
void GlobalVariable::copyAttributesFrom(const GlobalValue *Src) {
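
Two behaviors change in this file: the module-less GlobalVariable constructor drops its LLVMContext parameter, and setInitializer now accepts a null pointer, removing any existing initializer. A small sketch combining both, assuming 2.6-era headers; the helper function is hypothetical, and the constructor call mirrors the sentinel in Module.cpp further down:

#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/GlobalVariable.h"
#include "llvm/LLVMContext.h"

static llvm::GlobalVariable *makeAndStripInitializer(llvm::LLVMContext &Ctx) {
  const llvm::Type *I32 = llvm::Type::getInt32Ty(Ctx);
  // New constructor form: no LLVMContext argument.
  llvm::GlobalVariable *GV =
      new llvm::GlobalVariable(I32, /*constant=*/false,
                               llvm::GlobalValue::ExternalLinkage);
  GV->setInitializer(llvm::ConstantInt::get(I32, 7)); // now a definition
  GV->setInitializer(0);                              // back to a declaration
  return GV;
}
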
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index 3a36a1b..16de1af 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -26,16 +26,16 @@ InlineAsm::~InlineAsm() {
// NOTE: when memoizing the function type, we have to be careful to handle the
// case when the type gets refined.
-InlineAsm *InlineAsm::get(const FunctionType *Ty, const StringRef &AsmString,
- const StringRef &Constraints, bool hasSideEffects,
+InlineAsm *InlineAsm::get(const FunctionType *Ty, StringRef AsmString,
+ StringRef Constraints, bool hasSideEffects,
bool isAlignStack) {
// FIXME: memoize!
return new InlineAsm(Ty, AsmString, Constraints, hasSideEffects,
isAlignStack);
}
-InlineAsm::InlineAsm(const FunctionType *Ty, const StringRef &asmString,
- const StringRef &constraints, bool hasSideEffects,
+InlineAsm::InlineAsm(const FunctionType *Ty, StringRef asmString,
+ StringRef constraints, bool hasSideEffects,
bool isAlignStack)
: Value(PointerType::getUnqual(Ty),
Value::InlineAsmVal),
@@ -54,7 +54,7 @@ const FunctionType *InlineAsm::getFunctionType() const {
/// Parse - Analyze the specified string (e.g. "==&{eax}") and fill in the
/// fields in this structure. If the constraint string is not understood,
/// return true, otherwise return false.
-bool InlineAsm::ConstraintInfo::Parse(const StringRef &Str,
+bool InlineAsm::ConstraintInfo::Parse(StringRef Str,
std::vector<InlineAsm::ConstraintInfo> &ConstraintsSoFar) {
StringRef::iterator I = Str.begin(), E = Str.end();
@@ -149,7 +149,7 @@ bool InlineAsm::ConstraintInfo::Parse(const StringRef &Str,
}
std::vector<InlineAsm::ConstraintInfo>
-InlineAsm::ParseConstraints(const StringRef &Constraints) {
+InlineAsm::ParseConstraints(StringRef Constraints) {
std::vector<ConstraintInfo> Result;
// Scan the constraints string.
@@ -183,7 +183,7 @@ InlineAsm::ParseConstraints(const StringRef &Constraints) {
/// Verify - Verify that the specified constraint string is reasonable for the
/// specified function type, and otherwise validate the constraint string.
-bool InlineAsm::Verify(const FunctionType *Ty, const StringRef &ConstStr) {
+bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
if (Ty->isVarArg()) return false;
std::vector<ConstraintInfo> Constraints = ParseConstraints(ConstStr);
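
InlineAsm::get keeps the same argument order, with the two strings now passed as by-value StringRef. A minimal call-site sketch, assuming 2.6-era headers; the helper and the chosen asm string are illustrative:

#include "llvm/DerivedTypes.h"
#include "llvm/InlineAsm.h"
#include "llvm/LLVMContext.h"

static llvm::InlineAsm *makeNop(llvm::LLVMContext &Ctx) {
  const llvm::FunctionType *FTy =
      llvm::FunctionType::get(llvm::Type::getVoidTy(Ctx), /*isVarArg=*/false);
  return llvm::InlineAsm::get(FTy, "nop", "", /*hasSideEffects=*/true,
                              /*isAlignStack=*/false);
}
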
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 279bc73..b03ee93 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -24,8 +24,6 @@
#include "llvm/Support/CallSite.h"
#include "llvm/Support/ConstantRange.h"
#include "llvm/Support/MathExtras.h"
-#include "llvm/Target/TargetData.h"
-
using namespace llvm;
//===----------------------------------------------------------------------===//
@@ -465,9 +463,11 @@ static Instruction *createMalloc(Instruction *InsertBefore,
ArraySize = ConstantInt::get(IntPtrTy, 1);
else if (ArraySize->getType() != IntPtrTy) {
if (InsertBefore)
- ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertBefore);
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertBefore);
else
- ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false, "", InsertAtEnd);
+ ArraySize = CastInst::CreateIntegerCast(ArraySize, IntPtrTy, false,
+ "", InsertAtEnd);
}
if (!IsConstantOne(ArraySize)) {
@@ -494,22 +494,21 @@ static Instruction *createMalloc(Instruction *InsertBefore,
BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
Module* M = BB->getParent()->getParent();
const Type *BPTy = Type::getInt8PtrTy(BB->getContext());
- if (!MallocF)
+ Value *MallocFunc = MallocF;
+ if (!MallocFunc)
// prototype malloc as "void *malloc(size_t)"
- MallocF = cast<Function>(M->getOrInsertFunction("malloc", BPTy,
- IntPtrTy, NULL));
- if (!MallocF->doesNotAlias(0)) MallocF->setDoesNotAlias(0);
+ MallocFunc = M->getOrInsertFunction("malloc", BPTy, IntPtrTy, NULL);
const PointerType *AllocPtrType = PointerType::getUnqual(AllocTy);
CallInst *MCall = NULL;
Instruction *Result = NULL;
if (InsertBefore) {
- MCall = CallInst::Create(MallocF, AllocSize, "malloccall", InsertBefore);
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall", InsertBefore);
Result = MCall;
if (Result->getType() != AllocPtrType)
// Create a cast instruction to convert to the right type...
Result = new BitCastInst(MCall, AllocPtrType, Name, InsertBefore);
} else {
- MCall = CallInst::Create(MallocF, AllocSize, "malloccall");
+ MCall = CallInst::Create(MallocFunc, AllocSize, "malloccall");
Result = MCall;
if (Result->getType() != AllocPtrType) {
InsertAtEnd->getInstList().push_back(MCall);
@@ -518,6 +517,10 @@ static Instruction *createMalloc(Instruction *InsertBefore,
}
}
MCall->setTailCall();
+ if (Function *F = dyn_cast<Function>(MallocFunc)) {
+ MCall->setCallingConv(F->getCallingConv());
+ if (!F->doesNotAlias(0)) F->setDoesNotAlias(0);
+ }
assert(MCall->getType() != Type::getVoidTy(BB->getContext()) &&
"Malloc has void return type");
@@ -567,8 +570,7 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
const Type *VoidTy = Type::getVoidTy(M->getContext());
const Type *IntPtrTy = Type::getInt8PtrTy(M->getContext());
// prototype free as "void free(void*)"
- Constant *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
-
+ Value *FreeFunc = M->getOrInsertFunction("free", VoidTy, IntPtrTy, NULL);
CallInst* Result = NULL;
Value *PtrCast = Source;
if (InsertBefore) {
@@ -581,6 +583,8 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
Result = CallInst::Create(FreeFunc, PtrCast, "");
}
Result->setTailCall();
+ if (Function *F = dyn_cast<Function>(FreeFunc))
+ Result->setCallingConv(F->getCallingConv());
return Result;
}
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 4fadfed..24e715b 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -39,6 +39,17 @@ MDString *MDString::get(LLVMContext &Context, StringRef Str) {
new MDString(Context, Entry.getKey());
}
+MDString *MDString::get(LLVMContext &Context, const char *Str) {
+ LLVMContextImpl *pImpl = Context.pImpl;
+ StringMapEntry<MDString *> &Entry =
+ pImpl->MDStringCache.GetOrCreateValue(Str ? StringRef(Str) : StringRef());
+ MDString *&S = Entry.getValue();
+ if (S) return S;
+
+ return S =
+ new MDString(Context, Entry.getKey());
+}
+
//===----------------------------------------------------------------------===//
// MDNode implementation.
//
@@ -341,11 +352,11 @@ MDNode *MetadataContextImpl::getMD(unsigned MDKind, const Instruction *Inst) {
/// getMDs - Get the metadata attached to an Instruction.
void MetadataContextImpl::
getMDs(const Instruction *Inst, SmallVectorImpl<MDPairTy> &MDs) const {
- MDStoreTy::iterator I = MetadataStore.find(Inst);
+ MDStoreTy::const_iterator I = MetadataStore.find(Inst);
if (I == MetadataStore.end())
return;
MDs.resize(I->second.size());
- for (MDMapTy::iterator MI = I->second.begin(), ME = I->second.end();
+ for (MDMapTy::const_iterator MI = I->second.begin(), ME = I->second.end();
MI != ME; ++MI)
// MD kinds are numbered from 1.
MDs[MI->first - 1] = std::make_pair(MI->first, MI->second);
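
The new MDString::get overload added above takes a plain const char* and maps a null pointer to the empty string instead of crashing. A minimal usage sketch; the helper function is illustrative:

#include "llvm/LLVMContext.h"
#include "llvm/Metadata.h"

static llvm::MDString *mdFromCString(llvm::LLVMContext &Ctx, const char *S) {
  // S may legitimately be null here; the new overload yields the uniqued
  // empty-string MDString in that case.
  return llvm::MDString::get(Ctx, S);
}
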
diff --git a/lib/VMCore/Module.cpp b/lib/VMCore/Module.cpp
index add2449..3efd3e3 100644
--- a/lib/VMCore/Module.cpp
+++ b/lib/VMCore/Module.cpp
@@ -31,8 +31,7 @@ using namespace llvm;
//
GlobalVariable *ilist_traits<GlobalVariable>::createSentinel() {
- GlobalVariable *Ret = new GlobalVariable(getGlobalContext(),
- Type::getInt32Ty(getGlobalContext()),
+ GlobalVariable *Ret = new GlobalVariable(Type::getInt32Ty(getGlobalContext()),
false, GlobalValue::ExternalLinkage);
// This should not be garbage monitored.
LeakDetector::removeGarbageObject(Ret);
@@ -56,7 +55,7 @@ template class SymbolTableListTraits<GlobalAlias, Module>;
// Primitive Module methods.
//
-Module::Module(const StringRef &MID, LLVMContext& C)
+Module::Module(StringRef MID, LLVMContext& C)
: Context(C), ModuleID(MID), DataLayout("") {
ValSymTab = new ValueSymbolTable();
TypeSymTab = new TypeSymbolTable();
@@ -115,7 +114,7 @@ Module::PointerSize Module::getPointerSize() const {
/// getNamedValue - Return the first global value in the module with
/// the specified name, of arbitrary type. This method returns null
/// if a global with the specified name is not found.
-GlobalValue *Module::getNamedValue(const StringRef &Name) const {
+GlobalValue *Module::getNamedValue(StringRef Name) const {
return cast_or_null<GlobalValue>(getValueSymbolTable().lookup(Name));
}
@@ -128,7 +127,7 @@ GlobalValue *Module::getNamedValue(const StringRef &Name) const {
// it. This is nice because it allows most passes to get away with not handling
// the symbol table directly for this common task.
//
-Constant *Module::getOrInsertFunction(const StringRef &Name,
+Constant *Module::getOrInsertFunction(StringRef Name,
const FunctionType *Ty,
AttrListPtr AttributeList) {
// See if we have a definition for the specified function already.
@@ -161,7 +160,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name,
return F;
}
-Constant *Module::getOrInsertTargetIntrinsic(const StringRef &Name,
+Constant *Module::getOrInsertTargetIntrinsic(StringRef Name,
const FunctionType *Ty,
AttrListPtr AttributeList) {
// See if we have a definition for the specified function already.
@@ -178,7 +177,7 @@ Constant *Module::getOrInsertTargetIntrinsic(const StringRef &Name,
return F;
}
-Constant *Module::getOrInsertFunction(const StringRef &Name,
+Constant *Module::getOrInsertFunction(StringRef Name,
const FunctionType *Ty) {
AttrListPtr AttributeList = AttrListPtr::get((AttributeWithIndex *)0, 0);
return getOrInsertFunction(Name, Ty, AttributeList);
@@ -189,7 +188,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name,
// This version of the method takes a null terminated list of function
// arguments, which makes it easier for clients to use.
//
-Constant *Module::getOrInsertFunction(const StringRef &Name,
+Constant *Module::getOrInsertFunction(StringRef Name,
AttrListPtr AttributeList,
const Type *RetTy, ...) {
va_list Args;
@@ -208,7 +207,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name,
AttributeList);
}
-Constant *Module::getOrInsertFunction(const StringRef &Name,
+Constant *Module::getOrInsertFunction(StringRef Name,
const Type *RetTy, ...) {
va_list Args;
va_start(Args, RetTy);
@@ -229,7 +228,7 @@ Constant *Module::getOrInsertFunction(const StringRef &Name,
// getFunction - Look up the specified function in the module symbol table.
// If it does not exist, return null.
//
-Function *Module::getFunction(const StringRef &Name) const {
+Function *Module::getFunction(StringRef Name) const {
return dyn_cast_or_null<Function>(getNamedValue(Name));
}
@@ -244,7 +243,7 @@ Function *Module::getFunction(const StringRef &Name) const {
/// If AllowLocal is set to true, this function will return types that
/// have local linkage. By default, these are not returned.
///
-GlobalVariable *Module::getGlobalVariable(const StringRef &Name,
+GlobalVariable *Module::getGlobalVariable(StringRef Name,
bool AllowLocal) const {
if (GlobalVariable *Result =
dyn_cast_or_null<GlobalVariable>(getNamedValue(Name)))
@@ -259,7 +258,7 @@ GlobalVariable *Module::getGlobalVariable(const StringRef &Name,
/// with a constantexpr cast to the right type.
/// 3. Finally, if the existing global is the correct declaration, return the
/// existing global.
-Constant *Module::getOrInsertGlobal(const StringRef &Name, const Type *Ty) {
+Constant *Module::getOrInsertGlobal(StringRef Name, const Type *Ty) {
// See if we have a definition for the specified global already.
GlobalVariable *GV = dyn_cast_or_null<GlobalVariable>(getNamedValue(Name));
if (GV == 0) {
@@ -286,21 +285,21 @@ Constant *Module::getOrInsertGlobal(const StringRef &Name, const Type *Ty) {
// getNamedAlias - Look up the specified global in the module symbol table.
// If it does not exist, return null.
//
-GlobalAlias *Module::getNamedAlias(const StringRef &Name) const {
+GlobalAlias *Module::getNamedAlias(StringRef Name) const {
return dyn_cast_or_null<GlobalAlias>(getNamedValue(Name));
}
/// getNamedMetadata - Return the first NamedMDNode in the module with the
/// specified name. This method returns null if a NamedMDNode with the
/// specified name is not found.
-NamedMDNode *Module::getNamedMetadata(const StringRef &Name) const {
+NamedMDNode *Module::getNamedMetadata(StringRef Name) const {
return dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name));
}
/// getOrInsertNamedMetadata - Return the first named MDNode in the module
/// with the specified name. This method returns a new NamedMDNode if a
/// NamedMDNode with the specified name is not found.
-NamedMDNode *Module::getOrInsertNamedMetadata(const StringRef &Name) {
+NamedMDNode *Module::getOrInsertNamedMetadata(StringRef Name) {
NamedMDNode *NMD =
dyn_cast_or_null<NamedMDNode>(getValueSymbolTable().lookup(Name));
if (!NMD)
@@ -317,7 +316,7 @@ NamedMDNode *Module::getOrInsertNamedMetadata(const StringRef &Name) {
// there is already an entry for this name, true is returned and the symbol
// table is not modified.
//
-bool Module::addTypeName(const StringRef &Name, const Type *Ty) {
+bool Module::addTypeName(StringRef Name, const Type *Ty) {
TypeSymbolTable &ST = getTypeSymbolTable();
if (ST.lookup(Name)) return true; // Already in symtab...
@@ -331,7 +330,7 @@ bool Module::addTypeName(const StringRef &Name, const Type *Ty) {
/// getTypeByName - Return the type with the specified name in this module, or
/// null if there is none by that name.
-const Type *Module::getTypeByName(const StringRef &Name) const {
+const Type *Module::getTypeByName(StringRef Name) const {
const TypeSymbolTable &ST = getTypeSymbolTable();
return cast_or_null<Type>(ST.lookup(Name));
}
@@ -377,14 +376,14 @@ void Module::dropAllReferences() {
I->dropAllReferences();
}
-void Module::addLibrary(const StringRef& Lib) {
+void Module::addLibrary(StringRef Lib) {
for (Module::lib_iterator I = lib_begin(), E = lib_end(); I != E; ++I)
if (*I == Lib)
return;
LibraryList.push_back(Lib);
}
-void Module::removeLibrary(const StringRef& Lib) {
+void Module::removeLibrary(StringRef Lib) {
LibraryListType::iterator I = LibraryList.begin();
LibraryListType::iterator E = LibraryList.end();
for (;I != E; ++I)
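
All of the Module query/insert entry points above now take StringRef by value; the variadic, NULL-terminated getOrInsertFunction form is otherwise unchanged. A small sketch mirroring the malloc prototype used by createMalloc in Instructions.cpp earlier; the 64-bit size_t is an assumption for illustration:

#include "llvm/DerivedTypes.h"
#include "llvm/LLVMContext.h"
#include "llvm/Module.h"

static llvm::Constant *declareMalloc(llvm::Module &M) {
  llvm::LLVMContext &Ctx = M.getContext();
  // void *malloc(size_t), with size_t assumed to be 64 bits here.
  return M.getOrInsertFunction("malloc",
                               llvm::Type::getInt8PtrTy(Ctx),
                               llvm::Type::getInt64Ty(Ctx),
                               NULL);
}
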
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index a17eed8..1232fe2 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -149,7 +149,7 @@ public:
return I != PassInfoMap.end() ? I->second : 0;
}
- const PassInfo *GetPassInfo(const StringRef &Arg) const {
+ const PassInfo *GetPassInfo(StringRef Arg) const {
StringMapType::const_iterator I = PassInfoStringMap.find(Arg);
return I != PassInfoStringMap.end() ? I->second : 0;
}
@@ -238,7 +238,7 @@ const PassInfo *Pass::lookupPassInfo(intptr_t TI) {
return getPassRegistrar()->GetPassInfo(TI);
}
-const PassInfo *Pass::lookupPassInfo(const StringRef &Arg) {
+const PassInfo *Pass::lookupPassInfo(StringRef Arg) {
return getPassRegistrar()->GetPassInfo(Arg);
}
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index eb097ed..d3d61f5 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -746,7 +746,7 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
}
/// Remove analysis passes that are not used any longer
-void PMDataManager::removeDeadPasses(Pass *P, const StringRef &Msg,
+void PMDataManager::removeDeadPasses(Pass *P, StringRef Msg,
enum PassDebuggingString DBG_STR) {
SmallVector<Pass *, 12> DeadPasses;
@@ -768,7 +768,7 @@ void PMDataManager::removeDeadPasses(Pass *P, const StringRef &Msg,
freePass(*I, Msg, DBG_STR);
}
-void PMDataManager::freePass(Pass *P, const StringRef &Msg,
+void PMDataManager::freePass(Pass *P, StringRef Msg,
enum PassDebuggingString DBG_STR) {
dumpPassInfo(P, FREEING_MSG, DBG_STR, Msg);
@@ -972,7 +972,7 @@ void PMDataManager::dumpPassArguments() const {
void PMDataManager::dumpPassInfo(Pass *P, enum PassDebuggingString S1,
enum PassDebuggingString S2,
- const StringRef &Msg) {
+ StringRef Msg) {
if (PassDebugging < Executions)
return;
errs() << (void*)this << std::string(getDepth()*2+1, ' ');
@@ -1028,7 +1028,7 @@ void PMDataManager::dumpPreservedSet(const Pass *P) const {
dumpAnalysisUsage("Preserved", P, analysisUsage.getPreservedSet());
}
-void PMDataManager::dumpAnalysisUsage(const StringRef &Msg, const Pass *P,
+void PMDataManager::dumpAnalysisUsage(StringRef Msg, const Pass *P,
const AnalysisUsage::VectorType &Set) const {
assert(PassDebugging >= Details);
if (Set.empty())
diff --git a/lib/VMCore/TypeSymbolTable.cpp b/lib/VMCore/TypeSymbolTable.cpp
index 3440a77..0d0cdf5 100644
--- a/lib/VMCore/TypeSymbolTable.cpp
+++ b/lib/VMCore/TypeSymbolTable.cpp
@@ -31,7 +31,7 @@ TypeSymbolTable::~TypeSymbolTable() {
}
}
-std::string TypeSymbolTable::getUniqueName(const StringRef &BaseName) const {
+std::string TypeSymbolTable::getUniqueName(StringRef BaseName) const {
std::string TryName = BaseName;
const_iterator End = tmap.end();
@@ -43,7 +43,7 @@ std::string TypeSymbolTable::getUniqueName(const StringRef &BaseName) const {
}
// lookup a type by name - returns null on failure
-Type* TypeSymbolTable::lookup(const StringRef &Name) const {
+Type* TypeSymbolTable::lookup(StringRef Name) const {
const_iterator TI = tmap.find(Name);
Type* result = 0;
if (TI != tmap.end())
@@ -51,7 +51,6 @@ Type* TypeSymbolTable::lookup(const StringRef &Name) const {
return result;
}
-
// remove - Remove a type from the symbol table...
Type* TypeSymbolTable::remove(iterator Entry) {
assert(Entry != tmap.end() && "Invalid entry to remove!");
@@ -80,7 +79,7 @@ Type* TypeSymbolTable::remove(iterator Entry) {
// insert - Insert a type into the symbol table with the specified name...
-void TypeSymbolTable::insert(const StringRef &Name, const Type* T) {
+void TypeSymbolTable::insert(StringRef Name, const Type* T) {
assert(T && "Can't insert null type into symbol table!");
if (tmap.insert(std::make_pair(Name, T)).second) {
diff --git a/lib/VMCore/ValueSymbolTable.cpp b/lib/VMCore/ValueSymbolTable.cpp
index 7765a98..9d39a50 100644
--- a/lib/VMCore/ValueSymbolTable.cpp
+++ b/lib/VMCore/ValueSymbolTable.cpp
@@ -77,7 +77,7 @@ void ValueSymbolTable::removeValueName(ValueName *V) {
/// createValueName - This method attempts to create a value name and insert
/// it into the symbol table with the specified name. If it conflicts, it
/// auto-renames the name and returns that instead.
-ValueName *ValueSymbolTable::createValueName(const StringRef &Name, Value *V) {
+ValueName *ValueSymbolTable::createValueName(StringRef Name, Value *V) {
// In the common case, the name is not already in the symbol table.
ValueName &Entry = vmap.GetOrCreateValue(Name);
if (Entry.getValue() == 0) {
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 5990e48..7ab7b15 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -780,9 +780,13 @@ void Verifier::visitSwitchInst(SwitchInst &SI) {
// Check to make sure that all of the constants in the switch instruction
// have the same type as the switched-on value.
const Type *SwitchTy = SI.getCondition()->getType();
- for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i)
+ SmallPtrSet<ConstantInt*, 32> Constants;
+ for (unsigned i = 1, e = SI.getNumCases(); i != e; ++i) {
Assert1(SI.getCaseValue(i)->getType() == SwitchTy,
"Switch constants must all be same type as switch value!", &SI);
+ Assert2(Constants.insert(SI.getCaseValue(i)),
+ "Duplicate integer as switch case", &SI, SI.getCaseValue(i));
+ }
visitTerminatorInst(SI);
}
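
The added Assert2 above relies on SmallPtrSet::insert returning false when the case value is already present. The same duplicate-detection idiom in standalone form, with plain ints standing in for the ConstantInt case values:

#include <set>

static bool hasDuplicateCase(const int *CaseValues, unsigned NumCases) {
  std::set<int> Seen;
  for (unsigned i = 0; i != NumCases; ++i)
    if (!Seen.insert(CaseValues[i]).second)
      return true;    // insert() reports the value was already there
  return false;
}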