author    rdivacky <rdivacky@FreeBSD.org>  2010-03-03 17:27:15 +0000
committer rdivacky <rdivacky@FreeBSD.org>  2010-03-03 17:27:15 +0000
commit    8230c40430a1325b5cc5bc0221931487b4bd573c (patch)
tree      836a05cff50ca46176117b86029f061fa4db54f0 /lib
parent    f25ddd991a5601d0101602c4c263a58c7af4b8a2 (diff)
Update LLVM to 97654.
Diffstat (limited to 'lib')
-rw-r--r--  lib/Analysis/AliasAnalysisCounter.cpp | 2
-rw-r--r--  lib/Analysis/AliasAnalysisEvaluator.cpp | 6
-rw-r--r--  lib/Analysis/BasicAliasAnalysis.cpp | 4
-rw-r--r--  lib/Analysis/CaptureTracking.cpp | 2
-rw-r--r--  lib/Analysis/ConstantFolding.cpp | 53
-rw-r--r--  lib/Analysis/DebugInfo.cpp | 9
-rw-r--r--  lib/Analysis/IPA/Andersens.cpp | 2868
-rw-r--r--  lib/Analysis/IPA/CMakeLists.txt | 1
-rw-r--r--  lib/Analysis/IPA/GlobalsModRef.cpp | 4
-rw-r--r--  lib/Analysis/IVUsers.cpp | 9
-rw-r--r--  lib/Analysis/InlineCost.cpp | 4
-rw-r--r--  lib/Analysis/InstructionSimplify.cpp | 26
-rw-r--r--  lib/Analysis/MemoryDependenceAnalysis.cpp | 15
-rw-r--r--  lib/Analysis/PHITransAddr.cpp | 60
-rw-r--r--  lib/Analysis/PointerTracking.cpp | 2
-rw-r--r--  lib/Analysis/ScalarEvolution.cpp | 214
-rw-r--r--  lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 8
-rw-r--r--  lib/Analysis/ScalarEvolutionExpander.cpp | 298
-rw-r--r--  lib/Analysis/ValueTracking.cpp | 12
-rw-r--r--  lib/AsmParser/LLParser.cpp | 48
-rw-r--r--  lib/Bitcode/Reader/BitReader.cpp | 46
-rw-r--r--  lib/Bitcode/Reader/BitcodeReader.cpp | 13
-rw-r--r--  lib/Bitcode/Writer/ValueEnumerator.cpp | 5
-rw-r--r--  lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 42
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 17
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfException.cpp | 79
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfPrinter.cpp | 16
-rw-r--r--  lib/CodeGen/AsmPrinter/DwarfPrinter.h | 3
-rw-r--r--  lib/CodeGen/CMakeLists.txt | 1
-rw-r--r--  lib/CodeGen/CalcSpillWeights.cpp | 5
-rw-r--r--  lib/CodeGen/CodePlacementOpt.cpp | 15
-rw-r--r--  lib/CodeGen/DeadMachineInstructionElim.cpp | 2
-rw-r--r--  lib/CodeGen/LLVMTargetMachine.cpp | 33
-rw-r--r--  lib/CodeGen/LiveIntervalAnalysis.cpp | 139
-rw-r--r--  lib/CodeGen/LiveVariables.cpp | 1
-rw-r--r--  lib/CodeGen/MachineBasicBlock.cpp | 30
-rw-r--r--  lib/CodeGen/MachineCSE.cpp | 208
-rw-r--r--  lib/CodeGen/MachineFunction.cpp | 3
-rw-r--r--  lib/CodeGen/MachineInstr.cpp | 33
-rw-r--r--  lib/CodeGen/MachineLICM.cpp | 45
-rw-r--r--  lib/CodeGen/MachineSink.cpp | 2
-rw-r--r--  lib/CodeGen/PBQP/HeuristicSolver.h | 4
-rw-r--r--  lib/CodeGen/PHIElimination.cpp | 10
-rw-r--r--  lib/CodeGen/PHIElimination.h | 43
-rw-r--r--  lib/CodeGen/Passes.cpp | 2
-rw-r--r--  lib/CodeGen/PrologEpilogInserter.cpp | 7
-rw-r--r--  lib/CodeGen/RegAllocLinearScan.cpp | 4
-rw-r--r--  lib/CodeGen/RegAllocPBQP.cpp | 6
-rw-r--r--  lib/CodeGen/ScheduleDAGInstrs.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 73
-rw-r--r--  lib/CodeGen/SelectionDAG/FastISel.cpp | 28
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 11
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 4
-rw-r--r--  lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 3
-rw-r--r--  lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 16
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 220
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 125
-rw-r--r--  lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 1294
-rw-r--r--  lib/CodeGen/SelectionDAG/TargetLowering.cpp | 51
-rw-r--r--  lib/CodeGen/SimpleRegisterCoalescing.cpp | 23
-rw-r--r--  lib/CodeGen/TargetInstrInfoImpl.cpp | 36
-rw-r--r--  lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 34
-rw-r--r--  lib/CodeGen/TwoAddressInstructionPass.cpp | 4
-rw-r--r--  lib/CodeGen/VirtRegMap.cpp | 6
-rw-r--r--  lib/CodeGen/VirtRegRewriter.cpp | 2
-rw-r--r--  lib/CompilerDriver/CompilationGraph.cpp | 5
-rw-r--r--  lib/CompilerDriver/Main.cpp | 7
-rw-r--r--  lib/CompilerDriver/Tool.cpp | 21
-rw-r--r--  lib/ExecutionEngine/ExecutionEngine.cpp | 4
-rw-r--r--  lib/ExecutionEngine/ExecutionEngineBindings.cpp | 77
-rw-r--r--  lib/ExecutionEngine/Interpreter/Execution.cpp | 10
-rw-r--r--  lib/ExecutionEngine/JIT/JIT.cpp | 6
-rw-r--r--  lib/Linker/LinkModules.cpp | 2
-rw-r--r--  lib/MC/CMakeLists.txt | 1
-rw-r--r--  lib/MC/MCAsmStreamer.cpp | 10
-rw-r--r--  lib/MC/MCAssembler.cpp | 82
-rw-r--r--  lib/MC/MCMachOStreamer.cpp | 21
-rw-r--r--  lib/MC/MCNullStreamer.cpp | 3
-rw-r--r--  lib/MC/MCParser/AsmParser.cpp | 26
-rw-r--r--  lib/MC/TargetAsmBackend.cpp | 19
-rw-r--r--  lib/Support/APFloat.cpp | 86
-rw-r--r--  lib/Support/APInt.cpp | 10
-rw-r--r--  lib/Support/CommandLine.cpp | 14
-rw-r--r--  lib/Support/FormattedStream.cpp | 2
-rw-r--r--  lib/Support/GraphWriter.cpp | 2
-rw-r--r--  lib/Support/MemoryBuffer.cpp | 3
-rw-r--r--  lib/Support/Regex.cpp | 76
-rw-r--r--  lib/Support/StringRef.cpp | 105
-rw-r--r--  lib/Support/Triple.cpp | 9
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.cpp | 24
-rw-r--r--  lib/Target/ARM/ARMBaseInstrInfo.h | 4
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.cpp | 107
-rw-r--r--  lib/Target/ARM/ARMBaseRegisterInfo.h | 1
-rw-r--r--  lib/Target/ARM/ARMISelDAGToDAG.cpp | 7
-rw-r--r--  lib/Target/ARM/ARMISelLowering.cpp | 111
-rw-r--r--  lib/Target/ARM/ARMISelLowering.h | 6
-rw-r--r--  lib/Target/ARM/ARMInstrFormats.td | 74
-rw-r--r--  lib/Target/ARM/ARMInstrInfo.td | 708
-rw-r--r--  lib/Target/ARM/ARMInstrNEON.td | 803
-rw-r--r--  lib/Target/ARM/ARMInstrThumb.td | 118
-rw-r--r--  lib/Target/ARM/ARMInstrThumb2.td | 677
-rw-r--r--  lib/Target/ARM/ARMInstrVFP.td | 2
-rw-r--r--  lib/Target/ARM/ARMJITInfo.cpp | 4
-rw-r--r--  lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 10
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 7
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp | 6
-rw-r--r--  lib/Target/ARM/AsmPrinter/ARMInstPrinter.h | 1
-rw-r--r--  lib/Target/ARM/README.txt | 2
-rw-r--r--  lib/Target/ARM/Thumb1RegisterInfo.cpp | 6
-rw-r--r--  lib/Target/Alpha/AlphaISelDAGToDAG.cpp | 15
-rw-r--r--  lib/Target/Alpha/AlphaInstrInfo.td | 6
-rw-r--r--  lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp | 10
-rw-r--r--  lib/Target/CBackend/CBackend.cpp | 247
-rw-r--r--  lib/Target/CBackend/CTargetMachine.h | 3
-rw-r--r--  lib/Target/CellSPU/SPU64InstrInfo.td | 8
-rw-r--r--  lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 129
-rw-r--r--  lib/Target/CellSPU/SPUISelLowering.cpp | 3
-rw-r--r--  lib/Target/CppBackend/CPPBackend.cpp | 35
-rw-r--r--  lib/Target/CppBackend/CPPTargetMachine.h | 3
-rw-r--r--  lib/Target/MBlaze/AsmPrinter/CMakeLists.txt | 9
-rw-r--r--  lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp | 302
-rw-r--r--  lib/Target/MBlaze/AsmPrinter/Makefile | 17
-rw-r--r--  lib/Target/MBlaze/CMakeLists.txt | 27
-rw-r--r--  lib/Target/MBlaze/MBlaze.h | 39
-rw-r--r--  lib/Target/MBlaze/MBlaze.td | 85
-rw-r--r--  lib/Target/MBlaze/MBlazeCallingConv.td | 41
-rw-r--r--  lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 75
-rw-r--r--  lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp | 339
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.cpp | 881
-rw-r--r--  lib/Target/MBlaze/MBlazeISelLowering.h | 146
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFPU.td | 223
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFSL.td | 153
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrFormats.td | 246
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.cpp | 222
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.h | 242
-rw-r--r--  lib/Target/MBlaze/MBlazeInstrInfo.td | 672
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 109
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsicInfo.h | 33
-rw-r--r--  lib/Target/MBlaze/MBlazeIntrinsics.td | 137
-rw-r--r--  lib/Target/MBlaze/MBlazeMCAsmInfo.cpp | 27
-rw-r--r--  lib/Target/MBlaze/MBlazeMCAsmInfo.h | 30
-rw-r--r--  lib/Target/MBlaze/MBlazeMachineFunction.h | 136
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 378
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.h | 90
-rw-r--r--  lib/Target/MBlaze/MBlazeRegisterInfo.td | 186
-rw-r--r--  lib/Target/MBlaze/MBlazeSchedule.td | 63
-rw-r--r--  lib/Target/MBlaze/MBlazeSubtarget.cpp | 31
-rw-r--r--  lib/Target/MBlaze/MBlazeSubtarget.h | 79
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.cpp | 66
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetMachine.h | 69
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetObjectFile.cpp | 88
-rw-r--r--  lib/Target/MBlaze/MBlazeTargetObjectFile.h | 41
-rw-r--r--  lib/Target/MBlaze/Makefile | 23
-rw-r--r--  lib/Target/MBlaze/TargetInfo/CMakeLists.txt | 7
-rw-r--r--  lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp | 19
-rw-r--r--  lib/Target/MBlaze/TargetInfo/Makefile | 15
-rw-r--r--  lib/Target/MSIL/MSILWriter.cpp | 10
-rw-r--r--  lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 327
-rw-r--r--  lib/Target/MSP430/MSP430ISelLowering.cpp | 13
-rw-r--r--  lib/Target/MSP430/MSP430InstrInfo.td | 2
-rw-r--r--  lib/Target/Mips/MipsISelDAGToDAG.cpp | 35
-rw-r--r--  lib/Target/Mips/MipsInstrInfo.td | 9
-rw-r--r--  lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp | 22
-rw-r--r--  lib/Target/PIC16/PIC16ABINames.h | 55
-rw-r--r--  lib/Target/PIC16/PIC16DebugInfo.cpp | 2
-rw-r--r--  lib/Target/PIC16/PIC16ISelDAGToDAG.cpp | 10
-rw-r--r--  lib/Target/PIC16/PIC16ISelDAGToDAG.h | 4
-rw-r--r--  lib/Target/PIC16/PIC16ISelLowering.cpp | 25
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp | 299
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Cloner.h | 83
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp | 26
-rw-r--r--  lib/Target/PIC16/PIC16Passes/PIC16Overlay.h | 23
-rw-r--r--  lib/Target/PowerPC/PPCHazardRecognizers.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 12
-rw-r--r--  lib/Target/PowerPC/PPCISelLowering.cpp | 2
-rw-r--r--  lib/Target/PowerPC/PPCInstr64Bit.td | 11
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.cpp | 50
-rw-r--r--  lib/Target/PowerPC/PPCInstrInfo.td | 28
-rw-r--r--  lib/Target/PowerPC/PPCRegisterInfo.td | 12
-rw-r--r--  lib/Target/PowerPC/README.txt | 2
-rw-r--r--  lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp | 46
-rw-r--r--  lib/Target/Sparc/README.txt | 1
-rw-r--r--  lib/Target/Sparc/SparcISelDAGToDAG.cpp | 16
-rw-r--r--  lib/Target/Sparc/SparcMachineFunctionInfo.h | 2
-rw-r--r--  lib/Target/SystemZ/SystemZISelDAGToDAG.cpp | 56
-rw-r--r--  lib/Target/SystemZ/SystemZInstrFP.td | 2
-rw-r--r--  lib/Target/SystemZ/SystemZInstrInfo.td | 2
-rw-r--r--  lib/Target/SystemZ/SystemZMachineFunctionInfo.h | 3
-rw-r--r--  lib/Target/TargetData.cpp | 2
-rw-r--r--  lib/Target/TargetLoweringObjectFile.cpp | 2
-rw-r--r--  lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp | 2
-rw-r--r--  lib/Target/X86/CMakeLists.txt | 1
-rw-r--r--  lib/Target/X86/README-SSE.txt | 8
-rw-r--r--  lib/Target/X86/README.txt | 5
-rw-r--r--  lib/Target/X86/X86.h | 11
-rw-r--r--  lib/Target/X86/X86AsmBackend.cpp | 34
-rw-r--r--  lib/Target/X86/X86FastISel.cpp | 2
-rw-r--r--  lib/Target/X86/X86ISelDAGToDAG.cpp | 383
-rw-r--r--  lib/Target/X86/X86ISelLowering.cpp | 286
-rw-r--r--  lib/Target/X86/X86ISelLowering.h | 2
-rw-r--r--  lib/Target/X86/X86Instr64bit.td | 36
-rw-r--r--  lib/Target/X86/X86InstrFPStack.td | 2
-rw-r--r--  lib/Target/X86/X86InstrInfo.cpp | 22
-rw-r--r--  lib/Target/X86/X86InstrInfo.td | 173
-rw-r--r--  lib/Target/X86/X86InstrMMX.td | 17
-rw-r--r--  lib/Target/X86/X86InstrSSE.td | 416
-rw-r--r--  lib/Target/X86/X86MCAsmInfo.cpp | 11
-rw-r--r--  lib/Target/X86/X86RegisterInfo.cpp | 10
-rw-r--r--  lib/Target/X86/X86RegisterInfo.h | 3
-rw-r--r--  lib/Target/X86/X86RegisterInfo.td | 63
-rw-r--r--  lib/Target/X86/X86Subtarget.h | 23
-rw-r--r--  lib/Target/X86/X86TargetMachine.cpp | 6
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.cpp | 64
-rw-r--r--  lib/Target/X86/X86TargetObjectFile.h | 22
-rw-r--r--  lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp | 23
-rw-r--r--  lib/Target/XCore/XCoreISelDAGToDAG.cpp | 17
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.cpp | 43
-rw-r--r--  lib/Target/XCore/XCoreISelLowering.h | 10
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.cpp | 13
-rw-r--r--  lib/Target/XCore/XCoreInstrInfo.td | 38
-rw-r--r--  lib/Transforms/Hello/Hello.cpp | 1
-rw-r--r--  lib/Transforms/IPO/ArgumentPromotion.cpp | 6
-rw-r--r--  lib/Transforms/IPO/DeadArgumentElimination.cpp | 4
-rw-r--r--  lib/Transforms/IPO/FunctionAttrs.cpp | 8
-rw-r--r--  lib/Transforms/IPO/GlobalOpt.cpp | 174
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAddSub.cpp | 6
-rw-r--r--  lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 10
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCalls.cpp | 23
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCasts.cpp | 22
-rw-r--r--  lib/Transforms/InstCombine/InstCombineCompares.cpp | 22
-rw-r--r--  lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp | 28
-rw-r--r--  lib/Transforms/InstCombine/InstCombineMulDivRem.cpp | 8
-rw-r--r--  lib/Transforms/InstCombine/InstCombinePHI.cpp | 4
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSelect.cpp | 28
-rw-r--r--  lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp | 4
-rw-r--r--  lib/Transforms/InstCombine/InstCombineVectorOps.cpp | 4
-rw-r--r--  lib/Transforms/InstCombine/InstructionCombining.cpp | 8
-rw-r--r--  lib/Transforms/Scalar/ABCD.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/CodeGenPrepare.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/GVN.cpp | 135
-rw-r--r--  lib/Transforms/Scalar/IndVarSimplify.cpp | 74
-rw-r--r--  lib/Transforms/Scalar/JumpThreading.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LICM.cpp | 4
-rw-r--r--  lib/Transforms/Scalar/LoopStrengthReduce.cpp | 364
-rw-r--r--  lib/Transforms/Scalar/LoopUnswitch.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/Reassociate.cpp | 2
-rw-r--r--  lib/Transforms/Scalar/SCCP.cpp | 58
-rw-r--r--  lib/Transforms/Scalar/ScalarReplAggregates.cpp | 36
-rw-r--r--  lib/Transforms/Scalar/SimplifyLibCalls.cpp | 318
-rw-r--r--  lib/Transforms/Utils/AddrModeMatcher.cpp | 8
-rw-r--r--  lib/Transforms/Utils/BasicBlockUtils.cpp | 29
-rw-r--r--  lib/Transforms/Utils/Local.cpp | 4
-rw-r--r--  lib/Transforms/Utils/LoopSimplify.cpp | 48
-rw-r--r--  lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 12
-rw-r--r--  lib/Transforms/Utils/SimplifyCFG.cpp | 10
-rw-r--r--  lib/VMCore/AsmWriter.cpp | 38
-rw-r--r--  lib/VMCore/Attributes.cpp | 2
-rw-r--r--  lib/VMCore/ConstantFold.cpp | 97
-rw-r--r--  lib/VMCore/Constants.cpp | 67
-rw-r--r--  lib/VMCore/Core.cpp | 196
-rw-r--r--  lib/VMCore/Function.cpp | 12
-rw-r--r--  lib/VMCore/InlineAsm.cpp | 2
-rw-r--r--  lib/VMCore/Instructions.cpp | 62
-rw-r--r--  lib/VMCore/LLVMContextImpl.h | 12
-rw-r--r--  lib/VMCore/Metadata.cpp | 7
-rw-r--r--  lib/VMCore/PassManager.cpp | 25
-rw-r--r--  lib/VMCore/Type.cpp | 30
-rw-r--r--  lib/VMCore/Value.cpp | 12
-rw-r--r--  lib/VMCore/ValueTypes.cpp | 3
-rw-r--r--  lib/VMCore/Verifier.cpp | 48
270 files changed, 13415 insertions, 7093 deletions
diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp
index 761cd46..1053955 100644
--- a/lib/Analysis/AliasAnalysisCounter.cpp
+++ b/lib/Analysis/AliasAnalysisCounter.cpp
@@ -162,7 +162,7 @@ AliasAnalysisCounter::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
errs() << MRString << ": Ptr: ";
errs() << "[" << Size << "B] ";
WriteAsOperand(errs(), P, true, M);
- errs() << "\t<->" << *CS.getInstruction();
+ errs() << "\t<->" << *CS.getInstruction() << '\n';
}
return R;
}
diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp
index 6b0a956..308b9e3 100644
--- a/lib/Analysis/AliasAnalysisEvaluator.cpp
+++ b/lib/Analysis/AliasAnalysisEvaluator.cpp
@@ -115,11 +115,11 @@ bool AAEval::runOnFunction(Function &F) {
SetVector<CallSite> CallSites;
for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I)
- if (isa<PointerType>(I->getType())) // Add all pointer arguments
+ if (I->getType()->isPointerTy()) // Add all pointer arguments
Pointers.insert(I);
for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- if (isa<PointerType>(I->getType())) // Add all pointer instructions
+ if (I->getType()->isPointerTy()) // Add all pointer instructions
Pointers.insert(&*I);
Instruction &Inst = *I;
User::op_iterator OI = Inst.op_begin();
@@ -128,7 +128,7 @@ bool AAEval::runOnFunction(Function &F) {
isa<Function>(CS.getCalledValue()))
++OI; // Skip actual functions for direct function calls.
for (; OI != Inst.op_end(); ++OI)
- if (isa<PointerType>((*OI)->getType()) && !isa<ConstantPointerNull>(*OI))
+ if ((*OI)->getType()->isPointerTy() && !isa<ConstantPointerNull>(*OI))
Pointers.insert(*OI);
if (CS.getInstruction()) CallSites.insert(CS);
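
A recurring cleanup in this import replaces isa<PointerType>(T) with the
Type::isPointerTy() predicate introduced around this LLVM revision. A minimal
sketch of the two spellings (illustrative only; the helper names below are
hypothetical, not part of the diff):

    #include "llvm/DerivedTypes.h"   // PointerType, isa<>
    using namespace llvm;

    // Pre-import spelling: dispatch through the isa<> cast machinery.
    static bool isPointerOld(const Type *T) { return isa<PointerType>(T); }

    // Post-import spelling: a direct predicate on Type; same answer,
    // without naming PointerType at the call site.
    static bool isPointerNew(const Type *T) { return T->isPointerTy(); }

Both return true exactly when T is an llvm::PointerType; the substitution is
mechanical throughout the rest of this diff.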
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index 36b831c..31a649d 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -290,7 +290,7 @@ BasicAliasAnalysis::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI, ++ArgNo) {
// Only look at the no-capture pointer arguments.
- if (!isa<PointerType>((*CI)->getType()) ||
+ if (!(*CI)->getType()->isPointerTy() ||
!CS.paramHasAttr(ArgNo+1, Attribute::NoCapture))
continue;
@@ -662,7 +662,7 @@ BasicAliasAnalysis::aliasCheck(const Value *V1, unsigned V1Size,
// Are we checking for alias of the same value?
if (V1 == V2) return MustAlias;
- if (!isa<PointerType>(V1->getType()) || !isa<PointerType>(V2->getType()))
+ if (!V1->getType()->isPointerTy() || !V2->getType()->isPointerTy())
return NoAlias; // Scalars cannot alias each other
// Figure out what objects these things are pointing to if we can.
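
The getModRefInfo hunk above skips arguments unless they are pointers marked
no-capture. Note the ArgNo+1: in this era of the LLVM API, paramHasAttr
numbers parameters from 1 and reserves index 0 for the return value. A small
sketch of that convention (the wrapper is hypothetical, assuming the
CallSite/Attributes headers of this tree):

    #include "llvm/Support/CallSite.h"   // CallSite
    #include "llvm/Attributes.h"         // Attribute::NoCapture
    using namespace llvm;

    // ArgNo is 0-based (as in the loop above); paramHasAttr is 1-based,
    // with index 0 naming the return value, hence the +1 shift.
    static bool argIsNoCapture(CallSite CS, unsigned ArgNo) {
      return CS.paramHasAttr(ArgNo + 1, Attribute::NoCapture);
    }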
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index 10a8b11..8767c18 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -44,7 +44,7 @@ static int const Threshold = 20;
/// counts as capturing it or not.
bool llvm::PointerMayBeCaptured(const Value *V,
bool ReturnCaptures, bool StoreCaptures) {
- assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!");
+ assert(V->getType()->isPointerTy() && "Capture is for pointers only!");
SmallVector<Use*, Threshold> Worklist;
SmallSet<Use*, Threshold> Visited;
int Count = 0;
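
The reworded assertion keeps PointerMayBeCaptured's contract: callers must
hand it a pointer-typed value. A hedged usage sketch built from the signature
shown in the hunk (the wrapper itself is hypothetical):

    #include "llvm/Analysis/CaptureTracking.h"
    using namespace llvm;

    // Conservatively ask whether the address in V can escape the function.
    // ReturnCaptures = true: returning V counts as a capture.
    // StoreCaptures  = true: storing V anywhere counts as a capture.
    static bool mayEscape(const Value *V) {
      return PointerMayBeCaptured(V, /*ReturnCaptures=*/true,
                                  /*StoreCaptures=*/true);
    }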
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 808e6fa..114db2d 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -359,7 +359,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C,
MapTy = Type::getInt32PtrTy(C->getContext());
else if (LoadTy->isDoubleTy())
MapTy = Type::getInt64PtrTy(C->getContext());
- else if (isa<VectorType>(LoadTy)) {
+ else if (LoadTy->isVectorTy()) {
MapTy = IntegerType::get(C->getContext(),
TD.getTypeAllocSizeInBits(LoadTy));
MapTy = PointerType::getUnqual(MapTy);
@@ -605,7 +605,7 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
SmallVector<Constant*, 32> NewIdxs;
do {
if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
- if (isa<PointerType>(ATy)) {
+ if (ATy->isPointerTy()) {
// The only pointer indexing we'll do is on the first index of the GEP.
if (!NewIdxs.empty())
break;
@@ -783,45 +783,12 @@ Constant *llvm::ConstantFoldInstOperands(unsigned Opcode, const Type *DestTy,
// If the input is a ptrtoint, turn the pair into a ptr to ptr bitcast if
// the int size is >= the ptr size. This requires knowing the width of a
// pointer, so it can't be done in ConstantExpr::getCast.
- if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0])) {
+ if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Ops[0]))
if (TD &&
- TD->getPointerSizeInBits() <=
- CE->getType()->getScalarSizeInBits()) {
- if (CE->getOpcode() == Instruction::PtrToInt)
- return FoldBitCast(CE->getOperand(0), DestTy, *TD);
-
- // If there's a constant offset added to the integer value before
- // it is casted back to a pointer, see if the expression can be
- // converted into a GEP.
- if (CE->getOpcode() == Instruction::Add)
- if (ConstantInt *L = dyn_cast<ConstantInt>(CE->getOperand(0)))
- if (ConstantExpr *R = dyn_cast<ConstantExpr>(CE->getOperand(1)))
- if (R->getOpcode() == Instruction::PtrToInt)
- if (GlobalVariable *GV =
- dyn_cast<GlobalVariable>(R->getOperand(0))) {
- const PointerType *GVTy = cast<PointerType>(GV->getType());
- if (const ArrayType *AT =
- dyn_cast<ArrayType>(GVTy->getElementType())) {
- const Type *ElTy = AT->getElementType();
- uint64_t AllocSize = TD->getTypeAllocSize(ElTy);
- APInt PSA(L->getValue().getBitWidth(), AllocSize);
- if (ElTy == cast<PointerType>(DestTy)->getElementType() &&
- L->getValue().urem(PSA) == 0) {
- APInt ElemIdx = L->getValue().udiv(PSA);
- if (ElemIdx.ult(APInt(ElemIdx.getBitWidth(),
- AT->getNumElements()))) {
- Constant *Index[] = {
- Constant::getNullValue(CE->getType()),
- ConstantInt::get(ElTy->getContext(), ElemIdx)
- };
- return
- ConstantExpr::getGetElementPtr(GV, &Index[0], 2);
- }
- }
- }
- }
- }
- }
+ TD->getPointerSizeInBits() <= CE->getType()->getScalarSizeInBits() &&
+ CE->getOpcode() == Instruction::PtrToInt)
+ return FoldBitCast(CE->getOperand(0), DestTy, *TD);
+
return ConstantExpr::getCast(Opcode, Ops[0], DestTy);
case Instruction::Trunc:
case Instruction::ZExt:
@@ -1179,6 +1146,12 @@ llvm::ConstantFoldCall(Function *F,
return 0;
}
+ if (isa<UndefValue>(Operands[0])) {
+ if (Name.startswith("llvm.bswap"))
+ return Operands[0];
+ return 0;
+ }
+
return 0;
}
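
The final hunk teaches ConstantFoldCall one rule for undef operands:
byte-swapping an undefined value is still undefined, so llvm.bswap.* may fold
to its operand. A sketch of just that rule (a simplified stand-in, not the
exact LLVM code):

    #include "llvm/Constants.h"       // Constant, UndefValue
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    // For a one-operand intrinsic called on undef, llvm.bswap.* folds to
    // undef: the bytes of an unspecified value, reordered, are still
    // unspecified. Returning 0 means "no fold", as in the hunk above.
    static Constant *foldUndefIntrinsic(StringRef Name, Constant *Op) {
      if (isa<UndefValue>(Op) && Name.startswith("llvm.bswap"))
        return Op;
      return 0;
    }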
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 258f1db..5cfe666 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -1007,12 +1007,15 @@ DIVariable DIFactory::CreateComplexVariable(unsigned Tag, DIDescriptor Context,
/// CreateLexicalBlock - This creates a descriptor for a lexical block with
/// the specified parent VMContext.
-DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context) {
+DILexicalBlock DIFactory::CreateLexicalBlock(DIDescriptor Context,
+ unsigned LineNo, unsigned Col) {
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_lexical_block),
- Context.getNode()
+ Context.getNode(),
+ ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
+ ConstantInt::get(Type::getInt32Ty(VMContext), Col)
};
- return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 2));
+ return DILexicalBlock(MDNode::get(VMContext, &Elts[0], 4));
}
/// CreateNameSpace - This creates a new descriptor for a namespace
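
CreateLexicalBlock now records where the block opens, so the
DW_TAG_lexical_block descriptor grows from two operands to four. A
hypothetical call site under the new signature (DIF and ParentScope are
stand-ins for an existing DIFactory and enclosing scope descriptor):

    #include "llvm/Analysis/DebugInfo.h"
    using namespace llvm;

    // Describe a lexical block opening at line 42, column 7, nested
    // inside ParentScope.
    static DILexicalBlock makeBlock(DIFactory &DIF, DIDescriptor ParentScope) {
      return DIF.CreateLexicalBlock(ParentScope, /*LineNo=*/42, /*Col=*/7);
    }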
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
deleted file mode 100644
index 4180206..0000000
--- a/lib/Analysis/IPA/Andersens.cpp
+++ /dev/null
@@ -1,2868 +0,0 @@
-//===- Andersens.cpp - Andersen's Interprocedural Alias Analysis ----------===//
-//
-// The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
-//
-//===----------------------------------------------------------------------===//
-//
-// This file defines an implementation of Andersen's interprocedural alias
-// analysis
-//
-// In pointer analysis terms, this is a subset-based, flow-insensitive,
-// field-sensitive, and context-insensitive pointer analysis algorithm.
-//
-// This algorithm is implemented as four stages:
-// 1. Object identification.
-// 2. Inclusion constraint identification.
-// 3. Offline constraint graph optimization.
-// 4. Inclusion constraint solving.
-//
-// The object identification stage identifies all of the memory objects in the
-// program, which includes globals, heap allocated objects, and stack allocated
-// objects.
-//
-// The inclusion constraint identification stage finds all inclusion constraints
-// in the program by scanning the program, looking for pointer assignments and
-// other statements that affect the points-to graph. For a statement like "A =
-// B", this statement is processed to indicate that A can point to anything that
-// B can point to. Constraints can handle copies, loads, and stores, and
-// address taking.
-//
-// The offline constraint graph optimization portion includes offline variable
-// substitution algorithms intended to compute pointer and location
-// equivalences. Pointer equivalences are those pointers that will have the
-// same points-to sets, and location equivalences are those variables that
-// always appear together in points-to sets. It also includes an offline
-// cycle detection algorithm that allows cycles to be collapsed sooner
-// during solving.
-//
-// The inclusion constraint solving phase iteratively propagates the inclusion
-// constraints until a fixed point is reached. This is an O(N^3) algorithm.
-//
-// Function constraints are handled as if they were structs with X fields.
-// Thus, an access to argument X of function Y is an access to node index
-// getNode(Y) + X. This representation allows handling of indirect calls
-// without any issues. To wit, an indirect call Y(a,b) is equivalent to
-// *(Y + 1) = a, *(Y + 2) = b.
-// The return node for a function is always located at getNode(F) +
-// CallReturnPos. The arguments start at getNode(F) + CallArgPos.
-//
-// Future Improvements:
-// Use of BDD's.
-//===----------------------------------------------------------------------===//
-
-#define DEBUG_TYPE "anders-aa"
-#include "llvm/Constants.h"
-#include "llvm/DerivedTypes.h"
-#include "llvm/Instructions.h"
-#include "llvm/Module.h"
-#include "llvm/Pass.h"
-#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/InstIterator.h"
-#include "llvm/Support/InstVisitor.h"
-#include "llvm/Analysis/AliasAnalysis.h"
-#include "llvm/Analysis/MemoryBuiltins.h"
-#include "llvm/Analysis/Passes.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/System/Atomic.h"
-#include "llvm/ADT/Statistic.h"
-#include "llvm/ADT/SparseBitVector.h"
-#include "llvm/ADT/DenseSet.h"
-#include <algorithm>
-#include <set>
-#include <list>
-#include <map>
-#include <stack>
-#include <vector>
-#include <queue>
-
-// Determining the actual set of nodes the universal set can consist of is very
-// expensive because it means propagating around very large sets. We rely on
-// other analyses being able to determine which nodes can never be pointed to in
-// order to disambiguate further than "points-to anything".
-#define FULL_UNIVERSAL 0
-
-using namespace llvm;
-#ifndef NDEBUG
-STATISTIC(NumIters , "Number of iterations to reach convergence");
-#endif
-STATISTIC(NumConstraints, "Number of constraints");
-STATISTIC(NumNodes , "Number of nodes");
-STATISTIC(NumUnified , "Number of variables unified");
-STATISTIC(NumErased , "Number of redundant constraints erased");
-
-static const unsigned SelfRep = (unsigned)-1;
-static const unsigned Unvisited = (unsigned)-1;
-// Position of the function return node relative to the function node.
-static const unsigned CallReturnPos = 1;
-// Position of the function call node relative to the function node.
-static const unsigned CallFirstArgPos = 2;
-
-namespace {
- struct BitmapKeyInfo {
- static inline SparseBitVector<> *getEmptyKey() {
- return reinterpret_cast<SparseBitVector<> *>(-1);
- }
- static inline SparseBitVector<> *getTombstoneKey() {
- return reinterpret_cast<SparseBitVector<> *>(-2);
- }
- static unsigned getHashValue(const SparseBitVector<> *bitmap) {
- return bitmap->getHashValue();
- }
- static bool isEqual(const SparseBitVector<> *LHS,
- const SparseBitVector<> *RHS) {
- if (LHS == RHS)
- return true;
- else if (LHS == getEmptyKey() || RHS == getEmptyKey()
- || LHS == getTombstoneKey() || RHS == getTombstoneKey())
- return false;
-
- return *LHS == *RHS;
- }
- };
-
- class Andersens : public ModulePass, public AliasAnalysis,
- private InstVisitor<Andersens> {
- struct Node;
-
- /// Constraint - Objects of this structure are used to represent the various
- /// constraints identified by the algorithm. The constraints are 'copy',
- /// for statements like "A = B", 'load' for statements like "A = *B",
- /// 'store' for statements like "*A = B", and AddressOf for statements like
- /// A = alloca; The Offset is applied as *(A + K) = B for stores,
- /// A = *(B + K) for loads, and A = B + K for copies. It is
- /// illegal on addressof constraints (because it is statically
- /// resolvable to A = &C where C = B + K)
-
- struct Constraint {
- enum ConstraintType { Copy, Load, Store, AddressOf } Type;
- unsigned Dest;
- unsigned Src;
- unsigned Offset;
-
- Constraint(ConstraintType Ty, unsigned D, unsigned S, unsigned O = 0)
- : Type(Ty), Dest(D), Src(S), Offset(O) {
- assert((Offset == 0 || Ty != AddressOf) &&
- "Offset is illegal on addressof constraints");
- }
-
- bool operator==(const Constraint &RHS) const {
- return RHS.Type == Type
- && RHS.Dest == Dest
- && RHS.Src == Src
- && RHS.Offset == Offset;
- }
-
- bool operator!=(const Constraint &RHS) const {
- return !(*this == RHS);
- }
-
- bool operator<(const Constraint &RHS) const {
- if (RHS.Type != Type)
- return RHS.Type < Type;
- else if (RHS.Dest != Dest)
- return RHS.Dest < Dest;
- else if (RHS.Src != Src)
- return RHS.Src < Src;
- return RHS.Offset < Offset;
- }
- };
-
-  // Information DenseSet requires us to implement in order to be able
-  // to do its thing.
- struct PairKeyInfo {
- static inline std::pair<unsigned, unsigned> getEmptyKey() {
- return std::make_pair(~0U, ~0U);
- }
- static inline std::pair<unsigned, unsigned> getTombstoneKey() {
- return std::make_pair(~0U - 1, ~0U - 1);
- }
- static unsigned getHashValue(const std::pair<unsigned, unsigned> &P) {
- return P.first ^ P.second;
- }
- static unsigned isEqual(const std::pair<unsigned, unsigned> &LHS,
- const std::pair<unsigned, unsigned> &RHS) {
- return LHS == RHS;
- }
- };
-
- struct ConstraintKeyInfo {
- static inline Constraint getEmptyKey() {
- return Constraint(Constraint::Copy, ~0U, ~0U, ~0U);
- }
- static inline Constraint getTombstoneKey() {
- return Constraint(Constraint::Copy, ~0U - 1, ~0U - 1, ~0U - 1);
- }
- static unsigned getHashValue(const Constraint &C) {
- return C.Src ^ C.Dest ^ C.Type ^ C.Offset;
- }
- static bool isEqual(const Constraint &LHS,
- const Constraint &RHS) {
- return LHS.Type == RHS.Type && LHS.Dest == RHS.Dest
- && LHS.Src == RHS.Src && LHS.Offset == RHS.Offset;
- }
- };
-
- // Node class - This class is used to represent a node in the constraint
- // graph. Due to various optimizations, it is not always the case that
- // there is a mapping from a Node to a Value. In particular, we add
-  // artificial Nodes that represent the set of pointed-to variables shared
- // for each location equivalent Node.
- struct Node {
- private:
- static volatile sys::cas_flag Counter;
-
- public:
- Value *Val;
- SparseBitVector<> *Edges;
- SparseBitVector<> *PointsTo;
- SparseBitVector<> *OldPointsTo;
- std::list<Constraint> Constraints;
-
- // Pointer and location equivalence labels
- unsigned PointerEquivLabel;
- unsigned LocationEquivLabel;
- // Predecessor edges, both real and implicit
- SparseBitVector<> *PredEdges;
- SparseBitVector<> *ImplicitPredEdges;
- // Set of nodes that point to us, only use for location equivalence.
- SparseBitVector<> *PointedToBy;
-    // Number of incoming edges, used during variable substitution to free
-    // the points-to sets early
- unsigned NumInEdges;
- // True if our points-to set is in the Set2PEClass map
- bool StoredInHash;
- // True if our node has no indirect constraints (complex or otherwise)
- bool Direct;
- // True if the node is address taken, *or* it is part of a group of nodes
- // that must be kept together. This is set to true for functions and
- // their arg nodes, which must be kept at the same position relative to
- // their base function node.
- bool AddressTaken;
-
- // Nodes in cycles (or in equivalence classes) are united together using a
- // standard union-find representation with path compression. NodeRep
- // gives the index into GraphNodes for the representative Node.
- unsigned NodeRep;
-
- // Modification timestamp. Assigned from Counter.
- // Used for work list prioritization.
- unsigned Timestamp;
-
- explicit Node(bool direct = true) :
- Val(0), Edges(0), PointsTo(0), OldPointsTo(0),
- PointerEquivLabel(0), LocationEquivLabel(0), PredEdges(0),
- ImplicitPredEdges(0), PointedToBy(0), NumInEdges(0),
- StoredInHash(false), Direct(direct), AddressTaken(false),
- NodeRep(SelfRep), Timestamp(0) { }
-
- Node *setValue(Value *V) {
- assert(Val == 0 && "Value already set for this node!");
- Val = V;
- return this;
- }
-
- /// getValue - Return the LLVM value corresponding to this node.
- ///
- Value *getValue() const { return Val; }
-
- /// addPointerTo - Add a pointer to the list of pointees of this node,
- /// returning true if this caused a new pointer to be added, or false if
- /// we already knew about the points-to relation.
- bool addPointerTo(unsigned Node) {
- return PointsTo->test_and_set(Node);
- }
-
- /// intersects - Return true if the points-to set of this node intersects
- /// with the points-to set of the specified node.
- bool intersects(Node *N) const;
-
- /// intersectsIgnoring - Return true if the points-to set of this node
- /// intersects with the points-to set of the specified node on any nodes
- /// except for the specified node to ignore.
- bool intersectsIgnoring(Node *N, unsigned) const;
-
- // Timestamp a node (used for work list prioritization)
- void Stamp() {
- Timestamp = sys::AtomicIncrement(&Counter);
- --Timestamp;
- }
-
- bool isRep() const {
- return( (int) NodeRep < 0 );
- }
- };
-
- struct WorkListElement {
- Node* node;
- unsigned Timestamp;
- WorkListElement(Node* n, unsigned t) : node(n), Timestamp(t) {}
-
-    // Note that we reverse the sense of the comparison because we
-    // actually want to give low timestamps priority over high ones,
-    // whereas a priority queue normally gives greater values higher
-    // priority.
- bool operator<(const WorkListElement& that) const {
- return( this->Timestamp > that.Timestamp );
- }
- };
-
- // Priority-queue based work list specialized for Nodes.
- class WorkList {
- std::priority_queue<WorkListElement> Q;
-
- public:
- void insert(Node* n) {
- Q.push( WorkListElement(n, n->Timestamp) );
- }
-
- // We automatically discard non-representative nodes and nodes
- // that were in the work list twice (we keep a copy of the
- // timestamp in the work list so we can detect this situation by
- // comparing against the node's current timestamp).
- Node* pop() {
- while( !Q.empty() ) {
- WorkListElement x = Q.top(); Q.pop();
- Node* INode = x.node;
-
- if( INode->isRep() &&
- INode->Timestamp == x.Timestamp ) {
- return(x.node);
- }
- }
- return(0);
- }
-
- bool empty() {
- return Q.empty();
- }
- };
-
- /// GraphNodes - This vector is populated as part of the object
- /// identification stage of the analysis, which populates this vector with a
- /// node for each memory object and fills in the ValueNodes map.
- std::vector<Node> GraphNodes;
-
- /// ValueNodes - This map indicates the Node that a particular Value* is
- /// represented by. This contains entries for all pointers.
- DenseMap<Value*, unsigned> ValueNodes;
-
- /// ObjectNodes - This map contains entries for each memory object in the
- /// program: globals, alloca's and mallocs.
- DenseMap<Value*, unsigned> ObjectNodes;
-
- /// ReturnNodes - This map contains an entry for each function in the
- /// program that returns a value.
- DenseMap<Function*, unsigned> ReturnNodes;
-
- /// VarargNodes - This map contains the entry used to represent all pointers
- /// passed through the varargs portion of a function call for a particular
- /// function. An entry is not present in this map for functions that do not
- /// take variable arguments.
- DenseMap<Function*, unsigned> VarargNodes;
-
-
- /// Constraints - This vector contains a list of all of the constraints
- /// identified by the program.
- std::vector<Constraint> Constraints;
-
- // Map from graph node to maximum K value that is allowed (for functions,
- // this is equivalent to the number of arguments + CallFirstArgPos)
- std::map<unsigned, unsigned> MaxK;
-
- /// This enum defines the GraphNodes indices that correspond to important
- /// fixed sets.
- enum {
- UniversalSet = 0,
- NullPtr = 1,
- NullObject = 2,
- NumberSpecialNodes
- };
- // Stack for Tarjan's
- std::stack<unsigned> SCCStack;
- // Map from Graph Node to DFS number
- std::vector<unsigned> Node2DFS;
- // Map from Graph Node to Deleted from graph.
- std::vector<bool> Node2Deleted;
- // Same as Node Maps, but implemented as std::map because it is faster to
- // clear
- std::map<unsigned, unsigned> Tarjan2DFS;
- std::map<unsigned, bool> Tarjan2Deleted;
- // Current DFS number
- unsigned DFSNumber;
-
- // Work lists.
- WorkList w1, w2;
- WorkList *CurrWL, *NextWL; // "current" and "next" work lists
-
- // Offline variable substitution related things
-
- // Temporary rep storage, used because we can't collapse SCC's in the
- // predecessor graph by uniting the variables permanently, we can only do so
- // for the successor graph.
- std::vector<unsigned> VSSCCRep;
- // Mapping from node to whether we have visited it during SCC finding yet.
- std::vector<bool> Node2Visited;
- // During variable substitution, we create unknowns to represent the unknown
- // value that is a dereference of a variable. These nodes are known as
- // "ref" nodes (since they represent the value of dereferences).
- unsigned FirstRefNode;
-    // During HVN, we represent address-taken nodes as if they were
-    // unknown (since HVN, unlike HU, does not evaluate unions).
- unsigned FirstAdrNode;
- // Current pointer equivalence class number
- unsigned PEClass;
- // Mapping from points-to sets to equivalence classes
- typedef DenseMap<SparseBitVector<> *, unsigned, BitmapKeyInfo> BitVectorMap;
- BitVectorMap Set2PEClass;
- // Mapping from pointer equivalences to the representative node. -1 if we
- // have no representative node for this pointer equivalence class yet.
- std::vector<int> PEClass2Node;
- // Mapping from pointer equivalences to representative node. This includes
- // pointer equivalent but not location equivalent variables. -1 if we have
- // no representative node for this pointer equivalence class yet.
- std::vector<int> PENLEClass2Node;
- // Union/Find for HCD
- std::vector<unsigned> HCDSCCRep;
- // HCD's offline-detected cycles; "Statically DeTected"
- // -1 if not part of such a cycle, otherwise a representative node.
- std::vector<int> SDT;
- // Whether to use SDT (UniteNodes can use it during solving, but not before)
- bool SDTActive;
-
- public:
- static char ID;
- Andersens() : ModulePass(&ID) {}
-
- bool runOnModule(Module &M) {
- InitializeAliasAnalysis(this);
- IdentifyObjects(M);
- CollectConstraints(M);
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa-constraints"
- DEBUG(PrintConstraints());
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa"
- SolveConstraints();
- DEBUG(PrintPointsToGraph());
-
- // Free the constraints list, as we don't need it to respond to alias
- // requests.
- std::vector<Constraint>().swap(Constraints);
- //These are needed for Print() (-analyze in opt)
- //ObjectNodes.clear();
- //ReturnNodes.clear();
- //VarargNodes.clear();
- return false;
- }
-
- void releaseMemory() {
- // FIXME: Until we have transitively required passes working correctly,
- // this cannot be enabled! Otherwise, using -count-aa with the pass
- // causes memory to be freed too early. :(
-#if 0
-    // The memory objects and ValueNodes data structures are the only ones
-    // that are still live after construction.
- std::vector<Node>().swap(GraphNodes);
- ValueNodes.clear();
-#endif
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AliasAnalysis::getAnalysisUsage(AU);
- AU.setPreservesAll(); // Does not transform code
- }
-
- /// getAdjustedAnalysisPointer - This method is used when a pass implements
- /// an analysis interface through multiple inheritance. If needed, it
- /// should override this to adjust the this pointer as needed for the
- /// specified pass info.
- virtual void *getAdjustedAnalysisPointer(const PassInfo *PI) {
- if (PI->isPassID(&AliasAnalysis::ID))
- return (AliasAnalysis*)this;
- return this;
- }
-
- //------------------------------------------------
- // Implement the AliasAnalysis API
- //
- AliasResult alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size);
- virtual ModRefResult getModRefInfo(CallSite CS, Value *P, unsigned Size);
- virtual ModRefResult getModRefInfo(CallSite CS1, CallSite CS2);
- bool pointsToConstantMemory(const Value *P);
-
- virtual void deleteValue(Value *V) {
- ValueNodes.erase(V);
- getAnalysis<AliasAnalysis>().deleteValue(V);
- }
-
- virtual void copyValue(Value *From, Value *To) {
- ValueNodes[To] = ValueNodes[From];
- getAnalysis<AliasAnalysis>().copyValue(From, To);
- }
-
- private:
- /// getNode - Return the node corresponding to the specified pointer scalar.
- ///
- unsigned getNode(Value *V) {
- if (Constant *C = dyn_cast<Constant>(V))
- if (!isa<GlobalValue>(C))
- return getNodeForConstantPointer(C);
-
- DenseMap<Value*, unsigned>::iterator I = ValueNodes.find(V);
- if (I == ValueNodes.end()) {
-#ifndef NDEBUG
- V->dump();
-#endif
- llvm_unreachable("Value does not have a node in the points-to graph!");
- }
- return I->second;
- }
-
- /// getObject - Return the node corresponding to the memory object for the
- /// specified global or allocation instruction.
- unsigned getObject(Value *V) const {
- DenseMap<Value*, unsigned>::const_iterator I = ObjectNodes.find(V);
- assert(I != ObjectNodes.end() &&
- "Value does not have an object in the points-to graph!");
- return I->second;
- }
-
- /// getReturnNode - Return the node representing the return value for the
- /// specified function.
- unsigned getReturnNode(Function *F) const {
- DenseMap<Function*, unsigned>::const_iterator I = ReturnNodes.find(F);
- assert(I != ReturnNodes.end() && "Function does not return a value!");
- return I->second;
- }
-
- /// getVarargNode - Return the node representing the variable arguments
- /// formal for the specified function.
- unsigned getVarargNode(Function *F) const {
- DenseMap<Function*, unsigned>::const_iterator I = VarargNodes.find(F);
- assert(I != VarargNodes.end() && "Function does not take var args!");
- return I->second;
- }
-
- /// getNodeValue - Get the node for the specified LLVM value and set the
- /// value for it to be the specified value.
- unsigned getNodeValue(Value &V) {
- unsigned Index = getNode(&V);
- GraphNodes[Index].setValue(&V);
- return Index;
- }
-
- unsigned UniteNodes(unsigned First, unsigned Second,
- bool UnionByRank = true);
- unsigned FindNode(unsigned Node);
- unsigned FindNode(unsigned Node) const;
-
- void IdentifyObjects(Module &M);
- void CollectConstraints(Module &M);
- bool AnalyzeUsesOfFunction(Value *);
- void CreateConstraintGraph();
- void OptimizeConstraints();
- unsigned FindEquivalentNode(unsigned, unsigned);
- void ClumpAddressTaken();
- void RewriteConstraints();
- void HU();
- void HVN();
- void HCD();
- void Search(unsigned Node);
- void UnitePointerEquivalences();
- void SolveConstraints();
- bool QueryNode(unsigned Node);
- void Condense(unsigned Node);
- void HUValNum(unsigned Node);
- void HVNValNum(unsigned Node);
- unsigned getNodeForConstantPointer(Constant *C);
- unsigned getNodeForConstantPointerTarget(Constant *C);
- void AddGlobalInitializerConstraints(unsigned, Constant *C);
-
- void AddConstraintsForNonInternalLinkage(Function *F);
- void AddConstraintsForCall(CallSite CS, Function *F);
- bool AddConstraintsForExternalCall(CallSite CS, Function *F);
-
-
- void PrintNode(const Node *N) const;
- void PrintConstraints() const ;
- void PrintConstraint(const Constraint &) const;
- void PrintLabels() const;
- void PrintPointsToGraph() const;
-
- //===------------------------------------------------------------------===//
- // Instruction visitation methods for adding constraints
- //
- friend class InstVisitor<Andersens>;
- void visitReturnInst(ReturnInst &RI);
- void visitInvokeInst(InvokeInst &II) { visitCallSite(CallSite(&II)); }
- void visitCallInst(CallInst &CI) {
- if (isMalloc(&CI)) visitAlloc(CI);
- else visitCallSite(CallSite(&CI));
- }
- void visitCallSite(CallSite CS);
- void visitAllocaInst(AllocaInst &I);
- void visitAlloc(Instruction &I);
- void visitLoadInst(LoadInst &LI);
- void visitStoreInst(StoreInst &SI);
- void visitGetElementPtrInst(GetElementPtrInst &GEP);
- void visitPHINode(PHINode &PN);
- void visitCastInst(CastInst &CI);
- void visitICmpInst(ICmpInst &ICI) {} // NOOP!
- void visitFCmpInst(FCmpInst &ICI) {} // NOOP!
- void visitSelectInst(SelectInst &SI);
- void visitVAArg(VAArgInst &I);
- void visitInstruction(Instruction &I);
-
- //===------------------------------------------------------------------===//
-  // Implement Analyze interface
- //
- void print(raw_ostream &O, const Module*) const {
- PrintPointsToGraph();
- }
- };
-}
-
-char Andersens::ID = 0;
-static RegisterPass<Andersens>
-X("anders-aa", "Andersen's Interprocedural Alias Analysis (experimental)",
- false, true);
-static RegisterAnalysisGroup<AliasAnalysis> Y(X);
-
-// Initialize Timestamp Counter (static).
-volatile llvm::sys::cas_flag Andersens::Node::Counter = 0;
-
-ModulePass *llvm::createAndersensPass() { return new Andersens(); }
-
-//===----------------------------------------------------------------------===//
-// AliasAnalysis Interface Implementation
-//===----------------------------------------------------------------------===//
-
-AliasAnalysis::AliasResult Andersens::alias(const Value *V1, unsigned V1Size,
- const Value *V2, unsigned V2Size) {
- Node *N1 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V1)))];
- Node *N2 = &GraphNodes[FindNode(getNode(const_cast<Value*>(V2)))];
-
- // Check to see if the two pointers are known to not alias. They don't alias
- // if their points-to sets do not intersect.
- if (!N1->intersectsIgnoring(N2, NullObject))
- return NoAlias;
-
- return AliasAnalysis::alias(V1, V1Size, V2, V2Size);
-}
-
-AliasAnalysis::ModRefResult
-Andersens::getModRefInfo(CallSite CS, Value *P, unsigned Size) {
-  // The only useful mod/ref information we can contribute is for calls
-  // to external functions: if we know that memory never escapes from the
-  // program, it cannot be modified by an external call.
- //
- // NOTE: This is not really safe, at least not when the entire program is not
- // available. The deal is that the external function could call back into the
- // program and modify stuff. We ignore this technical niggle for now. This
- // is, after all, a "research quality" implementation of Andersen's analysis.
- if (Function *F = CS.getCalledFunction())
- if (F->isDeclaration()) {
- Node *N1 = &GraphNodes[FindNode(getNode(P))];
-
- if (N1->PointsTo->empty())
- return NoModRef;
-#if FULL_UNIVERSAL
- if (!UniversalSet->PointsTo->test(FindNode(getNode(P))))
- return NoModRef; // Universal set does not contain P
-#else
- if (!N1->PointsTo->test(UniversalSet))
- return NoModRef; // P doesn't point to the universal set.
-#endif
- }
-
- return AliasAnalysis::getModRefInfo(CS, P, Size);
-}
-
-AliasAnalysis::ModRefResult
-Andersens::getModRefInfo(CallSite CS1, CallSite CS2) {
- return AliasAnalysis::getModRefInfo(CS1,CS2);
-}
-
-/// pointsToConstantMemory - If we can determine that this pointer only points
-/// to constant memory, return true. In practice, this means that if the
-/// pointer can only point to constant globals, functions, or the null pointer,
-/// return true.
-///
-bool Andersens::pointsToConstantMemory(const Value *P) {
- Node *N = &GraphNodes[FindNode(getNode(const_cast<Value*>(P)))];
- unsigned i;
-
- for (SparseBitVector<>::iterator bi = N->PointsTo->begin();
- bi != N->PointsTo->end();
- ++bi) {
- i = *bi;
- Node *Pointee = &GraphNodes[i];
- if (Value *V = Pointee->getValue()) {
- if (!isa<GlobalValue>(V) || (isa<GlobalVariable>(V) &&
- !cast<GlobalVariable>(V)->isConstant()))
- return AliasAnalysis::pointsToConstantMemory(P);
- } else {
- if (i != NullObject)
- return AliasAnalysis::pointsToConstantMemory(P);
- }
- }
-
- return true;
-}
-
-//===----------------------------------------------------------------------===//
-// Object Identification Phase
-//===----------------------------------------------------------------------===//
-
-/// IdentifyObjects - This stage scans the program, adding an entry to the
-/// GraphNodes list for each memory object in the program (global, stack, or
-/// heap), and populates the ValueNodes and ObjectNodes maps for these objects.
-///
-void Andersens::IdentifyObjects(Module &M) {
- unsigned NumObjects = 0;
-
- // Object #0 is always the universal set: the object that we don't know
- // anything about.
- assert(NumObjects == UniversalSet && "Something changed!");
- ++NumObjects;
-
- // Object #1 always represents the null pointer.
- assert(NumObjects == NullPtr && "Something changed!");
- ++NumObjects;
-
- // Object #2 always represents the null object (the object pointed to by null)
- assert(NumObjects == NullObject && "Something changed!");
- ++NumObjects;
-
- // Add all the globals first.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- ObjectNodes[I] = NumObjects++;
- ValueNodes[I] = NumObjects++;
- }
-
- // Add nodes for all of the functions and the instructions inside of them.
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- // The function itself is a memory object.
- unsigned First = NumObjects;
- ValueNodes[F] = NumObjects++;
- if (isa<PointerType>(F->getFunctionType()->getReturnType()))
- ReturnNodes[F] = NumObjects++;
- if (F->getFunctionType()->isVarArg())
- VarargNodes[F] = NumObjects++;
-
-
- // Add nodes for all of the incoming pointer arguments.
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- {
- if (isa<PointerType>(I->getType()))
- ValueNodes[I] = NumObjects++;
- }
- MaxK[First] = NumObjects - First;
-
- // Scan the function body, creating a memory object for each heap/stack
- // allocation in the body of the function and a node to represent all
- // pointer values defined by instructions and used as operands.
- for (inst_iterator II = inst_begin(F), E = inst_end(F); II != E; ++II) {
-      // If this is a heap or stack allocation, create a node for the memory
- // object.
- if (isa<PointerType>(II->getType())) {
- ValueNodes[&*II] = NumObjects++;
- if (AllocaInst *AI = dyn_cast<AllocaInst>(&*II))
- ObjectNodes[AI] = NumObjects++;
- else if (isMalloc(&*II))
- ObjectNodes[&*II] = NumObjects++;
- }
-
- // Calls to inline asm need to be added as well because the callee isn't
- // referenced anywhere else.
- if (CallInst *CI = dyn_cast<CallInst>(&*II)) {
- Value *Callee = CI->getCalledValue();
- if (isa<InlineAsm>(Callee))
- ValueNodes[Callee] = NumObjects++;
- }
- }
- }
-
- // Now that we know how many objects to create, make them all now!
- GraphNodes.resize(NumObjects);
- NumNodes += NumObjects;
-}
-
-//===----------------------------------------------------------------------===//
-// Constraint Identification Phase
-//===----------------------------------------------------------------------===//
-
-/// getNodeForConstantPointer - Return the node corresponding to the constant
-/// pointer itself.
-unsigned Andersens::getNodeForConstantPointer(Constant *C) {
- assert(isa<PointerType>(C->getType()) && "Not a constant pointer!");
-
- if (isa<ConstantPointerNull>(C) || isa<UndefValue>(C))
- return NullPtr;
- else if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return getNode(GV);
- else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- switch (CE->getOpcode()) {
- case Instruction::GetElementPtr:
- return getNodeForConstantPointer(CE->getOperand(0));
- case Instruction::IntToPtr:
- return UniversalSet;
- case Instruction::BitCast:
- return getNodeForConstantPointer(CE->getOperand(0));
- default:
- errs() << "Constant Expr not yet handled: " << *CE << "\n";
- llvm_unreachable(0);
- }
- } else {
- llvm_unreachable("Unknown constant pointer!");
- }
- return 0;
-}
-
-/// getNodeForConstantPointerTarget - Return the node POINTED TO by the
-/// specified constant pointer.
-unsigned Andersens::getNodeForConstantPointerTarget(Constant *C) {
- assert(isa<PointerType>(C->getType()) && "Not a constant pointer!");
-
- if (isa<ConstantPointerNull>(C))
- return NullObject;
- else if (GlobalValue *GV = dyn_cast<GlobalValue>(C))
- return getObject(GV);
- else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(C)) {
- switch (CE->getOpcode()) {
- case Instruction::GetElementPtr:
- return getNodeForConstantPointerTarget(CE->getOperand(0));
- case Instruction::IntToPtr:
- return UniversalSet;
- case Instruction::BitCast:
- return getNodeForConstantPointerTarget(CE->getOperand(0));
- default:
- errs() << "Constant Expr not yet handled: " << *CE << "\n";
- llvm_unreachable(0);
- }
- } else {
- llvm_unreachable("Unknown constant pointer!");
- }
- return 0;
-}
-
-/// AddGlobalInitializerConstraints - Add inclusion constraints for the memory
-/// object N, which contains values indicated by C.
-void Andersens::AddGlobalInitializerConstraints(unsigned NodeIndex,
- Constant *C) {
- if (C->getType()->isSingleValueType()) {
- if (isa<PointerType>(C->getType()))
- Constraints.push_back(Constraint(Constraint::Copy, NodeIndex,
- getNodeForConstantPointer(C)));
- } else if (C->isNullValue()) {
- Constraints.push_back(Constraint(Constraint::Copy, NodeIndex,
- NullObject));
- return;
- } else if (!isa<UndefValue>(C)) {
- // If this is an array or struct, include constraints for each element.
- assert(isa<ConstantArray>(C) || isa<ConstantStruct>(C));
- for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i)
- AddGlobalInitializerConstraints(NodeIndex,
- cast<Constant>(C->getOperand(i)));
- }
-}
-
-/// AddConstraintsForNonInternalLinkage - If this function does not have
-/// internal linkage, realize that we can't trust anything passed into or
-/// returned by this function.
-void Andersens::AddConstraintsForNonInternalLinkage(Function *F) {
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end(); I != E; ++I)
- if (isa<PointerType>(I->getType()))
- // If this is an argument of an externally accessible function, the
- // incoming pointer might point to anything.
- Constraints.push_back(Constraint(Constraint::Copy, getNode(I),
- UniversalSet));
-}
-
-/// AddConstraintsForExternalCall - If this is a call to a "known" external
-/// function, add the constraints and return true. If this is a call to an
-/// unknown function, return false.
-bool Andersens::AddConstraintsForExternalCall(CallSite CS, Function *F) {
- assert(F->isDeclaration() && "Not an external function!");
-
- // These functions don't induce any points-to constraints.
- if (F->getName() == "atoi" || F->getName() == "atof" ||
- F->getName() == "atol" || F->getName() == "atoll" ||
- F->getName() == "remove" || F->getName() == "unlink" ||
- F->getName() == "rename" || F->getName() == "memcmp" ||
- F->getName() == "llvm.memset" ||
- F->getName() == "strcmp" || F->getName() == "strncmp" ||
- F->getName() == "execl" || F->getName() == "execlp" ||
- F->getName() == "execle" || F->getName() == "execv" ||
- F->getName() == "execvp" || F->getName() == "chmod" ||
- F->getName() == "puts" || F->getName() == "write" ||
- F->getName() == "open" || F->getName() == "create" ||
- F->getName() == "truncate" || F->getName() == "chdir" ||
- F->getName() == "mkdir" || F->getName() == "rmdir" ||
- F->getName() == "read" || F->getName() == "pipe" ||
- F->getName() == "wait" || F->getName() == "time" ||
- F->getName() == "stat" || F->getName() == "fstat" ||
- F->getName() == "lstat" || F->getName() == "strtod" ||
- F->getName() == "strtof" || F->getName() == "strtold" ||
- F->getName() == "fopen" || F->getName() == "fdopen" ||
- F->getName() == "freopen" ||
- F->getName() == "fflush" || F->getName() == "feof" ||
- F->getName() == "fileno" || F->getName() == "clearerr" ||
- F->getName() == "rewind" || F->getName() == "ftell" ||
- F->getName() == "ferror" || F->getName() == "fgetc" ||
- F->getName() == "fgetc" || F->getName() == "_IO_getc" ||
- F->getName() == "fwrite" || F->getName() == "fread" ||
- F->getName() == "fgets" || F->getName() == "ungetc" ||
- F->getName() == "fputc" ||
- F->getName() == "fputs" || F->getName() == "putc" ||
- F->getName() == "ftell" || F->getName() == "rewind" ||
- F->getName() == "_IO_putc" || F->getName() == "fseek" ||
- F->getName() == "fgetpos" || F->getName() == "fsetpos" ||
- F->getName() == "printf" || F->getName() == "fprintf" ||
- F->getName() == "sprintf" || F->getName() == "vprintf" ||
- F->getName() == "vfprintf" || F->getName() == "vsprintf" ||
- F->getName() == "scanf" || F->getName() == "fscanf" ||
- F->getName() == "sscanf" || F->getName() == "__assert_fail" ||
- F->getName() == "modf")
- return true;
-
-
- // These functions do induce points-to edges.
- if (F->getName() == "llvm.memcpy" ||
- F->getName() == "llvm.memmove" ||
- F->getName() == "memmove") {
-
- const FunctionType *FTy = F->getFunctionType();
- if (FTy->getNumParams() > 1 &&
- isa<PointerType>(FTy->getParamType(0)) &&
- isa<PointerType>(FTy->getParamType(1))) {
-
- // *Dest = *Src, which requires an artificial graph node to represent the
- // constraint. It is broken up into *Dest = temp, temp = *Src
- unsigned FirstArg = getNode(CS.getArgument(0));
- unsigned SecondArg = getNode(CS.getArgument(1));
- unsigned TempArg = GraphNodes.size();
- GraphNodes.push_back(Node());
- Constraints.push_back(Constraint(Constraint::Store,
- FirstArg, TempArg));
- Constraints.push_back(Constraint(Constraint::Load,
- TempArg, SecondArg));
- // In addition, Dest = Src
- Constraints.push_back(Constraint(Constraint::Copy,
- FirstArg, SecondArg));
- return true;
- }
- }
-
- // Result = Arg0
- if (F->getName() == "realloc" || F->getName() == "strchr" ||
- F->getName() == "strrchr" || F->getName() == "strstr" ||
- F->getName() == "strtok") {
- const FunctionType *FTy = F->getFunctionType();
- if (FTy->getNumParams() > 0 &&
- isa<PointerType>(FTy->getParamType(0))) {
- Constraints.push_back(Constraint(Constraint::Copy,
- getNode(CS.getInstruction()),
- getNode(CS.getArgument(0))));
- return true;
- }
- }
-
- return false;
-}
-
-
-
-/// AnalyzeUsesOfFunction - Look at all of the users of the specified function.
-/// If this is used by anything complex (i.e., the address escapes), return
-/// true.
-bool Andersens::AnalyzeUsesOfFunction(Value *V) {
-
- if (!isa<PointerType>(V->getType())) return true;
-
- for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
- if (isa<LoadInst>(*UI)) {
- return false;
- } else if (StoreInst *SI = dyn_cast<StoreInst>(*UI)) {
- if (V == SI->getOperand(1)) {
- return false;
- } else if (SI->getOperand(1)) {
- return true; // Storing the pointer
- }
- } else if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(*UI)) {
- if (AnalyzeUsesOfFunction(GEP)) return true;
- } else if (isFreeCall(*UI)) {
- return false;
- } else if (CallInst *CI = dyn_cast<CallInst>(*UI)) {
- // Make sure that this is just the function being called, not that it is
- // being passed into the function.
- for (unsigned i = 1, e = CI->getNumOperands(); i != e; ++i)
- if (CI->getOperand(i) == V) return true;
- } else if (InvokeInst *II = dyn_cast<InvokeInst>(*UI)) {
- // Make sure that this is just the function being called, not that it is
- // being passed into the function.
- for (unsigned i = 3, e = II->getNumOperands(); i != e; ++i)
- if (II->getOperand(i) == V) return true;
- } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(*UI)) {
- if (CE->getOpcode() == Instruction::GetElementPtr ||
- CE->getOpcode() == Instruction::BitCast) {
- if (AnalyzeUsesOfFunction(CE))
- return true;
- } else {
- return true;
- }
- } else if (ICmpInst *ICI = dyn_cast<ICmpInst>(*UI)) {
- if (!isa<ConstantPointerNull>(ICI->getOperand(1)))
- return true; // Allow comparison against null.
- } else {
- return true;
- }
- return false;
-}
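-
-// For example, a direct call "call void @f()" or a comparison of @f against
-// null is not treated as an escape, whereas storing @f to memory or passing
-// it as an argument to another call makes this routine return true.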
-
-/// CollectConstraints - This stage scans the program, adding a constraint to
-/// the Constraints list for each instruction in the program that induces a
-/// constraint, and setting up the initial points-to graph.
-///
-void Andersens::CollectConstraints(Module &M) {
- // First, the universal set points to itself.
- Constraints.push_back(Constraint(Constraint::AddressOf, UniversalSet,
- UniversalSet));
- Constraints.push_back(Constraint(Constraint::Store, UniversalSet,
- UniversalSet));
-
- // Next, the null pointer points to the null object.
- Constraints.push_back(Constraint(Constraint::AddressOf, NullPtr, NullObject));
-
- // Next, add any constraints on global variables and their initializers.
- for (Module::global_iterator I = M.global_begin(), E = M.global_end();
- I != E; ++I) {
- // Associate the address of the global object as pointing to the memory for
- // the global: &G = <G memory>
- unsigned ObjectIndex = getObject(I);
- Node *Object = &GraphNodes[ObjectIndex];
- Object->setValue(I);
- Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(*I),
- ObjectIndex));
-
- if (I->hasDefinitiveInitializer()) {
- AddGlobalInitializerConstraints(ObjectIndex, I->getInitializer());
- } else {
- // If it doesn't have an initializer (i.e. it's defined in another
- // translation unit), it points to the universal set.
- Constraints.push_back(Constraint(Constraint::Copy, ObjectIndex,
- UniversalSet));
- }
- }
-
- for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
- // Set up the return value node.
- if (isa<PointerType>(F->getFunctionType()->getReturnType()))
- GraphNodes[getReturnNode(F)].setValue(F);
- if (F->getFunctionType()->isVarArg())
- GraphNodes[getVarargNode(F)].setValue(F);
-
- // Set up incoming argument nodes.
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- if (isa<PointerType>(I->getType()))
- getNodeValue(*I);
-
- // At some point we should just add constraints for the escaping functions
- // at solve time, but this slows down solving. For now, we simply mark
- // address-taken functions as escaping and treat them as external.
- if (!F->hasLocalLinkage() || AnalyzeUsesOfFunction(F))
- AddConstraintsForNonInternalLinkage(F);
-
- if (!F->isDeclaration()) {
- // Scan the function body, creating a memory object for each heap/stack
- // allocation in the body of the function and a node to represent all
- // pointer values defined by instructions and used as operands.
- visit(F);
- } else {
- // External functions that return pointers return the universal set.
- if (isa<PointerType>(F->getFunctionType()->getReturnType()))
- Constraints.push_back(Constraint(Constraint::Copy,
- getReturnNode(F),
- UniversalSet));
-
- // Any pointers that are passed into the function have the universal set
- // stored into them.
- for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
- I != E; ++I)
- if (isa<PointerType>(I->getType())) {
- // Pointers passed into external functions could have anything stored
- // through them.
- Constraints.push_back(Constraint(Constraint::Store, getNode(I),
- UniversalSet));
- // Memory objects passed into external function calls can have the
- // universal set point to them.
-#if FULL_UNIVERSAL
- Constraints.push_back(Constraint(Constraint::Copy,
- UniversalSet,
- getNode(I)));
-#else
- Constraints.push_back(Constraint(Constraint::Copy,
- getNode(I),
- UniversalSet));
-#endif
- }
-
- // If this is an external varargs function, it can also store pointers
- // into any pointers passed through the varargs section.
- if (F->getFunctionType()->isVarArg())
- Constraints.push_back(Constraint(Constraint::Store, getVarargNode(F),
- UniversalSet));
- }
- }
- NumConstraints += Constraints.size();
-}
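-
-// As a small worked example, a global G defined in this module produces
-//   &G = <G memory>            (AddressOf, via getObject)
-// plus constraints for its initializer, while a G that is only declared here
-// instead receives
-//   <G memory> = <universal>   (Copy)
-// in the notation of PrintConstraint() and PrintNode().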
-
-
-void Andersens::visitInstruction(Instruction &I) {
-#ifdef NDEBUG
- return; // This function is just a big assert.
-#endif
- if (isa<BinaryOperator>(I))
- return;
- // Most instructions don't have any effect on pointer values.
- switch (I.getOpcode()) {
- case Instruction::Br:
- case Instruction::Switch:
- case Instruction::Unwind:
- case Instruction::Unreachable:
- case Instruction::ICmp:
- case Instruction::FCmp:
- return;
- default:
- // Is this something we aren't handling yet?
- errs() << "Unknown instruction: " << I;
- llvm_unreachable(0);
- }
-}
-
-void Andersens::visitAllocaInst(AllocaInst &I) {
- visitAlloc(I);
-}
-
-void Andersens::visitAlloc(Instruction &I) {
- unsigned ObjectIndex = getObject(&I);
- GraphNodes[ObjectIndex].setValue(&I);
- Constraints.push_back(Constraint(Constraint::AddressOf, getNodeValue(I),
- ObjectIndex));
-}
-
-void Andersens::visitReturnInst(ReturnInst &RI) {
- if (RI.getNumOperands() && isa<PointerType>(RI.getOperand(0)->getType()))
- // return V --> <Copy/retval{F}/v>
- Constraints.push_back(Constraint(Constraint::Copy,
- getReturnNode(RI.getParent()->getParent()),
- getNode(RI.getOperand(0))));
-}
-
-void Andersens::visitLoadInst(LoadInst &LI) {
- if (isa<PointerType>(LI.getType()))
- // P1 = load P2 --> <Load/P1/P2>
- Constraints.push_back(Constraint(Constraint::Load, getNodeValue(LI),
- getNode(LI.getOperand(0))));
-}
-
-void Andersens::visitStoreInst(StoreInst &SI) {
- if (isa<PointerType>(SI.getOperand(0)->getType()))
- // store P1, P2 --> <Store/P2/P1>
- Constraints.push_back(Constraint(Constraint::Store,
- getNode(SI.getOperand(1)),
- getNode(SI.getOperand(0))));
-}
-
-void Andersens::visitGetElementPtrInst(GetElementPtrInst &GEP) {
- // P1 = getelementptr P2, ... --> <Copy/P1/P2>
- Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(GEP),
- getNode(GEP.getOperand(0))));
-}
-
-void Andersens::visitPHINode(PHINode &PN) {
- if (isa<PointerType>(PN.getType())) {
- unsigned PNN = getNodeValue(PN);
- for (unsigned i = 0, e = PN.getNumIncomingValues(); i != e; ++i)
- // P1 = phi P2, P3 --> <Copy/P1/P2>, <Copy/P1/P3>, ...
- Constraints.push_back(Constraint(Constraint::Copy, PNN,
- getNode(PN.getIncomingValue(i))));
- }
-}
-
-void Andersens::visitCastInst(CastInst &CI) {
- Value *Op = CI.getOperand(0);
- if (isa<PointerType>(CI.getType())) {
- if (isa<PointerType>(Op->getType())) {
- // P1 = cast P2 --> <Copy/P1/P2>
- Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI),
- getNode(CI.getOperand(0))));
- } else {
- // P1 = cast int --> <Copy/P1/Univ>
-#if 0
- Constraints.push_back(Constraint(Constraint::Copy, getNodeValue(CI),
- UniversalSet));
-#else
- getNodeValue(CI);
-#endif
- }
- } else if (isa<PointerType>(Op->getType())) {
- // int = cast P1 --> <Copy/Univ/P1>
-#if 0
- Constraints.push_back(Constraint(Constraint::Copy,
- UniversalSet,
- getNode(CI.getOperand(0))));
-#else
- getNode(CI.getOperand(0));
-#endif
- }
-}
-
-void Andersens::visitSelectInst(SelectInst &SI) {
- if (isa<PointerType>(SI.getType())) {
- unsigned SIN = getNodeValue(SI);
- // P1 = select C, P2, P3 ---> <Copy/P1/P2>, <Copy/P1/P3>
- Constraints.push_back(Constraint(Constraint::Copy, SIN,
- getNode(SI.getOperand(1))));
- Constraints.push_back(Constraint(Constraint::Copy, SIN,
- getNode(SI.getOperand(2))));
- }
-}
-
-void Andersens::visitVAArg(VAArgInst &I) {
- llvm_unreachable("vaarg not handled yet!");
-}
-
-/// AddConstraintsForCall - Add constraints for a call with actual arguments
-/// specified by CS to the function specified by F. Note that the types of
-/// arguments might not match up in the case where this is an indirect call and
- the function pointer has been cast. If this is the case, do something
-/// reasonable.
-void Andersens::AddConstraintsForCall(CallSite CS, Function *F) {
- Value *CallValue = CS.getCalledValue();
- bool IsDeref = F == NULL;
-
- // If this is a call to an external function, try to handle it directly to
- // gain a measure of context sensitivity.
- if (F && F->isDeclaration() && AddConstraintsForExternalCall(CS, F))
- return;
-
- if (isa<PointerType>(CS.getType())) {
- unsigned CSN = getNode(CS.getInstruction());
- if (!F || isa<PointerType>(F->getFunctionType()->getReturnType())) {
- if (IsDeref)
- Constraints.push_back(Constraint(Constraint::Load, CSN,
- getNode(CallValue), CallReturnPos));
- else
- Constraints.push_back(Constraint(Constraint::Copy, CSN,
- getNode(CallValue) + CallReturnPos));
- } else {
- // If the function returns a non-pointer value, handle this just like we
- // treat a nonpointer cast to pointer.
- Constraints.push_back(Constraint(Constraint::Copy, CSN,
- UniversalSet));
- }
- } else if (F && isa<PointerType>(F->getFunctionType()->getReturnType())) {
-#if FULL_UNIVERSAL
- Constraints.push_back(Constraint(Constraint::Copy,
- UniversalSet,
- getNode(CallValue) + CallReturnPos));
-#else
- Constraints.push_back(Constraint(Constraint::Copy,
- getNode(CallValue) + CallReturnPos,
- UniversalSet));
-#endif
-
-
- }
-
- CallSite::arg_iterator ArgI = CS.arg_begin(), ArgE = CS.arg_end();
- bool external = !F || F->isDeclaration();
- if (F) {
- // Direct Call
- Function::arg_iterator AI = F->arg_begin(), AE = F->arg_end();
- for (; AI != AE && ArgI != ArgE; ++AI, ++ArgI)
- {
-#if !FULL_UNIVERSAL
- if (external && isa<PointerType>((*ArgI)->getType()))
- {
- // Add constraint that ArgI can now point to anything due to
- // escaping, as can everything it points to. The second portion of
- // this should be taken care of by universal = *universal
- Constraints.push_back(Constraint(Constraint::Copy,
- getNode(*ArgI),
- UniversalSet));
- }
-#endif
- if (isa<PointerType>(AI->getType())) {
- if (isa<PointerType>((*ArgI)->getType())) {
- // Copy the actual argument into the formal argument.
- Constraints.push_back(Constraint(Constraint::Copy, getNode(AI),
- getNode(*ArgI)));
- } else {
- Constraints.push_back(Constraint(Constraint::Copy, getNode(AI),
- UniversalSet));
- }
- } else if (isa<PointerType>((*ArgI)->getType())) {
-#if FULL_UNIVERSAL
- Constraints.push_back(Constraint(Constraint::Copy,
- UniversalSet,
- getNode(*ArgI)));
-#else
- Constraints.push_back(Constraint(Constraint::Copy,
- getNode(*ArgI),
- UniversalSet));
-#endif
- }
- }
- } else {
- //Indirect Call
- unsigned ArgPos = CallFirstArgPos;
- for (; ArgI != ArgE; ++ArgI) {
- if (isa<PointerType>((*ArgI)->getType())) {
- // Copy the actual argument into the formal argument.
- Constraints.push_back(Constraint(Constraint::Store,
- getNode(CallValue),
- getNode(*ArgI), ArgPos++));
- } else {
- Constraints.push_back(Constraint(Constraint::Store,
- getNode (CallValue),
- UniversalSet, ArgPos++));
- }
- }
- }
- // Copy all pointers passed through the varargs section to the varargs node.
- if (F && F->getFunctionType()->isVarArg())
- for (; ArgI != ArgE; ++ArgI)
- if (isa<PointerType>((*ArgI)->getType()))
- Constraints.push_back(Constraint(Constraint::Copy, getVarargNode(F),
- getNode(*ArgI)));
- // If more arguments are passed in than we track, just drop them on the floor.
-}
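-
-// For instance, an indirect call "(*FP)(P)" with a pointer argument P
-// produces the constraint *(FP + CallFirstArgPos) = P, so every function FP
-// may point to receives P in its first formal argument node; a direct call
-// simply copies P into the formal argument with a plain Copy constraint.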
-
-void Andersens::visitCallSite(CallSite CS) {
- if (isa<PointerType>(CS.getType()))
- getNodeValue(*CS.getInstruction());
-
- if (Function *F = CS.getCalledFunction()) {
- AddConstraintsForCall(CS, F);
- } else {
- AddConstraintsForCall(CS, NULL);
- }
-}
-
-//===----------------------------------------------------------------------===//
-// Constraint Solving Phase
-//===----------------------------------------------------------------------===//
-
-/// intersects - Return true if the points-to set of this node intersects
-/// with the points-to set of the specified node.
-bool Andersens::Node::intersects(Node *N) const {
- return PointsTo->intersects(N->PointsTo);
-}
-
-/// intersectsIgnoring - Return true if the points-to set of this node
-/// intersects with the points-to set of the specified node on any nodes
-/// except for the specified node to ignore.
-bool Andersens::Node::intersectsIgnoring(Node *N, unsigned Ignoring) const {
- // TODO: If we are only going to call this with the same value for Ignoring,
- // we should move the special values out of the points-to bitmap.
- bool WeHadIt = PointsTo->test(Ignoring);
- bool NHadIt = N->PointsTo->test(Ignoring);
- bool Result = false;
- if (WeHadIt)
- PointsTo->reset(Ignoring);
- if (NHadIt)
- N->PointsTo->reset(Ignoring);
- Result = PointsTo->intersects(N->PointsTo);
- if (WeHadIt)
- PointsTo->set(Ignoring);
- if (NHadIt)
- N->PointsTo->set(Ignoring);
- return Result;
-}
-
-
-/// Clump together address-taken variables so that the points-to sets use up
-/// less space and can be operated on faster.
-
-void Andersens::ClumpAddressTaken() {
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa-renumber"
- std::vector<unsigned> Translate;
- std::vector<Node> NewGraphNodes;
-
- Translate.resize(GraphNodes.size());
- unsigned NewPos = 0;
-
- for (unsigned i = 0; i < Constraints.size(); ++i) {
- Constraint &C = Constraints[i];
- if (C.Type == Constraint::AddressOf) {
- GraphNodes[C.Src].AddressTaken = true;
- }
- }
- for (unsigned i = 0; i < NumberSpecialNodes; ++i) {
- unsigned Pos = NewPos++;
- Translate[i] = Pos;
- NewGraphNodes.push_back(GraphNodes[i]);
- DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n");
- }
-
- // I believe this ends up being faster than making two vectors and splicing
- // them.
- for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
- if (GraphNodes[i].AddressTaken) {
- unsigned Pos = NewPos++;
- Translate[i] = Pos;
- NewGraphNodes.push_back(GraphNodes[i]);
- DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n");
- }
- }
-
- for (unsigned i = NumberSpecialNodes; i < GraphNodes.size(); ++i) {
- if (!GraphNodes[i].AddressTaken) {
- unsigned Pos = NewPos++;
- Translate[i] = Pos;
- NewGraphNodes.push_back(GraphNodes[i]);
- DEBUG(dbgs() << "Renumbering node " << i << " to node " << Pos << "\n");
- }
- }
-
- for (DenseMap<Value*, unsigned>::iterator Iter = ValueNodes.begin();
- Iter != ValueNodes.end();
- ++Iter)
- Iter->second = Translate[Iter->second];
-
- for (DenseMap<Value*, unsigned>::iterator Iter = ObjectNodes.begin();
- Iter != ObjectNodes.end();
- ++Iter)
- Iter->second = Translate[Iter->second];
-
- for (DenseMap<Function*, unsigned>::iterator Iter = ReturnNodes.begin();
- Iter != ReturnNodes.end();
- ++Iter)
- Iter->second = Translate[Iter->second];
-
- for (DenseMap<Function*, unsigned>::iterator Iter = VarargNodes.begin();
- Iter != VarargNodes.end();
- ++Iter)
- Iter->second = Translate[Iter->second];
-
- for (unsigned i = 0; i < Constraints.size(); ++i) {
- Constraint &C = Constraints[i];
- C.Src = Translate[C.Src];
- C.Dest = Translate[C.Dest];
- }
-
- GraphNodes.swap(NewGraphNodes);
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa"
-}
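-
-// After this renumbering the nodes are ordered: special nodes first, then all
-// address-taken nodes, then everything else. Since points-to sets can only
-// ever contain special or address-taken nodes, their bitmaps now cover one
-// small dense range of indices.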
-
-/// The technique used here is described in "Exploiting Pointer and Location
-/// Equivalence to Optimize Pointer Analysis," in the 14th International Static
-/// Analysis Symposium (SAS), August 2007. It is known as the "HVN" algorithm,
-/// and is equivalent to value numbering the collapsed constraint graph without
-/// evaluating unions. This is used as a pre-pass to HU in order to resolve
-/// first-order pointer dereferences and speed up/reduce memory usage of HU.
-/// Running both is equivalent to HRU without the iteration.
-/// HVN in more detail:
-/// Imagine the set of constraints was simply straight line code with no loops
-/// (we eliminate cycles, so there are no loops), such as:
-/// E = &D
-/// E = &C
-/// E = F
-/// F = G
-/// G = F
-/// Applying value numbering to this code tells us:
-/// G == F == E
-///
-/// For HVN, this is as far as it goes. We assign new value numbers to every
-/// "address node", and every "reference node".
-/// To get the optimal result for this, we use a DFS + SCC pass (all nodes in a
-/// cycle must have the same value number, since the = operation is really
-/// inclusion, not overwrite), and we value-number the nodes we receive
-/// points-to sets from before we value-number our own node.
-/// The advantage of HU over HVN is that HU considers the inclusion property, so
-/// that if you have
-/// E = &D
-/// E = &C
-/// E = F
-/// F = G
-/// F = &D
-/// G = F
-/// HU will determine that G == F == E. HVN will not, because it cannot prove
-/// that the points-to information ends up being the same (they all receive
-/// &D from E anyway).
-
-void Andersens::HVN() {
- DEBUG(dbgs() << "Beginning HVN\n");
- // Build a predecessor graph. This is like our constraint graph with the
- // edges going in the opposite direction, and there are edges for all the
- // constraints, instead of just copy constraints. We also build implicit
- // edges for constraints that are implied but not explicit, i.e., for the
- // constraint a = &b, we add the implicit edge *a = b. This helps us capture
- // more cycles.
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- Constraint &C = Constraints[i];
- if (C.Type == Constraint::AddressOf) {
- GraphNodes[C.Src].AddressTaken = true;
- GraphNodes[C.Src].Direct = false;
-
- // Dest = &src edge
- unsigned AdrNode = C.Src + FirstAdrNode;
- if (!GraphNodes[C.Dest].PredEdges)
- GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
- GraphNodes[C.Dest].PredEdges->set(AdrNode);
-
- // *Dest = src edge
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].ImplicitPredEdges)
- GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].ImplicitPredEdges->set(C.Src);
- } else if (C.Type == Constraint::Load) {
- if (C.Offset == 0) {
- // dest = *src edge
- if (!GraphNodes[C.Dest].PredEdges)
- GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
- GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode);
- } else {
- GraphNodes[C.Dest].Direct = false;
- }
- } else if (C.Type == Constraint::Store) {
- if (C.Offset == 0) {
- // *dest = src edge
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].PredEdges)
- GraphNodes[RefNode].PredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].PredEdges->set(C.Src);
- }
- } else {
- // Dest = Src edge and *Dest = *Src edge
- if (!GraphNodes[C.Dest].PredEdges)
- GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
- GraphNodes[C.Dest].PredEdges->set(C.Src);
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].ImplicitPredEdges)
- GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode);
- }
- }
- PEClass = 1;
- // Do SCC finding first to condense our predecessor graph
- DFSNumber = 0;
- Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
- Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
- Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
-
- for (unsigned i = 0; i < FirstRefNode; ++i) {
- unsigned Node = VSSCCRep[i];
- if (!Node2Visited[Node])
- HVNValNum(Node);
- }
- for (BitVectorMap::iterator Iter = Set2PEClass.begin();
- Iter != Set2PEClass.end();
- ++Iter)
- delete Iter->first;
- Set2PEClass.clear();
- Node2DFS.clear();
- Node2Deleted.clear();
- Node2Visited.clear();
- DEBUG(dbgs() << "Finished HVN\n");
-
-}
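-
-// Summarized, with REF(x) = *x and ADR(x) = &x as in PrintLabels(), the
-// predecessor edges built above are:
-//   a = &b : a <- ADR(b), and implicitly REF(a) <- b
-//   a = *b (offset 0) : a <- REF(b); a nonzero offset marks a indirect
-//   *a = b (offset 0) : REF(a) <- b
-//   a = b : a <- b, and implicitly REF(a) <- REF(b)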
-
-/// This is the workhorse of HVN value numbering. We combine it with SCC
-/// finding, since it is easy to do both in the same pass.
-void Andersens::HVNValNum(unsigned NodeIndex) {
- unsigned MyDFS = DFSNumber++;
- Node *N = &GraphNodes[NodeIndex];
- Node2Visited[NodeIndex] = true;
- Node2DFS[NodeIndex] = MyDFS;
-
- // First process all our explicit edges
- if (N->PredEdges)
- for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
- Iter != N->PredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- if (!Node2Deleted[j]) {
- if (!Node2Visited[j])
- HVNValNum(j);
- if (Node2DFS[NodeIndex] > Node2DFS[j])
- Node2DFS[NodeIndex] = Node2DFS[j];
- }
- }
-
- // Now process all the implicit edges
- if (N->ImplicitPredEdges)
- for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin();
- Iter != N->ImplicitPredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- if (!Node2Deleted[j]) {
- if (!Node2Visited[j])
- HVNValNum(j);
- if (Node2DFS[NodeIndex] > Node2DFS[j])
- Node2DFS[NodeIndex] = Node2DFS[j];
- }
- }
-
- // See if we found any cycles
- if (MyDFS == Node2DFS[NodeIndex]) {
- while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
- unsigned CycleNodeIndex = SCCStack.top();
- Node *CycleNode = &GraphNodes[CycleNodeIndex];
- VSSCCRep[CycleNodeIndex] = NodeIndex;
- // Unify the nodes
- N->Direct &= CycleNode->Direct;
-
- if (CycleNode->PredEdges) {
- if (!N->PredEdges)
- N->PredEdges = new SparseBitVector<>;
- *(N->PredEdges) |= CycleNode->PredEdges;
- delete CycleNode->PredEdges;
- CycleNode->PredEdges = NULL;
- }
- if (CycleNode->ImplicitPredEdges) {
- if (!N->ImplicitPredEdges)
- N->ImplicitPredEdges = new SparseBitVector<>;
- *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges;
- delete CycleNode->ImplicitPredEdges;
- CycleNode->ImplicitPredEdges = NULL;
- }
-
- SCCStack.pop();
- }
-
- Node2Deleted[NodeIndex] = true;
-
- if (!N->Direct) {
- GraphNodes[NodeIndex].PointerEquivLabel = PEClass++;
- return;
- }
-
- // Collect labels of successor nodes
- bool AllSame = true;
- unsigned First = ~0;
- SparseBitVector<> *Labels = new SparseBitVector<>;
- bool Used = false;
-
- if (N->PredEdges)
- for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
- Iter != N->PredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- unsigned Label = GraphNodes[j].PointerEquivLabel;
- // Ignore labels that are equal to us or non-pointers
- if (j == NodeIndex || Label == 0)
- continue;
- if (First == (unsigned)~0)
- First = Label;
- else if (First != Label)
- AllSame = false;
- Labels->set(Label);
- }
-
- // We either have a non-pointer, a copy of an existing node, or a new node.
- // Assign the appropriate pointer equivalence label.
- if (Labels->empty()) {
- GraphNodes[NodeIndex].PointerEquivLabel = 0;
- } else if (AllSame) {
- GraphNodes[NodeIndex].PointerEquivLabel = First;
- } else {
- GraphNodes[NodeIndex].PointerEquivLabel = Set2PEClass[Labels];
- if (GraphNodes[NodeIndex].PointerEquivLabel == 0) {
- unsigned EquivClass = PEClass++;
- Set2PEClass[Labels] = EquivClass;
- GraphNodes[NodeIndex].PointerEquivLabel = EquivClass;
- Used = true;
- }
- }
- if (!Used)
- delete Labels;
- } else {
- SCCStack.push(NodeIndex);
- }
-}
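-
-// The DFS skeleton shared by HVNValNum, Condense and Search is Nuutila's
-// variant of Tarjan's SCC algorithm. A standalone sketch (hypothetical names,
-// not part of this pass; the union and labeling work is elided):
-#if 0
-#include <stack>
-#include <vector>
-
-struct SCCFinder {
-  const std::vector<std::vector<unsigned> > &Pred; // predecessor edge lists
-  std::vector<unsigned> DFS, Rep;                  // Rep is like VSSCCRep
-  std::vector<bool> Visited, Deleted;
-  std::stack<unsigned> Stack;
-  unsigned Counter;
-
-  explicit SCCFinder(const std::vector<std::vector<unsigned> > &P)
-    : Pred(P), DFS(P.size(), 0), Rep(P.size()),
-      Visited(P.size(), false), Deleted(P.size(), false), Counter(0) {
-    for (unsigned i = 0; i != Rep.size(); ++i) Rep[i] = i;
-  }
-
-  void visit(unsigned N) {
-    unsigned MyDFS = Counter++;
-    Visited[N] = true;
-    DFS[N] = MyDFS;
-    for (unsigned e = 0; e != Pred[N].size(); ++e) {
-      unsigned J = Rep[Pred[N][e]];
-      if (Deleted[J]) continue;
-      if (!Visited[J]) visit(J);
-      if (DFS[J] < DFS[N]) DFS[N] = DFS[J];  // keep lowest reachable DFS
-    }
-    if (DFS[N] != MyDFS) {                   // not a root: leave for the root
-      Stack.push(N);
-      return;
-    }
-    while (!Stack.empty() && DFS[Stack.top()] >= MyDFS) {
-      Rep[Stack.top()] = N;                  // collapse the cycle into N
-      Stack.pop();
-    }
-    Deleted[N] = true;
-  }
-};
-#endif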
-
-/// The technique used here is described in "Exploiting Pointer and Location
-/// Equivalence to Optimize Pointer Analysis," in the 14th International Static
-/// Analysis Symposium (SAS), August 2007. It is known as the "HU" algorithm,
-/// and is equivalent to value numbering the collapsed constraint graph
-/// including evaluating unions.
-void Andersens::HU() {
- DEBUG(dbgs() << "Beginning HU\n");
- // Build a predecessor graph. This is like our constraint graph with the
- // edges going in the opposite direction, and there are edges for all the
- // constraints, instead of just copy constraints. We also build implicit
- // edges for constraints that are implied but not explicit, i.e., for the
- // constraint a = &b, we add the implicit edge *a = b. This helps us capture
- // more cycles.
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- Constraint &C = Constraints[i];
- if (C.Type == Constraint::AddressOf) {
- GraphNodes[C.Src].AddressTaken = true;
- GraphNodes[C.Src].Direct = false;
-
- GraphNodes[C.Dest].PointsTo->set(C.Src);
- // *Dest = src edge
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].ImplicitPredEdges)
- GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].ImplicitPredEdges->set(C.Src);
- GraphNodes[C.Src].PointedToBy->set(C.Dest);
- } else if (C.Type == Constraint::Load) {
- if (C.Offset == 0) {
- // dest = *src edge
- if (!GraphNodes[C.Dest].PredEdges)
- GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
- GraphNodes[C.Dest].PredEdges->set(C.Src + FirstRefNode);
- } else {
- GraphNodes[C.Dest].Direct = false;
- }
- } else if (C.Type == Constraint::Store) {
- if (C.Offset == 0) {
- // *dest = src edge
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].PredEdges)
- GraphNodes[RefNode].PredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].PredEdges->set(C.Src);
- }
- } else {
- // Dest = Src edge and *Dest = *Src edge
- if (!GraphNodes[C.Dest].PredEdges)
- GraphNodes[C.Dest].PredEdges = new SparseBitVector<>;
- GraphNodes[C.Dest].PredEdges->set(C.Src);
- unsigned RefNode = C.Dest + FirstRefNode;
- if (!GraphNodes[RefNode].ImplicitPredEdges)
- GraphNodes[RefNode].ImplicitPredEdges = new SparseBitVector<>;
- GraphNodes[RefNode].ImplicitPredEdges->set(C.Src + FirstRefNode);
- }
- }
- PEClass = 1;
- // Do SCC finding first to condense our predecessor graph
- DFSNumber = 0;
- Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
- Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
- Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
-
- for (unsigned i = 0; i < FirstRefNode; ++i) {
- if (FindNode(i) == i) {
- unsigned Node = VSSCCRep[i];
- if (!Node2Visited[Node])
- Condense(Node);
- }
- }
-
- // Reset tables for actual labeling
- Node2DFS.clear();
- Node2Visited.clear();
- Node2Deleted.clear();
- // Pre-grow our DenseMap so that we don't get really bad behavior.
- Set2PEClass.resize(GraphNodes.size());
-
- // Visit the condensed graph and generate pointer equivalence labels.
- Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
- for (unsigned i = 0; i < FirstRefNode; ++i) {
- if (FindNode(i) == i) {
- unsigned Node = VSSCCRep[i];
- if (!Node2Visited[Node])
- HUValNum(Node);
- }
- }
- // PEClass nodes will be deleted when N->PointsTo is deleted in our caller.
- Set2PEClass.clear();
- DEBUG(dbgs() << "Finished HU\n");
-}
-
-
-/// Implementation of the standard Tarjan SCC algorithm, as modified by Nuutila.
-void Andersens::Condense(unsigned NodeIndex) {
- unsigned MyDFS = DFSNumber++;
- Node *N = &GraphNodes[NodeIndex];
- Node2Visited[NodeIndex] = true;
- Node2DFS[NodeIndex] = MyDFS;
-
- // First process all our explicit edges
- if (N->PredEdges)
- for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
- Iter != N->PredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- if (!Node2Deleted[j]) {
- if (!Node2Visited[j])
- Condense(j);
- if (Node2DFS[NodeIndex] > Node2DFS[j])
- Node2DFS[NodeIndex] = Node2DFS[j];
- }
- }
-
- // Now process all the implicit edges
- if (N->ImplicitPredEdges)
- for (SparseBitVector<>::iterator Iter = N->ImplicitPredEdges->begin();
- Iter != N->ImplicitPredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- if (!Node2Deleted[j]) {
- if (!Node2Visited[j])
- Condense(j);
- if (Node2DFS[NodeIndex] > Node2DFS[j])
- Node2DFS[NodeIndex] = Node2DFS[j];
- }
- }
-
- // See if we found any cycles
- if (MyDFS == Node2DFS[NodeIndex]) {
- while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
- unsigned CycleNodeIndex = SCCStack.top();
- Node *CycleNode = &GraphNodes[CycleNodeIndex];
- VSSCCRep[CycleNodeIndex] = NodeIndex;
- // Unify the nodes
- N->Direct &= CycleNode->Direct;
-
- *(N->PointsTo) |= CycleNode->PointsTo;
- delete CycleNode->PointsTo;
- CycleNode->PointsTo = NULL;
- if (CycleNode->PredEdges) {
- if (!N->PredEdges)
- N->PredEdges = new SparseBitVector<>;
- *(N->PredEdges) |= CycleNode->PredEdges;
- delete CycleNode->PredEdges;
- CycleNode->PredEdges = NULL;
- }
- if (CycleNode->ImplicitPredEdges) {
- if (!N->ImplicitPredEdges)
- N->ImplicitPredEdges = new SparseBitVector<>;
- *(N->ImplicitPredEdges) |= CycleNode->ImplicitPredEdges;
- delete CycleNode->ImplicitPredEdges;
- CycleNode->ImplicitPredEdges = NULL;
- }
- SCCStack.pop();
- }
-
- Node2Deleted[NodeIndex] = true;
-
- // Set up number of incoming edges for other nodes
- if (N->PredEdges)
- for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
- Iter != N->PredEdges->end();
- ++Iter)
- ++GraphNodes[VSSCCRep[*Iter]].NumInEdges;
- } else {
- SCCStack.push(NodeIndex);
- }
-}
-
-void Andersens::HUValNum(unsigned NodeIndex) {
- Node *N = &GraphNodes[NodeIndex];
- Node2Visited[NodeIndex] = true;
-
- // Eliminate dereferences of non-pointers for those non-pointers we have
- // already identified. These are ref nodes whose non-ref node:
- // 1. Has already been visited and determined to point to nothing (and thus,
- // a dereference of it must point to nothing), or
- // 2. Is a direct node with no predecessor edges in our graph and with no
- // points-to set (it can't point to anything either, since it receives no
- // points-to sets and has none of its own).
- if (NodeIndex >= FirstRefNode) {
- unsigned j = VSSCCRep[FindNode(NodeIndex - FirstRefNode)];
- if ((Node2Visited[j] && !GraphNodes[j].PointerEquivLabel)
- || (GraphNodes[j].Direct && !GraphNodes[j].PredEdges
- && GraphNodes[j].PointsTo->empty())){
- return;
- }
- }
- // Process all our explicit edges
- if (N->PredEdges)
- for (SparseBitVector<>::iterator Iter = N->PredEdges->begin();
- Iter != N->PredEdges->end();
- ++Iter) {
- unsigned j = VSSCCRep[*Iter];
- if (!Node2Visited[j])
- HUValNum(j);
-
- // If this edge turned out to be the same as us, or got no pointer
- // equivalence label (and thus points to nothing), just decrement its
- // incoming edge count and continue.
- if (j == NodeIndex || GraphNodes[j].PointerEquivLabel == 0) {
- --GraphNodes[j].NumInEdges;
- continue;
- }
-
- *(N->PointsTo) |= GraphNodes[j].PointsTo;
-
- // If we didn't end up storing this in the hash, and we're done with all
- // the edges, we don't need the points-to set anymore.
- --GraphNodes[j].NumInEdges;
- if (!GraphNodes[j].NumInEdges && !GraphNodes[j].StoredInHash) {
- delete GraphNodes[j].PointsTo;
- GraphNodes[j].PointsTo = NULL;
- }
- }
- // If this isn't a direct node, generate a fresh variable.
- if (!N->Direct) {
- N->PointsTo->set(FirstRefNode + NodeIndex);
- }
-
- // See if we have something equivalent to us; if not, generate a new
- // equivalence class.
- if (N->PointsTo->empty()) {
- delete N->PointsTo;
- N->PointsTo = NULL;
- } else {
- if (N->Direct) {
- N->PointerEquivLabel = Set2PEClass[N->PointsTo];
- if (N->PointerEquivLabel == 0) {
- unsigned EquivClass = PEClass++;
- N->StoredInHash = true;
- Set2PEClass[N->PointsTo] = EquivClass;
- N->PointerEquivLabel = EquivClass;
- }
- } else {
- N->PointerEquivLabel = PEClass++;
- }
- }
-}
-
-/// Rewrite our list of constraints so that pointer equivalent nodes are
-/// replaced by their pointer equivalence class representative.
-void Andersens::RewriteConstraints() {
- std::vector<Constraint> NewConstraints;
- DenseSet<Constraint, ConstraintKeyInfo> Seen;
-
- PEClass2Node.clear();
- PENLEClass2Node.clear();
-
- // We may have from 1 to GraphNodes.size() + 1 equivalence classes.
- PEClass2Node.insert(PEClass2Node.begin(), GraphNodes.size() + 1, -1);
- PENLEClass2Node.insert(PENLEClass2Node.begin(), GraphNodes.size() + 1, -1);
-
- // Rewrite constraints, ignoring non-pointer constraints, uniting equivalent
- // nodes, and rewriting constraints to use the representative nodes.
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- Constraint &C = Constraints[i];
- unsigned RHSNode = FindNode(C.Src);
- unsigned LHSNode = FindNode(C.Dest);
- unsigned RHSLabel = GraphNodes[VSSCCRep[RHSNode]].PointerEquivLabel;
- unsigned LHSLabel = GraphNodes[VSSCCRep[LHSNode]].PointerEquivLabel;
-
- // First we try to eliminate constraints for things we can prove don't point
- // to anything.
- if (LHSLabel == 0) {
- DEBUG(PrintNode(&GraphNodes[LHSNode]));
- DEBUG(dbgs() << " is a non-pointer, ignoring constraint.\n");
- continue;
- }
- if (RHSLabel == 0) {
- DEBUG(PrintNode(&GraphNodes[RHSNode]));
- DEBUG(dbgs() << " is a non-pointer, ignoring constraint.\n");
- continue;
- }
- // This constraint may already be useless, or it may become useless as we
- // translate it.
- if (C.Src == C.Dest && C.Type == Constraint::Copy)
- continue;
-
- C.Src = FindEquivalentNode(RHSNode, RHSLabel);
- C.Dest = FindEquivalentNode(FindNode(LHSNode), LHSLabel);
- if ((C.Src == C.Dest && C.Type == Constraint::Copy)
- || Seen.count(C))
- continue;
-
- Seen.insert(C);
- NewConstraints.push_back(C);
- }
- Constraints.swap(NewConstraints);
- PEClass2Node.clear();
-}
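-
-// For example, if HVN gave x and y the same pointer equivalence label, then
-// the pair of constraints "a = x" and "a = y" rewrites into two copies of the
-// same constraint on the representative node, and the Seen set keeps only one
-// of them.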
-
-/// See if we have a node that is pointer equivalent to the one being asked
-/// about, and if so, unite them and return the equivalent node. Otherwise,
-/// return the original node.
-unsigned Andersens::FindEquivalentNode(unsigned NodeIndex,
- unsigned NodeLabel) {
- if (!GraphNodes[NodeIndex].AddressTaken) {
- if (PEClass2Node[NodeLabel] != -1) {
- // We found an existing node with the same pointer label, so unify them.
- // We specifically request that Union-By-Rank not be used so that
- // PEClass2Node[NodeLabel] U= NodeIndex and not the other way around.
- return UniteNodes(PEClass2Node[NodeLabel], NodeIndex, false);
- } else {
- PEClass2Node[NodeLabel] = NodeIndex;
- PENLEClass2Node[NodeLabel] = NodeIndex;
- }
- } else if (PENLEClass2Node[NodeLabel] == -1) {
- PENLEClass2Node[NodeLabel] = NodeIndex;
- }
-
- return NodeIndex;
-}
-
-void Andersens::PrintLabels() const {
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- if (i < FirstRefNode) {
- PrintNode(&GraphNodes[i]);
- } else if (i < FirstAdrNode) {
- DEBUG(dbgs() << "REF(");
- PrintNode(&GraphNodes[i-FirstRefNode]);
- DEBUG(dbgs() <<")");
- } else {
- DEBUG(dbgs() << "ADR(");
- PrintNode(&GraphNodes[i-FirstAdrNode]);
- DEBUG(dbgs() <<")");
- }
-
- DEBUG(dbgs() << " has pointer label " << GraphNodes[i].PointerEquivLabel
- << " and SCC rep " << VSSCCRep[i]
- << " and is " << (GraphNodes[i].Direct ? "Direct" : "Not direct")
- << "\n");
- }
-}
-
-/// The technique used here is described in "The Ant and the
-/// Grasshopper: Fast and Accurate Pointer Analysis for Millions of
-/// Lines of Code," in Programming Language Design and Implementation
-/// (PLDI), June 2007. It is known as the "HCD" (Hybrid Cycle
-/// Detection) algorithm. It is called a hybrid because it performs an
-/// offline analysis and uses its results during the solving (online)
-/// phase. This is just the offline portion; the results of this
-/// operation are stored in SDT and are later used in SolveConstraints()
-/// and UniteNodes().
-void Andersens::HCD() {
- DEBUG(dbgs() << "Starting HCD.\n");
- HCDSCCRep.resize(GraphNodes.size());
-
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- GraphNodes[i].Edges = new SparseBitVector<>;
- HCDSCCRep[i] = i;
- }
-
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- Constraint &C = Constraints[i];
- assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size());
- if (C.Type == Constraint::AddressOf) {
- continue;
- } else if (C.Type == Constraint::Load) {
- if( C.Offset == 0 )
- GraphNodes[C.Dest].Edges->set(C.Src + FirstRefNode);
- } else if (C.Type == Constraint::Store) {
- if( C.Offset == 0 )
- GraphNodes[C.Dest + FirstRefNode].Edges->set(C.Src);
- } else {
- GraphNodes[C.Dest].Edges->set(C.Src);
- }
- }
-
- Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
- Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
- Node2Visited.insert(Node2Visited.begin(), GraphNodes.size(), false);
- SDT.insert(SDT.begin(), GraphNodes.size() / 2, -1);
-
- DFSNumber = 0;
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- unsigned Node = HCDSCCRep[i];
- if (!Node2Deleted[Node])
- Search(Node);
- }
-
- for (unsigned i = 0; i < GraphNodes.size(); ++i)
- if (GraphNodes[i].Edges != NULL) {
- delete GraphNodes[i].Edges;
- GraphNodes[i].Edges = NULL;
- }
-
- while( !SCCStack.empty() )
- SCCStack.pop();
-
- Node2DFS.clear();
- Node2Visited.clear();
- Node2Deleted.clear();
- HCDSCCRep.clear();
- DEBUG(dbgs() << "HCD complete.\n");
-}
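-
-// Offline example: the constraints "b = *a" and "*a = b" make {b, REF(a)} a
-// cycle in the graph built above, so Search() records SDT[a] = b. At solve
-// time, every variable that enters a's points-to set is then united with b's
-// representative immediately, instead of waiting for online cycle detection.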
-
-// Component of HCD:
-// Use Nuutila's variant of Tarjan's algorithm to detect
-// Strongly-Connected Components (SCCs). For non-trivial SCCs
-// containing ref nodes, insert the appropriate information in SDT.
-void Andersens::Search(unsigned Node) {
- unsigned MyDFS = DFSNumber++;
-
- Node2Visited[Node] = true;
- Node2DFS[Node] = MyDFS;
-
- for (SparseBitVector<>::iterator Iter = GraphNodes[Node].Edges->begin(),
- End = GraphNodes[Node].Edges->end();
- Iter != End;
- ++Iter) {
- unsigned J = HCDSCCRep[*Iter];
- assert(GraphNodes[J].isRep() && "Debug check; must be representative");
- if (!Node2Deleted[J]) {
- if (!Node2Visited[J])
- Search(J);
- if (Node2DFS[Node] > Node2DFS[J])
- Node2DFS[Node] = Node2DFS[J];
- }
- }
-
- if( MyDFS != Node2DFS[Node] ) {
- SCCStack.push(Node);
- return;
- }
-
- // This node is the root of an SCC, so process it.
- //
- // If the SCC is "non-trivial" (not a singleton) and contains a reference
- // node, we place this SCC into SDT. We unite the nodes in any case.
- if (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS) {
- SparseBitVector<> SCC;
-
- SCC.set(Node);
-
- bool Ref = (Node >= FirstRefNode);
-
- Node2Deleted[Node] = true;
-
- do {
- unsigned P = SCCStack.top(); SCCStack.pop();
- Ref |= (P >= FirstRefNode);
- SCC.set(P);
- HCDSCCRep[P] = Node;
- } while (!SCCStack.empty() && Node2DFS[SCCStack.top()] >= MyDFS);
-
- if (Ref) {
- unsigned Rep = SCC.find_first();
- assert(Rep < FirstRefNode && "The SCC didn't have a non-Ref node!");
-
- SparseBitVector<>::iterator i = SCC.begin();
-
- // Skip over the non-ref nodes
- while( *i < FirstRefNode )
- ++i;
-
- while( i != SCC.end() )
- SDT[ (*i++) - FirstRefNode ] = Rep;
- }
- }
-}
-
-
-/// Optimize the constraints by performing offline variable substitution and
-/// other optimizations.
-void Andersens::OptimizeConstraints() {
- DEBUG(dbgs() << "Beginning constraint optimization\n");
-
- SDTActive = false;
-
- // Function-related nodes need to stay in the same relative position and can't
- // be location equivalent.
- for (std::map<unsigned, unsigned>::iterator Iter = MaxK.begin();
- Iter != MaxK.end();
- ++Iter) {
- for (unsigned i = Iter->first;
- i != Iter->first + Iter->second;
- ++i) {
- GraphNodes[i].AddressTaken = true;
- GraphNodes[i].Direct = false;
- }
- }
-
- ClumpAddressTaken();
- FirstRefNode = GraphNodes.size();
- FirstAdrNode = FirstRefNode + GraphNodes.size();
- GraphNodes.insert(GraphNodes.end(), 2 * GraphNodes.size(),
- Node(false));
- VSSCCRep.resize(GraphNodes.size());
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- VSSCCRep[i] = i;
- }
- HVN();
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- Node *N = &GraphNodes[i];
- delete N->PredEdges;
- N->PredEdges = NULL;
- delete N->ImplicitPredEdges;
- N->ImplicitPredEdges = NULL;
- }
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa-labels"
- DEBUG(PrintLabels());
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa"
- RewriteConstraints();
- // Delete the adr nodes.
- GraphNodes.resize(FirstRefNode * 2);
-
- // Now perform HU
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- Node *N = &GraphNodes[i];
- if (FindNode(i) == i) {
- N->PointsTo = new SparseBitVector<>;
- N->PointedToBy = new SparseBitVector<>;
- // Reset our labels
- }
- VSSCCRep[i] = i;
- N->PointerEquivLabel = 0;
- }
- HU();
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa-labels"
- DEBUG(PrintLabels());
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa"
- RewriteConstraints();
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- if (FindNode(i) == i) {
- Node *N = &GraphNodes[i];
- delete N->PointsTo;
- N->PointsTo = NULL;
- delete N->PredEdges;
- N->PredEdges = NULL;
- delete N->ImplicitPredEdges;
- N->ImplicitPredEdges = NULL;
- delete N->PointedToBy;
- N->PointedToBy = NULL;
- }
- }
-
- // Perform Hybrid Cycle Detection (HCD).
- HCD();
- SDTActive = true;
-
- // No longer any need for the upper half of GraphNodes (for ref nodes).
- GraphNodes.erase(GraphNodes.begin() + FirstRefNode, GraphNodes.end());
-
- // HCD complete.
-
- DEBUG(dbgs() << "Finished constraint optimization\n");
- FirstRefNode = 0;
- FirstAdrNode = 0;
-}
-
-/// Unite variables that are pointer equivalent but not location equivalent,
-/// now that the constraint graph is built.
-void Andersens::UnitePointerEquivalences() {
- DEBUG(dbgs() << "Uniting remaining pointer equivalences\n");
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- if (GraphNodes[i].AddressTaken && GraphNodes[i].isRep()) {
- unsigned Label = GraphNodes[i].PointerEquivLabel;
-
- if (Label && PENLEClass2Node[Label] != -1)
- UniteNodes(i, PENLEClass2Node[Label]);
- }
- }
- DEBUG(dbgs() << "Finished remaining pointer equivalences\n");
- PENLEClass2Node.clear();
-}
-
-/// Create the constraint graph used for solving points-to analysis.
-///
-void Andersens::CreateConstraintGraph() {
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i) {
- Constraint &C = Constraints[i];
- assert (C.Src < GraphNodes.size() && C.Dest < GraphNodes.size());
- if (C.Type == Constraint::AddressOf)
- GraphNodes[C.Dest].PointsTo->set(C.Src);
- else if (C.Type == Constraint::Load)
- GraphNodes[C.Src].Constraints.push_back(C);
- else if (C.Type == Constraint::Store)
- GraphNodes[C.Dest].Constraints.push_back(C);
- else if (C.Offset != 0)
- GraphNodes[C.Src].Constraints.push_back(C);
- else
- GraphNodes[C.Src].Edges->set(C.Dest);
- }
-}
-
-// Perform DFS and cycle detection.
-bool Andersens::QueryNode(unsigned Node) {
- assert(GraphNodes[Node].isRep() && "Querying a non-rep node");
- unsigned OurDFS = ++DFSNumber;
- SparseBitVector<> ToErase;
- SparseBitVector<> NewEdges;
- Tarjan2DFS[Node] = OurDFS;
-
- // Changed denotes a change from a recursive call that we will bubble up.
- // Merged is set if we actually merge a node ourselves.
- bool Changed = false, Merged = false;
-
- for (SparseBitVector<>::iterator bi = GraphNodes[Node].Edges->begin();
- bi != GraphNodes[Node].Edges->end();
- ++bi) {
- unsigned RepNode = FindNode(*bi);
- // If this edge points to a non-representative node but we are
- // already planning to add an edge to its representative, we have no
- // need for this edge anymore.
- if (RepNode != *bi && NewEdges.test(RepNode)){
- ToErase.set(*bi);
- continue;
- }
-
- // Continue our DFS.
- if (!Tarjan2Deleted[RepNode]){
- if (Tarjan2DFS[RepNode] == 0) {
- Changed |= QueryNode(RepNode);
- // May have been changed by QueryNode
- RepNode = FindNode(RepNode);
- }
- if (Tarjan2DFS[RepNode] < Tarjan2DFS[Node])
- Tarjan2DFS[Node] = Tarjan2DFS[RepNode];
- }
-
- // We may have just discovered that this node is part of a cycle, in
- // which case we can also erase it.
- if (RepNode != *bi) {
- ToErase.set(*bi);
- NewEdges.set(RepNode);
- }
- }
-
- GraphNodes[Node].Edges->intersectWithComplement(ToErase);
- GraphNodes[Node].Edges |= NewEdges;
-
- // If this node is a root of a non-trivial SCC, place it on our
- // worklist to be processed.
- if (OurDFS == Tarjan2DFS[Node]) {
- while (!SCCStack.empty() && Tarjan2DFS[SCCStack.top()] >= OurDFS) {
- Node = UniteNodes(Node, SCCStack.top());
-
- SCCStack.pop();
- Merged = true;
- }
- Tarjan2Deleted[Node] = true;
-
- if (Merged)
- NextWL->insert(&GraphNodes[Node]);
- } else {
- SCCStack.push(Node);
- }
-
- return(Changed | Merged);
-}
-
-/// SolveConstraints - This stage iteratively processes the constraints list
-/// propagating constraints (adding edges to the Nodes in the points-to graph)
-/// until a fixed point is reached.
-///
-/// We use a variant of the technique called "Lazy Cycle Detection", which is
-/// described in "The Ant and the Grasshopper: Fast and Accurate Pointer
-/// Analysis for Millions of Lines of Code. In Programming Language Design and
-/// Implementation (PLDI), June 2007."
-/// The paper describes performing cycle detection one node at a time, which can
-/// be expensive if there are no cycles, but there are long chains of nodes that
-/// it heuristically believes are cycles (because it will DFS from each node
-/// without state from previous nodes).
-/// Instead, we use the heuristic to build a worklist of nodes to check, then
-/// cycle detect them all at the same time to do this more cheaply. This
-/// catches cycles slightly later than the original technique did, but does it
-/// make significantly cheaper.
-
-void Andersens::SolveConstraints() {
- CurrWL = &w1;
- NextWL = &w2;
-
- OptimizeConstraints();
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa-constraints"
- DEBUG(PrintConstraints());
-#undef DEBUG_TYPE
-#define DEBUG_TYPE "anders-aa"
-
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- Node *N = &GraphNodes[i];
- N->PointsTo = new SparseBitVector<>;
- N->OldPointsTo = new SparseBitVector<>;
- N->Edges = new SparseBitVector<>;
- }
- CreateConstraintGraph();
- UnitePointerEquivalences();
- assert(SCCStack.empty() && "SCC Stack should be empty by now!");
- Node2DFS.clear();
- Node2Deleted.clear();
- Node2DFS.insert(Node2DFS.begin(), GraphNodes.size(), 0);
- Node2Deleted.insert(Node2Deleted.begin(), GraphNodes.size(), false);
- DFSNumber = 0;
- DenseSet<Constraint, ConstraintKeyInfo> Seen;
- DenseSet<std::pair<unsigned,unsigned>, PairKeyInfo> EdgesChecked;
-
- // Order graph and add initial nodes to work list.
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- Node *INode = &GraphNodes[i];
-
- // Add to work list if it's a representative and can contribute to the
- // calculation right now.
- if (INode->isRep() && !INode->PointsTo->empty()
- && (!INode->Edges->empty() || !INode->Constraints.empty())) {
- INode->Stamp();
- CurrWL->insert(INode);
- }
- }
- std::queue<unsigned int> TarjanWL;
-#if !FULL_UNIVERSAL
- // "Rep and special variables" - in order for HCD to maintain conservative
- // results when !FULL_UNIVERSAL, we need to treat the special variables in
- // the same way that the !FULL_UNIVERSAL tweak does throughout the rest of
- // the analysis - it's ok to add edges from the special nodes, but never
- // *to* the special nodes.
- std::vector<unsigned int> RSV;
-#endif
- while( !CurrWL->empty() ) {
- DEBUG(dbgs() << "Starting iteration #" << ++NumIters << "\n");
-
- Node* CurrNode;
- unsigned CurrNodeIndex;
-
- // Actual cycle checking code. We cycle check all of the lazy cycle
- // candidates from the last iteration in one go.
- if (!TarjanWL.empty()) {
- DFSNumber = 0;
-
- Tarjan2DFS.clear();
- Tarjan2Deleted.clear();
- while (!TarjanWL.empty()) {
- unsigned int ToTarjan = TarjanWL.front();
- TarjanWL.pop();
- if (!Tarjan2Deleted[ToTarjan]
- && GraphNodes[ToTarjan].isRep()
- && Tarjan2DFS[ToTarjan] == 0)
- QueryNode(ToTarjan);
- }
- }
-
- // Process the nodes on the current work list.
- while( (CurrNode = CurrWL->pop()) != NULL ) {
- CurrNodeIndex = CurrNode - &GraphNodes[0];
- CurrNode->Stamp();
-
-
- // Figure out the changed points-to bits.
- SparseBitVector<> CurrPointsTo;
- CurrPointsTo.intersectWithComplement(CurrNode->PointsTo,
- CurrNode->OldPointsTo);
- if (CurrPointsTo.empty())
- continue;
-
- *(CurrNode->OldPointsTo) |= CurrPointsTo;
-
- // Check the offline-computed equivalencies from HCD.
- bool SCC = false;
- unsigned Rep;
-
- if (SDT[CurrNodeIndex] >= 0) {
- SCC = true;
- Rep = FindNode(SDT[CurrNodeIndex]);
-
-#if !FULL_UNIVERSAL
- RSV.clear();
-#endif
- for (SparseBitVector<>::iterator bi = CurrPointsTo.begin();
- bi != CurrPointsTo.end(); ++bi) {
- unsigned Node = FindNode(*bi);
-#if !FULL_UNIVERSAL
- if (Node < NumberSpecialNodes) {
- RSV.push_back(Node);
- continue;
- }
-#endif
- Rep = UniteNodes(Rep,Node);
- }
-#if !FULL_UNIVERSAL
- RSV.push_back(Rep);
-#endif
-
- NextWL->insert(&GraphNodes[Rep]);
-
- if ( ! CurrNode->isRep() )
- continue;
- }
-
- Seen.clear();
-
- // Now process the constraints for this node.
- for (std::list<Constraint>::iterator li = CurrNode->Constraints.begin();
- li != CurrNode->Constraints.end(); ) {
- li->Src = FindNode(li->Src);
- li->Dest = FindNode(li->Dest);
-
- // Delete redundant constraints
- if( Seen.count(*li) ) {
- std::list<Constraint>::iterator lk = li; li++;
-
- CurrNode->Constraints.erase(lk);
- ++NumErased;
- continue;
- }
- Seen.insert(*li);
-
- // Src and Dest will be the vars we are going to process.
- // This may look a bit ugly, but what it does is allow us to process
- // both store and load constraints with the same code.
- // Load constraints say that every member of our RHS solution has K
- // added to it, and that variable gets an edge to LHS. We also union
- // RHS+K's solution into the LHS solution.
- // Store constraints say that every member of our LHS solution has K
- // added to it, and that variable gets an edge from RHS. We also union
- // RHS's solution into the LHS+K solution.
- unsigned *Src;
- unsigned *Dest;
- unsigned K = li->Offset;
- unsigned CurrMember;
- if (li->Type == Constraint::Load) {
- Src = &CurrMember;
- Dest = &li->Dest;
- } else if (li->Type == Constraint::Store) {
- Src = &li->Src;
- Dest = &CurrMember;
- } else {
- // TODO: Handle copy constraints with offsets.
- li++;
- continue;
- }
-
- // See if we can use Hybrid Cycle Detection (that is, check
- // if it was a statically detected offline equivalence that
- // involves pointers; if so, remove the redundant constraints).
- if( SCC && K == 0 ) {
-#if FULL_UNIVERSAL
- CurrMember = Rep;
-
- if (GraphNodes[*Src].Edges->test_and_set(*Dest))
- if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
- NextWL->insert(&GraphNodes[*Dest]);
-#else
- for (unsigned i=0; i < RSV.size(); ++i) {
- CurrMember = RSV[i];
-
- if (*Dest < NumberSpecialNodes)
- continue;
- if (GraphNodes[*Src].Edges->test_and_set(*Dest))
- if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
- NextWL->insert(&GraphNodes[*Dest]);
- }
-#endif
- // Since all future elements of the points-to set will be
- // equivalent to the current ones, the complex constraints
- // become redundant.
- //
- std::list<Constraint>::iterator lk = li; li++;
-#if !FULL_UNIVERSAL
- // In this case, we can still erase the constraints when the
- // elements of the points-to sets are referenced by *Dest,
- // but not when they are referenced by *Src (i.e. for a Load
- // constraint). This is because if another special variable is
- // put into the points-to set later, we still need to add the
- // new edge from that special variable.
- if( lk->Type != Constraint::Load)
-#endif
- GraphNodes[CurrNodeIndex].Constraints.erase(lk);
- } else {
- const SparseBitVector<> &Solution = CurrPointsTo;
-
- for (SparseBitVector<>::iterator bi = Solution.begin();
- bi != Solution.end();
- ++bi) {
- CurrMember = *bi;
-
- // Need to increment the member by K since that is where we are
- // supposed to copy to/from. Note that in positive weight cycles,
- // which occur when taking the address of fields, K can go past
- // MaxK[CurrMember] elements, even though that is all it could point
- // to.
- if (K > 0 && K > MaxK[CurrMember])
- continue;
- else
- CurrMember = FindNode(CurrMember + K);
-
- // Add an edge to the graph, so we can just do regular
- // bitmap ior next time. It may also let us notice a cycle.
-#if !FULL_UNIVERSAL
- if (*Dest < NumberSpecialNodes)
- continue;
-#endif
- if (GraphNodes[*Src].Edges->test_and_set(*Dest))
- if (GraphNodes[*Dest].PointsTo |= *(GraphNodes[*Src].PointsTo))
- NextWL->insert(&GraphNodes[*Dest]);
-
- }
- li++;
- }
- }
- SparseBitVector<> NewEdges;
- SparseBitVector<> ToErase;
-
- // Now all we have left to do is propagate points-to info along the
- // edges, erasing the redundant edges.
- for (SparseBitVector<>::iterator bi = CurrNode->Edges->begin();
- bi != CurrNode->Edges->end();
- ++bi) {
-
- unsigned DestVar = *bi;
- unsigned Rep = FindNode(DestVar);
-
- // If we ended up with this node as our destination, or we've already
- // got an edge for the representative, delete the current edge.
- if (Rep == CurrNodeIndex ||
- (Rep != DestVar && NewEdges.test(Rep))) {
- ToErase.set(DestVar);
- continue;
- }
-
- std::pair<unsigned,unsigned> edge(CurrNodeIndex,Rep);
-
- // This is where we do lazy cycle detection.
- // If this is a cycle candidate (equal points-to sets and this
- // particular edge has not been cycle-checked previously), add to the
- // list to check for cycles on the next iteration.
- if (!EdgesChecked.count(edge) &&
- *(GraphNodes[Rep].PointsTo) == *(CurrNode->PointsTo)) {
- EdgesChecked.insert(edge);
- TarjanWL.push(Rep);
- }
- // Union the points-to sets into the dest
-#if !FULL_UNIVERSAL
- if (Rep >= NumberSpecialNodes)
-#endif
- if (GraphNodes[Rep].PointsTo |= CurrPointsTo) {
- NextWL->insert(&GraphNodes[Rep]);
- }
- // If this edge's destination was collapsed, rewrite the edge.
- if (Rep != DestVar) {
- ToErase.set(DestVar);
- NewEdges.set(Rep);
- }
- }
- CurrNode->Edges->intersectWithComplement(ToErase);
- CurrNode->Edges |= NewEdges;
- }
-
- // Switch to other work list.
- WorkList* t = CurrWL; CurrWL = NextWL; NextWL = t;
- }
-
-
- Node2DFS.clear();
- Node2Deleted.clear();
- for (unsigned i = 0; i < GraphNodes.size(); ++i) {
- Node *N = &GraphNodes[i];
- delete N->OldPointsTo;
- delete N->Edges;
- }
- SDTActive = false;
- SDT.clear();
-}
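-
-// The loop above only pushes the difference PointsTo - OldPointsTo along the
-// outgoing edges, so revisiting a node costs time proportional to what
-// changed, not to its whole points-to set. A standalone sketch of that
-// difference propagation (hypothetical names, std::set standing in for
-// SparseBitVector):
-#if 0
-#include <set>
-#include <vector>
-
-typedef std::set<unsigned> Bits;
-
-struct PNode {
-  Bits PointsTo, OldPointsTo;
-  std::vector<unsigned> Edges;
-};
-
-// Return the bits of N.PointsTo not yet propagated, folding them into
-// N.OldPointsTo so they are never pushed twice.
-static Bits takeDelta(PNode &N) {
-  Bits Delta;
-  for (Bits::iterator I = N.PointsTo.begin(), E = N.PointsTo.end(); I != E; ++I)
-    if (N.OldPointsTo.insert(*I).second)
-      Delta.insert(*I);
-  return Delta;
-}
-
-// One worklist pass: push each node's delta to its successors, and requeue
-// any successor whose set actually grew.
-static void propagate(std::vector<PNode> &G, std::vector<unsigned> &WL) {
-  std::vector<unsigned> Next;
-  for (unsigned i = 0; i != WL.size(); ++i) {
-    Bits Delta = takeDelta(G[WL[i]]);
-    if (Delta.empty()) continue;
-    for (unsigned e = 0; e != G[WL[i]].Edges.size(); ++e) {
-      PNode &Dst = G[G[WL[i]].Edges[e]];
-      Bits::size_type Before = Dst.PointsTo.size();
-      Dst.PointsTo.insert(Delta.begin(), Delta.end());
-      if (Dst.PointsTo.size() != Before)
-        Next.push_back(G[WL[i]].Edges[e]);
-    }
-  }
-  WL.swap(Next);
-}
-#endif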
-
-//===----------------------------------------------------------------------===//
-// Union-Find
-//===----------------------------------------------------------------------===//
-
-// Unite nodes First and Second, returning the one which is now the
-// representative node. First and Second are indexes into GraphNodes.
-unsigned Andersens::UniteNodes(unsigned First, unsigned Second,
- bool UnionByRank) {
- assert (First < GraphNodes.size() && Second < GraphNodes.size() &&
- "Attempting to merge nodes that don't exist");
-
- Node *FirstNode = &GraphNodes[First];
- Node *SecondNode = &GraphNodes[Second];
-
- assert (SecondNode->isRep() && FirstNode->isRep() &&
- "Trying to unite two non-representative nodes!");
- if (First == Second)
- return First;
-
- if (UnionByRank) {
- int RankFirst = (int) FirstNode->NodeRep;
- int RankSecond = (int) SecondNode->NodeRep;
-
- // Rank starts at -1 and gets decremented as it increases.
- // Translation: higher rank, lower NodeRep value, which is always negative.
- if (RankFirst > RankSecond) {
- unsigned t = First; First = Second; Second = t;
- Node* tp = FirstNode; FirstNode = SecondNode; SecondNode = tp;
- } else if (RankFirst == RankSecond) {
- FirstNode->NodeRep = (unsigned) (RankFirst - 1);
- }
- }
-
- SecondNode->NodeRep = First;
-#if !FULL_UNIVERSAL
- if (First >= NumberSpecialNodes)
-#endif
- if (FirstNode->PointsTo && SecondNode->PointsTo)
- FirstNode->PointsTo |= *(SecondNode->PointsTo);
- if (FirstNode->Edges && SecondNode->Edges)
- FirstNode->Edges |= *(SecondNode->Edges);
- if (!SecondNode->Constraints.empty())
- FirstNode->Constraints.splice(FirstNode->Constraints.begin(),
- SecondNode->Constraints);
- if (FirstNode->OldPointsTo) {
- delete FirstNode->OldPointsTo;
- FirstNode->OldPointsTo = new SparseBitVector<>;
- }
-
- // Destroy interesting parts of the merged-from node.
- delete SecondNode->OldPointsTo;
- delete SecondNode->Edges;
- delete SecondNode->PointsTo;
- SecondNode->Edges = NULL;
- SecondNode->PointsTo = NULL;
- SecondNode->OldPointsTo = NULL;
-
- NumUnified++;
- DEBUG(dbgs() << "Unified Node ");
- DEBUG(PrintNode(FirstNode));
- DEBUG(dbgs() << " and Node ");
- DEBUG(PrintNode(SecondNode));
- DEBUG(dbgs() << "\n");
-
- if (SDTActive)
- if (SDT[Second] >= 0) {
- if (SDT[First] < 0)
- SDT[First] = SDT[Second];
- else {
- UniteNodes( FindNode(SDT[First]), FindNode(SDT[Second]) );
- First = FindNode(First);
- }
- }
-
- return First;
-}
-
-// Find the index into GraphNodes of the node representing Node, performing
-// path compression along the way
-unsigned Andersens::FindNode(unsigned NodeIndex) {
- assert (NodeIndex < GraphNodes.size()
- && "Attempting to find a node that can't exist");
- Node *N = &GraphNodes[NodeIndex];
- if (N->isRep())
- return NodeIndex;
- else
- return (N->NodeRep = FindNode(N->NodeRep));
-}
-
-// Find the index into GraphNodes of the node representing Node,
-// don't perform path compression along the way (for Print)
-unsigned Andersens::FindNode(unsigned NodeIndex) const {
- assert (NodeIndex < GraphNodes.size()
- && "Attempting to find a node that can't exist");
- const Node *N = &GraphNodes[NodeIndex];
- if (N->isRep())
- return NodeIndex;
- else
- return FindNode(N->NodeRep);
-}
-
-//===----------------------------------------------------------------------===//
-// Debugging Output
-//===----------------------------------------------------------------------===//
-
-void Andersens::PrintNode(const Node *N) const {
- if (N == &GraphNodes[UniversalSet]) {
- dbgs() << "<universal>";
- return;
- } else if (N == &GraphNodes[NullPtr]) {
- dbgs() << "<nullptr>";
- return;
- } else if (N == &GraphNodes[NullObject]) {
- dbgs() << "<null>";
- return;
- }
- if (!N->getValue()) {
- dbgs() << "artificial" << (intptr_t) N;
- return;
- }
-
- assert(N->getValue() != 0 && "Never set node label!");
- Value *V = N->getValue();
- if (Function *F = dyn_cast<Function>(V)) {
- if (isa<PointerType>(F->getFunctionType()->getReturnType()) &&
- N == &GraphNodes[getReturnNode(F)]) {
- dbgs() << F->getName() << ":retval";
- return;
- } else if (F->getFunctionType()->isVarArg() &&
- N == &GraphNodes[getVarargNode(F)]) {
- dbgs() << F->getName() << ":vararg";
- return;
- }
- }
-
- if (Instruction *I = dyn_cast<Instruction>(V))
- dbgs() << I->getParent()->getParent()->getName() << ":";
- else if (Argument *Arg = dyn_cast<Argument>(V))
- dbgs() << Arg->getParent()->getName() << ":";
-
- if (V->hasName())
- dbgs() << V->getName();
- else
- dbgs() << "(unnamed)";
-
- if (isa<GlobalValue>(V) || isa<AllocaInst>(V) || isMalloc(V))
- if (N == &GraphNodes[getObject(V)])
- dbgs() << "<mem>";
-}
-void Andersens::PrintConstraint(const Constraint &C) const {
- if (C.Type == Constraint::Store) {
- dbgs() << "*";
- if (C.Offset != 0)
- dbgs() << "(";
- }
- PrintNode(&GraphNodes[C.Dest]);
- if (C.Type == Constraint::Store && C.Offset != 0)
- dbgs() << " + " << C.Offset << ")";
- dbgs() << " = ";
- if (C.Type == Constraint::Load) {
- dbgs() << "*";
- if (C.Offset != 0)
- dbgs() << "(";
- }
- else if (C.Type == Constraint::AddressOf)
- dbgs() << "&";
- PrintNode(&GraphNodes[C.Src]);
- if (C.Offset != 0 && C.Type != Constraint::Store)
- dbgs() << " + " << C.Offset;
- if (C.Type == Constraint::Load && C.Offset != 0)
- dbgs() << ")";
- dbgs() << "\n";
-}
-
-void Andersens::PrintConstraints() const {
- dbgs() << "Constraints:\n";
-
- for (unsigned i = 0, e = Constraints.size(); i != e; ++i)
- PrintConstraint(Constraints[i]);
-}
-
-void Andersens::PrintPointsToGraph() const {
- dbgs() << "Points-to graph:\n";
- for (unsigned i = 0, e = GraphNodes.size(); i != e; ++i) {
- const Node *N = &GraphNodes[i];
- if (FindNode(i) != i) {
- PrintNode(N);
- dbgs() << "\t--> same as ";
- PrintNode(&GraphNodes[FindNode(i)]);
- dbgs() << "\n";
- } else {
- dbgs() << "[" << (N->PointsTo->count()) << "] ";
- PrintNode(N);
- dbgs() << "\t--> ";
-
- bool first = true;
- for (SparseBitVector<>::iterator bi = N->PointsTo->begin();
- bi != N->PointsTo->end();
- ++bi) {
- if (!first)
- dbgs() << ", ";
- PrintNode(&GraphNodes[*bi]);
- first = false;
- }
- dbgs() << "\n";
- }
- }
-}
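
The UniteNodes/FindNode pair removed above is a textbook union-find with
union by rank and path compression; the rank is packed into NodeRep as a
negative value (starting at -1 and decremented as rank grows), while
non-negative NodeRep values are parent indices. A minimal standalone sketch
of the same encoding, with hypothetical names rather than the removed
Andersens API:

    #include <utility>
    #include <vector>

    // Rep[i] >= 0: parent index; Rep[i] < 0: i is a representative and
    // -Rep[i]-1 is its rank, mirroring the NodeRep encoding above.
    class UnionFind {
      std::vector<int> Rep;
    public:
      explicit UnionFind(unsigned N) : Rep(N, -1) {}

      // Find with path compression.
      unsigned find(unsigned X) {
        if (Rep[X] < 0)
          return X;                    // X is a representative.
        return Rep[X] = find(Rep[X]);  // Compress the path as we return.
      }

      // Union by rank; returns the surviving representative.
      unsigned unite(unsigned A, unsigned B) {
        A = find(A); B = find(B);
        if (A == B) return A;
        if (Rep[A] > Rep[B]) std::swap(A, B); // More negative = higher rank.
        if (Rep[A] == Rep[B]) --Rep[A];       // Equal ranks: promote A.
        Rep[B] = (int)A;
        return A;
      }
    };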
diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt
index 1ebb0be..007ad22 100644
--- a/lib/Analysis/IPA/CMakeLists.txt
+++ b/lib/Analysis/IPA/CMakeLists.txt
@@ -1,5 +1,4 @@
add_llvm_library(LLVMipa
- Andersens.cpp
CallGraph.cpp
CallGraphSCCPass.cpp
FindUsedTypes.cpp
diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp
index ec94bc8..7b43089 100644
--- a/lib/Analysis/IPA/GlobalsModRef.cpp
+++ b/lib/Analysis/IPA/GlobalsModRef.cpp
@@ -213,7 +213,7 @@ void GlobalsModRef::AnalyzeGlobals(Module &M) {
++NumNonAddrTakenGlobalVars;
// If this global holds a pointer type, see if it is an indirect global.
- if (isa<PointerType>(I->getType()->getElementType()) &&
+ if (I->getType()->getElementType()->isPointerTy() &&
AnalyzeIndirectGlobalMemory(I))
++NumIndirectGlobalVars;
}
@@ -231,7 +231,7 @@ bool GlobalsModRef::AnalyzeUsesOfPointer(Value *V,
std::vector<Function*> &Readers,
std::vector<Function*> &Writers,
GlobalValue *OkayStoreDest) {
- if (!isa<PointerType>(V->getType())) return true;
+ if (!V->getType()->isPointerTy()) return true;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E; ++UI)
if (LoadInst *LI = dyn_cast<LoadInst>(*UI)) {
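
A recurring mechanical change in this update, visible in the two hunks above
and repeated across the files below, swaps isa<PointerType>(V->getType()) for
the predicate that Type itself now provides. The two spellings are
equivalent; a minimal sketch, assuming only headers the surrounding code
already uses:

    #include "llvm/DerivedTypes.h"
    #include "llvm/Value.h"
    #include "llvm/Support/Casting.h"
    using namespace llvm;

    // Old style: RTTI query against the Type subclass.
    static bool isPtrOld(const Value *V) {
      return isa<PointerType>(V->getType());
    }

    // New style: ask the Type directly; no cast machinery involved.
    static bool isPtrNew(const Value *V) {
      return V->getType()->isPointerTy();
    }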
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 4ce6868..98a436f 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -142,8 +142,7 @@ static bool getSCEVStartAndStride(const SCEV *&SH, Loop *L, Loop *UseLoop,
/// the loop, resulting in reg-reg copies (if we use the pre-inc value when we
/// should use the post-inc value).
static bool IVUseShouldUsePostIncValue(Instruction *User, Instruction *IV,
- Loop *L, LoopInfo *LI, DominatorTree *DT,
- Pass *P) {
+ Loop *L, DominatorTree *DT) {
// If the user is in the loop, use the preinc value.
if (L->contains(User)) return false;
@@ -223,7 +222,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
// Descend recursively, but not into PHI nodes outside the current loop.
// It's important to see the entire expression outside the loop to get
// choices that depend on addressing mode use right, although we won't
- // consider references ouside the loop in all cases.
+ // consider references outside the loop in all cases.
// If User is already in Processed, we don't want to recurse into it again,
// but do want to record a second reference in the same instruction.
bool AddUserToIVUsers = false;
@@ -245,7 +244,7 @@ bool IVUsers::AddUsersIfInteresting(Instruction *I) {
// Okay, we found a user that we cannot reduce. Analyze the instruction
// and decide what to do with it. If we are a use inside of the loop, use
// the value before incrementation, otherwise use it after incrementation.
- if (IVUseShouldUsePostIncValue(User, I, L, LI, DT, this)) {
+ if (IVUseShouldUsePostIncValue(User, I, L, DT)) {
// The value used will be incremented by the stride more than we are
// expecting, so subtract this off.
const SCEV *NewStart = SE->getMinusSCEV(Start, Stride);
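
The pre-inc versus post-inc distinction this hunk adjusts for is easiest to
see in source form. A hypothetical C example, not taken from the patch:

    /* The loop's induction variable is the recurrence {0,+,1}. */
    int last_index(int n) {
      int i;
      for (i = 0; i != n; ++i)
        /* uses of i in here see the pre-increment value */;
      return i; /* post-increment use: i == n here, one stride past the
                   last value observed inside the loop, which is why the
                   code above subtracts Stride from Start. */
    }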
@@ -331,7 +330,7 @@ void IVUsers::print(raw_ostream &OS, const Module *M) const {
}
OS << ":\n";
- // Use a defualt AssemblyAnnotationWriter to suppress the default info
+ // Use a default AssemblyAnnotationWriter to suppress the default info
// comments, which aren't relevant here.
AssemblyAnnotationWriter Annotator;
for (ilist<IVStrideUse>::const_iterator UI = IVUses.begin(),
diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp
index 972d034..ca50a17 100644
--- a/lib/Analysis/InlineCost.cpp
+++ b/lib/Analysis/InlineCost.cpp
@@ -84,7 +84,7 @@ unsigned InlineCostAnalyzer::FunctionInfo::
//
unsigned InlineCostAnalyzer::FunctionInfo::
CountCodeReductionForAlloca(Value *V) {
- if (!isa<PointerType>(V->getType())) return 0; // Not a pointer
+ if (!V->getType()->isPointerTy()) return 0; // Not a pointer
unsigned Reduction = 0;
for (Value::use_iterator UI = V->use_begin(), E = V->use_end(); UI != E;++UI){
Instruction *I = cast<Instruction>(*UI);
@@ -175,7 +175,7 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB) {
this->usesDynamicAlloca = true;
}
- if (isa<ExtractElementInst>(II) || isa<VectorType>(II->getType()))
+ if (isa<ExtractElementInst>(II) || II->getType()->isVectorTy())
++NumVectorInsts;
if (const CastInst *CI = dyn_cast<CastInst>(II)) {
diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp
index b53ac13..1f8053a 100644
--- a/lib/Analysis/InstructionSimplify.cpp
+++ b/lib/Analysis/InstructionSimplify.cpp
@@ -283,6 +283,32 @@ Value *llvm::SimplifyFCmpInst(unsigned Predicate, Value *LHS, Value *RHS,
// True if unordered.
return ConstantInt::getTrue(CFP->getContext());
}
+ // Check whether the constant is an infinity.
+ if (CFP->getValueAPF().isInfinity()) {
+ if (CFP->getValueAPF().isNegative()) {
+ switch (Pred) {
+ case FCmpInst::FCMP_OLT:
+ // No value is ordered and less than negative infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_UGE:
+ // All values are unordered with, or at least, negative infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ } else {
+ switch (Pred) {
+ case FCmpInst::FCMP_OGT:
+ // No value is ordered and greater than infinity.
+ return ConstantInt::getFalse(CFP->getContext());
+ case FCmpInst::FCMP_ULE:
+ // All values are unordered with, or at most, infinity.
+ return ConstantInt::getTrue(CFP->getContext());
+ default:
+ break;
+ }
+ }
+ }
}
}
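
The new cases fold a comparison against an infinity constant without looking
at the other operand at all. A standalone C++ model of the four rules; this
illustrates the IEEE-754 semantics, not the LLVM code path itself:

    #include <limits>

    // Comparisons involving NaN are false, so for every float x,
    // including NaN and the infinities themselves:
    static const float Inf = std::numeric_limits<float>::infinity();

    static bool oltNegInf(float x) { return x < -Inf; }    // FCMP_OLT: always false
    static bool ugeNegInf(float x) { return !(x < -Inf); } // FCMP_UGE: always true
    static bool ogtPosInf(float x) { return x > Inf; }     // FCMP_OGT: always false
    static bool ulePosInf(float x) { return !(x > Inf); }  // FCMP_ULE: always true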
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index 2d74709d..2aa2f17 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -580,7 +580,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
void MemoryDependenceAnalysis::
getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
SmallVectorImpl<NonLocalDepResult> &Result) {
- assert(isa<PointerType>(Pointer->getType()) &&
+ assert(Pointer->getType()->isPointerTy() &&
"Can't get pointer deps of a non-pointer!");
Result.clear();
@@ -861,7 +861,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
// Get the PHI translated pointer in this predecessor. This can fail if
// not translatable, in which case the getAddr() returns null.
PHITransAddr PredPointer(Pointer);
- PredPointer.PHITranslateValue(BB, Pred);
+ PredPointer.PHITranslateValue(BB, Pred, 0);
Value *PredPtrVal = PredPointer.getAddr();
@@ -1009,13 +1009,20 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
/// in more places that cached info does not necessarily keep.
void MemoryDependenceAnalysis::invalidateCachedPointerInfo(Value *Ptr) {
// If Ptr isn't really a pointer, just ignore it.
- if (!isa<PointerType>(Ptr->getType())) return;
+ if (!Ptr->getType()->isPointerTy()) return;
// Flush store info for the pointer.
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, false));
// Flush load info for the pointer.
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(Ptr, true));
}
+/// invalidateCachedPredecessors - Clear the PredIteratorCache info.
+/// This needs to be done when the CFG changes, e.g., due to splitting
+/// critical edges.
+void MemoryDependenceAnalysis::invalidateCachedPredecessors() {
+ PredCache->clear();
+}
+
/// removeInstruction - Remove an instruction from the dependence analysis,
/// updating the dependence of instructions that previously depended on it.
/// This method attempts to keep the cache coherent using the reverse map.
@@ -1050,7 +1057,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
// Remove it from both the load info and the store info. The instruction
// can't be in either of these maps if it is non-pointer.
- if (isa<PointerType>(RemInst->getType())) {
+ if (RemInst->getType()->isPointerTy()) {
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false));
RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true));
}
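
invalidateCachedPredecessors() exists because memdep keeps a
PredIteratorCache that goes stale the moment the CFG changes. A hedged
sketch of how a client pass might use it; splitEdgeAndNotify is a
hypothetical helper, not part of this patch:

    #include "llvm/Analysis/MemoryDependenceAnalysis.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    // Split a critical edge and keep a (possibly absent) memdep coherent.
    static BasicBlock *splitEdgeAndNotify(TerminatorInst *TI, unsigned SuccNum,
                                          Pass *P,
                                          MemoryDependenceAnalysis *MD) {
      BasicBlock *NewBB = SplitCriticalEdge(TI, SuccNum, P);
      if (NewBB && MD)
        MD->invalidateCachedPredecessors(); // CFG changed under the cache.
      return NewBB;
    }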
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
index 334a188..8e4fa03 100644
--- a/lib/Analysis/PHITransAddr.cpp
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -134,7 +134,8 @@ static void RemoveInstInputs(Value *V,
}
Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
- BasicBlock *PredBB) {
+ BasicBlock *PredBB,
+ const DominatorTree *DT) {
// If this is a non-instruction value, it can't require PHI translation.
Instruction *Inst = dyn_cast<Instruction>(V);
if (Inst == 0) return V;
@@ -177,7 +178,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
// operands need to be phi translated, and if so, reconstruct it.
if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
- Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB);
+ Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB, DT);
if (PHIIn == 0) return 0;
if (PHIIn == BC->getOperand(0))
return BC;
@@ -193,7 +194,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
UI != E; ++UI) {
if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
- if (BCI->getType() == BC->getType())
+ if (BCI->getType() == BC->getType() &&
+ (!DT || DT->dominates(BCI->getParent(), PredBB)))
return BCI;
}
return 0;
@@ -204,7 +206,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
SmallVector<Value*, 8> GEPOps;
bool AnyChanged = false;
for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
- Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB);
+ Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB, DT);
if (GEPOp == 0) return 0;
AnyChanged |= GEPOp != GEP->getOperand(i);
@@ -229,7 +231,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
if (GEPI->getType() == GEP->getType() &&
GEPI->getNumOperands() == GEPOps.size() &&
- GEPI->getParent()->getParent() == CurBB->getParent()) {
+ GEPI->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(GEPI->getParent(), PredBB))) {
bool Mismatch = false;
for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
if (GEPI->getOperand(i) != GEPOps[i]) {
@@ -251,7 +254,7 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
- Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB);
+ Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB, DT);
if (LHS == 0) return 0;
// If the PHI translated LHS is an add of a constant, fold the immediates.
@@ -287,7 +290,8 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
if (BO->getOpcode() == Instruction::Add &&
BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
- BO->getParent()->getParent() == CurBB->getParent())
+ BO->getParent()->getParent() == CurBB->getParent() &&
+ (!DT || DT->dominates(BO->getParent(), PredBB)))
return BO;
}
@@ -300,33 +304,24 @@ Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
/// PHITranslateValue - PHI translate the current address up the CFG from
-/// CurBB to Pred, updating our state the reflect any needed changes. This
-/// returns true on failure and sets Addr to null.
-bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB) {
+/// CurBB to Pred, updating our state to reflect any needed changes. If the
+/// dominator tree DT is non-null, the translated value must dominate
+/// PredBB. This returns true on failure and sets Addr to null.
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree *DT) {
assert(Verify() && "Invalid PHITransAddr!");
- Addr = PHITranslateSubExpr(Addr, CurBB, PredBB);
+ Addr = PHITranslateSubExpr(Addr, CurBB, PredBB, DT);
assert(Verify() && "Invalid PHITransAddr!");
- return Addr == 0;
-}
-/// GetAvailablePHITranslatedSubExpr - Return the value computed by
-/// PHITranslateSubExpr if it dominates PredBB, otherwise return null.
-Value *PHITransAddr::
-GetAvailablePHITranslatedSubExpr(Value *V, BasicBlock *CurBB,BasicBlock *PredBB,
- const DominatorTree &DT) const {
- PHITransAddr Tmp(V, TD);
- Tmp.PHITranslateValue(CurBB, PredBB);
-
- // See if PHI translation succeeds.
- V = Tmp.getAddr();
-
- // Make sure the value is live in the predecessor.
- if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
- if (!DT.dominates(Inst->getParent(), PredBB))
- return 0;
- return V;
-}
+ if (DT) {
+ // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(Addr))
+ if (!DT->dominates(Inst->getParent(), PredBB))
+ Addr = 0;
+ }
+ return Addr == 0;
+}
/// PHITranslateWithInsertion - PHI translate this value into the specified
/// predecessor block, inserting a computation of the value if it is
@@ -365,8 +360,9 @@ InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
SmallVectorImpl<Instruction*> &NewInsts) {
// See if we have a version of this value already available and dominating
// PredBB. If so, there is no need to insert a new instance of it.
- if (Value *Res = GetAvailablePHITranslatedSubExpr(InVal, CurBB, PredBB, DT))
- return Res;
+ PHITransAddr Tmp(InVal, TD);
+ if (!Tmp.PHITranslateValue(CurBB, PredBB, &DT))
+ return Tmp.getAddr();
// If we don't have an available version of this value, it must be an
// instruction.
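
With the dominator tree folded into PHITranslateValue, the old two-step
"translate, then check availability" dance collapses into one call, as the
rewritten InsertPHITranslatedSubExpr above shows. A minimal sketch of the
same query, assuming the PHITransAddr interface as used in this patch:

    #include "llvm/Analysis/Dominators.h"
    #include "llvm/Analysis/PHITransAddr.h"
    using namespace llvm;

    // Return Addr translated from CurBB into PredBB if the result exists
    // and dominates PredBB; otherwise return null. TD may be null.
    static Value *translateIfAvailable(Value *Addr, BasicBlock *CurBB,
                                       BasicBlock *PredBB,
                                       const TargetData *TD,
                                       const DominatorTree &DT) {
      PHITransAddr Tmp(Addr, TD);
      if (Tmp.PHITranslateValue(CurBB, PredBB, &DT))
        return 0;            // Failure path: Tmp.getAddr() is now null.
      return Tmp.getAddr();  // Known to dominate PredBB.
    }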
diff --git a/lib/Analysis/PointerTracking.cpp b/lib/Analysis/PointerTracking.cpp
index 8da07e7..ce7ac89 100644
--- a/lib/Analysis/PointerTracking.cpp
+++ b/lib/Analysis/PointerTracking.cpp
@@ -231,7 +231,7 @@ void PointerTracking::print(raw_ostream &OS, const Module* M) const {
// this should be safe for the same reason it's safe for SCEV.
PointerTracking &PT = *const_cast<PointerTracking*>(this);
for (inst_iterator I=inst_begin(*FF), E=inst_end(*FF); I != E; ++I) {
- if (!isa<PointerType>(I->getType()))
+ if (!I->getType()->isPointerTy())
continue;
Value *Base;
const SCEV *Limit, *Offset;
diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp
index 9ee7d3a..b979f33 100644
--- a/lib/Analysis/ScalarEvolution.cpp
+++ b/lib/Analysis/ScalarEvolution.cpp
@@ -214,8 +214,8 @@ bool SCEVCastExpr::properlyDominates(BasicBlock *BB, DominatorTree *DT) const {
SCEVTruncateExpr::SCEVTruncateExpr(const FoldingSetNodeID &ID,
const SCEV *op, const Type *ty)
: SCEVCastExpr(ID, scTruncate, op, ty) {
- assert((Op->getType()->isIntegerTy() || isa<PointerType>(Op->getType())) &&
- (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate non-integer value!");
}
@@ -226,8 +226,8 @@ void SCEVTruncateExpr::print(raw_ostream &OS) const {
SCEVZeroExtendExpr::SCEVZeroExtendExpr(const FoldingSetNodeID &ID,
const SCEV *op, const Type *ty)
: SCEVCastExpr(ID, scZeroExtend, op, ty) {
- assert((Op->getType()->isIntegerTy() || isa<PointerType>(Op->getType())) &&
- (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot zero extend non-integer value!");
}
@@ -238,8 +238,8 @@ void SCEVZeroExtendExpr::print(raw_ostream &OS) const {
SCEVSignExtendExpr::SCEVSignExtendExpr(const FoldingSetNodeID &ID,
const SCEV *op, const Type *ty)
: SCEVCastExpr(ID, scSignExtend, op, ty) {
- assert((Op->getType()->isIntegerTy() || isa<PointerType>(Op->getType())) &&
- (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert((Op->getType()->isIntegerTy() || Op->getType()->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot sign extend non-integer value!");
}
@@ -416,7 +416,7 @@ bool SCEVUnknown::isOffsetOf(const Type *&CTy, Constant *&FieldNo) const {
cast<PointerType>(CE->getOperand(0)->getType())->getElementType();
// Ignore vector types here so that ScalarEvolutionExpander doesn't
// emit getelementptrs that index into vectors.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
CTy = Ty;
FieldNo = CE->getOperand(2);
return true;
@@ -518,9 +518,9 @@ namespace {
// Order pointer values after integer values. This helps SCEVExpander
// form GEPs.
- if (isa<PointerType>(LU->getType()) && !isa<PointerType>(RU->getType()))
+ if (LU->getType()->isPointerTy() && !RU->getType()->isPointerTy())
return false;
- if (isa<PointerType>(RU->getType()) && !isa<PointerType>(LU->getType()))
+ if (RU->getType()->isPointerTy() && !LU->getType()->isPointerTy())
return true;
// Compare getValueID values.
@@ -616,7 +616,7 @@ namespace {
/// When this routine is finished, we know that any duplicates in the vector are
/// consecutive and that complexity is monotonically increasing.
///
-/// Note that we go take special precautions to ensure that we get determinstic
+/// Note that we take special precautions to ensure that we get deterministic
/// results from this routine. In other words, we don't want the results of
/// this to depend on where the addresses of various SCEV objects happened to
/// land in memory.
@@ -744,7 +744,7 @@ static const SCEV *BinomialCoefficient(const SCEV *It, unsigned K,
// We need at least W + T bits for the multiplication step
unsigned CalculationBits = W + T;
- // Calcuate 2^T, at width T+W.
+ // Calculate 2^T, at width T+W.
APInt DivFactor = APInt(CalculationBits, 1).shl(T);
// Calculate the multiplicative inverse of K! / 2^T;
@@ -921,9 +921,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
if (MaxBECount == RecastedMaxBECount) {
const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no unsigned overflow.
- const SCEV *ZMul =
- getMulExpr(CastedMaxBECount,
- getTruncateOrZeroExtend(Step, Start->getType()));
+ const SCEV *ZMul = getMulExpr(CastedMaxBECount, Step);
const SCEV *Add = getAddExpr(Start, ZMul);
const SCEV *OperandExtendedAdd =
getAddExpr(getZeroExtendExpr(Start, WideTy),
@@ -937,9 +935,7 @@ const SCEV *ScalarEvolution::getZeroExtendExpr(const SCEV *Op,
// Similar to above, only this time treat the step value as signed.
// This covers loops that count down.
- const SCEV *SMul =
- getMulExpr(CastedMaxBECount,
- getTruncateOrSignExtend(Step, Start->getType()));
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
Add = getAddExpr(Start, SMul);
OperandExtendedAdd =
getAddExpr(getZeroExtendExpr(Start, WideTy),
@@ -1060,9 +1056,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
if (MaxBECount == RecastedMaxBECount) {
const Type *WideTy = IntegerType::get(getContext(), BitWidth * 2);
// Check whether Start+Step*MaxBECount has no signed overflow.
- const SCEV *SMul =
- getMulExpr(CastedMaxBECount,
- getTruncateOrSignExtend(Step, Start->getType()));
+ const SCEV *SMul = getMulExpr(CastedMaxBECount, Step);
const SCEV *Add = getAddExpr(Start, SMul);
const SCEV *OperandExtendedAdd =
getAddExpr(getSignExtendExpr(Start, WideTy),
@@ -1076,9 +1070,7 @@ const SCEV *ScalarEvolution::getSignExtendExpr(const SCEV *Op,
// Similar to above, only this time treat the step value as unsigned.
// This covers loops that count up with an unsigned step.
- const SCEV *UMul =
- getMulExpr(CastedMaxBECount,
- getTruncateOrZeroExtend(Step, Start->getType()));
+ const SCEV *UMul = getMulExpr(CastedMaxBECount, Step);
Add = getAddExpr(Start, UMul);
OperandExtendedAdd =
getAddExpr(getSignExtendExpr(Start, WideTy),
@@ -1418,7 +1410,7 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl<const SCEV *> &Ops,
// If we deleted at least one add, we added operands to the end of the list,
// and they are not necessarily sorted. Recurse to resort and resimplify
- // any operands we just aquired.
+ // any operands we just acquired.
if (DeletedAdd)
return getAddExpr(Ops);
}
@@ -1725,7 +1717,7 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl<const SCEV *> &Ops,
// If we deleted at least one mul, we added operands to the end of the list,
// and they are not necessarily sorted. Recurse to resort and resimplify
- // any operands we just aquired.
+ // any operands we just acquired.
if (DeletedMul)
return getMulExpr(Ops);
}
@@ -1973,6 +1965,12 @@ ScalarEvolution::getAddRecExpr(SmallVectorImpl<const SCEV *> &Operands,
return getAddRecExpr(Operands, L, HasNUW, HasNSW); // {X,+,0} --> X
}
+ // It's tempting to want to call getMaxBackedgeTakenCount count here and
+ // use that information to infer NUW and NSW flags. However, computing a
+ // BE count requires calling getAddRecExpr, so we may not yet have a
+ // meaningful BE count at this point (and if we don't, we'd be stuck
+ // with a SCEVCouldNotCompute as the cached BE count).
+
// If HasNSW is true and all the operands are non-negative, infer HasNUW.
if (!HasNUW && HasNSW) {
bool All = true;
@@ -2308,7 +2306,7 @@ const SCEV *ScalarEvolution::getUnknown(Value *V) {
/// has access to target-specific information.
bool ScalarEvolution::isSCEVable(const Type *Ty) const {
// Integers and pointers are always SCEVable.
- return Ty->isIntegerTy() || isa<PointerType>(Ty);
+ return Ty->isIntegerTy() || Ty->isPointerTy();
}
/// getTypeSizeInBits - Return the size in bits of the specified type,
@@ -2326,7 +2324,7 @@ uint64_t ScalarEvolution::getTypeSizeInBits(const Type *Ty) const {
// The only other supported type is pointer. Without TargetData, conservatively
// assume pointers are 64-bit.
- assert(isa<PointerType>(Ty) && "isSCEVable permitted a non-SCEVable type!");
+ assert(Ty->isPointerTy() && "isSCEVable permitted a non-SCEVable type!");
return 64;
}
@@ -2341,7 +2339,7 @@ const Type *ScalarEvolution::getEffectiveSCEVType(const Type *Ty) const {
return Ty;
// The only other supported type is pointer.
- assert(isa<PointerType>(Ty) && "Unexpected non-pointer non-integer type!");
+ assert(Ty->isPointerTy() && "Unexpected non-pointer non-integer type!");
if (TD) return TD->getIntPtrType(getContext());
// Without TargetData, conservatively assume pointers are 64-bit.
@@ -2412,8 +2410,8 @@ const SCEV *
ScalarEvolution::getTruncateOrZeroExtend(const SCEV *V,
const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
- (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
@@ -2429,8 +2427,8 @@ const SCEV *
ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
- (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or zero extend with non-integer arguments!");
if (getTypeSizeInBits(SrcTy) == getTypeSizeInBits(Ty))
return V; // No conversion
@@ -2445,8 +2443,8 @@ ScalarEvolution::getTruncateOrSignExtend(const SCEV *V,
const SCEV *
ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
- (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or zero extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrZeroExtend cannot truncate!");
@@ -2461,8 +2459,8 @@ ScalarEvolution::getNoopOrZeroExtend(const SCEV *V, const Type *Ty) {
const SCEV *
ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
- (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or sign extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrSignExtend cannot truncate!");
@@ -2478,8 +2476,8 @@ ScalarEvolution::getNoopOrSignExtend(const SCEV *V, const Type *Ty) {
const SCEV *
ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
- (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot noop or any extend with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) <= getTypeSizeInBits(Ty) &&
"getNoopOrAnyExtend cannot truncate!");
@@ -2493,8 +2491,8 @@ ScalarEvolution::getNoopOrAnyExtend(const SCEV *V, const Type *Ty) {
const SCEV *
ScalarEvolution::getTruncateOrNoop(const SCEV *V, const Type *Ty) {
const Type *SrcTy = V->getType();
- assert((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
- (Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
+ (Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Cannot truncate or noop with non-integer arguments!");
assert(getTypeSizeInBits(SrcTy) >= getTypeSizeInBits(Ty) &&
"getTruncateOrNoop cannot extend!");
@@ -2551,12 +2549,12 @@ PushDefUseChildren(Instruction *I,
/// the Scalars map if they reference SymName. This is used during PHI
/// resolution.
void
-ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) {
+ScalarEvolution::ForgetSymbolicName(Instruction *PN, const SCEV *SymName) {
SmallVector<Instruction *, 16> Worklist;
- PushDefUseChildren(I, Worklist);
+ PushDefUseChildren(PN, Worklist);
SmallPtrSet<Instruction *, 8> Visited;
- Visited.insert(I);
+ Visited.insert(PN);
while (!Worklist.empty()) {
Instruction *I = Worklist.pop_back_val();
if (!Visited.insert(I)) continue;
@@ -2570,12 +2568,15 @@ ScalarEvolution::ForgetSymbolicName(Instruction *I, const SCEV *SymName) {
continue;
// SCEVUnknown for a PHI either means that it has an unrecognized
- // structure, or it's a PHI that's in the progress of being computed
- // by createNodeForPHI. In the former case, additional loop trip
- // count information isn't going to change anything. In the later
- // case, createNodeForPHI will perform the necessary updates on its
- // own when it gets to that point.
- if (!isa<PHINode>(I) || !isa<SCEVUnknown>(It->second)) {
+ // structure, it's a PHI that's in the process of being computed
+ // by createNodeForPHI, or it's a single-value PHI. In the first case,
+ // additional loop trip count information isn't going to change anything.
+ // In the second case, createNodeForPHI will perform the necessary
+ // updates on its own when it gets to that point. In the third, we do
+ // want to forget the SCEVUnknown.
+ if (!isa<PHINode>(I) ||
+ !isa<SCEVUnknown>(It->second) ||
+ (I != PN && It->second == SymName)) {
ValuesAtScopes.erase(It->second);
Scalars.erase(It);
}
@@ -2698,9 +2699,21 @@ const SCEV *ScalarEvolution::createNodeForPHI(PHINode *PN) {
return SymbolicName;
}
- // It's tempting to recognize PHIs with a unique incoming value, however
- // this leads passes like indvars to break LCSSA form. Fortunately, such
- // PHIs are rare, as instcombine zaps them.
+ // If the PHI has a single incoming value, follow that value, unless the
+ // PHI's incoming blocks are in a different loop, in which case doing so
+ // risks breaking LCSSA form. Instcombine would normally zap these, but
+ // it doesn't have DominatorTree information, so it may miss cases.
+ if (Value *V = PN->hasConstantValue(DT)) {
+ bool AllSameLoop = true;
+ Loop *PNLoop = LI->getLoopFor(PN->getParent());
+ for (size_t i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (LI->getLoopFor(PN->getIncomingBlock(i)) != PNLoop) {
+ AllSameLoop = false;
+ break;
+ }
+ if (AllSameLoop)
+ return getSCEV(V);
+ }
// If it's not a loop phi, we can't handle it yet.
return getUnknown(PN);
@@ -2733,7 +2746,7 @@ const SCEV *ScalarEvolution::createNodeForGEP(GEPOperator *GEP) {
} else {
// For an array, add the element offset, explicitly scaled.
const SCEV *LocalOffset = getSCEV(Index);
- // Getelementptr indicies are signed.
+ // Getelementptr indices are signed.
LocalOffset = getTruncateOrSignExtend(LocalOffset, IntPtrTy);
// Lower "inbounds" GEPs to NSW arithmetic.
LocalOffset = getMulExpr(LocalOffset, getSizeOfExpr(*GTI),
@@ -2936,7 +2949,6 @@ ScalarEvolution::getUnsignedRange(const SCEV *S) {
if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// For a SCEVUnknown, ask ValueTracking.
- unsigned BitWidth = getTypeSizeInBits(U->getType());
APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt Zeros(BitWidth, 0), Ones(BitWidth, 0);
ComputeMaskedBits(U->getValue(), Mask, Zeros, Ones, TD);
@@ -3208,7 +3220,7 @@ const SCEV *ScalarEvolution::createSCEV(Value *V) {
const Type *Z0Ty = Z0->getType();
unsigned Z0TySize = getTypeSizeInBits(Z0Ty);
- // If C is a low-bits mask, the zero extend is zerving to
+ // If C is a low-bits mask, the zero extend is serving to
// mask off the high bits. Complement the operand and
// re-apply the zext.
if (APIntOps::isMask(Z0TySize, CI->getValue()))
@@ -3393,7 +3405,7 @@ PushLoopPHIs(const Loop *L, SmallVectorImpl<Instruction *> &Worklist) {
const ScalarEvolution::BackedgeTakenInfo &
ScalarEvolution::getBackedgeTakenInfo(const Loop *L) {
// Initially insert a CouldNotCompute for this loop. If the insertion
- // succeeds, procede to actually compute a backedge-taken count and
+ // succeeds, proceed to actually compute a backedge-taken count and
// update the value. The temporary CouldNotCompute value tells SCEV
// code elsewhere that it shouldn't attempt to request a new
// backedge-taken count, which could result in infinite recursion.
@@ -3485,6 +3497,35 @@ void ScalarEvolution::forgetLoop(const Loop *L) {
}
}
+/// forgetValue - This method should be called by the client when it has
+/// changed a value in a way that may affect its value, or which may
+/// disconnect it from a def-use chain linking it to a loop.
+void ScalarEvolution::forgetValue(Value *V) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (!I) return;
+
+ // Drop information about expressions based on loop-header PHIs.
+ SmallVector<Instruction *, 16> Worklist;
+ Worklist.push_back(I);
+
+ SmallPtrSet<Instruction *, 8> Visited;
+ while (!Worklist.empty()) {
+ I = Worklist.pop_back_val();
+ if (!Visited.insert(I)) continue;
+
+ std::map<SCEVCallbackVH, const SCEV *>::iterator It =
+ Scalars.find(static_cast<Value *>(I));
+ if (It != Scalars.end()) {
+ ValuesAtScopes.erase(It->second);
+ Scalars.erase(It);
+ if (PHINode *PN = dyn_cast<PHINode>(I))
+ ConstantEvolutionLoopExitValue.erase(PN);
+ }
+
+ PushDefUseChildren(I, Worklist);
+ }
+}
+
/// ComputeBackedgeTakenCount - Compute the number of times the backedge
/// of the specified loop will execute.
ScalarEvolution::BackedgeTakenInfo
@@ -3581,7 +3622,7 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExit(const Loop *L,
return getCouldNotCompute();
}
- // Procede to the next level to examine the exit condition expression.
+ // Proceed to the next level to examine the exit condition expression.
return ComputeBackedgeTakenCountFromExitCond(L, ExitBr->getCondition(),
ExitBr->getSuccessor(0),
ExitBr->getSuccessor(1));
@@ -3670,10 +3711,23 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCond(const Loop *L,
}
// With an icmp, it may be feasible to compute an exact backedge-taken count.
- // Procede to the next level to examine the icmp.
+ // Proceed to the next level to examine the icmp.
if (ICmpInst *ExitCondICmp = dyn_cast<ICmpInst>(ExitCond))
return ComputeBackedgeTakenCountFromExitCondICmp(L, ExitCondICmp, TBB, FBB);
+ // Check for a constant condition. These are normally stripped out by
+ // SimplifyCFG, but ScalarEvolution may be used by a pass which wishes to
+ // preserve the CFG and is temporarily leaving constant conditions
+ // in place.
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(ExitCond)) {
+ if (L->contains(FBB) == !CI->getZExtValue())
+ // The backedge is always taken.
+ return getCouldNotCompute();
+ else
+ // The backedge is never taken.
+ return getIntegerSCEV(0, CI->getType());
+ }
+
// If it's not an integer or pointer comparison then compute it the hard way.
return ComputeBackedgeTakenCountExhaustively(L, ExitCond, !L->contains(TBB));
}
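
The constant-condition check added above boils down to a parity test between
the branch's constant value and which successor stays in the loop. A
hypothetical helper restating it, with the two outcomes spelled out:

    // br i1 %cond, label %TBB, label %FBB, where exactly one successor
    // remains inside the loop. Returns true when the backedge is taken on
    // every trip (backedge-taken count: CouldNotCompute); false means the
    // loop exits immediately (backedge-taken count: 0).
    static bool backedgeAlwaysTaken(bool CondValue, bool TrueDestInLoop,
                                    bool FalseDestInLoop) {
      return CondValue ? TrueDestInLoop : FalseDestInLoop;
    }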
@@ -3697,14 +3751,10 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
// Handle common loops like: for (X = "string"; *X; ++X)
if (LoadInst *LI = dyn_cast<LoadInst>(ExitCond->getOperand(0)))
if (Constant *RHS = dyn_cast<Constant>(ExitCond->getOperand(1))) {
- const SCEV *ItCnt =
+ BackedgeTakenInfo ItCnt =
ComputeLoadConstantCompareBackedgeTakenCount(LI, RHS, L, Cond);
- if (!isa<SCEVCouldNotCompute>(ItCnt)) {
- unsigned BitWidth = getTypeSizeInBits(ItCnt->getType());
- return BackedgeTakenInfo(ItCnt,
- isa<SCEVConstant>(ItCnt) ? ItCnt :
- getConstant(APInt::getMaxValue(BitWidth)-1));
- }
+ if (ItCnt.hasAnyInfo())
+ return ItCnt;
}
const SCEV *LHS = getSCEV(ExitCond->getOperand(0));
@@ -3738,14 +3788,14 @@ ScalarEvolution::ComputeBackedgeTakenCountFromExitCondICmp(const Loop *L,
switch (Cond) {
case ICmpInst::ICMP_NE: { // while (X != Y)
// Convert to: while (X-Y != 0)
- const SCEV *TC = HowFarToZero(getMinusSCEV(LHS, RHS), L);
- if (!isa<SCEVCouldNotCompute>(TC)) return TC;
+ BackedgeTakenInfo BTI = HowFarToZero(getMinusSCEV(LHS, RHS), L);
+ if (BTI.hasAnyInfo()) return BTI;
break;
}
case ICmpInst::ICMP_EQ: { // while (X == Y)
// Convert to: while (X-Y == 0)
- const SCEV *TC = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
- if (!isa<SCEVCouldNotCompute>(TC)) return TC;
+ BackedgeTakenInfo BTI = HowFarToNonZero(getMinusSCEV(LHS, RHS), L);
+ if (BTI.hasAnyInfo()) return BTI;
break;
}
case ICmpInst::ICMP_SLT: {
@@ -3832,7 +3882,7 @@ GetAddressedElementFromGlobal(GlobalVariable *GV,
/// ComputeLoadConstantCompareBackedgeTakenCount - Given an exit condition of
/// 'icmp op load X, cst', try to see if we can compute the backedge
/// execution count.
-const SCEV *
+ScalarEvolution::BackedgeTakenInfo
ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
LoadInst *LI,
Constant *RHS,
@@ -3841,6 +3891,7 @@ ScalarEvolution::ComputeLoadConstantCompareBackedgeTakenCount(
if (LI->isVolatile()) return getCouldNotCompute();
// Check to see if the loaded pointer is a getelementptr of a global.
+ // TODO: Use SCEV instead of manually grubbing with GEPs.
GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getOperand(0));
if (!GEP) return getCouldNotCompute();
@@ -4190,14 +4241,15 @@ const SCEV *ScalarEvolution::computeSCEVAtScope(const SCEV *V, const Loop *L) {
}
}
- Constant *C;
+ Constant *C = 0;
if (const CmpInst *CI = dyn_cast<CmpInst>(I))
C = ConstantFoldCompareInstOperands(CI->getPredicate(),
Operands[0], Operands[1], TD);
else
C = ConstantFoldInstOperands(I->getOpcode(), I->getType(),
&Operands[0], Operands.size(), TD);
- return getSCEV(C);
+ if (C)
+ return getSCEV(C);
}
}
@@ -4405,7 +4457,8 @@ SolveQuadraticEquation(const SCEVAddRecExpr *AddRec, ScalarEvolution &SE) {
/// HowFarToZero - Return the number of times a backedge comparing the specified
/// value to zero will execute. If not computable, return CouldNotCompute.
-const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
// If the value is a constant
if (const SCEVConstant *C = dyn_cast<SCEVConstant>(V)) {
// If the value is already zero, the branch will execute zero times.
@@ -4485,7 +4538,8 @@ const SCEV *ScalarEvolution::HowFarToZero(const SCEV *V, const Loop *L) {
/// HowFarToNonZero - Return the number of times a backedge checking the
/// specified value for nonzero will execute. If not computable, return
/// CouldNotCompute
-const SCEV *ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
+ScalarEvolution::BackedgeTakenInfo
+ScalarEvolution::HowFarToNonZero(const SCEV *V, const Loop *L) {
// Loops that look like: while (X == 0) are very strange indeed. We don't
// handle them yet except for the trivial case. This could be expanded in the
// future as needed.
@@ -4726,7 +4780,7 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue,
ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
bool Inverse) {
- // Recursivly handle And and Or conditions.
+ // Recursively handle And and Or conditions.
if (BinaryOperator *BO = dyn_cast<BinaryOperator>(CondValue)) {
if (BO->getOpcode() == Instruction::And) {
if (!Inverse)
@@ -4929,7 +4983,7 @@ bool ScalarEvolution::isImpliedCond(Value *CondValue,
}
/// isImpliedCondOperands - Test whether the condition described by Pred,
-/// LHS, and RHS is true whenever the condition desribed by Pred, FoundLHS,
+/// LHS, and RHS is true whenever the condition described by Pred, FoundLHS,
/// and FoundRHS is true.
bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
const SCEV *LHS, const SCEV *RHS,
@@ -4944,7 +4998,7 @@ bool ScalarEvolution::isImpliedCondOperands(ICmpInst::Predicate Pred,
}
/// isImpliedCondOperandsHelper - Test whether the condition described by
-/// Pred, LHS, and RHS is true whenever the condition desribed by Pred,
+/// Pred, LHS, and RHS is true whenever the condition described by Pred,
/// FoundLHS, and FoundRHS is true.
bool
ScalarEvolution::isImpliedCondOperandsHelper(ICmpInst::Predicate Pred,
@@ -5102,7 +5156,7 @@ ScalarEvolution::HowManyLessThans(const SCEV *LHS, const SCEV *RHS,
// If MaxEnd is within a step of the maximum integer value in its type,
// adjust it down to the minimum value which would produce the same effect.
- // This allows the subsequent ceiling divison of (N+(step-1))/step to
+ // This allows the subsequent ceiling division of (N+(step-1))/step to
// compute the correct value.
const SCEV *StepMinusOne = getMinusSCEV(Step,
getIntegerSCEV(1, Step->getType()));
@@ -5319,8 +5373,8 @@ ScalarEvolution::ScalarEvolution()
bool ScalarEvolution::runOnFunction(Function &F) {
this->F = &F;
LI = &getAnalysis<LoopInfo>();
- DT = &getAnalysis<DominatorTree>();
TD = getAnalysisIfAvailable<TargetData>();
+ DT = &getAnalysis<DominatorTree>();
return false;
}
@@ -5379,7 +5433,7 @@ static void PrintLoopInfo(raw_ostream &OS, ScalarEvolution *SE,
}
void ScalarEvolution::print(raw_ostream &OS, const Module *) const {
- // ScalarEvolution's implementaiton of the print method is to print
+ // ScalarEvolution's implementation of the print method is to print
// out SCEV values of all instructions that are interesting. Doing
// this potentially causes it to create new SCEV objects though,
// which technically conflicts with the const qualifier. This isn't
diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
index 498c4a8..17b254f 100644
--- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
+++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp
@@ -10,6 +10,10 @@
// This file defines the ScalarEvolutionAliasAnalysis pass, which implements a
// simple alias analysis implemented in terms of ScalarEvolution queries.
//
+// This differs from traditional loop dependence analysis in that it tests
+// for dependencies within a single iteration of a loop, rather than
+// dependences between different iterations.
+//
// ScalarEvolution has a more complete understanding of pointer arithmetic
// than BasicAliasAnalysis' collection of ad-hoc analyses.
//
@@ -41,7 +45,7 @@ namespace {
return (AliasAnalysis*)this;
return this;
}
-
+
private:
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
virtual bool runOnFunction(Function &F);
@@ -89,7 +93,7 @@ ScalarEvolutionAliasAnalysis::GetBaseValue(const SCEV *S) {
} else if (const SCEVAddExpr *A = dyn_cast<SCEVAddExpr>(S)) {
// If there's a pointer operand, it'll be sorted at the end of the list.
const SCEV *Last = A->getOperand(A->getNumOperands()-1);
- if (isa<PointerType>(Last->getType()))
+ if (Last->getType()->isPointerTy())
return GetBaseValue(Last);
} else if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
// This is a leaf node.
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index c2e1f89..f5f10c8 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -144,15 +144,34 @@ Value *SCEVExpander::InsertBinop(Instruction::BinaryOps Opcode,
}
}
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(LHS) || !L->isLoopInvariant(RHS)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
// If we haven't found this binop, insert it.
Value *BO = Builder.CreateBinOp(Opcode, LHS, RHS, "tmp");
rememberInstruction(BO);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
return BO;
}
/// FactorOutConstant - Test if S is divisible by Factor, using signed
/// division. If so, update S with Factor divided out and return true.
-/// S need not be evenly divisble if a reasonable remainder can be
+/// S need not be evenly divisible if a reasonable remainder can be
/// computed.
/// TODO: When ScalarEvolution gets a SCEVSDivExpr, this can be made
/// unnecessary; in its place, just signed-divide Ops[i] by the scale and
@@ -462,7 +481,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
break;
}
- // If none of the operands were convertable to proper GEP indices, cast
+ // If none of the operands were convertible to proper GEP indices, cast
// the base to i8* and do an ugly getelementptr with that. It's still
// better than ptrtoint+arithmetic+inttoptr at least.
if (!AnyNonZeroIndices) {
@@ -493,12 +512,56 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
}
}
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V) || !L->isLoopInvariant(Idx)) break;
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
// Emit a GEP.
Value *GEP = Builder.CreateGEP(V, Idx, "uglygep");
rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
return GEP;
}
+ // Save the original insertion point so we can restore it when we're done.
+ BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
+ BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
+
+ // Move the insertion point out of as many loops as we can.
+ while (const Loop *L = SE.LI->getLoopFor(Builder.GetInsertBlock())) {
+ if (!L->isLoopInvariant(V)) break;
+
+ bool AnyIndexNotLoopInvariant = false;
+ for (SmallVectorImpl<Value *>::const_iterator I = GepIndices.begin(),
+ E = GepIndices.end(); I != E; ++I)
+ if (!L->isLoopInvariant(*I)) {
+ AnyIndexNotLoopInvariant = true;
+ break;
+ }
+ if (AnyIndexNotLoopInvariant)
+ break;
+
+ BasicBlock *Preheader = L->getLoopPreheader();
+ if (!Preheader) break;
+
+ // Ok, move up a level.
+ Builder.SetInsertPoint(Preheader, Preheader->getTerminator());
+ }
+
// Insert a pretty getelementptr. Note that this GEP is not marked inbounds,
// because ScalarEvolution may have changed the address arithmetic to
// compute a value which is beyond the end of the allocated object.
@@ -511,6 +574,11 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
"scevgep");
Ops.push_back(SE.getUnknown(GEP));
rememberInstruction(GEP);
+
+ // Restore the original insert point.
+ if (SaveInsertBB)
+ restoreInsertPoint(SaveInsertBB, SaveInsertPt);
+
return expand(SE.getAddExpr(Ops));
}
@@ -528,70 +596,179 @@ static bool isNonConstantNegative(const SCEV *F) {
return SC->getValue()->getValue().isNegative();
}
-Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
- int NumOperands = S->getNumOperands();
- const Type *Ty = SE.getEffectiveSCEVType(S->getType());
+/// PickMostRelevantLoop - Given two loops pick the one that's most relevant for
+/// SCEV expansion. If they are nested, this is the most nested. If they are
+/// neighboring, pick the later one.
+static const Loop *PickMostRelevantLoop(const Loop *A, const Loop *B,
+ DominatorTree &DT) {
+ if (!A) return B;
+ if (!B) return A;
+ if (A->contains(B)) return B;
+ if (B->contains(A)) return A;
+ if (DT.dominates(A->getHeader(), B->getHeader())) return B;
+ if (DT.dominates(B->getHeader(), A->getHeader())) return A;
+ return A; // Arbitrarily break the tie.
+}
- // Find the index of an operand to start with. Choose the operand with
- // pointer type, if there is one, or the last operand otherwise.
- int PIdx = 0;
- for (; PIdx != NumOperands - 1; ++PIdx)
- if (isa<PointerType>(S->getOperand(PIdx)->getType())) break;
+/// GetRelevantLoop - Get the most relevant loop associated with the given
+/// expression, according to PickMostRelevantLoop.
+static const Loop *GetRelevantLoop(const SCEV *S, LoopInfo &LI,
+ DominatorTree &DT) {
+ if (isa<SCEVConstant>(S))
+ return 0;
+ if (const SCEVUnknown *U = dyn_cast<SCEVUnknown>(S)) {
+ if (const Instruction *I = dyn_cast<Instruction>(U->getValue()))
+ return LI.getLoopFor(I->getParent());
+ return 0;
+ }
+ if (const SCEVNAryExpr *N = dyn_cast<SCEVNAryExpr>(S)) {
+ const Loop *L = 0;
+ if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(S))
+ L = AR->getLoop();
+ for (SCEVNAryExpr::op_iterator I = N->op_begin(), E = N->op_end();
+ I != E; ++I)
+ L = PickMostRelevantLoop(L, GetRelevantLoop(*I, LI, DT), DT);
+ return L;
+ }
+ if (const SCEVCastExpr *C = dyn_cast<SCEVCastExpr>(S))
+ return GetRelevantLoop(C->getOperand(), LI, DT);
+ if (const SCEVUDivExpr *D = dyn_cast<SCEVUDivExpr>(S))
+ return PickMostRelevantLoop(GetRelevantLoop(D->getLHS(), LI, DT),
+ GetRelevantLoop(D->getRHS(), LI, DT),
+ DT);
+ llvm_unreachable("Unexpected SCEV type!");
+}
- // Expand code for the operand that we chose.
- Value *V = expand(S->getOperand(PIdx));
+/// LoopCompare - Compare loops by PickMostRelevantLoop.
+class LoopCompare {
+ DominatorTree &DT;
+public:
+ explicit LoopCompare(DominatorTree &dt) : DT(dt) {}
+
+ bool operator()(std::pair<const Loop *, const SCEV *> LHS,
+ std::pair<const Loop *, const SCEV *> RHS) const {
+ // Compare loops with PickMostRelevantLoop.
+ if (LHS.first != RHS.first)
+ return PickMostRelevantLoop(LHS.first, RHS.first, DT) != LHS.first;
+
+ // If one operand is a non-constant negative and the other is not,
+ // put the non-constant negative on the right so that a sub can
+ // be used instead of a negate and add.
+ if (isNonConstantNegative(LHS.second)) {
+ if (!isNonConstantNegative(RHS.second))
+ return false;
+ } else if (isNonConstantNegative(RHS.second))
+ return true;
- // Turn things like ptrtoint+arithmetic+inttoptr into GEP. See the
- // comments on expandAddToGEP for details.
- if (const PointerType *PTy = dyn_cast<PointerType>(V->getType())) {
- // Take the operand at PIdx out of the list.
- const SmallVectorImpl<const SCEV *> &Ops = S->getOperands();
- SmallVector<const SCEV *, 8> NewOps;
- NewOps.insert(NewOps.end(), Ops.begin(), Ops.begin() + PIdx);
- NewOps.insert(NewOps.end(), Ops.begin() + PIdx + 1, Ops.end());
- // Make a GEP.
- return expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, V);
+ // Otherwise they are equivalent according to this comparison.
+ return false;
}
+};
- // Otherwise, we'll expand the rest of the SCEVAddExpr as plain integer
- // arithmetic.
- V = InsertNoopCastOfTo(V, Ty);
+Value *SCEVExpander::visitAddExpr(const SCEVAddExpr *S) {
+ const Type *Ty = SE.getEffectiveSCEVType(S->getType());
- // Emit a bunch of add instructions
- for (int i = NumOperands-1; i >= 0; --i) {
- if (i == PIdx) continue;
- const SCEV *Op = S->getOperand(i);
- if (isNonConstantNegative(Op)) {
+ // Collect all the add operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal, and
+ // so that pointer operands are inserted first, which the code below relies on
+ // to form more involved GEPs.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVAddExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
+ *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants and
+ // pointer operands precede non-pointer operands.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to add all the operands. Hoist as much as possible
+ // out of loops, and form meaningful getelementptrs where possible.
+ Value *Sum = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const Loop *CurLoop = I->first;
+ const SCEV *Op = I->second;
+ if (!Sum) {
+ // This is the first operand. Just expand it.
+ Sum = expand(Op);
+ ++I;
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Sum->getType())) {
+ // The running sum expression is a pointer. Try to form a getelementptr
+ // at this level with that as the base.
+ SmallVector<const SCEV *, 4> NewOps;
+ for (; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, Sum);
+ } else if (const PointerType *PTy = dyn_cast<PointerType>(Op->getType())) {
+ // The running sum is an integer, and there's a pointer at this level.
+ // Try to form a getelementptr.
+ SmallVector<const SCEV *, 4> NewOps;
+ NewOps.push_back(SE.getUnknown(Sum));
+ for (++I; I != E && I->first == CurLoop; ++I)
+ NewOps.push_back(I->second);
+ Sum = expandAddToGEP(NewOps.begin(), NewOps.end(), PTy, Ty, expand(Op));
+ } else if (isNonConstantNegative(Op)) {
+ // Instead of doing a negate and add, just do a subtract.
Value *W = expandCodeFor(SE.getNegativeSCEV(Op), Ty);
- V = InsertBinop(Instruction::Sub, V, W);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ Sum = InsertBinop(Instruction::Sub, Sum, W);
+ ++I;
} else {
+ // A simple add.
Value *W = expandCodeFor(Op, Ty);
- V = InsertBinop(Instruction::Add, V, W);
+ Sum = InsertNoopCastOfTo(Sum, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Sum)) std::swap(Sum, W);
+ Sum = InsertBinop(Instruction::Add, Sum, W);
+ ++I;
}
}
- return V;
+
+ return Sum;
}
Value *SCEVExpander::visitMulExpr(const SCEVMulExpr *S) {
const Type *Ty = SE.getEffectiveSCEVType(S->getType());
- int FirstOp = 0; // Set if we should emit a subtract.
- if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(S->getOperand(0)))
- if (SC->getValue()->isAllOnesValue())
- FirstOp = 1;
-
- int i = S->getNumOperands()-2;
- Value *V = expandCodeFor(S->getOperand(i+1), Ty);
-
- // Emit a bunch of multiply instructions
- for (; i >= FirstOp; --i) {
- Value *W = expandCodeFor(S->getOperand(i), Ty);
- V = InsertBinop(Instruction::Mul, V, W);
+
+ // Collect all the mul operands in a loop, along with their associated loops.
+ // Iterate in reverse so that constants are emitted last, all else equal.
+ SmallVector<std::pair<const Loop *, const SCEV *>, 8> OpsAndLoops;
+ for (std::reverse_iterator<SCEVMulExpr::op_iterator> I(S->op_end()),
+ E(S->op_begin()); I != E; ++I)
+ OpsAndLoops.push_back(std::make_pair(GetRelevantLoop(*I, *SE.LI, *SE.DT),
+ *I));
+
+ // Sort by loop. Use a stable sort so that constants follow non-constants.
+ std::stable_sort(OpsAndLoops.begin(), OpsAndLoops.end(), LoopCompare(*SE.DT));
+
+ // Emit instructions to mul all the operands. Hoist as much as possible
+ // out of loops.
+ Value *Prod = 0;
+ for (SmallVectorImpl<std::pair<const Loop *, const SCEV *> >::iterator
+ I = OpsAndLoops.begin(), E = OpsAndLoops.end(); I != E; ) {
+ const SCEV *Op = I->second;
+ if (!Prod) {
+ // This is the first operand. Just expand it.
+ Prod = expand(Op);
+ ++I;
+ } else if (Op->isAllOnesValue()) {
+ // Instead of doing a multiply by negative one, just do a negate.
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ Prod = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), Prod);
+ ++I;
+ } else {
+ // A simple mul.
+ Value *W = expandCodeFor(Op, Ty);
+ Prod = InsertNoopCastOfTo(Prod, Ty);
+ // Canonicalize a constant to the RHS.
+ if (isa<Constant>(Prod)) std::swap(Prod, W);
+ Prod = InsertBinop(Instruction::Mul, Prod, W);
+ ++I;
+ }
}
- // -1 * ... ---> 0 - ...
- if (FirstOp == 1)
- V = InsertBinop(Instruction::Sub, Constant::getNullValue(Ty), V);
- return V;
+ return Prod;
}
Value *SCEVExpander::visitUDivExpr(const SCEVUDivExpr *S) {
@@ -658,6 +835,19 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
IncV = 0;
break;
}
+ // If any of the operands don't dominate the insert position, bail.
+ // Addrec operands are always loop-invariant, so this can only happen
+ // if there are instructions which haven't been hoisted.
+ for (User::op_iterator OI = IncV->op_begin()+1,
+ OE = IncV->op_end(); OI != OE; ++OI)
+ if (Instruction *OInst = dyn_cast<Instruction>(OI))
+ if (!SE.DT->dominates(OInst, IVIncInsertPos)) {
+ IncV = 0;
+ break;
+ }
+ if (!IncV)
+ break;
+ // Advance to the next instruction.
IncV = dyn_cast<Instruction>(IncV->getOperand(0));
if (!IncV)
break;
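The bail-out added above is an instance of a common pattern: before reusing an existing instruction at some insert position, confirm every instruction-valued operand dominates that position. A hedged sketch against the LLVM headers of this era (the helper name allOperandsDominate is made up, and unlike the patch it checks operand 0 as well):

#include "llvm/Instruction.h"
#include "llvm/Analysis/Dominators.h"

// Returns true if every instruction operand of IncV is defined at a point
// that dominates InsertPos, i.e. it is safe to reuse IncV there.
static bool allOperandsDominate(llvm::Instruction *IncV,
                                llvm::Instruction *InsertPos,
                                llvm::DominatorTree &DT) {
  for (llvm::User::op_iterator OI = IncV->op_begin(), OE = IncV->op_end();
       OI != OE; ++OI)
    if (llvm::Instruction *OInst = llvm::dyn_cast<llvm::Instruction>(*OI))
      if (!DT.dominates(OInst, InsertPos))
        return false;
  return true;
}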
@@ -702,7 +892,7 @@ SCEVExpander::getAddRecExprPHILiterally(const SCEVAddRecExpr *Normalized,
// negative, insert a sub instead of an add for the increment (unless it's a
// constant, because subtracts of constants are canonicalized to adds).
const SCEV *Step = Normalized->getStepRecurrence(SE);
- bool isPointer = isa<PointerType>(ExpandTy);
+ bool isPointer = ExpandTy->isPointerTy();
bool isNegative = !isPointer && isNonConstantNegative(Step);
if (isNegative)
Step = SE.getNegativeSCEV(Step);
@@ -807,7 +997,7 @@ Value *SCEVExpander::expandAddRecExprLiterally(const SCEVAddRecExpr *S) {
const Type *ExpandTy = PostLoopScale ? IntTy : STy;
PHINode *PN = getAddRecExprPHILiterally(Normalized, L, ExpandTy, IntTy);
- // Accomodate post-inc mode, if necessary.
+ // Accommodate post-inc mode, if necessary.
Value *Result;
if (L != PostIncLoop)
Result = PN;
@@ -852,7 +1042,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
PHINode *CanonicalIV = 0;
if (PHINode *PN = L->getCanonicalInductionVariable())
if (SE.isSCEVable(PN->getType()) &&
- isa<IntegerType>(SE.getEffectiveSCEVType(PN->getType())) &&
+ SE.getEffectiveSCEVType(PN->getType())->isIntegerTy() &&
SE.getTypeSizeInBits(PN->getType()) >= SE.getTypeSizeInBits(Ty))
CanonicalIV = PN;
@@ -1074,7 +1264,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
// If the SCEV is computable at this level, insert it into the header
// after the PHIs (and after any other instructions that we've inserted
// there) so that it is guaranteed to dominate any user inside the loop.
- if (L && S->hasComputableLoopEvolution(L))
+ if (L && S->hasComputableLoopEvolution(L) && L != PostIncLoop)
InsertPt = L->getHeader()->getFirstNonPHI();
while (isInsertedInstruction(InsertPt))
InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
@@ -1118,7 +1308,7 @@ void SCEVExpander::rememberInstruction(Value *I) {
}
void SCEVExpander::restoreInsertPoint(BasicBlock *BB, BasicBlock::iterator I) {
- // If we aquired more instructions since the old insert point was saved,
+ // If we acquired more instructions since the old insert point was saved,
// advance past them.
while (isInsertedInstruction(I)) ++I;
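Taken together, these SCEVExpander changes serve callers that expand a SCEV at a chosen point and rely on the expander to place, reuse, and hoist the computation. A minimal usage sketch, assuming the LLVM API of this era (emitBackedgeCount and InsertPt are illustrative names; a real caller would first reject SCEVCouldNotCompute):

#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/ScalarEvolution.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
using namespace llvm;

// Expand the loop's backedge-taken count into IR just before InsertPt; the
// expander picks insert points so the emitted code dominates its users.
Value *emitBackedgeCount(Loop *L, ScalarEvolution &SE, Instruction *InsertPt) {
  const SCEV *Count = SE.getBackedgeTakenCount(L);
  SCEVExpander Expander(SE);
  return Expander.expandCodeFor(Count, Count->getType(), InsertPt);
}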
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 7cc9c0d..09344a3 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -49,7 +49,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
assert(V && "No Value?");
assert(Depth <= MaxDepth && "Limit Search Depth");
unsigned BitWidth = Mask.getBitWidth();
- assert((V->getType()->isIntOrIntVectorTy() || isa<PointerType>(V->getType()))
+ assert((V->getType()->isIntOrIntVectorTy() || V->getType()->isPointerTy())
&& "Not integer or pointer type!");
assert((!TD ||
TD->getTypeSizeInBits(V->getType()->getScalarType()) == BitWidth) &&
@@ -249,7 +249,7 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
unsigned SrcBitWidth;
// Note that we handle pointer operands here because of inttoptr/ptrtoint
// which fall through here.
- if (isa<PointerType>(SrcTy))
+ if (SrcTy->isPointerTy())
SrcBitWidth = TD->getTypeSizeInBits(SrcTy);
else
SrcBitWidth = SrcTy->getScalarSizeInBits();
@@ -269,10 +269,10 @@ void llvm::ComputeMaskedBits(Value *V, const APInt &Mask,
}
case Instruction::BitCast: {
const Type *SrcTy = I->getOperand(0)->getType();
- if ((SrcTy->isIntegerTy() || isa<PointerType>(SrcTy)) &&
+ if ((SrcTy->isIntegerTy() || SrcTy->isPointerTy()) &&
// TODO: For now, not handling conversions like:
// (bitcast i64 %x to <2 x i32>)
- !isa<VectorType>(I->getType())) {
+ !I->getType()->isVectorTy()) {
ComputeMaskedBits(I->getOperand(0), Mask, KnownZero, KnownOne, TD,
Depth+1);
return;
@@ -980,7 +980,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) {
/// may not be represented in the result.
static Value *GetLinearExpression(Value *V, APInt &Scale, APInt &Offset,
const TargetData *TD, unsigned Depth) {
- assert(isa<IntegerType>(V->getType()) && "Not an integer value");
+ assert(V->getType()->isIntegerTy() && "Not an integer value");
// Limit our recursion depth.
if (Depth == 6) {
@@ -1253,7 +1253,7 @@ Value *llvm::FindInsertedValue(Value *V, const unsigned *idx_begin,
if (idx_begin == idx_end)
return V;
// We have indices, so V should have an indexable type
- assert((isa<StructType>(V->getType()) || isa<ArrayType>(V->getType()))
+ assert((V->getType()->isStructTy() || V->getType()->isArrayTy())
&& "Not looking at a struct or array?");
assert(ExtractValueInst::getIndexedType(V->getType(), idx_begin, idx_end)
&& "Invalid indices for type?");
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index 9cae0d2..8083a07 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -614,7 +614,7 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc,
Aliasee = ID.ConstantVal;
}
- if (!isa<PointerType>(Aliasee->getType()))
+ if (!Aliasee->getType()->isPointerTy())
return Error(AliaseeLoc, "alias must have pointer type");
// Okay, create the alias but do not insert it into the module yet.
@@ -685,7 +685,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc,
return true;
}
- if (isa<FunctionType>(Ty) || Ty->isLabelTy())
+ if (Ty->isFunctionTy() || Ty->isLabelTy())
return Error(TyLoc, "invalid type for global variable");
GlobalVariable *GV = 0;
@@ -791,7 +791,7 @@ GlobalValue *LLParser::GetGlobalVal(const std::string &Name, const Type *Ty,
GlobalValue *FwdVal;
if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
// Function types can return opaque but functions can't.
- if (isa<OpaqueType>(FT->getReturnType())) {
+ if (FT->getReturnType()->isOpaqueTy()) {
Error(Loc, "function may not return opaque type");
return 0;
}
@@ -836,7 +836,7 @@ GlobalValue *LLParser::GetGlobalVal(unsigned ID, const Type *Ty, LocTy Loc) {
GlobalValue *FwdVal;
if (const FunctionType *FT = dyn_cast<FunctionType>(PTy->getElementType())) {
// Function types can return opaque but functions can't.
- if (isa<OpaqueType>(FT->getReturnType())) {
+ if (FT->getReturnType()->isOpaqueTy()) {
Error(Loc, "function may not return opaque type");
return 0;
}
@@ -1515,7 +1515,7 @@ bool LLParser::ParseArgumentList(std::vector<ArgInfo> &ArgList,
Name = "";
}
- if (!ArgTy->isFirstClassType() && !isa<OpaqueType>(ArgTy))
+ if (!ArgTy->isFirstClassType() && !ArgTy->isOpaqueTy())
return Error(TypeLoc, "invalid type for function argument");
ArgList.push_back(ArgInfo(TypeLoc, ArgTy, Attrs, Name));
@@ -1785,7 +1785,7 @@ Value *LLParser::PerFunctionState::GetVal(const std::string &Name,
}
// Don't make placeholders with invalid type.
- if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) {
+ if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) {
P.Error(Loc, "invalid use of a non-first-class type");
return 0;
}
@@ -1826,7 +1826,7 @@ Value *LLParser::PerFunctionState::GetVal(unsigned ID, const Type *Ty,
return 0;
}
- if (!Ty->isFirstClassType() && !isa<OpaqueType>(Ty) && !Ty->isLabelTy()) {
+ if (!Ty->isFirstClassType() && !Ty->isOpaqueTy() && !Ty->isLabelTy()) {
P.Error(Loc, "invalid use of a non-first-class type");
return 0;
}
@@ -2256,7 +2256,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
} else {
assert(Opc == Instruction::ICmp && "Unexpected opcode for CmpInst!");
if (!Val0->getType()->isIntOrIntVectorTy() &&
- !isa<PointerType>(Val0->getType()))
+ !Val0->getType()->isPointerTy())
return Error(ID.Loc, "icmp requires pointer or integer operands");
ID.ConstantVal = ConstantExpr::getICmp(Pred, Val0, Val1);
}
@@ -2370,7 +2370,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) {
return true;
if (Opc == Instruction::GetElementPtr) {
- if (Elts.size() == 0 || !isa<PointerType>(Elts[0]->getType()))
+ if (Elts.size() == 0 || !Elts[0]->getType()->isPointerTy())
return Error(ID.Loc, "getelementptr requires pointer operand");
if (!GetElementPtrInst::getIndexedType(Elts[0]->getType(),
@@ -2470,7 +2470,7 @@ bool LLParser::ParseGlobalValueVector(SmallVectorImpl<Constant*> &Elts) {
bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
PerFunctionState *PFS) {
- if (isa<FunctionType>(Ty))
+ if (Ty->isFunctionTy())
return Error(ID.Loc, "functions are not values, refer to them as pointers");
switch (ID.Kind) {
@@ -2509,7 +2509,7 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
V = GetGlobalVal(ID.UIntVal, Ty, ID.Loc);
return V == 0;
case ValID::t_APSInt:
- if (!isa<IntegerType>(Ty))
+ if (!Ty->isIntegerTy())
return Error(ID.Loc, "integer constant must have integer type");
ID.APSIntVal.extOrTrunc(Ty->getPrimitiveSizeInBits());
V = ConstantInt::get(Context, ID.APSIntVal);
@@ -2535,19 +2535,19 @@ bool LLParser::ConvertValIDToValue(const Type *Ty, ValID &ID, Value *&V,
return false;
case ValID::t_Null:
- if (!isa<PointerType>(Ty))
+ if (!Ty->isPointerTy())
return Error(ID.Loc, "null must be a pointer type");
V = ConstantPointerNull::get(cast<PointerType>(Ty));
return false;
case ValID::t_Undef:
// FIXME: LabelTy should not be a first-class type.
if ((!Ty->isFirstClassType() || Ty->isLabelTy()) &&
- !isa<OpaqueType>(Ty))
+ !Ty->isOpaqueTy())
return Error(ID.Loc, "invalid type for undef constant");
V = UndefValue::get(Ty);
return false;
case ValID::t_EmptyArray:
- if (!isa<ArrayType>(Ty) || cast<ArrayType>(Ty)->getNumElements() != 0)
+ if (!Ty->isArrayTy() || cast<ArrayType>(Ty)->getNumElements() != 0)
return Error(ID.Loc, "invalid empty array initializer");
V = UndefValue::get(Ty);
return false;
@@ -2662,7 +2662,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) {
}
if (!FunctionType::isValidReturnType(RetType) ||
- isa<OpaqueType>(RetType))
+ RetType->isOpaqueTy())
return Error(RetTypeLoc, "invalid function return type");
LocTy NameLoc = Lex.getLoc();
@@ -3186,7 +3186,7 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::lsquare, "expected '[' with switch table"))
return true;
- if (!isa<IntegerType>(Cond->getType()))
+ if (!Cond->getType()->isIntegerTy())
return Error(CondLoc, "switch condition must have integer type");
// Parse the jump table pairs.
@@ -3229,7 +3229,7 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) {
ParseToken(lltok::lsquare, "expected '[' with indirectbr"))
return true;
- if (!isa<PointerType>(Address->getType()))
+ if (!Address->getType()->isPointerTy())
return Error(AddrLoc, "indirectbr address must have pointer type");
// Parse the destination list.
@@ -3436,7 +3436,7 @@ bool LLParser::ParseCompare(Instruction *&Inst, PerFunctionState &PFS,
} else {
assert(Opc == Instruction::ICmp && "Unknown opcode for CmpInst!");
if (!LHS->getType()->isIntOrIntVectorTy() &&
- !isa<PointerType>(LHS->getType()))
+ !LHS->getType()->isPointerTy())
return Error(Loc, "icmp requires integer operands");
Inst = new ICmpInst(CmpInst::Predicate(Pred), LHS, RHS);
}
@@ -3761,7 +3761,7 @@ bool LLParser::ParseFree(Instruction *&Inst, PerFunctionState &PFS,
BasicBlock* BB) {
Value *Val; LocTy Loc;
if (ParseTypeAndValue(Val, Loc, PFS)) return true;
- if (!isa<PointerType>(Val->getType()))
+ if (!Val->getType()->isPointerTy())
return Error(Loc, "operand to free must be a pointer");
Inst = CallInst::CreateFree(Val, BB);
return false;
@@ -3778,7 +3778,7 @@ int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS,
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
- if (!isa<PointerType>(Val->getType()) ||
+ if (!Val->getType()->isPointerTy() ||
!cast<PointerType>(Val->getType())->getElementType()->isFirstClassType())
return Error(Loc, "load operand must be a pointer to a first class type");
@@ -3799,7 +3799,7 @@ int LLParser::ParseStore(Instruction *&Inst, PerFunctionState &PFS,
ParseOptionalCommaAlign(Alignment, AteExtraComma))
return true;
- if (!isa<PointerType>(Ptr->getType()))
+ if (!Ptr->getType()->isPointerTy())
return Error(PtrLoc, "store operand must be a pointer");
if (!Val->getType()->isFirstClassType())
return Error(Loc, "store operand must be a first class value");
@@ -3821,7 +3821,7 @@ bool LLParser::ParseGetResult(Instruction *&Inst, PerFunctionState &PFS) {
ParseUInt32(Element, EltLoc))
return true;
- if (!isa<StructType>(Val->getType()) && !isa<ArrayType>(Val->getType()))
+ if (!Val->getType()->isStructTy() && !Val->getType()->isArrayTy())
return Error(ValLoc, "getresult inst requires an aggregate operand");
if (!ExtractValueInst::getIndexedType(Val->getType(), Element))
return Error(EltLoc, "invalid getresult index for value");
@@ -3838,7 +3838,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
if (ParseTypeAndValue(Ptr, Loc, PFS)) return true;
- if (!isa<PointerType>(Ptr->getType()))
+ if (!Ptr->getType()->isPointerTy())
return Error(Loc, "base of getelementptr must be a pointer");
SmallVector<Value*, 16> Indices;
@@ -3849,7 +3849,7 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) {
break;
}
if (ParseTypeAndValue(Val, EltLoc, PFS)) return true;
- if (!isa<IntegerType>(Val->getType()))
+ if (!Val->getType()->isIntegerTy())
return Error(EltLoc, "getelementptr index must be an integer");
Indices.push_back(Val);
}
diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp
index 7537435..15844c0 100644
--- a/lib/Bitcode/Reader/BitReader.cpp
+++ b/lib/Bitcode/Reader/BitReader.cpp
@@ -45,26 +45,44 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef,
/* Reads a module from the specified path, returning via the OutM parameter
a module which performs lazy deserialization. Returns 0 on success.
Optionally returns a human-readable error message via OutMessage. */
-LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage) {
- return LLVMGetBitcodeModuleProviderInContext(wrap(&getGlobalContext()),
- MemBuf, OutMP, OutMessage);
-}
-
-LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
- LLVMMemoryBufferRef MemBuf,
- LLVMModuleProviderRef *OutMP,
- char **OutMessage) {
+LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleRef *OutM,
+ char **OutMessage) {
std::string Message;
- *OutMP = reinterpret_cast<LLVMModuleProviderRef>(
- getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef), &Message));
- if (!*OutMP) {
+ *OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef),
+ &Message));
+ if (!*OutM) {
if (OutMessage)
*OutMessage = strdup(Message.c_str());
return 1;
}
return 0;
+}
+
+LLVMBool LLVMGetBitcodeModule(LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleInContext(LLVMGetGlobalContext(), MemBuf, OutM,
+ OutMessage);
+}
+
+/* Deprecated: Use LLVMGetBitcodeModuleInContext instead. */
+LLVMBool LLVMGetBitcodeModuleProviderInContext(LLVMContextRef ContextRef,
+ LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleInContext(ContextRef, MemBuf,
+ reinterpret_cast<LLVMModuleRef*>(OutMP),
+ OutMessage);
+}
+
+/* Deprecated: Use LLVMGetBitcodeModule instead. */
+LLVMBool LLVMGetBitcodeModuleProvider(LLVMMemoryBufferRef MemBuf,
+ LLVMModuleProviderRef *OutMP,
+ char **OutMessage) {
+ return LLVMGetBitcodeModuleProviderInContext(LLVMGetGlobalContext(), MemBuf,
+ OutMP, OutMessage);
}
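A minimal caller of the new C entry points, sketched under the assumptions shown in the code above: error messages are strdup'd (so free() applies) and the lazily-materialized module keeps reading from the buffer on success. Path handling and buffer ownership on the error path are elided:

#include <stdio.h>
#include <stdlib.h>
#include "llvm-c/Core.h"
#include "llvm-c/BitReader.h"

int loadLazily(const char *Path) {
  char *Err = 0;
  LLVMMemoryBufferRef Buf;
  LLVMModuleRef M;
  if (LLVMCreateMemoryBufferWithContentsOfFile(Path, &Buf, &Err)) {
    fprintf(stderr, "read failed: %s\n", Err);
    free(Err);                    /* messages are strdup'd above */
    return 1;
  }
  /* Returns 0 on success; materialization then happens on demand. */
  if (LLVMGetBitcodeModule(Buf, &M, &Err)) {
    fprintf(stderr, "parse failed: %s\n", Err);
    free(Err);
    return 1;
  }
  /* ... use M ... */
  return 0;
}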
diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp
index cebfbf6..a328837 100644
--- a/lib/Bitcode/Reader/BitcodeReader.cpp
+++ b/lib/Bitcode/Reader/BitcodeReader.cpp
@@ -963,12 +963,12 @@ bool BitcodeReader::ParseConstants() {
V = Constant::getNullValue(CurTy);
break;
case bitc::CST_CODE_INTEGER: // INTEGER: [intval]
- if (!isa<IntegerType>(CurTy) || Record.empty())
+ if (!CurTy->isIntegerTy() || Record.empty())
return Error("Invalid CST_INTEGER record");
V = ConstantInt::get(CurTy, DecodeSignRotatedValue(Record[0]));
break;
case bitc::CST_CODE_WIDE_INTEGER: {// WIDE_INTEGER: [n x intval]
- if (!isa<IntegerType>(CurTy) || Record.empty())
+ if (!CurTy->isIntegerTy() || Record.empty())
return Error("Invalid WIDE_INTEGER record");
unsigned NumWords = Record.size();
@@ -1407,7 +1407,7 @@ bool BitcodeReader::ParseModule() {
if (Record.size() < 6)
return Error("Invalid MODULE_CODE_GLOBALVAR record");
const Type *Ty = getTypeByID(Record[0]);
- if (!isa<PointerType>(Ty))
+ if (!Ty->isPointerTy())
return Error("Global not a pointer type!");
unsigned AddressSpace = cast<PointerType>(Ty)->getAddressSpace();
Ty = cast<PointerType>(Ty)->getElementType();
@@ -1450,7 +1450,7 @@ bool BitcodeReader::ParseModule() {
if (Record.size() < 8)
return Error("Invalid MODULE_CODE_FUNCTION record");
const Type *Ty = getTypeByID(Record[0]);
- if (!isa<PointerType>(Ty))
+ if (!Ty->isPointerTy())
return Error("Function not a pointer type!");
const FunctionType *FTy =
dyn_cast<FunctionType>(cast<PointerType>(Ty)->getElementType());
@@ -1491,7 +1491,7 @@ bool BitcodeReader::ParseModule() {
if (Record.size() < 3)
return Error("Invalid MODULE_ALIAS record");
const Type *Ty = getTypeByID(Record[0]);
- if (!isa<PointerType>(Ty))
+ if (!Ty->isPointerTy())
return Error("Function not a pointer type!");
GlobalAlias *NewGA = new GlobalAlias(Ty, GetDecodedLinkage(Record[2]),
@@ -1622,6 +1622,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
if (Stream.EnterSubBlock(bitc::FUNCTION_BLOCK_ID))
return Error("Malformed block record");
+ InstructionList.clear();
unsigned ModuleValueListSize = ValueList.size();
// Add all the function arguments to the value table.
@@ -1932,7 +1933,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) {
const Type *ReturnType = F->getReturnType();
if (Vs.size() > 1 ||
- (isa<StructType>(ReturnType) &&
+ (ReturnType->isStructTy() &&
(Vs.empty() || Vs[0]->getType() != ReturnType))) {
Value *RV = UndefValue::get(ReturnType);
for (unsigned i = 0, e = Vs.size(); i != e; ++i) {
diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp
index 595497f..aa4c3af 100644
--- a/lib/Bitcode/Writer/ValueEnumerator.cpp
+++ b/lib/Bitcode/Writer/ValueEnumerator.cpp
@@ -27,7 +27,7 @@ static bool isSingleValueType(const std::pair<const llvm::Type*,
}
static bool isIntegerValue(const std::pair<const Value*, unsigned> &V) {
- return isa<IntegerType>(V.first->getType());
+ return V.first->getType()->isIntegerTy();
}
static bool CompareByFrequency(const std::pair<const llvm::Type*,
@@ -39,8 +39,6 @@ static bool CompareByFrequency(const std::pair<const llvm::Type*,
/// ValueEnumerator - Enumerate module-level information.
ValueEnumerator::ValueEnumerator(const Module *M) {
- InstructionCount = 0;
-
// Enumerate the global variables.
for (Module::const_global_iterator I = M->global_begin(),
E = M->global_end(); I != E; ++I)
@@ -377,6 +375,7 @@ void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) {
void ValueEnumerator::incorporateFunction(const Function &F) {
+ InstructionCount = 0;
NumModuleValues = Values.size();
// Adding function arguments to the value table.
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index fc08384..bd2b1b6 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -917,11 +917,10 @@ void AsmPrinter::EmitAlignment(unsigned NumBits, const GlobalValue *GV,
if (NumBits == 0) return; // No need to emit alignment.
- unsigned FillValue = 0;
if (getCurrentSection()->getKind().isText())
- FillValue = MAI->getTextAlignFillValue();
-
- OutStreamer.EmitValueToAlignment(1 << NumBits, FillValue, 1, 0);
+ OutStreamer.EmitCodeAlignment(1 << NumBits);
+ else
+ OutStreamer.EmitValueToAlignment(1 << NumBits, 0, 1, 0);
}
/// LowerConstant - Lower the specified LLVM Constant to an MCExpr.
@@ -1717,7 +1716,7 @@ void AsmPrinter::EmitBasicBlockStart(const MachineBasicBlock *MBB) const {
}
// Print the main label for the block.
- if (MBB->pred_empty() || MBB->isOnlyReachableByFallthrough()) {
+ if (MBB->pred_empty() || isBlockOnlyReachableByFallthrough(MBB)) {
if (VerboseAsm) {
// NOTE: Want this comment at start of line.
O << MAI->getCommentString() << " BB#" << MBB->getNumber() << ':';
@@ -1764,6 +1763,39 @@ void AsmPrinter::printOffset(int64_t Offset) const {
O << Offset;
}
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+bool AsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // If the block is completely empty, then it definitely does fall through.
+ if (Pred->empty())
+ return true;
+
+ // Otherwise, check the last instruction.
+ const MachineInstr &LastInst = Pred->back();
+ return !LastInst.getDesc().isBarrier();
+}
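The hoisted predicate is self-contained enough to restate over a toy CFG, which makes its three conditions explicit: exactly one predecessor, that predecessor is the layout predecessor, and the predecessor does not end in a barrier. A standalone model (field names made up):

#include <vector>

struct Block {
  std::vector<Block*> preds;
  Block *layoutPred;        // block physically laid out just before this one
  bool landingPad;
  bool endsInBarrier;       // e.g. unconditional jump or return
  bool empty;
};

static bool onlyReachableByFallthrough(const Block *B) {
  if (B->landingPad || B->preds.empty()) return false;  // nothing falls in
  if (B->preds.size() != 1) return false;               // multiple entries
  const Block *Pred = B->preds[0];
  if (Pred != B->layoutPred) return false;              // not adjacent
  if (Pred->empty) return true;                         // trivially falls thru
  return !Pred->endsInBarrier;
}

int main() {
  Block A, B;
  A.layoutPred = 0; A.landingPad = false; A.endsInBarrier = false; A.empty = false;
  B = A;
  B.layoutPred = &A;
  B.preds.push_back(&A);
  return onlyReachableByFallthrough(&B) ? 0 : 1;  // A lays out right before B
}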
+
+
+
GCMetadataPrinter *AsmPrinter::GetOrCreateGCPrinter(GCStrategy *S) {
if (!S->usesMetadata())
return 0;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index 5093dd9..5ad1e5e 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -238,7 +238,18 @@ public:
LIndex = DSI;
}
}
- setLastInsn(LastInsn);
+
+ unsigned CurrentLastInsnIndex = 0;
+ if (const MachineInstr *CL = getLastInsn())
+ CurrentLastInsnIndex = MIIndexMap[CL];
+ unsigned FIndex = MIIndexMap[getFirstInsn()];
+
+ // Set LastInsn as the last instruction for this scope only if it follows
+ // 1) this scope's first instruction and
+ // 2) the current last instruction for this scope, if any.
+ if (LIndex >= CurrentLastInsnIndex && LIndex >= FIndex)
+ setLastInsn(LastInsn);
}
#ifndef NDEBUG
@@ -1166,7 +1177,9 @@ DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
return SPDie;
SPDie = new DIE(dwarf::DW_TAG_subprogram);
- addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
+ // Constructors and operators for anonymous aggregates do not have names.
+ if (!SP.getName().empty())
+ addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
StringRef LinkageName = SP.getLinkageName();
if (!LinkageName.empty())
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index c6c59f5..2b08ba4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -638,18 +638,18 @@ void DwarfException::EmitExceptionTable() {
const unsigned LandingPadSize = SizeOfEncodedValue(dwarf::DW_EH_PE_udata4);
bool IsSJLJ = MAI->getExceptionHandlingType() == ExceptionHandling::SjLj;
bool HaveTTData = IsSJLJ ? (!TypeInfos.empty() || !FilterIds.empty()) : true;
- unsigned SizeSites;
+ unsigned CallSiteTableLength;
if (IsSJLJ)
- SizeSites = 0;
+ CallSiteTableLength = 0;
else
- SizeSites = CallSites.size() *
+ CallSiteTableLength = CallSites.size() *
(SiteStartSize + SiteLengthSize + LandingPadSize);
for (unsigned i = 0, e = CallSites.size(); i < e; ++i) {
- SizeSites += MCAsmInfo::getULEB128Size(CallSites[i].Action);
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(CallSites[i].Action);
if (IsSJLJ)
- SizeSites += MCAsmInfo::getULEB128Size(i);
+ CallSiteTableLength += MCAsmInfo::getULEB128Size(i);
}
// Type infos.
@@ -698,7 +698,20 @@ void DwarfException::EmitExceptionTable() {
Asm->OutStreamer.SwitchSection(LSDASection);
Asm->EmitAlignment(2, 0, 0, false);
+ // Emit the LSDA.
O << "GCC_except_table" << SubprogramCount << ":\n";
+ EmitLabel("exception", SubprogramCount);
+
+ if (IsSJLJ) {
+ SmallString<16> LSDAName;
+ raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() <<
+ "_LSDA_" << Asm->getFunctionNumber();
+ O << LSDAName.str() << ":\n";
+ }
+
+ // Emit the LSDA header.
+ EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
+ EmitEncodingByte(TTypeEncoding, "@TType");
// The type infos need to be aligned. GCC does this by inserting padding just
// before the type infos. However, this changes the size of the exception
@@ -707,7 +720,7 @@ void DwarfException::EmitExceptionTable() {
// So by increasing the size by inserting padding, you may increase the number
// of bytes used for writing the size. If it increases, say by one byte, then
// you now need to output one less byte of padding to get the type infos
- // aligned. However this decreases the size of the exception table. This
+ // aligned. However this decreases the size of the exception table. This
// changes the value you have to output for the exception table size. Due to
// the variable length encoding, the number of bytes used for writing the
// length may decrease. If so, you then have to increase the amount of
@@ -716,41 +729,35 @@ void DwarfException::EmitExceptionTable() {
// We chose another solution: don't output padding inside the table like GCC
// does, instead output it before the table.
unsigned SizeTypes = TypeInfos.size() * TypeFormatSize;
- unsigned TyOffset = sizeof(int8_t) + // Call site format
- MCAsmInfo::getULEB128Size(SizeSites) + // Call site table length
- SizeSites + SizeActions + SizeTypes;
- unsigned TotalSize = sizeof(int8_t) + // LPStart format
- sizeof(int8_t) + // TType format
- (HaveTTData ?
- MCAsmInfo::getULEB128Size(TyOffset) : 0) + // TType base offset
- TyOffset;
+ unsigned CallSiteTableLengthSize =
+ MCAsmInfo::getULEB128Size(CallSiteTableLength);
+ unsigned TTypeBaseOffset =
+ sizeof(int8_t) + // Call site format
+ CallSiteTableLengthSize + // Call site table length size
+ CallSiteTableLength + // Call site table length
+ SizeActions + // Actions size
+ SizeTypes;
+ unsigned TTypeBaseOffsetSize = MCAsmInfo::getULEB128Size(TTypeBaseOffset);
+ unsigned TotalSize =
+ sizeof(int8_t) + // LPStart format
+ sizeof(int8_t) + // TType format
+ (HaveTTData ? TTypeBaseOffsetSize : 0) + // TType base offset size
+ TTypeBaseOffset; // TType base offset
unsigned SizeAlign = (4 - TotalSize) & 3;
- for (unsigned i = 0; i != SizeAlign; ++i) {
- Asm->EmitInt8(0);
- EOL("Padding");
- }
-
- EmitLabel("exception", SubprogramCount);
-
- if (IsSJLJ) {
- SmallString<16> LSDAName;
- raw_svector_ostream(LSDAName) << MAI->getPrivateGlobalPrefix() <<
- "_LSDA_" << Asm->getFunctionNumber();
- O << LSDAName.str() << ":\n";
+ if (HaveTTData) {
+ // Account for any extra padding that will be added to the call site table
+ // length.
+ EmitULEB128(TTypeBaseOffset, "@TType base offset", SizeAlign);
+ SizeAlign = 0;
}
- // Emit the header.
- EmitEncodingByte(dwarf::DW_EH_PE_omit, "@LPStart");
- EmitEncodingByte(TTypeEncoding, "@TType");
-
- if (HaveTTData)
- EmitULEB128(TyOffset, "@TType base offset");
-
// SjLj Exception handling
if (IsSJLJ) {
EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
- EmitULEB128(SizeSites, "Call site table length");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
// Emit the landing pad site information.
unsigned idx = 0;
@@ -791,7 +798,9 @@ void DwarfException::EmitExceptionTable() {
// Emit the landing pad call site table.
EmitEncodingByte(dwarf::DW_EH_PE_udata4, "Call site");
- EmitULEB128(SizeSites, "Call site table length");
+
+ // Add extra padding if it wasn't added to the TType base offset.
+ EmitULEB128(CallSiteTableLength, "Call site table length", SizeAlign);
for (SmallVectorImpl<CallSiteEntry>::const_iterator
I = CallSites.begin(), E = CallSites.end(); I != E; ++I) {
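The padding math above, (4 - TotalSize) & 3, computes the bytes needed to round the table up to a 4-byte boundary, relying on unsigned wraparound when TotalSize exceeds 4. A standalone check:

#include <assert.h>

int main() {
  for (unsigned n = 0; n < 64; ++n) {
    unsigned pad = (4 - n) & 3;          // same expression as SizeAlign above
    assert(pad < 4 && (n + pad) % 4 == 0);
  }
  return 0;
}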
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
index 1299d04..28ff0eb 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.cpp
@@ -159,29 +159,35 @@ void DwarfPrinter::EmitSLEB128(int Value, const char *Desc) const {
Value >>= 7;
IsMore = Value != Sign || ((Byte ^ Sign) & 0x40) != 0;
if (IsMore) Byte |= 0x80;
-
Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
} while (IsMore);
}
/// EmitULEB128 - emit the specified unsigned leb128 value.
-void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc) const {
+void DwarfPrinter::EmitULEB128(unsigned Value, const char *Desc,
+ unsigned PadTo) const {
if (Asm->VerboseAsm && Desc)
Asm->OutStreamer.AddComment(Desc);
- if (MAI->hasLEB128()) {
+ if (MAI->hasLEB128() && PadTo == 0) {
O << "\t.uleb128\t" << Value;
Asm->OutStreamer.AddBlankLine();
return;
}
- // If we don't have .uleb128, emit as .bytes.
+ // If we don't have .uleb128 or we want to emit padding, emit as .bytes.
do {
unsigned char Byte = static_cast<unsigned char>(Value & 0x7f);
Value >>= 7;
- if (Value) Byte |= 0x80;
+ if (Value || PadTo != 0) Byte |= 0x80;
Asm->OutStreamer.EmitIntValue(Byte, 1, /*addrspace*/0);
} while (Value);
+
+ if (PadTo) {
+ if (PadTo > 1)
+ Asm->OutStreamer.EmitFill(PadTo - 1, 0x80/*fillval*/, 0/*addrspace*/);
+ Asm->OutStreamer.EmitFill(1, 0/*fillval*/, 0/*addrspace*/);
+ }
}
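The padded emission above can be modeled standalone: while padding is requested every payload byte keeps its continuation bit, then PadTo - 1 bytes of 0x80 and a final 0x00 follow, so a decoder reads the same value from an encoding exactly PadTo bytes longer. Sketch (function name made up):

#include <cstdio>
#include <vector>

static void encodeULEB128(unsigned Value, unsigned PadTo,
                          std::vector<unsigned char> &Out) {
  do {
    unsigned char Byte = Value & 0x7f;
    Value >>= 7;
    if (Value || PadTo != 0) Byte |= 0x80;  // force continuation while padding
    Out.push_back(Byte);
  } while (Value);
  for (unsigned i = 1; i < PadTo; ++i) Out.push_back(0x80);
  if (PadTo) Out.push_back(0x00);           // terminating group
}

int main() {
  std::vector<unsigned char> A, B;
  encodeULEB128(624485, 0, A);  // classic example: e5 8e 26
  encodeULEB128(624485, 2, B);  // same value, two bytes longer: e5 8e a6 80 00
  for (unsigned i = 0, e = B.size(); i != e; ++i) std::printf("%02x ", B[i]);
  std::printf("\n");
  return 0;
}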
diff --git a/lib/CodeGen/AsmPrinter/DwarfPrinter.h b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
index 73de398..bd715f2 100644
--- a/lib/CodeGen/AsmPrinter/DwarfPrinter.h
+++ b/lib/CodeGen/AsmPrinter/DwarfPrinter.h
@@ -111,7 +111,8 @@ public:
void EmitSLEB128(int Value, const char *Desc) const;
/// EmitULEB128 - emit the specified unsigned leb128 value.
- void EmitULEB128(unsigned Value, const char *Desc = 0) const;
+ void EmitULEB128(unsigned Value, const char *Desc = 0,
+ unsigned PadTo = 0) const;
/// PrintLabelName - Print label name in form used by Dwarf writer.
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 62cf339..d385b86 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -22,6 +22,7 @@ add_llvm_library(LLVMCodeGen
LiveVariables.cpp
LowerSubregs.cpp
MachineBasicBlock.cpp
+ MachineCSE.cpp
MachineDominators.cpp
MachineFunction.cpp
MachineFunctionAnalysis.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
index 2bedd04..a328d0e 100644
--- a/lib/CodeGen/CalcSpillWeights.cpp
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -131,10 +131,7 @@ bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
if (Hint.first || Hint.second)
li.weight *= 1.01F;
- // Divide the weight of the interval by its size. This encourages
- // spilling of intervals that are large and have few uses, and
- // discourages spilling of small intervals with many uses.
- li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM;
+ lis->normalizeSpillWeight(li);
}
}
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index a13a310..3ff2a04 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -102,13 +102,6 @@ bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
// Conservatively ignore EH landing pads.
if (MBB->isLandingPad()) return false;
- // Ignore blocks which look like they might have EH-related control flow.
- // At the time of this writing, there are blocks which AnalyzeBranch
- // thinks end in single uncoditional branches, yet which have two CFG
- // successors. Code in this file is not prepared to reason about such things.
- if (!MBB->empty() && MBB->back().isEHLabel())
- return false;
-
// Aggressively handle return blocks and similar constructs.
if (MBB->succ_empty()) return true;
@@ -118,6 +111,14 @@ bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) {
// Make sure the terminator is understood.
if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond))
return false;
+ // Ignore blocks which look like they might have EH-related control flow.
+ // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't
+ // recognize the possibility of a control transfer through an unwind.
+ // Such blocks contain EH_LABEL instructions, however they may be in the
+ // middle of the block. Instead of searching for them, just check to see
+ // if the CFG disagrees with AnalyzeBranch.
+ if (1u + !Cond.empty() != MBB->succ_size())
+ return false;
// Make sure we have the option of reversing the condition.
if (!Cond.empty() && TII->ReverseBranchCondition(Cond))
return false;
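The new check packs the expectation into one expression: 1u + !Cond.empty() is 1 for an unconditional transfer and 2 for a conditional branch (taken target plus fall-through). Any other successor count means an edge AnalyzeBranch cannot model. A trivial standalone restatement (helper name made up):

#include <assert.h>

static unsigned expectedSuccs(bool CondEmpty) { return 1u + !CondEmpty; }

int main() {
  assert(expectedSuccs(true) == 1u);   // unconditional branch / fall-through
  assert(expectedSuccs(false) == 2u);  // conditional branch
  // A block reporting, say, 3 successors must carry an edge (e.g. an unwind)
  // the analysis cannot see, so CodePlacementOpt leaves it alone.
  return 0;
}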
diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp
index a215a19..d69c995 100644
--- a/lib/CodeGen/DeadMachineInstructionElim.cpp
+++ b/lib/CodeGen/DeadMachineInstructionElim.cpp
@@ -55,7 +55,7 @@ FunctionPass *llvm::createDeadMachineInstructionElimPass() {
bool DeadMachineInstructionElim::isDead(const MachineInstr *MI) const {
// Don't delete instructions with side effects.
bool SawStore = false;
- if (!MI->isSafeToMove(TII, SawStore, 0) && !MI->isPHI())
+ if (!MI->isSafeToMove(TII, 0, SawStore) && !MI->isPHI())
return false;
// Examine each operand.
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 278de02..fd442db 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -67,6 +67,9 @@ static cl::opt<bool> VerifyMachineCode("verify-machineinstrs", cl::Hidden,
cl::desc("Verify generated machine code"),
cl::init(getenv("LLVM_VERIFY_MACHINEINSTRS")!=NULL));
+static cl::opt<bool> EnableMachineCSE("machine-cse", cl::Hidden,
+ cl::desc("Enable Machine CSE"));
+
static cl::opt<cl::boolOrDefault>
AsmVerbose("asm-verbose", cl::desc("Add comments to directives."),
cl::init(cl::BOU_UNSET));
@@ -115,9 +118,10 @@ LLVMTargetMachine::setCodeModelForStatic() {
bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
// Add common CodeGen passes.
- if (addCommonCodeGenPasses(PM, OptLevel))
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify))
return true;
OwningPtr<MCContext> Context(new MCContext());
@@ -193,12 +197,13 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM,
///
bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM,
JITCodeEmitter &JCE,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
// Make sure the code model is set.
setCodeModelForJIT();
// Add common CodeGen passes.
- if (addCommonCodeGenPasses(PM, OptLevel))
+ if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify))
return true;
addCodeEmitter(PM, OptLevel, JCE);
@@ -221,13 +226,19 @@ static void printAndVerify(PassManagerBase &PM,
/// emitting to assembly files or machine code output.
///
bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
// Standard LLVM-Level Passes.
+ // Before running any passes, run the verifier to determine if the input
+ // coming from the front-end and/or optimizer is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
// Optionally, run split-GEPs and no-load GVN.
if (EnableSplitGEPGVN) {
PM.add(createGEPSplitterPass());
- PM.add(createGVNPass(/*NoPRE=*/false, /*NoLoads=*/true));
+ PM.add(createGVNPass(/*NoLoads=*/true));
}
// Run loop strength reduction before anything else.
@@ -235,9 +246,6 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
PM.add(createLoopStrengthReducePass(getTargetLowering()));
if (PrintLSR)
PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs()));
-#ifndef NDEBUG
- PM.add(createVerifierPass());
-#endif
}
// Turn exception handling constructs into something the code generators can
@@ -277,6 +285,11 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
"*** Final LLVM Code input to ISel ***\n",
&dbgs()));
+ // All passes which modify the LLVM IR are now complete; run the verifier
+ // to ensure that the IR is valid.
+ if (!DisableVerify)
+ PM.add(createVerifierPass());
+
// Standard Lower-Level Passes.
// Set up a MachineFunction for the rest of CodeGen to work on.
@@ -307,6 +320,8 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
if (OptLevel != CodeGenOpt::None) {
PM.add(createOptimizeExtsPass());
+ if (EnableMachineCSE)
+ PM.add(createMachineCSEPass());
if (!DisableMachineLICM)
PM.add(createMachineLICMPass());
if (!DisableMachineSink)
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index f6bf433..ccda66f 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -329,24 +329,43 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
DEBUG(dbgs() << " +" << NewLR);
interval.addRange(NewLR);
- // Iterate over all of the blocks that the variable is completely
- // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the
- // live interval.
- for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
- E = vi.AliveBlocks.end(); I != E; ++I) {
- MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
- LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
- interval.addRange(LR);
- DEBUG(dbgs() << " +" << LR);
+ bool PHIJoin = lv_->isPHIJoin(interval.reg);
+
+ if (PHIJoin) {
+ // A phi join register is killed at the end of the MBB and revived as a new
+ // valno in the killing blocks.
+ assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks");
+ DEBUG(dbgs() << " phi-join");
+ ValNo->addKill(indexes_->getTerminatorGap(mbb));
+ ValNo->setHasPHIKill(true);
+ } else {
+ // Iterate over all of the blocks that the variable is completely
+ // live in, adding [instrIndex(begin), instrIndex(end)+4) to the
+ // live interval.
+ for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(),
+ E = vi.AliveBlocks.end(); I != E; ++I) {
+ MachineBasicBlock *aliveBlock = mf_->getBlockNumbered(*I);
+ LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), ValNo);
+ interval.addRange(LR);
+ DEBUG(dbgs() << " +" << LR);
+ }
}
// Finally, this virtual register is live from the start of any killing
// block to the 'use' slot of the killing instruction.
for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) {
MachineInstr *Kill = vi.Kills[i];
- SlotIndex killIdx =
- getInstructionIndex(Kill).getDefIndex();
- LiveRange LR(getMBBStartIdx(Kill->getParent()), killIdx, ValNo);
+ SlotIndex Start = getMBBStartIdx(Kill->getParent());
+ SlotIndex killIdx = getInstructionIndex(Kill).getDefIndex();
+
+ // Create an interval with a NEW value number. Note that this value
+ // number isn't actually defined by an instruction, weird huh? :)
+ if (PHIJoin) {
+ ValNo = interval.getNextValue(SlotIndex(Start, true), 0, false,
+ VNInfoAllocator);
+ ValNo->setIsPHIDef(true);
+ }
+ LiveRange LR(Start, killIdx, ValNo);
interval.addRange(LR);
ValNo->addKill(killIdx);
DEBUG(dbgs() << " +" << LR);
@@ -409,48 +428,11 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
interval.print(dbgs(), tri_);
});
} else {
- // Otherwise, this must be because of phi elimination. If this is the
- // first redefinition of the vreg that we have seen, go back and change
- // the live range in the PHI block to be a different value number.
- if (interval.containsOneValue()) {
-
- VNInfo *VNI = interval.getValNumInfo(0);
- // Phi elimination may have reused the register for multiple identical
- // phi nodes. There will be a kill per phi. Remove the old ranges that
- // we now know have an incorrect number.
- for (unsigned ki=0, ke=vi.Kills.size(); ki != ke; ++ki) {
- MachineInstr *Killer = vi.Kills[ki];
- SlotIndex Start = getMBBStartIdx(Killer->getParent());
- SlotIndex End = getInstructionIndex(Killer).getDefIndex();
- DEBUG({
- dbgs() << "\n\t\trenaming [" << Start << "," << End << "] in: ";
- interval.print(dbgs(), tri_);
- });
- interval.removeRange(Start, End);
-
- // Replace the interval with one of a NEW value number. Note that
- // this value number isn't actually defined by an instruction, weird
- // huh? :)
- LiveRange LR(Start, End,
- interval.getNextValue(SlotIndex(Start, true),
- 0, false, VNInfoAllocator));
- LR.valno->setIsPHIDef(true);
- interval.addRange(LR);
- LR.valno->addKill(End);
- }
-
- MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def);
- VNI->addKill(indexes_->getTerminatorGap(killMBB));
- VNI->setHasPHIKill(true);
- DEBUG({
- dbgs() << " RESULT: ";
- interval.print(dbgs(), tri_);
- });
- }
-
+ assert(lv_->isPHIJoin(interval.reg) && "Multiply defined register");
// In the case of PHI elimination, each variable definition is only
// live until the end of the block. We've already taken care of the
// rest of the live range.
+
SlotIndex defIndex = MIIdx.getDefIndex();
if (MO.isEarlyClobber())
defIndex = MIIdx.getUseIndex();
@@ -468,7 +450,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
interval.addRange(LR);
ValNo->addKill(indexes_->getTerminatorGap(mbb));
ValNo->setHasPHIKill(true);
- DEBUG(dbgs() << " +" << LR);
+ DEBUG(dbgs() << " phi-join +" << LR);
}
}
@@ -1358,11 +1340,9 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
MachineBasicBlock *MBB = MI->getParent();
if (ImpUse && MI != ReMatDefMI) {
- // Re-matting an instruction with virtual register use. Update the
- // register interval's spill weight to HUGE_VALF to prevent it from
- // being spilled.
- LiveInterval &ImpLi = getInterval(ImpUse);
- ImpLi.weight = HUGE_VALF;
+ // Re-matting an instruction with virtual register use. Prevent interval
+ // from being spilled.
+ getInterval(ImpUse).markNotSpillable();
}
unsigned MBBId = MBB->getNumber();
@@ -1414,7 +1394,7 @@ rewriteInstructionsForSpills(const LiveInterval &li, bool TrySplit,
LiveInterval &nI = getOrCreateInterval(NewVReg);
if (!TrySplit) {
// The spill weight is now infinity as it cannot be spilled again.
- nI.weight = HUGE_VALF;
+ nI.markNotSpillable();
continue;
}
@@ -1562,6 +1542,28 @@ LiveIntervals::handleSpilledImpDefs(const LiveInterval &li, VirtRegMap &vrm,
}
}
+float
+LiveIntervals::getSpillWeight(bool isDef, bool isUse, unsigned loopDepth) {
+ // Limit the loop depth ridiculousness.
+ if (loopDepth > 200)
+ loopDepth = 200;
+
+ // The loop depth is used to roughly estimate the number of times the
+ // instruction is executed. Something like 10^d is simple, but will quickly
+ // overflow a float. This expression behaves like 10^d for small d, but is
+ // more tempered for large d. At d=200 we get 6.7e33 which leaves a bit of
+ // headroom before overflow.
+ float lc = powf(1 + (100.0f / (loopDepth+10)), (float)loopDepth);
+
+ return (isDef + isUse) * lc;
+}
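The tempered 10^d factor above is easy to evaluate standalone: it behaves like 10^d for small depths and reaches the quoted ~6.7e33 at the clamp of 200, well inside float range (max about 3.4e38). Illustrative sample depths:

#include <math.h>
#include <stdio.h>

int main() {
  const unsigned Depths[] = { 0, 1, 2, 3, 50, 200 };
  for (unsigned i = 0; i != sizeof(Depths) / sizeof(Depths[0]); ++i) {
    unsigned d = Depths[i];
    // Same expression as getSpillWeight's loop-depth factor.
    float lc = powf(1 + (100.0f / (d + 10)), (float)d);
    printf("loop depth %3u -> factor %g\n", d, lc);
  }
  return 0;
}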
+
+void
+LiveIntervals::normalizeSpillWeights(std::vector<LiveInterval*> &NewLIs) {
+ for (unsigned i = 0, e = NewLIs.size(); i != e; ++i)
+ normalizeSpillWeight(*NewLIs[i]);
+}
+
std::vector<LiveInterval*> LiveIntervals::
addIntervalsForSpillsFast(const LiveInterval &li,
const MachineLoopInfo *loopInfo,
@@ -1570,8 +1572,7 @@ addIntervalsForSpillsFast(const LiveInterval &li,
std::vector<LiveInterval*> added;
- assert(li.weight != HUGE_VALF &&
- "attempt to spill already spilled interval!");
+ assert(li.isSpillable() && "attempt to spill already spilled interval!");
DEBUG({
dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
@@ -1607,10 +1608,7 @@ addIntervalsForSpillsFast(const LiveInterval &li,
// create a new register for this spill
LiveInterval &nI = getOrCreateInterval(NewVReg);
-
- // the spill weight is now infinity as it
- // cannot be spilled again
- nI.weight = HUGE_VALF;
+ nI.markNotSpillable();
// Rewrite register operands to use the new vreg.
for (SmallVectorImpl<unsigned>::iterator I = Indices.begin(),
@@ -1664,8 +1662,7 @@ addIntervalsForSpills(const LiveInterval &li,
if (EnableFastSpilling)
return addIntervalsForSpillsFast(li, loopInfo, vrm);
- assert(li.weight != HUGE_VALF &&
- "attempt to spill already spilled interval!");
+ assert(li.isSpillable() && "attempt to spill already spilled interval!");
DEBUG({
dbgs() << "\t\t\t\tadding intervals for spills for interval: ";
@@ -1739,6 +1736,7 @@ addIntervalsForSpills(const LiveInterval &li,
}
handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ normalizeSpillWeights(NewLIs);
return NewLIs;
}
@@ -1814,6 +1812,7 @@ addIntervalsForSpills(const LiveInterval &li,
// Insert spills / restores if we are splitting.
if (!TrySplit) {
handleSpilledImpDefs(li, vrm, rc, NewLIs);
+ normalizeSpillWeights(NewLIs);
return NewLIs;
}
@@ -1930,11 +1929,10 @@ addIntervalsForSpills(const LiveInterval &li,
unsigned ImpUse = getReMatImplicitUse(li, ReMatDefMI);
if (ImpUse) {
// Re-matting an instruction with virtual register use. Add the
- // register as an implicit use on the use MI and update the register
- // interval's spill weight to HUGE_VALF to prevent it from being
- // spilled.
+ // register as an implicit use on the use MI and mark the register
+ // interval as unspillable.
LiveInterval &ImpLi = getInterval(ImpUse);
- ImpLi.weight = HUGE_VALF;
+ ImpLi.markNotSpillable();
MI->addOperand(MachineOperand::CreateReg(ImpUse, false, true));
}
}
@@ -1973,6 +1971,7 @@ addIntervalsForSpills(const LiveInterval &li,
}
handleSpilledImpDefs(li, vrm, rc, RetNewLIs);
+ normalizeSpillWeights(RetNewLIs);
return RetNewLIs;
}
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 8a124dc..68c8539 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -510,6 +510,7 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) {
PHIVarInfo = new SmallVector<unsigned, 4>[MF->getNumBlockIDs()];
std::fill(PhysRegDef, PhysRegDef + NumRegs, (MachineInstr*)0);
std::fill(PhysRegUse, PhysRegUse + NumRegs, (MachineInstr*)0);
+ PHIJoins.clear();
/// Get some space for a respectable number of registers.
VirtRegInfo.resize(64);
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index 655a0bf..64134ce 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -143,36 +143,6 @@ MachineBasicBlock::iterator MachineBasicBlock::getFirstTerminator() {
return I;
}
-/// isOnlyReachableViaFallthough - Return true if this basic block has
-/// exactly one predecessor and the control transfer mechanism between
-/// the predecessor and this block is a fall-through.
-bool MachineBasicBlock::isOnlyReachableByFallthrough() const {
- // If this is a landing pad, it isn't a fall through. If it has no preds,
- // then nothing falls through to it.
- if (isLandingPad() || pred_empty())
- return false;
-
- // If there isn't exactly one predecessor, it can't be a fall through.
- const_pred_iterator PI = pred_begin(), PI2 = PI;
- ++PI2;
- if (PI2 != pred_end())
- return false;
-
- // The predecessor has to be immediately before this block.
- const MachineBasicBlock *Pred = *PI;
-
- if (!Pred->isLayoutSuccessor(this))
- return false;
-
- // If the block is completely empty, then it definitely does fall through.
- if (Pred->empty())
- return true;
-
- // Otherwise, check the last instruction.
- const MachineInstr &LastInst = Pred->back();
- return !LastInst.getDesc().isBarrier();
-}
-
void MachineBasicBlock::dump() const {
print(dbgs());
}
diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp
new file mode 100644
index 0000000..023ace2
--- /dev/null
+++ b/lib/CodeGen/MachineCSE.cpp
@@ -0,0 +1,208 @@
+//===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass performs global common subexpression elimination on machine
+// instructions using a scoped hash table based value numbering scheme. It
+// must be run while the machine function is still in SSA form.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "machine-cse"
+#include "llvm/CodeGen/Passes.h"
+#include "llvm/CodeGen/MachineDominators.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/ScopedHashTable.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/Support/Debug.h"
+
+using namespace llvm;
+
+namespace llvm {
+ template<> struct DenseMapInfo<MachineInstr*> {
+ static inline MachineInstr *getEmptyKey() {
+ return 0;
+ }
+
+ static inline MachineInstr *getTombstoneKey() {
+ return reinterpret_cast<MachineInstr*>(-1);
+ }
+
+ static unsigned getHashValue(const MachineInstr* const &MI) {
+ unsigned Hash = MI->getOpcode() * 37;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = MI->getOperand(i);
+ uint64_t Key = (uint64_t)MO.getType() << 32;
+ switch (MO.getType()) {
+ default: break;
+ case MachineOperand::MO_Register:
+ if (MO.isDef() && TargetRegisterInfo::isVirtualRegister(MO.getReg()))
+ continue; // Skip virtual register defs.
+ Key |= MO.getReg();
+ break;
+ case MachineOperand::MO_Immediate:
+ Key |= MO.getImm();
+ break;
+ case MachineOperand::MO_FrameIndex:
+ case MachineOperand::MO_ConstantPoolIndex:
+ case MachineOperand::MO_JumpTableIndex:
+ Key |= MO.getIndex();
+ break;
+ case MachineOperand::MO_MachineBasicBlock:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getMBB());
+ break;
+ case MachineOperand::MO_GlobalAddress:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getGlobal());
+ break;
+ case MachineOperand::MO_BlockAddress:
+ Key |= DenseMapInfo<void*>::getHashValue(MO.getBlockAddress());
+ break;
+ }
+ Key += ~(Key << 32);
+ Key ^= (Key >> 22);
+ Key += ~(Key << 13);
+ Key ^= (Key >> 8);
+ Key += (Key << 3);
+ Key ^= (Key >> 15);
+ Key += ~(Key << 27);
+ Key ^= (Key >> 31);
+ Hash = (unsigned)Key + Hash * 37;
+ }
+ return Hash;
+ }
+
+ static bool isEqual(const MachineInstr* const &LHS,
+ const MachineInstr* const &RHS) {
+ if (RHS == getEmptyKey() || RHS == getTombstoneKey() ||
+ LHS == getEmptyKey() || LHS == getTombstoneKey())
+ return LHS == RHS;
+ return LHS->isIdenticalTo(RHS, MachineInstr::IgnoreVRegDefs);
+ }
+ };
+} // end llvm namespace
+
+namespace {
+ class MachineCSE : public MachineFunctionPass {
+ const TargetInstrInfo *TII;
+ MachineRegisterInfo *MRI;
+ MachineDominatorTree *DT;
+ ScopedHashTable<MachineInstr*, unsigned> VNT;
+ unsigned CurrVN;
+ public:
+ static char ID; // Pass identification
+ MachineCSE() : MachineFunctionPass(&ID), CurrVN(0) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF);
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ AU.addRequired<MachineDominatorTree>();
+ AU.addPreserved<MachineDominatorTree>();
+ }
+
+ private:
+ bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
+ bool ProcessBlock(MachineDomTreeNode *Node);
+ };
+} // end anonymous namespace
+
+char MachineCSE::ID = 0;
+static RegisterPass<MachineCSE>
+X("machine-cse", "Machine Common Subexpression Elimination");
+
+FunctionPass *llvm::createMachineCSEPass() { return new MachineCSE(); }
+
+bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
+ MachineBasicBlock *MBB) {
+ bool Changed = false;
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (MO.isReg() && MO.isUse()) {
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ MachineInstr *DefMI = MRI->getVRegDef(Reg);
+ if (DefMI->getParent() == MBB) {
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*DefMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx) &&
+ TargetRegisterInfo::isVirtualRegister(SrcReg) &&
+ !SrcSubIdx && !DstSubIdx) {
+ MO.setReg(SrcReg);
+ Changed = true;
+ }
+ }
+ }
+ }
+
+ return Changed;
+}
+
+static bool hasLivePhysRegDefUse(MachineInstr *MI) {
+ for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = MI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg)
+ continue;
+ if (TargetRegisterInfo::isPhysicalRegister(Reg) &&
+ !(MO.isDef() && MO.isDead()))
+ return true;
+ }
+ return false;
+}
+
+bool MachineCSE::ProcessBlock(MachineDomTreeNode *Node) {
+ bool Changed = false;
+
+ ScopedHashTableScope<MachineInstr*, unsigned> VNTS(VNT);
+ MachineBasicBlock *MBB = Node->getBlock();
+ for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E;
+ ++I) {
+ MachineInstr *MI = &*I;
+ bool SawStore = false;
+ if (!MI->isSafeToMove(TII, 0, SawStore))
+ continue;
+ // Ignore copies or instructions that read / write physical registers
+ // (except for dead defs of physical registers).
+ unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ if (TII->isMoveInstr(*MI, SrcReg, DstReg, SrcSubIdx, DstSubIdx))
+ continue;
+ if (hasLivePhysRegDefUse(MI))
+ continue;
+
+ bool FoundCSE = VNT.count(MI);
+ if (!FoundCSE) {
+ // Look for trivial copy coalescing opportunities.
+ if (PerformTrivialCoalescing(MI, MBB))
+ FoundCSE = VNT.count(MI);
+ }
+
+ if (FoundCSE)
+ DEBUG(dbgs() << "Found a common subexpression: " << *MI);
+ else
+ VNT.insert(MI, ++CurrVN);
+ }
+
+ // Recursively call ProcessBlock on the children.
+ const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
+ for (unsigned i = 0, e = Children.size(); i != e; ++i)
+ Changed |= ProcessBlock(Children[i]);
+
+ return Changed;
+}
+
+bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+ DT = &getAnalysis<MachineDominatorTree>();
+ return ProcessBlock(DT->getRootNode());
+}
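The pass's key discipline is the ScopedHashTableScope opened per dominator-tree node: value numbers recorded in a block are visible while its children are visited and vanish on the way back up. A toy model, not using LLVM (a std::map plus an undo step stands in for llvm::ScopedHashTable; expressions are plain strings):

#include <stdio.h>
#include <map>
#include <string>
#include <vector>

struct Node { std::string Expr; std::vector<Node*> Kids; };

static void visit(Node *N, std::map<std::string, unsigned> &VNT,
                  unsigned &NextVN) {
  bool Inserted = false;
  if (VNT.count(N->Expr))
    printf("CSE candidate: '%s' (VN %u)\n", N->Expr.c_str(), VNT[N->Expr]);
  else {
    VNT[N->Expr] = ++NextVN;           // open this node's scope
    Inserted = true;
  }
  for (unsigned i = 0, e = N->Kids.size(); i != e; ++i)
    visit(N->Kids[i], VNT, NextVN);    // dominated nodes see the entry
  if (Inserted)
    VNT.erase(N->Expr);                // close the scope on the way back up
}

int main() {
  Node A, B, C;
  A.Expr = "add x, y";  // dominates B and C
  B.Expr = "add x, y";  // redundant: reported as a CSE candidate
  C.Expr = "mul x, y";  // different expression: gets a fresh value number
  A.Kids.push_back(&B);
  A.Kids.push_back(&C);
  std::map<std::string, unsigned> VNT;
  unsigned NextVN = 0;
  visit(&A, VNT, NextVN);
  return 0;
}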
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index f141c56..4377d5b 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -95,6 +95,9 @@ MachineFunction::MachineFunction(Function *F, const TargetMachine &TM,
MFInfo = 0;
FrameInfo = new (Allocator.Allocate<MachineFrameInfo>())
MachineFrameInfo(*TM.getFrameInfo());
+ if (Fn->hasFnAttr(Attribute::StackAlignment))
+ FrameInfo->setMaxAlignment(Attribute::getStackAlignmentFromAttrs(
+ Fn->getAttributes().getFnAttributes()));
ConstantPool = new (Allocator.Allocate<MachineConstantPool>())
MachineConstantPool(TM.getTargetData());
Alignment = TM.getTargetLowering()->getFunctionAlignment(F);
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index b6d98e8..cba93f1 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -18,6 +18,7 @@
#include "llvm/Type.h"
#include "llvm/Value.h"
#include "llvm/Assembly/Writer.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
@@ -701,6 +702,28 @@ void MachineInstr::addMemOperand(MachineFunction &MF,
MemRefsEnd = NewMemRefsEnd;
}
+bool MachineInstr::isIdenticalTo(const MachineInstr *Other,
+ MICheckType Check) const {
+ if (Other->getOpcode() != getOpcode() ||
+ Other->getNumOperands() != getNumOperands())
+ return false;
+ for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
+ const MachineOperand &MO = getOperand(i);
+ const MachineOperand &OMO = Other->getOperand(i);
+ if (Check != CheckDefs && MO.isReg() && MO.isDef()) {
+ if (Check == IgnoreDefs)
+ continue;
+ // Check == IgnoreVRegDefs
+ if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ TargetRegisterInfo::isPhysicalRegister(OMO.getReg()))
+ if (MO.getReg() != OMO.getReg())
+ return false;
+ } else if (!MO.isIdenticalTo(OMO))
+ return false;
+ }
+ return true;
+}
+
/// removeFromParent - This method unlinks 'this' from the containing basic
/// block, and returns it, but does not delete it.
MachineInstr *MachineInstr::removeFromParent() {
@@ -959,8 +982,8 @@ void MachineInstr::copyPredicates(const MachineInstr *MI) {
/// SawStore is set to true, it means that there is a store (or call) between
/// the instruction's location and its intended destination.
bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
- bool &SawStore,
- AliasAnalysis *AA) const {
+ AliasAnalysis *AA,
+ bool &SawStore) const {
// Ignore stuff that we obviously can't move.
if (TID->mayStore() || TID->isCall()) {
SawStore = true;
@@ -985,11 +1008,11 @@ bool MachineInstr::isSafeToMove(const TargetInstrInfo *TII,
/// isSafeToReMat - Return true if it's safe to rematerialize the specified
/// instruction which defined the specified register instead of copying it.
bool MachineInstr::isSafeToReMat(const TargetInstrInfo *TII,
- unsigned DstReg,
- AliasAnalysis *AA) const {
+ AliasAnalysis *AA,
+ unsigned DstReg) const {
bool SawStore = false;
if (!TII->isTriviallyReMaterializable(this, AA) ||
- !isSafeToMove(TII, SawStore, AA))
+ !isSafeToMove(TII, AA, SawStore))
return false;
for (unsigned i = 0, e = getNumOperands(); i != e; ++i) {
const MachineOperand &MO = getOperand(i);
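The new MICheckType parameter distinguishes three notions of "identical". A reduced model of the per-operand decision, using a hypothetical toy operand type rather than the MachineOperand API:

    #include <cassert>

    enum MICheckType { CheckDefs, IgnoreDefs, IgnoreVRegDefs };

    struct Operand {                    // hypothetical stand-in, not MachineOperand
      bool isReg, isDef, isPhys;
      unsigned reg;
    };

    bool operandsMatch(const Operand &MO, const Operand &OMO, MICheckType Check) {
      if (Check != CheckDefs && MO.isReg && MO.isDef) {
        if (Check == IgnoreDefs)
          return true;                  // defs don't participate at all
        // IgnoreVRegDefs: physical-register defs must still match exactly.
        if ((MO.isPhys || OMO.isPhys) && MO.reg != OMO.reg)
          return false;
        return true;
      }
      // Uses and non-def operands: require an exact match.
      return MO.isReg == OMO.isReg && MO.isDef == OMO.isDef &&
             MO.isPhys == OMO.isPhys && MO.reg == OMO.reg;
    }

    int main() {
      Operand defA = {true, true, false, 1024};
      Operand defB = {true, true, false, 1025};  // different virtual registers
      assert(!operandsMatch(defA, defB, CheckDefs));
      assert(operandsMatch(defA, defB, IgnoreDefs));
      assert(operandsMatch(defA, defB, IgnoreVRegDefs));
      return 0;
    }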
diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp
index 92c84f3..0361694 100644
--- a/lib/CodeGen/MachineLICM.cpp
+++ b/lib/CodeGen/MachineLICM.cpp
@@ -252,32 +252,6 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
return false;
}
- DEBUG({
- dbgs() << "--- Checking if we can hoist " << I;
- if (I.getDesc().getImplicitUses()) {
- dbgs() << " * Instruction has implicit uses:\n";
-
- const TargetRegisterInfo *TRI = TM->getRegisterInfo();
- for (const unsigned *ImpUses = I.getDesc().getImplicitUses();
- *ImpUses; ++ImpUses)
- dbgs() << " -> " << TRI->getName(*ImpUses) << "\n";
- }
-
- if (I.getDesc().getImplicitDefs()) {
- dbgs() << " * Instruction has implicit defines:\n";
-
- const TargetRegisterInfo *TRI = TM->getRegisterInfo();
- for (const unsigned *ImpDefs = I.getDesc().getImplicitDefs();
- *ImpDefs; ++ImpDefs)
- dbgs() << " -> " << TRI->getName(*ImpDefs) << "\n";
- }
- });
-
- if (I.getDesc().getImplicitDefs() || I.getDesc().getImplicitUses()) {
- DEBUG(dbgs() << "Cannot hoist with implicit defines or uses\n");
- return false;
- }
-
// The instruction is loop invariant if all of its operands are.
for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
const MachineOperand &MO = I.getOperand(i);
@@ -311,6 +285,10 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
} else if (!MO.isDead()) {
// A def that isn't dead. We can't move it.
return false;
+ } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
+ // If the reg is live into the loop, we can't hoist an instruction
+ // which would clobber it.
+ return false;
}
}
@@ -467,7 +445,7 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI,
std::vector<const MachineInstr*> &PrevMIs) {
for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
const MachineInstr *PrevMI = PrevMIs[i];
- if (TII->isIdentical(MI, PrevMI, RegInfo))
+ if (TII->produceSameValue(MI, PrevMI))
return PrevMI;
}
return 0;
@@ -480,9 +458,20 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI,
if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
+
+ // Replace virtual registers defined by MI with their counterparts defined
+ // by Dup.
for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
const MachineOperand &MO = MI->getOperand(i);
- if (MO.isReg() && MO.isDef())
+
+ // Physical registers may not differ here.
+ assert((!MO.isReg() || MO.getReg() == 0 ||
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
+ MO.getReg() == Dup->getOperand(i).getReg()) &&
+ "Instructions with different phys regs are not identical!");
+
+ if (MO.isReg() && MO.isDef() &&
+ !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
RegInfo->replaceRegWith(MO.getReg(), Dup->getOperand(i).getReg());
}
MI->eraseFromParent();
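The assert makes the contract explicit: instructions are only CSE'd when any physical-register operands already agree, so only virtual-register defs ever need rewriting. The rewrite itself amounts to a def-to-def substitution; a toy model of that step, with a plain map standing in for MachineRegisterInfo::replaceRegWith:

    #include <cassert>
    #include <map>

    int main() {
      std::map<unsigned, unsigned> rewrite;  // duplicate vreg -> surviving vreg
      const unsigned dupDef = 1025, hoistedDef = 1024;
      rewrite[dupDef] = hoistedDef;

      unsigned use = 1025;                   // a later use of the duplicate's def
      std::map<unsigned, unsigned>::iterator it = rewrite.find(use);
      if (it != rewrite.end())
        use = it->second;                    // redirect to the hoisted def
      assert(use == hoistedDef);
      return 0;
    }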
diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp
index c391576..9ba7d14 100644
--- a/lib/CodeGen/MachineSink.cpp
+++ b/lib/CodeGen/MachineSink.cpp
@@ -149,7 +149,7 @@ bool MachineSinking::ProcessBlock(MachineBasicBlock &MBB) {
/// instruction out of its current block into a successor.
bool MachineSinking::SinkInstruction(MachineInstr *MI, bool &SawStore) {
// Check if it's safe to move the instruction.
- if (!MI->isSafeToMove(TII, SawStore, AA))
+ if (!MI->isSafeToMove(TII, AA, SawStore))
return false;
// FIXME: This should include support for sinking instructions within the
diff --git a/lib/CodeGen/PBQP/HeuristicSolver.h b/lib/CodeGen/PBQP/HeuristicSolver.h
index c156264..bd18b52 100644
--- a/lib/CodeGen/PBQP/HeuristicSolver.h
+++ b/lib/CodeGen/PBQP/HeuristicSolver.h
@@ -229,7 +229,7 @@ namespace PBQP {
}
/// \brief Apply rule R1.
- /// @param nItr Node iterator for node to apply R1 to.
+ /// @param xnItr Node iterator for node to apply R1 to.
///
/// Node will be automatically pushed to the solver stack.
void applyR1(Graph::NodeItr xnItr) {
@@ -277,7 +277,7 @@ namespace PBQP {
}
/// \brief Apply rule R2.
- /// @param nItr Node iterator for node to apply R2 to.
+ /// @param xnItr Node iterator for node to apply R2 to.
///
/// Node will be automatically pushed to the solver stack.
void applyR2(Graph::NodeItr xnItr) {
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index b740c68..bdfd448 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -55,8 +55,6 @@ void llvm::PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const {
bool llvm::PHIElimination::runOnMachineFunction(MachineFunction &Fn) {
MRI = &Fn.getRegInfo();
- PHIDefs.clear();
- PHIKills.clear();
bool Changed = false;
// Split critical edges to help the coalescer
@@ -215,10 +213,6 @@ void llvm::PHIElimination::LowerAtomicPHINode(
TII->copyRegToReg(MBB, AfterPHIsIt, DestReg, IncomingReg, RC, RC);
}
- // Record PHI def.
- assert(!hasPHIDef(DestReg) && "Vreg has multiple phi-defs?");
- PHIDefs[DestReg] = &MBB;
-
// Update live variable information if there is any.
LiveVariables *LV = getAnalysisIfAvailable<LiveVariables>();
if (LV) {
@@ -229,6 +223,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Increment use count of the newly created virtual register.
VI.NumUses++;
+ LV->setPHIJoin(IncomingReg);
// When we are reusing the incoming register, it may already have been
// killed in this block. The old kill will also have been inserted at
@@ -276,9 +271,6 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// path the PHI.
MachineBasicBlock &opBlock = *MPhi->getOperand(i*2+2).getMBB();
- // Record the kill.
- PHIKills[SrcReg].insert(&opBlock);
-
// If source is defined by an implicit def, there is no need to insert a
// copy.
MachineInstr *DefMI = MRI->getVRegDef(SrcReg);
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index f3ab9e2..ff4aa20 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -22,17 +22,8 @@ namespace llvm {
/// Lower PHI instructions to copies.
class PHIElimination : public MachineFunctionPass {
MachineRegisterInfo *MRI; // Machine register information
- private:
-
- typedef SmallSet<MachineBasicBlock*, 4> PHIKillList;
- typedef DenseMap<unsigned, PHIKillList> PHIKillMap;
- typedef DenseMap<unsigned, MachineBasicBlock*> PHIDefMap;
public:
-
- typedef PHIKillList::iterator phi_kill_iterator;
- typedef PHIKillList::const_iterator const_phi_kill_iterator;
-
static char ID; // Pass identification, replacement for typeid
PHIElimination() : MachineFunctionPass(&ID) {}
@@ -40,38 +31,6 @@ namespace llvm {
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
- /// Return true if the given vreg was defined by a PHI intsr prior to
- /// lowering.
- bool hasPHIDef(unsigned vreg) const {
- return PHIDefs.count(vreg);
- }
-
- /// Returns the block in which the PHI instruction which defined the
- /// given vreg used to reside.
- MachineBasicBlock* getPHIDefBlock(unsigned vreg) {
- PHIDefMap::iterator phiDefItr = PHIDefs.find(vreg);
- assert(phiDefItr != PHIDefs.end() && "vreg has no phi-def.");
- return phiDefItr->second;
- }
-
- /// Returns true if the given vreg was killed by a PHI instr.
- bool hasPHIKills(unsigned vreg) const {
- return PHIKills.count(vreg);
- }
-
- /// Returns an iterator over the BasicBlocks which contained PHI
- /// kills of this register prior to lowering.
- phi_kill_iterator phiKillsBegin(unsigned vreg) {
- PHIKillMap::iterator phiKillItr = PHIKills.find(vreg);
- assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills.");
- return phiKillItr->second.begin();
- }
- phi_kill_iterator phiKillsEnd(unsigned vreg) {
- PHIKillMap::iterator phiKillItr = PHIKills.find(vreg);
- assert(phiKillItr != PHIKills.end() && "vreg has no phi-kills.");
- return phiKillItr->second.end();
- }
-
private:
/// EliminatePHINodes - Eliminate phi nodes by inserting copy instructions
/// in predecessor basic blocks.
@@ -139,8 +98,6 @@ namespace llvm {
typedef DenseMap<BBVRegPair, unsigned> VRegPHIUse;
VRegPHIUse VRegPHIUseCount;
- PHIDefMap PHIDefs;
- PHIKillMap PHIKills;
// Defs of PHI sources which are implicit_def.
SmallPtrSet<MachineInstr*, 4> ImpDefs;
diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp
index f67eb79..5ea2941 100644
--- a/lib/CodeGen/Passes.cpp
+++ b/lib/CodeGen/Passes.cpp
@@ -34,7 +34,7 @@ static cl::opt<RegisterRegAlloc::FunctionPassCtor, false,
RegisterPassParser<RegisterRegAlloc> >
RegAlloc("regalloc",
cl::init(&createLinearScanRegisterAllocator),
- cl::desc("Register allocator to use: (default = linearscan)"));
+ cl::desc("Register allocator to use (default=linearscan)"));
//===---------------------------------------------------------------------===//
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 040259e..138e711 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -175,9 +175,10 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
MachineBasicBlock::iterator I = *i;
// If call frames are not being included as part of the stack frame, and
- // there is no dynamic allocation (therefore referencing frame slots off
- // sp), leave the pseudo ops alone. We'll eliminate them later.
- if (RegInfo->hasReservedCallFrame(Fn) || RegInfo->hasFP(Fn))
+ // the target doesn't indicate otherwise, remove the call frame pseudos
+ // here. The sub/add sp instruction pairs are still inserted, but we don't
+ // need to track the SP adjustment for frame index elimination.
+ if (RegInfo->canSimplifyCallFramePseudos(Fn))
RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I);
}
}
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 8e44a57..5c5a394 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -334,10 +334,6 @@ namespace {
SmallVector<unsigned, 256> &inactiveCounts,
bool SkipDGRegs);
- /// assignVirt2StackSlot - assigns this virtual register to a
- /// stack slot. returns the stack slot
- int assignVirt2StackSlot(unsigned virtReg);
-
void ComputeRelatedRegClasses();
template <typename ItTy>
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index 2701faf..81cfd8f 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -57,7 +57,7 @@
using namespace llvm;
static RegisterRegAlloc
-registerPBQPRepAlloc("pbqp", "PBQP register allocator.",
+registerPBQPRepAlloc("pbqp", "PBQP register allocator",
llvm::createPBQPRegisterAllocator);
static cl::opt<bool>
@@ -867,10 +867,6 @@ bool PBQPRegAlloc::runOnMachineFunction(MachineFunction &MF) {
// Find the vreg intervals in need of allocation.
findVRegIntervalsToAlloc();
- // If there aren't any then we're done here.
- if (vregIntervalsToAlloc.empty() && emptyVRegIntervals.empty())
- return true;
-
// If there are non-empty intervals allocate them using pbqp.
if (!vregIntervalsToAlloc.empty()) {
diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp
index 56dd533..badf34e 100644
--- a/lib/CodeGen/ScheduleDAGInstrs.cpp
+++ b/lib/CodeGen/ScheduleDAGInstrs.cpp
@@ -72,7 +72,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) {
} else {
return V;
}
- assert(isa<IntegerType>(V->getType()) && "Unexpected operand type!");
+ assert(V->getType()->isIntegerTy() && "Unexpected operand type!");
} while (1);
}
@@ -87,7 +87,7 @@ static const Value *getUnderlyingObject(const Value *V) {
break;
const Value *O = getUnderlyingObjectFromInt(cast<User>(V)->getOperand(0));
// If that succeeded in finding a pointer, continue the search.
- if (!isa<PointerType>(O->getType()))
+ if (!O->getType()->isPointerTy())
break;
V = O;
} while (1);
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 7da7848..e4ff44d 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -1064,7 +1064,7 @@ SDValue DAGCombiner::visitADD(SDNode *N) {
if (VT.isInteger() && !VT.isVector()) {
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
@@ -1136,7 +1136,7 @@ SDValue DAGCombiner::visitADDC(SDNode *N) {
// fold (addc a, b) -> (or a, b), CARRY_FALSE iff a and b share no bits.
APInt LHSZero, LHSOne;
APInt RHSZero, RHSOne;
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
DAG.ComputeMaskedBits(N0, Mask, LHSZero, LHSOne);
if (LHSZero.getBoolValue()) {
@@ -1786,7 +1786,7 @@ SDValue DAGCombiner::visitAND(SDNode *N) {
SDValue RAND = ReassociateOps(ISD::AND, N->getDebugLoc(), N0, N1);
if (RAND.getNode() != 0)
return RAND;
- // fold (and (or x, 0xFFFF), 0xFF) -> 0xFF
+ // fold (and (or x, C), D) -> D if (C & D) == D
if (N1C && N0.getOpcode() == ISD::OR)
if (ConstantSDNode *ORI = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
if ((ORI->getAPIntValue() & N1C->getAPIntValue()) == N1C->getAPIntValue())
@@ -2025,13 +2025,15 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
if (ROR.getNode() != 0)
return ROR;
// Canonicalize (or (and X, c1), c2) -> (and (or X, c2), c1|c2)
+ // iff (c1 & c2) != 0.
if (N1C && N0.getOpcode() == ISD::AND && N0.getNode()->hasOneUse() &&
isa<ConstantSDNode>(N0.getOperand(1))) {
ConstantSDNode *C1 = cast<ConstantSDNode>(N0.getOperand(1));
- return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
- DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
- N0.getOperand(0), N1),
- DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
+ if ((C1->getAPIntValue() & N1C->getAPIntValue()) != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::OR, N0.getDebugLoc(), VT,
+ N0.getOperand(0), N1),
+ DAG.FoldConstantArithmetic(ISD::OR, VT, N1C, C1));
}
// fold (or (setcc x), (setcc y)) -> (setcc (or x, y))
if (isSetCCEquivalent(N0, LL, LR, CC0) && isSetCCEquivalent(N1, RL, RR, CC1)){
@@ -2754,7 +2756,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::CTLZ &&
N1C->getAPIntValue() == Log2_32(VT.getSizeInBits())) {
APInt KnownZero, KnownOne;
- APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits());
+ APInt Mask = APInt::getAllOnesValue(VT.getScalarType().getSizeInBits());
DAG.ComputeMaskedBits(N0.getOperand(0), Mask, KnownZero, KnownOne);
// If any of the input bits are KnownOne, then the input couldn't be all
@@ -4655,7 +4657,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
DAG.DeleteNode(Trunc);
}
// Replace the uses of SRL with SETCC
- DAG.ReplaceAllUsesOfValueWith(N1, SetCC);
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
removeFromWorkList(N1.getNode());
DAG.DeleteNode(N1.getNode());
return SDValue(N, 0); // Return N so it doesn't get rechecked!
@@ -4663,6 +4666,56 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) {
}
}
}
+
+ // Transform br(xor(x, y)) -> br(x != y)
+ // Transform br(xor(xor(x,y), 1)) -> br (x == y)
+ if (N1.hasOneUse() && N1.getOpcode() == ISD::XOR) {
+ SDNode *TheXor = N1.getNode();
+ SDValue Op0 = TheXor->getOperand(0);
+ SDValue Op1 = TheXor->getOperand(1);
+ if (Op0.getOpcode() == Op1.getOpcode()) {
+ // Avoid missing important xor optimizations.
+ SDValue Tmp = visitXOR(TheXor);
+ if (Tmp.getNode()) {
+ DEBUG(dbgs() << "\nReplacing.8 ";
+ TheXor->dump(&DAG);
+ dbgs() << "\nWith: ";
+ Tmp.getNode()->dump(&DAG);
+ dbgs() << '\n');
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, Tmp, &DeadNodes);
+ removeFromWorkList(TheXor);
+ DAG.DeleteNode(TheXor);
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, Tmp, N2);
+ }
+ }
+
+ if (Op0.getOpcode() != ISD::SETCC && Op1.getOpcode() != ISD::SETCC) {
+ bool Equal = false;
+ if (ConstantSDNode *RHSCI = dyn_cast<ConstantSDNode>(Op0))
+ if (RHSCI->getAPIntValue() == 1 && Op0.hasOneUse() &&
+ Op0.getOpcode() == ISD::XOR) {
+ TheXor = Op0.getNode();
+ Equal = true;
+ }
+
+ EVT SetCCVT = N1.getValueType();
+ if (LegalTypes)
+ SetCCVT = TLI.getSetCCResultType(SetCCVT);
+ SDValue SetCC = DAG.getSetCC(TheXor->getDebugLoc(),
+ SetCCVT,
+ Op0, Op1,
+ Equal ? ISD::SETEQ : ISD::SETNE);
+ // Replace the uses of XOR with SETCC
+ WorkListRemover DeadNodes(*this);
+ DAG.ReplaceAllUsesOfValueWith(N1, SetCC, &DeadNodes);
+ removeFromWorkList(N1.getNode());
+ DAG.DeleteNode(N1.getNode());
+ return DAG.getNode(ISD::BRCOND, N->getDebugLoc(),
+ MVT::Other, Chain, SetCC, N2);
+ }
+ }
return SDValue();
}
@@ -5012,7 +5065,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
assert(N->getValueType(2) == MVT::Other && "Malformed indexed loads?");
if (N->hasNUsesOfValue(0, 0) && N->hasNUsesOfValue(0, 1)) {
SDValue Undef = DAG.getUNDEF(N->getValueType(0));
- DEBUG(dbgs() << "\nReplacing.6 ";
+ DEBUG(dbgs() << "\nReplacing.7 ";
N->dump(&DAG);
dbgs() << "\nWith: ";
Undef.getNode()->dump(&DAG);
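Both AND/OR folds touched above rest on plain bitwise identities, checkable on concrete values in ordinary C++, independent of the SelectionDAG types:

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t x = 0x12345678;

      // fold (and (or x, C), D) -> D, valid whenever (C & D) == D:
      uint32_t C = 0xFFFF, D = 0xFF;
      assert((C & D) == D);
      assert(((x | C) & D) == D);

      // (or (and x, c1), c2) == (and (or x, c2), c1|c2) for any constants;
      // the (c1 & c2) != 0 test above only guards when the rewrite is applied.
      uint32_t c1 = 0xFF00, c2 = 0x0F00;
      assert(((x & c1) | c2) == ((x | c2) & (c1 | c2)));
      return 0;
    }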
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 35ef5b7..1d76c7c 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -350,6 +350,34 @@ bool FastISel::SelectCall(User *I) {
(void)TargetSelectInstruction(cast<Instruction>(I));
return true;
}
+ case Intrinsic::dbg_value: {
+ // This requires target support, but right now X86 is the only FastISel target.
+ DbgValueInst *DI = cast<DbgValueInst>(I);
+ const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+ Value *V = DI->getValue();
+ if (!V) {
+ // Currently the optimizer can produce this; insert an undef to
+ // help debugging. Probably the optimizer should not do this.
+ BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
+ addMetadata(DI->getVariable());
+ } else if (ConstantInt *CI = dyn_cast<ConstantInt>(V)) {
+ BuildMI(MBB, DL, II).addImm(CI->getZExtValue()).addImm(DI->getOffset()).
+ addMetadata(DI->getVariable());
+ } else if (ConstantFP *CF = dyn_cast<ConstantFP>(V)) {
+ BuildMI(MBB, DL, II).addFPImm(CF).addImm(DI->getOffset()).
+ addMetadata(DI->getVariable());
+ } else if (unsigned Reg = lookUpRegForValue(V)) {
+ BuildMI(MBB, DL, II).addReg(Reg, RegState::Debug).addImm(DI->getOffset()).
+ addMetadata(DI->getVariable());
+ } else {
+ // We can't yet handle anything else here because it would require
+ // generating code, thus altering codegen because of debug info.
+ // Insert an undef so we can see what we dropped.
+ BuildMI(MBB, DL, II).addReg(0U).addImm(DI->getOffset()).
+ addMetadata(DI->getVariable());
+ }
+ return true;
+ }
case Intrinsic::eh_exception: {
EVT VT = TLI.getValueType(I->getType());
switch (TLI.getOperationAction(ISD::EXCEPTIONADDR, VT)) {
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index e9321da..c7ab34f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -1539,7 +1539,6 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
// the result as a vector.
// Create the stack frame object.
EVT VT = Node->getValueType(0);
- EVT OpVT = Node->getOperand(0).getValueType();
EVT EltVT = VT.getVectorElementType();
DebugLoc dl = Node->getDebugLoc();
SDValue FIPtr = DAG.CreateStackTemporary(VT);
@@ -1559,8 +1558,9 @@ SDValue SelectionDAGLegalize::ExpandVectorBuildThroughStack(SDNode* Node) {
SDValue Idx = DAG.getConstant(Offset, FIPtr.getValueType());
Idx = DAG.getNode(ISD::ADD, dl, FIPtr.getValueType(), FIPtr, Idx);
- // If EltVT smaller than OpVT, only store the bits necessary.
- if (!OpVT.isVector() && EltVT.bitsLT(OpVT)) {
+ // If the destination vector element type is narrower than the source
+ // element type, only store the bits necessary.
+ if (EltVT.bitsLT(Node->getOperand(i).getValueType().getScalarType())) {
Stores.push_back(DAG.getTruncStore(DAG.getEntryNode(), dl,
Node->getOperand(i), Idx, SV, Offset,
EltVT, false, false, 0));
@@ -1899,8 +1899,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node,
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), false,
/*isReturnValueUsed=*/true,
- Callee, Args, DAG,
- Node->getDebugLoc(), DAG.GetOrdering(Node));
+ Callee, Args, DAG, Node->getDebugLoc());
// Legalize the call sequence, starting with the chain. This will advance
// the LastCALLSEQ_END to the legalized version of the CALLSEQ_END node that
@@ -2308,7 +2307,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
false, false, false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("abort", TLI.getPointerTy()),
- Args, DAG, dl, DAG.GetOrdering(Node));
+ Args, DAG, dl);
Results.push_back(CallResult.second);
break;
}
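The narrower-element case reduces to an ordinary integer truncation: only the low EltVT bits of the wider operand reach memory. The same effect on scalars (an illustration, not the legalizer's code path):

    #include <cassert>
    #include <cstdint>

    int main() {
      uint32_t elt = 0x11223344;    // source element, wider than the slot
      uint8_t slot = (uint8_t)elt;  // "truncating store": only the low bits land
      assert(slot == 0x44);
      return 0;
    }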
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index e4d123f..81f28ad 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1080,8 +1080,8 @@ ExpandShiftWithKnownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
SDValue Amt = N->getOperand(1);
EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
EVT ShTy = Amt.getValueType();
- unsigned ShBits = ShTy.getSizeInBits();
- unsigned NVTBits = NVT.getSizeInBits();
+ unsigned ShBits = ShTy.getScalarType().getSizeInBits();
+ unsigned NVTBits = NVT.getScalarType().getSizeInBits();
assert(isPowerOf2_32(NVTBits) &&
"Expanded integer type size not a power of two!");
DebugLoc dl = N->getDebugLoc();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
index 0d929f1..f3e7ca4f 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp
@@ -1034,8 +1034,7 @@ SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT,
TLI.LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, TLI.getLibcallCallingConv(LC), false,
/*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl,
- DAG.GetOrdering(DAG.getEntryNode().getNode()));
+ Callee, Args, DAG, dl);
return CallInfo.first;
}
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index b51c61b..06e7b8c 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -218,8 +218,20 @@ void ScheduleDAGSDNodes::BuildSchedUnits() {
// Check to see if the scheduler cares about latencies.
bool UnitLatencies = ForceUnitLatencies();
- for (SelectionDAG::allnodes_iterator NI = DAG->allnodes_begin(),
- E = DAG->allnodes_end(); NI != E; ++NI) {
+ // Add all nodes in depth-first order.
+ SmallVector<SDNode*, 64> Worklist;
+ SmallPtrSet<SDNode*, 64> Visited;
+ Worklist.push_back(DAG->getRoot().getNode());
+ Visited.insert(DAG->getRoot().getNode());
+
+ while (!Worklist.empty()) {
+ SDNode *NI = Worklist.pop_back_val();
+
+ // Add all operands to the worklist unless they've already been added.
+ for (unsigned i = 0, e = NI->getNumOperands(); i != e; ++i)
+ if (Visited.insert(NI->getOperand(i).getNode()))
+ Worklist.push_back(NI->getOperand(i).getNode());
+
if (isPassiveNode(NI)) // Leaf node, e.g. a TargetImmediate.
continue;
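Switching from a linear allnodes scan to this worklist restricts scheduling-unit creation to nodes reachable from the root. The traversal is the usual iterative DFS with a visited set; a generic sketch with a toy node type in place of SDNode:

    #include <set>
    #include <vector>

    struct Node { std::vector<Node*> operands; };

    template <typename Fn>
    void visitDepthFirst(Node *root, Fn process) {
      std::vector<Node*> worklist{root};
      std::set<Node*> visited{root};
      while (!worklist.empty()) {
        Node *n = worklist.back();
        worklist.pop_back();
        // Queue each operand exactly once; insert() reports prior membership.
        for (Node *op : n->operands)
          if (visited.insert(op).second)
            worklist.push_back(op);
        process(n);
      }
    }

    int main() {
      Node a, b, c;
      b.operands = {&a};
      c.operands = {&a, &b};             // 'a' is shared, visited only once
      int count = 0;
      visitDepthFirst(&c, [&](Node *) { ++count; });
      return count == 3 ? 0 : 1;
    }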
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 43cf37e..002bc68 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -468,18 +468,20 @@ static void AddNodeIDNode(FoldingSetNodeID &ID, const SDNode *N) {
}
/// encodeMemSDNodeFlags - Generic routine for computing a value for use in
-/// the CSE map that carries volatility, indexing mode, and
+/// the CSE map that carries volatility, temporalness, indexing mode, and
/// extension/truncation information.
///
static inline unsigned
-encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile) {
+encodeMemSDNodeFlags(int ConvType, ISD::MemIndexedMode AM, bool isVolatile,
+ bool isNonTemporal) {
assert((ConvType & 3) == ConvType &&
"ConvType may not require more than 2 bits!");
assert((AM & 7) == AM &&
"AM may not require more than 3 bits!");
return ConvType |
(AM << 2) |
- (isVolatile << 5);
+ (isVolatile << 5) |
+ (isNonTemporal << 6);
}
//===----------------------------------------------------------------------===//
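With the extra flag, the packed field now spans seven bits: two for the conversion type, three for the indexing mode, and one each for volatile and non-temporal. A standalone check of that layout (bit positions as implied by the shifts and asserts above):

    #include <cassert>

    unsigned encode(unsigned Conv, unsigned AM, bool Vol, bool NT) {
      return Conv | (AM << 2) | ((unsigned)Vol << 5) | ((unsigned)NT << 6);
    }

    int main() {
      unsigned f = encode(/*Conv=*/1, /*AM=*/5, /*Vol=*/true, /*NT=*/true);
      assert((f & 3) == 1);           // bits 0-1: conversion type
      assert(((f >> 2) & 7) == 5);    // bits 2-4: indexing mode
      assert(((f >> 5) & 1) == 1);    // bit 5: volatile
      assert(((f >> 6) & 1) == 1);    // bit 6: the new non-temporal flag
      return 0;
    }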
@@ -860,14 +862,14 @@ SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
/// getNOT - Create a bitwise NOT operation as (XOR Val, -1).
///
SDValue SelectionDAG::getNOT(DebugLoc DL, SDValue Val, EVT VT) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.getScalarType();
SDValue NegOne =
getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
return getNode(ISD::XOR, DL, VT, Val, NegOne);
}
SDValue SelectionDAG::getConstant(uint64_t Val, EVT VT, bool isT) {
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.getScalarType();
assert((EltVT.getSizeInBits() >= 64 ||
(uint64_t)((int64_t)Val >> EltVT.getSizeInBits()) + 1 < 2) &&
"getConstant with a uint64_t value that doesn't fit in the type!");
@@ -881,7 +883,7 @@ SDValue SelectionDAG::getConstant(const APInt &Val, EVT VT, bool isT) {
SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
assert(VT.isInteger() && "Cannot create FP integer constant!");
- EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.getScalarType();
assert(Val.getBitWidth() == EltVT.getSizeInBits() &&
"APInt size does not match type size!");
@@ -924,8 +926,7 @@ SDValue SelectionDAG::getConstantFP(const APFloat& V, EVT VT, bool isTarget) {
SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
assert(VT.isFloatingPoint() && "Cannot create integer FP constant!");
- EVT EltVT =
- VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.getScalarType();
// Do the map lookup using the actual bit pattern for the floating point
// value, so that we don't have problems with 0.0 comparing equal to -0.0, and
@@ -959,8 +960,7 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
}
SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) {
- EVT EltVT =
- VT.isVector() ? VT.getVectorElementType() : VT;
+ EVT EltVT = VT.getScalarType();
if (EltVT==MVT::f32)
return getConstantFP(APFloat((float)Val), VT, isTarget);
else
@@ -1345,7 +1345,7 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
}
SDValue SelectionDAG::getSrcValue(const Value *V) {
- assert((!V || isa<PointerType>(V->getType())) &&
+ assert((!V || V->getType()->isPointerTy()) &&
"SrcValue is not a pointer?");
FoldingSetNodeID ID;
@@ -2233,6 +2233,29 @@ bool SelectionDAG::isKnownNeverNaN(SDValue Op) const {
return false;
}
+bool SelectionDAG::isKnownNeverZero(SDValue Op) const {
+ // If the value is a constant, we can obviously see if it is a zero or not.
+ if (const ConstantFPSDNode *C = dyn_cast<ConstantFPSDNode>(Op))
+ return !C->isZero();
+
+ // TODO: Recognize more cases here.
+
+ return false;
+}
+
+bool SelectionDAG::isEqualTo(SDValue A, SDValue B) const {
+ // Check the obvious case.
+ if (A == B) return true;
+
+ // Check for negative and positive zero.
+ if (const ConstantFPSDNode *CA = dyn_cast<ConstantFPSDNode>(A))
+ if (const ConstantFPSDNode *CB = dyn_cast<ConstantFPSDNode>(B))
+ if (CA->isZero() && CB->isZero()) return true;
+
+ // Otherwise they may not be equal.
+ return false;
+}
+
bool SelectionDAG::isVerifiedDebugInfoDesc(SDValue Op) const {
GlobalAddressSDNode *GA = dyn_cast<GlobalAddressSDNode>(Op);
if (!GA) return false;
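isEqualTo needs the explicit zero check because IEEE-754 gives -0.0 and +0.0 distinct bit patterns (hence potentially distinct constant nodes) even though they compare equal as values:

    #include <cassert>
    #include <cstdint>
    #include <cstring>

    int main() {
      double pz = 0.0, nz = -0.0;
      std::uint64_t pbits, nbits;
      std::memcpy(&pbits, &pz, sizeof pz);
      std::memcpy(&nbits, &nz, sizeof nz);
      assert(pbits != nbits);  // different representations (sign bit set in nz)
      assert(pz == nz);        // but equal under IEEE-754 comparison
      return 0;
    }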
@@ -3081,8 +3104,7 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) {
/// operand.
static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG,
DebugLoc dl) {
- unsigned NumBits = VT.isVector() ?
- VT.getVectorElementType().getSizeInBits() : VT.getSizeInBits();
+ unsigned NumBits = VT.getScalarType().getSizeInBits();
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(Value)) {
APInt Val = APInt(NumBits, C->getZExtValue() & 255);
unsigned Shift = 8;
@@ -3474,7 +3496,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst,
/*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMCPY),
TLI.getPointerTy()),
- Args, *this, dl, GetOrdering(Chain.getNode()));
+ Args, *this, dl);
return CallResult.second;
}
@@ -3523,7 +3545,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst,
/*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMMOVE),
TLI.getPointerTy()),
- Args, *this, dl, GetOrdering(Chain.getNode()));
+ Args, *this, dl);
return CallResult.second;
}
@@ -3582,7 +3604,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst,
/*isReturnValueUsed=*/false,
getExternalSymbol(TLI.getLibcallName(RTLIB::MEMSET),
TLI.getPointerTy()),
- Args, *this, dl, GetOrdering(Chain.getNode()));
+ Args, *this, dl);
return CallResult.second;
}
@@ -3845,7 +3867,8 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::LOAD, VTs, Ops, 3);
ID.AddInteger(MemVT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile()));
+ ID.AddInteger(encodeMemSDNodeFlags(ExtType, AM, MMO->isVolatile(),
+ MMO->isNonTemporal()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
@@ -3926,7 +3949,8 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(VT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile()));
+ ID.AddInteger(encodeMemSDNodeFlags(false, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -3989,7 +4013,8 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ISD::STORE, VTs, Ops, 4);
ID.AddInteger(SVT.getRawBits());
- ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile()));
+ ID.AddInteger(encodeMemSDNodeFlags(true, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal()));
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
@@ -4548,91 +4573,13 @@ SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDNode *SelectionDAG::SelectNodeTo(SDNode *N, unsigned MachineOpc,
SDVTList VTs, const SDValue *Ops,
unsigned NumOps) {
- return MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT) {
- SDVTList VTs = getVTList(VT);
- return MorphNodeTo(N, Opc, VTs, 0, 0);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT, SDValue Op1) {
- SDVTList VTs = getVTList(VT);
- SDValue Ops[] = { Op1 };
- return MorphNodeTo(N, Opc, VTs, Ops, 1);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT, SDValue Op1,
- SDValue Op2) {
- SDVTList VTs = getVTList(VT);
- SDValue Ops[] = { Op1, Op2 };
- return MorphNodeTo(N, Opc, VTs, Ops, 2);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT, SDValue Op1,
- SDValue Op2, SDValue Op3) {
- SDVTList VTs = getVTList(VT);
- SDValue Ops[] = { Op1, Op2, Op3 };
- return MorphNodeTo(N, Opc, VTs, Ops, 3);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT, const SDValue *Ops,
- unsigned NumOps) {
- SDVTList VTs = getVTList(VT);
- return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2, const SDValue *Ops,
- unsigned NumOps) {
- SDVTList VTs = getVTList(VT1, VT2);
- return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2) {
- SDVTList VTs = getVTList(VT1, VT2);
- return MorphNodeTo(N, Opc, VTs, (SDValue *)0, 0);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2, EVT VT3,
- const SDValue *Ops, unsigned NumOps) {
- SDVTList VTs = getVTList(VT1, VT2, VT3);
- return MorphNodeTo(N, Opc, VTs, Ops, NumOps);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2,
- SDValue Op1) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1 };
- return MorphNodeTo(N, Opc, VTs, Ops, 1);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2,
- SDValue Op1, SDValue Op2) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1, Op2 };
- return MorphNodeTo(N, Opc, VTs, Ops, 2);
-}
-
-SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
- EVT VT1, EVT VT2,
- SDValue Op1, SDValue Op2,
- SDValue Op3) {
- SDVTList VTs = getVTList(VT1, VT2);
- SDValue Ops[] = { Op1, Op2, Op3 };
- return MorphNodeTo(N, Opc, VTs, Ops, 3);
+ N = MorphNodeTo(N, ~MachineOpc, VTs, Ops, NumOps);
+ // Reset the NodeID to -1.
+ N->setNodeId(-1);
+ return N;
}
-/// MorphNodeTo - These *mutate* the specified node to have the specified
+/// MorphNodeTo - This *mutates* the specified node to have the specified
/// return type, opcode, and operands.
///
/// Note that MorphNodeTo returns the resultant node. If there is already a
@@ -4708,12 +4655,14 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
// Delete any nodes that are still dead after adding the uses for the
// new operands.
- SmallVector<SDNode *, 16> DeadNodes;
- for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
- E = DeadNodeSet.end(); I != E; ++I)
- if ((*I)->use_empty())
- DeadNodes.push_back(*I);
- RemoveDeadNodes(DeadNodes);
+ if (!DeadNodeSet.empty()) {
+ SmallVector<SDNode *, 16> DeadNodes;
+ for (SmallPtrSet<SDNode *, 16>::iterator I = DeadNodeSet.begin(),
+ E = DeadNodeSet.end(); I != E; ++I)
+ if ((*I)->use_empty())
+ DeadNodes.push_back(*I);
+ RemoveDeadNodes(DeadNodes);
+ }
if (IP)
CSEMap.InsertNode(N, IP); // Memoize the new node.
@@ -5293,8 +5242,11 @@ GlobalAddressSDNode::GlobalAddressSDNode(unsigned Opc, const GlobalValue *GA,
MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs, EVT memvt,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs), MemoryVT(memvt), MMO(mmo) {
- SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
+ assert(isNonTemporal() == MMO->isNonTemporal() &&
+ "Non-temporal encoding error!");
assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
}
@@ -5303,7 +5255,8 @@ MemSDNode::MemSDNode(unsigned Opc, DebugLoc dl, SDVTList VTs,
MachineMemOperand *mmo)
: SDNode(Opc, dl, VTs, Ops, NumOps),
MemoryVT(memvt), MMO(mmo) {
- SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile());
+ SubclassData = encodeMemSDNodeFlags(0, ISD::UNINDEXED, MMO->isVolatile(),
+ MMO->isNonTemporal());
assert(isVolatile() == MMO->isVolatile() && "Volatile encoding error!");
assert(memvt.getStoreSize() == MMO->getSize() && "Size mismatch!");
}
@@ -5472,15 +5425,15 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
if (const TargetInstrInfo *TII = G->getTarget().getInstrInfo())
if (getMachineOpcode() < TII->getNumOpcodes())
return TII->get(getMachineOpcode()).getName();
- return "<<Unknown Machine Node>>";
+ return "<<Unknown Machine Node #" + utostr(getOpcode()) + ">>";
}
if (G) {
const TargetLowering &TLI = G->getTargetLoweringInfo();
const char *Name = TLI.getTargetNodeName(getOpcode());
if (Name) return Name;
- return "<<Unknown Target Node>>";
+ return "<<Unknown Target Node #" + utostr(getOpcode()) + ">>";
}
- return "<<Unknown Node>>";
+ return "<<Unknown Node #" + utostr(getOpcode()) + ">>";
#ifndef NDEBUG
case ISD::DELETED_NODE:
@@ -5917,6 +5870,9 @@ void SDNode::print_details(raw_ostream &OS, const SelectionDAG *G) const {
if (G)
if (unsigned Order = G->GetOrdering(this))
OS << " [ORD=" << Order << ']';
+
+ if (getNodeId() != -1)
+ OS << " [ID=" << getNodeId() << ']';
}
void SDNode::print(raw_ostream &OS, const SelectionDAG *G) const {
@@ -6305,31 +6261,37 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
return true;
}
+#ifdef XDEBUG
static void checkForCyclesHelper(const SDNode *N,
- std::set<const SDNode *> &visited) {
- if (visited.find(N) != visited.end()) {
+ SmallPtrSet<const SDNode*, 32> &Visited,
+ SmallPtrSet<const SDNode*, 32> &Checked) {
+ // If this node has already been checked, don't check it again.
+ if (Checked.count(N))
+ return;
+
+ // If a node has already been visited on this depth-first walk, reject it as
+ // a cycle.
+ if (!Visited.insert(N)) {
dbgs() << "Offending node:\n";
N->dumprFull();
- assert(0 && "Detected cycle in SelectionDAG");
+ errs() << "Detected cycle in SelectionDAG\n";
+ abort();
}
-
- std::set<const SDNode*>::iterator i;
- bool inserted;
-
- tie(i, inserted) = visited.insert(N);
- assert(inserted && "Missed cycle");
-
- for(unsigned i = 0; i < N->getNumOperands(); ++i) {
- checkForCyclesHelper(N->getOperand(i).getNode(), visited);
- }
- visited.erase(i);
+
+ for(unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+ checkForCyclesHelper(N->getOperand(i).getNode(), Visited, Checked);
+
+ Checked.insert(N);
+ Visited.erase(N);
}
+#endif
void llvm::checkForCycles(const llvm::SDNode *N) {
#ifdef XDEBUG
assert(N && "Checking nonexistant SDNode");
- std::set<const SDNode *> visited;
- checkForCyclesHelper(N, visited);
+ SmallPtrSet<const SDNode*, 32> visited;
+ SmallPtrSet<const SDNode*, 32> checked;
+ checkForCyclesHelper(N, visited, checked);
#endif
}
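The rewritten helper is the classic two-set DFS: 'visited' tracks the current path, so re-entering it means a back edge (a cycle), while 'checked' memoizes nodes whose entire operand subgraph is already known acyclic, keeping the walk linear on DAGs with heavy sharing. The same scheme in isolation, on a toy node type:

    #include <set>
    #include <vector>

    struct N { std::vector<N*> ops; };

    bool hasCycle(const N *node, std::set<const N*> &visited,
                  std::set<const N*> &checked) {
      if (checked.count(node))
        return false;                  // subgraph already proven acyclic
      if (!visited.insert(node).second)
        return true;                   // back edge into the current path
      for (const N *op : node->ops)
        if (hasCycle(op, visited, checked))
          return true;
      checked.insert(node);
      visited.erase(node);             // leaving this path
      return false;
    }

    int main() {
      N a, b;
      a.ops = {&b};
      b.ops = {&a};                    // deliberate cycle
      std::set<const N*> visited, checked;
      return hasCycle(&a, visited, checked) ? 0 : 1;
    }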
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 85ecb95..308742b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -155,7 +155,7 @@ namespace {
/// this value and returns the result as a ValueVTs value. This uses
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
- SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+ SDValue getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
SDValue &Chain, SDValue *Flag) const;
/// getCopyToRegs - Emit a series of CopyToReg nodes that copies the
@@ -163,14 +163,14 @@ namespace {
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- unsigned Order, SDValue &Chain, SDValue *Flag) const;
+ SDValue &Chain, SDValue *Flag) const;
/// AddInlineAsmOperands - Add this value to the specified inlineasm node
/// operand list. This adds the code marker, matching input operand index
/// (if applicable), and includes the number of values added into it.
void AddInlineAsmOperands(unsigned Code,
bool HasMatching, unsigned MatchingIdx,
- SelectionDAG &DAG, unsigned Order,
+ SelectionDAG &DAG,
std::vector<SDValue> &Ops) const;
};
}
@@ -180,7 +180,7 @@ namespace {
/// larger than ValueVT then AssertOp can be used to specify whether the extra
/// bits are known to be zero (ISD::AssertZext) or sign extended from ValueVT
/// (ISD::AssertSext).
-static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl,
const SDValue *Parts,
unsigned NumParts, EVT PartVT, EVT ValueVT,
ISD::NodeType AssertOp = ISD::DELETED_NODE) {
@@ -205,9 +205,9 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
EVT HalfVT = EVT::getIntegerVT(*DAG.getContext(), RoundBits/2);
if (RoundParts > 2) {
- Lo = getCopyFromParts(DAG, dl, Order, Parts, RoundParts / 2,
+ Lo = getCopyFromParts(DAG, dl, Parts, RoundParts / 2,
PartVT, HalfVT);
- Hi = getCopyFromParts(DAG, dl, Order, Parts + RoundParts / 2,
+ Hi = getCopyFromParts(DAG, dl, Parts + RoundParts / 2,
RoundParts / 2, PartVT, HalfVT);
} else {
Lo = DAG.getNode(ISD::BIT_CONVERT, dl, HalfVT, Parts[0]);
@@ -223,7 +223,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
// Assemble the trailing non-power-of-2 part.
unsigned OddParts = NumParts - RoundParts;
EVT OddVT = EVT::getIntegerVT(*DAG.getContext(), OddParts * PartBits);
- Hi = getCopyFromParts(DAG, dl, Order,
+ Hi = getCopyFromParts(DAG, dl,
Parts + RoundParts, OddParts, PartVT, OddVT);
// Combine the round and odd parts.
@@ -259,7 +259,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
// If the register was not expanded, truncate or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i], 1,
+ Ops[i] = getCopyFromParts(DAG, dl, &Parts[i], 1,
PartVT, IntermediateVT);
} else if (NumParts > 0) {
// If the intermediate type was expanded, build the intermediate
@@ -268,7 +268,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- Ops[i] = getCopyFromParts(DAG, dl, Order, &Parts[i * Factor], Factor,
+ Ops[i] = getCopyFromParts(DAG, dl, &Parts[i * Factor], Factor,
PartVT, IntermediateVT);
}
@@ -292,7 +292,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
assert(ValueVT.isFloatingPoint() && PartVT.isInteger() &&
!PartVT.isVector() && "Unexpected split");
EVT IntVT = EVT::getIntegerVT(*DAG.getContext(), ValueVT.getSizeInBits());
- Val = getCopyFromParts(DAG, dl, Order, Parts, NumParts, PartVT, IntVT);
+ Val = getCopyFromParts(DAG, dl, Parts, NumParts, PartVT, IntVT);
}
}
@@ -349,7 +349,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
/// getCopyToParts - Create a series of nodes that contain the specified value
/// split into legal parts. If the parts contain more bits than Val, then, for
/// integers, ExtendKind can be used to specify how to generate the extra bits.
-static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
+static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl,
SDValue Val, SDValue *Parts, unsigned NumParts,
EVT PartVT,
ISD::NodeType ExtendKind = ISD::ANY_EXTEND) {
@@ -417,7 +417,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
SDValue OddVal = DAG.getNode(ISD::SRL, dl, ValueVT, Val,
DAG.getConstant(RoundBits,
TLI.getPointerTy()));
- getCopyToParts(DAG, dl, Order, OddVal, Parts + RoundParts,
+ getCopyToParts(DAG, dl, OddVal, Parts + RoundParts,
OddParts, PartVT);
if (TLI.isBigEndian())
@@ -514,7 +514,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
// If the register was not expanded, promote or copy the value,
// as appropriate.
for (unsigned i = 0; i != NumParts; ++i)
- getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i], 1, PartVT);
+ getCopyToParts(DAG, dl, Ops[i], &Parts[i], 1, PartVT);
} else if (NumParts > 0) {
// If the intermediate type was expanded, split the value into
// legal parts.
@@ -522,7 +522,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc dl, unsigned Order,
"Must expand into a divisible number of parts!");
unsigned Factor = NumParts / NumIntermediates;
for (unsigned i = 0; i != NumIntermediates; ++i)
- getCopyToParts(DAG, dl, Order, Ops[i], &Parts[i*Factor], Factor, PartVT);
+ getCopyToParts(DAG, dl, Ops[i], &Parts[i*Factor], Factor, PartVT);
}
}
@@ -680,7 +680,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
getCurDebugLoc());
}
- if (isa<StructType>(C->getType()) || isa<ArrayType>(C->getType())) {
+ if (C->getType()->isStructTy() || C->getType()->isArrayTy()) {
assert((isa<ConstantAggregateZero>(C) || isa<UndefValue>(C)) &&
"Unknown struct or array constant!");
@@ -747,8 +747,7 @@ SDValue SelectionDAGBuilder::getValue(const Value *V) {
RegsForValue RFV(*DAG.getContext(), TLI, InReg, V->getType());
SDValue Chain = DAG.getEntryNode();
- return RFV.getCopyFromRegs(DAG, getCurDebugLoc(),
- SDNodeOrder, Chain, NULL);
+ return RFV.getCopyFromRegs(DAG, getCurDebugLoc(), Chain, NULL);
}
/// Get the EVTs and ArgFlags collections that represent the legalized return
@@ -849,14 +848,12 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) {
Chain = DAG.getNode(ISD::TokenFactor, getCurDebugLoc(),
MVT::Other, &Chains[0], NumValues);
- } else {
- for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
- SmallVector<EVT, 4> ValueVTs;
- ComputeValueVTs(TLI, I.getOperand(i)->getType(), ValueVTs);
- unsigned NumValues = ValueVTs.size();
- if (NumValues == 0) continue;
-
- SDValue RetOp = getValue(I.getOperand(i));
+ } else if (I.getNumOperands() != 0) {
+ SmallVector<EVT, 4> ValueVTs;
+ ComputeValueVTs(TLI, I.getOperand(0)->getType(), ValueVTs);
+ unsigned NumValues = ValueVTs.size();
+ if (NumValues) {
+ SDValue RetOp = getValue(I.getOperand(0));
for (unsigned j = 0, f = NumValues; j != f; ++j) {
EVT VT = ValueVTs[j];
@@ -881,7 +878,7 @@ void SelectionDAGBuilder::visitRet(ReturnInst &I) {
unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT);
EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT);
SmallVector<SDValue, 4> Parts(NumParts);
- getCopyToParts(DAG, getCurDebugLoc(), SDNodeOrder,
+ getCopyToParts(DAG, getCurDebugLoc(),
SDValue(RetOp.getNode(), RetOp.getResNo() + j),
&Parts[0], NumParts, PartVT, ExtendKind);
@@ -2080,7 +2077,7 @@ void SelectionDAGBuilder::visitIndirectBr(IndirectBrInst &I) {
void SelectionDAGBuilder::visitFSub(User &I) {
// -0.0 - X --> fneg
const Type *Ty = I.getType();
- if (isa<VectorType>(Ty)) {
+ if (Ty->isVectorTy()) {
if (ConstantVector *CV = dyn_cast<ConstantVector>(I.getOperand(0))) {
const VectorType *DestTy = cast<VectorType>(I.getType());
const Type *ElTy = DestTy->getElementType();
@@ -2117,7 +2114,7 @@ void SelectionDAGBuilder::visitBinary(User &I, unsigned OpCode) {
void SelectionDAGBuilder::visitShift(User &I, unsigned Opcode) {
SDValue Op1 = getValue(I.getOperand(0));
SDValue Op2 = getValue(I.getOperand(1));
- if (!isa<VectorType>(I.getType()) &&
+ if (!I.getType()->isVectorTy() &&
Op2.getValueType() != TLI.getShiftAmountTy()) {
// If the operand is smaller than the shift count type, promote it.
EVT PTy = TLI.getPointerTy();
@@ -2890,7 +2887,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(CallInst &I,
///
/// where Op is the hexadecimal representation of the floating point value.
static SDValue
-GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) {
+GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) {
SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x007fffff, MVT::i32));
SDValue t2 = DAG.getNode(ISD::OR, dl, MVT::i32, t1,
@@ -2905,7 +2902,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl, unsigned Order) {
/// where Op is the hexadecimal representation of the floating point value.
static SDValue
GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI,
- DebugLoc dl, unsigned Order) {
+ DebugLoc dl) {
SDValue t0 = DAG.getNode(ISD::AND, dl, MVT::i32, Op,
DAG.getConstant(0x7f800000, MVT::i32));
SDValue t1 = DAG.getNode(ISD::SRL, dl, MVT::i32, t0,
@@ -3089,13 +3086,13 @@ SelectionDAGBuilder::visitLog(CallInst &I) {
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Scale the exponent by log(2) [0.69314718f].
- SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
getF32Constant(DAG, 0x3f317218));
// Get the significand and build it into a floating-point number with
// exponent of 1.
- SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+ SDValue X = GetSignificand(DAG, Op1, dl);
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
@@ -3199,11 +3196,11 @@ SelectionDAGBuilder::visitLog2(CallInst &I) {
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Get the exponent.
- SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+ SDValue LogOfExponent = GetExponent(DAG, Op1, TLI, dl);
// Get the significand and build it into a floating-point number with
// exponent of 1.
- SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+ SDValue X = GetSignificand(DAG, Op1, dl);
// Different possible minimax approximations of significand in
// floating-point for various degrees of accuracy over [1,2].
@@ -3308,13 +3305,13 @@ SelectionDAGBuilder::visitLog10(CallInst &I) {
SDValue Op1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::i32, Op);
// Scale the exponent by log10(2) [0.30102999f].
- SDValue Exp = GetExponent(DAG, Op1, TLI, dl, SDNodeOrder);
+ SDValue Exp = GetExponent(DAG, Op1, TLI, dl);
SDValue LogOfExponent = DAG.getNode(ISD::FMUL, dl, MVT::f32, Exp,
getF32Constant(DAG, 0x3e9a209a));
// Get the significand and build it into a floating-point number with
// exponent of 1.
- SDValue X = GetSignificand(DAG, Op1, dl, SDNodeOrder);
+ SDValue X = GetSignificand(DAG, Op1, dl);
if (LimitFloatPrecision <= 6) {
// For floating-point precision of 6:
@@ -4287,8 +4284,8 @@ isInTailCallPosition(CallSite CS, Attributes CalleeRetAttr,
// Check for a truly no-op bitcast.
if (isa<BitCastInst>(U) &&
(U->getOperand(0)->getType() == U->getType() ||
- (isa<PointerType>(U->getOperand(0)->getType()) &&
- isa<PointerType>(U->getType()))))
+ (U->getOperand(0)->getType()->isPointerTy() &&
+ U->getType()->isPointerTy())))
continue;
// Otherwise it's not a true no-op.
return false;
@@ -4396,7 +4393,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
CS.getCallingConv(),
isTailCall,
!CS.getInstruction()->use_empty(),
- Callee, Args, DAG, getCurDebugLoc(), SDNodeOrder);
+ Callee, Args, DAG, getCurDebugLoc());
assert((isTailCall || Result.second.getNode()) &&
"Non-null chain expected with non-tail call!");
assert((Result.second.getNode() || !Result.first.getNode()) &&
@@ -4444,7 +4441,7 @@ void SelectionDAGBuilder::LowerCallTo(CallSite CS, SDValue Callee,
unsigned NumRegs = TLI.getNumRegisters(RetTy->getContext(), VT);
SDValue ReturnValue =
- getCopyFromParts(DAG, getCurDebugLoc(), SDNodeOrder, &Values[CurReg], NumRegs,
+ getCopyFromParts(DAG, getCurDebugLoc(), &Values[CurReg], NumRegs,
RegisterVT, VT, AssertOp);
ReturnValues.push_back(ReturnValue);
CurReg += NumRegs;
@@ -4541,9 +4538,9 @@ bool SelectionDAGBuilder::visitMemCmpCall(CallInst &I) {
return false;
Value *LHS = I.getOperand(1), *RHS = I.getOperand(2);
- if (!isa<PointerType>(LHS->getType()) || !isa<PointerType>(RHS->getType()) ||
- !isa<IntegerType>(I.getOperand(3)->getType()) ||
- !isa<IntegerType>(I.getType()))
+ if (!LHS->getType()->isPointerTy() || !RHS->getType()->isPointerTy() ||
+ !I.getOperand(3)->getType()->isIntegerTy() ||
+ !I.getType()->isIntegerTy())
return false;
ConstantInt *Size = dyn_cast<ConstantInt>(I.getOperand(3));
@@ -4711,8 +4708,7 @@ void SelectionDAGBuilder::visitCall(CallInst &I) {
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
- unsigned Order, SDValue &Chain,
- SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag) const {
// Assemble the legal parts into the final values.
SmallVector<SDValue, 4> Values(ValueVTs.size());
SmallVector<SDValue, 8> Parts;
@@ -4777,7 +4773,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
Parts[i] = P;
}
- Values[Value] = getCopyFromParts(DAG, dl, Order, Parts.begin(),
+ Values[Value] = getCopyFromParts(DAG, dl, Parts.begin(),
NumRegs, RegisterVT, ValueVT);
Part += NumRegs;
Parts.clear();
@@ -4793,8 +4789,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, DebugLoc dl,
/// Chain/Flag as the input and updates them for the output Chain/Flag.
/// If the Flag pointer is NULL, no flag is used.
void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
- unsigned Order, SDValue &Chain,
- SDValue *Flag) const {
+ SDValue &Chain, SDValue *Flag) const {
// Get the list of the values's legal parts.
unsigned NumRegs = Regs.size();
SmallVector<SDValue, 8> Parts(NumRegs);
@@ -4803,7 +4798,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
unsigned NumParts = TLI->getNumRegisters(*DAG.getContext(), ValueVT);
EVT RegisterVT = RegVTs[Value];
- getCopyToParts(DAG, dl, Order,
+ getCopyToParts(DAG, dl,
Val.getValue(Val.getResNo() + Value),
&Parts[Part], NumParts, RegisterVT);
Part += NumParts;
@@ -4844,7 +4839,7 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl,
/// values added into it.
void RegsForValue::AddInlineAsmOperands(unsigned Code,
bool HasMatching,unsigned MatchingIdx,
- SelectionDAG &DAG, unsigned Order,
+ SelectionDAG &DAG,
std::vector<SDValue> &Ops) const {
assert(Regs.size() < (1 << 13) && "Too many inline asm outputs!");
unsigned Flag = Code | (Regs.size() << 3);
@@ -5434,7 +5429,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
2 /* REGDEF */ ,
false,
0,
- DAG, SDNodeOrder,
+ DAG,
AsmNodeOperands);
break;
}
@@ -5482,10 +5477,10 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// Use the produced MatchedRegs object to
MatchedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- SDNodeOrder, Chain, &Flag);
+ Chain, &Flag);
MatchedRegs.AddInlineAsmOperands(1 /*REGUSE*/,
true, OpInfo.getMatchedOperand(),
- DAG, SDNodeOrder, AsmNodeOperands);
+ DAG, AsmNodeOperands);
break;
} else {
assert(((OpFlag & 7) == 4) && "Unknown matching constraint!");
@@ -5546,11 +5541,10 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
}
OpInfo.AssignedRegs.getCopyToRegs(InOperandVal, DAG, getCurDebugLoc(),
- SDNodeOrder, Chain, &Flag);
+ Chain, &Flag);
OpInfo.AssignedRegs.AddInlineAsmOperands(1/*REGUSE*/, false, 0,
- DAG, SDNodeOrder,
- AsmNodeOperands);
+ DAG, AsmNodeOperands);
break;
}
case InlineAsm::isClobber: {
@@ -5558,7 +5552,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// allocator is aware that the physreg got clobbered.
if (!OpInfo.AssignedRegs.Regs.empty())
OpInfo.AssignedRegs.AddInlineAsmOperands(6 /* EARLYCLOBBER REGDEF */,
- false, 0, DAG, SDNodeOrder,
+ false, 0, DAG,
AsmNodeOperands);
break;
}
@@ -5578,7 +5572,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
// and set it as the value of the call.
if (!RetValRegs.Regs.empty()) {
SDValue Val = RetValRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
- SDNodeOrder, Chain, &Flag);
+ Chain, &Flag);
// FIXME: Why don't we do this for inline asms with MRVs?
if (CS.getType()->isSingleValueType() && CS.getType()->isSized()) {
@@ -5618,7 +5612,7 @@ void SelectionDAGBuilder::visitInlineAsm(CallSite CS) {
RegsForValue &OutRegs = IndirectStoresToEmit[i].first;
Value *Ptr = IndirectStoresToEmit[i].second;
SDValue OutVal = OutRegs.getCopyFromRegs(DAG, getCurDebugLoc(),
- SDNodeOrder, Chain, &Flag);
+ Chain, &Flag);
StoresToEmit.push_back(std::make_pair(OutVal, Ptr));
}
@@ -5683,8 +5677,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
CallingConv::ID CallConv, bool isTailCall,
bool isReturnValueUsed,
SDValue Callee,
- ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl,
- unsigned Order) {
+ ArgListTy &Args, SelectionDAG &DAG, DebugLoc dl) {
// Handle all of the outgoing arguments.
SmallVector<ISD::OutputArg, 32> Outs;
for (unsigned i = 0, e = Args.size(); i != e; ++i) {
@@ -5735,7 +5728,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
else if (Args[i].isZExt)
ExtendKind = ISD::ZERO_EXTEND;
- getCopyToParts(DAG, dl, Order, Op, &Parts[0], NumParts,
+ getCopyToParts(DAG, dl, Op, &Parts[0], NumParts,
PartVT, ExtendKind);
for (unsigned j = 0; j != NumParts; ++j) {
@@ -5814,7 +5807,7 @@ TargetLowering::LowerCallTo(SDValue Chain, const Type *RetTy,
EVT RegisterVT = getRegisterType(RetTy->getContext(), VT);
unsigned NumRegs = getNumRegisters(RetTy->getContext(), VT);
- ReturnValues.push_back(getCopyFromParts(DAG, dl, Order, &InVals[CurReg],
+ ReturnValues.push_back(getCopyFromParts(DAG, dl, &InVals[CurReg],
NumRegs, RegisterVT, VT,
AssertOp));
CurReg += NumRegs;
@@ -5854,7 +5847,7 @@ void SelectionDAGBuilder::CopyValueToVirtualRegister(Value *V, unsigned Reg) {
RegsForValue RFV(V->getContext(), TLI, Reg, V->getType());
SDValue Chain = DAG.getEntryNode();
- RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), SDNodeOrder, Chain, 0);
+ RFV.getCopyToRegs(Op, DAG, getCurDebugLoc(), Chain, 0);
PendingExports.push_back(Chain);
}
@@ -5980,7 +5973,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
EVT VT = ValueVTs[0];
EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT);
ISD::NodeType AssertOp = ISD::DELETED_NODE;
- SDValue ArgValue = getCopyFromParts(DAG, dl, 0, &InVals[0], 1,
+ SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1,
RegVT, VT, AssertOp);
MachineFunction& MF = SDB->DAG.getMachineFunction();
@@ -6014,7 +6007,7 @@ void SelectionDAGISel::LowerArguments(BasicBlock *LLVMBB) {
else if (F.paramHasAttr(Idx, Attribute::ZExt))
AssertOp = ISD::AssertZext;
- ArgValues.push_back(getCopyFromParts(DAG, dl, 0, &InVals[i],
+ ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i],
NumParts, PartVT, VT,
AssertOp));
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index eead526..2e2020d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -723,9 +723,9 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// code to the MachineBasicBlock.
if (TimePassesIsEnabled) {
NamedRegionTimer T("Instruction Selection", GroupName);
- InstructionSelect();
+ DoInstructionSelection();
} else {
- InstructionSelect();
+ DoInstructionSelection();
}
DEBUG(dbgs() << "Selected selection DAG:\n");
@@ -765,6 +765,66 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
DEBUG(BB->dump());
}
+void SelectionDAGISel::DoInstructionSelection() {
+ DEBUG(errs() << "===== Instruction selection begins:\n");
+
+ PreprocessISelDAG();
+
+ // Select target instructions for the DAG.
+ {
+ // Number all nodes with a topological order and set DAGSize.
+ DAGSize = CurDAG->AssignTopologicalOrder();
+
+    // Create a dummy node (which is not added to allnodes) that adds a
+    // reference to the root node, preventing it from being deleted and
+    // tracking any changes of the root.
+ HandleSDNode Dummy(CurDAG->getRoot());
+ ISelPosition = SelectionDAG::allnodes_iterator(CurDAG->getRoot().getNode());
+ ++ISelPosition;
+
+    // The AllNodes list is now topologically sorted. Visit the nodes by
+    // starting at the end of the list (the root of the graph) and proceeding
+    // back toward the beginning (the entry node).
+ while (ISelPosition != CurDAG->allnodes_begin()) {
+ SDNode *Node = --ISelPosition;
+ // Skip dead nodes. DAGCombiner is expected to eliminate all dead nodes,
+ // but there are currently some corner cases that it misses. Also, this
+ // makes it theoretically possible to disable the DAGCombiner.
+ if (Node->use_empty())
+ continue;
+
+ SDNode *ResNode = Select(Node);
+
+ // FIXME: This is pretty gross. 'Select' should be changed to not return
+ // anything at all and this code should be nuked with a tactical strike.
+
+ // If node should not be replaced, continue with the next one.
+ if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE)
+ continue;
+ // Replace node.
+ if (ResNode)
+ ReplaceUses(Node, ResNode);
+
+ // If after the replacement this node is not used any more,
+ // remove this dead node.
+ if (Node->use_empty()) { // Don't delete EntryToken, etc.
+ ISelUpdater ISU(ISelPosition);
+ CurDAG->RemoveDeadNode(Node, &ISU);
+ }
+ }
+
+ CurDAG->setRoot(Dummy.getValue());
+ }
+ DEBUG(errs() << "===== Instruction selection ends:\n");
+
+ PostprocessISelDAG();
+
+ // FIXME: This shouldn't be needed, remove it.
+ CurDAG->RemoveDeadNodes();
+}
+
+
void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
MachineFunction &MF,
MachineModuleInfo *MMI,
@@ -1316,13 +1376,29 @@ static SDNode *findFlagUse(SDNode *N) {
/// This function recursively traverses up the operand chain, ignoring
/// certain nodes.
static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
- SDNode *Root,
- SmallPtrSet<SDNode*, 16> &Visited) {
- if (Use->getNodeId() < Def->getNodeId() ||
- !Visited.insert(Use))
+ SDNode *Root, SmallPtrSet<SDNode*, 16> &Visited,
+ bool IgnoreChains) {
+  // Node IDs are assigned in topological order so that a node's ID is
+  // guaranteed to be greater than the IDs of all of its (recursive) operands.
+  // If we scan to a point where 'Use' has a smaller ID than the node we're
+  // scanning for, then we know we will never find it.
+  //
+  // Use's ID may be -1 (unassigned) if it is a newly allocated node. This can
+  // happen because we scan down to newly selected nodes in the case of flag
+  // uses.
+  if (Use->getNodeId() < Def->getNodeId() && Use->getNodeId() != -1)
+ return false;
+
+  // Don't revisit a node if we have already scanned it and didn't fail; we
+  // know we won't fail if we scan it again.
+ if (!Visited.insert(Use))
return false;
for (unsigned i = 0, e = Use->getNumOperands(); i != e; ++i) {
+    // Ignore chain uses; they are validated by HandleMergeInputChains.
+ if (Use->getOperand(i).getValueType() == MVT::Other && IgnoreChains)
+ continue;
+
SDNode *N = Use->getOperand(i).getNode();
if (N == Def) {
if (Use == ImmedUse || Use == Root)
@@ -1332,7 +1408,7 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
}
// Traverse up the operand chain.
- if (findNonImmUse(N, Def, ImmedUse, Root, Visited))
+ if (findNonImmUse(N, Def, ImmedUse, Root, Visited, IgnoreChains))
return true;
}
return false;
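
The pruning that the NodeId check above enables can be seen in isolation with a toy DAG. This is a minimal sketch, not LLVM API: ToyNode and reaches are hypothetical, and the real findNonImmUse additionally memoizes with the Visited set and can skip chain operands.

#include <vector>

// Toy DAG node. Id respects a topological numbering, i.e. every node's Id is
// greater than the Ids of all of its operands; Id == -1 marks a newly
// created, not-yet-numbered node.
struct ToyNode {
  int Id;
  std::vector<ToyNode*> Ops;
};

// Walk up the operand chain from Use looking for Def. Any numbered node whose
// Id is already below Def's can be pruned: by the topological invariant, Def
// cannot appear among its (recursive) operands.
static bool reaches(const ToyNode *Use, const ToyNode *Def) {
  if (Use == Def) return true;
  if (Use->Id != -1 && Use->Id < Def->Id) return false; // Prune this subtree.
  for (unsigned i = 0, e = Use->Ops.size(); i != e; ++i)
    if (reaches(Use->Ops[i], Def)) return true;
  return false;
}

int main() {
  ToyNode A = { 0, std::vector<ToyNode*>() };
  ToyNode B = { 1, std::vector<ToyNode*>(1, &A) };
  ToyNode C = { 2, std::vector<ToyNode*>(1, &B) };
  return reaches(&C, &A) ? 0 : 1; // C reaches A through B.
}
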
@@ -1347,9 +1423,10 @@ static bool findNonImmUse(SDNode *Use, SDNode* Def, SDNode *ImmedUse,
/// have one non-chain use, we only need to watch out for load/op/store
/// and load/op/cmp case where the root (store / cmp) may reach the load via
/// its chain operand.
-static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse) {
+static inline bool isNonImmUse(SDNode *Root, SDNode *Def, SDNode *ImmedUse,
+ bool IgnoreChains) {
SmallPtrSet<SDNode*, 16> Visited;
- return findNonImmUse(Root, Def, ImmedUse, Root, Visited);
+ return findNonImmUse(Root, Def, ImmedUse, Root, Visited, IgnoreChains);
}
/// IsProfitableToFold - Returns true if it's profitable to fold the specific
@@ -1362,7 +1439,8 @@ bool SelectionDAGISel::IsProfitableToFold(SDValue N, SDNode *U,
/// IsLegalToFold - Returns true if the specific operand node N of
/// U can be folded during instruction selection that starts at Root.
-bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const {
+bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root,
+ bool IgnoreChains) const {
if (OptLevel == CodeGenOpt::None) return false;
// If Root use can somehow reach N through a path that doesn't contain
@@ -1416,7 +1494,7 @@ bool SelectionDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const {
VT = Root->getValueType(Root->getNumValues()-1);
}
- return !isNonImmUse(Root, N.getNode(), U);
+ return !isNonImmUse(Root, N.getNode(), U, IgnoreChains);
}
SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
@@ -1428,6 +1506,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) {
VTs.push_back(MVT::Flag);
SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(),
VTs, &Ops[0], Ops.size());
+ New->setNodeId(-1);
return New.getNode();
}
@@ -1443,7 +1522,1200 @@ SDNode *SelectionDAGISel::Select_EH_LABEL(SDNode *N) {
MVT::Other, Tmp, Chain);
}
+/// GetVBR - Decode a VBR encoding whose top bit is set.
+ALWAYS_INLINE static uint64_t
+GetVBR(uint64_t Val, const unsigned char *MatcherTable, unsigned &Idx) {
+ assert(Val >= 128 && "Not a VBR");
+ Val &= 127; // Remove first vbr bit.
+
+ unsigned Shift = 7;
+ uint64_t NextBits;
+ do {
+ NextBits = MatcherTable[Idx++];
+ Val |= (NextBits&127) << Shift;
+ Shift += 7;
+ } while (NextBits & 128);
+
+ return Val;
+}
+
+
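
For reference, the VBR scheme GetVBR consumes round-trips like this. A minimal standalone sketch with hypothetical encodeVBR/decodeVBR helpers: 7 payload bits per byte, high bit set on every byte but the last; unlike GetVBR, decodeVBR reads the first byte itself rather than receiving it pre-stripped by a dispatch loop.

#include <cassert>
#include <cstdint>
#include <vector>

// Encode Val in 7-bit groups, least significant first; bit 7 of each byte
// means "more bytes follow".
static void encodeVBR(uint64_t Val, std::vector<unsigned char> &Out) {
  do {
    unsigned char Byte = Val & 127;
    Val >>= 7;
    if (Val) Byte |= 128;                  // Continuation bit.
    Out.push_back(Byte);
  } while (Val);
}

// Decode starting at Idx, advancing Idx past the encoding.
static uint64_t decodeVBR(const std::vector<unsigned char> &In, unsigned &Idx) {
  uint64_t Val = 0;
  unsigned Shift = 0;
  unsigned char Byte;
  do {
    Byte = In[Idx++];
    Val |= uint64_t(Byte & 127) << Shift;
    Shift += 7;
  } while (Byte & 128);
  return Val;
}

int main() {
  std::vector<unsigned char> Buf;
  encodeVBR(300, Buf);                     // 300 encodes as 0xAC 0x02.
  unsigned Idx = 0;
  assert(decodeVBR(Buf, Idx) == 300 && Idx == Buf.size());
  return 0;
}
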
+/// UpdateChainsAndFlags - When a match is complete, this method updates uses of
+/// interior flag and chain results to use the new flag and chain results.
+void SelectionDAGISel::
+UpdateChainsAndFlags(SDNode *NodeToMatch, SDValue InputChain,
+ const SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SDValue InputFlag,
+ const SmallVectorImpl<SDNode*> &FlagResultNodesMatched,
+ bool isMorphNodeTo) {
+ SmallVector<SDNode*, 4> NowDeadNodes;
+
+ ISelUpdater ISU(ISelPosition);
+
+ // Now that all the normal results are replaced, we replace the chain and
+ // flag results if present.
+ if (!ChainNodesMatched.empty()) {
+ assert(InputChain.getNode() != 0 &&
+ "Matched input chains but didn't produce a chain");
+ // Loop over all of the nodes we matched that produced a chain result.
+ // Replace all the chain results with the final chain we ended up with.
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ SDNode *ChainNode = ChainNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (ChainNode->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ // Don't replace the results of the root node if we're doing a
+ // MorphNodeTo.
+ if (ChainNode == NodeToMatch && isMorphNodeTo)
+ continue;
+
+ SDValue ChainVal = SDValue(ChainNode, ChainNode->getNumValues()-1);
+ if (ChainVal.getValueType() == MVT::Flag)
+ ChainVal = ChainVal.getValue(ChainVal->getNumValues()-2);
+ assert(ChainVal.getValueType() == MVT::Other && "Not a chain?");
+ CurDAG->ReplaceAllUsesOfValueWith(ChainVal, InputChain, &ISU);
+
+ // If the node became dead, delete it.
+ if (ChainNode->use_empty())
+ NowDeadNodes.push_back(ChainNode);
+ }
+ }
+
+ // If the result produces a flag, update any flag results in the matched
+ // pattern with the flag result.
+ if (InputFlag.getNode() != 0) {
+ // Handle any interior nodes explicitly marked.
+ for (unsigned i = 0, e = FlagResultNodesMatched.size(); i != e; ++i) {
+ SDNode *FRN = FlagResultNodesMatched[i];
+
+ // If this node was already deleted, don't look at it.
+ if (FRN->getOpcode() == ISD::DELETED_NODE)
+ continue;
+
+ assert(FRN->getValueType(FRN->getNumValues()-1) == MVT::Flag &&
+ "Doesn't have a flag result");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(FRN, FRN->getNumValues()-1),
+ InputFlag, &ISU);
+
+ // If the node became dead, delete it.
+ if (FRN->use_empty())
+ NowDeadNodes.push_back(FRN);
+ }
+ }
+
+ if (!NowDeadNodes.empty())
+ CurDAG->RemoveDeadNodes(NowDeadNodes, &ISU);
+
+ DEBUG(errs() << "ISEL: Match complete!\n");
+}
+
+enum ChainResult {
+ CR_Simple,
+ CR_InducesCycle,
+ CR_LeadsToInteriorNode
+};
+
+/// WalkChainUsers - Walk down the users of the specified chained node that is
+/// part of the pattern we're matching, looking at all of the users we find.
+/// This determines whether something is an interior node, whether we have a
+/// non-pattern node in between two pattern nodes (which prevents folding
+/// because it would induce a cycle), and whether we have a TokenFactor node
+/// sandwiched between pattern nodes (in which case the TF becomes part of the
+/// pattern).
+///
+/// The walk we do here is guaranteed to be small because we quickly get down to
+/// already selected nodes "below" us.
+static ChainResult
+WalkChainUsers(SDNode *ChainedNode,
+ SmallVectorImpl<SDNode*> &ChainedNodesInPattern,
+ SmallVectorImpl<SDNode*> &InteriorChainedNodes) {
+ ChainResult Result = CR_Simple;
+
+ for (SDNode::use_iterator UI = ChainedNode->use_begin(),
+ E = ChainedNode->use_end(); UI != E; ++UI) {
+ // Make sure the use is of the chain, not some other value we produce.
+ if (UI.getUse().getValueType() != MVT::Other) continue;
+
+ SDNode *User = *UI;
+
+ // If we see an already-selected machine node, then we've gone beyond the
+ // pattern that we're selecting down into the already selected chunk of the
+ // DAG.
+ if (User->isMachineOpcode() ||
+ User->getOpcode() == ISD::HANDLENODE) // Root of the graph.
+ continue;
+
+ if (User->getOpcode() == ISD::CopyToReg ||
+ User->getOpcode() == ISD::CopyFromReg ||
+ User->getOpcode() == ISD::INLINEASM) {
+ // If their node ID got reset to -1 then they've already been selected.
+ // Treat them like a MachineOpcode.
+ if (User->getNodeId() == -1)
+ continue;
+ }
+
+ // If we have a TokenFactor, we handle it specially.
+ if (User->getOpcode() != ISD::TokenFactor) {
+ // If the node isn't a token factor and isn't part of our pattern, then it
+ // must be a random chained node in between two nodes we're selecting.
+ // This happens when we have something like:
+ // x = load ptr
+ // call
+ // y = x+4
+ // store y -> ptr
+ // Because we structurally match the load/store as a read/modify/write,
+ // but the call is chained between them. We cannot fold in this case
+ // because it would induce a cycle in the graph.
+ if (!std::count(ChainedNodesInPattern.begin(),
+ ChainedNodesInPattern.end(), User))
+ return CR_InducesCycle;
+
+ // Otherwise we found a node that is part of our pattern. For example in:
+ // x = load ptr
+ // y = x+4
+ // store y -> ptr
+ // This would happen when we're scanning down from the load and see the
+ // store as a user. Record that there is a use of ChainedNode that is
+ // part of the pattern and keep scanning uses.
+ Result = CR_LeadsToInteriorNode;
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+    // If we found a TokenFactor, there are two cases to consider: first, if the
+ // TokenFactor is just hanging "below" the pattern we're matching (i.e. no
+ // uses of the TF are in our pattern) we just want to ignore it. Second,
+ // the TokenFactor can be sandwiched in between two chained nodes, like so:
+ // [Load chain]
+ // ^
+ // |
+ // [Load]
+ // ^ ^
+ // | \ DAG's like cheese
+ // / \ do you?
+ // / |
+ // [TokenFactor] [Op]
+ // ^ ^
+ // | |
+ // \ /
+ // \ /
+ // [Store]
+ //
+ // In this case, the TokenFactor becomes part of our match and we rewrite it
+ // as a new TokenFactor.
+ //
+ // To distinguish these two cases, do a recursive walk down the uses.
+ switch (WalkChainUsers(User, ChainedNodesInPattern, InteriorChainedNodes)) {
+ case CR_Simple:
+ // If the uses of the TokenFactor are just already-selected nodes, ignore
+ // it, it is "below" our pattern.
+ continue;
+ case CR_InducesCycle:
+ // If the uses of the TokenFactor lead to nodes that are not part of our
+ // pattern that are not selected, folding would turn this into a cycle,
+ // bail out now.
+ return CR_InducesCycle;
+ case CR_LeadsToInteriorNode:
+ break; // Otherwise, keep processing.
+ }
+
+ // Okay, we know we're in the interesting interior case. The TokenFactor
+ // is now going to be considered part of the pattern so that we rewrite its
+ // uses (it may have uses that are not part of the pattern) with the
+ // ultimate chain result of the generated code. We will also add its chain
+ // inputs as inputs to the ultimate TokenFactor we create.
+ Result = CR_LeadsToInteriorNode;
+ ChainedNodesInPattern.push_back(User);
+ InteriorChainedNodes.push_back(User);
+ continue;
+ }
+
+ return Result;
+}
+
+/// HandleMergeInputChains - This implements the OPC_EmitMergeInputChains
+/// operation for when the pattern matched at least one node with a chain. The
+/// input vector contains a list of all of the chained nodes that we match. We
+/// must determine if this is a valid thing to cover (i.e. matching it won't
+/// induce cycles in the DAG) and, if so, create a TokenFactor node that will
+/// be used as the input node chain for the generated nodes.
+static SDValue
+HandleMergeInputChains(SmallVectorImpl<SDNode*> &ChainNodesMatched,
+ SelectionDAG *CurDAG) {
+ // Walk all of the chained nodes we've matched, recursively scanning down the
+ // users of the chain result. This adds any TokenFactor nodes that are caught
+ // in between chained nodes to the chained and interior nodes list.
+ SmallVector<SDNode*, 3> InteriorChainedNodes;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ if (WalkChainUsers(ChainNodesMatched[i], ChainNodesMatched,
+ InteriorChainedNodes) == CR_InducesCycle)
+ return SDValue(); // Would induce a cycle.
+ }
+
+ // Okay, we have walked all the matched nodes and collected TokenFactor nodes
+ // that we are interested in. Form our input TokenFactor node.
+ SmallVector<SDValue, 3> InputChains;
+ for (unsigned i = 0, e = ChainNodesMatched.size(); i != e; ++i) {
+ // Add the input chain of this node to the InputChains list (which will be
+ // the operands of the generated TokenFactor) if it's not an interior node.
+ SDNode *N = ChainNodesMatched[i];
+ if (N->getOpcode() != ISD::TokenFactor) {
+ if (std::count(InteriorChainedNodes.begin(),InteriorChainedNodes.end(),N))
+ continue;
+
+ // Otherwise, add the input chain.
+ SDValue InChain = ChainNodesMatched[i]->getOperand(0);
+ assert(InChain.getValueType() == MVT::Other && "Not a chain");
+ InputChains.push_back(InChain);
+ continue;
+ }
+
+ // If we have a token factor, we want to add all inputs of the token factor
+ // that are not part of the pattern we're matching.
+ for (unsigned op = 0, e = N->getNumOperands(); op != e; ++op) {
+ if (!std::count(ChainNodesMatched.begin(), ChainNodesMatched.end(),
+ N->getOperand(op).getNode()))
+ InputChains.push_back(N->getOperand(op));
+ }
+ }
+
+ if (InputChains.size() == 1)
+ return InputChains[0];
+ return CurDAG->getNode(ISD::TokenFactor, ChainNodesMatched[0]->getDebugLoc(),
+ MVT::Other, &InputChains[0], InputChains.size());
+}
+
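
The chain-collection rule above, reduced to a toy with integer ids standing in for SDNodes. This sketch keeps only the core filter, that an input chain produced inside the pattern must not feed the merged TokenFactor, and ignores the interior-node and nested-TokenFactor handling done by WalkChainUsers.

#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

// For each matched node (first) with its input chain (second), keep the input
// chain unless it is itself produced by another node of the pattern.
static std::vector<int>
collectOutsideChains(const std::vector<std::pair<int, int> > &NodeAndInput,
                     const std::vector<int> &PatternNodes) {
  std::vector<int> Chains;
  for (unsigned i = 0, e = NodeAndInput.size(); i != e; ++i) {
    int In = NodeAndInput[i].second;
    if (!std::count(PatternNodes.begin(), PatternNodes.end(), In))
      Chains.push_back(In);
  }
  return Chains;
}

int main() {
  // Pattern matches nodes 10 and 11. Node 10's input chain is the outside
  // chain 5; node 11's input chain is node 10 (an intra-match reference).
  std::vector<std::pair<int, int> > NI;
  NI.push_back(std::make_pair(10, 5));
  NI.push_back(std::make_pair(11, 10));
  std::vector<int> Pat;
  Pat.push_back(10);
  Pat.push_back(11);
  std::vector<int> Out = collectOutsideChains(NI, Pat);
  assert(Out.size() == 1 && Out[0] == 5); // Only the outside chain survives.
  return 0;
}
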
+/// MorphNode - Handle morphing a node in place for the selector.
+SDNode *SelectionDAGISel::
+MorphNode(SDNode *Node, unsigned TargetOpc, SDVTList VTList,
+ const SDValue *Ops, unsigned NumOps, unsigned EmitNodeInfo) {
+ // It is possible we're using MorphNodeTo to replace a node with no
+ // normal results with one that has a normal result (or we could be
+ // adding a chain) and the input could have flags and chains as well.
+  // In this case we need to shift the operands down.
+ // FIXME: This is a horrible hack and broken in obscure cases, no worse
+ // than the old isel though. We should sink this into MorphNodeTo.
+ int OldFlagResultNo = -1, OldChainResultNo = -1;
+
+ unsigned NTMNumResults = Node->getNumValues();
+ if (Node->getValueType(NTMNumResults-1) == MVT::Flag) {
+ OldFlagResultNo = NTMNumResults-1;
+ if (NTMNumResults != 1 &&
+ Node->getValueType(NTMNumResults-2) == MVT::Other)
+ OldChainResultNo = NTMNumResults-2;
+ } else if (Node->getValueType(NTMNumResults-1) == MVT::Other)
+ OldChainResultNo = NTMNumResults-1;
+
+ // Call the underlying SelectionDAG routine to do the transmogrification. Note
+ // that this deletes operands of the old node that become dead.
+ SDNode *Res = CurDAG->MorphNodeTo(Node, ~TargetOpc, VTList, Ops, NumOps);
+
+ // MorphNodeTo can operate in two ways: if an existing node with the
+ // specified operands exists, it can just return it. Otherwise, it
+ // updates the node in place to have the requested operands.
+ if (Res == Node) {
+ // If we updated the node in place, reset the node ID. To the isel,
+ // this should be just like a newly allocated machine node.
+ Res->setNodeId(-1);
+ }
+
+ unsigned ResNumResults = Res->getNumValues();
+ // Move the flag if needed.
+ if ((EmitNodeInfo & OPFL_FlagOutput) && OldFlagResultNo != -1 &&
+ (unsigned)OldFlagResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldFlagResultNo),
+ SDValue(Res, ResNumResults-1));
+
+ if ((EmitNodeInfo & OPFL_FlagOutput) != 0)
+ --ResNumResults;
+
+ // Move the chain reference if needed.
+ if ((EmitNodeInfo & OPFL_Chain) && OldChainResultNo != -1 &&
+ (unsigned)OldChainResultNo != ResNumResults-1)
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(Node, OldChainResultNo),
+ SDValue(Res, ResNumResults-1));
+
+  // Otherwise, no update-in-place happened because an equivalent node already
+  // exists. Replace uses of the old node with the new one.
+ if (Res != Node)
+ CurDAG->ReplaceAllUsesWith(Node, Res);
+
+ return Res;
+}
+
+/// CheckSame - Implements OPC_CheckSame.
+ALWAYS_INLINE static bool
+CheckSame(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const SmallVectorImpl<SDValue> &RecordedNodes) {
+ // Accept if it is exactly the same as a previously recorded node.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ assert(RecNo < RecordedNodes.size() && "Invalid CheckSame");
+ return N == RecordedNodes[RecNo];
+}
+
+/// CheckPatternPredicate - Implements OPC_CheckPatternPredicate.
+ALWAYS_INLINE static bool
+CheckPatternPredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SelectionDAGISel &SDISel) {
+ return SDISel.CheckPatternPredicate(MatcherTable[MatcherIndex++]);
+}
+
+/// CheckNodePredicate - Implements OPC_CheckPredicate.
+ALWAYS_INLINE static bool
+CheckNodePredicate(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SelectionDAGISel &SDISel, SDNode *N) {
+ return SDISel.CheckNodePredicate(N, MatcherTable[MatcherIndex++]);
+}
+
+ALWAYS_INLINE static bool
+CheckOpcode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDNode *N) {
+ return N->getOpcode() == MatcherTable[MatcherIndex++];
+}
+
+ALWAYS_INLINE static bool
+CheckType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (N.getValueType() == VT) return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && N.getValueType() == TLI.getPointerTy();
+}
+
+ALWAYS_INLINE static bool
+CheckChildType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI,
+ unsigned ChildNo) {
+ if (ChildNo >= N.getNumOperands())
+ return false; // Match fails if out of range child #.
+ return ::CheckType(MatcherTable, MatcherIndex, N.getOperand(ChildNo), TLI);
+}
+
+
+ALWAYS_INLINE static bool
+CheckCondCode(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ return cast<CondCodeSDNode>(N)->get() ==
+ (ISD::CondCode)MatcherTable[MatcherIndex++];
+}
+
+ALWAYS_INLINE static bool
+CheckValueType(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, const TargetLowering &TLI) {
+ MVT::SimpleValueType VT = (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (cast<VTSDNode>(N)->getVT() == VT)
+ return true;
+
+ // Handle the case when VT is iPTR.
+ return VT == MVT::iPTR && cast<VTSDNode>(N)->getVT() == TLI.getPointerTy();
+}
+
+ALWAYS_INLINE static bool
+CheckInteger(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N);
+ return C != 0 && C->getSExtValue() == Val;
+}
+
+ALWAYS_INLINE static bool
+CheckAndImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::AND) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckAndMask(N.getOperand(0), C, Val);
+}
+
+ALWAYS_INLINE static bool
+CheckOrImm(const unsigned char *MatcherTable, unsigned &MatcherIndex,
+ SDValue N, SelectionDAGISel &SDISel) {
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+
+ if (N->getOpcode() != ISD::OR) return false;
+
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(N->getOperand(1));
+ return C != 0 && SDISel.CheckOrMask(N.getOperand(0), C, Val);
+}
+
+/// IsPredicateKnownToFail - If we know how and can do so without pushing a
+/// scope, evaluate the current node. If the current predicate is known to
+/// fail, set Result=true and return anything. If the current predicate is
+/// known to pass or cannot be evaluated here, set Result=false and return the
+/// MatcherIndex to continue with.
+static unsigned IsPredicateKnownToFail(const unsigned char *Table,
+ unsigned Index, SDValue N,
+ bool &Result, SelectionDAGISel &SDISel,
+ SmallVectorImpl<SDValue> &RecordedNodes){
+ switch (Table[Index++]) {
+ default:
+ Result = false;
+ return Index-1; // Could not evaluate this predicate.
+ case SelectionDAGISel::OPC_CheckSame:
+ Result = !::CheckSame(Table, Index, N, RecordedNodes);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPatternPredicate:
+ Result = !::CheckPatternPredicate(Table, Index, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckPredicate:
+ Result = !::CheckNodePredicate(Table, Index, SDISel, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckOpcode:
+ Result = !::CheckOpcode(Table, Index, N.getNode());
+ return Index;
+ case SelectionDAGISel::OPC_CheckType:
+ Result = !::CheckType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckChild0Type:
+ case SelectionDAGISel::OPC_CheckChild1Type:
+ case SelectionDAGISel::OPC_CheckChild2Type:
+ case SelectionDAGISel::OPC_CheckChild3Type:
+ case SelectionDAGISel::OPC_CheckChild4Type:
+ case SelectionDAGISel::OPC_CheckChild5Type:
+ case SelectionDAGISel::OPC_CheckChild6Type:
+ case SelectionDAGISel::OPC_CheckChild7Type:
+ Result = !::CheckChildType(Table, Index, N, SDISel.TLI,
+ Table[Index-1] - SelectionDAGISel::OPC_CheckChild0Type);
+ return Index;
+ case SelectionDAGISel::OPC_CheckCondCode:
+ Result = !::CheckCondCode(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckValueType:
+ Result = !::CheckValueType(Table, Index, N, SDISel.TLI);
+ return Index;
+ case SelectionDAGISel::OPC_CheckInteger:
+ Result = !::CheckInteger(Table, Index, N);
+ return Index;
+ case SelectionDAGISel::OPC_CheckAndImm:
+ Result = !::CheckAndImm(Table, Index, N, SDISel);
+ return Index;
+ case SelectionDAGISel::OPC_CheckOrImm:
+ Result = !::CheckOrImm(Table, Index, N, SDISel);
+ return Index;
+ }
+}
+
+
+struct MatchScope {
+ /// FailIndex - If this match fails, this is the index to continue with.
+ unsigned FailIndex;
+
+ /// NodeStack - The node stack when the scope was formed.
+ SmallVector<SDValue, 4> NodeStack;
+
+ /// NumRecordedNodes - The number of recorded nodes when the scope was formed.
+ unsigned NumRecordedNodes;
+
+ /// NumMatchedMemRefs - The number of matched memref entries.
+ unsigned NumMatchedMemRefs;
+
+ /// InputChain/InputFlag - The current chain/flag
+ SDValue InputChain, InputFlag;
+
+ /// HasChainNodesMatched - True if the ChainNodesMatched list is non-empty.
+ bool HasChainNodesMatched, HasFlagResultNodesMatched;
+};
+
+SDNode *SelectionDAGISel::
+SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable,
+ unsigned TableSize) {
+ // FIXME: Should these even be selected? Handle these cases in the caller?
+ switch (NodeToMatch->getOpcode()) {
+ default:
+ break;
+ case ISD::EntryToken: // These nodes remain the same.
+ case ISD::BasicBlock:
+ case ISD::Register:
+ case ISD::HANDLENODE:
+ case ISD::TargetConstant:
+ case ISD::TargetConstantFP:
+ case ISD::TargetConstantPool:
+ case ISD::TargetFrameIndex:
+ case ISD::TargetExternalSymbol:
+ case ISD::TargetBlockAddress:
+ case ISD::TargetJumpTable:
+ case ISD::TargetGlobalTLSAddress:
+ case ISD::TargetGlobalAddress:
+ case ISD::TokenFactor:
+ case ISD::CopyFromReg:
+ case ISD::CopyToReg:
+ NodeToMatch->setNodeId(-1); // Mark selected.
+ return 0;
+ case ISD::AssertSext:
+ case ISD::AssertZext:
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, 0),
+ NodeToMatch->getOperand(0));
+ return 0;
+ case ISD::INLINEASM: return Select_INLINEASM(NodeToMatch);
+ case ISD::EH_LABEL: return Select_EH_LABEL(NodeToMatch);
+ case ISD::UNDEF: return Select_UNDEF(NodeToMatch);
+ }
+
+ assert(!NodeToMatch->isMachineOpcode() && "Node already selected!");
+
+ // Set up the node stack with NodeToMatch as the only node on the stack.
+ SmallVector<SDValue, 8> NodeStack;
+ SDValue N = SDValue(NodeToMatch, 0);
+ NodeStack.push_back(N);
+
+ // MatchScopes - Scopes used when matching, if a match failure happens, this
+ // indicates where to continue checking.
+ SmallVector<MatchScope, 8> MatchScopes;
+
+ // RecordedNodes - This is the set of nodes that have been recorded by the
+ // state machine.
+ SmallVector<SDValue, 8> RecordedNodes;
+
+  // MatchedMemRefs - This is the set of MemRefs we've seen in the input
+ // pattern.
+ SmallVector<MachineMemOperand*, 2> MatchedMemRefs;
+
+ // These are the current input chain and flag for use when generating nodes.
+ // Various Emit operations change these. For example, emitting a copytoreg
+ // uses and updates these.
+ SDValue InputChain, InputFlag;
+
+ // ChainNodesMatched - If a pattern matches nodes that have input/output
+  // chains, the OPC_EmitMergeInputChains operation is emitted to indicate
+ // which ones they are. The result is captured into this list so that we can
+ // update the chain results when the pattern is complete.
+ SmallVector<SDNode*, 3> ChainNodesMatched;
+ SmallVector<SDNode*, 3> FlagResultNodesMatched;
+
+ DEBUG(errs() << "ISEL: Starting pattern match on root node: ";
+ NodeToMatch->dump(CurDAG);
+ errs() << '\n');
+
+ // Determine where to start the interpreter. Normally we start at opcode #0,
+ // but if the state machine starts with an OPC_SwitchOpcode, then we
+ // accelerate the first lookup (which is guaranteed to be hot) with the
+ // OpcodeOffset table.
+ unsigned MatcherIndex = 0;
+
+ if (!OpcodeOffset.empty()) {
+ // Already computed the OpcodeOffset table, just index into it.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ DEBUG(errs() << " Initial Opcode index to " << MatcherIndex << "\n");
+
+ } else if (MatcherTable[0] == OPC_SwitchOpcode) {
+ // Otherwise, the table isn't computed, but the state machine does start
+ // with an OPC_SwitchOpcode instruction. Populate the table now, since this
+ // is the first time we're selecting an instruction.
+ unsigned Idx = 1;
+ while (1) {
+ // Get the size of this case.
+ unsigned CaseSize = MatcherTable[Idx++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, Idx);
+ if (CaseSize == 0) break;
+
+ // Get the opcode, add the index to the table.
+ unsigned Opc = MatcherTable[Idx++];
+ if (Opc >= OpcodeOffset.size())
+ OpcodeOffset.resize((Opc+1)*2);
+ OpcodeOffset[Opc] = Idx;
+ Idx += CaseSize;
+ }
+
+ // Okay, do the lookup for the first opcode.
+ if (N.getOpcode() < OpcodeOffset.size())
+ MatcherIndex = OpcodeOffset[N.getOpcode()];
+ }
+
+ while (1) {
+ assert(MatcherIndex < TableSize && "Invalid index");
+ BuiltinOpcodes Opcode = (BuiltinOpcodes)MatcherTable[MatcherIndex++];
+ switch (Opcode) {
+ case OPC_Scope: {
+ // Okay, the semantics of this operation are that we should push a scope
+ // then evaluate the first child. However, pushing a scope only to have
+      // the first check fail (which then pops it) is inefficient. If we can
+      // determine up front that the first check (or the first several) will
+      // fail, don't even bother pushing a scope for them.
+ unsigned FailIndex;
+
+ while (1) {
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+ // Found the end of the scope with no match.
+ if (NumToSkip == 0) {
+ FailIndex = 0;
+ break;
+ }
+
+ FailIndex = MatcherIndex+NumToSkip;
+
+ // If we can't evaluate this predicate without pushing a scope (e.g. if
+ // it is a 'MoveParent') or if the predicate succeeds on this node, we
+ // push the scope and evaluate the full predicate chain.
+ bool Result;
+ MatcherIndex = IsPredicateKnownToFail(MatcherTable, MatcherIndex, N,
+ Result, *this, RecordedNodes);
+ if (!Result)
+ break;
+
+ DEBUG(errs() << " Skipped scope entry at index " << MatcherIndex
+ << " continuing at " << FailIndex << "\n");
+
+        // Otherwise, we know that this case of the Scope is guaranteed to
+        // fail; move on to the next case.
+ MatcherIndex = FailIndex;
+ }
+
+ // If the whole scope failed to match, bail.
+ if (FailIndex == 0) break;
+
+ // Push a MatchScope which indicates where to go if the first child fails
+ // to match.
+ MatchScope NewEntry;
+ NewEntry.FailIndex = FailIndex;
+ NewEntry.NodeStack.append(NodeStack.begin(), NodeStack.end());
+ NewEntry.NumRecordedNodes = RecordedNodes.size();
+ NewEntry.NumMatchedMemRefs = MatchedMemRefs.size();
+ NewEntry.InputChain = InputChain;
+ NewEntry.InputFlag = InputFlag;
+ NewEntry.HasChainNodesMatched = !ChainNodesMatched.empty();
+ NewEntry.HasFlagResultNodesMatched = !FlagResultNodesMatched.empty();
+ MatchScopes.push_back(NewEntry);
+ continue;
+ }
+ case OPC_RecordNode:
+ // Remember this node, it may end up being an operand in the pattern.
+ RecordedNodes.push_back(N);
+ continue;
+
+ case OPC_RecordChild0: case OPC_RecordChild1:
+ case OPC_RecordChild2: case OPC_RecordChild3:
+ case OPC_RecordChild4: case OPC_RecordChild5:
+ case OPC_RecordChild6: case OPC_RecordChild7: {
+ unsigned ChildNo = Opcode-OPC_RecordChild0;
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+
+ RecordedNodes.push_back(N->getOperand(ChildNo));
+ continue;
+ }
+ case OPC_RecordMemRef:
+ MatchedMemRefs.push_back(cast<MemSDNode>(N)->getMemOperand());
+ continue;
+
+ case OPC_CaptureFlagInput:
+ // If the current node has an input flag, capture it in InputFlag.
+ if (N->getNumOperands() != 0 &&
+ N->getOperand(N->getNumOperands()-1).getValueType() == MVT::Flag)
+ InputFlag = N->getOperand(N->getNumOperands()-1);
+ continue;
+
+ case OPC_MoveChild: {
+ unsigned ChildNo = MatcherTable[MatcherIndex++];
+ if (ChildNo >= N.getNumOperands())
+ break; // Match fails if out of range child #.
+ N = N.getOperand(ChildNo);
+ NodeStack.push_back(N);
+ continue;
+ }
+
+ case OPC_MoveParent:
+ // Pop the current node off the NodeStack.
+ NodeStack.pop_back();
+ assert(!NodeStack.empty() && "Node stack imbalance!");
+ N = NodeStack.back();
+ continue;
+
+ case OPC_CheckSame:
+ if (!::CheckSame(MatcherTable, MatcherIndex, N, RecordedNodes)) break;
+ continue;
+ case OPC_CheckPatternPredicate:
+ if (!::CheckPatternPredicate(MatcherTable, MatcherIndex, *this)) break;
+ continue;
+ case OPC_CheckPredicate:
+ if (!::CheckNodePredicate(MatcherTable, MatcherIndex, *this,
+ N.getNode()))
+ break;
+ continue;
+ case OPC_CheckComplexPat:
+ if (!CheckComplexPattern(NodeToMatch, N,
+ MatcherTable[MatcherIndex++], RecordedNodes))
+ break;
+ continue;
+ case OPC_CheckOpcode:
+ if (!::CheckOpcode(MatcherTable, MatcherIndex, N.getNode())) break;
+ continue;
+
+ case OPC_CheckType:
+ if (!::CheckType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+
+ case OPC_SwitchOpcode: {
+ unsigned CurNodeOpcode = N.getOpcode();
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ // If the opcode matches, then we will execute this case.
+ if (CurNodeOpcode == MatcherTable[MatcherIndex++])
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " OpcodeSwitch from " << SwitchStart
+ << " to " << MatcherIndex << "\n");
+ continue;
+ }
+
+ case OPC_SwitchType: {
+ MVT::SimpleValueType CurNodeVT = N.getValueType().getSimpleVT().SimpleTy;
+ unsigned SwitchStart = MatcherIndex-1; (void)SwitchStart;
+ unsigned CaseSize;
+ while (1) {
+ // Get the size of this case.
+ CaseSize = MatcherTable[MatcherIndex++];
+ if (CaseSize & 128)
+ CaseSize = GetVBR(CaseSize, MatcherTable, MatcherIndex);
+ if (CaseSize == 0) break;
+
+ MVT::SimpleValueType CaseVT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (CaseVT == MVT::iPTR)
+ CaseVT = TLI.getPointerTy().SimpleTy;
+
+ // If the VT matches, then we will execute this case.
+ if (CurNodeVT == CaseVT)
+ break;
+
+ // Otherwise, skip over this case.
+ MatcherIndex += CaseSize;
+ }
+
+ // If no cases matched, bail out.
+ if (CaseSize == 0) break;
+
+ // Otherwise, execute the case we found.
+ DEBUG(errs() << " TypeSwitch[" << EVT(CurNodeVT).getEVTString()
+ << "] from " << SwitchStart << " to " << MatcherIndex<<'\n');
+ continue;
+ }
+ case OPC_CheckChild0Type: case OPC_CheckChild1Type:
+ case OPC_CheckChild2Type: case OPC_CheckChild3Type:
+ case OPC_CheckChild4Type: case OPC_CheckChild5Type:
+ case OPC_CheckChild6Type: case OPC_CheckChild7Type:
+ if (!::CheckChildType(MatcherTable, MatcherIndex, N, TLI,
+ Opcode-OPC_CheckChild0Type))
+ break;
+ continue;
+ case OPC_CheckCondCode:
+ if (!::CheckCondCode(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckValueType:
+ if (!::CheckValueType(MatcherTable, MatcherIndex, N, TLI)) break;
+ continue;
+ case OPC_CheckInteger:
+ if (!::CheckInteger(MatcherTable, MatcherIndex, N)) break;
+ continue;
+ case OPC_CheckAndImm:
+ if (!::CheckAndImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+ case OPC_CheckOrImm:
+ if (!::CheckOrImm(MatcherTable, MatcherIndex, N, *this)) break;
+ continue;
+
+ case OPC_CheckFoldableChainNode: {
+ assert(NodeStack.size() != 1 && "No parent node");
+ // Verify that all intermediate nodes between the root and this one have
+ // a single use.
+ bool HasMultipleUses = false;
+ for (unsigned i = 1, e = NodeStack.size()-1; i != e; ++i)
+ if (!NodeStack[i].hasOneUse()) {
+ HasMultipleUses = true;
+ break;
+ }
+ if (HasMultipleUses) break;
+
+ // Check to see that the target thinks this is profitable to fold and that
+ // we can fold it without inducing cycles in the graph.
+ if (!IsProfitableToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch) ||
+ !IsLegalToFold(N, NodeStack[NodeStack.size()-2].getNode(),
+ NodeToMatch, true/*We validate our own chains*/))
+ break;
+
+ continue;
+ }
+ case OPC_EmitInteger: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ int64_t Val = MatcherTable[MatcherIndex++];
+ if (Val & 128)
+ Val = GetVBR(Val, MatcherTable, MatcherIndex);
+ RecordedNodes.push_back(CurDAG->getTargetConstant(Val, VT));
+ continue;
+ }
+ case OPC_EmitRegister: {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ unsigned RegNo = MatcherTable[MatcherIndex++];
+ RecordedNodes.push_back(CurDAG->getRegister(RegNo, VT));
+ continue;
+ }
+
+ case OPC_EmitConvertToTarget: {
+ // Convert from IMM/FPIMM to target version.
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitConvertToTarget");
+ SDValue Imm = RecordedNodes[RecNo];
+
+ if (Imm->getOpcode() == ISD::Constant) {
+ int64_t Val = cast<ConstantSDNode>(Imm)->getZExtValue();
+ Imm = CurDAG->getTargetConstant(Val, Imm.getValueType());
+ } else if (Imm->getOpcode() == ISD::ConstantFP) {
+ const ConstantFP *Val=cast<ConstantFPSDNode>(Imm)->getConstantFPValue();
+ Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType());
+ }
+
+ RecordedNodes.push_back(Imm);
+ continue;
+ }
+
+ case OPC_EmitMergeInputChains: {
+ assert(InputChain.getNode() == 0 &&
+ "EmitMergeInputChains should be the first chain producing node");
+ // This node gets a list of nodes we matched in the input that have
+ // chains. We want to token factor all of the input chains to these nodes
+ // together. However, if any of the input chains is actually one of the
+ // nodes matched in this pattern, then we have an intra-match reference.
+ // Ignore these because the newly token factored chain should not refer to
+ // the old nodes.
+ unsigned NumChains = MatcherTable[MatcherIndex++];
+ assert(NumChains != 0 && "Can't TF zero chains");
+
+ assert(ChainNodesMatched.empty() &&
+ "Should only have one EmitMergeInputChains per match");
+
+ // Read all of the chained nodes.
+ for (unsigned i = 0; i != NumChains; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+        assert(RecNo < RecordedNodes.size() && "Invalid EmitMergeInputChains");
+ ChainNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+
+ // FIXME: What if other value results of the node have uses not matched
+ // by this pattern?
+ if (ChainNodesMatched.back() != NodeToMatch &&
+ !RecordedNodes[RecNo].hasOneUse()) {
+ ChainNodesMatched.clear();
+ break;
+ }
+ }
+
+ // If the inner loop broke out, the match fails.
+ if (ChainNodesMatched.empty())
+ break;
+
+ // Merge the input chains if they are not intra-pattern references.
+ InputChain = HandleMergeInputChains(ChainNodesMatched, CurDAG);
+
+ if (InputChain.getNode() == 0)
+ break; // Failed to merge.
+
+ continue;
+ }
+
+ case OPC_EmitCopyToReg: {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitCopyToReg");
+ unsigned DestPhysReg = MatcherTable[MatcherIndex++];
+
+ if (InputChain.getNode() == 0)
+ InputChain = CurDAG->getEntryNode();
+
+ InputChain = CurDAG->getCopyToReg(InputChain, NodeToMatch->getDebugLoc(),
+ DestPhysReg, RecordedNodes[RecNo],
+ InputFlag);
+
+ InputFlag = InputChain.getValue(1);
+ continue;
+ }
+
+ case OPC_EmitNodeXForm: {
+ unsigned XFormNo = MatcherTable[MatcherIndex++];
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+      assert(RecNo < RecordedNodes.size() && "Invalid EmitNodeXForm");
+ RecordedNodes.push_back(RunSDNodeXForm(RecordedNodes[RecNo], XFormNo));
+ continue;
+ }
+
+ case OPC_EmitNode:
+ case OPC_MorphNodeTo: {
+ uint16_t TargetOpc = MatcherTable[MatcherIndex++];
+ TargetOpc |= (unsigned short)MatcherTable[MatcherIndex++] << 8;
+ unsigned EmitNodeInfo = MatcherTable[MatcherIndex++];
+ // Get the result VT list.
+ unsigned NumVTs = MatcherTable[MatcherIndex++];
+ SmallVector<EVT, 4> VTs;
+ for (unsigned i = 0; i != NumVTs; ++i) {
+ MVT::SimpleValueType VT =
+ (MVT::SimpleValueType)MatcherTable[MatcherIndex++];
+ if (VT == MVT::iPTR) VT = TLI.getPointerTy().SimpleTy;
+ VTs.push_back(VT);
+ }
+
+ if (EmitNodeInfo & OPFL_Chain)
+ VTs.push_back(MVT::Other);
+ if (EmitNodeInfo & OPFL_FlagOutput)
+ VTs.push_back(MVT::Flag);
+
+ // This is hot code, so optimize the two most common cases of 1 and 2
+ // results.
+ SDVTList VTList;
+ if (VTs.size() == 1)
+ VTList = CurDAG->getVTList(VTs[0]);
+ else if (VTs.size() == 2)
+ VTList = CurDAG->getVTList(VTs[0], VTs[1]);
+ else
+ VTList = CurDAG->getVTList(VTs.data(), VTs.size());
+
+ // Get the operand list.
+ unsigned NumOps = MatcherTable[MatcherIndex++];
+ SmallVector<SDValue, 8> Ops;
+ for (unsigned i = 0; i != NumOps; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+ assert(RecNo < RecordedNodes.size() && "Invalid EmitNode");
+ Ops.push_back(RecordedNodes[RecNo]);
+ }
+
+ // If there are variadic operands to add, handle them now.
+ if (EmitNodeInfo & OPFL_VariadicInfo) {
+ // Determine the start index to copy from.
+ unsigned FirstOpToCopy = getNumFixedFromVariadicInfo(EmitNodeInfo);
+ FirstOpToCopy += (EmitNodeInfo & OPFL_Chain) ? 1 : 0;
+ assert(NodeToMatch->getNumOperands() >= FirstOpToCopy &&
+ "Invalid variadic node");
+ // Copy all of the variadic operands, not including a potential flag
+ // input.
+ for (unsigned i = FirstOpToCopy, e = NodeToMatch->getNumOperands();
+ i != e; ++i) {
+ SDValue V = NodeToMatch->getOperand(i);
+ if (V.getValueType() == MVT::Flag) break;
+ Ops.push_back(V);
+ }
+ }
+
+ // If this has chain/flag inputs, add them.
+ if (EmitNodeInfo & OPFL_Chain)
+ Ops.push_back(InputChain);
+ if ((EmitNodeInfo & OPFL_FlagInput) && InputFlag.getNode() != 0)
+ Ops.push_back(InputFlag);
+
+ // Create the node.
+ SDNode *Res = 0;
+ if (Opcode != OPC_MorphNodeTo) {
+ // If this is a normal EmitNode command, just create the new node and
+ // add the results to the RecordedNodes list.
+ Res = CurDAG->getMachineNode(TargetOpc, NodeToMatch->getDebugLoc(),
+ VTList, Ops.data(), Ops.size());
+
+ // Add all the non-flag/non-chain results to the RecordedNodes list.
+ for (unsigned i = 0, e = VTs.size(); i != e; ++i) {
+ if (VTs[i] == MVT::Other || VTs[i] == MVT::Flag) break;
+ RecordedNodes.push_back(SDValue(Res, i));
+ }
+
+ } else {
+ Res = MorphNode(NodeToMatch, TargetOpc, VTList, Ops.data(), Ops.size(),
+ EmitNodeInfo);
+ }
+
+ // If the node had chain/flag results, update our notion of the current
+ // chain and flag.
+ if (EmitNodeInfo & OPFL_FlagOutput) {
+ InputFlag = SDValue(Res, VTs.size()-1);
+ if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-2);
+ } else if (EmitNodeInfo & OPFL_Chain)
+ InputChain = SDValue(Res, VTs.size()-1);
+
+ // If the OPFL_MemRefs flag is set on this node, slap all of the
+ // accumulated memrefs onto it.
+ //
+      // FIXME: This is vastly incorrect for patterns with multiple-output
+      // instructions that access memory and for ComplexPatterns that match
+ // loads.
+ if (EmitNodeInfo & OPFL_MemRefs) {
+ MachineSDNode::mmo_iterator MemRefs =
+ MF->allocateMemRefsArray(MatchedMemRefs.size());
+ std::copy(MatchedMemRefs.begin(), MatchedMemRefs.end(), MemRefs);
+ cast<MachineSDNode>(Res)
+ ->setMemRefs(MemRefs, MemRefs + MatchedMemRefs.size());
+ }
+
+ DEBUG(errs() << " "
+ << (Opcode == OPC_MorphNodeTo ? "Morphed" : "Created")
+ << " node: "; Res->dump(CurDAG); errs() << "\n");
+
+ // If this was a MorphNodeTo then we're completely done!
+ if (Opcode == OPC_MorphNodeTo) {
+ // Update chain and flag uses.
+ UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
+ InputFlag, FlagResultNodesMatched, true);
+ return Res;
+ }
+
+ continue;
+ }
+
+ case OPC_MarkFlagResults: {
+ unsigned NumNodes = MatcherTable[MatcherIndex++];
+
+ // Read and remember all the flag-result nodes.
+ for (unsigned i = 0; i != NumNodes; ++i) {
+ unsigned RecNo = MatcherTable[MatcherIndex++];
+ if (RecNo & 128)
+ RecNo = GetVBR(RecNo, MatcherTable, MatcherIndex);
+
+        assert(RecNo < RecordedNodes.size() && "Invalid MarkFlagResults");
+ FlagResultNodesMatched.push_back(RecordedNodes[RecNo].getNode());
+ }
+ continue;
+ }
+
+ case OPC_CompleteMatch: {
+      // The match has been completed, and new nodes (if any) have been
+      // created. Patch up references to the matched DAG to use the newly
+ // created nodes.
+ unsigned NumResults = MatcherTable[MatcherIndex++];
+
+ for (unsigned i = 0; i != NumResults; ++i) {
+ unsigned ResSlot = MatcherTable[MatcherIndex++];
+ if (ResSlot & 128)
+ ResSlot = GetVBR(ResSlot, MatcherTable, MatcherIndex);
+
+        assert(ResSlot < RecordedNodes.size() && "Invalid CompleteMatch");
+ SDValue Res = RecordedNodes[ResSlot];
+
+        // FIXME2: Eliminate this horrible hack by fixing the 'Gen' program
+        // after '(parallel)' on input patterns is removed. This would also
+ // allow us to stop encoding #results in OPC_CompleteMatch's table
+ // entry.
+ if (NodeToMatch->getNumValues() <= i ||
+ NodeToMatch->getValueType(i) == MVT::Other ||
+ NodeToMatch->getValueType(i) == MVT::Flag)
+ break;
+ assert((NodeToMatch->getValueType(i) == Res.getValueType() ||
+ NodeToMatch->getValueType(i) == MVT::iPTR ||
+ Res.getValueType() == MVT::iPTR ||
+ NodeToMatch->getValueType(i).getSizeInBits() ==
+ Res.getValueType().getSizeInBits()) &&
+ "invalid replacement");
+ CurDAG->ReplaceAllUsesOfValueWith(SDValue(NodeToMatch, i), Res);
+ }
+
+      // If the root node defines a flag, add it to the list of flag nodes to
+      // update.
+ if (NodeToMatch->getValueType(NodeToMatch->getNumValues()-1) == MVT::Flag)
+ FlagResultNodesMatched.push_back(NodeToMatch);
+
+ // Update chain and flag uses.
+ UpdateChainsAndFlags(NodeToMatch, InputChain, ChainNodesMatched,
+ InputFlag, FlagResultNodesMatched, false);
+
+ assert(NodeToMatch->use_empty() &&
+ "Didn't replace all uses of the node?");
+
+ // FIXME: We just return here, which interacts correctly with SelectRoot
+ // above. We should fix this to not return an SDNode* anymore.
+ return 0;
+ }
+ }
+
+    // If the code reached this point, then the match failed. See if there is
+    // another child to try in the current 'Scope'; otherwise pop scopes until
+    // we find one with another case to check.
+ while (1) {
+ if (MatchScopes.empty()) {
+ CannotYetSelect(NodeToMatch);
+ return 0;
+ }
+
+ // Restore the interpreter state back to the point where the scope was
+ // formed.
+ MatchScope &LastScope = MatchScopes.back();
+ RecordedNodes.resize(LastScope.NumRecordedNodes);
+ NodeStack.clear();
+ NodeStack.append(LastScope.NodeStack.begin(), LastScope.NodeStack.end());
+ N = NodeStack.back();
+
+ DEBUG(errs() << " Match failed at index " << MatcherIndex
+ << " continuing at " << LastScope.FailIndex << "\n");
+
+ if (LastScope.NumMatchedMemRefs != MatchedMemRefs.size())
+ MatchedMemRefs.resize(LastScope.NumMatchedMemRefs);
+ MatcherIndex = LastScope.FailIndex;
+
+ InputChain = LastScope.InputChain;
+ InputFlag = LastScope.InputFlag;
+ if (!LastScope.HasChainNodesMatched)
+ ChainNodesMatched.clear();
+ if (!LastScope.HasFlagResultNodesMatched)
+ FlagResultNodesMatched.clear();
+
+      // Check to see what the offset is at the new MatcherIndex. If it is
+      // zero, we have reached the end of this scope; otherwise we have another
+      // child in the current scope to try.
+ unsigned NumToSkip = MatcherTable[MatcherIndex++];
+ if (NumToSkip & 128)
+ NumToSkip = GetVBR(NumToSkip, MatcherTable, MatcherIndex);
+
+ // If we have another child in this scope to match, update FailIndex and
+ // try it.
+ if (NumToSkip != 0) {
+ LastScope.FailIndex = MatcherIndex+NumToSkip;
+ break;
+ }
+
+ // End of this scope, pop it and try the next child in the containing
+ // scope.
+ MatchScopes.pop_back();
+ }
+ }
+}
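
To make the Scope/FailIndex mechanics concrete, here is a hypothetical three-opcode matcher in the same interpreter style. It is deliberately simplified: operands are single bytes rather than VBRs, and each scope guards exactly one alternative instead of a zero-terminated child list.

#include <cassert>
#include <vector>

// Mini matcher: CHECK fails unless the input equals its operand, SCOPE
// records where to resume on failure, DONE reports which alternative matched.
enum MiniOpc { OPC_MiniScope, OPC_MiniCheck, OPC_MiniDone };

static int runMatcher(const std::vector<unsigned char> &Table, int Input) {
  std::vector<unsigned> Scopes;                // Pending FailIndex values.
  unsigned Idx = 0;
  while (1) {
    bool Failed = false;
    switch (Table[Idx++]) {
    case OPC_MiniScope: {
      unsigned NumToSkip = Table[Idx++];       // Real tables use VBR here.
      Scopes.push_back(Idx + NumToSkip);       // Resume point on failure.
      break;
    }
    case OPC_MiniCheck:
      Failed = (Input != Table[Idx++]);        // Predicate check.
      break;
    case OPC_MiniDone:
      return Table[Idx];                       // Matched alternative number.
    }
    if (!Failed) continue;
    if (Scopes.empty()) return -1;             // CannotYetSelect.
    Idx = Scopes.back();                       // Backtrack to the next child.
    Scopes.pop_back();
  }
}

int main() {
  // Try "input == 7 -> alternative 0"; on failure fall through to
  // "input == 9 -> alternative 1".
  unsigned char T[] = {
    OPC_MiniScope, 4,                          // On failure, skip 4 bytes.
    OPC_MiniCheck, 7, OPC_MiniDone, 0,
    OPC_MiniCheck, 9, OPC_MiniDone, 1,
  };
  std::vector<unsigned char> Table(T, T + sizeof(T));
  assert(runMatcher(Table, 7) == 0);
  assert(runMatcher(Table, 9) == 1);
  assert(runMatcher(Table, 3) == -1);
  return 0;
}
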
+
+
+
void SelectionDAGISel::CannotYetSelect(SDNode *N) {
+ if (N->getOpcode() == ISD::INTRINSIC_W_CHAIN ||
+ N->getOpcode() == ISD::INTRINSIC_WO_CHAIN ||
+ N->getOpcode() == ISD::INTRINSIC_VOID)
+ return CannotYetSelectIntrinsic(N);
+
std::string msg;
raw_string_ostream Msg(msg);
Msg << "Cannot yet select: ";
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index e88af4f..0e54ca4 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -1441,8 +1441,10 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
case ISD::TRUNCATE: {
// Simplify the input, using demanded bit information, and compute the known
// zero/one bits live out.
+ unsigned OperandBitWidth =
+ Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
APInt TruncMask = NewMask;
- TruncMask.zext(Op.getOperand(0).getValueSizeInBits());
+ TruncMask.zext(OperandBitWidth);
if (SimplifyDemandedBits(Op.getOperand(0), TruncMask,
KnownZero, KnownOne, TLO, Depth+1))
return true;
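
In miniature, the rule above: the bits demanded of a truncate are demanded of its wide operand at the same positions, so the narrow mask is zero-extended to the operand's width before recursing. Plain fixed-width integers stand in for APInt in this sketch.

#include <cassert>
#include <cstdint>

int main() {
  uint32_t NewMask = 0x000000F0;   // Bits demanded of an i32 truncate.
  uint64_t TruncMask = NewMask;    // "zext" to the i64 operand's width.
  assert(TruncMask == 0xF0ull);    // Same bit positions, wider value.
  return 0;
}
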
@@ -1453,15 +1455,14 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// on the known demanded bits.
if (Op.getOperand(0).getNode()->hasOneUse()) {
SDValue In = Op.getOperand(0);
- unsigned InBitWidth = In.getValueSizeInBits();
switch (In.getOpcode()) {
default: break;
case ISD::SRL:
// Shrink SRL by a constant if none of the high bits shifted in are
// demanded.
if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(In.getOperand(1))){
- APInt HighBits = APInt::getHighBitsSet(InBitWidth,
- InBitWidth - BitWidth);
+ APInt HighBits = APInt::getHighBitsSet(OperandBitWidth,
+ OperandBitWidth - BitWidth);
HighBits = HighBits.lshr(ShAmt->getZExtValue());
HighBits.trunc(BitWidth);
@@ -1607,7 +1608,7 @@ static bool ValueHasExactlyOneBitSet(SDValue Val, const SelectionDAG &DAG) {
// Fall back to ComputeMaskedBits to catch other known cases.
EVT OpVT = Val.getValueType();
- unsigned BitWidth = OpVT.getSizeInBits();
+ unsigned BitWidth = OpVT.getScalarType().getSizeInBits();
APInt Mask = APInt::getAllOnesValue(BitWidth);
APInt KnownZero, KnownOne;
DAG.ComputeMaskedBits(Val, Mask, KnownZero, KnownOne);
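
The property ValueHasExactlyOneBitSet establishes through known-bits analysis is, for a plain scalar, the classic power-of-two test; a minimal sketch:

#include <cassert>
#include <cstdint>

// A value has exactly one bit set iff it is nonzero and clearing its lowest
// set bit leaves zero.
static bool hasExactlyOneBitSet(uint64_t V) {
  return V != 0 && (V & (V - 1)) == 0;
}

int main() {
  assert(hasExactlyOneBitSet(1) && hasExactlyOneBitSet(64));
  assert(!hasExactlyOneBitSet(0) && !hasExactlyOneBitSet(6));
  return 0;
}
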
@@ -1775,7 +1776,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
break; // todo, be more careful with signed comparisons
}
} else if (N0.getOpcode() == ISD::SIGN_EXTEND_INREG &&
- (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
+ (Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
EVT ExtSrcTy = cast<VTSDNode>(N0.getOperand(1))->getVT();
unsigned ExtSrcTyBits = ExtSrcTy.getSizeInBits();
EVT ExtDstTy = N0.getValueType();
@@ -1809,22 +1810,21 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
Cond);
} else if ((N1C->isNullValue() || N1C->getAPIntValue() == 1) &&
(Cond == ISD::SETEQ || Cond == ISD::SETNE)) {
-
// SETCC (SETCC), [0|1], [EQ|NE] -> SETCC
- if (N0.getOpcode() == ISD::SETCC) {
+ if (N0.getOpcode() == ISD::SETCC &&
+ isTypeLegal(VT) && VT.bitsLE(N0.getValueType())) {
bool TrueWhenTrue = (Cond == ISD::SETEQ) ^ (N1C->getAPIntValue() != 1);
if (TrueWhenTrue)
- return N0;
-
+ return DAG.getNode(ISD::TRUNCATE, dl, VT, N0);
// Invert the condition.
ISD::CondCode CC = cast<CondCodeSDNode>(N0.getOperand(2))->get();
CC = ISD::getSetCCInverse(CC,
N0.getOperand(0).getValueType().isInteger());
return DAG.getSetCC(dl, VT, N0.getOperand(0), N0.getOperand(1), CC);
}
-
+
if ((N0.getOpcode() == ISD::XOR ||
- (N0.getOpcode() == ISD::AND &&
+ (N0.getOpcode() == ISD::AND &&
N0.getOperand(0).getOpcode() == ISD::XOR &&
N0.getOperand(1) == N0.getOperand(0).getOperand(1))) &&
isa<ConstantSDNode>(N0.getOperand(1)) &&
@@ -1847,9 +1847,36 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1,
N0.getOperand(0).getOperand(0),
N0.getOperand(1));
}
+
return DAG.getSetCC(dl, VT, Val, N1,
Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
}
+ } else if (N1C->getAPIntValue() == 1 &&
+ (VT == MVT::i1 ||
+ getBooleanContents() == ZeroOrOneBooleanContent)) {
+ SDValue Op0 = N0;
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+
+ if ((Op0.getOpcode() == ISD::XOR) &&
+ Op0.getOperand(0).getOpcode() == ISD::SETCC &&
+ Op0.getOperand(1).getOpcode() == ISD::SETCC) {
+ // (xor (setcc), (setcc)) == / != 1 -> (setcc) != / == (setcc)
+ Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ;
+ return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1),
+ Cond);
+ } else if (Op0.getOpcode() == ISD::AND &&
+ isa<ConstantSDNode>(Op0.getOperand(1)) &&
+ cast<ConstantSDNode>(Op0.getOperand(1))->getAPIntValue() == 1) {
+ // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0.
+ if (Op0.getValueType() != VT)
+ Op0 = DAG.getNode(ISD::AND, dl, VT,
+ DAG.getNode(ISD::TRUNCATE, dl, VT, Op0.getOperand(0)),
+ DAG.getConstant(1, VT));
+ return DAG.getSetCC(dl, VT, Op0,
+ DAG.getConstant(0, Op0.getValueType()),
+ Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ);
+ }
}
}
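
A quick exhaustive check of the two boolean identities behind the new N1C == 1 cases, assuming 0/1-valued setcc results (ZeroOrOneBooleanContent):

#include <cassert>

int main() {
  // (xor (setcc), (setcc)) == 1  <=>  (setcc) != (setcc), for x, y in {0,1}.
  for (unsigned x = 0; x <= 1; ++x)
    for (unsigned y = 0; y <= 1; ++y)
      assert(((x ^ y) == 1) == (x != y));

  // (and X, 1) == 1  <=>  (and X, 1) != 0, for any X.
  for (unsigned x = 0; x <= 7; ++x)
    assert(((x & 1) == 1) == ((x & 1) != 0));
  return 0;
}
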
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index e7b0cff..ce72b2f 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -662,7 +662,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
if (!tii_->isTriviallyReMaterializable(DefMI, AA))
return false;
bool SawStore = false;
- if (!DefMI->isSafeToMove(tii_, SawStore, AA))
+ if (!DefMI->isSafeToMove(tii_, AA, SawStore))
return false;
if (TID.getNumDefs() != 1)
return false;
@@ -702,7 +702,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
for (const unsigned* SR = tri_->getSubRegisters(DstReg); *SR; ++SR) {
if (!li_->hasInterval(*SR))
continue;
- DLR = li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
+ const LiveRange *DLR =
+ li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
if (DLR && DLR->valno->getCopy() == CopyMI)
DLR->valno->setCopy(0);
}
@@ -741,9 +742,21 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
NewMI->addOperand(MO);
if (MO.isDef() && li_->hasInterval(MO.getReg())) {
unsigned Reg = MO.getReg();
- DLR = li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
+ const LiveRange *DLR =
+ li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
if (DLR && DLR->valno->getCopy() == CopyMI)
DLR->valno->setCopy(0);
+ // Handle subregs as well
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ for (const unsigned* SR = tri_->getSubRegisters(Reg); *SR; ++SR) {
+ if (!li_->hasInterval(*SR))
+ continue;
+ const LiveRange *DLR =
+ li_->getInterval(*SR).getLiveRangeContaining(DefIdx);
+ if (DLR && DLR->valno->getCopy() == CopyMI)
+ DLR->valno->setCopy(0);
+ }
+ }
}
}
@@ -752,6 +765,7 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
CopyMI->eraseFromParent();
ReMatCopies.insert(CopyMI);
ReMatDefs.insert(DefMI);
+ DEBUG(dbgs() << "Remat: " << *NewMI);
++NumReMats;
return true;
}
@@ -1705,6 +1719,7 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
(AdjustCopiesBackFrom(SrcInt, DstInt, CopyMI) ||
RemoveCopyByCommutingDef(SrcInt, DstInt, CopyMI))) {
JoinedCopies.insert(CopyMI);
+ DEBUG(dbgs() << "Trivial!\n");
return true;
}
@@ -1864,7 +1879,7 @@ static unsigned ComputeUltimateVN(VNInfo *VNI,
// If the VN has already been computed, just return it.
if (ThisValNoAssignments[VN] >= 0)
return ThisValNoAssignments[VN];
-// assert(ThisValNoAssignments[VN] != -2 && "Cyclic case?");
+ assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers");
// If this val is not a copy from the other val, then it must be a new value
// number in the destination.
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index a0fccab..e9e998f 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -150,6 +150,11 @@ void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB,
MBB.insert(I, MI);
}
+bool TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1) const {
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
+}
+
MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
MachineFunction &MF) const {
assert(!Orig->getDesc().isNotDuplicable() &&
@@ -157,37 +162,6 @@ MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig,
return MF.CloneMachineInstr(Orig);
}
-bool
-TargetInstrInfoImpl::isIdentical(const MachineInstr *MI,
- const MachineInstr *Other,
- const MachineRegisterInfo *MRI) const {
- if (MI->getOpcode() != Other->getOpcode() ||
- MI->getNumOperands() != Other->getNumOperands())
- return false;
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &MO = MI->getOperand(i);
- const MachineOperand &OMO = Other->getOperand(i);
- if (MO.isReg() && MO.isDef()) {
- assert(OMO.isReg() && OMO.isDef());
- unsigned Reg = MO.getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
- if (Reg != OMO.getReg())
- return false;
- } else if (MRI->getRegClass(MO.getReg()) !=
- MRI->getRegClass(OMO.getReg()))
- return false;
-
- continue;
- }
-
- if (!MO.isIdenticalTo(OMO))
- return false;
- }
-
- return true;
-}
-
unsigned
TargetInstrInfoImpl::GetFunctionSizeInBytes(const MachineFunction &MF) const {
unsigned FnSize = 0;
diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
index 190b533..ef6e129 100644
--- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
+++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp
@@ -33,6 +33,7 @@
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/StringExtras.h"
using namespace llvm;
+using namespace dwarf;
//===----------------------------------------------------------------------===//
// ELF
@@ -549,8 +550,8 @@ void TargetLoweringObjectFileMachO::Initialize(MCContext &Ctx,
}
// Exception Handling.
- LSDASection = getMachOSection("__DATA", "__gcc_except_tab", 0,
- SectionKind::getDataRel());
+ LSDASection = getMachOSection("__TEXT", "__gcc_except_tab", 0,
+ SectionKind::getReadOnlyWithRel());
EHFrameSection =
getMachOSection("__TEXT", "__eh_frame",
MCSectionMachO::S_COALESCED |
@@ -738,11 +739,23 @@ getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding) const {
// The mach-o version of this method defaults to returning a stub reference.
- if (Encoding & dwarf::DW_EH_PE_indirect) {
+ if (Encoding & DW_EH_PE_indirect) {
+ MachineModuleInfoMachO &MachOMMI =
+ MMI->getObjFileInfo<MachineModuleInfoMachO>();
+
SmallString<128> Name;
Mang->getNameWithPrefix(Name, GV, true);
Name += "$non_lazy_ptr";
+
+ // Add information about the stub reference to MachOMMI so that the stub
+ // gets emitted by the asmprinter.
MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
+ MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym);
+ if (StubSym == 0) {
+ Name.clear();
+ Mang->getNameWithPrefix(Name, GV, false);
+ StubSym = getContext().GetOrCreateSymbol(Name.str());
+ }
return TargetLoweringObjectFile::
getSymbolForDwarfReference(Sym, MMI,
@@ -753,6 +766,21 @@ getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
}
+unsigned TargetLoweringObjectFileMachO::getPersonalityEncoding() const {
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+}
+
+unsigned TargetLoweringObjectFileMachO::getLSDAEncoding() const {
+ return DW_EH_PE_pcrel;
+}
+
+unsigned TargetLoweringObjectFileMachO::getFDEEncoding() const {
+ return DW_EH_PE_pcrel;
+}
+
+unsigned TargetLoweringObjectFileMachO::getTTypeEncoding() const {
+ return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
+}
//===----------------------------------------------------------------------===//
// COFF
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 6f4ca82..0ba3843 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -160,7 +160,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
MachineBasicBlock::iterator OldPos) {
// Check if it's safe to move this instruction.
bool SeenStore = true; // Be conservative.
- if (!MI->isSafeToMove(TII, SeenStore, AA))
+ if (!MI->isSafeToMove(TII, AA, SeenStore))
return false;
unsigned DefReg = 0;
@@ -1028,7 +1028,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
// copying it.
if (DefMI &&
DefMI->getDesc().isAsCheapAsAMove() &&
- DefMI->isSafeToReMat(TII, regB, AA) &&
+ DefMI->isSafeToReMat(TII, AA, regB) &&
isProfitableToReMat(regB, rc, mi, DefMI, mbbi, Dist)){
DEBUG(dbgs() << "2addr: REMATTING : " << *DefMI << "\n");
unsigned regASubIdx = mi->getOperand(DstIdx).getSubReg();
diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp
index 5956b61..ed02696 100644
--- a/lib/CodeGen/VirtRegMap.cpp
+++ b/lib/CodeGen/VirtRegMap.cpp
@@ -261,19 +261,21 @@ bool VirtRegMap::FindUnusedRegisters(LiveIntervals* LIs) {
void VirtRegMap::print(raw_ostream &OS, const Module* M) const {
const TargetRegisterInfo* TRI = MF->getTarget().getRegisterInfo();
+ const MachineRegisterInfo &MRI = MF->getRegInfo();
OS << "********** REGISTER MAP **********\n";
for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i) {
if (Virt2PhysMap[i] != (unsigned)VirtRegMap::NO_PHYS_REG)
OS << "[reg" << i << " -> " << TRI->getName(Virt2PhysMap[i])
- << "]\n";
+ << "] " << MRI.getRegClass(i)->getName() << "\n";
}
for (unsigned i = TargetRegisterInfo::FirstVirtualRegister,
e = MF->getRegInfo().getLastVirtReg(); i <= e; ++i)
if (Virt2StackSlotMap[i] != VirtRegMap::NO_STACK_SLOT)
- OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i] << "]\n";
+ OS << "[reg" << i << " -> fi#" << Virt2StackSlotMap[i]
+ << "] " << MRI.getRegClass(i)->getName() << "\n";
OS << '\n';
}
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 84e0398..7aa0a91 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -46,7 +46,7 @@ namespace {
static cl::opt<RewriterName>
RewriterOpt("rewriter",
- cl::desc("Rewriter to use: (default: local)"),
+ cl::desc("Rewriter to use (default=local)"),
cl::Prefix,
cl::values(clEnumVal(local, "local rewriter"),
clEnumVal(trivial, "trivial rewriter"),
diff --git a/lib/CompilerDriver/CompilationGraph.cpp b/lib/CompilerDriver/CompilationGraph.cpp
index 524607b..7d1c7fe 100644
--- a/lib/CompilerDriver/CompilationGraph.cpp
+++ b/lib/CompilerDriver/CompilationGraph.cpp
@@ -34,7 +34,8 @@ namespace llvmc {
const std::string& LanguageMap::GetLanguage(const sys::Path& File) const {
StringRef suf = File.getSuffix();
- LanguageMap::const_iterator Lang = this->find(suf);
+ LanguageMap::const_iterator Lang =
+ this->find(suf.empty() ? "*empty*" : suf);
if (Lang == this->end())
throw std::runtime_error("File '" + File.str() +
"' has unknown suffix '" + suf.str() + '\'');
@@ -313,7 +314,7 @@ int CompilationGraph::Build (const sys::Path& TempDir,
JoinTool* JT = &dynamic_cast<JoinTool&>(*CurNode->ToolPtr.getPtr());
// Are there any files in the join list?
- if (JT->JoinListEmpty())
+ if (JT->JoinListEmpty() && !(JT->WorksOnEmpty() && InputFilenames.empty()))
continue;
Action CurAction = JT->GenerateAction(CurNode->HasChildren(),
diff --git a/lib/CompilerDriver/Main.cpp b/lib/CompilerDriver/Main.cpp
index 3a3487a..b5e507d 100644
--- a/lib/CompilerDriver/Main.cpp
+++ b/lib/CompilerDriver/Main.cpp
@@ -100,7 +100,8 @@ int Main(int argc, char** argv) {
ProgramName = argv[0];
cl::ParseCommandLineOptions
- (argc, argv, "LLVM Compiler Driver (Work In Progress)", true);
+ (argc, argv, "LLVM Compiler Driver (Work In Progress)",
+ /* ReadResponseFiles = */ false);
PluginLoader Plugins;
Plugins.RunInitialization(langMap, graph);
@@ -126,10 +127,6 @@ int Main(int argc, char** argv) {
return 0;
}
- if (InputFilenames.empty()) {
- throw std::runtime_error("no input files");
- }
-
if (Time) {
GlobalTimeLog = new std::stringstream;
GlobalTimeLog->precision(2);
diff --git a/lib/CompilerDriver/Tool.cpp b/lib/CompilerDriver/Tool.cpp
index 9f4ab49..5e558ca 100644
--- a/lib/CompilerDriver/Tool.cpp
+++ b/lib/CompilerDriver/Tool.cpp
@@ -17,6 +17,8 @@
#include "llvm/ADT/StringExtras.h"
#include "llvm/System/Path.h"
+#include <algorithm>
+
using namespace llvm;
using namespace llvmc;
@@ -71,3 +73,22 @@ sys::Path Tool::OutFilename(const sys::Path& In,
}
return Out;
}
+
+namespace {
+ template <class A, class B>
+ bool CompareFirst (std::pair<A,B> p1, std::pair<A,B> p2) {
+ return std::less<A>()(p1.first, p2.first);
+ }
+}
+
+StrVector Tool::SortArgs(ArgsVector& Args) const {
+ StrVector Out;
+
+ // HACK: this won't be needed when we migrate away from CommandLine.
+ std::stable_sort(Args.begin(), Args.end(), &CompareFirst<unsigned, std::string>);
+ for (ArgsVector::iterator B = Args.begin(), E = Args.end(); B != E; ++B) {
+ Out.push_back(B->second);
+ }
+
+ return Out;
+}
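
SortArgs orders arguments by their recorded position, and std::stable_sort preserves the relative order of arguments that share a position. A hedged sketch, assuming ArgsVector is std::vector<std::pair<unsigned, std::string> > as the CompareFirst instantiation suggests:

    ArgsVector Args;
    Args.push_back(std::make_pair(2, "-o"));
    Args.push_back(std::make_pair(1, "a.o"));
    Args.push_back(std::make_pair(2, "out"));
    // After SortArgs: "a.o", "-o", "out"; the two position-2 entries keep
    // their original relative order thanks to std::stable_sort.
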
diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp
index 6db3ef9..b2e2a04 100644
--- a/lib/ExecutionEngine/ExecutionEngine.cpp
+++ b/lib/ExecutionEngine/ExecutionEngine.cpp
@@ -344,7 +344,7 @@ int ExecutionEngine::runFunctionAsMain(Function *Fn,
}
// FALLS THROUGH
case 0:
- if (!isa<IntegerType>(FTy->getReturnType()) &&
+ if (!FTy->getReturnType()->isIntegerTy() &&
!FTy->getReturnType()->isVoidTy()) {
llvm_report_error("Invalid return type of main() supplied");
}
@@ -614,7 +614,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) {
GV.IntVal.doubleToBits(GV.DoubleVal);
break;
case Type::PointerTyID:
- assert(isa<PointerType>(DestTy) && "Invalid bitcast");
+ assert(DestTy->isPointerTy() && "Invalid bitcast");
break; // getConstantValue(Op0) above already converted it
}
return GV;
diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
index 141cb27..c7495d4 100644
--- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp
+++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp
@@ -87,11 +87,11 @@ void LLVMDisposeGenericValue(LLVMGenericValueRef GenVal) {
/*===-- Operations on execution engines -----------------------------------===*/
-LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
- LLVMModuleProviderRef MP,
- char **OutError) {
+LLVMBool LLVMCreateExecutionEngineForModule(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleRef M,
+ char **OutError) {
std::string Error;
- EngineBuilder builder(unwrap(MP));
+ EngineBuilder builder(unwrap(M));
builder.setEngineKind(EngineKind::Either)
.setErrorStr(&Error);
if (ExecutionEngine *EE = builder.create()){
@@ -102,11 +102,11 @@ LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
return 1;
}
-LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
- LLVMModuleProviderRef MP,
- char **OutError) {
+LLVMBool LLVMCreateInterpreterForModule(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleRef M,
+ char **OutError) {
std::string Error;
- EngineBuilder builder(unwrap(MP));
+ EngineBuilder builder(unwrap(M));
builder.setEngineKind(EngineKind::Interpreter)
.setErrorStr(&Error);
if (ExecutionEngine *Interp = builder.create()) {
@@ -117,12 +117,12 @@ LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
return 1;
}
-LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
- LLVMModuleProviderRef MP,
- unsigned OptLevel,
- char **OutError) {
+LLVMBool LLVMCreateJITCompilerForModule(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleRef M,
+ unsigned OptLevel,
+ char **OutError) {
std::string Error;
- EngineBuilder builder(unwrap(MP));
+ EngineBuilder builder(unwrap(M));
builder.setEngineKind(EngineKind::JIT)
.setErrorStr(&Error)
.setOptLevel((CodeGenOpt::Level)OptLevel);
@@ -134,6 +134,35 @@ LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
return 1;
}
+LLVMBool LLVMCreateExecutionEngine(LLVMExecutionEngineRef *OutEE,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateExecutionEngineForModule(OutEE,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OutError);
+}
+
+LLVMBool LLVMCreateInterpreter(LLVMExecutionEngineRef *OutInterp,
+ LLVMModuleProviderRef MP,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateInterpreterForModule(OutInterp,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OutError);
+}
+
+LLVMBool LLVMCreateJITCompiler(LLVMExecutionEngineRef *OutJIT,
+ LLVMModuleProviderRef MP,
+ unsigned OptLevel,
+ char **OutError) {
+ /* The module provider is now actually a module. */
+ return LLVMCreateJITCompilerForModule(OutJIT,
+ reinterpret_cast<LLVMModuleRef>(MP),
+ OptLevel, OutError);
+}
+
+
void LLVMDisposeExecutionEngine(LLVMExecutionEngineRef EE) {
delete unwrap(EE);
}
@@ -173,17 +202,29 @@ void LLVMFreeMachineCodeForFunction(LLVMExecutionEngineRef EE, LLVMValueRef F) {
unwrap(EE)->freeMachineCodeForFunction(unwrap<Function>(F));
}
+void LLVMAddModule(LLVMExecutionEngineRef EE, LLVMModuleRef M){
+ unwrap(EE)->addModule(unwrap(M));
+}
+
void LLVMAddModuleProvider(LLVMExecutionEngineRef EE, LLVMModuleProviderRef MP){
- unwrap(EE)->addModule(unwrap(MP));
+ /* The module provider is now actually a module. */
+ LLVMAddModule(EE, reinterpret_cast<LLVMModuleRef>(MP));
+}
+
+LLVMBool LLVMRemoveModule(LLVMExecutionEngineRef EE, LLVMModuleRef M,
+ LLVMModuleRef *OutMod, char **OutError) {
+ Module *Mod = unwrap(M);
+ unwrap(EE)->removeModule(Mod);
+ *OutMod = wrap(Mod);
+ return 0;
}
LLVMBool LLVMRemoveModuleProvider(LLVMExecutionEngineRef EE,
LLVMModuleProviderRef MP,
LLVMModuleRef *OutMod, char **OutError) {
- Module *M = unwrap(MP);
- unwrap(EE)->removeModule(M);
- *OutMod = wrap(M);
- return 0;
+ /* The module provider is now actually a module. */
+ return LLVMRemoveModule(EE, reinterpret_cast<LLVMModuleRef>(MP), OutMod,
+ OutError);
}
LLVMBool LLVMFindFunction(LLVMExecutionEngineRef EE, const char *Name,
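
The *ForModule entry points take an LLVMModuleRef directly; the old ModuleProvider-based functions survive only as shims that reinterpret the provider as a module. An illustrative usage sketch of the new C API, assuming Mod is an already-created LLVMModuleRef and the engine components are linked in:

    #include <llvm-c/Core.h>
    #include <llvm-c/ExecutionEngine.h>
    #include <stdio.h>

    LLVMExecutionEngineRef EE;
    char *Error = NULL;
    if (LLVMCreateExecutionEngineForModule(&EE, Mod, &Error)) {
      fprintf(stderr, "failed to create engine: %s\n", Error);
      LLVMDisposeMessage(Error);
    } else {
      /* ... run functions via EE ... */
      LLVMDisposeExecutionEngine(EE);
    }
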
diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp
index e234cf1..a2aad5a 100644
--- a/lib/ExecutionEngine/Interpreter/Execution.cpp
+++ b/lib/ExecutionEngine/Interpreter/Execution.cpp
@@ -761,7 +761,7 @@ void Interpreter::visitAllocaInst(AllocaInst &I) {
GenericValue Interpreter::executeGEPOperation(Value *Ptr, gep_type_iterator I,
gep_type_iterator E,
ExecutionContext &SF) {
- assert(isa<PointerType>(Ptr->getType()) &&
+ assert(Ptr->getType()->isPointerTy() &&
"Cannot getElementOffset of a nonpointer type!");
uint64_t Total = 0;
@@ -1031,7 +1031,7 @@ GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy,
ExecutionContext &SF) {
uint32_t DBitWidth = cast<IntegerType>(DstTy)->getBitWidth();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- assert(isa<PointerType>(SrcVal->getType()) && "Invalid PtrToInt instruction");
+ assert(SrcVal->getType()->isPointerTy() && "Invalid PtrToInt instruction");
Dest.IntVal = APInt(DBitWidth, (intptr_t) Src.PointerVal);
return Dest;
@@ -1040,7 +1040,7 @@ GenericValue Interpreter::executePtrToIntInst(Value *SrcVal, const Type *DstTy,
GenericValue Interpreter::executeIntToPtrInst(Value *SrcVal, const Type *DstTy,
ExecutionContext &SF) {
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- assert(isa<PointerType>(DstTy) && "Invalid PtrToInt instruction");
+ assert(DstTy->isPointerTy() && "Invalid IntToPtr instruction");
uint32_t PtrSize = TD.getPointerSizeInBits();
if (PtrSize != Src.IntVal.getBitWidth())
@@ -1055,8 +1055,8 @@ GenericValue Interpreter::executeBitCastInst(Value *SrcVal, const Type *DstTy,
const Type *SrcTy = SrcVal->getType();
GenericValue Dest, Src = getOperandValue(SrcVal, SF);
- if (isa<PointerType>(DstTy)) {
- assert(isa<PointerType>(SrcTy) && "Invalid BitCast");
+ if (DstTy->isPointerTy()) {
+ assert(SrcTy->isPointerTy() && "Invalid BitCast");
Dest.PointerVal = Src.PointerVal;
} else if (DstTy->isIntegerTy()) {
if (SrcTy->isFloatTy()) {
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 18a996e..dd74d73 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -415,8 +415,8 @@ GenericValue JIT::runFunction(Function *F,
switch (ArgValues.size()) {
case 3:
if (FTy->getParamType(0)->isIntegerTy(32) &&
- isa<PointerType>(FTy->getParamType(1)) &&
- isa<PointerType>(FTy->getParamType(2))) {
+ FTy->getParamType(1)->isPointerTy() &&
+ FTy->getParamType(2)->isPointerTy()) {
int (*PF)(int, char **, const char **) =
(int(*)(int, char **, const char **))(intptr_t)FPtr;
@@ -430,7 +430,7 @@ GenericValue JIT::runFunction(Function *F,
break;
case 2:
if (FTy->getParamType(0)->isIntegerTy(32) &&
- isa<PointerType>(FTy->getParamType(1))) {
+ FTy->getParamType(1)->isPointerTy()) {
int (*PF)(int, char **) = (int(*)(int, char **))(intptr_t)FPtr;
// Call the function.
diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp
index 7f441b0..8487c83 100644
--- a/lib/Linker/LinkModules.cpp
+++ b/lib/Linker/LinkModules.cpp
@@ -159,7 +159,7 @@ static bool RecursiveResolveTypesI(const Type *DstTy, const Type *SrcTy,
if (DstTy == SrcTy) return false; // If already equal, noop
// If we found our opaque type, resolve it now!
- if (isa<OpaqueType>(DstTy) || isa<OpaqueType>(SrcTy))
+ if (DstTy->isOpaqueTy() || SrcTy->isOpaqueTy())
return ResolveTypes(DstTy, SrcTy);
// Two types cannot be resolved together if they are of different primitive
diff --git a/lib/MC/CMakeLists.txt b/lib/MC/CMakeLists.txt
index 9ead33b..4cf71dc 100644
--- a/lib/MC/CMakeLists.txt
+++ b/lib/MC/CMakeLists.txt
@@ -18,4 +18,5 @@ add_llvm_library(LLVMMC
MCStreamer.cpp
MCSymbol.cpp
MCValue.cpp
+ TargetAsmBackend.cpp
)
diff --git a/lib/MC/MCAsmStreamer.cpp b/lib/MC/MCAsmStreamer.cpp
index 6add1b4..66a0a24 100644
--- a/lib/MC/MCAsmStreamer.cpp
+++ b/lib/MC/MCAsmStreamer.cpp
@@ -134,6 +134,9 @@ public:
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
+
virtual void EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
@@ -513,6 +516,13 @@ void MCAsmStreamer::EmitValueToAlignment(unsigned ByteAlignment, int64_t Value,
EmitEOL();
}
+void MCAsmStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ // Emit with a text fill value.
+ EmitValueToAlignment(ByteAlignment, MAI.getTextAlignFillValue(),
+ 1, MaxBytesToEmit);
+}
+
void MCAsmStreamer::EmitValueToOffset(const MCExpr *Offset,
unsigned char Value) {
// FIXME: Verify that Offset is associated with the current section.
diff --git a/lib/MC/MCAssembler.cpp b/lib/MC/MCAssembler.cpp
index 653fbf2..96227db 100644
--- a/lib/MC/MCAssembler.cpp
+++ b/lib/MC/MCAssembler.cpp
@@ -42,6 +42,8 @@ STATISTIC(EmittedFragments, "Number of emitted assembler fragments");
static void WriteFileData(raw_ostream &OS, const MCSectionData &SD,
MachObjectWriter &MOW);
+static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW);
+
/// isVirtualSection - Check if this is a section which does not actually exist
/// in the object file.
static bool isVirtualSection(const MCSection &Section) {
@@ -51,7 +53,7 @@ static bool isVirtualSection(const MCSection &Section) {
return (Type == MCSectionMachO::S_ZEROFILL);
}
-static unsigned getFixupKindLog2Size(MCFixupKind Kind) {
+static unsigned getFixupKindLog2Size(unsigned Kind) {
switch (Kind) {
default: llvm_unreachable("invalid fixup kind!");
case X86::reloc_pcrel_1byte:
@@ -64,7 +66,7 @@ static unsigned getFixupKindLog2Size(MCFixupKind Kind) {
}
}
-static bool isFixupKindPCRel(MCFixupKind Kind) {
+static bool isFixupKindPCRel(unsigned Kind) {
switch (Kind) {
default:
return false;
@@ -439,6 +441,7 @@ public:
std::vector<MachRelocationEntry> &Relocs) {
uint32_t Address = Fragment.getOffset() + Fixup.Offset;
unsigned IsPCRel = 0;
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
unsigned Type = RIT_Vanilla;
// See <reloc.h>.
@@ -454,12 +457,10 @@ public:
Value2 = SD->getFragment()->getAddress() + SD->getOffset();
}
- unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
-
// The value which goes in the fixup is current value of the expression.
Fixup.FixedValue = Value - Value2 + Target.getConstant();
if (isFixupKindPCRel(Fixup.Kind)) {
- Fixup.FixedValue -= Address + (1 << Log2Size);
+ Fixup.FixedValue -= Address;
IsPCRel = 1;
}
@@ -507,6 +508,7 @@ public:
uint32_t Value = 0;
unsigned Index = 0;
unsigned IsPCRel = 0;
+ unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
unsigned IsExtern = 0;
unsigned Type = 0;
@@ -544,10 +546,8 @@ public:
// The value which goes in the fixup is current value of the expression.
Fixup.FixedValue = Value + Target.getConstant();
- unsigned Log2Size = getFixupKindLog2Size(Fixup.Kind);
-
if (isFixupKindPCRel(Fixup.Kind)) {
- Fixup.FixedValue -= Address + (1<<Log2Size);
+ Fixup.FixedValue -= Address;
IsPCRel = 1;
}
@@ -1060,6 +1060,64 @@ void MCAssembler::LayoutSection(MCSectionData &SD) {
SD.setFileSize(Address - SD.getAddress());
}
+/// WriteNopData - Write optimal nops to the output file for the \arg Count
+/// bytes. This returns the number of bytes written. It may return 0 if
+/// the \arg Count is more than the maximum optimal nops.
+///
+/// FIXME: this is X86 32-bit specific and should move to a better place.
+static uint64_t WriteNopData(uint64_t Count, MachObjectWriter &MOW) {
+ static const uint8_t Nops[16][16] = {
+ // nop
+ {0x90},
+ // xchg %ax,%ax
+ {0x66, 0x90},
+ // nopl (%[re]ax)
+ {0x0f, 0x1f, 0x00},
+ // nopl 0(%[re]ax)
+ {0x0f, 0x1f, 0x40, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw 0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopw %cs:0L(%[re]ax,%[re]ax,1)
+ {0x66, 0x2e, 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0(%[re]ax,%[re]ax,1)
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ // nopw 0(%[re]ax,%[re]ax,1)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00},
+ // nopw 0(%[re]ax,%[re]ax,1)
+ // nopl 0L(%[re]ax)
+ {0x66, 0x0f, 0x1f, 0x44, 0x00, 0x00,
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ // nopl 0L(%[re]ax)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00},
+ // nopl 0L(%[re]ax)
+ // nopl 0L(%[re]ax,%[re]ax,1)
+ {0x0f, 0x1f, 0x80, 0x00, 0x00, 0x00, 0x00,
+ 0x0f, 0x1f, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00}
+ };
+
+ if (Count > 15)
+ return 0;
+
+ for (uint64_t i = 0; i < Count; i++)
+ MOW.Write8 (uint8_t(Nops[Count - 1][i]));
+
+ return Count;
+}
+
/// WriteFileData - Write the \arg F data to the output file.
static void WriteFileData(raw_ostream &OS, const MCFragment &F,
MachObjectWriter &MOW) {
@@ -1083,6 +1141,14 @@ static void WriteFileData(raw_ostream &OS, const MCFragment &F,
"' is not a divisor of padding size '" +
Twine(AF.getFileSize()) + "'");
+ // See if we are aligning with nops, and if so do that first to try to fill
+ // the Count bytes. Then, if that did not fill any bytes or there are any
+ // bytes left to fill, use the Value and ValueSize to fill the rest.
+ if (AF.getEmitNops()) {
+ uint64_t NopByteCount = WriteNopData(Count, MOW);
+ Count -= NopByteCount;
+ }
+
for (uint64_t i = 0; i != Count; ++i) {
switch (AF.getValueSize()) {
default:
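
The padding logic tries the nop table first and falls back to the generic fill loop for whatever remains; since WriteNopData returns 0 for any request over 15 bytes, large alignment gaps are still filled entirely with the fragment's value bytes. A hypothetical illustration of the resulting split, mirroring the contract above:

    uint64_t Count = 20;                          // padding bytes needed
    uint64_t NopBytes = (Count > 15) ? 0 : Count; // WriteNopData's contract
    uint64_t FillBytes = Count - NopBytes;        // handled by the fill loop
    // Here Count > 15, so no nops are emitted and all 20 bytes come from
    // the fragment's fill value.
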
diff --git a/lib/MC/MCMachOStreamer.cpp b/lib/MC/MCMachOStreamer.cpp
index 0c9627d..a7a8a5d 100644
--- a/lib/MC/MCMachOStreamer.cpp
+++ b/lib/MC/MCMachOStreamer.cpp
@@ -137,6 +137,8 @@ public:
virtual void EmitValueToAlignment(unsigned ByteAlignment, int64_t Value = 0,
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0);
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0);
virtual void EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0);
@@ -333,15 +335,13 @@ void MCMachOStreamer::EmitBytes(StringRef Data, unsigned AddrSpace) {
void MCMachOStreamer::EmitValue(const MCExpr *Value, unsigned Size,
unsigned AddrSpace) {
- // Assume the front-end will have evaluate things absolute expressions, so
- // just create data + fixup.
MCDataFragment *DF = dyn_cast_or_null<MCDataFragment>(getCurrentFragment());
if (!DF)
DF = new MCDataFragment(CurSectionData);
// Avoid fixups when possible.
int64_t AbsValue;
- if (Value->EvaluateAsAbsolute(AbsValue)) {
+ if (AddValueSymbols(Value)->EvaluateAsAbsolute(AbsValue)) {
// FIXME: Endianness assumption.
for (unsigned i = 0; i != Size; ++i)
DF->getContents().push_back(uint8_t(AbsValue >> (i * 8)));
@@ -359,7 +359,20 @@ void MCMachOStreamer::EmitValueToAlignment(unsigned ByteAlignment,
if (MaxBytesToEmit == 0)
MaxBytesToEmit = ByteAlignment;
new MCAlignFragment(ByteAlignment, Value, ValueSize, MaxBytesToEmit,
- CurSectionData);
+ false /* EmitNops */, CurSectionData);
+
+ // Update the maximum alignment on the current section if necessary.
+ if (ByteAlignment > CurSectionData->getAlignment())
+ CurSectionData->setAlignment(ByteAlignment);
+}
+
+void MCMachOStreamer::EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit) {
+ if (MaxBytesToEmit == 0)
+ MaxBytesToEmit = ByteAlignment;
+ // FIXME: the 0x90 is the default x86 1-byte nop opcode.
+ new MCAlignFragment(ByteAlignment, 0x90, 1, MaxBytesToEmit,
+ true /* EmitNops */, CurSectionData);
// Update the maximum alignment on the current section if necessary.
if (ByteAlignment > CurSectionData->getAlignment())
diff --git a/lib/MC/MCNullStreamer.cpp b/lib/MC/MCNullStreamer.cpp
index 46e9ebf..ab61799 100644
--- a/lib/MC/MCNullStreamer.cpp
+++ b/lib/MC/MCNullStreamer.cpp
@@ -55,6 +55,9 @@ namespace {
unsigned ValueSize = 1,
unsigned MaxBytesToEmit = 0) {}
+ virtual void EmitCodeAlignment(unsigned ByteAlignment,
+ unsigned MaxBytesToEmit = 0) {}
+
virtual void EmitValueToOffset(const MCExpr *Offset,
unsigned char Value = 0) {}
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index 51ad5d1..6185c30 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -146,7 +146,7 @@ bool AsmParser::Run() {
// FIXME: Target hook & command line option for initial section.
Out.SwitchSection(getMachOSection("__TEXT", "__text",
MCSectionMachO::S_ATTR_PURE_INSTRUCTIONS,
- 0, SectionKind()));
+ 0, SectionKind::getText()));
// Prime the lexer.
@@ -914,8 +914,10 @@ bool AsmParser::ParseDirectiveDarwinSection() {
return Error(Loc, ErrorStr.c_str());
// FIXME: Arch specific.
+ bool isText = Segment == "__TEXT"; // FIXME: Hack.
Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize,
- SectionKind()));
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
return false;
}
@@ -929,8 +931,10 @@ bool AsmParser::ParseDirectiveSectionSwitch(const char *Segment,
Lex();
// FIXME: Arch specific.
+ bool isText = StringRef(Segment) == "__TEXT"; // FIXME: Hack.
Out.SwitchSection(getMachOSection(Segment, Section, TAA, StubSize,
- SectionKind()));
+ isText ? SectionKind::getText()
+ : SectionKind::getDataRel()));
// Set the implicit alignment, if any.
//
@@ -1237,8 +1241,14 @@ bool AsmParser::ParseDirectiveAlign(bool IsPow2, unsigned ValueSize) {
}
}
- // FIXME: Target specific behavior about how the "extra" bytes are filled.
- Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
+ // FIXME: hard code the parser to use EmitCodeAlignment for text when using
+ // the TextAlignFillValue.
+ if (Out.getCurrentSection()->getKind().isText() &&
+ Lexer.getMAI().getTextAlignFillValue() == FillExpr)
+ Out.EmitCodeAlignment(Alignment, MaxBytesToFill);
+ else
+ // FIXME: Target specific behavior about how the "extra" bytes are filled.
+ Out.EmitValueToAlignment(Alignment, FillExpr, ValueSize, MaxBytesToFill);
return false;
}
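
In effect, on x86 a text-section directive such as ".align 4, 0x90", whose fill byte matches the target's TextAlignFillValue, now reaches EmitCodeAlignment and becomes an MCAlignFragment with EmitNops set, so the gap is padded with optimal multi-byte nops rather than repeated single-byte fills.
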
@@ -1362,7 +1372,7 @@ bool AsmParser::ParseDirectiveComm(bool IsLocal) {
if (IsLocal) {
Out.EmitZerofill(getMachOSection("__DATA", "__bss",
MCSectionMachO::S_ZEROFILL, 0,
- SectionKind()),
+ SectionKind::getBSS()),
Sym, Size, 1 << Pow2Alignment);
return false;
}
@@ -1398,7 +1408,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
// Create the zerofill section but no symbol
Out.EmitZerofill(getMachOSection(Segment, Section,
MCSectionMachO::S_ZEROFILL, 0,
- SectionKind()));
+ SectionKind::getBSS()));
return false;
}
@@ -1456,7 +1466,7 @@ bool AsmParser::ParseDirectiveDarwinZerofill() {
// FIXME: Arch specific.
Out.EmitZerofill(getMachOSection(Segment, Section,
MCSectionMachO::S_ZEROFILL, 0,
- SectionKind()),
+ SectionKind::getBSS()),
Sym, Size, 1 << Pow2Alignment);
return false;
diff --git a/lib/MC/TargetAsmBackend.cpp b/lib/MC/TargetAsmBackend.cpp
new file mode 100644
index 0000000..918d272
--- /dev/null
+++ b/lib/MC/TargetAsmBackend.cpp
@@ -0,0 +1,19 @@
+//===-- TargetAsmBackend.cpp - Target Assembly Backend ---------------------==//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+TargetAsmBackend::TargetAsmBackend(const Target &T)
+ : TheTarget(T)
+{
+}
+
+TargetAsmBackend::~TargetAsmBackend() {
+}
diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp
index 1e6d22f..619f061 100644
--- a/lib/Support/APFloat.cpp
+++ b/lib/Support/APFloat.cpp
@@ -17,6 +17,7 @@
#include "llvm/ADT/FoldingSet.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include <limits.h>
#include <cstring>
using namespace llvm;
@@ -625,17 +626,58 @@ APFloat::copySignificand(const APFloat &rhs)
/* Make this number a NaN, with an arbitrary but deterministic value
for the significand. If double or longer, this is a signalling NaN,
which may not be ideal. If float, this is QNaN(0). */
-void
-APFloat::makeNaN(unsigned type)
+void APFloat::makeNaN(bool SNaN, bool Negative, const APInt *fill)
{
category = fcNaN;
- // FIXME: Add double and long double support for QNaN(0).
- if (semantics->precision == 24 && semantics->maxExponent == 127) {
- type |= 0x7fc00000U;
- type &= ~0x80000000U;
- } else
- type = ~0U;
- APInt::tcSet(significandParts(), type, partCount());
+ sign = Negative;
+
+ integerPart *significand = significandParts();
+ unsigned numParts = partCount();
+
+ // Set the significand bits to the fill.
+ if (!fill || fill->getNumWords() < numParts)
+ APInt::tcSet(significand, 0, numParts);
+ if (fill) {
+ APInt::tcAssign(significand, fill->getRawData(),
+ std::min(fill->getNumWords(), numParts));
+
+ // Zero out the excess bits of the significand.
+ unsigned bitsToPreserve = semantics->precision - 1;
+ unsigned part = bitsToPreserve / 64;
+ bitsToPreserve %= 64;
+ significand[part] &= ((1ULL << bitsToPreserve) - 1);
+ for (part++; part != numParts; ++part)
+ significand[part] = 0;
+ }
+
+ unsigned QNaNBit = semantics->precision - 2;
+
+ if (SNaN) {
+ // We always have to clear the QNaN bit to make it an SNaN.
+ APInt::tcClearBit(significand, QNaNBit);
+
+ // If there are no bits set in the payload, we have to set
+ // *something* to make it a NaN instead of an infinity;
+ // conventionally, this is the next bit down from the QNaN bit.
+ if (APInt::tcIsZero(significand, numParts))
+ APInt::tcSetBit(significand, QNaNBit - 1);
+ } else {
+ // We always have to set the QNaN bit to make it a QNaN.
+ APInt::tcSetBit(significand, QNaNBit);
+ }
+
+ // For x87 extended precision, we want to make a NaN, not a
+ // pseudo-NaN. Maybe we should expose the ability to make
+ // pseudo-NaNs?
+ if (semantics == &APFloat::x87DoubleExtended)
+ APInt::tcSetBit(significand, QNaNBit + 1);
+}
+
+APFloat APFloat::makeNaN(const fltSemantics &Sem, bool SNaN, bool Negative,
+ const APInt *fill) {
+ APFloat value(Sem, uninitialized);
+ value.makeNaN(SNaN, Negative, fill);
+ return value;
}
APFloat &
@@ -700,9 +742,14 @@ APFloat::APFloat(const fltSemantics &ourSemantics) {
sign = false;
}
+APFloat::APFloat(const fltSemantics &ourSemantics, uninitializedTag tag) {
+ assertArithmeticOK(ourSemantics);
+ // Allocates storage if necessary but does not initialize it.
+ initialize(&ourSemantics);
+}
APFloat::APFloat(const fltSemantics &ourSemantics,
- fltCategory ourCategory, bool negative, unsigned type)
+ fltCategory ourCategory, bool negative)
{
assertArithmeticOK(ourSemantics);
initialize(&ourSemantics);
@@ -711,7 +758,7 @@ APFloat::APFloat(const fltSemantics &ourSemantics,
if (category == fcNormal)
category = fcZero;
else if (ourCategory == fcNaN)
- makeNaN(type);
+ makeNaN();
}
APFloat::APFloat(const fltSemantics &ourSemantics, const StringRef& text)
@@ -2345,11 +2392,24 @@ APFloat::convertFromDecimalString(const StringRef &str, roundingMode rounding_mo
if (decDigitValue(*D.firstSigDigit) >= 10U) {
category = fcZero;
fs = opOK;
- } else if ((D.normalizedExponent + 1) * 28738
- <= 8651 * (semantics->minExponent - (int) semantics->precision)) {
+
+ /* Check whether the normalized exponent is high enough to overflow
+ max during the log-rebasing in the max-exponent check below. */
+ } else if (D.normalizedExponent - 1 > INT_MAX / 42039) {
+ fs = handleOverflow(rounding_mode);
+
+ /* If it wasn't, then it also wasn't high enough to overflow max
+ during the log-rebasing in the min-exponent check. Check that it
+ won't overflow min in either check, then perform the min-exponent
+ check. */
+ } else if (D.normalizedExponent - 1 < INT_MIN / 42039 ||
+ (D.normalizedExponent + 1) * 28738 <=
+ 8651 * (semantics->minExponent - (int) semantics->precision)) {
/* Underflow to zero and round. */
zeroSignificand();
fs = normalize(rounding_mode, lfLessThanHalf);
+
+ /* We can finally safely perform the max-exponent check. */
} else if ((D.normalizedExponent - 1) * 42039
>= 12655 * semantics->maxExponent) {
/* Overflow and round. */
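
The reworked makeNaN exposes sign, signaling, and payload control through the new static entry point. A hedged usage sketch against the post-patch headers:

    #include "llvm/ADT/APFloat.h"
    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    // Quiet negative NaN with the default payload, IEEE single precision.
    APFloat QNaN = APFloat::makeNaN(APFloat::IEEEsingle, /*SNaN=*/false,
                                    /*Negative=*/true, /*fill=*/0);
    // Signaling NaN carrying an explicit payload; the QNaN bit is cleared
    // and, if the payload would be empty, the next bit down is set instead.
    APInt Payload(32, 0xf);
    APFloat SNaN = APFloat::makeNaN(APFloat::IEEEsingle, /*SNaN=*/true,
                                    /*Negative=*/false, &Payload);
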
diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp
index 3bce3f3..6a6384a 100644
--- a/lib/Support/APInt.cpp
+++ b/lib/Support/APInt.cpp
@@ -2344,13 +2344,21 @@ APInt::tcExtractBit(const integerPart *parts, unsigned int bit)
& ((integerPart) 1 << bit % integerPartWidth)) != 0;
}
-/* Set the given bit of a bignum. */
+/* Set the given bit of a bignum. */
void
APInt::tcSetBit(integerPart *parts, unsigned int bit)
{
parts[bit / integerPartWidth] |= (integerPart) 1 << (bit % integerPartWidth);
}
+/* Clears the given bit of a bignum. */
+void
+APInt::tcClearBit(integerPart *parts, unsigned int bit)
+{
+ parts[bit / integerPartWidth] &=
+ ~((integerPart) 1 << (bit % integerPartWidth));
+}
+
/* Returns the bit number of the least significant set bit of a
number. If the input number has no bits set -1U is returned. */
unsigned int
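
tcClearBit complements the existing tcSetBit/tcExtractBit bignum primitives; all three address bits across the part array. A small illustrative sketch:

    #include "llvm/ADT/APInt.h"
    using namespace llvm;

    integerPart Parts[2] = {0, 0};        // 128-bit bignum, two 64-bit parts
    APInt::tcSetBit(Parts, 70);           // sets bit 6 of Parts[1]
    bool Was = APInt::tcExtractBit(Parts, 70);  // true
    APInt::tcClearBit(Parts, 70);         // Parts is all zero again
    (void)Was;
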
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 961dc1f..2ab4103 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -650,7 +650,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
if (Handler == 0) {
if (SinkOpts.empty()) {
errs() << ProgramName << ": Unknown command line argument '"
- << argv[i] << "'. Try: '" << argv[0] << " --help'\n";
+ << argv[i] << "'. Try: '" << argv[0] << " -help'\n";
ErrorParsing = true;
} else {
for (SmallVectorImpl<Option*>::iterator I = SinkOpts.begin(),
@@ -673,7 +673,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
errs() << ProgramName
<< ": Not enough positional command line arguments specified!\n"
<< "Must specify at least " << NumPositionalRequired
- << " positional arguments: See: " << argv[0] << " --help\n";
+ << " positional arguments: See: " << argv[0] << " -help\n";
ErrorParsing = true;
} else if (!HasUnlimitedPositionals
@@ -681,7 +681,7 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
errs() << ProgramName
<< ": Too many positional arguments specified!\n"
<< "Can specify at most " << PositionalOpts.size()
- << " positional arguments: See: " << argv[0] << " --help\n";
+ << " positional arguments: See: " << argv[0] << " -help\n";
ErrorParsing = true;
} else if (ConsumeAfterOpt == 0) {
@@ -1029,7 +1029,7 @@ void generic_parser_base::printOptionInfo(const Option &O,
//===----------------------------------------------------------------------===//
-// --help and --help-hidden option implementation
+// -help and -help-hidden option implementation
//
static int OptNameCompare(const void *LHS, const void *RHS) {
@@ -1134,7 +1134,7 @@ static HelpPrinter NormalPrinter(false);
static HelpPrinter HiddenPrinter(true);
static cl::opt<HelpPrinter, true, parser<bool> >
-HOp("help", cl::desc("Display available options (--help-hidden for more)"),
+HOp("help", cl::desc("Display available options (-help-hidden for more)"),
cl::location(NormalPrinter), cl::ValueDisallowed);
static cl::opt<HelpPrinter, true, parser<bool> >
@@ -1222,8 +1222,8 @@ void cl::PrintHelpMessage() {
// NormalPrinter variable is a HelpPrinter and the help gets printed when
// its operator= is invoked. That's because the "normal" usages of the
// help printer is to be assigned true/false depending on whether the
- // --help option was given or not. Since we're circumventing that we have
- // to make it look like --help was given, so we assign true.
+ // -help option was given or not. Since we're circumventing that we have
+ // to make it look like -help was given, so we assign true.
NormalPrinter = true;
}
diff --git a/lib/Support/FormattedStream.cpp b/lib/Support/FormattedStream.cpp
index 39b6cb3..c72b5a1 100644
--- a/lib/Support/FormattedStream.cpp
+++ b/lib/Support/FormattedStream.cpp
@@ -56,8 +56,6 @@ void formatted_raw_ostream::ComputeColumn(const char *Ptr, size_t Size) {
/// PadToColumn - Align the output to some column number.
///
/// \param NewCol - The column to move to.
-/// \param MinPad - The minimum space to give after the most recent
-/// I/O, even if the current column + minpad > newcol.
///
formatted_raw_ostream &formatted_raw_ostream::PadToColumn(unsigned NewCol) {
// Figure out what's in the buffer and add it to the column count.
diff --git a/lib/Support/GraphWriter.cpp b/lib/Support/GraphWriter.cpp
index c8bca6e..ec84f9b 100644
--- a/lib/Support/GraphWriter.cpp
+++ b/lib/Support/GraphWriter.cpp
@@ -137,7 +137,7 @@ void llvm::DisplayGraph(const sys::Path &Filename, bool wait,
args.clear();
args.push_back(gv.c_str());
args.push_back(PSFilename.c_str());
- args.push_back("-spartan");
+ args.push_back("--spartan");
args.push_back(0);
ErrMsg.clear();
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index 9253b01..eb046d0 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -174,7 +174,8 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
#ifdef O_BINARY
OpenFlags |= O_BINARY; // Open input file in binary mode on win32.
#endif
- int FD = ::open(Filename.str().c_str(), O_RDONLY|OpenFlags);
+ SmallString<256> PathBuf(Filename.begin(), Filename.end());
+ int FD = ::open(PathBuf.c_str(), O_RDONLY|OpenFlags);
if (FD == -1) {
if (ErrStr) *ErrStr = strerror(errno);
return 0;
diff --git a/lib/Support/Regex.cpp b/lib/Support/Regex.cpp
index 618ca05..a7631de 100644
--- a/lib/Support/Regex.cpp
+++ b/lib/Support/Regex.cpp
@@ -90,3 +90,79 @@ bool Regex::match(const StringRef &String, SmallVectorImpl<StringRef> *Matches){
return true;
}
+
+std::string Regex::sub(StringRef Repl, StringRef String,
+ std::string *Error) {
+ SmallVector<StringRef, 8> Matches;
+
+ // Reset error, if given.
+ if (Error && !Error->empty()) *Error = "";
+
+ // Return the input if there was no match.
+ if (!match(String, &Matches))
+ return String;
+
+ // Otherwise splice in the replacement string, starting with the prefix before
+ // the match.
+ std::string Res(String.begin(), Matches[0].begin());
+
+ // Then the replacement string, honoring possible substitutions.
+ while (!Repl.empty()) {
+ // Skip to the next escape.
+ std::pair<StringRef, StringRef> Split = Repl.split('\\');
+
+ // Add the skipped substring.
+ Res += Split.first;
+
+ // Check for termination and trailing backslash.
+ if (Split.second.empty()) {
+ if (Repl.size() != Split.first.size() &&
+ Error && Error->empty())
+ *Error = "replacement string contained trailing backslash";
+ break;
+ }
+
+ // Otherwise update the replacement string and interpret escapes.
+ Repl = Split.second;
+
+ // FIXME: We should have a StringExtras function for mapping C99 escapes.
+ switch (Repl[0]) {
+ // Treat all unrecognized characters as self-quoting.
+ default:
+ Res += Repl[0];
+ Repl = Repl.substr(1);
+ break;
+
+ // Single character escapes.
+ case 't':
+ Res += '\t';
+ Repl = Repl.substr(1);
+ break;
+ case 'n':
+ Res += '\n';
+ Repl = Repl.substr(1);
+ break;
+
+ // Decimal escapes are backreferences.
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9': {
+ // Extract the backreference number.
+ StringRef Ref = Repl.slice(0, Repl.find_first_not_of("0123456789"));
+ Repl = Repl.substr(Ref.size());
+
+ unsigned RefValue;
+ if (!Ref.getAsInteger(10, RefValue) &&
+ RefValue < Matches.size())
+ Res += Matches[RefValue];
+ else if (Error && Error->empty())
+ *Error = "invalid backreference string '" + Ref.str() + "'";
+ break;
+ }
+ }
+ }
+
+ // And finally the suffix.
+ Res += StringRef(Matches[0].end(), String.end() - Matches[0].end());
+
+ return Res;
+}
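
Regex::sub performs a single replacement, honoring \t, \n, and decimal backreferences in the replacement string. An illustrative call:

    #include "llvm/Support/Regex.h"
    #include <string>
    using namespace llvm;

    Regex R("([a-z]+)-([0-9]+)");
    std::string Error;
    // \1 and \2 splice in the captured groups; \0 would be the whole match.
    std::string Out = R.sub("\\2:\\1", "item-42", &Error);  // "42:item"
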
diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp
index ae2640b..2b262dc 100644
--- a/lib/Support/StringRef.cpp
+++ b/lib/Support/StringRef.cpp
@@ -8,6 +8,7 @@
//===----------------------------------------------------------------------===//
#include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/APInt.h"
using namespace llvm;
@@ -172,23 +173,28 @@ size_t StringRef::count(StringRef Str) const {
return Count;
}
+static unsigned GetAutoSenseRadix(StringRef &Str) {
+ if (Str.startswith("0x")) {
+ Str = Str.substr(2);
+ return 16;
+ } else if (Str.startswith("0b")) {
+ Str = Str.substr(2);
+ return 2;
+ } else if (Str.startswith("0")) {
+ return 8;
+ } else {
+ return 10;
+ }
+}
+
+
/// GetAsUnsignedInteger - Workhorse method that converts an integer character
/// sequence of radix up to 36 to an unsigned long long value.
static bool GetAsUnsignedInteger(StringRef Str, unsigned Radix,
unsigned long long &Result) {
// Autosense radix if not specified.
- if (Radix == 0) {
- if (Str.startswith("0x")) {
- Str = Str.substr(2);
- Radix = 16;
- } else if (Str.startswith("0b")) {
- Str = Str.substr(2);
- Radix = 2;
- } else if (Str.startswith("0"))
- Radix = 8;
- else
- Radix = 10;
- }
+ if (Radix == 0)
+ Radix = GetAutoSenseRadix(Str);
// Empty strings (after the radix autosense) are invalid.
if (Str.empty()) return true;
@@ -272,3 +278,78 @@ bool StringRef::getAsInteger(unsigned Radix, unsigned &Result) const {
Result = Val;
return false;
}
+
+bool StringRef::getAsInteger(unsigned Radix, APInt &Result) const {
+ StringRef Str = *this;
+
+ // Autosense radix if not specified.
+ if (Radix == 0)
+ Radix = GetAutoSenseRadix(Str);
+
+ assert(Radix > 1 && Radix <= 36);
+
+ // Empty strings (after the radix autosense) are invalid.
+ if (Str.empty()) return true;
+
+ // Skip leading zeroes. This can be a significant improvement if
+ // it means we don't need > 64 bits.
+ while (!Str.empty() && Str.front() == '0')
+ Str = Str.substr(1);
+
+ // If it was nothing but zeroes....
+ if (Str.empty()) {
+ Result = APInt(64, 0);
+ return false;
+ }
+
+ // (Over-)estimate the required number of bits.
+ unsigned Log2Radix = 0;
+ while ((1U << Log2Radix) < Radix) Log2Radix++;
+ bool IsPowerOf2Radix = ((1U << Log2Radix) == Radix);
+
+ unsigned BitWidth = Log2Radix * Str.size();
+ if (BitWidth < Result.getBitWidth())
+ BitWidth = Result.getBitWidth(); // don't shrink the result
+ else
+ Result.zext(BitWidth);
+
+ APInt RadixAP, CharAP; // unused unless !IsPowerOf2Radix
+ if (!IsPowerOf2Radix) {
+ // These must have the same bit-width as Result.
+ RadixAP = APInt(BitWidth, Radix);
+ CharAP = APInt(BitWidth, 0);
+ }
+
+ // Parse all the bytes of the string given this radix.
+ Result = 0;
+ while (!Str.empty()) {
+ unsigned CharVal;
+ if (Str[0] >= '0' && Str[0] <= '9')
+ CharVal = Str[0]-'0';
+ else if (Str[0] >= 'a' && Str[0] <= 'z')
+ CharVal = Str[0]-'a'+10;
+ else if (Str[0] >= 'A' && Str[0] <= 'Z')
+ CharVal = Str[0]-'A'+10;
+ else
+ return true;
+
+ // If the parsed value is larger than the integer radix, the string is
+ // invalid.
+ if (CharVal >= Radix)
+ return true;
+
+ // Add in this character.
+ if (IsPowerOf2Radix) {
+ Result <<= Log2Radix;
+ Result |= CharVal;
+ } else {
+ Result *= RadixAP;
+ CharAP = CharVal;
+ Result += CharAP;
+ }
+
+ Str = Str.substr(1);
+ }
+
+ return false;
+}
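
The APInt overload reuses the same radix autosensing as the integer paths, so radix 0 accepts 0x-, 0b-, and 0-prefixed strings and grows the result width as needed. A short usage sketch:

    #include "llvm/ADT/APInt.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    APInt Val(64, 0);
    StringRef Hex("0x1234deadbeef");
    bool Failed = Hex.getAsInteger(0, Val);  // radix autosensed as 16
    // Failed is false; Val now holds 0x1234deadbeef.
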
diff --git a/lib/Support/Triple.cpp b/lib/Support/Triple.cpp
index 5a76184..61bf0a7 100644
--- a/lib/Support/Triple.cpp
+++ b/lib/Support/Triple.cpp
@@ -40,6 +40,7 @@ const char *Triple::getArchTypeName(ArchType Kind) {
case x86: return "i386";
case x86_64: return "x86_64";
case xcore: return "xcore";
+ case mblaze: return "mblaze";
}
return "<invalid>";
@@ -62,6 +63,8 @@ const char *Triple::getArchTypePrefix(ArchType Kind) {
case ppc64:
case ppc: return "ppc";
+ case mblaze: return "mblaze";
+
case sparcv9:
case sparc: return "sparc";
@@ -127,6 +130,8 @@ Triple::ArchType Triple::getArchTypeForLLVMName(StringRef Name) {
return ppc64;
if (Name == "ppc")
return ppc;
+ if (Name == "mblaze")
+ return mblaze;
if (Name == "sparc")
return sparc;
if (Name == "sparcv9")
@@ -198,6 +203,8 @@ const char *Triple::getArchNameForAssembler() {
return "ppc";
if (Str == "powerpc64")
return "ppc64";
+ if (Str == "mblaze" || Str == "microblaze")
+ return "mblaze";
if (Str == "arm")
return "arm";
if (Str == "armv4t" || Str == "thumbv4t")
@@ -234,6 +241,8 @@ void Triple::Parse() const {
Arch = ppc;
else if ((ArchName == "powerpc64") || (ArchName == "ppu"))
Arch = ppc64;
+ else if (ArchName == "mblaze")
+ Arch = mblaze;
else if (ArchName == "arm" ||
ArchName.startswith("armv") ||
ArchName == "xscale")
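
With the parser, prefix, and assembler-name hooks all updated, the new architecture round-trips through Triple. An illustrative check:

    #include "llvm/ADT/Triple.h"
    using namespace llvm;

    Triple T("mblaze-unknown-linux-gnu");
    bool OK = T.getArch() == Triple::mblaze &&
              Triple::getArchTypeForLLVMName("mblaze") == Triple::mblaze;
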
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index 6fe7c2c..8e537d8 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -643,6 +643,13 @@ ARMBaseInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
DebugLoc DL = DebugLoc::getUnknownLoc();
if (I != MBB.end()) DL = I->getDebugLoc();
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here.
+ if (DestRC == ARM::tGPRRegisterClass)
+ DestRC = ARM::GPRRegisterClass;
+ if (SrcRC == ARM::tGPRRegisterClass)
+ SrcRC = ARM::GPRRegisterClass;
+
if (DestRC != SrcRC) {
if (DestRC->getSize() != SrcRC->getSize())
return false;
@@ -697,6 +704,11 @@ storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectSize(FI),
Align);
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here.
+ if (RC == ARM::tGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::STR))
.addReg(SrcReg, getKillRegState(isKill))
@@ -745,6 +757,11 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
MFI.getObjectSize(FI),
Align);
+ // tGPR is used sometimes in ARM instructions that need to avoid using
+ // certain registers. Just treat it as GPR here.
+ if (RC == ARM::tGPRRegisterClass)
+ RC = ARM::GPRRegisterClass;
+
if (RC == ARM::GPRRegisterClass) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::LDR), DestReg)
.addFrameIndex(FI).addReg(0).addImm(0).addMemOperand(MMO));
@@ -1020,9 +1037,8 @@ ARMBaseInstrInfo::duplicate(MachineInstr *Orig, MachineFunction &MF) const {
return MI;
}
-bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
- const MachineInstr *MI1,
- const MachineRegisterInfo *MRI) const {
+bool ARMBaseInstrInfo::produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1) const {
int Opcode = MI0->getOpcode();
if (Opcode == ARM::t2LDRpci ||
Opcode == ARM::t2LDRpci_pic ||
@@ -1051,7 +1067,7 @@ bool ARMBaseInstrInfo::isIdentical(const MachineInstr *MI0,
return ACPV0->hasSameValue(ACPV1);
}
- return TargetInstrInfoImpl::isIdentical(MI0, MI1, MRI);
+ return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs);
}
/// getInstrPredicate - If instruction is predicated, returns its predicate
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 0d9d4a7..e095acc 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -289,8 +289,8 @@ public:
MachineInstr *duplicate(MachineInstr *Orig, MachineFunction &MF) const;
- virtual bool isIdentical(const MachineInstr *MI, const MachineInstr *Other,
- const MachineRegisterInfo *MRI) const;
+ virtual bool produceSameValue(const MachineInstr *MI0,
+ const MachineInstr *MI1) const;
};
static inline
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 91e3550..8044966 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -513,7 +513,7 @@ cannotEliminateFrame(const MachineFunction &MF) const {
}
/// estimateStackSize - Estimate and return the size of the frame.
-static unsigned estimateStackSize(MachineFunction &MF, MachineFrameInfo *MFI) {
+static unsigned estimateStackSize(MachineFunction &MF) {
const MachineFrameInfo *FFI = MF.getFrameInfo();
int Offset = 0;
for (int i = FFI->getObjectIndexBegin(); i != 0; ++i) {
@@ -671,8 +671,16 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
}
}
+ // If any of the stack slot references may be out of range of an immediate
+ // offset, make sure a register (or a spill slot) is available for the
+ // register scavenger. Note that if we're indexing off the frame pointer, the
+ // effective stack size is 4 bytes larger since the FP points to the stack
+ // slot of the previous FP.
+ bool BigStack = RS &&
+ estimateStackSize(MF) + (hasFP(MF) ? 4 : 0) >= estimateRSStackSizeLimit(MF);
+
bool ExtraCSSpill = false;
- if (!CanEliminateFrame || cannotEliminateFrame(MF)) {
+ if (BigStack || !CanEliminateFrame || cannotEliminateFrame(MF)) {
AFI->setHasStackFrame(true);
// If LR is not spilled, but at least one of R4, R5, R6, and R7 is spilled.
@@ -727,51 +735,43 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// callee-saved register or reserve a special spill slot to facilitate
// register scavenging. Thumb1 needs a spill slot for stack pointer
// adjustments also, even when the frame itself is small.
- if (RS && !ExtraCSSpill) {
- MachineFrameInfo *MFI = MF.getFrameInfo();
- // If any of the stack slot references may be out of range of an
- // immediate offset, make sure a register (or a spill slot) is
- // available for the register scavenger. Note that if we're indexing
- // off the frame pointer, the effective stack size is 4 bytes larger
- // since the FP points to the stack slot of the previous FP.
- if (estimateStackSize(MF, MFI) + (hasFP(MF) ? 4 : 0)
- >= estimateRSStackSizeLimit(MF)) {
- // If any non-reserved CS register isn't spilled, just spill one or two
- // extra. That should take care of it!
- unsigned NumExtras = TargetAlign / 4;
- SmallVector<unsigned, 2> Extras;
- while (NumExtras && !UnspilledCS1GPRs.empty()) {
- unsigned Reg = UnspilledCS1GPRs.back();
- UnspilledCS1GPRs.pop_back();
+ if (BigStack && !ExtraCSSpill) {
+ // If any non-reserved CS register isn't spilled, just spill one or two
+ // extra. That should take care of it!
+ unsigned NumExtras = TargetAlign / 4;
+ SmallVector<unsigned, 2> Extras;
+ while (NumExtras && !UnspilledCS1GPRs.empty()) {
+ unsigned Reg = UnspilledCS1GPRs.back();
+ UnspilledCS1GPRs.pop_back();
+ if (!isReservedReg(MF, Reg)) {
+ Extras.push_back(Reg);
+ NumExtras--;
+ }
+ }
+ // For non-Thumb1 functions, also check for hi-reg CS registers
+ if (!AFI->isThumb1OnlyFunction()) {
+ while (NumExtras && !UnspilledCS2GPRs.empty()) {
+ unsigned Reg = UnspilledCS2GPRs.back();
+ UnspilledCS2GPRs.pop_back();
if (!isReservedReg(MF, Reg)) {
Extras.push_back(Reg);
NumExtras--;
}
}
- // For non-Thumb1 functions, also check for hi-reg CS registers
- if (!AFI->isThumb1OnlyFunction()) {
- while (NumExtras && !UnspilledCS2GPRs.empty()) {
- unsigned Reg = UnspilledCS2GPRs.back();
- UnspilledCS2GPRs.pop_back();
- if (!isReservedReg(MF, Reg)) {
- Extras.push_back(Reg);
- NumExtras--;
- }
- }
- }
- if (Extras.size() && NumExtras == 0) {
- for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
- MF.getRegInfo().setPhysRegUsed(Extras[i]);
- AFI->setCSRegisterIsSpilled(Extras[i]);
- }
- } else if (!AFI->isThumb1OnlyFunction()) {
- // note: Thumb1 functions spill to R12, not the stack.
- // Reserve a slot closest to SP or frame pointer.
- const TargetRegisterClass *RC = ARM::GPRRegisterClass;
- RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
- RC->getAlignment(),
- false));
+ }
+ if (Extras.size() && NumExtras == 0) {
+ for (unsigned i = 0, e = Extras.size(); i != e; ++i) {
+ MF.getRegInfo().setPhysRegUsed(Extras[i]);
+ AFI->setCSRegisterIsSpilled(Extras[i]);
}
+ } else if (!AFI->isThumb1OnlyFunction()) {
+ // note: Thumb1 functions spill to R12, not the stack. Reserve a slot
+ // closest to SP or frame pointer.
+ const TargetRegisterClass *RC = ARM::GPRRegisterClass;
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ RS->setScavengingFrameIndex(MFI->CreateStackObject(RC->getSize(),
+ RC->getAlignment(),
+ false));
}
}
}
@@ -1085,6 +1085,15 @@ hasReservedCallFrame(MachineFunction &MF) const {
return !MF.getFrameInfo()->hasVarSizedObjects();
}
+// canSimplifyCallFramePseudos - If there is a reserved call frame, the
+// call frame pseudos can be simplified. Unlike most targets, having a FP
+// is not sufficient here since we still may reference some objects via SP
+// even when FP is available in Thumb2 mode.
+bool ARMBaseRegisterInfo::
+canSimplifyCallFramePseudos(MachineFunction &MF) const {
+ return hasReservedCallFrame(MF) || MF.getFrameInfo()->hasVarSizedObjects();
+}
+
static void
emitSPUpdate(bool isARM,
MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
@@ -1119,13 +1128,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
- "This eliminateCallFramePseudoInstr does not suppor Thumb1!");
+ "This eliminateCallFramePseudoInstr does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
// Replace the pseudo instruction with a new instruction...
unsigned Opc = Old->getOpcode();
- ARMCC::CondCodes Pred = (ARMCC::CondCodes)Old->getOperand(1).getImm();
- // FIXME: Thumb2 version of ADJCALLSTACKUP and ADJCALLSTACKDOWN?
+ int PIdx = Old->findFirstPredOperandIdx();
+ ARMCC::CondCodes Pred = (PIdx == -1)
+ ? ARMCC::AL : (ARMCC::CondCodes)Old->getOperand(PIdx).getImm();
if (Opc == ARM::ADJCALLSTACKDOWN || Opc == ARM::tADJCALLSTACKDOWN) {
// Note: PredReg is operand 2 for ADJCALLSTACKDOWN.
unsigned PredReg = Old->getOperand(2).getReg();
@@ -1149,7 +1159,6 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MachineInstr &MI = *II;
MachineBasicBlock &MBB = *MI.getParent();
MachineFunction &MF = *MBB.getParent();
- const MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
"This eliminateFrameIndex does not support Thumb1!");
@@ -1160,12 +1169,12 @@ ARMBaseRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
}
int FrameIndex = MI.getOperand(i).getIndex();
- int Offset = MFI->getObjectOffset(FrameIndex) + MFI->getStackSize() + SPAdj;
unsigned FrameReg;
- Offset = getFrameIndexReference(MF, FrameIndex, FrameReg);
+ int Offset = getFrameIndexReference(MF, FrameIndex, FrameReg);
if (FrameReg != ARM::SP)
SPAdj = 0;
+ Offset += SPAdj;
// Modify MI as necessary to handle as much of 'Offset' as possible
bool Done = false;
@@ -1256,7 +1265,7 @@ emitPrologue(MachineFunction &MF) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
- "This emitPrologue does not suppor Thumb1!");
+ "This emitPrologue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
unsigned NumBytes = MFI->getStackSize();
@@ -1417,7 +1426,7 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
MachineFrameInfo *MFI = MF.getFrameInfo();
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
assert(!AFI->isThumb1OnlyFunction() &&
- "This emitEpilogue does not suppor Thumb1!");
+ "This emitEpilogue does not support Thumb1!");
bool isARM = !AFI->isThumbFunction();
unsigned VARegSaveSize = AFI->getVarArgsRegSaveSize();
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.h b/lib/Target/ARM/ARMBaseRegisterInfo.h
index 33ba21d..64f6ff1 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.h
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.h
@@ -138,6 +138,7 @@ public:
virtual bool requiresFrameIndexScavenging(const MachineFunction &MF) const;
virtual bool hasReservedCallFrame(MachineFunction &MF) const;
+ virtual bool canSimplifyCallFramePseudos(MachineFunction &MF) const;
virtual void eliminateCallFramePseudoInstr(MachineFunction &MF,
MachineBasicBlock &MBB,
diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp
index df4ae70..013e00a 100644
--- a/lib/Target/ARM/ARMISelDAGToDAG.cpp
+++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp
@@ -58,8 +58,6 @@ public:
return "ARM Instruction Selection";
}
- virtual void InstructionSelect();
-
/// getI32Imm - Return a target constant of type i32 with the specified
/// value.
inline SDValue getI32Imm(unsigned Imm) {
@@ -203,11 +201,6 @@ static bool isOpcWithIntImmediate(SDNode *N, unsigned Opc, unsigned& Imm) {
}
-void ARMDAGToDAGISel::InstructionSelect() {
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
bool ARMDAGToDAGISel::SelectShifterOperandReg(SDNode *Op,
SDValue N,
SDValue &BaseReg,
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index adf1644..6a2c6bb 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -294,6 +294,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setTargetDAGCombine(ISD::SIGN_EXTEND);
setTargetDAGCombine(ISD::ZERO_EXTEND);
setTargetDAGCombine(ISD::ANY_EXTEND);
+ setTargetDAGCombine(ISD::SELECT_CC);
}
computeRegisterProperties();
@@ -544,6 +545,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::VZIP: return "ARMISD::VZIP";
case ARMISD::VUZP: return "ARMISD::VUZP";
case ARMISD::VTRN: return "ARMISD::VTRN";
+ case ARMISD::FMAX: return "ARMISD::FMAX";
+ case ARMISD::FMIN: return "ARMISD::FMIN";
}
}
@@ -921,7 +924,7 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
// These operations are automatically eliminated by the prolog/epilog pass
Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
- SDValue StackPtr = DAG.getRegister(ARM::SP, MVT::i32);
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
RegsToPassVector RegsToPass;
SmallVector<SDValue, 8> MemOpChains;
@@ -970,8 +973,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
VA, ArgLocs[++i], StackPtr, MemOpChains, Flags);
} else {
assert(VA.isMemLoc());
- if (StackPtr.getNode() == 0)
- StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Op1,
dl, DAG, VA, Flags));
@@ -984,8 +985,6 @@ ARMTargetLowering::LowerCall(SDValue Chain, SDValue Callee,
RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
} else {
assert(VA.isMemLoc());
- if (StackPtr.getNode() == 0)
- StackPtr = DAG.getCopyFromReg(Chain, dl, ARM::SP, getPointerTy());
MemOpChains.push_back(LowerMemOpCallTo(Chain, StackPtr, Arg,
dl, DAG, VA, Flags));
@@ -1283,8 +1282,7 @@ ARMTargetLowering::LowerToTLSGeneralDynamicModel(GlobalAddressSDNode *GA,
LowerCallTo(Chain, (const Type *) Type::getInt32Ty(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, false, /*isReturnValueUsed=*/true,
- DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl,
- DAG.GetOrdering(Chain.getNode()));
+ DAG.getExternalSymbol("__tls_get_addr", PtrVT), Args, DAG, dl);
return CallResult.first;
}
@@ -3856,23 +3854,106 @@ static SDValue PerformExtendCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// PerformSELECT_CCCombine - Target-specific DAG combining for ISD::SELECT_CC
+/// to match f32 max/min patterns to use NEON vmax/vmin instructions.
+static SDValue PerformSELECT_CCCombine(SDNode *N, SelectionDAG &DAG,
+ const ARMSubtarget *ST) {
+ // If the target supports NEON, try to use vmax/vmin instructions for f32
+ // selects like "x < y ? x : y". Unless the FiniteOnlyFPMath option is set,
+ // be careful about NaNs: NEON's vmax/vmin return NaN if either operand is
+ // a NaN; only do the transformation when it matches that behavior.
+
+ // For now only do this when using NEON for FP operations; if using VFP, it
+ // is not obvious that the benefit outweighs the cost of switching to the
+ // NEON pipeline.
+ if (!ST->hasNEON() || !ST->useNEONForSinglePrecisionFP() ||
+ N->getValueType(0) != MVT::f32)
+ return SDValue();
+
+ SDValue CondLHS = N->getOperand(0);
+ SDValue CondRHS = N->getOperand(1);
+ SDValue LHS = N->getOperand(2);
+ SDValue RHS = N->getOperand(3);
+ ISD::CondCode CC = cast<CondCodeSDNode>(N->getOperand(4))->get();
+
+ unsigned Opcode = 0;
+ bool IsReversed;
+ if (DAG.isEqualTo(LHS, CondLHS) && DAG.isEqualTo(RHS, CondRHS)) {
+ IsReversed = false; // x CC y ? x : y
+ } else if (DAG.isEqualTo(LHS, CondRHS) && DAG.isEqualTo(RHS, CondLHS)) {
+ IsReversed = true; // x CC y ? y : x
+ } else {
+ return SDValue();
+ }
+
+ bool IsUnordered;
+ switch (CC) {
+ default: break;
+ case ISD::SETOLT:
+ case ISD::SETOLE:
+ case ISD::SETLT:
+ case ISD::SETLE:
+ case ISD::SETULT:
+ case ISD::SETULE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmin(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETULT || CC == ISD::SETULE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For less-than-or-equal comparisons, "+0 <= -0" will be true but vmin
+ // will return -0, so vmin can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETLE || CC == ISD::SETOLE || CC == ISD::SETULE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMAX : ARMISD::FMIN;
+ break;
+
+ case ISD::SETOGT:
+ case ISD::SETOGE:
+ case ISD::SETGT:
+ case ISD::SETGE:
+ case ISD::SETUGT:
+ case ISD::SETUGE:
+ // If LHS is NaN, an ordered comparison will be false and the result will
+ // be the RHS, but vmax(NaN, RHS) = NaN. Avoid this by checking that LHS
+ // != NaN. Likewise, for unordered comparisons, check for RHS != NaN.
+ IsUnordered = (CC == ISD::SETUGT || CC == ISD::SETUGE);
+ if (!DAG.isKnownNeverNaN(IsUnordered ? RHS : LHS))
+ break;
+ // For greater-than-or-equal comparisons, "-0 >= +0" will be true but vmax
+ // will return +0, so vmax can only be used for unsafe math or if one of
+ // the operands is known to be nonzero.
+ if ((CC == ISD::SETGE || CC == ISD::SETOGE || CC == ISD::SETUGE) &&
+ !UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
+ break;
+ Opcode = IsReversed ? ARMISD::FMIN : ARMISD::FMAX;
+ break;
+ }
+
+ if (!Opcode)
+ return SDValue();
+ return DAG.getNode(Opcode, N->getDebugLoc(), N->getValueType(0), LHS, RHS);
+}
+
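For reference, the source-level shapes this combine targets look like the following minimal C++ sketch (illustrative only; fmin_sketch/fmax_sketch are hypothetical names, and whether the transformation actually fires also depends on proving the NaN and signed-zero conditions discussed in the comments above):

    float fmin_sketch(float x, float y) {
      return x < y ? x : y;   // SELECT_CC(x, y, x, y, SETOLT) -> ARMISD::FMIN
    }
    float fmax_sketch(float x, float y) {
      return x < y ? y : x;   // same compare, operands reversed -> ARMISD::FMAX
    }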
SDValue ARMTargetLowering::PerformDAGCombine(SDNode *N,
DAGCombinerInfo &DCI) const {
switch (N->getOpcode()) {
default: break;
- case ISD::ADD: return PerformADDCombine(N, DCI);
- case ISD::SUB: return PerformSUBCombine(N, DCI);
+ case ISD::ADD: return PerformADDCombine(N, DCI);
+ case ISD::SUB: return PerformSUBCombine(N, DCI);
case ARMISD::VMOVRRD: return PerformVMOVRRDCombine(N, DCI);
- case ISD::INTRINSIC_WO_CHAIN:
- return PerformIntrinsicCombine(N, DCI.DAG);
+ case ISD::INTRINSIC_WO_CHAIN: return PerformIntrinsicCombine(N, DCI.DAG);
case ISD::SHL:
case ISD::SRA:
- case ISD::SRL:
- return PerformShiftCombine(N, DCI.DAG, Subtarget);
+ case ISD::SRL: return PerformShiftCombine(N, DCI.DAG, Subtarget);
case ISD::SIGN_EXTEND:
case ISD::ZERO_EXTEND:
- case ISD::ANY_EXTEND:
- return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::ANY_EXTEND: return PerformExtendCombine(N, DCI.DAG, Subtarget);
+ case ISD::SELECT_CC: return PerformSELECT_CCCombine(N, DCI.DAG, Subtarget);
}
return SDValue();
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 3c5df45..f8f8adc 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -131,7 +131,11 @@ namespace llvm {
VREV16, // reverse elements within 16-bit halfwords
VZIP, // zip (interleave)
VUZP, // unzip (deinterleave)
- VTRN // transpose
+ VTRN, // transpose
+
+ // Floating-point max and min:
+ FMAX,
+ FMIN
};
}
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index db60458..76595fa 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -708,6 +708,20 @@ class AI3ldsbpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24} = 1; // P bit
let Inst{27-25} = 0b000;
}
+class AI3lddpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
+}
+
// Pre-indexed stores
class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -723,6 +737,19 @@ class AI3sthpr<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{24} = 1; // P bit
let Inst{27-25} = 0b000;
}
+class AI3stdpr<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePre, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 1; // W bit
+ let Inst{24} = 1; // P bit
+ let Inst{27-25} = 0b000;
+}
// Post-indexed loads
class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin,
@@ -734,7 +761,7 @@ class AI3ldhpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{6} = 0; // S bit
let Inst{7} = 1;
let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
+ let Inst{21} = 0; // W bit
let Inst{24} = 0; // P bit
let Inst{27-25} = 0b000;
}
@@ -747,7 +774,7 @@ class AI3ldshpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{6} = 1; // S bit
let Inst{7} = 1;
let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
+ let Inst{21} = 0; // W bit
let Inst{24} = 0; // P bit
let Inst{27-25} = 0b000;
}
@@ -760,7 +787,20 @@ class AI3ldsbpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{6} = 1; // S bit
let Inst{7} = 1;
let Inst{20} = 1; // L bit
- let Inst{21} = 1; // W bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
+}
+class AI3lddpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 0; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
let Inst{24} = 0; // P bit
let Inst{27-25} = 0b000;
}
@@ -775,7 +815,20 @@ class AI3sthpo<dag oops, dag iops, Format f, InstrItinClass itin,
let Inst{6} = 0; // S bit
let Inst{7} = 1;
let Inst{20} = 0; // L bit
- let Inst{21} = 1; // W bit
+ let Inst{21} = 0; // W bit
+ let Inst{24} = 0; // P bit
+ let Inst{27-25} = 0b000;
+}
+class AI3stdpo<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr, list<dag> pattern>
+ : I<oops, iops, AddrMode3, Size4Bytes, IndexModePost, f, itin,
+ opc, asm, cstr, pattern> {
+ let Inst{4} = 1;
+ let Inst{5} = 1; // H bit
+ let Inst{6} = 1; // S bit
+ let Inst{7} = 1;
+ let Inst{20} = 0; // L bit
+ let Inst{21} = 0; // W bit
let Inst{24} = 0; // P bit
let Inst{27-25} = 0b000;
}
@@ -1150,6 +1203,19 @@ class T2Iidxldst<bit signed, bits<2> opcod, bit load, bit pre,
let Inst{8} = 1; // The W bit.
}
+// Helper class for disassembly only
+// A6.3.16 & A6.3.17
+// T2I_mac - Thumb2 multiply [accumulate, and absolute difference] instructions.
+class T2I_mac<bit long, bits<3> op22_20, bits<4> op7_4, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
+ : T2I<oops, iops, itin, opc, asm, pattern> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b011;
+ let Inst{23} = long;
+ let Inst{22-20} = op22_20;
+ let Inst{7-4} = op7_4;
+}
+
// Tv5Pat - Same as Pat<>, but requires V5T Thumb mode.
class Tv5Pat<dag pattern, dag result> : Pat<pattern, result> {
list<Predicate> Predicates = [IsThumb1Only, HasV5T];
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 1c6f78a..87c6f6e 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -130,8 +130,6 @@ def IsThumb2 : Predicate<"Subtarget->isThumb2()">;
def IsARM : Predicate<"!Subtarget->isThumb()">;
def IsDarwin : Predicate<"Subtarget->isTargetDarwin()">;
def IsNotDarwin : Predicate<"!Subtarget->isTargetDarwin()">;
-def CarryDefIsUnused : Predicate<"!N->hasAnyUseOfValue(1)">;
-def CarryDefIsUsed : Predicate<"N->hasAnyUseOfValue(1)">;
// FIXME: Eventually this will be just "hasV6T2Ops".
def UseMovt : Predicate<"Subtarget->useMovt()">;
@@ -176,7 +174,7 @@ def imm16_31 : PatLeaf<(i32 imm), [{
return (int32_t)N->getZExtValue() >= 16 && (int32_t)N->getZExtValue() < 32;
}]>;
-def so_imm_neg :
+def so_imm_neg :
PatLeaf<(imm), [{
return ARM_AM::getSOImmVal(-(int)N->getZExtValue()) != -1;
}], so_imm_neg_XFORM>;
@@ -194,7 +192,7 @@ def sext_16_node : PatLeaf<(i32 GPR:$a), [{
/// bf_inv_mask_imm predicate - An AND mask to clear an arbitrary width bitfield
/// e.g., 0xf000ffff
def bf_inv_mask_imm : Operand<i32>,
- PatLeaf<(imm), [{
+ PatLeaf<(imm), [{
uint32_t v = (uint32_t)N->getZExtValue();
if (v == 0xffffffff)
return 0;
@@ -227,7 +225,7 @@ def lo16AllZero : PatLeaf<(i32 imm), [{
return (((uint32_t)N->getZExtValue()) & 0xFFFFUL) == 0;
}], hi16>;
-/// imm0_65535 predicate - True if the 32-bit immediate is in the range
+/// imm0_65535 predicate - True if the 32-bit immediate is in the range
/// [0.65535].
def imm0_65535 : PatLeaf<(i32 imm), [{
return (uint32_t)N->getZExtValue() < 65536;
@@ -236,6 +234,21 @@ def imm0_65535 : PatLeaf<(i32 imm), [{
class BinOpFrag<dag res> : PatFrag<(ops node:$LHS, node:$RHS), res>;
class UnOpFrag <dag res> : PatFrag<(ops node:$Src), res>;
+/// adde and sube predicates - True based on whether the carry flag output
+/// will be needed or not.
+def adde_dead_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+ [{return !N->hasAnyUseOfValue(1);}]>;
+def sube_dead_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+ [{return !N->hasAnyUseOfValue(1);}]>;
+def adde_live_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (adde node:$LHS, node:$RHS),
+ [{return N->hasAnyUseOfValue(1);}]>;
+def sube_live_carry :
+ PatFrag<(ops node:$LHS, node:$RHS), (sube node:$LHS, node:$RHS),
+ [{return N->hasAnyUseOfValue(1);}]>;
+
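A minimal C++ sketch of the predicate these fragments embed (it mirrors the inline code above; carryOutIsDead is an illustrative name, not part of the patch):

    #include "llvm/CodeGen/SelectionDAGNodes.h"

    // adde/sube nodes produce two results: result 0 is the sum/difference and
    // result 1 is the carry-out. The *_dead_carry fragments match only when
    // nothing reads result 1; the *_live_carry fragments match the opposite.
    static bool carryOutIsDead(const llvm::SDNode *N) {
      return !N->hasAnyUseOfValue(1);
    }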
//===----------------------------------------------------------------------===//
// Operand Definitions.
//
@@ -501,6 +514,22 @@ multiclass AI_unary_rrot<bits<8> opcod, string opc, PatFrag opnode> {
}
}
+multiclass AI_unary_rrot_np<bits<8> opcod, string opc> {
+ def r : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src),
+ IIC_iUNAr, opc, "\t$dst, $src",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{11-10} = 0b00;
+ let Inst{19-16} = 0b1111;
+ }
+ def r_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$src, i32imm:$rot),
+ IIC_iUNAsi, opc, "\t$dst, $src, ror $rot",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{19-16} = 0b1111;
+ }
+}
+
/// AI_bin_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
@@ -510,13 +539,29 @@ multiclass AI_bin_rrot<bits<8> opcod, string opc, PatFrag opnode> {
Requires<[IsARM, HasV6]> {
let Inst{11-10} = 0b00;
}
- def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
+ i32imm:$rot),
IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
[(set GPR:$dst, (opnode GPR:$LHS,
(rotr GPR:$RHS, rot_imm:$rot)))]>,
Requires<[IsARM, HasV6]>;
}
+// For disassembly only.
+multiclass AI_bin_rrot_np<bits<8> opcod, string opc> {
+ def rr : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS),
+ IIC_iALUr, opc, "\t$dst, $LHS, $RHS",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{11-10} = 0b00;
+ }
+ def rr_rot : AExtI<opcod, (outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS,
+ i32imm:$rot),
+ IIC_iALUsi, opc, "\t$dst, $LHS, $RHS, ror $rot",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]>;
+}
+
/// AI1_adde_sube_irs - Define instructions and patterns for adde and sube.
let Uses = [CPSR] in {
multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
@@ -524,13 +569,13 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def ri : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
DPFrm, IIC_iALUi, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ Requires<[IsARM]> {
let Inst{25} = 1;
}
def rr : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
DPFrm, IIC_iALUr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ Requires<[IsARM]> {
let isCommutable = Commutable;
let Inst{11-4} = 0b00000000;
let Inst{25} = 0;
@@ -538,7 +583,7 @@ multiclass AI1_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
def rs : AsI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, opc, "\t$dst, $a, $b",
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ Requires<[IsARM]> {
let Inst{25} = 0;
}
}
@@ -549,16 +594,14 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
def Sri : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
DPFrm, IIC_iALUi, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_imm:$b))]>,
- Requires<[IsARM, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ Requires<[IsARM]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def Srr : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
DPFrm, IIC_iALUr, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, GPR:$b))]>,
- Requires<[IsARM, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ Requires<[IsARM]> {
let Inst{11-4} = 0b00000000;
let Inst{20} = 1;
let Inst{25} = 0;
@@ -566,8 +609,7 @@ multiclass AI1_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
def Srs : AXI1<opcod, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, !strconcat(opc, "\t$dst, $a, $b"),
[(set GPR:$dst, (opnode GPR:$a, so_reg:$b))]>,
- Requires<[IsARM, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ Requires<[IsARM]> {
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -593,13 +635,16 @@ PseudoInst<(outs), (ins cpinst_operand:$instid, cpinst_operand:$cpidx,
i32imm:$size), NoItinerary,
"${instid:label} ${cpidx:cpentry}", []>;
-let Defs = [SP], Uses = [SP] in {
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def ADJCALLSTACKUP :
PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2, pred:$p), NoItinerary,
"@ ADJCALLSTACKUP $amt1",
[(ARMcallseq_end timm:$amt1, timm:$amt2)]>;
-def ADJCALLSTACKDOWN :
+def ADJCALLSTACKDOWN :
PseudoInst<(outs), (ins i32imm:$amt, pred:$p), NoItinerary,
"@ ADJCALLSTACKDOWN $amt",
[(ARMcallseq_start timm:$amt)]>;
@@ -633,6 +678,14 @@ def WFI : AI<(outs), (ins), MiscFrm, NoItinerary, "wfi", "",
let Inst{7-0} = 0b00000011;
}
+def SEL : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, NoItinerary, "sel",
+ "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{27-20} = 0b01101000;
+ let Inst{7-4} = 0b1011;
+}
+
def SEV : AI<(outs), (ins), MiscFrm, NoItinerary, "sev", "",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM, HasV6T2]> {
@@ -664,6 +717,34 @@ def CPS : AXI<(outs),(ins i32imm:$opt), MiscFrm, NoItinerary, "cps${opt:cps}",
let Inst{5} = 0;
}
+// Preload hints the memory system about possible future data/instruction accesses.
+// These are for disassembly only.
+multiclass APreLoad<bit data, bit read, string opc> {
+
+ def i : AXI<(outs), (ins GPR:$base, i32imm:$imm), MiscFrm, NoItinerary,
+ !strconcat(opc, "\t[$base, $imm]"), []> {
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 0; // 0 for immediate form
+ let Inst{24} = data;
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ }
+
+ def r : AXI<(outs), (ins addrmode2:$addr), MiscFrm, NoItinerary,
+ !strconcat(opc, "\t$addr"), []> {
+ let Inst{31-26} = 0b111101;
+ let Inst{25} = 1; // 1 for register form
+ let Inst{24} = data;
+ let Inst{22} = read;
+ let Inst{21-20} = 0b01;
+ let Inst{4} = 0;
+ }
+}
+
+defm PLD : APreLoad<1, 1, "pld">;
+defm PLDW : APreLoad<1, 0, "pldw">;
+defm PLI : APreLoad<0, 1, "pli">;
+
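A hedged illustration of source code that typically lowers to these preloads (__builtin_prefetch is a GCC/Clang builtin, not something introduced by this patch; the instruction actually emitted depends on the subtarget):

    void warm_cache(const char *p) {
      __builtin_prefetch(p, /*rw=*/0, /*locality=*/3); // read preload  -> pld
      __builtin_prefetch(p, /*rw=*/1, /*locality=*/3); // write preload -> pldw (v7 MP extensions)
    }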
def SETENDBE : AXI<(outs),(ins), MiscFrm, NoItinerary, "setend\tbe",
[/* For disassembly only; pattern left blank */]>,
Requires<[IsARM]> {
@@ -761,7 +842,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
"(${label}_${id}-(",
"${:private}PCRELL${:uid}+8))\n"),
!strconcat("${:private}PCRELL${:uid}:\n\t",
- "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
+ "add$p\t$dst, pc, #${:private}PCRELV${:uid}")),
[]> {
let Inst{25} = 1;
}
@@ -771,7 +852,7 @@ def LEApcrelJT : AXI1<0x0, (outs GPR:$dst),
//
let isReturn = 1, isTerminator = 1, isBarrier = 1 in
- def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
+ def BX_RET : AI<(outs), (ins), BrMiscFrm, IIC_Br,
"bx", "\tlr", [(ARMretflag)]> {
let Inst{3-0} = 0b1110;
let Inst{7-4} = 0b0001;
@@ -828,9 +909,10 @@ let isCall = 1,
}
// ARMv4T
- def BX : ABXIx2<(outs), (ins GPR:$func, variable_ops),
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BX : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
IIC_Br, "mov\tlr, pc\n\tbx\t$func",
- [(ARMcall_nolink GPR:$func)]>,
+ [(ARMcall_nolink tGPR:$func)]>,
Requires<[IsARM, IsNotDarwin]> {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
@@ -865,9 +947,10 @@ let isCall = 1,
}
// ARMv4T
- def BXr9 : ABXIx2<(outs), (ins GPR:$func, variable_ops),
+ // Note: Restrict $func to the tGPR regclass to prevent it being in LR.
+ def BXr9 : ABXIx2<(outs), (ins tGPR:$func, variable_ops),
IIC_Br, "mov\tlr, pc\n\tbx\t$func",
- [(ARMcall_nolink GPR:$func)]>, Requires<[IsARM, IsDarwin]> {
+ [(ARMcall_nolink tGPR:$func)]>, Requires<[IsARM, IsDarwin]> {
let Inst{7-4} = 0b0001;
let Inst{19-8} = 0b111111111111;
let Inst{27-20} = 0b00010010;
@@ -917,7 +1000,7 @@ let isBranch = 1, isTerminator = 1 in {
} // isBarrier = 1
// FIXME: should be able to write a pattern for ARMBrcond, but can't use
- // a two-value operand where a dag node expects two operands. :(
+ // a two-value operand where a dag node expects two operands. :(
def Bcc : ABI<0b1010, (outs), (ins brtarget:$target),
IIC_Br, "b", "\t$target",
[/*(ARMbrcond bb:$target, imm:$cc, CCR:$ccr)*/]>;
@@ -931,25 +1014,61 @@ def BXJ : ABI<0b0001, (outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
let Inst{7-4} = 0b0010;
}
-// Supervisor call (software interrupt) -- for disassembly only
+// Secure Monitor Call is a system instruction -- for disassembly only
+def SMC : ABI<0b0001, (outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0110;
+ let Inst{7-4} = 0b0111;
+}
+
+// Supervisor Call (Software Interrupt) -- for disassembly only
let isCall = 1 in {
def SVC : ABI<0b1111, (outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc",
[/* For disassembly only; pattern left blank */]>;
}
+// Store Return State is a system instruction -- for disassembly only
+def SRSW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode),
+ NoItinerary, "srs${addr:submode}\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b110; // W = 1
+}
+
+def SRS : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, i32imm:$mode),
+ NoItinerary, "srs${addr:submode}\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b100; // W = 0
+}
+
+// Return From Exception is a system instruction -- for disassembly only
+def RFEW : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
+ NoItinerary, "rfe${addr:submode}\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b011; // W = 1
+}
+
+def RFE : ABXI<{1,0,0,?}, (outs), (ins addrmode4:$addr, GPR:$base),
+ NoItinerary, "rfe${addr:submode}\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-28} = 0b1111;
+ let Inst{22-20} = 0b001; // W = 0
+}
+
//===----------------------------------------------------------------------===//
// Load / store Instructions.
//
// Load
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def LDR : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", "\t$dst, $addr",
[(set GPR:$dst, (load addrmode2:$addr))]>;
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
def LDRcp : AI2ldw<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm, IIC_iLoadr,
"ldr", "\t$dst, $addr", []>;
@@ -958,7 +1077,7 @@ def LDRH : AI3ldh<(outs GPR:$dst), (ins addrmode3:$addr), LdMiscFrm,
IIC_iLoadr, "ldrh", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi16 addrmode3:$addr))]>;
-def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
+def LDRB : AI2ldb<(outs GPR:$dst), (ins addrmode2:$addr), LdFrm,
IIC_iLoadr, "ldrb", "\t$dst, $addr",
[(set GPR:$dst, (zextloadi8 addrmode2:$addr))]>;
@@ -1017,9 +1136,22 @@ def LDRSB_PRE : AI3ldsbpr<(outs GPR:$dst, GPR:$base_wb),
def LDRSB_POST: AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
"ldrsb", "\t$dst, [$base], $offset", "$base = $base_wb", []>;
+
+// For disassembly only
+def LDRD_PRE : AI3lddpr<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
+ (ins addrmode3:$addr), LdMiscFrm, IIC_iLoadr,
+ "ldrd", "\t$dst1, $dst2, $addr!", "$addr.base = $base_wb", []>,
+ Requires<[IsARM, HasV5TE]>;
+
+// For disassembly only
+def LDRD_POST : AI3lddpo<(outs GPR:$dst1, GPR:$dst2, GPR:$base_wb),
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadr,
+ "ldrd", "\t$dst1, $dst2, [$base], $offset", "$base = $base_wb", []>,
+ Requires<[IsARM, HasV5TE]>;
+
}
-// LDRT and LDRBT are for disassembly only.
+// LDRT, LDRBT, LDRSBT, LDRHT, LDRSHT are for disassembly only.
def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
(ins GPR:$base, am2offset:$offset), LdFrm, IIC_iLoadru,
@@ -1028,8 +1160,26 @@ def LDRT : AI2ldwpo<(outs GPR:$dst, GPR:$base_wb),
}
def LDRBT : AI2ldbpo<(outs GPR:$dst, GPR:$base_wb),
- (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
- "ldrb", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ (ins GPR:$base,am2offset:$offset), LdFrm, IIC_iLoadru,
+ "ldrbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+
+def LDRSBT : AI3ldsbpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am2offset:$offset), LdMiscFrm, IIC_iLoadru,
+ "ldrsbt", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+
+def LDRHT : AI3ldhpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base, am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+ "ldrht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
+ let Inst{21} = 1; // overwrite
+}
+
+def LDRSHT : AI3ldshpo<(outs GPR:$dst, GPR:$base_wb),
+ (ins GPR:$base,am3offset:$offset), LdMiscFrm, IIC_iLoadru,
+ "ldrsht", "\t$dst, [$base], $offset", "$base = $base_wb", []> {
let Inst{21} = 1; // overwrite
}
@@ -1039,8 +1189,8 @@ def STR : AI2stw<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
[(store GPR:$src, addrmode2:$addr)]>;
// Stores with truncate
-def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm, IIC_iStorer,
- "strh", "\t$src, $addr",
+def STRH : AI3sth<(outs), (ins GPR:$src, addrmode3:$addr), StMiscFrm,
+ IIC_iStorer, "strh", "\t$src, $addr",
[(truncstorei16 GPR:$src, addrmode3:$addr)]>;
def STRB : AI2stb<(outs), (ins GPR:$src, addrmode2:$addr), StFrm, IIC_iStorer,
@@ -1055,51 +1205,65 @@ def STRD : AI3std<(outs), (ins GPR:$src1, GPR:$src2, addrmode3:$addr),
// Indexed stores
def STR_PRE : AI2stwpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base, am2offset:$offset),
+ (ins GPR:$src, GPR:$base, am2offset:$offset),
StFrm, IIC_iStoreru,
"str", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_store GPR:$src, GPR:$base, am2offset:$offset))]>;
def STR_POST : AI2stwpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
"str", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb,
(post_store GPR:$src, GPR:$base, am2offset:$offset))]>;
def STRH_PRE : AI3sthpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am3offset:$offset),
+ (ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
"strh", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb,
(pre_truncsti16 GPR:$src, GPR:$base,am3offset:$offset))]>;
def STRH_POST: AI3sthpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am3offset:$offset),
+ (ins GPR:$src, GPR:$base,am3offset:$offset),
StMiscFrm, IIC_iStoreru,
"strh", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti16 GPR:$src,
GPR:$base, am3offset:$offset))]>;
def STRB_PRE : AI2stbpr<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
"strb", "\t$src, [$base, $offset]!", "$base = $base_wb",
[(set GPR:$base_wb, (pre_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
def STRB_POST: AI2stbpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
"strb", "\t$src, [$base], $offset", "$base = $base_wb",
[(set GPR:$base_wb, (post_truncsti8 GPR:$src,
GPR:$base, am2offset:$offset))]>;
-// STRT and STRBT are for disassembly only.
+// For disassembly only
+def STRD_PRE : AI3stdpr<(outs GPR:$base_wb),
+ (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+ StMiscFrm, IIC_iStoreru,
+ "strd", "\t$src1, $src2, [$base, $offset]!",
+ "$base = $base_wb", []>;
+
+// For disassembly only
+def STRD_POST: AI3stdpo<(outs GPR:$base_wb),
+ (ins GPR:$src1, GPR:$src2, GPR:$base, am3offset:$offset),
+ StMiscFrm, IIC_iStoreru,
+ "strd", "\t$src1, $src2, [$base], $offset",
+ "$base = $base_wb", []>;
+
+// STRT, STRBT, and STRHT are for disassembly only.
def STRT : AI2stwpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
"strt", "\t$src, [$base], $offset", "$base = $base_wb",
[/* For disassembly only; pattern left blank */]> {
@@ -1107,13 +1271,21 @@ def STRT : AI2stwpo<(outs GPR:$base_wb),
}
def STRBT : AI2stbpo<(outs GPR:$base_wb),
- (ins GPR:$src, GPR:$base,am2offset:$offset),
+ (ins GPR:$src, GPR:$base,am2offset:$offset),
StFrm, IIC_iStoreru,
"strbt", "\t$src, [$base], $offset", "$base = $base_wb",
[/* For disassembly only; pattern left blank */]> {
let Inst{21} = 1; // overwrite
}
+def STRHT: AI3sthpo<(outs GPR:$base_wb),
+ (ins GPR:$src, GPR:$base,am3offset:$offset),
+ StMiscFrm, IIC_iStoreru,
+ "strht", "\t$src, [$base], $offset", "$base = $base_wb",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{21} = 1; // overwrite
+}
+
//===----------------------------------------------------------------------===//
// Load / store multiple Instructions.
//
@@ -1141,7 +1313,7 @@ def MOVr : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
let Inst{25} = 0;
}
-def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
+def MOVs : AsI1<0b1101, (outs GPR:$dst), (ins so_reg:$src),
DPSoRegFrm, IIC_iMOVsr,
"mov", "\t$dst, $src", [(set GPR:$dst, so_reg:$src)]>, UnaryDP {
let Inst{25} = 0;
@@ -1154,7 +1326,7 @@ def MOVi : AsI1<0b1101, (outs GPR:$dst), (ins so_imm:$src), DPFrm, IIC_iMOVi,
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
+def MOVi16 : AI1<0b1000, (outs GPR:$dst), (ins i32imm:$src),
DPFrm, IIC_iMOVi,
"movw", "\t$dst, $src",
[(set GPR:$dst, imm0_65535:$src)]>,
@@ -1168,7 +1340,7 @@ def MOVTi16 : AI1<0b1010, (outs GPR:$dst), (ins GPR:$src, i32imm:$imm),
DPFrm, IIC_iMOVi,
"movt", "\t$dst, $imm",
[(set GPR:$dst,
- (or (and GPR:$src, 0xffff),
+ (or (and GPR:$src, 0xffff),
lo16AllZero:$imm))]>, UnaryDP,
Requires<[IsARM, HasV6T2]> {
let Inst{20} = 0;
@@ -1187,7 +1359,7 @@ def MOVrx : AsI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo, IIC_iMOVsi,
// due to flag operands.
let Defs = [CPSR] in {
-def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
+def MOVsrl_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
IIC_iMOVsi, "movs", "\t$dst, $src, lsr #1",
[(set GPR:$dst, (ARMsrl_flag GPR:$src))]>, UnaryDP;
def MOVsra_flag : AI1<0b1101, (outs GPR:$dst), (ins GPR:$src), Pseudo,
@@ -1211,7 +1383,11 @@ defm SXTAB : AI_bin_rrot<0b01101010,
defm SXTAH : AI_bin_rrot<0b01101011,
"sxtah", BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
-// TODO: SXT(A){B|H}16
+// For disassembly only
+defm SXTB16 : AI_unary_rrot_np<0b01101000, "sxtb16">;
+
+// For disassembly only
+defm SXTAB16 : AI_bin_rrot_np<0b01101000, "sxtab16">;
// Zero extenders
@@ -1235,9 +1411,9 @@ defm UXTAH : AI_bin_rrot<0b01101111, "uxtah",
}
// This isn't safe in general, the add is two 16-bit units, not a 32-bit add.
-//defm UXTAB16 : xxx<"uxtab16", 0xff00ff>;
+// For disassembly only
+defm UXTAB16 : AI_bin_rrot_np<0b01101100, "uxtab16">;
-// TODO: UXT(A){B|H}16
def SBFX : I<(outs GPR:$dst),
(ins GPR:$src, imm0_31:$lsb, imm0_31:$width),
@@ -1273,13 +1449,13 @@ defm SUBS : AI1_bin_s_irs<0b0010, "subs",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm ADC : AI1_adde_sube_irs<0b0101, "adc",
- BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
defm SBC : AI1_adde_sube_irs<0b0110, "sbc",
- BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
defm ADCS : AI1_adde_sube_s_irs<0b0101, "adcs",
- BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
defm SBCS : AI1_adde_sube_s_irs<0b0110, "sbcs",
- BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+ BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
// These don't define reg/reg forms, because they are handled above.
def RSBri : AsI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_imm:$b), DPFrm,
@@ -1313,14 +1489,14 @@ def RSBSrs : AI1<0b0011, (outs GPR:$dst), (ins GPR:$a, so_reg:$b), DPSoRegFrm,
let Uses = [CPSR] in {
def RSCri : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
DPFrm, IIC_iALUi, "rsc", "\t$dst, $a, $b",
- [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+ Requires<[IsARM]> {
let Inst{25} = 1;
}
def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rsc", "\t$dst, $a, $b",
- [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+ Requires<[IsARM]> {
let Inst{25} = 0;
}
}
@@ -1329,15 +1505,15 @@ def RSCrs : AsI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
let Defs = [CPSR], Uses = [CPSR] in {
def RSCSri : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_imm:$b),
DPFrm, IIC_iALUi, "rscs\t$dst, $a, $b",
- [(set GPR:$dst, (sube so_imm:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ [(set GPR:$dst, (sube_dead_carry so_imm:$b, GPR:$a))]>,
+ Requires<[IsARM]> {
let Inst{20} = 1;
let Inst{25} = 1;
}
def RSCSrs : AXI1<0b0111, (outs GPR:$dst), (ins GPR:$a, so_reg:$b),
DPSoRegFrm, IIC_iALUsr, "rscs\t$dst, $a, $b",
- [(set GPR:$dst, (sube so_reg:$b, GPR:$a))]>,
- Requires<[IsARM, CarryDefIsUnused]> {
+ [(set GPR:$dst, (sube_dead_carry so_reg:$b, GPR:$a))]>,
+ Requires<[IsARM]> {
let Inst{20} = 1;
let Inst{25} = 0;
}
@@ -1358,10 +1534,9 @@ def : ARMPat<(add GPR:$src, so_imm_neg:$imm),
// (mul X, 2^n+1) -> (add (X << n), X)
// (mul X, 2^n-1) -> (rsb X, (X << n))
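A worked instance of the two identities above (plain arithmetic; the assembly in the comments is the usual lowering, shown for illustration):

    //   x * 9 = x * (8 + 1) = (x << 3) + x   e.g. add rd, rn, rn, lsl #3
    //   x * 7 = x * (8 - 1) = (x << 3) - x   e.g. rsb rd, rn, rn, lsl #3
    unsigned times9(unsigned x) { return (x << 3) + x; }
    unsigned times7(unsigned x) { return (x << 3) - x; }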
-// Saturating adds/subtracts -- for disassembly only
-
+// ARM Arithmetic Instruction -- for disassembly only
// GPR:$dst = GPR:$a op GPR:$b
-class AQI<bits<8> op27_20, bits<4> op7_4, string opc>
+class AAI<bits<8> op27_20, bits<4> op7_4, string opc>
: AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b), DPFrm, IIC_iALUr,
opc, "\t$dst, $a, $b",
[/* For disassembly only; pattern left blank */]> {
@@ -1369,22 +1544,116 @@ class AQI<bits<8> op27_20, bits<4> op7_4, string opc>
let Inst{7-4} = op7_4;
}
-def QADD : AQI<0b00010000, 0b0101, "qadd">;
-def QADD16 : AQI<0b01100010, 0b0001, "qadd16">;
-def QADD8 : AQI<0b01100010, 0b1001, "qadd8">;
-def QASX : AQI<0b01100010, 0b0011, "qasx">;
-def QDADD : AQI<0b00010100, 0b0101, "qdadd">;
-def QDSUB : AQI<0b00010110, 0b0101, "qdsub">;
-def QSAX : AQI<0b01100010, 0b0101, "qsax">;
-def QSUB : AQI<0b00010010, 0b0101, "qsub">;
-def QSUB16 : AQI<0b01100010, 0b0111, "qsub16">;
-def QSUB8 : AQI<0b01100010, 0b1111, "qsub8">;
-def UQADD16 : AQI<0b01100110, 0b0001, "uqadd16">;
-def UQADD8 : AQI<0b01100110, 0b1001, "uqadd8">;
-def UQASX : AQI<0b01100110, 0b0011, "uqasx">;
-def UQSAX : AQI<0b01100110, 0b0101, "uqsax">;
-def UQSUB16 : AQI<0b01100110, 0b0111, "uqsub16">;
-def UQSUB8 : AQI<0b01100110, 0b1111, "uqsub8">;
+// Saturating add/subtract -- for disassembly only
+
+def QADD : AAI<0b00010000, 0b0101, "qadd">;
+def QADD16 : AAI<0b01100010, 0b0001, "qadd16">;
+def QADD8 : AAI<0b01100010, 0b1001, "qadd8">;
+def QASX : AAI<0b01100010, 0b0011, "qasx">;
+def QDADD : AAI<0b00010100, 0b0101, "qdadd">;
+def QDSUB : AAI<0b00010110, 0b0101, "qdsub">;
+def QSAX : AAI<0b01100010, 0b0101, "qsax">;
+def QSUB : AAI<0b00010010, 0b0101, "qsub">;
+def QSUB16 : AAI<0b01100010, 0b0111, "qsub16">;
+def QSUB8 : AAI<0b01100010, 0b1111, "qsub8">;
+def UQADD16 : AAI<0b01100110, 0b0001, "uqadd16">;
+def UQADD8 : AAI<0b01100110, 0b1001, "uqadd8">;
+def UQASX : AAI<0b01100110, 0b0011, "uqasx">;
+def UQSAX : AAI<0b01100110, 0b0101, "uqsax">;
+def UQSUB16 : AAI<0b01100110, 0b0111, "uqsub16">;
+def UQSUB8 : AAI<0b01100110, 0b1111, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def SASX : AAI<0b01100001, 0b0011, "sasx">;
+def SADD16 : AAI<0b01100001, 0b0001, "sadd16">;
+def SADD8 : AAI<0b01100001, 0b1001, "sadd8">;
+def SSAX : AAI<0b01100001, 0b0101, "ssax">;
+def SSUB16 : AAI<0b01100001, 0b0111, "ssub16">;
+def SSUB8 : AAI<0b01100001, 0b1111, "ssub8">;
+def UASX : AAI<0b01100101, 0b0011, "uasx">;
+def UADD16 : AAI<0b01100101, 0b0001, "uadd16">;
+def UADD8 : AAI<0b01100101, 0b1001, "uadd8">;
+def USAX : AAI<0b01100101, 0b0101, "usax">;
+def USUB16 : AAI<0b01100101, 0b0111, "usub16">;
+def USUB8 : AAI<0b01100101, 0b1111, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def SHASX : AAI<0b01100011, 0b0011, "shasx">;
+def SHADD16 : AAI<0b01100011, 0b0001, "shadd16">;
+def SHADD8 : AAI<0b01100011, 0b1001, "shadd8">;
+def SHSAX : AAI<0b01100011, 0b0101, "shsax">;
+def SHSUB16 : AAI<0b01100011, 0b0111, "shsub16">;
+def SHSUB8 : AAI<0b01100011, 0b1111, "shsub8">;
+def UHASX : AAI<0b01100111, 0b0011, "uhasx">;
+def UHADD16 : AAI<0b01100111, 0b0001, "uhadd16">;
+def UHADD8 : AAI<0b01100111, 0b1001, "uhadd8">;
+def UHSAX : AAI<0b01100111, 0b0101, "uhsax">;
+def UHSUB16 : AAI<0b01100111, 0b0111, "uhsub16">;
+def UHSUB8 : AAI<0b01100111, 0b1111, "uhsub8">;
+
+// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+
+def USAD8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ MulFrm /* for convenience */, NoItinerary, "usad8",
+ "\t$dst, $a, $b", []>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{27-20} = 0b01111000;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b0001;
+}
+def USADA8 : AI<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ MulFrm /* for convenience */, NoItinerary, "usada8",
+ "\t$dst, $a, $b, $acc", []>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{27-20} = 0b01111000;
+ let Inst{7-4} = 0b0001;
+}
+
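Reference semantics for these two as a small C++ sketch (usad8_ref is an illustrative name; usada8 is the same computation plus the accumulator operand):

    #include <cstdint>

    uint32_t usad8_ref(uint32_t a, uint32_t b) {
      uint32_t sum = 0;
      for (int i = 0; i < 32; i += 8) {          // four byte lanes
        uint8_t x = uint8_t(a >> i), y = uint8_t(b >> i);
        sum += (x > y) ? uint32_t(x - y) : uint32_t(y - x);
      }
      return sum;
    }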
+// Signed/Unsigned saturate -- for disassembly only
+
+def SSATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
+ DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-21} = 0b0110101;
+ let Inst{6-4} = 0b001;
+}
+
+def SSATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
+ DPFrm, NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-21} = 0b0110101;
+ let Inst{6-4} = 0b101;
+}
+
+def SSAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+ NoItinerary, "ssat16", "\t$dst, $bit_pos, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-20} = 0b01101010;
+ let Inst{7-4} = 0b0011;
+}
+
+def USATlsl : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
+ DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-21} = 0b0110111;
+ let Inst{6-4} = 0b001;
+}
+
+def USATasr : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a, i32imm:$shamt),
+ DPFrm, NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-21} = 0b0110111;
+ let Inst{6-4} = 0b101;
+}
+
+def USAT16 : AI<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), DPFrm,
+ NoItinerary, "usat16", "\t$dst, $bit_pos, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-20} = 0b01101110;
+ let Inst{7-4} = 0b0011;
+}
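The architectural effect of the saturate instructions, sketched in C++ for reference (ssat_ref is an illustrative name; usat clamps to the unsigned range [0, 2^n - 1] instead):

    #include <cstdint>

    int32_t ssat_ref(int32_t x, unsigned n) {   // saturate to n bits, 1 <= n <= 32
      int64_t lo = -(int64_t(1) << (n - 1));
      int64_t hi =  (int64_t(1) << (n - 1)) - 1;
      return int32_t(x < lo ? lo : (x > hi ? hi : int64_t(x)));
    }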
//===----------------------------------------------------------------------===//
// Bitwise Instructions.
@@ -1408,6 +1677,17 @@ def BFC : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
let Inst{6-0} = 0b0011111;
}
+// A8.6.18 BFI - Bitfield insert (Encoding A1)
+// Added for disassembler with the pattern field purposely left blank.
+def BFI : I<(outs GPR:$dst), (ins GPR:$src, bf_inv_mask_imm:$imm),
+ AddrMode1, Size4Bytes, IndexModeNone, DPFrm, IIC_iUNAsi,
+ "bfi", "\t$dst, $src, $imm", "",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6T2]> {
+ let Inst{27-21} = 0b0111110;
+ let Inst{6-4} = 0b001; // Rn: Inst{3-0} != 15
+}
+
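Reference semantics for bfi (A8.6.18), sketched in C++ (bfi_ref is an illustrative name; the def above folds lsb and width into the single bf_inv_mask_imm operand):

    #include <cstdint>

    uint32_t bfi_ref(uint32_t rd, uint32_t rn, unsigned lsb, unsigned width) {
      uint32_t mask = (width >= 32) ? 0xffffffffu : ((1u << width) - 1u);
      return (rd & ~(mask << lsb)) | ((rn & mask) << lsb);
    }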
def MVNr : AsI1<0b1111, (outs GPR:$dst), (ins GPR:$src), DPFrm, IIC_iMOVr,
"mvn", "\t$dst, $src",
[(set GPR:$dst, (not GPR:$src))]>, UnaryDP {
@@ -1420,7 +1700,7 @@ def MVNs : AsI1<0b1111, (outs GPR:$dst), (ins so_reg:$src), DPSoRegFrm,
let Inst{25} = 0;
}
let isReMaterializable = 1, isAsCheapAsAMove = 1 in
-def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
+def MVNi : AsI1<0b1111, (outs GPR:$dst), (ins so_imm:$imm), DPFrm,
IIC_iMOVi, "mvn", "\t$dst, $imm",
[(set GPR:$dst, so_imm_not:$imm)]>,UnaryDP {
let Inst{25} = 1;
@@ -1483,6 +1763,14 @@ def SMMUL : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
let Inst{15-12} = 0b1111;
}
+def SMMULR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ IIC_iMUL32, "smmulr", "\t$dst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b0011; // R = 1
+ let Inst{15-12} = 0b1111;
+}
+
def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iMAC32, "smmla", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]>,
@@ -1490,6 +1778,12 @@ def SMMLA : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
let Inst{7-4} = 0b0001;
}
+def SMMLAR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ IIC_iMAC32, "smmlar", "\t$dst, $a, $b, $c",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b0011; // R = 1
+}
def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
IIC_iMAC32, "smmls", "\t$dst, $a, $b, $c",
@@ -1498,6 +1792,13 @@ def SMMLS : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
let Inst{7-4} = 0b1101;
}
+def SMMLSR : AMul2I <0b0111010, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c),
+ IIC_iMAC32, "smmlsr", "\t$dst, $a, $b, $c",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV6]> {
+ let Inst{7-4} = 0b1111; // R = 1
+}
+
multiclass AI_smul<string opc, PatFrag opnode> {
def BB : AMulxyI<0b0001011, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
IIC_iMUL32, !strconcat(opc, "bb"), "\t$dst, $a, $b",
@@ -1569,7 +1870,7 @@ multiclass AI_smla<string opc, PatFrag opnode> {
def BT : AMulxyI<0b0001000, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
IIC_iMAC16, !strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16)))))]>,
+ (sra GPR:$b, (i32 16)))))]>,
Requires<[IsARM, HasV5TE]> {
let Inst{5} = 0;
let Inst{6} = 1;
@@ -1648,7 +1949,54 @@ def SMLALTT : AMulxyI<0b0001010,(outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
let Inst{6} = 1;
}
-// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+// Helper class for AI_smld -- for disassembly only
+class AMulDualI<bit long, bit sub, bit swap, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm>
+ : AI<oops, iops, MulFrm, itin, opc, asm, []>, Requires<[IsARM, HasV6]> {
+ let Inst{4} = 1;
+ let Inst{5} = swap;
+ let Inst{6} = sub;
+ let Inst{7} = 0;
+ let Inst{21-20} = 0b00;
+ let Inst{22} = long;
+ let Inst{27-23} = 0b01110;
+}
+
+multiclass AI_smld<bit sub, string opc> {
+
+ def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b, $acc">;
+
+ def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc),
+ NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b, $acc">;
+
+ def LD : AMulDualI<1, sub, 0, (outs GPR:$ldst,GPR:$hdst), (ins GPR:$a,GPR:$b),
+ NoItinerary, !strconcat(opc, "ld"), "\t$ldst, $hdst, $a, $b">;
+
+ def LDX : AMulDualI<1, sub, 1, (outs GPR:$ldst,GPR:$hdst),(ins GPR:$a,GPR:$b),
+ NoItinerary, !strconcat(opc, "ldx"),"\t$ldst, $hdst, $a, $b">;
+
+}
+
+defm SMLA : AI_smld<0, "smla">;
+defm SMLS : AI_smld<1, "smls">;
+
+multiclass AI_sdml<bit sub, string opc> {
+
+ def D : AMulDualI<0, sub, 0, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ NoItinerary, !strconcat(opc, "d"), "\t$dst, $a, $b"> {
+ let Inst{15-12} = 0b1111;
+ }
+
+ def DX : AMulDualI<0, sub, 1, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ NoItinerary, !strconcat(opc, "dx"), "\t$dst, $a, $b"> {
+ let Inst{15-12} = 0b1111;
+ }
+
+}
+
+defm SMUA : AI_sdml<0, "smua">;
+defm SMUS : AI_sdml<1, "smus">;
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
@@ -1706,7 +2054,7 @@ def REVSH : AMiscA1I<0b01101111, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
def PKHBT : AMiscA1I<0b01101000, (outs GPR:$dst),
(ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt",
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
(and (shl GPR:$src2, (i32 imm:$shamt)),
0xFFFF0000)))]>,
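The dag pattern above, restated as a C++ sketch (pkhbt_ref is an illustrative name; PKHTB below is the mirror image, keeping the top halfword of $src1):

    #include <cstdint>

    uint32_t pkhbt_ref(uint32_t a, uint32_t b, unsigned sh) {
      return (a & 0x0000FFFFu) | ((b << sh) & 0xFFFF0000u);
    }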
@@ -1723,7 +2071,7 @@ def : ARMV6Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
def PKHTB : AMiscA1I<0b01101000, (outs GPR:$dst),
(ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt",
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
(and (sra GPR:$src2, imm16_31:$shamt),
0xFFFF)))]>, Requires<[IsARM, HasV6]> {
@@ -1769,7 +2117,7 @@ def : ARMPat<(ARMcmpZ GPR:$src, so_imm_neg:$imm),
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
+// a two-value operand where a dag node expects two operands. :(
def MOVCCr : AI1<0b1101, (outs GPR:$dst), (ins GPR:$false, GPR:$true), DPFrm,
IIC_iCMOVr, "mov", "\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -1807,6 +2155,7 @@ def Int_MemBarrierV7 : AInoP<(outs), (ins),
Requires<[IsARM, HasV7]> {
let Inst{31-4} = 0xf57ff05;
// FIXME: add support for options other than a full system DMB
+ // See DMB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
@@ -1817,6 +2166,7 @@ def Int_SyncBarrierV7 : AInoP<(outs), (ins),
Requires<[IsARM, HasV7]> {
let Inst{31-4} = 0xf57ff04;
// FIXME: add support for options other than a full system DSB
+ // See DSB disassembly-only variants below.
let Inst{3-0} = 0b1111;
}
@@ -1839,6 +2189,64 @@ def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
}
}
+// Helper class for multiclass MemB -- for disassembly only
+class AMBI<string opc, string asm>
+ : AInoP<(outs), (ins), MiscFrm, NoItinerary, opc, asm,
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-20} = 0xf57;
+}
+
+multiclass MemB<bits<4> op7_4, string opc> {
+
+ def st : AMBI<opc, "\tst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1110;
+ }
+
+ def ish : AMBI<opc, "\tish"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1011;
+ }
+
+ def ishst : AMBI<opc, "\tishst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1010;
+ }
+
+ def nsh : AMBI<opc, "\tnsh"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0111;
+ }
+
+ def nshst : AMBI<opc, "\tnshst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0110;
+ }
+
+ def osh : AMBI<opc, "\tosh"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0011;
+ }
+
+ def oshst : AMBI<opc, "\toshst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0010;
+ }
+}
+
+// These DMB variants are for disassembly only.
+defm DMB : MemB<0b0101, "dmb">;
+
+// These DSB variants are for disassembly only.
+defm DSB : MemB<0b0100, "dsb">;
+
+// ISB has only the full system option -- for disassembly only
+def ISBsy : AMBI<"isb", ""> {
+ let Inst{7-4} = 0b0110;
+ let Inst{3-0} = 0b1111;
+}
+
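Hypothetical uses of the barrier variants modeled above, via GCC/Clang inline asm (a sketch; the "ish"/"ishst" suffixes are the Inst{3-0} options the multiclass enumerates):

    static inline void barrier_full()  { __asm__ volatile("dmb ish"   ::: "memory"); }
    static inline void barrier_store() { __asm__ volatile("dmb ishst" ::: "memory"); }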
let usesCustomInserter = 1 in {
let Uses = [CPSR] in {
def ATOMIC_LOAD_ADD_I8 : PseudoInst<
@@ -1978,6 +2386,14 @@ def STREXD : AIstrex<0b01, (outs GPR:$success),
[]>;
}
+// Clear-Exclusive is for disassembly only.
+def CLREX : AXI<(outs), (ins), MiscFrm, NoItinerary, "clrex",
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-20} = 0xf57;
+ let Inst{7-4} = 0b0001;
+}
+
// SWP/SWPB are deprecated in V6/V7 and for disassembly only.
let mayLoad = 1 in {
def SWP : AI<(outs GPR:$dst), (ins GPR:$src, GPR:$ptr), LdStExFrm, NoItinerary,
@@ -2049,7 +2465,7 @@ let Defs =
// Two piece so_imms.
let isReMaterializable = 1 in
-def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src),
+def MOVi2pieces : AI1x2<(outs GPR:$dst), (ins so_imm2part:$src),
Pseudo, IIC_iMOVi,
"mov", "\t$dst, $src",
[(set GPR:$dst, so_imm2part:$src)]>,
@@ -2074,7 +2490,7 @@ def : ARMPat<(add GPR:$LHS, so_neg_imm2part:$RHS),
// FIXME: Remove this when we can do generalized remat.
let isReMaterializable = 1 in
def MOVi32imm : AI1x2<(outs GPR:$dst), (ins i32imm:$src), Pseudo, IIC_iMOVi,
- "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
+ "movw", "\t$dst, ${src:lo16}\n\tmovt${p}\t$dst, ${src:hi16}",
[(set GPR:$dst, (i32 imm:$src))]>,
Requires<[IsARM, HasV6T2]>;
@@ -2201,6 +2617,102 @@ def CDP2 : ABXI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
let Inst{4} = 0;
}
+class ACI<dag oops, dag iops, string opc, string asm>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, BrFrm, NoItinerary,
+ opc, asm, "", [/* For disassembly only; pattern left blank */]> {
+ let Inst{27-25} = 0b110;
+}
+
+multiclass LdStCop<bits<4> op31_28, bit load, string opc> {
+
+ def _OFFSET : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ opc, "\tp$cop, cr$CRd, $addr"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _PRE : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ opc, "\tp$cop, cr$CRd, $addr!"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _POST : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
+ opc, "\tp$cop, cr$CRd, [$base], $offset"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def _OPTION : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, i32imm:$option),
+ opc, "\tp$cop, cr$CRd, [$base], $option"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 0; // D = 0
+ let Inst{20} = load;
+ }
+
+ def L_OFFSET : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ opc, "l\tp$cop, cr$CRd, $addr"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_PRE : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, addrmode2:$addr),
+ opc, "l\tp$cop, cr$CRd, $addr!"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 1; // P = 1
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_POST : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, am2offset:$offset),
+ opc, "l\tp$cop, cr$CRd, [$base], $offset"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{21} = 1; // W = 1
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+
+ def L_OPTION : ACI<(outs),
+ (ins nohash_imm:$cop, nohash_imm:$CRd, GPR:$base, nohash_imm:$option),
+ opc, "l\tp$cop, cr$CRd, [$base], $option"> {
+ let Inst{31-28} = op31_28;
+ let Inst{24} = 0; // P = 0
+ let Inst{23} = 1; // U = 1
+ let Inst{21} = 0; // W = 0
+ let Inst{22} = 1; // D = 1
+ let Inst{20} = load;
+ }
+}
+
+defm LDC : LdStCop<{?,?,?,?}, 1, "ldc">;
+defm LDC2 : LdStCop<0b1111, 1, "ldc2">;
+defm STC : LdStCop<{?,?,?,?}, 0, "stc">;
+defm STC2 : LdStCop<0b1111, 0, "stc2">;
+
def MCR : ABI<0b1110, (outs), (ins nohash_imm:$cop, i32imm:$opc1,
GPR:$Rt, nohash_imm:$CRn, nohash_imm:$CRm, i32imm:$opc2),
NoItinerary, "mcr", "\tp$cop, $opc1, $Rt, cr$CRn, cr$CRm, $opc2",
@@ -2282,14 +2794,28 @@ def MRSsys : ABI<0b0001,(outs GPR:$dst),(ins), NoItinerary,"mrs","\t$dst, spsr",
}
// FIXME: mask is ignored for the time being.
-def MSR : ABI<0b0001,(outs),(ins GPR:$src), NoItinerary, "mrs", "\tcpsr, $src",
+def MSR : ABI<0b0001,(outs),(ins GPR:$src), NoItinerary, "msr", "\tcpsr, $src",
[/* For disassembly only; pattern left blank */]> {
let Inst{23-20} = 0b0010;
let Inst{7-4} = 0b0000;
}
// FIXME: mask is ignored for the time being.
-def MSRsys : ABI<0b0001,(outs),(ins GPR:$src),NoItinerary,"mrs","\tspsr, $src",
+def MSRi : ABI<0b0011,(outs),(ins so_imm:$a), NoItinerary, "msr", "\tcpsr, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0010;
+ let Inst{7-4} = 0b0000;
+}
+
+// FIXME: mask is ignored for the time being.
+def MSRsys : ABI<0b0001,(outs),(ins GPR:$src),NoItinerary,"msr","\tspsr, $src",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{23-20} = 0b0110;
+ let Inst{7-4} = 0b0000;
+}
+
+// FIXME: mask is ignored for the time being.
+def MSRsysi : ABI<0b0011,(outs),(ins so_imm:$a),NoItinerary,"msr","\tspsr, $a",
[/* For disassembly only; pattern left blank */]> {
let Inst{23-20} = 0b0110;
let Inst{7-4} = 0b0000;
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index e2be7ba..3aa0810 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -83,11 +83,17 @@ def NEONvrev32 : SDNode<"ARMISD::VREV32", SDTARMVSHUF>;
def NEONvrev16 : SDNode<"ARMISD::VREV16", SDTARMVSHUF>;
def SDTARMVSHUF2 : SDTypeProfile<2, 2, [SDTCisVec<0>, SDTCisSameAs<0, 1>,
- SDTCisSameAs<0, 2>, SDTCisSameAs<0, 3>]>;
+ SDTCisSameAs<0, 2>,
+ SDTCisSameAs<0, 3>]>;
def NEONzip : SDNode<"ARMISD::VZIP", SDTARMVSHUF2>;
def NEONuzp : SDNode<"ARMISD::VUZP", SDTARMVSHUF2>;
def NEONtrn : SDNode<"ARMISD::VTRN", SDTARMVSHUF2>;
+def SDTARMFMAX : SDTypeProfile<1, 2, [SDTCisVT<0, f32>, SDTCisSameAs<0, 1>,
+ SDTCisSameAs<0, 2>]>;
+def NEONfmax : SDNode<"ARMISD::FMAX", SDTARMFMAX>;
+def NEONfmin : SDNode<"ARMISD::FMIN", SDTARMFMAX>;
+
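+// Illustrative note: SDTARMFMAX constrains these nodes to one f32 result and
+// two f32 operands, so their patterns match scalar shapes such as
+// (f32 (NEONfmax SPR:$a, SPR:$b)) -- see the single-precision FP section
+// near the end of this file.
+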
//===----------------------------------------------------------------------===//
// NEON operand definitions
//===----------------------------------------------------------------------===//
@@ -123,9 +129,7 @@ def h64imm : Operand<i64> {
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
def VLDMD : NI<(outs),
(ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
- IIC_fpLoadm,
- "vldm", "${addr:submode} ${addr:base}, $dst1",
- []> {
+ IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> {
let Inst{27-25} = 0b110;
let Inst{20} = 1;
let Inst{11-9} = 0b101;
@@ -133,9 +137,7 @@ def VLDMD : NI<(outs),
def VLDMS : NI<(outs),
(ins addrmode_neonldstm:$addr, reglist:$dst1, variable_ops),
- IIC_fpLoadm,
- "vldm", "${addr:submode} ${addr:base}, $dst1",
- []> {
+ IIC_fpLoadm, "vldm", "${addr:submode} ${addr:base}, $dst1", []> {
let Inst{27-25} = 0b110;
let Inst{20} = 1;
let Inst{11-9} = 0b101;
@@ -144,10 +146,9 @@ def VLDMS : NI<(outs),
*/
// Use vldmia to load a Q register as a D register pair.
-def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
- IIC_fpLoadm,
- "vldmia", "$addr, ${dst:dregpair}",
- [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
+def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr), IIC_fpLoadm,
+ "vldmia", "$addr, ${dst:dregpair}",
+ [(set QPR:$dst, (v2f64 (load addrmode4:$addr)))]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
let Inst{23} = 1; // U bit
@@ -156,10 +157,9 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
}
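// For example, "vldmia r0, {d0, d1}" loads the D-register pair that aliases
// q0 (registers chosen purely for illustration).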
// Use vstmia to store a Q register as a D register pair.
-def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
- IIC_fpStorem,
- "vstmia", "$addr, ${src:dregpair}",
- [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
+def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr), IIC_fpStorem,
+ "vstmia", "$addr, ${src:dregpair}",
+ [(store (v2f64 QPR:$src), addrmode4:$addr)]> {
let Inst{27-25} = 0b110;
let Inst{24} = 0; // P bit
let Inst{23} = 1; // U bit
@@ -191,6 +191,29 @@ def VLD1q32 : VLD1Q<0b1000, "vld1", "32", v4i32, int_arm_neon_vld1>;
def VLD1qf : VLD1Q<0b1000, "vld1", "32", v4f32, int_arm_neon_vld1>;
def VLD1q64 : VLD1Q<0b1100, "vld1", "64", v2i64, int_arm_neon_vld1>;
+// These (dreg triple/quadruple) are for disassembly only.
+class VLD1D3<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0, 0b10, 0b0110, op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
+ (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt,
+ "\\{$dst1, $dst2, $dst3\\}, $addr", "",
+ [/* For disassembly only; pattern left blank */]>;
+class VLD1D4<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0,0b10,0b0010,op7_4,(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
+ (ins addrmode6:$addr), IIC_VLD1, OpcodeStr, Dt,
+ "\\{$dst1, $dst2, $dst3, $dst4\\}, $addr", "",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VLD1d8T : VLD1D3<0b0000, "vld1", "8">;
+def VLD1d16T : VLD1D3<0b0100, "vld1", "16">;
+def VLD1d32T : VLD1D3<0b1000, "vld1", "32">;
+//def VLD1d64T : VLD1D3<0b1100, "vld1", "64">;
+
+def VLD1d8Q : VLD1D4<0b0000, "vld1", "8">;
+def VLD1d16Q : VLD1D4<0b0100, "vld1", "16">;
+def VLD1d32Q : VLD1D4<0b1000, "vld1", "32">;
+//def VLD1d64Q : VLD1D4<0b1100, "vld1", "64">;
+
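+// Plausible assembly for the defs above (registers chosen for illustration):
+//   vld1.8  {d0, d1, d2}, [r0]       @ VLD1d8T
+//   vld1.16 {d0, d1, d2, d3}, [r0]   @ VLD1d16Q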
+
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in {
// VLD2 : Vector Load (multiple 2-element structures)
@@ -216,6 +239,16 @@ def VLD2q8 : VLD2Q<0b0000, "vld2", "8">;
def VLD2q16 : VLD2Q<0b0100, "vld2", "16">;
def VLD2q32 : VLD2Q<0b1000, "vld2", "32">;
+// These (double-spaced dreg pair) are for disassembly only.
+class VLD2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0,0b10,0b1001,op7_4, (outs DPR:$dst1, DPR:$dst2),
+ (ins addrmode6:$addr), IIC_VLD2,
+ OpcodeStr, Dt, "\\{$dst1, $dst2\\}, $addr", "", []>;
+
+def VLD2d8D : VLD2Ddbl<0b0000, "vld2", "8">;
+def VLD2d16D : VLD2Ddbl<0b0100, "vld2", "16">;
+def VLD2d32D : VLD2Ddbl<0b1000, "vld2", "32">;
+
// VLD3 : Vector Load (multiple 3-element structures)
class VLD3D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b10,0b0100,op7_4, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
@@ -285,105 +318,64 @@ def VLD4q32b : VLD4WB<0b1000, "vld4", "32">;
class VLD2LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VLD2,
- OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
+ IIC_VLD2, OpcodeStr, Dt, "\\{$dst1[$lane], $dst2[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2", []>;
// vld2 to single-spaced registers.
def VLD2LNd8 : VLD2LN<0b0001, "vld2", "8">;
-def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> {
- let Inst{5} = 0;
-}
-def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> {
- let Inst{6} = 0;
-}
+def VLD2LNd16 : VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 0; }
+def VLD2LNd32 : VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 0; }
// vld2 to double-spaced even registers.
-def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> {
- let Inst{5} = 1;
-}
-def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> {
- let Inst{6} = 1;
-}
+def VLD2LNq16a: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; }
+def VLD2LNq32a: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; }
// vld2 to double-spaced odd registers.
-def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> {
- let Inst{5} = 1;
-}
-def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> {
- let Inst{6} = 1;
-}
+def VLD2LNq16b: VLD2LN<0b0101, "vld2", "16"> { let Inst{5} = 1; }
+def VLD2LNq32b: VLD2LN<0b1001, "vld2", "32"> { let Inst{6} = 1; }
// VLD3LN : Vector Load (single 3-element structure to one lane)
class VLD3LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b10,op11_8,{?,?,?,?}, (outs DPR:$dst1, DPR:$dst2, DPR:$dst3),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VLD3,
- OpcodeStr, Dt,
+ nohash_imm:$lane), IIC_VLD3, OpcodeStr, Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3", []>;
// vld3 to single-spaced registers.
-def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> {
- let Inst{4} = 0;
-}
-def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> {
- let Inst{5-4} = 0b00;
-}
-def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> {
- let Inst{6-4} = 0b000;
-}
+def VLD3LNd8 : VLD3LN<0b0010, "vld3", "8"> { let Inst{4} = 0; }
+def VLD3LNd16 : VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b00; }
+def VLD3LNd32 : VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b000; }
// vld3 to double-spaced even registers.
-def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> {
- let Inst{5-4} = 0b10;
-}
-def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> {
- let Inst{6-4} = 0b100;
-}
+def VLD3LNq16a: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; }
+def VLD3LNq32a: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; }
// vld3 to double-spaced odd registers.
-def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> {
- let Inst{5-4} = 0b10;
-}
-def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> {
- let Inst{6-4} = 0b100;
-}
+def VLD3LNq16b: VLD3LN<0b0110, "vld3", "16"> { let Inst{5-4} = 0b10; }
+def VLD3LNq32b: VLD3LN<0b1010, "vld3", "32"> { let Inst{6-4} = 0b100; }
// VLD4LN : Vector Load (single 4-element structure to one lane)
class VLD4LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b10,op11_8,{?,?,?,?},
(outs DPR:$dst1, DPR:$dst2, DPR:$dst3, DPR:$dst4),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VLD4,
- OpcodeStr, Dt,
+ nohash_imm:$lane), IIC_VLD4, OpcodeStr, Dt,
"\\{$dst1[$lane], $dst2[$lane], $dst3[$lane], $dst4[$lane]\\}, $addr",
"$src1 = $dst1, $src2 = $dst2, $src3 = $dst3, $src4 = $dst4", []>;
// vld4 to single-spaced registers.
def VLD4LNd8 : VLD4LN<0b0011, "vld4", "8">;
-def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> {
- let Inst{5} = 0;
-}
-def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> {
- let Inst{6} = 0;
-}
+def VLD4LNd16 : VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 0; }
+def VLD4LNd32 : VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 0; }
// vld4 to double-spaced even registers.
-def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> {
- let Inst{5} = 1;
-}
-def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> {
- let Inst{6} = 1;
-}
+def VLD4LNq16a: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; }
+def VLD4LNq32a: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; }
// vld4 to double-spaced odd registers.
-def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> {
- let Inst{5} = 1;
-}
-def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> {
- let Inst{6} = 1;
-}
+def VLD4LNq16b: VLD4LN<0b0111, "vld4", "16"> { let Inst{5} = 1; }
+def VLD4LNq32b: VLD4LN<0b1011, "vld4", "32"> { let Inst{6} = 1; }
// VLD1DUP : Vector Load (single element to all lanes)
// VLD2DUP : Vector Load (single 2-element structure to all lanes)
@@ -418,6 +410,31 @@ def VST1qf : VST1Q<0b1000, "vst1", "32", v4f32, int_arm_neon_vst1>;
def VST1q64 : VST1Q<0b1100, "vst1", "64", v2i64, int_arm_neon_vst1>;
} // hasExtraSrcRegAllocReq
+// These (dreg triple/quadruple) are for disassembly only.
+class VST1D3<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0, 0b00, 0b0110, op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3), IIC_VST,
+ OpcodeStr, Dt,
+ "\\{$src1, $src2, $src3\\}, $addr", "",
+ [/* For disassembly only; pattern left blank */]>;
+class VST1D4<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0, 0b00, 0b0010, op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
+ IIC_VST, OpcodeStr, Dt,
+ "\\{$src1, $src2, $src3, $src4\\}, $addr", "",
+ [/* For disassembly only; pattern left blank */]>;
+
+def VST1d8T : VST1D3<0b0000, "vst1", "8">;
+def VST1d16T : VST1D3<0b0100, "vst1", "16">;
+def VST1d32T : VST1D3<0b1000, "vst1", "32">;
+//def VST1d64T : VST1D3<0b1100, "vst1", "64">;
+
+def VST1d8Q : VST1D4<0b0000, "vst1", "8">;
+def VST1d16Q : VST1D4<0b0100, "vst1", "16">;
+def VST1d32Q : VST1D4<0b1000, "vst1", "32">;
+//def VST1d64Q : VST1D4<0b1100, "vst1", "64">;
+
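+// Plausible assembly for the defs above (registers chosen for illustration):
+//   vst1.8 {d0, d1, d2}, [r0]        @ VST1d8T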
+
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in {
// VST2 : Vector Store (multiple 2-element structures)
@@ -428,8 +445,7 @@ class VST2D<bits<4> op7_4, string OpcodeStr, string Dt>
class VST2Q<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0011,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST,
- OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+ IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
"", []>;
def VST2d8 : VST2D<0b0000, "vst2", "8">;
@@ -443,6 +459,16 @@ def VST2q8 : VST2Q<0b0000, "vst2", "8">;
def VST2q16 : VST2Q<0b0100, "vst2", "16">;
def VST2q32 : VST2Q<0b1000, "vst2", "32">;
+// These (double-spaced dreg pair) are for disassembly only.
+class VST2Ddbl<bits<4> op7_4, string OpcodeStr, string Dt>
+ : NLdSt<0, 0b00, 0b1001, op7_4, (outs),
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2), IIC_VST,
+ OpcodeStr, Dt, "\\{$src1, $src2\\}, $addr", "", []>;
+
+def VST2d8D : VST2Ddbl<0b0000, "vst2", "8">;
+def VST2d16D : VST2Ddbl<0b0100, "vst2", "16">;
+def VST2d32D : VST2Ddbl<0b1000, "vst2", "32">;
+
// VST3 : Vector Store (multiple 3-element structures)
class VST3D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0100,op7_4, (outs),
@@ -476,14 +502,12 @@ def VST3q32b : VST3WB<0b1000, "vst3", "32">;
class VST4D<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0000,op7_4, (outs),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST,
- OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+ IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
"", []>;
class VST4WB<bits<4> op7_4, string OpcodeStr, string Dt>
: NLdSt<0,0b00,0b0001,op7_4, (outs GPR:$wb),
(ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4),
- IIC_VST,
- OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
+ IIC_VST, OpcodeStr, Dt, "\\{$src1, $src2, $src3, $src4\\}, $addr",
"$addr.addr = $wb", []>;
def VST4d8 : VST4D<0b0000, "vst4", "8">;
@@ -511,104 +535,63 @@ def VST4q32b : VST4WB<0b1000, "vst4", "32">;
// VST2LN : Vector Store (single 2-element structure from one lane)
class VST2LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
- IIC_VST,
- OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
- "", []>;
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, nohash_imm:$lane),
+ IIC_VST, OpcodeStr, Dt, "\\{$src1[$lane], $src2[$lane]\\}, $addr",
+ "", []>;
// vst2 to single-spaced registers.
def VST2LNd8 : VST2LN<0b0001, "vst2", "8">;
-def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> {
- let Inst{5} = 0;
-}
-def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> {
- let Inst{6} = 0;
-}
+def VST2LNd16 : VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 0; }
+def VST2LNd32 : VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 0; }
// vst2 to double-spaced even registers.
-def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> {
- let Inst{5} = 1;
-}
-def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> {
- let Inst{6} = 1;
-}
+def VST2LNq16a: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; }
+def VST2LNq32a: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; }
// vst2 to double-spaced odd registers.
-def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> {
- let Inst{5} = 1;
-}
-def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> {
- let Inst{6} = 1;
-}
+def VST2LNq16b: VST2LN<0b0101, "vst2", "16"> { let Inst{5} = 1; }
+def VST2LNq32b: VST2LN<0b1001, "vst2", "32"> { let Inst{6} = 1; }
// VST3LN : Vector Store (single 3-element structure from one lane)
class VST3LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
- nohash_imm:$lane), IIC_VST,
- OpcodeStr, Dt,
- "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3,
+ nohash_imm:$lane), IIC_VST, OpcodeStr, Dt,
+ "\\{$src1[$lane], $src2[$lane], $src3[$lane]\\}, $addr", "", []>;
// vst3 to single-spaced registers.
-def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> {
- let Inst{4} = 0;
-}
-def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> {
- let Inst{5-4} = 0b00;
-}
-def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> {
- let Inst{6-4} = 0b000;
-}
+def VST3LNd8 : VST3LN<0b0010, "vst3", "8"> { let Inst{4} = 0; }
+def VST3LNd16 : VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b00; }
+def VST3LNd32 : VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b000; }
// vst3 to double-spaced even registers.
-def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> {
- let Inst{5-4} = 0b10;
-}
-def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> {
- let Inst{6-4} = 0b100;
-}
+def VST3LNq16a: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; }
+def VST3LNq32a: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; }
// vst3 to double-spaced odd registers.
-def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> {
- let Inst{5-4} = 0b10;
-}
-def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> {
- let Inst{6-4} = 0b100;
-}
+def VST3LNq16b: VST3LN<0b0110, "vst3", "16"> { let Inst{5-4} = 0b10; }
+def VST3LNq32b: VST3LN<0b1010, "vst3", "32"> { let Inst{6-4} = 0b100; }
// VST4LN : Vector Store (single 4-element structure from one lane)
class VST4LN<bits<4> op11_8, string OpcodeStr, string Dt>
: NLdSt<1,0b00,op11_8,{?,?,?,?}, (outs),
- (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
- nohash_imm:$lane), IIC_VST,
- OpcodeStr, Dt,
+ (ins addrmode6:$addr, DPR:$src1, DPR:$src2, DPR:$src3, DPR:$src4,
+ nohash_imm:$lane), IIC_VST, OpcodeStr, Dt,
"\\{$src1[$lane], $src2[$lane], $src3[$lane], $src4[$lane]\\}, $addr",
- "", []>;
+ "", []>;
// vst4 to single-spaced registers.
def VST4LNd8 : VST4LN<0b0011, "vst4", "8">;
-def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> {
- let Inst{5} = 0;
-}
-def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> {
- let Inst{6} = 0;
-}
+def VST4LNd16 : VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 0; }
+def VST4LNd32 : VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 0; }
// vst4 to double-spaced even registers.
-def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> {
- let Inst{5} = 1;
-}
-def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> {
- let Inst{6} = 1;
-}
+def VST4LNq16a: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; }
+def VST4LNq32a: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; }
// vst4 to double-spaced odd registers.
-def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> {
- let Inst{5} = 1;
-}
-def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> {
- let Inst{6} = 1;
-}
+def VST4LNq16b: VST4LN<0b0111, "vst4", "16"> { let Inst{5} = 1; }
+def VST4LNq32b: VST4LN<0b1011, "vst4", "32"> { let Inst{6} = 1; }
} // mayStore = 1, hasExtraSrcRegAllocReq = 1
@@ -656,34 +639,26 @@ def SubReg_i32_lane : SDNodeXForm<imm, [{
// Instruction Classes
//===----------------------------------------------------------------------===//
-// Basic 2-register operations, both double- and quad-register.
+// Basic 2-register operations: single-, double- and quad-register.
+class N2VS<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
+ IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
class N2VD<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4, (outs DPR:$dst),
(ins DPR:$src), IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "",
[(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src))))]>;
class N2VQ<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
+ bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,
+ string Dt, ValueType ResTy, ValueType OpTy, SDNode OpNode>
: N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 1, op4, (outs QPR:$dst),
(ins QPR:$src), IIC_VUNAQ, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src))))]>;
-// Basic 2-register operations, scalar single-precision.
-class N2VDs<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4, string OpcodeStr,string Dt,
- ValueType ResTy, ValueType OpTy, SDNode OpNode>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src),
- IIC_VUNAD, OpcodeStr, Dt, "$dst, $src", "", []>;
-
-class N2VDsPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
- : NEONFPPat<(ResTy (OpNode SPR:$a)),
- (EXTRACT_SUBREG
- (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
- arm_ssubreg_0)>;
-
// Basic 2-register intrinsics, both double- and quad-register.
class N2VDInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op4,
@@ -700,21 +675,6 @@ class N2VQInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
(ins QPR:$src), itin, OpcodeStr, Dt, "$dst, $src", "",
[(set QPR:$dst, (ResTy (IntOp (OpTy QPR:$src))))]>;
-// Basic 2-register intrinsics, scalar single-precision
-class N2VDInts<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
- bits<2> op17_16, bits<5> op11_7, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
- : N2V<op24_23, op21_20, op19_18, op17_16, op11_7, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), itin,
- OpcodeStr, Dt, "$dst, $src", "", []>;
-
-class N2VDIntsPat<SDNode OpNode, NeonI Inst>
- : NEONFPPat<(f32 (OpNode SPR:$a)),
- (EXTRACT_SUBREG
- (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0)),
- arm_ssubreg_0)>;
-
// Narrow 2-register intrinsics.
class N2VNInt<bits<2> op24_23, bits<2> op21_20, bits<2> op19_18,
bits<2> op17_16, bits<5> op11_7, bit op6, bit op4,
@@ -742,15 +702,22 @@ class N2VDShuffle<bits<2> op19_18, bits<5> op11_7, string OpcodeStr, string Dt>
class N2VQShuffle<bits<2> op19_18, bits<5> op11_7,
InstrItinClass itin, string OpcodeStr, string Dt>
: N2V<0b11, 0b11, op19_18, 0b10, op11_7, 1, 0, (outs QPR:$dst1, QPR:$dst2),
- (ins QPR:$src1, QPR:$src2), itin,
- OpcodeStr, Dt, "$dst1, $dst2",
+ (ins QPR:$src1, QPR:$src2), itin, OpcodeStr, Dt, "$dst1, $dst2",
"$src1 = $dst1, $src2 = $dst2", []>;
-// Basic 3-register operations, both double- and quad-register.
+// Basic 3-register operations: single-, double- and quad-register.
+class N3VS<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
+ SDNode OpNode, bit Commutable>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
+ let isCommutable = Commutable;
+}
+
class N3VD<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy,
- SDNode OpNode, bit Commutable>
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
@@ -763,9 +730,9 @@ class N3VDX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
ValueType ResTy, ValueType OpTy,
SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
- OpcodeStr, "$dst, $src1, $src2", "",
- [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]> {
+ (outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
+ OpcodeStr, "$dst, $src1, $src2", "",
+ [(set DPR:$dst, (ResTy (OpNode (OpTy DPR:$src1), (OpTy DPR:$src2))))]>{
let isCommutable = Commutable;
}
class N3VDSL<bits<2> op21_20, bits<4> op11_8,
@@ -776,27 +743,23 @@ class N3VDSL<bits<2> op21_20, bits<4> op11_8,
itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_VFP2:$src2),
- imm:$lane)))))]> {
+ (Ty (NEONvduplane (Ty DPR_VFP2:$src2), imm:$lane)))))]>{
let isCommutable = 0;
}
class N3VDSL16<bits<2> op21_20, bits<4> op11_8,
string OpcodeStr, string Dt, ValueType Ty, SDNode ShOp>
: N3V<0, 1, op21_20, op11_8, 1, 0,
(outs DPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- IIC_VMULi16D,
- OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ IIC_VMULi16D, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (Ty DPR:$dst),
(Ty (ShOp (Ty DPR:$src1),
- (Ty (NEONvduplane (Ty DPR_8:$src2),
- imm:$lane)))))]> {
+ (Ty (NEONvduplane (Ty DPR_8:$src2), imm:$lane)))))]> {
let isCommutable = 0;
}
class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy,
- SDNode OpNode, bit Commutable>
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
@@ -805,12 +768,11 @@ class N3VQ<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
}
class N3VQX<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr,
- ValueType ResTy, ValueType OpTy,
- SDNode OpNode, bit Commutable>
+ ValueType ResTy, ValueType OpTy, SDNode OpNode, bit Commutable>
: N3VX<op24, op23, op21_20, op11_8, 1, op4,
- (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
- OpcodeStr, "$dst, $src1, $src2", "",
- [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]> {
+ (outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
+ OpcodeStr, "$dst, $src1, $src2", "",
+ [(set QPR:$dst, (ResTy (OpNode (OpTy QPR:$src1), (OpTy QPR:$src2))))]>{
let isCommutable = Commutable;
}
class N3VQSL<bits<2> op21_20, bits<4> op11_8,
@@ -825,13 +787,11 @@ class N3VQSL<bits<2> op21_20, bits<4> op11_8,
imm:$lane)))))]> {
let isCommutable = 0;
}
-class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
- string OpcodeStr, string Dt,
+class N3VQSL16<bits<2> op21_20, bits<4> op11_8, string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy, SDNode ShOp>
: N3V<1, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins QPR:$src1, DPR_8:$src2, nohash_imm:$lane),
- IIC_VMULi16Q,
- OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
+ IIC_VMULi16Q, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (NEONvduplane (OpTy DPR_8:$src2),
@@ -839,27 +799,10 @@ class N3VQSL16<bits<2> op21_20, bits<4> op11_8,
let isCommutable = 0;
}
-// Basic 3-register operations, scalar single-precision
-class N3VDs<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- SDNode OpNode, bit Commutable>
- : N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
- OpcodeStr, Dt, "$dst, $src1, $src2", "", []> {
- let isCommutable = Commutable;
-}
-class N3VDsPat<SDNode OpNode, NeonI Inst>
- : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
- (EXTRACT_SUBREG
- (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
- arm_ssubreg_0)>;
-
// Basic 3-register intrinsics, both double- and quad-register.
class N3VDInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp, bit Commutable>
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 0, op4,
(outs DPR:$dst), (ins DPR:$src1, DPR:$src2), itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
@@ -891,8 +834,7 @@ class N3VDIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
class N3VQInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp, bit Commutable>
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp, bit Commutable>
: N3V<op24, op23, op21_20, op11_8, 1, op4,
(outs QPR:$dst), (ins QPR:$src1, QPR:$src2), itin,
OpcodeStr, Dt, "$dst, $src1, $src2", "",
@@ -924,7 +866,15 @@ class N3VQIntSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
let isCommutable = 0;
}
-// Multiply-Add/Sub operations, both double- and quad-register.
+// Multiply-Add/Sub operations: single-, double- and quad-register.
+class N3VSMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType Ty, SDNode MulOp, SDNode OpNode>
+ : N3V<op24, op23, op21_20, op11_8, 0, op4,
+ (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
+ OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
+
class N3VDMulOp<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
InstrItinClass itin, string OpcodeStr, string Dt,
ValueType Ty, SDNode MulOp, SDNode OpNode>
@@ -976,8 +926,8 @@ class N3VQMulOpSL<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$src2,
- (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
- imm:$lane)))))))]>;
+ (ResTy (NEONvduplane (OpTy DPR_VFP2:$src3),
+ imm:$lane)))))))]>;
class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
string OpcodeStr, string Dt,
ValueType ResTy, ValueType OpTy,
@@ -989,25 +939,8 @@ class N3VQMulOpSL16<bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
[(set (ResTy QPR:$dst),
(ResTy (ShOp (ResTy QPR:$src1),
(ResTy (MulOp QPR:$src2,
- (ResTy (NEONvduplane (OpTy DPR_8:$src3),
- imm:$lane)))))))]>;
-
-// Multiply-Add/Sub operations, scalar single-precision
-class N3VDMulOps<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
- InstrItinClass itin, string OpcodeStr, string Dt,
- ValueType Ty, SDNode MulOp, SDNode OpNode>
- : N3V<op24, op23, op21_20, op11_8, 0, op4,
- (outs DPR_VFP2:$dst),
- (ins DPR_VFP2:$src1, DPR_VFP2:$src2, DPR_VFP2:$src3), itin,
- OpcodeStr, Dt, "$dst, $src2, $src3", "$src1 = $dst", []>;
-
-class N3VDMulOpsPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
- : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
- (EXTRACT_SUBREG
- (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$acc, arm_ssubreg_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$a, arm_ssubreg_0),
- (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)), SPR:$b, arm_ssubreg_0)),
- arm_ssubreg_0)>;
+ (ResTy (NEONvduplane (OpTy DPR_8:$src3),
+ imm:$lane)))))))]>;
// Neon 3-argument intrinsics, both double- and quad-register.
// The destination register is also used as the first source operand register.
@@ -1050,9 +983,9 @@ class N3VLInt3SL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
(OpTy DPR:$src2),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$src3),
imm:$lane)))))]>;
-class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp>
+class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst),
(ins QPR:$src1, DPR:$src2, DPR_8:$src3, nohash_imm:$lane), itin,
@@ -1063,7 +996,6 @@ class N3VLInt3SL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass iti
(OpTy (NEONvduplane (OpTy DPR_8:$src3),
imm:$lane)))))]>;
-
// Narrowing 3-register intrinsics.
class N3VNInt<bit op24, bit op23, bits<2> op21_20, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, ValueType TyD, ValueType TyQ,
@@ -1095,9 +1027,9 @@ class N3VLIntSL<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
(ResTy (IntOp (OpTy DPR:$src1),
(OpTy (NEONvduplane (OpTy DPR_VFP2:$src2),
imm:$lane)))))]>;
-class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8, InstrItinClass itin,
- string OpcodeStr, string Dt, ValueType ResTy, ValueType OpTy,
- Intrinsic IntOp>
+class N3VLIntSL16<bit op24, bits<2> op21_20, bits<4> op11_8,
+ InstrItinClass itin, string OpcodeStr, string Dt,
+ ValueType ResTy, ValueType OpTy, Intrinsic IntOp>
: N3V<op24, 1, op21_20, op11_8, 1, 0,
(outs QPR:$dst), (ins DPR:$src1, DPR_8:$src2, nohash_imm:$lane),
itin, OpcodeStr, Dt, "$dst, $src1, $src2[$lane]", "",
@@ -1249,6 +1181,45 @@ class N2VCvtQ<bit op24, bit op23, bits<4> op11_8, bit op7, bit op4,
// S = single int (32 bit) elements
// D = double int (64 bit) elements
+// Neon 2-register vector operations -- for disassembly only.
+
+// First with only element sizes of 8, 16 and 32 bits:
+multiclass N2V_QHS_cmp<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
+ bits<5> op11_7, bit op4, string opc, string Dt,
+ string asm> {
+ // 64-bit vector types.
+ def v8i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "", []>;
+ def v4i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "", []>;
+ def v2i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "", []>;
+ def v2f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 0, op4,
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ opc, "f32", asm, "", []> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+
+ // 128-bit vector types.
+ def v16i8 : N2V<op24_23, op21_20, 0b00, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "8"), asm, "", []>;
+ def v8i16 : N2V<op24_23, op21_20, 0b01, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "16"), asm, "", []>;
+ def v4i32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, !strconcat(Dt, "32"), asm, "", []>;
+ def v4f32 : N2V<op24_23, op21_20, 0b10, op17_16, op11_7, 1, op4,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ opc, "f32", asm, "", []> {
+ let Inst{10} = 1; // overwrite F = 1
+ }
+}
+
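+// Illustrative note: a single defm built from this multiclass, e.g.
+//   defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
+//                            "$dst, $src, #0">;
+// (used later in this file) expands to eight defs, VCEQzv8i8 through
+// VCEQzv4f32, one per vector type, with the f32 variants setting Inst{10}.
+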
// Neon 3-register vector operations.
// First with only element sizes of 8, 16 and 32 bits:
@@ -1262,22 +1233,22 @@ multiclass N3V_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
OpcodeStr, !strconcat(Dt, "8"),
v8i8, v8i8, OpNode, Commutable>;
def v4i16 : N3VD<op24, op23, 0b01, op11_8, op4, itinD16,
- OpcodeStr, !strconcat(Dt, "16"),
- v4i16, v4i16, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "16"),
+ v4i16, v4i16, OpNode, Commutable>;
def v2i32 : N3VD<op24, op23, 0b10, op11_8, op4, itinD32,
- OpcodeStr, !strconcat(Dt, "32"),
- v2i32, v2i32, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "32"),
+ v2i32, v2i32, OpNode, Commutable>;
// 128-bit vector types.
def v16i8 : N3VQ<op24, op23, 0b00, op11_8, op4, itinQ16,
- OpcodeStr, !strconcat(Dt, "8"),
- v16i8, v16i8, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v16i8, v16i8, OpNode, Commutable>;
def v8i16 : N3VQ<op24, op23, 0b01, op11_8, op4, itinQ16,
- OpcodeStr, !strconcat(Dt, "16"),
- v8i16, v8i16, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "16"),
+ v8i16, v8i16, OpNode, Commutable>;
def v4i32 : N3VQ<op24, op23, 0b10, op11_8, op4, itinQ32,
- OpcodeStr, !strconcat(Dt, "32"),
- v4i32, v4i32, OpNode, Commutable>;
+ OpcodeStr, !strconcat(Dt, "32"),
+ v4i32, v4i32, OpNode, Commutable>;
}
multiclass N3VSL_HS<bits<4> op11_8, string OpcodeStr, string Dt, SDNode ShOp> {
@@ -1372,7 +1343,7 @@ multiclass N3VIntSL_HS<bits<4> op11_8,
def v2i32 : N3VDIntSL<0b10, op11_8, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, IntOp>;
def v8i16 : N3VQIntSL16<0b01, op11_8, itinQ16,
- OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, IntOp>;
def v4i32 : N3VQIntSL<0b10, op11_8, itinQ32,
OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, IntOp>;
}
@@ -1386,8 +1357,8 @@ multiclass N3VInt_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
: N3VInt_HS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
def v8i8 : N3VDInt<op24, op23, 0b00, op11_8, op4, itinD16,
- OpcodeStr, !strconcat(Dt, "8"),
- v8i8, v8i8, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "8"),
+ v8i8, v8i8, IntOp, Commutable>;
def v16i8 : N3VQInt<op24, op23, 0b00, op11_8, op4, itinQ16,
OpcodeStr, !strconcat(Dt, "8"),
v16i8, v16i8, IntOp, Commutable>;
@@ -1402,11 +1373,11 @@ multiclass N3VInt_QHSD<bit op24, bit op23, bits<4> op11_8, bit op4,
: N3VInt_QHS<op24, op23, op11_8, op4, itinD16, itinD32, itinQ16, itinQ32,
OpcodeStr, Dt, IntOp, Commutable> {
def v1i64 : N3VDInt<op24, op23, 0b11, op11_8, op4, itinD32,
- OpcodeStr, !strconcat(Dt, "64"),
- v1i64, v1i64, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v1i64, v1i64, IntOp, Commutable>;
def v2i64 : N3VQInt<op24, op23, 0b11, op11_8, op4, itinQ32,
- OpcodeStr, !strconcat(Dt, "64"),
- v2i64, v2i64, IntOp, Commutable>;
+ OpcodeStr, !strconcat(Dt, "64"),
+ v2i64, v2i64, IntOp, Commutable>;
}
@@ -1511,9 +1482,11 @@ multiclass N3VMulOpSL_HS<bits<4> op11_8,
def v2i32 : N3VDMulOpSL<0b10, op11_8, itinD32,
OpcodeStr, !strconcat(Dt, "32"), v2i32, mul, ShOp>;
def v8i16 : N3VQMulOpSL16<0b01, op11_8, itinQ16,
- OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v4i16,
+ mul, ShOp>;
def v4i32 : N3VQMulOpSL<0b10, op11_8, itinQ32,
- OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32, mul, ShOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v2i32,
+ mul, ShOp>;
}
// Neon 3-argument intrinsics,
@@ -1522,19 +1495,19 @@ multiclass N3VInt3_QHS<bit op24, bit op23, bits<4> op11_8, bit op4,
string OpcodeStr, string Dt, Intrinsic IntOp> {
// 64-bit vector types.
def v8i8 : N3VDInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16D,
- OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N3VDInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16D,
- OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
def v2i32 : N3VDInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32D,
- OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
// 128-bit vector types.
def v16i8 : N3VQInt3<op24, op23, 0b00, op11_8, op4, IIC_VMACi16Q,
- OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+ OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
def v8i16 : N3VQInt3<op24, op23, 0b01, op11_8, op4, IIC_VMACi16Q,
- OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
+ OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
def v4i32 : N3VQInt3<op24, op23, 0b10, op11_8, op4, IIC_VMACi32Q,
- OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
+ OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
}
@@ -1576,17 +1549,17 @@ multiclass N2VInt_QHS<bits<2> op24_23, bits<2> op21_20, bits<2> op17_16,
def v8i8 : N2VDInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
itinD, OpcodeStr, !strconcat(Dt, "8"), v8i8, v8i8, IntOp>;
def v4i16 : N2VDInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- itinD, OpcodeStr, !strconcat(Dt, "16"), v4i16, v4i16, IntOp>;
+ itinD, OpcodeStr, !strconcat(Dt, "16"),v4i16,v4i16,IntOp>;
def v2i32 : N2VDInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- itinD, OpcodeStr, !strconcat(Dt, "32"), v2i32, v2i32, IntOp>;
+ itinD, OpcodeStr, !strconcat(Dt, "32"),v2i32,v2i32,IntOp>;
// 128-bit vector types.
def v16i8 : N2VQInt<op24_23, op21_20, 0b00, op17_16, op11_7, op4,
- itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8, v16i8, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "8"), v16i8,v16i8,IntOp>;
def v8i16 : N2VQInt<op24_23, op21_20, 0b01, op17_16, op11_7, op4,
- itinQ, OpcodeStr, !strconcat(Dt, "16"), v8i16, v8i16, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "16"),v8i16,v8i16,IntOp>;
def v4i32 : N2VQInt<op24_23, op21_20, 0b10, op17_16, op11_7, op4,
- itinQ, OpcodeStr, !strconcat(Dt, "32"), v4i32, v4i32, IntOp>;
+ itinQ, OpcodeStr, !strconcat(Dt, "32"),v4i32,v4i32,IntOp>;
}
@@ -1846,29 +1819,31 @@ def VMULpd : N3VDInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16D, "vmul", "p8",
def VMULpq : N3VQInt<1, 0, 0b00, 0b1001, 1, IIC_VMULi16Q, "vmul", "p8",
v16i8, v16i8, int_arm_neon_vmulp, 1>;
def VMULfd : N3VD<1, 0, 0b00, 0b1101, 1, IIC_VBIND, "vmul", "f32",
- v2f32, v2f32, fmul, 1>;
+ v2f32, v2f32, fmul, 1>;
def VMULfq : N3VQ<1, 0, 0b00, 0b1101, 1, IIC_VBINQ, "vmul", "f32",
- v4f32, v4f32, fmul, 1>;
-defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
-def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
-def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32, v2f32, fmul>;
+ v4f32, v4f32, fmul, 1>;
+defm VMULsl : N3VSL_HS<0b1000, "vmul", "i", mul>;
+def VMULslfd : N3VDSL<0b10, 0b1001, IIC_VBIND, "vmul", "f32", v2f32, fmul>;
+def VMULslfq : N3VQSL<0b10, 0b1001, IIC_VBINQ, "vmul", "f32", v4f32,
+ v2f32, fmul>;
+
def : Pat<(v8i16 (mul (v8i16 QPR:$src1),
(v8i16 (NEONvduplane (v8i16 QPR:$src2), imm:$lane)))),
(v8i16 (VMULslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (mul (v4i32 QPR:$src1),
(v4i32 (NEONvduplane (v4i32 QPR:$src2), imm:$lane)))),
(v4i32 (VMULslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fmul (v4f32 QPR:$src1),
(v4f32 (NEONvduplane (v4f32 QPR:$src2), imm:$lane)))),
(v4f32 (VMULslfq (v4f32 QPR:$src1),
(v2f32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VQDMULH : Vector Saturating Doubling Multiply Returning High Half
@@ -1883,14 +1858,14 @@ def : Pat<(v8i16 (int_arm_neon_vqdmulh (v8i16 QPR:$src1),
imm:$lane)))),
(v8i16 (VQDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqdmulh (v4i32 QPR:$src1),
(v4i32 (NEONvduplane (v4i32 QPR:$src2),
imm:$lane)))),
(v4i32 (VQDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VQRDMULH : Vector Rounding Saturating Doubling Multiply Returning High Half
@@ -1905,14 +1880,14 @@ def : Pat<(v8i16 (int_arm_neon_vqrdmulh (v8i16 QPR:$src1),
imm:$lane)))),
(v8i16 (VQRDMULHslv8i16 (v8i16 QPR:$src1),
(v4i16 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (int_arm_neon_vqrdmulh (v4i32 QPR:$src1),
(v4i32 (NEONvduplane (v4i32 QPR:$src2),
imm:$lane)))),
(v4i32 (VQRDMULHslv4i32 (v4i32 QPR:$src1),
(v2i32 (EXTRACT_SUBREG QPR:$src2,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VMULL : Vector Multiply Long (integer and polynomial) (Q = D * D)
@@ -1950,30 +1925,28 @@ def VMLAslfq : N3VQMulOpSL<0b10, 0b0001, IIC_VMACQ, "vmla", "f32",
v4f32, v2f32, fmul, fadd>;
def : Pat<(v8i16 (add (v8i16 QPR:$src1),
- (mul (v8i16 QPR:$src2),
- (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
- (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1),
- (v8i16 QPR:$src2),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLAslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (add (v4i32 QPR:$src1),
- (mul (v4i32 QPR:$src2),
- (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
- (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1),
- (v4i32 QPR:$src2),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLAslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fadd (v4f32 QPR:$src1),
- (fmul (v4f32 QPR:$src2),
- (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (fmul (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
(v4f32 (VMLAslfq (v4f32 QPR:$src1),
(v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VMLAL : Vector Multiply Accumulate Long (Q += D * D)
@@ -2003,30 +1976,27 @@ def VMLSslfq : N3VQMulOpSL<0b10, 0b0101, IIC_VMACQ, "vmls", "f32",
v4f32, v2f32, fmul, fsub>;
def : Pat<(v8i16 (sub (v8i16 QPR:$src1),
- (mul (v8i16 QPR:$src2),
- (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
- (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1),
- (v8i16 QPR:$src2),
+ (mul (v8i16 QPR:$src2),
+ (v8i16 (NEONvduplane (v8i16 QPR:$src3), imm:$lane))))),
+ (v8i16 (VMLSslv8i16 (v8i16 QPR:$src1), (v8i16 QPR:$src2),
(v4i16 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i16_reg imm:$lane))),
+ (DSubReg_i16_reg imm:$lane))),
(SubReg_i16_lane imm:$lane)))>;
def : Pat<(v4i32 (sub (v4i32 QPR:$src1),
- (mul (v4i32 QPR:$src2),
- (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
- (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1),
- (v4i32 QPR:$src2),
+ (mul (v4i32 QPR:$src2),
+ (v4i32 (NEONvduplane (v4i32 QPR:$src3), imm:$lane))))),
+ (v4i32 (VMLSslv4i32 (v4i32 QPR:$src1), (v4i32 QPR:$src2),
(v2i32 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
def : Pat<(v4f32 (fsub (v4f32 QPR:$src1),
- (fmul (v4f32 QPR:$src2),
- (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
- (v4f32 (VMLSslfq (v4f32 QPR:$src1),
- (v4f32 QPR:$src2),
+ (fmul (v4f32 QPR:$src2),
+ (v4f32 (NEONvduplane (v4f32 QPR:$src3), imm:$lane))))),
+ (v4f32 (VMLSslfq (v4f32 QPR:$src1), (v4f32 QPR:$src2),
(v2f32 (EXTRACT_SUBREG QPR:$src3,
- (DSubReg_i32_reg imm:$lane))),
+ (DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane)))>;
// VMLSL : Vector Multiply Subtract Long (Q -= D * D)
@@ -2088,6 +2058,10 @@ def VCEQfd : N3VD<0,0,0b00,0b1110,0, IIC_VBIND, "vceq", "f32", v2i32, v2f32,
NEONvceq, 1>;
def VCEQfq : N3VQ<0,0,0b00,0b1110,0, IIC_VBINQ, "vceq", "f32", v4i32, v4f32,
NEONvceq, 1>;
+// For disassembly only.
+defm VCEQz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00010, 0, "vceq", "i",
+ "$dst, $src, #0">;
+
// VCGE : Vector Compare Greater Than or Equal
defm VCGEs : N3V_QHS<0, 0, 0b0011, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vcge", "s", NEONvcge, 0>;
@@ -2097,6 +2071,13 @@ def VCGEfd : N3VD<1,0,0b00,0b1110,0, IIC_VBIND, "vcge", "f32",
v2i32, v2f32, NEONvcge, 0>;
def VCGEfq : N3VQ<1,0,0b00,0b1110,0, IIC_VBINQ, "vcge", "f32", v4i32, v4f32,
NEONvcge, 0>;
+// For disassembly only.
+defm VCGEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00001, 0, "vcge", "s",
+ "$dst, $src, #0">;
+// For disassembly only.
+defm VCLEz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00011, 0, "vcle", "s",
+ "$dst, $src, #0">;
+
// VCGT : Vector Compare Greater Than
defm VCGTs : N3V_QHS<0, 0, 0b0011, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vcgt", "s", NEONvcgt, 0>;
@@ -2106,6 +2087,13 @@ def VCGTfd : N3VD<1,0,0b10,0b1110,0, IIC_VBIND, "vcgt", "f32", v2i32, v2f32,
NEONvcgt, 0>;
def VCGTfq : N3VQ<1,0,0b10,0b1110,0, IIC_VBINQ, "vcgt", "f32", v4i32, v4f32,
NEONvcgt, 0>;
+// For disassembly only.
+defm VCGTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00000, 0, "vcgt", "s",
+ "$dst, $src, #0">;
+// For disassembly only.
+defm VCLTz : N2V_QHS_cmp<0b11, 0b11, 0b01, 0b00100, 0, "vclt", "s",
+ "$dst, $src, #0">;
+
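+// Plausible syntax for the *z defs above (registers chosen for
+// illustration):
+//   vceq.i8  d0, d1, #0
+//   vclt.s16 q0, q1, #0
+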
// VACGE : Vector Absolute Compare Greater Than or Equal (aka VCAGE)
def VACGEd : N3VDInt<1, 0, 0b00, 0b1110, 1, IIC_VBIND, "vacge", "f32",
v2i32, v2f32, int_arm_neon_vacged, 0>;
@@ -2247,9 +2235,9 @@ defm VABALu : N3VLInt3_QHS<1,1,0b0101,0, "vabal", "u", int_arm_neon_vabalu>;
// Vector Maximum and Minimum.
// VMAX : Vector Maximum
-defm VMAXs : N3VInt_QHS<0, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+defm VMAXs : N3VInt_QHS<0,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vmax", "s", int_arm_neon_vmaxs, 1>;
-defm VMAXu : N3VInt_QHS<1, 0, 0b0110, 0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+defm VMAXu : N3VInt_QHS<1,0,0b0110,0, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vmax", "u", int_arm_neon_vmaxu, 1>;
def VMAXfd : N3VDInt<0, 0, 0b00, 0b1111, 0, IIC_VBIND, "vmax", "f32",
v2f32, v2f32, int_arm_neon_vmaxs, 1>;
@@ -2257,9 +2245,9 @@ def VMAXfq : N3VQInt<0, 0, 0b00, 0b1111, 0, IIC_VBINQ, "vmax", "f32",
v4f32, v4f32, int_arm_neon_vmaxs, 1>;
// VMIN : Vector Minimum
-defm VMINs : N3VInt_QHS<0, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+defm VMINs : N3VInt_QHS<0,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vmin", "s", int_arm_neon_vmins, 1>;
-defm VMINu : N3VInt_QHS<1, 0, 0b0110, 1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
+defm VMINu : N3VInt_QHS<1,0,0b0110,1, IIC_VBINi4D, IIC_VBINi4D, IIC_VBINi4Q,
IIC_VBINi4Q, "vmin", "u", int_arm_neon_vminu, 1>;
def VMINfd : N3VDInt<0, 0, 0b10, 0b1111, 0, IIC_VBIND, "vmin", "f32",
v2f32, v2f32, int_arm_neon_vmins, 1>;
@@ -2401,16 +2389,17 @@ def VSHLLi32 : N2VLShMax<1, 1, 0b111010, 0b0011, 0, 0, 0, "vshll", "i32",
v2i64, v2i32, NEONvshlli>;
// VSHRN : Vector Shift Right and Narrow
-defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i", NEONvshrn>;
+defm VSHRN : N2VNSh_HSD<0,1,0b1000,0,0,1, IIC_VSHLiD, "vshrn", "i",
+ NEONvshrn>;
// VRSHL : Vector Rounding Shift
defm VRSHLs : N3VInt_QHSD<0,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts, 0>;
+ IIC_VSHLi4Q, "vrshl", "s", int_arm_neon_vrshifts,0>;
defm VRSHLu : N3VInt_QHSD<1,0,0b0101,0, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu, 0>;
+ IIC_VSHLi4Q, "vrshl", "u", int_arm_neon_vrshiftu,0>;
// VRSHR : Vector Rounding Shift Right
-defm VRSHRs : N2VSh_QHSD<0, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>;
-defm VRSHRu : N2VSh_QHSD<1, 1, 0b0010, 1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>;
+defm VRSHRs : N2VSh_QHSD<0,1,0b0010,1, IIC_VSHLi4D, "vrshr", "s", NEONvrshrs>;
+defm VRSHRu : N2VSh_QHSD<1,1,0b0010,1, IIC_VSHLi4D, "vrshr", "u", NEONvrshru>;
// VRSHRN : Vector Rounding Shift Right and Narrow
defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
@@ -2418,14 +2407,14 @@ defm VRSHRN : N2VNSh_HSD<0, 1, 0b1000, 0, 1, 1, IIC_VSHLi4D, "vrshrn", "i",
// VQSHL : Vector Saturating Shift
defm VQSHLs : N3VInt_QHSD<0,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts, 0>;
+ IIC_VSHLi4Q, "vqshl", "s", int_arm_neon_vqshifts,0>;
defm VQSHLu : N3VInt_QHSD<1,0,0b0100,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
- IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu, 0>;
+ IIC_VSHLi4Q, "vqshl", "u", int_arm_neon_vqshiftu,0>;
// VQSHL : Vector Saturating Shift Left (Immediate)
-defm VQSHLsi : N2VSh_QHSD<0, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
-defm VQSHLui : N2VSh_QHSD<1, 1, 0b0111, 1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;
+defm VQSHLsi : N2VSh_QHSD<0,1,0b0111,1, IIC_VSHLi4D, "vqshl", "s", NEONvqshls>;
+defm VQSHLui : N2VSh_QHSD<1,1,0b0111,1, IIC_VSHLi4D, "vqshl", "u", NEONvqshlu>;
// VQSHLU : Vector Saturating Shift Left (Immediate, Unsigned)
-defm VQSHLsu : N2VSh_QHSD<1, 1, 0b0110, 1, IIC_VSHLi4D, "vqshlu", "s", NEONvqshlsu>;
+defm VQSHLsu : N2VSh_QHSD<1,1,0b0110,1, IIC_VSHLi4D, "vqshlu","s",NEONvqshlsu>;
// VQSHRN : Vector Saturating Shift Right and Narrow
defm VQSHRNs : N2VNSh_HSD<0, 1, 0b1001, 0, 0, 1, IIC_VSHLi4D, "vqshrn", "s",
@@ -2438,10 +2427,10 @@ defm VQSHRUN : N2VNSh_HSD<1, 1, 0b1000, 0, 0, 1, IIC_VSHLi4D, "vqshrun", "s",
NEONvqshrnsu>;
// VQRSHL : Vector Saturating Rounding Shift
-defm VQRSHLs : N3VInt_QHSD<0, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
+defm VQRSHLs : N3VInt_QHSD<0,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
IIC_VSHLi4Q, "vqrshl", "s",
int_arm_neon_vqrshifts, 0>;
-defm VQRSHLu : N3VInt_QHSD<1, 0, 0b0101, 1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
+defm VQRSHLu : N3VInt_QHSD<1,0,0b0101,1, IIC_VSHLi4D, IIC_VSHLi4D, IIC_VSHLi4Q,
IIC_VSHLi4Q, "vqrshl", "u",
int_arm_neon_vqrshiftu, 0>;
@@ -2508,7 +2497,7 @@ def VNEGs16q : VNEGQ<0b01, "vneg", "s16", v8i16>;
def VNEGs32q : VNEGQ<0b10, "vneg", "s32", v4i32>;
// VNEG : Vector Negate (floating-point)
-def VNEGf32d : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+def VNEGfd : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
(outs DPR:$dst), (ins DPR:$src), IIC_VUNAD,
"vneg", "f32", "$dst, $src", "",
[(set DPR:$dst, (v2f32 (fneg DPR:$src)))]>;
@@ -2547,6 +2536,14 @@ def VCNTq : N2VQInt<0b11, 0b11, 0b00, 0b00, 0b01010, 0,
IIC_VCNTiQ, "vcnt", "8",
v16i8, v16i8, int_arm_neon_vcnt>;
+// Vector Swap -- for disassembly only.
+def VSWPd : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 0, 0,
+ (outs DPR:$dst), (ins DPR:$src), NoItinerary,
+ "vswp", "$dst, $src", "", []>;
+def VSWPq : N2VX<0b11, 0b11, 0b00, 0b10, 0b00000, 1, 0,
+ (outs QPR:$dst), (ins QPR:$src), NoItinerary,
+ "vswp", "$dst, $src", "", []>;
+
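+// Plausible syntax (registers chosen for illustration): "vswp d0, d1"
+// exchanges the contents of the two registers.
+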
// Vector Move Operations.
// VMOV : Vector Move (Register)
@@ -2678,10 +2675,10 @@ def : Pat<(extractelt (v4i32 QPR:$src), imm:$lane),
(DSubReg_i32_reg imm:$lane))),
(SubReg_i32_lane imm:$lane))>;
def : Pat<(extractelt (v2f32 DPR:$src1), imm:$src2),
- (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1), DPR_VFP2)),
+ (EXTRACT_SUBREG (v2f32 (COPY_TO_REGCLASS (v2f32 DPR:$src1),DPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
def : Pat<(extractelt (v4f32 QPR:$src1), imm:$src2),
- (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1), QPR_VFP2)),
+ (EXTRACT_SUBREG (v4f32 (COPY_TO_REGCLASS (v4f32 QPR:$src1),QPR_VFP2)),
(SSubReg_f32_reg imm:$src2))>;
//def : Pat<(extractelt (v2i64 QPR:$src1), imm:$src2),
// (EXTRACT_SUBREG QPR:$src1, (DSubReg_f64_reg imm:$src2))>;
@@ -2849,11 +2846,13 @@ def VDUPfqf : N2V<0b11, 0b11, {?,1}, {0,0}, 0b11000, 1, 0,
def : Pat<(v2i64 (NEONvduplane (v2i64 QPR:$src), imm:$lane)),
(INSERT_SUBREG QPR:$src,
- (i64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
+ (i64 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_f64_reg imm:$lane))),
(DSubReg_f64_other_reg imm:$lane))>;
def : Pat<(v2f64 (NEONvduplane (v2f64 QPR:$src), imm:$lane)),
(INSERT_SUBREG QPR:$src,
- (f64 (EXTRACT_SUBREG QPR:$src, (DSubReg_f64_reg imm:$lane))),
+ (f64 (EXTRACT_SUBREG QPR:$src,
+ (DSubReg_f64_reg imm:$lane))),
(DSubReg_f64_other_reg imm:$lane))>;
// VMOVN : Vector Narrowing Move
@@ -3092,70 +3091,110 @@ def VTBX4
// NEON instructions for single-precision FP math
//===----------------------------------------------------------------------===//
+class N2VSPat<SDNode OpNode, ValueType ResTy, ValueType OpTy, NeonI Inst>
+ : NEONFPPat<(ResTy (OpNode SPR:$a)),
+ (EXTRACT_SUBREG (Inst (INSERT_SUBREG (OpTy (IMPLICIT_DEF)),
+ SPR:$a, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
+class N3VSPat<SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$a, SPR:$b)),
+ (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$b, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
+class N3VSMulOpPat<SDNode MulNode, SDNode OpNode, NeonI Inst>
+ : NEONFPPat<(f32 (OpNode SPR:$acc, (f32 (MulNode SPR:$a, SPR:$b)))),
+ (EXTRACT_SUBREG (Inst (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$acc, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$a, arm_ssubreg_0),
+ (INSERT_SUBREG (v2f32 (IMPLICIT_DEF)),
+ SPR:$b, arm_ssubreg_0)),
+ arm_ssubreg_0)>;
+
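+// Illustrative summary of the classes above: INSERT_SUBREG places the SPR
+// input into lane arm_ssubreg_0 of an undefined D register, the NEON
+// instruction then operates on the whole DPR_VFP2 value, and EXTRACT_SUBREG
+// recovers the scalar f32 result from the same sub-register.
+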
+// These need separate instructions because they must use the DPR_VFP2
+// register class, which has SPR sub-registers.
// Vector Add Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VADDfd_sfp : N3VDs<0, 0, 0b00, 0b1101, 0, "vadd", "f32", v2f32, v2f32, fadd,1>;
-def : N3VDsPat<fadd, VADDfd_sfp>;
+def VADDfd_sfp : N3VS<0,0,0b00,0b1101,0, "vadd", "f32", v2f32, v2f32, fadd, 1>;
+def : N3VSPat<fadd, VADDfd_sfp>;
// Vector Sub Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VSUBfd_sfp : N3VDs<0, 0, 0b10, 0b1101, 0, "vsub", "f32", v2f32, v2f32, fsub,0>;
-def : N3VDsPat<fsub, VSUBfd_sfp>;
+def VSUBfd_sfp : N3VS<0,0,0b10,0b1101,0, "vsub", "f32", v2f32, v2f32, fsub, 0>;
+def : N3VSPat<fsub, VSUBfd_sfp>;
// Vector Multiply Operations used for single-precision FP
let neverHasSideEffects = 1 in
-def VMULfd_sfp : N3VDs<1, 0, 0b00, 0b1101, 1, "vmul", "f32", v2f32, v2f32, fmul,1>;
-def : N3VDsPat<fmul, VMULfd_sfp>;
+def VMULfd_sfp : N3VS<1,0,0b00,0b1101,1, "vmul", "f32", v2f32, v2f32, fmul, 1>;
+def : N3VSPat<fmul, VMULfd_sfp>;
// Vector Multiply-Accumulate/Subtract used for single-precision FP
// vml[as].f32 can cause 4-8 cycle stalls in following ASIMD instructions, so
// we want to avoid them for now, e.g. in alternating vmla/vadd sequences.
//let neverHasSideEffects = 1 in
-//def VMLAfd_sfp : N3VDMulOps<0, 0, 0b00, 0b1101, 1, IIC_VMACD, "vmla", "f32", v2f32,fmul,fadd>;
-//def : N3VDMulOpsPat<fmul, fadd, VMLAfd_sfp>;
+//def VMLAfd_sfp : N3VSMulOp<0,0,0b00,0b1101,1, IIC_VMACD, "vmla", "f32",
+// v2f32, fmul, fadd>;
+//def : N3VSMulOpPat<fmul, fadd, VMLAfd_sfp>;
//let neverHasSideEffects = 1 in
-//def VMLSfd_sfp : N3VDMulOps<0, 0, 0b10, 0b1101, 1, IIC_VMACD, "vmls", "f32", v2f32,fmul,fsub>;
-//def : N3VDMulOpsPat<fmul, fsub, VMLSfd_sfp>;
+//def VMLSfd_sfp : N3VSMulOp<0,0,0b10,0b1101,1, IIC_VMACD, "vmls", "f32",
+// v2f32, fmul, fsub>;
+//def : N3VSMulOpPat<fmul, fsub, VMLSfd_sfp>;
// Vector Absolute used for single-precision FP
let neverHasSideEffects = 1 in
-def VABSfd_sfp : N2VDInts<0b11, 0b11, 0b10, 0b01, 0b01110, 0,
- IIC_VUNAD, "vabs", "f32",
- v2f32, v2f32, int_arm_neon_vabs>;
-def : N2VDIntsPat<fabs, VABSfd_sfp>;
+def VABSfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01110, 0, 0,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
+ "vabs", "f32", "$dst, $src", "", []>;
+def : N2VSPat<fabs, f32, v2f32, VABSfd_sfp>;
// Vector Negate used for single-precision FP
let neverHasSideEffects = 1 in
-def VNEGf32d_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
- (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
- "vneg", "f32", "$dst, $src", "", []>;
-def : N2VDIntsPat<fneg, VNEGf32d_sfp>;
+def VNEGfd_sfp : N2V<0b11, 0b11, 0b10, 0b01, 0b01111, 0, 0,
+ (outs DPR_VFP2:$dst), (ins DPR_VFP2:$src), IIC_VUNAD,
+ "vneg", "f32", "$dst, $src", "", []>;
+def : N2VSPat<fneg, f32, v2f32, VNEGfd_sfp>;
+
+// Vector Maximum used for single-precision FP
+let neverHasSideEffects = 1 in
+def VMAXfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ "vmax", "f32", "$dst, $src1, $src2", "", []>;
+def : N3VSPat<NEONfmax, VMAXfd_sfp>;
+
+// Vector Minimum used for single-precision FP
+let neverHasSideEffects = 1 in
+def VMINfd_sfp : N3V<0, 0, 0b00, 0b1111, 0, 0, (outs DPR_VFP2:$dst),
+ (ins DPR_VFP2:$src1, DPR_VFP2:$src2), IIC_VBIND,
+ "vmin", "f32", "$dst, $src1, $src2", "", []>;
+def : N3VSPat<NEONfmin, VMINfd_sfp>;
// Vector Convert between single-precision FP and integer
let neverHasSideEffects = 1 in
-def VCVTf2sd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
- v2i32, v2f32, fp_to_sint>;
-def : N2VDsPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
+def VCVTf2sd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01110, 0, "vcvt", "s32.f32",
+ v2i32, v2f32, fp_to_sint>;
+def : N2VSPat<arm_ftosi, f32, v2f32, VCVTf2sd_sfp>;
let neverHasSideEffects = 1 in
-def VCVTf2ud_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
- v2i32, v2f32, fp_to_uint>;
-def : N2VDsPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
+def VCVTf2ud_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01111, 0, "vcvt", "u32.f32",
+ v2i32, v2f32, fp_to_uint>;
+def : N2VSPat<arm_ftoui, f32, v2f32, VCVTf2ud_sfp>;
let neverHasSideEffects = 1 in
-def VCVTs2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
- v2f32, v2i32, sint_to_fp>;
-def : N2VDsPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
+def VCVTs2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01100, 0, "vcvt", "f32.s32",
+ v2f32, v2i32, sint_to_fp>;
+def : N2VSPat<arm_sitof, f32, v2i32, VCVTs2fd_sfp>;
let neverHasSideEffects = 1 in
-def VCVTu2fd_sfp : N2VDs<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
- v2f32, v2i32, uint_to_fp>;
-def : N2VDsPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
+def VCVTu2fd_sfp : N2VS<0b11, 0b11, 0b10, 0b11, 0b01101, 0, "vcvt", "f32.u32",
+ v2f32, v2i32, uint_to_fp>;
+def : N2VSPat<arm_uitof, f32, v2i32, VCVTu2fd_sfp>;
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index 64142ad..1c77f27 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -120,7 +120,10 @@ def t_addrmode_sp : Operand<i32>,
// Miscellaneous Instructions.
//
-let Defs = [SP], Uses = [SP] in {
+// FIXME: Marking these as hasSideEffects is necessary to prevent machine DCE
+// from removing one half of the matched pairs. That breaks PEI, which assumes
+// these will always be in pairs, and asserts if it finds otherwise. Better way?
+let Defs = [SP], Uses = [SP], hasSideEffects = 1 in {
def tADJCALLSTACKUP :
PseudoInst<(outs), (ins i32imm:$amt1, i32imm:$amt2), NoItinerary,
"@ tADJCALLSTACKUP $amt1",
@@ -132,6 +135,55 @@ PseudoInst<(outs), (ins i32imm:$amt), NoItinerary,
[(ARMcallseq_start imm:$amt)]>, Requires<[IsThumb1Only]>;
}
+def tNOP : T1pI<(outs), (ins), NoItinerary, "nop", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = 0b00000000;
+}
+
+def tYIELD : T1pI<(outs), (ins), NoItinerary, "yield", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = 0b00010000;
+}
+
+def tWFE : T1pI<(outs), (ins), NoItinerary, "wfe", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = 0b00100000;
+}
+
+def tWFI : T1pI<(outs), (ins), NoItinerary, "wfi", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = 0b00110000;
+}
+
+def tSEV : T1pI<(outs), (ins), NoItinerary, "sev", "",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101111> {
+ let Inst{9-8} = 0b11;
+ let Inst{7-0} = 0b01000000;
+}
+
+def tSETENDBE : T1I<(outs), (ins), NoItinerary, "setend\tbe",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101101> {
+ let Inst{9-5} = 0b10010;
+ let Inst{3} = 1;
+}
+
+def tSETENDLE : T1I<(outs), (ins), NoItinerary, "setend\tle",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Encoding<0b101101> {
+ let Inst{9-5} = 0b10010;
+ let Inst{3} = 0;
+}
+
// The i32imm operand $val can be used by a debugger to store more information
// about the breakpoint.
def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
@@ -140,6 +192,19 @@ def tBKPT : T1I<(outs), (ins i32imm:$val), NoItinerary, "bkpt\t$val",
let Inst{9-8} = 0b10;
}
+// Change Processor State is a system instruction -- for disassembly only.
+// The singleton $opt operand contains the following information:
+// opt{4-0} = mode ==> don't care
+// opt{5} = changemode ==> 0 (false for 16-bit Thumb instr)
+// opt{8-6} = AIF from Inst{2-0}
+// opt{10-9} = 1:imod from Inst{4} with 0b10 as enable and 0b11 as disable
+//
+// The opt{4-0} and opt{5} sub-fields are to accommodate the 32-bit Thumb and
+// ARM CPS instructions, which have more options.
+def tCPS : T1I<(outs), (ins i32imm:$opt), NoItinerary, "cps${opt:cps}",
+ [/* For disassembly only; pattern left blank */]>,
+ T1Misc<0b0110011>;
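+// Worked example (illustrative, derived from the field layout above, not from
+// the encoder): for "cpsid if" -- interrupt disable, changemode = 0 -- imod
+// maps to 0b11 and AIF to 0b011, so $opt = (0b11 << 9) | (0b011 << 6) = 0x6C0.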
+
// For both thumb1 and thumb2.
let isNotDuplicable = 1 in
def tPICADD : TIt<(outs GPR:$dst), (ins GPR:$lhs, pclabel:$cp), IIC_iALUr,
@@ -208,7 +273,7 @@ let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
let Inst{6-3} = 0b1110; // Rm = lr
}
// Alternative return instruction used by vararg functions.
- def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target", []>,
+ def tBX_RET_vararg : TI<(outs), (ins tGPR:$target), IIC_Br, "bx\t$target",[]>,
T1Special<{1,1,0,?}>; // A6.2.3 & A8.6.25
}
@@ -236,20 +301,20 @@ let isCall = 1,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
// Also used for Thumb2
def tBL : TIx2<0b11110, 0b11, 1,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
+ (outs), (ins i32imm:$func, variable_ops), IIC_Br,
"bl\t${func:call}",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb, IsNotDarwin]>;
// ARMv5T and above, also used for Thumb2
def tBLXi : TIx2<0b11110, 0b11, 0,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
+ (outs), (ins i32imm:$func, variable_ops), IIC_Br,
"blx\t${func:call}",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]>;
// Also used for Thumb2
- def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
+ def tBLXr : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
"blx\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T, IsNotDarwin]>,
@@ -257,7 +322,7 @@ let isCall = 1,
// ARMv4T
def tBX : TIx2<{?,?,?,?,?}, {?,?}, ?,
- (outs), (ins tGPR:$func, variable_ops), IIC_Br,
+ (outs), (ins tGPR:$func, variable_ops), IIC_Br,
"mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb1Only, IsNotDarwin]>;
@@ -271,20 +336,20 @@ let isCall = 1,
D24, D25, D26, D27, D28, D29, D30, D31, CPSR, FPSCR] in {
// Also used for Thumb2
def tBLr9 : TIx2<0b11110, 0b11, 1,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
+ (outs), (ins i32imm:$func, variable_ops), IIC_Br,
"bl\t${func:call}",
[(ARMtcall tglobaladdr:$func)]>,
Requires<[IsThumb, IsDarwin]>;
// ARMv5T and above, also used for Thumb2
def tBLXi_r9 : TIx2<0b11110, 0b11, 0,
- (outs), (ins i32imm:$func, variable_ops), IIC_Br,
+ (outs), (ins i32imm:$func, variable_ops), IIC_Br,
"blx\t${func:call}",
[(ARMcall tglobaladdr:$func)]>,
Requires<[IsThumb, HasV5T, IsDarwin]>;
// Also used for Thumb2
- def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
+ def tBLXr_r9 : TI<(outs), (ins GPR:$func, variable_ops), IIC_Br,
"blx\t$func",
[(ARMtcall GPR:$func)]>,
Requires<[IsThumb, HasV5T, IsDarwin]>,
@@ -292,7 +357,7 @@ let isCall = 1,
// ARMv4T
def tBXr9 : TIx2<{?,?,?,?,?}, {?,?}, ?,
- (outs), (ins tGPR:$func, variable_ops), IIC_Br,
+ (outs), (ins tGPR:$func, variable_ops), IIC_Br,
"mov\tlr, pc\n\tbx\t$func",
[(ARMcall_nolink tGPR:$func)]>,
Requires<[IsThumb1Only, IsDarwin]>;
@@ -307,7 +372,7 @@ let isBranch = 1, isTerminator = 1 in {
// Far jump
let Defs = [LR] in
- def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br,
+ def tBfar : TIx2<0b11110, 0b11, 1, (outs), (ins brtarget:$target), IIC_Br,
"bl\t$target\t@ far jump",[]>;
def tBR_JTr : T1JTI<(outs),
@@ -340,16 +405,34 @@ let isBranch = 1, isTerminator = 1 in {
T1Misc<{1,0,?,1,?,?,?}>;
}
+// A8.6.218 Supervisor Call (Software Interrupt) -- for disassembly only
+// A8.6.16 B: Encoding T1
+// If Inst{11-8} == 0b1111 then SEE SVC
+let isCall = 1 in {
+def tSVC : T1pI<(outs), (ins i32imm:$svc), IIC_Br, "svc", "\t$svc", []>,
+ Encoding16 {
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1111;
+}
+}
+
+// A8.6.16 B: Encoding T1 -- for disassembly only
+// If Inst{11-8} == 0b1110 then UNDEFINED
+def tTRAP : T1I<(outs), (ins), IIC_Br, "trap", []>, Encoding16 {
+ let Inst{15-12} = 0b1101;
+ let Inst{11-8} = 0b1110;
+}
+
//===----------------------------------------------------------------------===//
// Load Store Instructions.
//
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
-def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
+let canFoldAsLoad = 1, isReMaterializable = 1 in
+def tLDR : T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
"ldr", "\t$dst, $addr",
[(set tGPR:$dst, (load t_addrmode_s4:$addr))]>,
T1LdSt<0b100>;
-def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
+def tLDRi: T1pI4<(outs tGPR:$dst), (ins t_addrmode_s4:$addr), IIC_iLoadr,
"ldr", "\t$dst, $addr",
[]>,
T1LdSt4Imm<{1,?,?}>;
@@ -399,15 +482,14 @@ def tRestore : T1pIs<(outs tGPR:$dst), (ins t_addrmode_sp:$addr), IIC_iLoadi,
// Load tconstpool
// FIXME: Use ldr.n to work around a Darwin assembler bug.
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def tLDRpci : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
"ldr", ".n\t$dst, $addr",
[(set tGPR:$dst, (load (ARMWrapper tconstpool:$addr)))]>,
T1Encoding<{0,1,0,0,1,?}>; // A6.2 & A8.6.59
// Special LDR for loads from non-pc-relative constpools.
-let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1,
- mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, mayLoad = 1, isReMaterializable = 1 in
def tLDRcp : T1pIs<(outs tGPR:$dst), (ins i32imm:$addr), IIC_iLoadi,
"ldr", "\t$dst, $addr", []>,
T1LdStSP<{1,?,?}>;
@@ -769,7 +851,7 @@ def tUXTH : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iUNAr,
T1Misc<{0,0,1,0,1,0,?}>;
-// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC DAG operation.
+// Conditional move tMOVCCr - Used to implement the Thumb SELECT_CC operation.
// Expanded after instruction selection into a branch sequence.
let usesCustomInserter = 1 in // Expanded after instruction selection.
def tMOVCCr_pseudo :
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 55c7aa2..316567d 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -13,7 +13,7 @@
// IT block predicate field
def it_pred : Operand<i32> {
- let PrintMethod = "printPredicateOperand";
+ let PrintMethod = "printMandatoryPredicateOperand";
}
// IT block condition mask
@@ -53,10 +53,10 @@ def t2_so_imm_neg_XFORM : SDNodeXForm<imm, [{
// bits [bits 0-7], the 4-bit shift/splat amount is the next 4 bits [bits 8-11].
def t2_so_imm : Operand<i32>,
PatLeaf<(imm), [{
- return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
+ return ARM_AM::getT2SOImmVal((uint32_t)N->getZExtValue()) != -1;
}]>;
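+// Rough sketch of the predicate ARM_AM::getT2SOImmVal implements (illustrative
+// only, not the in-tree code). A value is a t2_so_imm if it is of the form
+// 0x000000ab, 0x00ab00ab, 0xab00ab00, 0xabababab, or an 8-bit value with its
+// top bit set rotated right by 8..31:
+//   static bool isT2SOImm(uint32_t V) {
+//     if (V <= 0xFF) return true;                     // 0x000000ab
+//     uint32_t Hi = V >> 16, Lo = V & 0xFFFF;
+//     if (Hi == Lo &&
+//         ((Lo >> 8) == 0 ||                          // 0x00ab00ab
+//          (Lo & 0xFF) == 0 ||                        // 0xab00ab00
+//          (Lo >> 8) == (Lo & 0xFF)))                 // 0xabababab
+//       return true;
+//     for (unsigned R = 8; R < 32; ++R) {             // undo a ROR by R
+//       uint32_t B = (V << R) | (V >> (32 - R));      // rotate left by R
+//       if (B >= 0x80 && B <= 0xFF) return true;
+//     }
+//     return false;
+//   }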
-// t2_so_imm_not - Match an immediate that is a complement
+// t2_so_imm_not - Match an immediate that is the complement
// of a t2_so_imm.
def t2_so_imm_not : Operand<i32>,
PatLeaf<(imm), [{
@@ -114,13 +114,13 @@ def imm0_4095 : Operand<i32>,
return (uint32_t)N->getZExtValue() < 4096;
}]>;
-def imm0_4095_neg : PatLeaf<(i32 imm), [{
- return (uint32_t)(-N->getZExtValue()) < 4096;
-}], imm_neg_XFORM>;
+def imm0_4095_neg : PatLeaf<(i32 imm), [{
+ return (uint32_t)(-N->getZExtValue()) < 4096;
+}], imm_neg_XFORM>;
def imm0_255_neg : PatLeaf<(i32 imm), [{
return (uint32_t)(-N->getZExtValue()) < 255;
-}], imm_neg_XFORM>;
+}], imm_neg_XFORM>;
// Define Thumb2 specific addressing modes.
@@ -208,7 +208,7 @@ multiclass T2I_un_irs<bits<4> opcod, string opc, PatFrag opnode,
/// T2I_bin_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns for a
/// binary operation that produces a value. These are predicable and can be
/// changed to modify CPSR.
-multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
+multiclass T2I_bin_irs<bits<4> opcod, string opc, PatFrag opnode,
bit Commutable = 0, string wide =""> {
// shifted imm
def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
@@ -368,15 +368,16 @@ multiclass T2I_bin_ii12rs<bits<3> op23_21, string opc, PatFrag opnode,
}
/// T2I_adde_sube_irs - Defines a set of (op reg, {so_imm|r|so_reg}) patterns
-/// for a binary operation that produces a value and use and define the carry
+/// for a binary operation that produces a value and uses the carry
/// bit. It's not predicable.
let Uses = [CPSR] in {
-multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Commutable = 0> {
+multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
// shifted imm
def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
opc, "\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUnused]> {
+ Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -387,7 +388,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm
def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUnused]> {
+ Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -401,19 +402,23 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm
def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
opc, ".w\t$dst, $lhs, $rhs",
[(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUnused]> {
+ Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
let Inst{20} = 0; // The S bit.
}
- // Carry setting variants
+}
+
+// Carry setting variants
+let Defs = [CPSR] in {
+multiclass T2I_adde_sube_s_irs<bits<4> opcod, string opc, PatFrag opnode,
+ bit Commutable = 0> {
// shifted imm
- def Sri : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
- !strconcat(opc, "s\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ def ri : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_imm:$rhs), IIC_iALUi,
+ opc, "\t$dst, $lhs, $rhs",
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>,
+ Requires<[IsThumb2]> {
let Inst{31-27} = 0b11110;
let Inst{25} = 0;
let Inst{24-21} = opcod;
@@ -421,11 +426,10 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm
let Inst{15} = 0;
}
// register
- def Srr : T2XI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
- !strconcat(opc, "s.w\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ def rr : T2sI<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALUr,
+ opc, ".w\t$dst, $lhs, $rhs",
+ [(set GPR:$dst, (opnode GPR:$lhs, GPR:$rhs))]>,
+ Requires<[IsThumb2]> {
let isCommutable = Commutable;
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
@@ -436,11 +440,10 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm
let Inst{5-4} = 0b00; // type
}
// shifted register
- def Srs : T2XI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
- !strconcat(opc, "s.w\t$dst, $lhs, $rhs"),
- [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
- Requires<[IsThumb2, CarryDefIsUsed]> {
- let Defs = [CPSR];
+ def rs : T2sI<(outs GPR:$dst), (ins GPR:$lhs, t2_so_reg:$rhs), IIC_iALUsi,
+ opc, ".w\t$dst, $lhs, $rhs",
+ [(set GPR:$dst, (opnode GPR:$lhs, t2_so_reg:$rhs))]>,
+ Requires<[IsThumb2]> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b01;
let Inst{24-21} = opcod;
@@ -448,6 +451,7 @@ multiclass T2I_adde_sube_irs<bits<4> opcod, string opc, PatFrag opnode, bit Comm
}
}
}
+}
/// T2I_rbin_s_is - Same as T2I_rbin_is except sets 's' bit.
let Defs = [CPSR] in {
@@ -626,19 +630,6 @@ multiclass T2I_st<bits<2> opcod, string opc, PatFrag opnode> {
}
}
-/// T2I_picld - Defines the PIC load pattern.
-class T2I_picld<string opc, PatFrag opnode> :
- T2I<(outs GPR:$dst), (ins addrmodepc:$addr), IIC_iLoadi,
- !strconcat("\n${addr:label}:\n\t", opc), "\t$dst, $addr",
- [(set GPR:$dst, (opnode addrmodepc:$addr))]>;
-
-/// T2I_picst - Defines the PIC store pattern.
-class T2I_picst<string opc, PatFrag opnode> :
- T2I<(outs), (ins GPR:$src, addrmodepc:$addr), IIC_iStorer,
- !strconcat("\n${addr:label}:\n\t", opc), "\t$src, $addr",
- [(opnode GPR:$src, addrmodepc:$addr)]>;
-
-
/// T2I_unary_rrot - A unary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
@@ -666,6 +657,31 @@ multiclass T2I_unary_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
}
+// DO (disassembly-only) variant -- no pattern
+
+multiclass T2I_unary_rrot_DO<bits<3> opcod, string opc> {
+ def r : T2I<(outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
+ opc, "\t$dst, $src", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def r_rot : T2I<(outs GPR:$dst), (ins GPR:$src, i32imm:$rot), IIC_iUNAsi,
+ opc, "\t$dst, $src, ror $rot", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{19-16} = 0b1111; // Rn
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = {?,?}; // rotate
+ }
+}
+
/// T2I_bin_rrot - A binary operation with two forms: one whose operand is a
/// register and one whose operand is a register rotated by 8/16/24.
multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
@@ -692,6 +708,29 @@ multiclass T2I_bin_rrot<bits<3> opcod, string opc, PatFrag opnode> {
}
}
+// DO (disassembly-only) variant -- no pattern
+
+multiclass T2I_bin_rrot_DO<bits<3> opcod, string opc> {
+ def rr : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS), IIC_iALUr,
+ opc, "\t$dst, $LHS, $RHS", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = 0b00; // rotate
+ }
+ def rr_rot : T2I<(outs GPR:$dst), (ins GPR:$LHS, GPR:$RHS, i32imm:$rot),
+ IIC_iALUsr, opc, "\t$dst, $LHS, $RHS, ror $rot", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0100;
+ let Inst{22-20} = opcod;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 1;
+ let Inst{5-4} = {?,?}; // rotate
+ }
+}
+
//===----------------------------------------------------------------------===//
// Instructions
//===----------------------------------------------------------------------===//
@@ -734,7 +773,7 @@ def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
let Inst{19-16} = 0b1101; // Rn = sp
let Inst{15} = 0;
}
-def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
+def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm),
IIC_iALUi, "addw", "\t$dst, $sp, $imm", []> {
let Inst{31-27} = 0b11110;
let Inst{25} = 1;
@@ -787,6 +826,25 @@ def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
let Inst{15} = 0;
}
+// Signed and unsigned division, for disassembly only
+def t2SDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+ "sdiv", "\t$dst, $a, $b", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011100;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
+
+def t2UDIV : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iALUi,
+ "udiv", "\t$dst, $a, $b", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-21} = 0b011101;
+ let Inst{20} = 0b1;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = 0b1111;
+}
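+
+// Note (an assumption, not expressed in the encodings above): hardware
+// sdiv/udiv is an optional Thumb2 extension at this point, which is presumably
+// why these instructions are modeled for disassembly only.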
+
// Pseudo instruction that will expand into a t2SUBrSPi + a copy.
let usesCustomInserter = 1 in { // Expanded after instruction selection.
def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm),
@@ -803,7 +861,7 @@ def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs),
//
// Load
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
defm t2LDR : T2I_ld<0, 0b10, "ldr", UnOpFrag<(load node:$Src)>>;
// Loads with zero extension
@@ -926,9 +984,9 @@ def t2LDRSH_POST : T2Iidxldst<1, 0b01, 1, 0, (outs GPR:$dst, GPR:$base_wb),
}
// Store
-defm t2STR : T2I_st<0b10, "str", BinOpFrag<(store node:$LHS, node:$RHS)>>;
-defm t2STRB : T2I_st<0b00, "strb", BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
-defm t2STRH : T2I_st<0b01, "strh", BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
+defm t2STR :T2I_st<0b10,"str", BinOpFrag<(store node:$LHS, node:$RHS)>>;
+defm t2STRB:T2I_st<0b00,"strb",BinOpFrag<(truncstorei8 node:$LHS, node:$RHS)>>;
+defm t2STRH:T2I_st<0b01,"strh",BinOpFrag<(truncstorei16 node:$LHS, node:$RHS)>>;
// Store doubleword
let mayLoad = 1, hasExtraSrcRegAllocReq = 1 in
@@ -989,7 +1047,7 @@ def t2STRB_POST : T2Iidxldst<0, 0b00, 0, 0, (outs GPR:$base_wb),
let mayLoad = 1, hasExtraDefRegAllocReq = 1 in
def t2LDM : T2XI<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> {
+ IIC_iLoadm, "ldm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
@@ -1001,7 +1059,7 @@ def t2LDM : T2XI<(outs),
let mayStore = 1, hasExtraSrcRegAllocReq = 1 in
def t2STM : T2XI<(outs),
(ins addrmode4:$addr, pred:$p, reglist:$wb, variable_ops),
- IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> {
+ IIC_iStorem, "stm${addr:submode}${p}${addr:wide}\t$addr, $wb", []> {
let Inst{31-27} = 0b11101;
let Inst{26-25} = 0b00;
let Inst{24-23} = {?, ?}; // IA: '01', DB: '10'
@@ -1074,13 +1132,15 @@ defm t2SXTB : T2I_unary_rrot<0b100, "sxtb",
UnOpFrag<(sext_inreg node:$Src, i8)>>;
defm t2SXTH : T2I_unary_rrot<0b000, "sxth",
UnOpFrag<(sext_inreg node:$Src, i16)>>;
+defm t2SXTB16 : T2I_unary_rrot_DO<0b010, "sxtb16">;
defm t2SXTAB : T2I_bin_rrot<0b100, "sxtab",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS, i8))>>;
defm t2SXTAH : T2I_bin_rrot<0b000, "sxtah",
BinOpFrag<(add node:$LHS, (sext_inreg node:$RHS,i16))>>;
+defm t2SXTAB16 : T2I_bin_rrot_DO<0b010, "sxtab16">;
-// TODO: SXT(A){B|H}16
+// SXT(A){B|H}16 - implemented above, for disassembly only
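+// For reference (assumed semantics, per the ARM ARM): sxtb16 sign-extends
+// bytes 0 and 2 of the source into the two halfword lanes, roughly
+//   dst[15:0]  = SignExtend(src[7:0]),  dst[31:16] = SignExtend(src[23:16])
+// and sxtab16 additionally adds each lane to the corresponding lane of $LHS.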
// Zero extenders
@@ -1101,6 +1161,7 @@ defm t2UXTAB : T2I_bin_rrot<0b101, "uxtab",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0x00FF))>>;
defm t2UXTAH : T2I_bin_rrot<0b001, "uxtah",
BinOpFrag<(add node:$LHS, (and node:$RHS, 0xFFFF))>>;
+defm t2UXTAB16 : T2I_bin_rrot_DO<0b011, "uxtab16">;
}
//===----------------------------------------------------------------------===//
@@ -1119,9 +1180,13 @@ defm t2SUBS : T2I_bin_s_irs <0b1101, "sub",
BinOpFrag<(subc node:$LHS, node:$RHS)>>;
defm t2ADC : T2I_adde_sube_irs<0b1010, "adc",
- BinOpFrag<(adde node:$LHS, node:$RHS)>, 1>;
+ BinOpFrag<(adde_dead_carry node:$LHS, node:$RHS)>, 1>;
defm t2SBC : T2I_adde_sube_irs<0b1011, "sbc",
- BinOpFrag<(sube node:$LHS, node:$RHS)>>;
+ BinOpFrag<(sube_dead_carry node:$LHS, node:$RHS)>>;
+defm t2ADCS : T2I_adde_sube_s_irs<0b1010, "adc",
+ BinOpFrag<(adde_live_carry node:$LHS, node:$RHS)>, 1>;
+defm t2SBCS : T2I_adde_sube_s_irs<0b1011, "sbc",
+ BinOpFrag<(sube_live_carry node:$LHS, node:$RHS)>>;
// RSB
defm t2RSB : T2I_rbin_is <0b1110, "rsb",
@@ -1138,6 +1203,155 @@ def : T2Pat<(add GPR:$src, t2_so_imm_neg:$imm),
def : T2Pat<(add GPR:$src, imm0_4095_neg:$imm),
(t2SUBri12 GPR:$src, imm0_4095_neg:$imm)>;
+// Select Bytes -- for disassembly only
+
+def t2SEL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, "sel",
+ "\t$dst, $a, $b", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-24} = 0b010;
+ let Inst{23} = 0b1;
+ let Inst{22-20} = 0b010;
+ let Inst{15-12} = 0b1111;
+ let Inst{7} = 0b1;
+ let Inst{6-4} = 0b000;
+}
+
+// A6.3.13, A6.3.14, A6.3.15 Parallel addition and subtraction (signed/unsigned)
+// and miscellaneous operations -- for disassembly only
+class T2I_pam<bits<3> op22_20, bits<4> op7_4, string opc>
+ : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), NoItinerary, opc,
+ "\t$dst, $a, $b", [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0101;
+ let Inst{22-20} = op22_20;
+ let Inst{15-12} = 0b1111;
+ let Inst{7-4} = op7_4;
+}
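+
+// Illustrative semantics (per the ARM ARM, not modeled here): these operate on
+// independent lanes, e.g. uadd16 adds the halfword lanes pairwise with no
+// carry between them:
+//   dst[15:0] = a[15:0] + b[15:0];  dst[31:16] = a[31:16] + b[31:16]
+// (the signed/unsigned non-halving forms also set the GE flags).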
+
+// Saturating add/subtract -- for disassembly only
+
+def t2QADD : T2I_pam<0b000, 0b1000, "qadd">;
+def t2QADD16 : T2I_pam<0b001, 0b0001, "qadd16">;
+def t2QADD8 : T2I_pam<0b000, 0b0001, "qadd8">;
+def t2QASX : T2I_pam<0b010, 0b0001, "qasx">;
+def t2QDADD : T2I_pam<0b000, 0b1001, "qdadd">;
+def t2QDSUB : T2I_pam<0b000, 0b1011, "qdsub">;
+def t2QSAX : T2I_pam<0b110, 0b0001, "qsax">;
+def t2QSUB : T2I_pam<0b000, 0b1010, "qsub">;
+def t2QSUB16 : T2I_pam<0b101, 0b0001, "qsub16">;
+def t2QSUB8 : T2I_pam<0b100, 0b0001, "qsub8">;
+def t2UQADD16 : T2I_pam<0b001, 0b0101, "uqadd16">;
+def t2UQADD8 : T2I_pam<0b000, 0b0101, "uqadd8">;
+def t2UQASX : T2I_pam<0b010, 0b0101, "uqasx">;
+def t2UQSAX : T2I_pam<0b110, 0b0101, "uqsax">;
+def t2UQSUB16 : T2I_pam<0b101, 0b0101, "uqsub16">;
+def t2UQSUB8 : T2I_pam<0b100, 0b0101, "uqsub8">;
+
+// Signed/Unsigned add/subtract -- for disassembly only
+
+def t2SASX : T2I_pam<0b010, 0b0000, "sasx">;
+def t2SADD16 : T2I_pam<0b001, 0b0000, "sadd16">;
+def t2SADD8 : T2I_pam<0b000, 0b0000, "sadd8">;
+def t2SSAX : T2I_pam<0b110, 0b0000, "ssax">;
+def t2SSUB16 : T2I_pam<0b101, 0b0000, "ssub16">;
+def t2SSUB8 : T2I_pam<0b100, 0b0000, "ssub8">;
+def t2UASX : T2I_pam<0b010, 0b0100, "uasx">;
+def t2UADD16 : T2I_pam<0b001, 0b0100, "uadd16">;
+def t2UADD8 : T2I_pam<0b000, 0b0100, "uadd8">;
+def t2USAX : T2I_pam<0b110, 0b0100, "usax">;
+def t2USUB16 : T2I_pam<0b101, 0b0100, "usub16">;
+def t2USUB8 : T2I_pam<0b100, 0b0100, "usub8">;
+
+// Signed/Unsigned halving add/subtract -- for disassembly only
+
+def t2SHASX : T2I_pam<0b010, 0b0010, "shasx">;
+def t2SHADD16 : T2I_pam<0b001, 0b0010, "shadd16">;
+def t2SHADD8 : T2I_pam<0b000, 0b0010, "shadd8">;
+def t2SHSAX : T2I_pam<0b110, 0b0010, "shsax">;
+def t2SHSUB16 : T2I_pam<0b101, 0b0010, "shsub16">;
+def t2SHSUB8 : T2I_pam<0b100, 0b0010, "shsub8">;
+def t2UHASX : T2I_pam<0b010, 0b0110, "uhasx">;
+def t2UHADD16 : T2I_pam<0b001, 0b0110, "uhadd16">;
+def t2UHADD8 : T2I_pam<0b000, 0b0110, "uhadd8">;
+def t2UHSAX : T2I_pam<0b110, 0b0110, "uhsax">;
+def t2UHSUB16 : T2I_pam<0b101, 0b0110, "uhsub16">;
+def t2UHSUB8 : T2I_pam<0b100, 0b0110, "uhsub8">;
+
+// Unsigned Sum of Absolute Differences [and Accumulate] -- for disassembly only
+
+def t2USAD8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ NoItinerary, "usad8", "\t$dst, $a, $b", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2USADA8 : T2I_mac<0, 0b111, 0b0000, (outs GPR:$dst),
+ (ins GPR:$a, GPR:$b, GPR:$acc), NoItinerary, "usada8",
+ "\t$dst, $a, $b, $acc", []>;
+
+// Signed/Unsigned saturate -- for disassembly only
+
+def t2SSATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
+ NoItinerary, "ssat", "\t$dst, $bit_pos, $a, lsl $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 0; // sh = '0'
+}
+
+def t2SSATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
+ NoItinerary, "ssat", "\t$dst, $bit_pos, $a, asr $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+}
+
+def t2SSAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+ "ssat16", "\t$dst, $bit_pos, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1100;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+}
+
+def t2USATlsl : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
+ NoItinerary, "usat", "\t$dst, $bit_pos, $a, lsl $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 0; // sh = '0'
+}
+
+def t2USATasr : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos,GPR:$a,i32imm:$shamt),
+ NoItinerary, "usat", "\t$dst, $bit_pos, $a, asr $shamt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+}
+
+def t2USAT16 : T2I<(outs GPR:$dst), (ins i32imm:$bit_pos, GPR:$a), NoItinerary,
+ "usat16", "\t$dst, $bit_pos, $a",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{25-22} = 0b1110;
+ let Inst{20} = 0;
+ let Inst{15} = 0;
+ let Inst{21} = 1; // sh = '1'
+ let Inst{14-12} = 0b000; // imm3 = '000'
+ let Inst{7-6} = 0b00; // imm2 = '00'
+}
//===----------------------------------------------------------------------===//
// Shift and rotate Instructions.
@@ -1342,6 +1556,8 @@ def t2UMAAL : T2I<(outs GPR:$ldst, GPR:$hdst), (ins GPR:$a, GPR:$b), IIC_iMAC64,
}
} // neverHasSideEffects
+// Rounding variants of the instructions below, included for disassembly only
+
// Most significant word multiply
def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
"smmul", "\t$dst, $a, $b",
@@ -1353,6 +1569,15 @@ def t2SMMUL : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
+def t2SMMULR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
+ "smmulr", "\t$dst, $a, $b", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = 0b1111; // Ra = 0b1111 (no accumulate)
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
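+
+// For reference (assumed semantics): smmul keeps the high word of the 64-bit
+// signed product, and the rounding variant adds 0x80000000 before truncating:
+//   smmul:  dst = (int32_t)(((int64_t)a * b) >> 32)
+//   smmulr: dst = (int32_t)(((int64_t)a * b + 0x80000000) >> 32)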
+
def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
"smmla", "\t$dst, $a, $b, $c",
[(set GPR:$dst, (add (mulhs GPR:$a, GPR:$b), GPR:$c))]> {
@@ -1363,6 +1588,14 @@ def t2SMMLA : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
+def t2SMMLAR : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+ "smmlar", "\t$dst, $a, $b, $c", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b101;
+ let Inst{15-12} = {?, ?, ?, ?}; // Ra
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
"smmls", "\t$dst, $a, $b, $c",
@@ -1374,6 +1607,15 @@ def t2SMMLS : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
let Inst{7-4} = 0b0000; // No Rounding (Inst{4} = 0)
}
+def t2SMMLSR : T2I <(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$c), IIC_iMAC32,
+ "smmlsr", "\t$dst, $a, $b, $c", []> {
+ let Inst{31-27} = 0b11111;
+ let Inst{26-23} = 0b0110;
+ let Inst{22-20} = 0b110;
+ let Inst{15-12} = {?, ?, ?, ?}; // Ra
+ let Inst{7-4} = 0b0001; // Rounding (Inst{4} = 1)
+}
+
multiclass T2I_smul<string opc, PatFrag opnode> {
def BB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b), IIC_iMUL32,
!strconcat(opc, "bb"), "\t$dst, $a, $b",
@@ -1466,7 +1708,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def BT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
!strconcat(opc, "bt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sext_inreg GPR:$a, i16),
- (sra GPR:$b, (i32 16)))))]> {
+ (sra GPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1490,7 +1732,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def TT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
!strconcat(opc, "tt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (opnode (sra GPR:$a, (i32 16)),
- (sra GPR:$b, (i32 16)))))]> {
+ (sra GPR:$b, (i32 16)))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b001;
@@ -1502,7 +1744,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def WB : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
!strconcat(opc, "wb"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sext_inreg GPR:$b, i16)), (i32 16))))]> {
+ (sext_inreg GPR:$b, i16)), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1514,7 +1756,7 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
def WT : T2I<(outs GPR:$dst), (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC16,
!strconcat(opc, "wt"), "\t$dst, $a, $b, $acc",
[(set GPR:$dst, (add GPR:$acc, (sra (opnode GPR:$a,
- (sra GPR:$b, (i32 16))), (i32 16))))]> {
+ (sra GPR:$b, (i32 16))), (i32 16))))]> {
let Inst{31-27} = 0b11111;
let Inst{26-23} = 0b0110;
let Inst{22-20} = 0b011;
@@ -1527,16 +1769,70 @@ multiclass T2I_smla<string opc, PatFrag opnode> {
defm t2SMUL : T2I_smul<"smul", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
defm t2SMLA : T2I_smla<"smla", BinOpFrag<(mul node:$LHS, node:$RHS)>>;
-// TODO: Halfword multiple accumulate long: SMLAL<x><y>
-// TODO: Dual halfword multiple: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
-
+// Halfword multiply accumulate long: SMLAL<x><y> -- for disassembly only
+def t2SMLALBB : T2I_mac<1, 0b100, 0b1000, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbb", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SMLALBT : T2I_mac<1, 0b100, 0b1001, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlalbt", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SMLALTB : T2I_mac<1, 0b100, 0b1010, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltb", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>;
+def t2SMLALTT : T2I_mac<1, 0b100, 0b1011, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaltt", "\t$ldst, $hdst, $a, $b",
+ [/* For disassembly only; pattern left blank */]>;
+
+// Dual halfword multiply: SMUAD, SMUSD, SMLAD, SMLSD, SMLALD, SMLSLD
+// These are for disassembly only.
+
+def t2SMUAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ IIC_iMAC32, "smuad", "\t$dst, $a, $b", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ IIC_iMAC32, "smuadx", "\t$dst, $a, $b", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ IIC_iMAC32, "smusd", "\t$dst, $a, $b", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMUSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst), (ins GPR:$a, GPR:$b),
+ IIC_iMAC32, "smusdx", "\t$dst, $a, $b", []> {
+ let Inst{15-12} = 0b1111;
+}
+def t2SMLAD : T2I_mac<0, 0b010, 0b0000, (outs GPR:$dst),
+ (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlad",
+ "\t$dst, $a, $b, $acc", []>;
+def t2SMLADX : T2I_mac<0, 0b010, 0b0001, (outs GPR:$dst),
+ (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smladx",
+ "\t$dst, $a, $b, $acc", []>;
+def t2SMLSD : T2I_mac<0, 0b100, 0b0000, (outs GPR:$dst),
+ (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsd",
+ "\t$dst, $a, $b, $acc", []>;
+def t2SMLSDX : T2I_mac<0, 0b100, 0b0001, (outs GPR:$dst),
+ (ins GPR:$a, GPR:$b, GPR:$acc), IIC_iMAC32, "smlsdx",
+ "\t$dst, $a, $b, $acc", []>;
+def t2SMLALD : T2I_mac<1, 0b100, 0b1100, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlald",
+ "\t$ldst, $hdst, $a, $b", []>;
+def t2SMLALDX : T2I_mac<1, 0b100, 0b1101, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlaldx",
+ "\t$ldst, $hdst, $a, $b", []>;
+def t2SMLSLD : T2I_mac<1, 0b101, 0b1100, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsld",
+ "\t$ldst, $hdst, $a, $b", []>;
+def t2SMLSLDX : T2I_mac<1, 0b101, 0b1101, (outs GPR:$ldst,GPR:$hdst),
+ (ins GPR:$a,GPR:$b), IIC_iMAC64, "smlsldx",
+ "\t$ldst, $hdst, $a, $b", []>;
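+
+// For reference (assumed semantics): smuad multiplies the two signed halfword
+// pairs and adds the products, smusd subtracts them, and the trailing 'x'
+// forms swap the halfwords of the second operand first:
+//   smuad: dst = a[15:0] * b[15:0] + a[31:16] * b[31:16]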
//===----------------------------------------------------------------------===//
// Misc. Arithmetic Instructions.
//
-class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops, InstrItinClass itin,
- string opc, string asm, list<dag> pattern>
+class T2I_misc<bits<2> op1, bits<2> op2, dag oops, dag iops,
+ InstrItinClass itin, string opc, string asm, list<dag> pattern>
: T2I<oops, iops, itin, opc, asm, pattern> {
let Inst{31-27} = 0b11111;
let Inst{26-22} = 0b01010;
@@ -1572,7 +1868,7 @@ def t2REVSH : T2I_misc<0b01, 0b11, (outs GPR:$dst), (ins GPR:$src), IIC_iUNAr,
(shl GPR:$src, (i32 8))), i16))]>;
def t2PKHBT : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, LSL $shamt",
+ IIC_iALUsi, "pkhbt", "\t$dst, $src1, $src2, lsl $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF),
(and (shl GPR:$src2, (i32 imm:$shamt)),
0xFFFF0000)))]> {
@@ -1590,7 +1886,7 @@ def : T2Pat<(or (and GPR:$src1, 0xFFFF), (shl GPR:$src2, imm16_31:$shamt)),
(t2PKHBT GPR:$src1, GPR:$src2, imm16_31:$shamt)>;
def t2PKHTB : T2I<(outs GPR:$dst), (ins GPR:$src1, GPR:$src2, i32imm:$shamt),
- IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, ASR $shamt",
+ IIC_iALUsi, "pkhtb", "\t$dst, $src1, $src2, asr $shamt",
[(set GPR:$dst, (or (and GPR:$src1, 0xFFFF0000),
(and (sra GPR:$src2, imm16_31:$shamt),
0xFFFF)))]> {
@@ -1643,7 +1939,7 @@ defm t2TEQ : T2I_cmp_irs<0b0100, "teq",
// Conditional moves
// FIXME: should be able to write a pattern for ARMcmov, but can't use
-// a two-value operand where a dag node expects two operands. :(
+// a two-value operand where a dag node expects two operands. :(
def t2MOVCCr : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true), IIC_iCMOVr,
"mov", ".w\t$dst, $true",
[/*(set GPR:$dst, (ARMcmov GPR:$false, GPR:$true, imm:$cc, CCR:$ccr))*/]>,
@@ -1723,6 +2019,66 @@ def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
}
}
+// Helper class for multiclass T2MemB -- for disassembly only
+class T2I_memb<string opc, string asm>
+ : T2I<(outs), (ins), NoItinerary, opc, asm,
+ [/* For disassembly only; pattern left blank */]>,
+ Requires<[IsThumb2, HasV7]> {
+ let Inst{31-20} = 0xf3b;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+multiclass T2MemB<bits<4> op7_4, string opc> {
+
+ def st : T2I_memb<opc, "\tst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1110;
+ }
+
+ def ish : T2I_memb<opc, "\tish"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1011;
+ }
+
+ def ishst : T2I_memb<opc, "\tishst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b1010;
+ }
+
+ def nsh : T2I_memb<opc, "\tnsh"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0111;
+ }
+
+ def nshst : T2I_memb<opc, "\tnshst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0110;
+ }
+
+ def osh : T2I_memb<opc, "\tosh"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0011;
+ }
+
+ def oshst : T2I_memb<opc, "\toshst"> {
+ let Inst{7-4} = op7_4;
+ let Inst{3-0} = 0b0010;
+ }
+}
+
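+// The option suffixes below follow the ARM ARM barrier options (assumed here,
+// not validated against an assembler): ish/nsh/osh select the inner-, non-,
+// and outer-shareable domains, a trailing "st" restricts the barrier to
+// stores, and the full-system "sy" form is defined separately above.
+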
+// These DMB variants are for disassembly only.
+defm t2DMB : T2MemB<0b0101, "dmb">;
+
+// These DSB variants are for disassembly only.
+defm t2DSB : T2MemB<0b0100, "dsb">;
+
+// ISB has only the full-system option -- for disassembly only
+def t2ISBsy : T2I_memb<"isb", ""> {
+ let Inst{7-4} = 0b0110;
+ let Inst{3-0} = 0b1111;
+}
+
class T2I_ldrex<bits<2> opcod, dag oops, dag iops, AddrMode am, SizeFlagVal sz,
InstrItinClass itin, string opc, string asm, string cstr,
list<dag> pattern, bits<4> rt2 = 0b1111>
@@ -1789,6 +2145,16 @@ def t2STREXD : T2I_strex<0b11, (outs GPR:$success),
{?, ?, ?, ?}>;
}
+// Clear-Exclusive is for disassembly only.
+def t2CLREX : T2I<(outs), (ins), NoItinerary, "clrex", "",
+ [/* For disassembly only; pattern left blank */]>,
+                  Requires<[IsThumb2, HasV7]> {
+ let Inst{31-20} = 0xf3b;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{7-4} = 0b0010;
+}
+
//===----------------------------------------------------------------------===//
// TLS Instructions
//
@@ -1906,6 +2272,24 @@ def t2TBH :
let Inst{15-8} = 0b11110000;
let Inst{7-4} = 0b0001; // H form
}
+
+// Generic versions of the above two instructions, for disassembly only
+
+def t2TBBgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br,
+ "tbb", "\t[$a, $b]", []>{
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-4} = 0b0000; // B form
+}
+
+def t2TBHgen : T2I<(outs), (ins GPR:$a, GPR:$b), IIC_Br,
+ "tbh", "\t[$a, $b, lsl #1]", []> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0001101;
+ let Inst{15-8} = 0b11110000;
+ let Inst{7-4} = 0b0001; // H form
+}
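+
+// For reference (assumed semantics): tbb/tbh implement compact jump tables.
+// The branch target is PC + 2 * zext(Mem8[$a + $b]) for tbb, and
+// PC + 2 * zext(Mem16[$a + ($b << 1)]) for tbh.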
} // isNotDuplicable, isIndirectBranch
} // isBranch, isTerminator, isBarrier
@@ -1931,6 +2315,119 @@ def t2IT : Thumb2XI<(outs), (ins it_pred:$cc, it_mask:$mask),
let Inst{15-8} = 0b10111111;
}
+// Branch and Exchange Jazelle -- for disassembly only
+// Rm = Inst{19-16}
+def t2BXJ : T2I<(outs), (ins GPR:$func), NoItinerary, "bxj", "\t$func",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111100;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// Change Processor State is a system instruction -- for disassembly only.
+// The singleton $opt operand contains the following information:
+// opt{4-0} = mode from Inst{4-0}
+// opt{5} = changemode from Inst{17}
+// opt{8-6} = AIF from Inst{8-6}
+// opt{10-9} = imod from Inst{19-18} with 0b10 as enable and 0b11 as disable
+def t2CPS : T2XI<(outs),(ins i32imm:$opt), NoItinerary, "cps${opt:cps}",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-20} = 0b111010;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// A6.3.4 Branches and miscellaneous control
+// Table A6-14 Change Processor State, and hint instructions
+// Helper class for disassembly only.
+class T2I_hint<bits<8> op7_0, string opc, string asm>
+ : T2I<(outs), (ins), NoItinerary, opc, asm,
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-0} = op7_0;
+}
+
+def t2NOP : T2I_hint<0b00000000, "nop", ".w">;
+def t2YIELD : T2I_hint<0b00000001, "yield", ".w">;
+def t2WFE : T2I_hint<0b00000010, "wfe", ".w">;
+def t2WFI : T2I_hint<0b00000011, "wfi", ".w">;
+def t2SEV : T2I_hint<0b00000100, "sev", ".w">;
+
+def t2DBG : T2I<(outs),(ins i32imm:$opt), NoItinerary, "dbg", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-20} = 0xf3a;
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+ let Inst{10-8} = 0b000;
+ let Inst{7-4} = 0b1111;
+}
+
+// Secure Monitor Call is a system instruction -- for disassembly only
+// Option = Inst{19-16}
+def t2SMC : T2I<(outs), (ins i32imm:$opt), NoItinerary, "smc", "\t$opt",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26-20} = 0b1111111;
+ let Inst{15-12} = 0b1000;
+}
+
+// Store Return State is a system instruction -- for disassembly only
+def t2SRSDBW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000010; // W = 1
+}
+
+def t2SRSDB : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsdb","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000000; // W = 0
+}
+
+def t2SRSIAW : T2I<(outs),(ins i32imm:$mode),NoItinerary,"srsia","\tsp!, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011010; // W = 1
+}
+
+def t2SRSIA : T2I<(outs), (ins i32imm:$mode),NoItinerary,"srsia","\tsp, $mode",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011000; // W = 0
+}
+
+// Return From Exception is a system instruction -- for disassembly only
+def t2RFEDBW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000011; // W = 1
+}
+
+def t2RFEDB : T2I<(outs), (ins GPR:$base), NoItinerary, "rfedb", "\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0000001; // W = 0
+}
+
+def t2RFEIAW : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base!",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011011; // W = 1
+}
+
+def t2RFEIA : T2I<(outs), (ins GPR:$base), NoItinerary, "rfeia", "\t$base",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11101;
+ let Inst{26-20} = 0b0011001; // W = 0
+}
+
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
@@ -1970,9 +2467,59 @@ def : T2Pat<(ARMWrapperJT tjumptable:$dst, imm:$id),
// Pseudo instruction that combines ldr from constpool and add pc. This should
// be expanded into two instructions late to allow if-conversion and
// scheduling.
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def t2LDRpci_pic : PseudoInst<(outs GPR:$dst), (ins i32imm:$addr, pclabel:$cp),
NoItinerary, "@ ldr.w\t$dst, $addr\n$cp:\n\tadd\t$dst, pc",
[(set GPR:$dst, (ARMpic_add (load (ARMWrapper tconstpool:$addr)),
imm:$cp))]>,
Requires<[IsThumb2]>;
+
+//===----------------------------------------------------------------------===//
+// Move between special register and ARM core register -- for disassembly only
+//
+
+// Rd = Inst{11-8}
+def t2MRS : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, cpsr",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-21} = 0b11111;
+ let Inst{20} = 0; // The R bit.
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// Rd = Inst{11-8}
+def t2MRSsys : T2I<(outs GPR:$dst), (ins), NoItinerary, "mrs", "\t$dst, spsr",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-21} = 0b11111;
+ let Inst{20} = 1; // The R bit.
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// FIXME: mask is ignored for the time being.
+// Rn = Inst{19-16}
+def t2MSR : T2I<(outs), (ins GPR:$src), NoItinerary, "msr", "\tcpsr, $src",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-21} = 0b11100;
+ let Inst{20} = 0; // The R bit.
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
+
+// FIXME: mask is ignored for the time being.
+// Rn = Inst{19-16}
+def t2MSRsys : T2I<(outs), (ins GPR:$src), NoItinerary, "msr", "\tspsr, $src",
+ [/* For disassembly only; pattern left blank */]> {
+ let Inst{31-27} = 0b11110;
+ let Inst{26} = 0;
+ let Inst{25-21} = 0b11100;
+ let Inst{20} = 1; // The R bit.
+ let Inst{15-14} = 0b10;
+ let Inst{12} = 0;
+}
diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td
index e516593..7c117ed 100644
--- a/lib/Target/ARM/ARMInstrVFP.td
+++ b/lib/Target/ARM/ARMInstrVFP.td
@@ -54,7 +54,7 @@ def vfp_f64imm : Operand<f64>,
// Load / store Instructions.
//
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def VLDRD : ADI5<0b1101, 0b01, (outs DPR:$dst), (ins addrmode5:$addr),
IIC_fpLoad64, "vldr", ".64\t$dst, $addr",
[(set DPR:$dst, (load addrmode5:$addr))]>;
diff --git a/lib/Target/ARM/ARMJITInfo.cpp b/lib/Target/ARM/ARMJITInfo.cpp
index bef5a06..8c0b720 100644
--- a/lib/Target/ARM/ARMJITInfo.cpp
+++ b/lib/Target/ARM/ARMJITInfo.cpp
@@ -60,7 +60,7 @@ extern "C" {
// whole compilation callback doesn't exist as far as the caller is
// concerned, so we can't just preserve the callee saved regs.
"stmdb sp!, {r0, r1, r2, r3, lr}\n"
-#ifndef __SOFTFP__
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
"fstmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
// The LR contains the address of the stub function on entry.
@@ -83,7 +83,7 @@ extern "C" {
// 6-20 | D0..D7 | Saved VFP registers
// +--------+
//
-#ifndef __SOFTFP__
+#if (defined(__VFP_FP__) && !defined(__SOFTFP__))
// Restore VFP caller-saved registers.
"fldmfdd sp!, {d0, d1, d2, d3, d4, d5, d6, d7}\n"
#endif
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 4e2d181..5b4f02d 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -748,11 +748,19 @@ static bool isMemoryOp(const MachineInstr *MI) {
if (MMO->isVolatile())
return false;
- // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is not.
+ // Unaligned ldr/str is emulated by some kernels, but unaligned ldm/stm is
+ // not.
if (MMO->getAlignment() < 4)
return false;
}
+ // str <undef> could probably be eliminated entirely, but for now we just want
+ // to avoid making a mess of it.
+ // FIXME: Use str <undef> as a wildcard to enable better stm folding.
+ if (MI->getNumOperands() > 0 && MI->getOperand(0).isReg() &&
+ MI->getOperand(0).isUndef())
+ return false;
+
int Opcode = MI->getOpcode();
switch (Opcode) {
default: break;
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index f60cc33..d6d595c 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -122,6 +122,7 @@ namespace {
void printT2AddrModeSoRegOperand(const MachineInstr *MI, int OpNum);
void printPredicateOperand(const MachineInstr *MI, int OpNum);
+ void printMandatoryPredicateOperand(const MachineInstr *MI, int OpNum);
void printSBitModifierOperand(const MachineInstr *MI, int OpNum);
void printPCLabel(const MachineInstr *MI, int OpNum);
void printRegisterList(const MachineInstr *MI, int OpNum);
@@ -786,6 +787,12 @@ void ARMAsmPrinter::printPredicateOperand(const MachineInstr *MI, int OpNum) {
O << ARMCondCodeToString(CC);
}
+void ARMAsmPrinter::printMandatoryPredicateOperand(const MachineInstr *MI,
+ int OpNum) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(CC);
+}
+
void ARMAsmPrinter::printSBitModifierOperand(const MachineInstr *MI, int OpNum){
unsigned Reg = MI->getOperand(OpNum).getReg();
if (Reg) {
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
index d7d8e09..a2084b0 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.cpp
@@ -325,6 +325,12 @@ void ARMInstPrinter::printPredicateOperand(const MCInst *MI, unsigned OpNum) {
O << ARMCondCodeToString(CC);
}
+void ARMInstPrinter::printMandatoryPredicateOperand(const MCInst *MI,
+ unsigned OpNum) {
+ ARMCC::CondCodes CC = (ARMCC::CondCodes)MI->getOperand(OpNum).getImm();
+ O << ARMCondCodeToString(CC);
+}
+
void ARMInstPrinter::printSBitModifierOperand(const MCInst *MI, unsigned OpNum){
if (MI->getOperand(OpNum).getReg()) {
assert(MI->getOperand(OpNum).getReg() == ARM::CPSR &&
diff --git a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
index 23a7f05..b7964c9 100644
--- a/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
+++ b/lib/Target/ARM/AsmPrinter/ARMInstPrinter.h
@@ -71,6 +71,7 @@ public:
void printT2AddrModeSoRegOperand(const MCInst *MI, unsigned OpNum) {}
void printPredicateOperand(const MCInst *MI, unsigned OpNum);
+ void printMandatoryPredicateOperand(const MCInst *MI, unsigned OpNum);
void printSBitModifierOperand(const MCInst *MI, unsigned OpNum);
void printRegisterList(const MCInst *MI, unsigned OpNum);
void printCPInstOperand(const MCInst *MI, unsigned OpNum,
diff --git a/lib/Target/ARM/README.txt b/lib/Target/ARM/README.txt
index 9efb5a1..57b65cf 100644
--- a/lib/Target/ARM/README.txt
+++ b/lib/Target/ARM/README.txt
@@ -10,6 +10,8 @@ Reimplement 'select' in terms of 'SEL'.
* Implement pre/post increment support. (e.g. PR935)
* Implement smarter constant generation for binops with large immediates.
+A few ARMv6T2 ops should be pattern matched: BFI, SBFX, and UBFX.
+
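+For example (an illustrative case, not from a testcase), for
+  int f(int x) { return (x >> 7) & 255; }
+UBFX can replace the shift-and-mask pair with a single instruction:
+  ubfx r0, r0, #7, #8
+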
//===---------------------------------------------------------------------===//
Crazy idea: Consider code that uses lots of 8-bit or 16-bit values. By the
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index d6630ce..b61ce29 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -450,9 +450,9 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
Offset -= AFI->getGPRCalleeSavedArea1Offset();
else if (AFI->isGPRCalleeSavedArea2Frame(FrameIndex))
Offset -= AFI->getGPRCalleeSavedArea2Offset();
- else if (hasFP(MF)) {
- assert(SPAdj == 0 && "Unexpected");
- // There is alloca()'s in this function, must reference off the frame
+ else if (MF.getFrameInfo()->hasVarSizedObjects()) {
+ assert(SPAdj == 0 && hasFP(MF) && "Unexpected");
+ // There are alloca()'s in this function, must reference off the frame
// pointer instead.
FrameReg = getFrameRegister(MF);
Offset -= AFI->getFramePtrSpillOffset();
diff --git a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
index d6b17c2..5303d85 100644
--- a/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
+++ b/lib/Target/Alpha/AlphaISelDAGToDAG.cpp
@@ -159,10 +159,6 @@ namespace {
// target-specific node if it hasn't already been changed.
SDNode *Select(SDNode *N);
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "Alpha DAG->DAG Pattern Instruction Selection";
}
@@ -222,20 +218,11 @@ SDNode *AlphaDAGToDAGISel::getGlobalRetAddr() {
return CurDAG->getRegister(GlobalRetAddr, TLI.getPointerTy()).getNode();
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void AlphaDAGToDAGISel::InstructionSelect() {
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
// Select - Convert the specified operand from a target-independent to a
// target-specific node if it hasn't already been changed.
SDNode *AlphaDAGToDAGISel::Select(SDNode *N) {
- if (N->isMachineOpcode()) {
+ if (N->isMachineOpcode())
return NULL; // Already selected.
- }
DebugLoc dl = N->getDebugLoc();
switch (N->getOpcode()) {
diff --git a/lib/Target/Alpha/AlphaInstrInfo.td b/lib/Target/Alpha/AlphaInstrInfo.td
index 8917e86..341c4a7 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.td
+++ b/lib/Target/Alpha/AlphaInstrInfo.td
@@ -92,7 +92,7 @@ def immSExt16int : PatLeaf<(imm), [{ //(int)imm fits in a 16 bit sign extended
((int64_t)N->getZExtValue() << 32) >> 32;
}], SExt16>;
-def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm:$L), [{
+def zappat : PatFrag<(ops node:$LHS), (and node:$LHS, imm), [{
ConstantSDNode *RHS = dyn_cast<ConstantSDNode>(N->getOperand(1));
if (!RHS) return 0;
uint64_t build = get_zapImm(N->getOperand(0), (uint64_t)RHS->getZExtValue());
@@ -602,9 +602,9 @@ def RPCC : MfcForm<0x18, 0xC000, "rpcc $RA", s_rpcc>; //Read process cycle count
def MB : MfcPForm<0x18, 0x4000, "mb", s_imisc>; //memory barrier
def WMB : MfcPForm<0x18, 0x4400, "wmb", s_imisc>; //write memory barrier
-def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 1), (i64 imm:$dev)),
+def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 1), (i64 imm)),
(WMB)>;
-def : Pat<(membarrier (i64 imm:$ll), (i64 imm:$ls), (i64 imm:$sl), (i64 imm:$ss), (i64 imm:$dev)),
+def : Pat<(membarrier (i64 imm), (i64 imm), (i64 imm), (i64 imm), (i64 imm)),
(MB)>;
//Basic Floating point ops
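The $L and $ll/$ls/$sl/$ss/$dev names disappear from pattern leaves whose result side never references them; an anonymous imm leaf matches identically, and the stricter TableGen pattern matcher introduced around this revision objects to names that are bound but never used (that rationale is inferred from the shape of the change, not stated in the patch).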
diff --git a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
index 2c9cc60..c8d71aa 100644
--- a/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
+++ b/lib/Target/Blackfin/BlackfinISelDAGToDAG.cpp
@@ -41,7 +41,7 @@ namespace {
BlackfinDAGToDAGISel(BlackfinTargetMachine &TM, CodeGenOpt::Level OptLevel)
: SelectionDAGISel(TM, OptLevel) {}
- virtual void InstructionSelect();
+ virtual void PostprocessISelDAG();
virtual const char *getPassName() const {
return "Blackfin DAG->DAG Pattern Instruction Selection";
@@ -72,13 +72,7 @@ FunctionPass *llvm::createBlackfinISelDag(BlackfinTargetMachine &TM,
return new BlackfinDAGToDAGISel(TM, OptLevel);
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void BlackfinDAGToDAGISel::InstructionSelect() {
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
- DEBUG(errs() << "Selected selection DAG before regclass fixup:\n");
- DEBUG(CurDAG->dump());
+void BlackfinDAGToDAGISel::PostprocessISelDAG() {
FixRegisterClasses(*CurDAG);
}
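Blackfin's register-class fixup survives by moving into the new PostprocessISelDAG() hook, which the framework invokes after the select loop; the SelectionDAGISel interface of this era also carries a matching PreprocessISelDAG() hook on the other side (that second hook is an assumption from the interface, not visible in this hunk). The resulting per-function order, sketched in comments:

    // 1. PreprocessISelDAG()   -- optional target massaging of the input DAG
    // 2. select loop: Select(N) for each node, then RemoveDeadNodes()
    // 3. PostprocessISelDAG()  -- e.g. Blackfin's FixRegisterClasses(*CurDAG)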
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index c1c1d80..10f873f 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -49,7 +49,6 @@
#include "llvm/System/Host.h"
#include "llvm/Config/config.h"
#include <algorithm>
-#include <sstream>
using namespace llvm;
extern "C" void LLVMInitializeCBackendTarget() {
@@ -153,26 +152,16 @@ namespace {
return false;
}
- raw_ostream &printType(formatted_raw_ostream &Out,
- const Type *Ty,
+ raw_ostream &printType(raw_ostream &Out, const Type *Ty,
bool isSigned = false,
const std::string &VariableName = "",
bool IgnoreName = false,
const AttrListPtr &PAL = AttrListPtr());
- std::ostream &printType(std::ostream &Out, const Type *Ty,
- bool isSigned = false,
- const std::string &VariableName = "",
- bool IgnoreName = false,
- const AttrListPtr &PAL = AttrListPtr());
- raw_ostream &printSimpleType(formatted_raw_ostream &Out,
- const Type *Ty,
- bool isSigned,
- const std::string &NameSoFar = "");
- std::ostream &printSimpleType(std::ostream &Out, const Type *Ty,
- bool isSigned,
+ raw_ostream &printSimpleType(raw_ostream &Out, const Type *Ty,
+ bool isSigned,
const std::string &NameSoFar = "");
- void printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
+ void printStructReturnPointerFunctionType(raw_ostream &Out,
const AttrListPtr &PAL,
const PointerType *Ty);
@@ -385,8 +374,8 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
// If this isn't a struct or array type, remove it from our set of types
// to name. This simplifies emission later.
- if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second) &&
- !isa<ArrayType>(I->second)) {
+ if (!I->second->isStructTy() && !I->second->isOpaqueTy() &&
+ !I->second->isArrayTy()) {
TST.remove(I);
} else {
// If this is not used, remove it from the symbol table.
@@ -405,7 +394,7 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
unsigned RenameCounter = 0;
for (std::set<const Type *>::const_iterator I = UT.begin(), E = UT.end();
I != E; ++I)
- if (isa<StructType>(*I) || isa<ArrayType>(*I)) {
+ if ((*I)->isStructTy() || (*I)->isArrayTy()) {
while (M.addTypeName("unnamed"+utostr(RenameCounter), *I))
++RenameCounter;
Changed = true;
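These mechanical rewrites trade isa<FooType>(Ty) for the Ty->isFooTy() predicates that Type now carries; the two spellings answer the same question, and the helpers presumably keep call sites independent of the DerivedTypes.h class definitions (that motivation is inferred from the API shape, not stated here). A self-contained illustration against the headers in this tree:

    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // Old and new spellings of the same query; isStructOrArray is made up.
    static bool isStructOrArray(const Type *Ty) {
      // return isa<StructType>(Ty) || isa<ArrayType>(Ty);   // before
      return Ty->isStructTy() || Ty->isArrayTy();            // after
    }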
@@ -454,11 +443,12 @@ bool CBackendNameAllUsedStructsAndMergeFunctions::runOnModule(Module &M) {
/// printStructReturnPointerFunctionType - This is like printType for a struct
/// return type, except, instead of printing the type as void (*)(Struct*, ...)
/// print it as "Struct (*)(...)", for struct return functions.
-void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
+void CWriter::printStructReturnPointerFunctionType(raw_ostream &Out,
const AttrListPtr &PAL,
const PointerType *TheTy) {
const FunctionType *FTy = cast<FunctionType>(TheTy->getElementType());
- std::stringstream FunctionInnards;
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
FunctionInnards << " (*) (";
bool PrintedType = false;
@@ -470,7 +460,7 @@ void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
FunctionInnards << ", ";
const Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(isa<PointerType>(ArgTy));
+ assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
}
printType(FunctionInnards, ArgTy,
@@ -484,63 +474,14 @@ void CWriter::printStructReturnPointerFunctionType(formatted_raw_ostream &Out,
FunctionInnards << "void";
}
FunctionInnards << ')';
- std::string tstr = FunctionInnards.str();
printType(Out, RetTy,
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
}
raw_ostream &
-CWriter::printSimpleType(formatted_raw_ostream &Out, const Type *Ty,
- bool isSigned,
+CWriter::printSimpleType(raw_ostream &Out, const Type *Ty, bool isSigned,
const std::string &NameSoFar) {
- assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) &&
- "Invalid type for printSimpleType");
- switch (Ty->getTypeID()) {
- case Type::VoidTyID: return Out << "void " << NameSoFar;
- case Type::IntegerTyID: {
- unsigned NumBits = cast<IntegerType>(Ty)->getBitWidth();
- if (NumBits == 1)
- return Out << "bool " << NameSoFar;
- else if (NumBits <= 8)
- return Out << (isSigned?"signed":"unsigned") << " char " << NameSoFar;
- else if (NumBits <= 16)
- return Out << (isSigned?"signed":"unsigned") << " short " << NameSoFar;
- else if (NumBits <= 32)
- return Out << (isSigned?"signed":"unsigned") << " int " << NameSoFar;
- else if (NumBits <= 64)
- return Out << (isSigned?"signed":"unsigned") << " long long "<< NameSoFar;
- else {
- assert(NumBits <= 128 && "Bit widths > 128 not implemented yet");
- return Out << (isSigned?"llvmInt128":"llvmUInt128") << " " << NameSoFar;
- }
- }
- case Type::FloatTyID: return Out << "float " << NameSoFar;
- case Type::DoubleTyID: return Out << "double " << NameSoFar;
- // Lacking emulation of FP80 on PPC, etc., we assume whichever of these is
- // present matches host 'long double'.
- case Type::X86_FP80TyID:
- case Type::PPC_FP128TyID:
- case Type::FP128TyID: return Out << "long double " << NameSoFar;
-
- case Type::VectorTyID: {
- const VectorType *VTy = cast<VectorType>(Ty);
- return printSimpleType(Out, VTy->getElementType(), isSigned,
- " __attribute__((vector_size(" +
- utostr(TD->getTypeAllocSize(VTy)) + " ))) " + NameSoFar);
- }
-
- default:
-#ifndef NDEBUG
- errs() << "Unknown primitive type: " << *Ty << "\n";
-#endif
- llvm_unreachable(0);
- }
-}
-
-std::ostream &
-CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned,
- const std::string &NameSoFar) {
- assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) &&
+ assert((Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) &&
"Invalid type for printSimpleType");
switch (Ty->getTypeID()) {
case Type::VoidTyID: return Out << "void " << NameSoFar;
@@ -587,120 +528,16 @@ CWriter::printSimpleType(std::ostream &Out, const Type *Ty, bool isSigned,
// Pass the Type* and the variable name and this prints out the variable
// declaration.
//
-raw_ostream &CWriter::printType(formatted_raw_ostream &Out,
- const Type *Ty,
+raw_ostream &CWriter::printType(raw_ostream &Out, const Type *Ty,
bool isSigned, const std::string &NameSoFar,
bool IgnoreName, const AttrListPtr &PAL) {
- if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) {
- printSimpleType(Out, Ty, isSigned, NameSoFar);
- return Out;
- }
-
- // Check to see if the type is named.
- if (!IgnoreName || isa<OpaqueType>(Ty)) {
- std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
- if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
- }
-
- switch (Ty->getTypeID()) {
- case Type::FunctionTyID: {
- const FunctionType *FTy = cast<FunctionType>(Ty);
- std::stringstream FunctionInnards;
- FunctionInnards << " (" << NameSoFar << ") (";
- unsigned Idx = 1;
- for (FunctionType::param_iterator I = FTy->param_begin(),
- E = FTy->param_end(); I != E; ++I) {
- const Type *ArgTy = *I;
- if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(isa<PointerType>(ArgTy));
- ArgTy = cast<PointerType>(ArgTy)->getElementType();
- }
- if (I != FTy->param_begin())
- FunctionInnards << ", ";
- printType(FunctionInnards, ArgTy,
- /*isSigned=*/PAL.paramHasAttr(Idx, Attribute::SExt), "");
- ++Idx;
- }
- if (FTy->isVarArg()) {
- if (FTy->getNumParams())
- FunctionInnards << ", ...";
- } else if (!FTy->getNumParams()) {
- FunctionInnards << "void";
- }
- FunctionInnards << ')';
- std::string tstr = FunctionInnards.str();
- printType(Out, FTy->getReturnType(),
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
- return Out;
- }
- case Type::StructTyID: {
- const StructType *STy = cast<StructType>(Ty);
- Out << NameSoFar + " {\n";
- unsigned Idx = 0;
- for (StructType::element_iterator I = STy->element_begin(),
- E = STy->element_end(); I != E; ++I) {
- Out << " ";
- printType(Out, *I, false, "field" + utostr(Idx++));
- Out << ";\n";
- }
- Out << '}';
- if (STy->isPacked())
- Out << " __attribute__ ((packed))";
- return Out;
- }
-
- case Type::PointerTyID: {
- const PointerType *PTy = cast<PointerType>(Ty);
- std::string ptrName = "*" + NameSoFar;
-
- if (isa<ArrayType>(PTy->getElementType()) ||
- isa<VectorType>(PTy->getElementType()))
- ptrName = "(" + ptrName + ")";
-
- if (!PAL.isEmpty())
- // Must be a function ptr cast!
- return printType(Out, PTy->getElementType(), false, ptrName, true, PAL);
- return printType(Out, PTy->getElementType(), false, ptrName);
- }
-
- case Type::ArrayTyID: {
- const ArrayType *ATy = cast<ArrayType>(Ty);
- unsigned NumElements = ATy->getNumElements();
- if (NumElements == 0) NumElements = 1;
- // Arrays are wrapped in structs to allow them to have normal
- // value semantics (avoiding the array "decay").
- Out << NameSoFar << " { ";
- printType(Out, ATy->getElementType(), false,
- "array[" + utostr(NumElements) + "]");
- return Out << "; }";
- }
-
- case Type::OpaqueTyID: {
- std::string TyName = "struct opaque_" + itostr(OpaqueCounter++);
- assert(TypeNames.find(Ty) == TypeNames.end());
- TypeNames[Ty] = TyName;
- return Out << TyName << ' ' << NameSoFar;
- }
- default:
- llvm_unreachable("Unhandled case in getTypeProps!");
- }
-
- return Out;
-}
-
-// Pass the Type* and the variable name and this prints out the variable
-// declaration.
-//
-std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
- bool isSigned, const std::string &NameSoFar,
- bool IgnoreName, const AttrListPtr &PAL) {
- if (Ty->isPrimitiveType() || Ty->isIntegerTy() || isa<VectorType>(Ty)) {
+ if (Ty->isPrimitiveType() || Ty->isIntegerTy() || Ty->isVectorTy()) {
printSimpleType(Out, Ty, isSigned, NameSoFar);
return Out;
}
// Check to see if the type is named.
- if (!IgnoreName || isa<OpaqueType>(Ty)) {
+ if (!IgnoreName || Ty->isOpaqueTy()) {
std::map<const Type *, std::string>::iterator I = TypeNames.find(Ty);
if (I != TypeNames.end()) return Out << I->second << ' ' << NameSoFar;
}
@@ -708,14 +545,15 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
switch (Ty->getTypeID()) {
case Type::FunctionTyID: {
const FunctionType *FTy = cast<FunctionType>(Ty);
- std::stringstream FunctionInnards;
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
FunctionInnards << " (" << NameSoFar << ") (";
unsigned Idx = 1;
for (FunctionType::param_iterator I = FTy->param_begin(),
E = FTy->param_end(); I != E; ++I) {
const Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(isa<PointerType>(ArgTy));
+ assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
}
if (I != FTy->param_begin())
@@ -731,9 +569,8 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
FunctionInnards << "void";
}
FunctionInnards << ')';
- std::string tstr = FunctionInnards.str();
printType(Out, FTy->getReturnType(),
- /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), tstr);
+ /*isSigned=*/PAL.paramHasAttr(0, Attribute::SExt), FunctionInnards.str());
return Out;
}
case Type::StructTyID: {
@@ -756,8 +593,8 @@ std::ostream &CWriter::printType(std::ostream &Out, const Type *Ty,
const PointerType *PTy = cast<PointerType>(Ty);
std::string ptrName = "*" + NameSoFar;
- if (isa<ArrayType>(PTy->getElementType()) ||
- isa<VectorType>(PTy->getElementType()))
+ if (PTy->getElementType()->isArrayTy() ||
+ PTy->getElementType()->isVectorTy())
ptrName = "(" + ptrName + ")";
if (!PAL.isEmpty())
@@ -1144,7 +981,7 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
Out << "((";
printType(Out, CPV->getType()); // sign doesn't matter
Out << ")/*UNDEF*/";
- if (!isa<VectorType>(CPV->getType())) {
+ if (!CPV->getType()->isVectorTy()) {
Out << "0)";
} else {
Out << "{})";
@@ -1660,7 +1497,7 @@ void CWriter::writeOperandWithCast(Value* Operand, const ICmpInst &Cmp) {
// If the operand was a pointer, convert to a large integer type.
const Type* OpTy = Operand->getType();
- if (isa<PointerType>(OpTy))
+ if (OpTy->isPointerTy())
OpTy = TD->getIntPtrType(Operand->getContext());
Out << "((";
@@ -2102,10 +1939,10 @@ bool CWriter::doInitialization(Module &M) {
// complete. If the value is an aggregate, print out { 0 }, and let
// the compiler figure out the rest of the zeros.
Out << " = " ;
- if (isa<StructType>(I->getInitializer()->getType()) ||
- isa<VectorType>(I->getInitializer()->getType())) {
+ if (I->getInitializer()->getType()->isStructTy() ||
+ I->getInitializer()->getType()->isVectorTy()) {
Out << "{ 0 }";
- } else if (isa<ArrayType>(I->getInitializer()->getType())) {
+ } else if (I->getInitializer()->getType()->isArrayTy()) {
// As with structs and vectors, but with an extra set of braces
// because arrays are wrapped in structs.
Out << "{ { 0 } }";
@@ -2274,7 +2111,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
//
Out << "/* Structure contents */\n";
for (I = TST.begin(); I != End; ++I)
- if (isa<StructType>(I->second) || isa<ArrayType>(I->second))
+ if (I->second->isStructTy() || I->second->isArrayTy())
// Only print out used types!
printContainedStructs(I->second, StructPrinted);
}
@@ -2287,7 +2124,7 @@ void CWriter::printModuleTypes(const TypeSymbolTable &TST) {
void CWriter::printContainedStructs(const Type *Ty,
std::set<const Type*> &StructPrinted) {
// Don't walk through pointers.
- if (isa<PointerType>(Ty) || Ty->isPrimitiveType() || Ty->isIntegerTy())
+ if (Ty->isPointerTy() || Ty->isPrimitiveType() || Ty->isIntegerTy())
return;
// Print all contained types first.
@@ -2295,7 +2132,7 @@ void CWriter::printContainedStructs(const Type *Ty,
E = Ty->subtype_end(); I != E; ++I)
printContainedStructs(*I, StructPrinted);
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty)) {
+ if (Ty->isStructTy() || Ty->isArrayTy()) {
// Check to see if we have already printed this struct.
if (StructPrinted.insert(Ty).second) {
// Print structure type out.
@@ -2328,7 +2165,8 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
const FunctionType *FT = cast<FunctionType>(F->getFunctionType());
const AttrListPtr &PAL = F->getAttributes();
- std::stringstream FunctionInnards;
+ std::string tstr;
+ raw_string_ostream FunctionInnards(tstr);
// Print out the name...
FunctionInnards << GetValueName(F) << '(';
@@ -2383,7 +2221,7 @@ void CWriter::printFunctionSignature(const Function *F, bool Prototype) {
if (PrintedArg) FunctionInnards << ", ";
const Type *ArgTy = *I;
if (PAL.paramHasAttr(Idx, Attribute::ByVal)) {
- assert(isa<PointerType>(ArgTy));
+ assert(ArgTy->isPointerTy());
ArgTy = cast<PointerType>(ArgTy)->getElementType();
}
printType(FunctionInnards, ArgTy,
@@ -2714,7 +2552,7 @@ void CWriter::visitPHINode(PHINode &I) {
void CWriter::visitBinaryOperator(Instruction &I) {
// binary instructions, shift instructions, setCond instructions.
- assert(!isa<PointerType>(I.getType()));
+ assert(!I.getType()->isPointerTy());
// We must cast the results of binary operations which might be promoted.
bool needsCast = false;
@@ -3490,7 +3328,7 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
// exposed, like a global, avoid emitting (&foo)[0], just emit foo instead.
if (isAddressExposed(Ptr)) {
writeOperandInternal(Ptr, Static);
- } else if (I != E && isa<StructType>(*I)) {
+ } else if (I != E && (*I)->isStructTy()) {
// If we didn't already emit the first operand, see if we can print it as
// P->f instead of "P[0].f"
writeOperand(Ptr);
@@ -3505,13 +3343,13 @@ void CWriter::printGEPExpression(Value *Ptr, gep_type_iterator I,
}
for (; I != E; ++I) {
- if (isa<StructType>(*I)) {
+ if ((*I)->isStructTy()) {
Out << ".field" << cast<ConstantInt>(I.getOperand())->getZExtValue();
- } else if (isa<ArrayType>(*I)) {
+ } else if ((*I)->isArrayTy()) {
Out << ".array[";
writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
Out << ']';
- } else if (!isa<VectorType>(*I)) {
+ } else if (!(*I)->isVectorTy()) {
Out << '[';
writeOperandWithCast(I.getOperand(), Instruction::GetElementPtr);
Out << ']';
@@ -3669,7 +3507,7 @@ void CWriter::visitInsertValueInst(InsertValueInst &IVI) {
i != e; ++i) {
const Type *IndexedTy =
ExtractValueInst::getIndexedType(IVI.getOperand(0)->getType(), b, i+1);
- if (isa<ArrayType>(IndexedTy))
+ if (IndexedTy->isArrayTy())
Out << ".array[" << *i << "]";
else
Out << ".field" << *i;
@@ -3690,7 +3528,7 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
i != e; ++i) {
const Type *IndexedTy =
ExtractValueInst::getIndexedType(EVI.getOperand(0)->getType(), b, i+1);
- if (isa<ArrayType>(IndexedTy))
+ if (IndexedTy->isArrayTy())
Out << ".array[" << *i << "]";
else
Out << ".field" << *i;
@@ -3706,7 +3544,8 @@ void CWriter::visitExtractValueInst(ExtractValueInst &EVI) {
bool CTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(createGCLoweringPass());
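The thrust of the CBackend change: the duplicated std::ostream and formatted_raw_ostream overloads of printType/printSimpleType collapse into single raw_ostream versions, and std::stringstream scratch buffers give way to raw_string_ostream over a local std::string. The recurring buffering pattern as a self-contained sketch (emitWrapped is hypothetical):

    #include "llvm/Support/raw_ostream.h"
    #include <string>

    static void emitWrapped(llvm::raw_ostream &Out) {
      std::string tstr;
      llvm::raw_string_ostream Inner(tstr);  // buffers into tstr
      Inner << " (*) (" << "void" << ')';
      Out << Inner.str();  // str() flushes and returns the buffered string
    }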
diff --git a/lib/Target/CBackend/CTargetMachine.h b/lib/Target/CBackend/CTargetMachine.h
index 715bbda..d178e7f 100644
--- a/lib/Target/CBackend/CTargetMachine.h
+++ b/lib/Target/CBackend/CTargetMachine.h
@@ -27,7 +27,8 @@ struct CTargetMachine : public TargetMachine {
virtual bool addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel);
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
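addPassesToEmitWholeFile grows a DisableVerify parameter, evidently to keep the C and C++ writers in step with the LLVMTargetMachine::addPassesToEmit* signatures touched elsewhere in this commit; both writers accept the flag and ignore it. A hypothetical call site showing only where the new argument lands:

    // emitC is a made-up helper; only the trailing argument is new.
    static bool emitC(CTargetMachine &TM, PassManager &PM,
                      formatted_raw_ostream &Out) {
      return TM.addPassesToEmitWholeFile(PM, Out,
                                         TargetMachine::CGFT_AssemblyFile,
                                         CodeGenOpt::Default,
                                         /*DisableVerify=*/false);
    }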
diff --git a/lib/Target/CellSPU/SPU64InstrInfo.td b/lib/Target/CellSPU/SPU64InstrInfo.td
index 06eb149..47cb579 100644
--- a/lib/Target/CellSPU/SPU64InstrInfo.td
+++ b/lib/Target/CellSPU/SPU64InstrInfo.td
@@ -123,8 +123,8 @@ multiclass CompareLogicalGreaterThan64 {
defm I64LGT: CompareLogicalGreaterThan64;
def : Pat<(setugt R64C:$rA, R64C:$rB), I64LGTr64.Fragment>;
-def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
- I64LGTv2i64.Fragment>;
+//def : Pat<(setugt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64LGTv2i64.Fragment>;
// i64 setult:
def : I64SETCCNegCond<setule, I64LGTr64>;
@@ -201,8 +201,8 @@ multiclass CompareGreaterThan64 {
defm I64GT: CompareLogicalGreaterThan64;
def : Pat<(setgt R64C:$rA, R64C:$rB), I64GTr64.Fragment>;
-def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
- I64GTv2i64.Fragment>;
+//def : Pat<(setgt (v2i64 VECREG:$rA), (v2i64 VECREG:$rB)),
+// I64GTv2i64.Fragment>;
// i64 setult:
def : I64SETCCNegCond<setle, I64GTr64>;
diff --git a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
index 1ed06e3..396a921 100644
--- a/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
+++ b/lib/Target/CellSPU/SPUISelDAGToDAG.cpp
@@ -294,15 +294,19 @@ namespace {
((vecVT == MVT::v2i64) &&
((SPU::get_vec_i16imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
(SPU::get_ILHUvec_imm(bvNode, *CurDAG, MVT::i64).getNode() != 0) ||
- (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0))))
- return Select(bvNode);
+ (SPU::get_vec_u18imm(bvNode, *CurDAG, MVT::i64).getNode() != 0)))) {
+ HandleSDNode Dummy(SDValue(bvNode, 0));
+ if (SDNode *N = Select(bvNode))
+ return N;
+ return Dummy.getValue().getNode();
+ }
// No, need to emit a constant pool spill:
std::vector<Constant*> CV;
for (size_t i = 0; i < bvNode->getNumOperands(); ++i) {
ConstantSDNode *V = dyn_cast<ConstantSDNode > (bvNode->getOperand(i));
- CV.push_back(const_cast<ConstantInt *> (V->getConstantIntValue()));
+ CV.push_back(const_cast<ConstantInt *>(V->getConstantIntValue()));
}
Constant *CP = ConstantVector::get(CV);
@@ -311,10 +315,15 @@ namespace {
SDValue CGPoolOffset =
SPU::LowerConstantPool(CPIdx, *CurDAG,
SPUtli.getSPUTargetMachine());
- return SelectCode(CurDAG->getLoad(vecVT, dl,
- CurDAG->getEntryNode(), CGPoolOffset,
- PseudoSourceValue::getConstantPool(), 0,
- false, false, Alignment).getNode());
+
+ HandleSDNode Dummy(CurDAG->getLoad(vecVT, dl,
+ CurDAG->getEntryNode(), CGPoolOffset,
+ PseudoSourceValue::getConstantPool(),0,
+ false, false, Alignment));
+ CurDAG->ReplaceAllUsesWith(SDValue(bvNode, 0), Dummy.getValue());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
}
/// Select - Convert the specified operand from a target-independent to a
@@ -390,10 +399,6 @@ namespace {
return false;
}
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "Cell SPU DAG->DAG Pattern Instruction Selection";
}
@@ -411,16 +416,6 @@ namespace {
};
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void
-SPUDAGToDAGISel::InstructionSelect()
-{
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
/*!
\arg Op The ISD instruction operand
\arg N The address to be tested
@@ -692,9 +687,8 @@ SPUDAGToDAGISel::Select(SDNode *N) {
SDValue Ops[8];
DebugLoc dl = N->getDebugLoc();
- if (N->isMachineOpcode()) {
+ if (N->isMachineOpcode())
return NULL; // Already selected.
- }
if (Opc == ISD::FrameIndex) {
int FI = cast<FrameIndexSDNode>(N)->getIndex();
@@ -759,43 +753,67 @@ SPUDAGToDAGISel::Select(SDNode *N) {
}
SDNode *shufMaskLoad = emitBuildVector(shufMask.getNode());
- SDNode *PromoteScalar =
- SelectCode(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
- Op0VecVT, Op0).getNode());
-
+
+ HandleSDNode PromoteScalar(CurDAG->getNode(SPUISD::PREFSLOT2VEC, dl,
+ Op0VecVT, Op0));
+
+ SDValue PromScalar;
+ if (SDNode *N = SelectCode(PromoteScalar.getValue().getNode()))
+ PromScalar = SDValue(N, 0);
+ else
+ PromScalar = PromoteScalar.getValue();
+
SDValue zextShuffle =
CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
- SDValue(PromoteScalar, 0),
- SDValue(PromoteScalar, 0),
+ PromScalar, PromScalar,
SDValue(shufMaskLoad, 0));
- // N.B.: BIT_CONVERT replaces and updates the zextShuffle node, so we
- // re-use it in the VEC2PREFSLOT selection without needing to explicitly
- // call SelectCode (it's already done for us.)
- SelectCode(CurDAG->getNode(ISD::BIT_CONVERT, dl, OpVecVT, zextShuffle).getNode());
- return SelectCode(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
- zextShuffle).getNode());
+ HandleSDNode Dummy2(zextShuffle);
+ if (SDNode *N = SelectCode(Dummy2.getValue().getNode()))
+ zextShuffle = SDValue(N, 0);
+ else
+ zextShuffle = Dummy2.getValue();
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::VEC2PREFSLOT, dl, OpVT,
+ zextShuffle));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ SelectCode(Dummy.getValue().getNode());
+ return Dummy.getValue().getNode();
} else if (Opc == ISD::ADD && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
- return SelectCode(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)).getNode());
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::ADD64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
} else if (Opc == ISD::SUB && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(getBorrowGenerateShufMask(*CurDAG, dl).getNode());
- return SelectCode(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)).getNode());
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::SUB64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
} else if (Opc == ISD::MUL && (OpVT == MVT::i64 || OpVT == MVT::v2i64)) {
SDNode *CGLoad =
emitBuildVector(getCarryGenerateShufMask(*CurDAG, dl).getNode());
- return SelectCode(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
- N->getOperand(0), N->getOperand(1),
- SDValue(CGLoad, 0)).getNode());
+ HandleSDNode Dummy(CurDAG->getNode(SPUISD::MUL64_MARKER, dl, OpVT,
+ N->getOperand(0), N->getOperand(1),
+ SDValue(CGLoad, 0)));
+ CurDAG->ReplaceAllUsesWith(N, Dummy.getValue().getNode());
+ if (SDNode *N = SelectCode(Dummy.getValue().getNode()))
+ return N;
+ return Dummy.getValue().getNode();
} else if (Opc == ISD::TRUNCATE) {
SDValue Op0 = N->getOperand(0);
if ((Op0.getOpcode() == ISD::SRA || Op0.getOpcode() == ISD::SRL)
@@ -832,17 +850,14 @@ SPUDAGToDAGISel::Select(SDNode *N) {
}
}
} else if (Opc == ISD::SHL) {
- if (OpVT == MVT::i64) {
+ if (OpVT == MVT::i64)
return SelectSHLi64(N, OpVT);
- }
} else if (Opc == ISD::SRL) {
- if (OpVT == MVT::i64) {
+ if (OpVT == MVT::i64)
return SelectSRLi64(N, OpVT);
- }
} else if (Opc == ISD::SRA) {
- if (OpVT == MVT::i64) {
+ if (OpVT == MVT::i64)
return SelectSRAi64(N, OpVT);
- }
} else if (Opc == ISD::FNEG
&& (OpVT == MVT::f64 || OpVT == MVT::v2f64)) {
DebugLoc dl = N->getDebugLoc();
@@ -1224,13 +1239,15 @@ SDNode *SPUDAGToDAGISel::SelectI64Constant(uint64_t Value64, EVT OpVT,
? shufmask.getNode()
: emitBuildVector(shufmask.getNode()));
- SDNode *shufNode =
- Select(CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
+ SDValue shufNode =
+ CurDAG->getNode(SPUISD::SHUFB, dl, OpVecVT,
SDValue(lhsNode, 0), SDValue(rhsNode, 0),
- SDValue(shufMaskNode, 0)).getNode());
-
- return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
- SDValue(shufNode, 0));
+ SDValue(shufMaskNode, 0));
+ HandleSDNode Dummy(shufNode);
+ SDNode *SN = SelectCode(Dummy.getValue().getNode());
+ if (SN == 0) SN = Dummy.getValue().getNode();
+
+ return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT, SDValue(SN, 0));
} else if (i64vec.getOpcode() == ISD::BUILD_VECTOR) {
return CurDAG->getMachineNode(SPU::ORi64_v2i64, dl, OpVT,
SDValue(emitBuildVector(i64vec.getNode()), 0));
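The recurring edit in this file wraps intermediate SDValues in HandleSDNode before handing them to Select/SelectCode. HandleSDNode acts as a dummy use that ReplaceAllUsesWith and CSE keep up to date, so the surviving node stays reachable even if selection mutates the DAG underneath; a null return from SelectCode means the node was morphed or replaced in place rather than rewritten to a fresh node. The idiom, extracted as a sketch (selectViaHandle is a hypothetical member):

    // Keep V alive and trackable across selection.
    SDNode *SPUDAGToDAGISel::selectViaHandle(SDValue V) {
      HandleSDNode Dummy(V);             // a use that RAUW/CSE keeps current
      if (SDNode *New = SelectCode(Dummy.getValue().getNode()))
        return New;                      // selection produced a fresh node
      return Dummy.getValue().getNode(); // replaced/morphed in place
    }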
diff --git a/lib/Target/CellSPU/SPUISelLowering.cpp b/lib/Target/CellSPU/SPUISelLowering.cpp
index b21eb37..e863ee3 100644
--- a/lib/Target/CellSPU/SPUISelLowering.cpp
+++ b/lib/Target/CellSPU/SPUISelLowering.cpp
@@ -118,8 +118,7 @@ namespace {
TLI.LowerCallTo(InChain, RetTy, isSigned, !isSigned, false, false,
0, TLI.getLibcallCallingConv(LC), false,
/*isReturnValueUsed=*/true,
- Callee, Args, DAG, Op.getDebugLoc(),
- DAG.GetOrdering(InChain.getNode()));
+ Callee, Args, DAG, Op.getDebugLoc());
return CallInfo.first;
}
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index e3f2e9f..9c5893c 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -221,7 +221,7 @@ namespace {
APFloat APF = APFloat(CFP->getValueAPF()); // copy
if (CFP->getType() == Type::getFloatTy(CFP->getContext()))
APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored);
- Out << "ConstantFP::get(getGlobalContext(), ";
+ Out << "ConstantFP::get(mod->getContext(), ";
Out << "APFloat(";
#if HAVE_PRINTF_A
char Buffer[100];
@@ -346,21 +346,21 @@ namespace {
// First, handle the primitive types .. easy
if (Ty->isPrimitiveType() || Ty->isIntegerTy()) {
switch (Ty->getTypeID()) {
- case Type::VoidTyID: return "Type::getVoidTy(getGlobalContext())";
+ case Type::VoidTyID: return "Type::getVoidTy(mod->getContext())";
case Type::IntegerTyID: {
unsigned BitWidth = cast<IntegerType>(Ty)->getBitWidth();
- return "IntegerType::get(getGlobalContext(), " + utostr(BitWidth) + ")";
+ return "IntegerType::get(mod->getContext(), " + utostr(BitWidth) + ")";
}
- case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(getGlobalContext())";
- case Type::FloatTyID: return "Type::getFloatTy(getGlobalContext())";
- case Type::DoubleTyID: return "Type::getDoubleTy(getGlobalContext())";
- case Type::LabelTyID: return "Type::getLabelTy(getGlobalContext())";
+ case Type::X86_FP80TyID: return "Type::getX86_FP80Ty(mod->getContext())";
+ case Type::FloatTyID: return "Type::getFloatTy(mod->getContext())";
+ case Type::DoubleTyID: return "Type::getDoubleTy(mod->getContext())";
+ case Type::LabelTyID: return "Type::getLabelTy(mod->getContext())";
default:
error("Invalid primitive type");
break;
}
// shouldn't be returned, but make it sensible
- return "Type::getVoidTy(getGlobalContext())";
+ return "Type::getVoidTy(mod->getContext())";
}
// Now, see if we've seen the type before and return that
@@ -514,7 +514,7 @@ namespace {
TypeMap::const_iterator I = UnresolvedTypes.find(Ty);
if (I == UnresolvedTypes.end()) {
Out << "PATypeHolder " << typeName;
- Out << "_fwd = OpaqueType::get(getGlobalContext());";
+ Out << "_fwd = OpaqueType::get(mod->getContext());";
nl(Out);
UnresolvedTypes[Ty] = typeName;
}
@@ -615,7 +615,7 @@ namespace {
}
case Type::OpaqueTyID: {
Out << "OpaqueType* " << typeName;
- Out << " = OpaqueType::get(getGlobalContext());";
+ Out << " = OpaqueType::get(mod->getContext());";
nl(Out);
break;
}
@@ -751,7 +751,7 @@ namespace {
if (const ConstantInt *CI = dyn_cast<ConstantInt>(CV)) {
std::string constValue = CI->getValue().toString(10, true);
Out << "ConstantInt* " << constName
- << " = ConstantInt::get(getGlobalContext(), APInt("
+ << " = ConstantInt::get(mod->getContext(), APInt("
<< cast<IntegerType>(CI->getType())->getBitWidth()
<< ", StringRef(\"" << constValue << "\"), 10));";
} else if (isa<ConstantAggregateZero>(CV)) {
@@ -769,7 +769,7 @@ namespace {
CA->getType()->getElementType() ==
Type::getInt8Ty(CA->getContext())) {
Out << "Constant* " << constName <<
- " = ConstantArray::get(getGlobalContext(), \"";
+ " = ConstantArray::get(mod->getContext(), \"";
std::string tmp = CA->getAsString();
bool nullTerminate = false;
if (tmp[tmp.length()-1] == 0) {
@@ -995,7 +995,7 @@ namespace {
void CppWriter::printVariableHead(const GlobalVariable *GV) {
nl(Out) << "GlobalVariable* " << getCppName(GV);
if (is_inline) {
- Out << " = mod->getGlobalVariable(getGlobalContext(), ";
+ Out << " = mod->getGlobalVariable(mod->getContext(), ";
printEscapedString(GV->getName());
Out << ", " << getCppName(GV->getType()->getElementType()) << ",true)";
nl(Out) << "if (!" << getCppName(GV) << ") {";
@@ -1094,7 +1094,7 @@ namespace {
case Instruction::Ret: {
const ReturnInst* ret = cast<ReturnInst>(I);
- Out << "ReturnInst::Create(getGlobalContext(), "
+ Out << "ReturnInst::Create(mod->getContext(), "
<< (ret->getReturnValue() ? opNames[0] + ", " : "") << bbname << ");";
break;
}
@@ -1171,7 +1171,7 @@ namespace {
}
case Instruction::Unreachable: {
Out << "new UnreachableInst("
- << "getGlobalContext(), "
+ << "mod->getContext(), "
<< bbname << ");";
break;
}
@@ -1673,7 +1673,7 @@ namespace {
BI != BE; ++BI) {
std::string bbname(getCppName(BI));
Out << "BasicBlock* " << bbname <<
- " = BasicBlock::Create(getGlobalContext(), \"";
+ " = BasicBlock::Create(mod->getContext(), \"";
if (BI->hasName())
printEscapedString(BI->getName());
Out << "\"," << getCppName(BI->getParent()) << ",0);";
@@ -2009,7 +2009,8 @@ char CppWriter::ID = 0;
bool CPPTargetMachine::addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel) {
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify) {
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
PM.add(new CppWriter(o));
return false;
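The CppBackend's generated code now pulls its LLVMContext from the module being built rather than the process-wide getGlobalContext(), so the emitted C++ also compiles into hosts that keep each module in a private context. The shape of the generated code under that change, sketched (makeModule is illustrative, not emitted verbatim):

    #include "llvm/LLVMContext.h"
    #include "llvm/Module.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    static Module *makeModule(LLVMContext &Ctx) {
      Module *mod = new Module("example", Ctx);
      // Generated calls now read IntegerType::get(mod->getContext(), 32)
      // instead of IntegerType::get(getGlobalContext(), 32).
      const Type *I32 = IntegerType::get(mod->getContext(), 32);
      (void)I32;
      return mod;
    }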
diff --git a/lib/Target/CppBackend/CPPTargetMachine.h b/lib/Target/CppBackend/CPPTargetMachine.h
index 1f74f76..b7aae91 100644
--- a/lib/Target/CppBackend/CPPTargetMachine.h
+++ b/lib/Target/CppBackend/CPPTargetMachine.h
@@ -30,7 +30,8 @@ struct CPPTargetMachine : public TargetMachine {
virtual bool addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel);
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
diff --git a/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
new file mode 100644
index 0000000..cfb2fc8
--- /dev/null
+++ b/lib/Target/MBlaze/AsmPrinter/CMakeLists.txt
@@ -0,0 +1,9 @@
+include_directories(
+ ${CMAKE_CURRENT_BINARY_DIR}/..
+ ${CMAKE_CURRENT_SOURCE_DIR}/..
+ )
+
+add_llvm_library(LLVMMBlazeAsmPrinter
+ MBlazeAsmPrinter.cpp
+ )
+add_dependencies(LLVMMBlazeAsmPrinter MBlazeCodeGenTable_gen)
\ No newline at end of file
diff --git a/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
new file mode 100644
index 0000000..6fe1026
--- /dev/null
+++ b/lib/Target/MBlaze/AsmPrinter/MBlazeAsmPrinter.cpp
@@ -0,0 +1,302 @@
+//===-- MBlazeAsmPrinter.cpp - MBlaze LLVM assembly writer ----------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains a printer that converts from our internal representation
+// of machine-dependent LLVM code to GAS-format MBlaze assembly language.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-asm-printer"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Module.h"
+#include "llvm/CodeGen/AsmPrinter.h"
+#include "llvm/CodeGen/DwarfWriter.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/MC/MCStreamer.h"
+#include "llvm/MC/MCAsmInfo.h"
+#include "llvm/MC/MCSymbol.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/ADT/Statistic.h"
+#include "llvm/ADT/StringExtras.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/FormattedStream.h"
+#include "llvm/Support/MathExtras.h"
+#include <cctype>
+
+using namespace llvm;
+
+namespace {
+ class MBlazeAsmPrinter : public AsmPrinter {
+ const MBlazeSubtarget *Subtarget;
+ public:
+ explicit MBlazeAsmPrinter(formatted_raw_ostream &O, TargetMachine &TM,
+ MCContext &Ctx, MCStreamer &Streamer,
+ const MCAsmInfo *T )
+ : AsmPrinter(O, TM, Ctx, Streamer, T) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+ }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Assembly Printer";
+ }
+
+ bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode);
+ void printOperand(const MachineInstr *MI, int opNum);
+ void printUnsignedImm(const MachineInstr *MI, int opNum);
+ void printFSLImm(const MachineInstr *MI, int opNum);
+ void printMemOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printFCCOperand(const MachineInstr *MI, int opNum,
+ const char *Modifier = 0);
+ void printSavedRegsBitmask();
+ void printHex32(unsigned int Value);
+
+ const char *emitCurrentABIString();
+ void emitFrameDirective();
+
+ void printInstruction(const MachineInstr *MI); // autogenerated.
+ void EmitInstruction(const MachineInstr *MI) {
+ printInstruction(MI);
+ O << '\n';
+ }
+ virtual void EmitFunctionBodyStart();
+ virtual void EmitFunctionBodyEnd();
+ static const char *getRegisterName(unsigned RegNo);
+
+ virtual void EmitFunctionEntryLabel();
+ void EmitStartOfAsmFile(Module &M);
+ };
+} // end of anonymous namespace
+
+#include "MBlazeGenAsmWriter.inc"
+
+//===----------------------------------------------------------------------===//
+//
+// MBlaze Asm Directives
+//
+// -- Frame directive "frame Stackpointer, Stacksize, RARegister"
+// Describe the stack frame.
+//
+// -- Mask directives "mask bitmask, offset"
+// Tells the assembler which registers are saved and where.
+// bitmask - contains a little endian bitset indicating which registers are
+// saved on function prologue (e.g. with a 0x80000000 mask, the
+// assembler knows register 31 (RA) is saved at prologue).
+// offset - the position before stack pointer subtraction indicating where
+// the first saved register on prologue is located (e.g. with a -8 offset,
+// the first saved register is stored at address stacksize-8; see below).
+//
+// Consider the following function prologue:
+//
+// .frame R19,48,R15
+// .mask 0xc0000000,-8
+// addiu R1, R1, -48
+// sw R15, 40(R1)
+// sw R19, 36(R1)
+//
+// With a 0xc0000000 mask, the assembler knows the register 15 (R15) and
+// 19 (R19) are saved at prologue. As the save order on prologue is from
+// left to right, R15 is saved first. A -8 offset means that after the
+// stack pointer subtraction, the first register in the mask (R15) will be
+// saved at address 48-8=40.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Mask directives
+//===----------------------------------------------------------------------===//
+
+// Create a bitmask with all callee saved registers for CPU or Floating Point
+// registers. For CPU registers consider RA, GP and FP for saving if necessary.
+void MBlazeAsmPrinter::printSavedRegsBitmask() {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+ const MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>();
+
+ // CPU Saved Registers Bitmasks
+ unsigned int CPUBitmask = 0;
+
+ // Set the CPU Bitmasks
+ const MachineFrameInfo *MFI = MF->getFrameInfo();
+ const std::vector<CalleeSavedInfo> &CSI = MFI->getCalleeSavedInfo();
+ for (unsigned i = 0, e = CSI.size(); i != e; ++i) {
+ unsigned RegNum = MBlazeRegisterInfo::getRegisterNumbering(CSI[i].getReg());
+ if (CSI[i].getRegClass() == MBlaze::CPURegsRegisterClass)
+ CPUBitmask |= (1 << RegNum);
+ }
+
+ // Return Address and Frame registers must also be set in CPUBitmask.
+ if (RI.hasFP(*MF))
+ CPUBitmask |= (1 << MBlazeRegisterInfo::
+ getRegisterNumbering(RI.getFrameRegister(*MF)));
+
+ if (MFI->hasCalls())
+ CPUBitmask |= (1 << MBlazeRegisterInfo::
+ getRegisterNumbering(RI.getRARegister()));
+
+ // Print CPUBitmask
+ O << "\t.mask \t"; printHex32(CPUBitmask); O << ','
+ << MBlazeFI->getCPUTopSavedRegOff() << '\n';
+}
+
+// Print a 32 bit hex number, padded with leading zeros to all eight nibbles.
+void MBlazeAsmPrinter::printHex32(unsigned int Value) {
+ O << "0x";
+ for (int i = 7; i >= 0; i--)
+ O << utohexstr( (Value & (0xF << (i*4))) >> (i*4) );
+}
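Worked example for the two routines above (register choice assumed, not taken from the commit): a function saving only R15 and R19 gets CPUBitmask = (1 << 15) | (1 << 19) = 0x00088000, and printHex32 then emits exactly "0x00088000" -- one utohexstr() digit per nibble, so the leading zeros that a plain hex print would drop are preserved.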
+
+//===----------------------------------------------------------------------===//
+// Frame and Set directives
+//===----------------------------------------------------------------------===//
+
+/// Frame Directive
+void MBlazeAsmPrinter::emitFrameDirective() {
+ const TargetRegisterInfo &RI = *TM.getRegisterInfo();
+
+ unsigned stackReg = RI.getFrameRegister(*MF);
+ unsigned returnReg = RI.getRARegister();
+ unsigned stackSize = MF->getFrameInfo()->getStackSize();
+
+
+ O << "\t.frame\t" << getRegisterName(stackReg)
+ << ',' << stackSize << ','
+ << getRegisterName(returnReg)
+ << '\n';
+}
+
+void MBlazeAsmPrinter::EmitFunctionEntryLabel() {
+ O << "\t.ent\t" << *CurrentFnSym << '\n';
+ OutStreamer.EmitLabel(CurrentFnSym);
+}
+
+/// EmitFunctionBodyStart - Targets can override this to emit stuff before
+/// the first basic block in the function.
+void MBlazeAsmPrinter::EmitFunctionBodyStart() {
+ emitFrameDirective();
+ printSavedRegsBitmask();
+}
+
+/// EmitFunctionBodyEnd - Targets can override this to emit stuff after
+/// the last basic block in the function.
+void MBlazeAsmPrinter::EmitFunctionBodyEnd() {
+ O << "\t.end\t" << *CurrentFnSym << '\n';
+}
+
+// Print out an operand for an inline asm expression.
+bool MBlazeAsmPrinter::
+PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
+ unsigned AsmVariant, const char *ExtraCode) {
+ // Does this asm operand have a single letter operand modifier?
+ if (ExtraCode && ExtraCode[0])
+ return true; // Unknown modifier.
+
+ printOperand(MI, OpNo);
+ return false;
+}
+
+void MBlazeAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+
+ switch (MO.getType()) {
+ case MachineOperand::MO_Register:
+ O << getRegisterName(MO.getReg());
+ break;
+
+ case MachineOperand::MO_Immediate:
+ O << (int)MO.getImm();
+ break;
+
+ case MachineOperand::MO_FPImmediate: {
+ const ConstantFP* fp = MO.getFPImm();
+ printHex32(fp->getValueAPF().bitcastToAPInt().getZExtValue());
+ O << ";\t# immediate = " << *fp;
+ break;
+ }
+
+ case MachineOperand::MO_MachineBasicBlock:
+ O << *MO.getMBB()->getSymbol(OutContext);
+ return;
+
+ case MachineOperand::MO_GlobalAddress:
+ O << *GetGlobalValueSymbol(MO.getGlobal());
+ break;
+
+ case MachineOperand::MO_ExternalSymbol:
+ O << *GetExternalSymbolSymbol(MO.getSymbolName());
+ break;
+
+ case MachineOperand::MO_JumpTableIndex:
+ O << MAI->getPrivateGlobalPrefix() << "JTI" << getFunctionNumber()
+ << '_' << MO.getIndex();
+ break;
+
+ case MachineOperand::MO_ConstantPoolIndex:
+ O << MAI->getPrivateGlobalPrefix() << "CPI"
+ << getFunctionNumber() << "_" << MO.getIndex();
+ if (MO.getOffset())
+ O << "+" << MO.getOffset();
+ break;
+
+ default:
+ llvm_unreachable("<unknown operand type>");
+ }
+}
+
+void MBlazeAsmPrinter::printUnsignedImm(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.getType() == MachineOperand::MO_Immediate)
+ O << (unsigned int)MO.getImm();
+ else
+ printOperand(MI, opNum);
+}
+
+void MBlazeAsmPrinter::printFSLImm(const MachineInstr *MI, int opNum) {
+ const MachineOperand &MO = MI->getOperand(opNum);
+ if (MO.getType() == MachineOperand::MO_Immediate)
+ O << "rfsl" << (unsigned int)MO.getImm();
+ else
+ printOperand(MI, opNum);
+}
+
+void MBlazeAsmPrinter::
+printMemOperand(const MachineInstr *MI, int opNum, const char *Modifier) {
+ printOperand(MI, opNum+1);
+ O << ", ";
+ printOperand(MI, opNum);
+}
+
+void MBlazeAsmPrinter::
+printFCCOperand(const MachineInstr *MI, int opNum, const char *Modifier) {
+ const MachineOperand& MO = MI->getOperand(opNum);
+ O << MBlaze::MBlazeFCCToString((MBlaze::CondCode)MO.getImm());
+}
+
+void MBlazeAsmPrinter::EmitStartOfAsmFile(Module &M) {
+}
+
+// Force static initialization.
+extern "C" void LLVMInitializeMBlazeAsmPrinter() {
+ RegisterAsmPrinter<MBlazeAsmPrinter> X(TheMBlazeTarget);
+}
diff --git a/lib/Target/MBlaze/AsmPrinter/Makefile b/lib/Target/MBlaze/AsmPrinter/Makefile
new file mode 100644
index 0000000..c8e4d8f
--- /dev/null
+++ b/lib/Target/MBlaze/AsmPrinter/Makefile
@@ -0,0 +1,17 @@
+##===- lib/Target/MBlaze/AsmPrinter/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeAsmPrinter
+
+# Hack: we need to include 'main' MBlaze target directory to grab
+# private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MBlaze/CMakeLists.txt b/lib/Target/MBlaze/CMakeLists.txt
new file mode 100644
index 0000000..c93e3df
--- /dev/null
+++ b/lib/Target/MBlaze/CMakeLists.txt
@@ -0,0 +1,27 @@
+set(LLVM_TARGET_DEFINITIONS MBlaze.td)
+
+tablegen(MBlazeGenRegisterInfo.h.inc -gen-register-desc-header)
+tablegen(MBlazeGenRegisterNames.inc -gen-register-enums)
+tablegen(MBlazeGenRegisterInfo.inc -gen-register-desc)
+tablegen(MBlazeGenInstrNames.inc -gen-instr-enums)
+tablegen(MBlazeGenInstrInfo.inc -gen-instr-desc)
+tablegen(MBlazeGenAsmWriter.inc -gen-asm-writer)
+tablegen(MBlazeGenDAGISel.inc -gen-dag-isel)
+tablegen(MBlazeGenCallingConv.inc -gen-callingconv)
+tablegen(MBlazeGenSubtarget.inc -gen-subtarget)
+tablegen(MBlazeGenIntrinsics.inc -gen-tgt-intrinsic)
+
+add_llvm_target(MBlazeCodeGen
+ MBlazeDelaySlotFiller.cpp
+ MBlazeInstrInfo.cpp
+ MBlazeISelDAGToDAG.cpp
+ MBlazeISelLowering.cpp
+ MBlazeMCAsmInfo.cpp
+ MBlazeRegisterInfo.cpp
+ MBlazeSubtarget.cpp
+ MBlazeTargetMachine.cpp
+ MBlazeTargetObjectFile.cpp
+ MBlazeIntrinsicInfo.cpp
+ )
+
+target_link_libraries (LLVMMBlazeCodeGen LLVMSelectionDAG)
diff --git a/lib/Target/MBlaze/MBlaze.h b/lib/Target/MBlaze/MBlaze.h
new file mode 100644
index 0000000..f9d828b
--- /dev/null
+++ b/lib/Target/MBlaze/MBlaze.h
@@ -0,0 +1,39 @@
+//===-- MBlaze.h - Top-level interface for MBlaze ---------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the entry points for global functions defined in
+// the LLVM MBlaze back-end.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef TARGET_MBLAZE_H
+#define TARGET_MBLAZE_H
+
+#include "llvm/Target/TargetMachine.h"
+
+namespace llvm {
+ class MBlazeTargetMachine;
+ class FunctionPass;
+ class MachineCodeEmitter;
+ class formatted_raw_ostream;
+
+ FunctionPass *createMBlazeISelDag(MBlazeTargetMachine &TM);
+ FunctionPass *createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &TM);
+
+ extern Target TheMBlazeTarget;
+} // end namespace llvm
+
+// Defines symbolic names for MBlaze registers. This defines a mapping from
+// register name to register number.
+#include "MBlazeGenRegisterNames.inc"
+
+// Defines symbolic names for the MBlaze instructions.
+#include "MBlazeGenInstrNames.inc"
+
+#endif
diff --git a/lib/Target/MBlaze/MBlaze.td b/lib/Target/MBlaze/MBlaze.td
new file mode 100644
index 0000000..1679752
--- /dev/null
+++ b/lib/Target/MBlaze/MBlaze.td
@@ -0,0 +1,85 @@
+//===- MBlaze.td - Describe the MBlaze Target Machine -----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This is the top level entry point for the MBlaze target.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Target-independent interfaces
+//===----------------------------------------------------------------------===//
+
+include "llvm/Target/Target.td"
+
+//===----------------------------------------------------------------------===//
+// Register File, Calling Conv, Instruction Descriptions
+//===----------------------------------------------------------------------===//
+
+include "MBlazeRegisterInfo.td"
+include "MBlazeSchedule.td"
+include "MBlazeIntrinsics.td"
+include "MBlazeInstrInfo.td"
+include "MBlazeCallingConv.td"
+
+def MBlazeInstrInfo : InstrInfo {
+ let TSFlagsFields = [];
+ let TSFlagsShifts = [];
+}
+
+
+//===----------------------------------------------------------------------===//
+// Microblaze Subtarget features //
+//===----------------------------------------------------------------------===//
+
+def FeaturePipe3 : SubtargetFeature<"pipe3", "HasPipe3", "true",
+ "Implements 3-stage pipeline.">;
+def FeatureBarrel : SubtargetFeature<"barrel", "HasBarrel", "true",
+ "Implements barrel shifter.">;
+def FeatureDiv : SubtargetFeature<"div", "HasDiv", "true",
+ "Implements hardware divider.">;
+def FeatureMul : SubtargetFeature<"mul", "HasMul", "true",
+ "Implements hardware multiplier.">;
+def FeatureFSL : SubtargetFeature<"fsl", "HasFSL", "true",
+ "Implements FSL instructions.">;
+def FeatureEFSL : SubtargetFeature<"efsl", "HasEFSL", "true",
+ "Implements extended FSL instructions.">;
+def FeatureMSRSet : SubtargetFeature<"msrset", "HasMSRSet", "true",
+ "Implements MSR register set and clear.">;
+def FeatureException : SubtargetFeature<"exception", "HasException", "true",
+ "Implements hardware exception support.">;
+def FeaturePatCmp : SubtargetFeature<"patcmp", "HasPatCmp", "true",
+ "Implements pattern compare instruction.">;
+def FeatureFPU : SubtargetFeature<"fpu", "HasFPU", "true",
+ "Implements floating point unit.">;
+def FeatureESR : SubtargetFeature<"esr", "HasESR", "true",
+ "Implements ESR and EAR registers">;
+def FeaturePVR : SubtargetFeature<"pvr", "HasPVR", "true",
+ "Implements processor version register.">;
+def FeatureMul64 : SubtargetFeature<"mul64", "HasMul64", "true",
+ "Implements multiplier with 64-bit result">;
+def FeatureSqrt : SubtargetFeature<"sqrt", "HasSqrt", "true",
+ "Implements sqrt and floating point convert.">;
+def FeatureMMU : SubtargetFeature<"mmu", "HasMMU", "true",
+ "Implements memory management unit.">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze processors supported.
+//===----------------------------------------------------------------------===//
+
+class Proc<string Name, list<SubtargetFeature> Features>
+ : Processor<Name, MBlazeGenericItineraries, Features>;
+
+
+def : Proc<"v400", []>;
+def : Proc<"v500", []>;
+def : Proc<"v600", []>;
+def : Proc<"v700", []>;
+def : Proc<"v710", []>;
+
+def MBlaze : Target {
+ let InstructionSet = MBlazeInstrInfo;
+}
diff --git a/lib/Target/MBlaze/MBlazeCallingConv.td b/lib/Target/MBlaze/MBlazeCallingConv.td
new file mode 100644
index 0000000..dfc87f5
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeCallingConv.td
@@ -0,0 +1,41 @@
+//===- MBlazeCallingConv.td - Calling Conventions for MBlaze ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+// This describes the calling conventions for MBlaze architecture.
+//===----------------------------------------------------------------------===//
+
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>:
+ CCIf<!strconcat("State.getTarget().getSubtarget<MBlazeSubtarget>().", F), A>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze ABI Calling Convention
+//===----------------------------------------------------------------------===//
+
+def CC_MBlaze : CallingConv<[
+ // Promote i8/i16 arguments to i32.
+ CCIfType<[i8, i16], CCPromoteToType<i32>>,
+
+ // Integer arguments are passed in integer registers.
+ CCIfType<[i32], CCAssignToReg<[R5, R6, R7, R8, R9, R10]>>,
+
+ // Single fp arguments are passed in floating point registers
+ CCIfType<[f32], CCAssignToReg<[F5, F6, F7, F8, F9, F10]>>,
+
+ // 32-bit values get stored in stack slots that are 4 bytes in
+ // size and 4-byte aligned.
+ CCIfType<[i32, f32], CCAssignToStack<4, 4>>
+]>;
+
+def RetCC_MBlaze : CallingConv<[
+ // i32 are returned in registers R3, R4
+ CCIfType<[i32], CCAssignToReg<[R3, R4]>>,
+
+ // f32 are returned in registers F3, F4
+ CCIfType<[f32], CCAssignToReg<[F3, F4]>>
+]>;
diff --git a/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
new file mode 100644
index 0000000..42fea25
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp
@@ -0,0 +1,75 @@
+//===-- DelaySlotFiller.cpp - MBlaze delay slot filler --------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// A simple pass that fills delay slots with NOPs.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "delay-slot-filler"
+
+#include "MBlaze.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/ADT/Statistic.h"
+
+using namespace llvm;
+
+STATISTIC(FilledSlots, "Number of delay slots filled");
+
+namespace {
+ struct Filler : public MachineFunctionPass {
+
+ TargetMachine &TM;
+ const TargetInstrInfo *TII;
+
+ static char ID;
+ Filler(TargetMachine &tm)
+ : MachineFunctionPass(&ID), TM(tm), TII(tm.getInstrInfo()) { }
+
+ virtual const char *getPassName() const {
+ return "MBlaze Delay Slot Filler";
+ }
+
+ bool runOnMachineBasicBlock(MachineBasicBlock &MBB);
+ bool runOnMachineFunction(MachineFunction &F) {
+ bool Changed = false;
+ for (MachineFunction::iterator FI = F.begin(), FE = F.end();
+ FI != FE; ++FI)
+ Changed |= runOnMachineBasicBlock(*FI);
+ return Changed;
+ }
+
+ };
+ char Filler::ID = 0;
+} // end of anonymous namespace
+
+/// runOnMachineBasicBlock - Fill in delay slots for the given basic block.
+/// Currently, we fill delay slots with NOPs. We assume there is only one
+/// delay slot per delayed instruction.
+bool Filler::runOnMachineBasicBlock(MachineBasicBlock &MBB) {
+ bool Changed = false;
+ for (MachineBasicBlock::iterator I = MBB.begin(); I != MBB.end(); ++I)
+ if (I->getDesc().hasDelaySlot()) {
+ MachineBasicBlock::iterator J = I;
+ ++J;
+ BuildMI(MBB, J, I->getDebugLoc(), TII->get(MBlaze::NOP));
+ ++FilledSlots;
+ Changed = true;
+ }
+ return Changed;
+}
+
+/// createMBlazeDelaySlotFillerPass - Returns a pass that fills in delay
+/// slots in MBlaze MachineFunctions
+FunctionPass *llvm::createMBlazeDelaySlotFillerPass(MBlazeTargetMachine &tm) {
+ return new Filler(tm);
+}
+
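How the filler is scheduled is not part of this excerpt; targets conventionally add such passes from TargetMachine::addPreEmitPass, so the assumed MBlaze wiring would look like:

    // Assumed hookup; MBlazeTargetMachine.cpp is not shown in this diff.
    bool MBlazeTargetMachine::addPreEmitPass(PassManagerBase &PM,
                                             CodeGenOpt::Level OptLevel) {
      PM.add(createMBlazeDelaySlotFillerPass(*this));
      return true;  // a pass was added
    }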
diff --git a/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
new file mode 100644
index 0000000..a0ebea0
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp
@@ -0,0 +1,339 @@
+//===-- MBlazeISelDAGToDAG.cpp - A dag to dag inst selector for MBlaze ----===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines an instruction selector for the MBlaze target.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-isel"
+#include "MBlaze.h"
+#include "MBlazeISelLowering.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/GlobalValue.h"
+#include "llvm/Instructions.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Support/CFG.h"
+#include "llvm/Type.h"
+#include "llvm/CodeGen/MachineConstantPool.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+//===----------------------------------------------------------------------===//
+// Instruction Selector Implementation
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlazeDAGToDAGISel - MBlaze specific code to select MBlaze machine
+// instructions for SelectionDAG operations.
+//===----------------------------------------------------------------------===//
+namespace {
+
+class MBlazeDAGToDAGISel : public SelectionDAGISel {
+
+ /// TM - Keep a reference to MBlazeTargetMachine.
+ MBlazeTargetMachine &TM;
+
+ /// Subtarget - Keep a pointer to the MBlazeSubtarget around so that we can
+ /// make the right decision when generating code for different targets.
+ const MBlazeSubtarget &Subtarget;
+
+public:
+ explicit MBlazeDAGToDAGISel(MBlazeTargetMachine &tm) :
+ SelectionDAGISel(tm),
+ TM(tm), Subtarget(tm.getSubtarget<MBlazeSubtarget>()) {}
+
+ // Pass Name
+ virtual const char *getPassName() const {
+ return "MBlaze DAG->DAG Pattern Instruction Selection";
+ }
+private:
+ // Include the pieces autogenerated from the target description.
+ #include "MBlazeGenDAGISel.inc"
+
+ /// getTargetMachine - Return a reference to the TargetMachine, casted
+ /// to the target-specific type.
+ const MBlazeTargetMachine &getTargetMachine() {
+ return static_cast<const MBlazeTargetMachine &>(TM);
+ }
+
+ /// getInstrInfo - Return a reference to the TargetInstrInfo, casted
+ /// to the target-specific type.
+ const MBlazeInstrInfo *getInstrInfo() {
+ return getTargetMachine().getInstrInfo();
+ }
+
+ SDNode *getGlobalBaseReg();
+ SDNode *Select(SDNode *N);
+
+ // Complex Pattern.
+  bool SelectAddr(SDNode *Op, SDValue N,
+                  SDValue &Offset, SDValue &Base);
+
+ // Address Selection
+ bool SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index);
+ bool SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base);
+
+ // getI32Imm - Return a target constant with the specified value, of type i32.
+ inline SDValue getI32Imm(unsigned Imm) {
+ return CurDAG->getTargetConstant(Imm, MVT::i32);
+ }
+};
+
+}
+
+/// isIntS32Immediate - This method tests to see if the node is a 32-bit or
+/// 64-bit immediate whose value can be accurately represented as a sign
+/// extension from a 32-bit value. If so, this returns true and stores the
+/// immediate in Imm.
+static bool isIntS32Immediate(SDNode *N, int32_t &Imm) {
+ unsigned Opc = N->getOpcode();
+ if (Opc != ISD::Constant)
+ return false;
+
+ Imm = (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ if (N->getValueType(0) == MVT::i32)
+ return Imm == (int32_t)cast<ConstantSDNode>(N)->getZExtValue();
+ else
+ return Imm == (int64_t)cast<ConstantSDNode>(N)->getZExtValue();
+}
+
+static bool isIntS32Immediate(SDValue Op, int32_t &Imm) {
+ return isIntS32Immediate(Op.getNode(), Imm);
+}
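+// Worked example: an i64 Constant holding 0xFFFFFFFF80000000 sign-extends
+// from the 32-bit value -2147483648 and is accepted, while one holding
+// 0x00000000FFFFFFFF is rejected because (int32_t)-1 != 4294967295.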
+
+
+/// SelectAddrRegReg - Given the specified address, check to see if it can
+/// be represented as an indexed [r+r] operation. Returns false if it can
+/// be more efficiently represented with [r+imm].
+bool MBlazeDAGToDAGISel::
+SelectAddrRegReg(SDNode *Op, SDValue N, SDValue &Base, SDValue &Index) {
+ if (N.getOpcode() == ISD::FrameIndex) return false;
+ if (N.getOpcode() == ISD::TargetExternalSymbol ||
+ N.getOpcode() == ISD::TargetGlobalAddress)
+ return false; // direct calls.
+
+  if (N.getNumOperands() >= 2 &&
+      (N.getOperand(0).getOpcode() == ISD::TargetJumpTable ||
+       N.getOperand(1).getOpcode() == ISD::TargetJumpTable))
+    return false; // jump tables.
+
+ int32_t imm = 0;
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ if (isIntS32Immediate(N.getOperand(1), imm))
+ return false; // r+i
+
+ Base = N.getOperand(1);
+ Index = N.getOperand(0);
+ return true;
+ }
+
+ return false;
+}
+
+/// Returns true if the address N can be represented by a base register plus
+/// a signed 32-bit displacement [r+imm], and if it is not better
+/// represented as reg+reg.
+bool MBlazeDAGToDAGISel::
+SelectAddrRegImm(SDNode *Op, SDValue N, SDValue &Disp, SDValue &Base) {
+ // If this can be more profitably realized as r+r, fail.
+ if (SelectAddrRegReg(Op, N, Disp, Base))
+ return false;
+
+ if (N.getOpcode() == ISD::ADD || N.getOpcode() == ISD::OR) {
+ int32_t imm = 0;
+ if (isIntS32Immediate(N.getOperand(1), imm)) {
+ Disp = CurDAG->getTargetConstant(imm, MVT::i32);
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ } else {
+ Base = N.getOperand(0);
+ }
+      DEBUG(errs() << "SelectAddrRegImm: using [r+imm] form\n");
+ return true; // [r+i]
+ }
+ } else if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(N)) {
+ // Loading from a constant address.
+ uint32_t Imm = CN->getZExtValue();
+ Disp = CurDAG->getTargetConstant(Imm, CN->getValueType(0));
+ Base = CurDAG->getRegister(MBlaze::R0, CN->getValueType(0));
+    DEBUG(errs() << "SelectAddrRegImm: using constant address\n");
+ return true;
+ }
+
+ Disp = CurDAG->getTargetConstant(0, TM.getTargetLowering()->getPointerTy());
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(N))
+ Base = CurDAG->getTargetFrameIndex(FI->getIndex(), N.getValueType());
+ else
+ Base = N;
+ return true; // [r+0]
+}
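+// Example (register names illustrative): N = (add r5, 1234) yields
+// Disp = 1234 and Base = r5; an address with no recognizable pattern
+// falls through to Base = N, Disp = 0, i.e. a [r+0] access.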
+
+/// getGlobalBaseReg - Output the instructions required to put the
+/// GOT address into a register.
+SDNode *MBlazeDAGToDAGISel::getGlobalBaseReg() {
+ unsigned GlobalBaseReg = getInstrInfo()->getGlobalBaseReg(MF);
+ return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
+}
+
+/// ComplexPattern used in MBlazeInstrInfo.td
+/// Used by MBlaze load/store instructions
+bool MBlazeDAGToDAGISel::
+SelectAddr(SDNode *Op, SDValue Addr, SDValue &Offset, SDValue &Base) {
+ // if Address is FI, get the TargetFrameIndex.
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>(Addr)) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+ }
+
+  // On PIC code, load the global address from the GOT
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ if ((Addr.getOpcode() == ISD::TargetGlobalAddress) ||
+ (Addr.getOpcode() == ISD::TargetConstantPool) ||
+ (Addr.getOpcode() == ISD::TargetJumpTable)){
+ Base = CurDAG->getRegister(MBlaze::R15, MVT::i32);
+ Offset = Addr;
+ return true;
+ }
+ } else {
+ if ((Addr.getOpcode() == ISD::TargetExternalSymbol ||
+ Addr.getOpcode() == ISD::TargetGlobalAddress))
+ return false;
+ }
+
+ // Operand is a result from an ADD.
+ if (Addr.getOpcode() == ISD::ADD) {
+ if (ConstantSDNode *CN = dyn_cast<ConstantSDNode>(Addr.getOperand(1))) {
+ if (Predicate_immSExt16(CN)) {
+
+ // If the first operand is a FI, get the TargetFI Node
+ if (FrameIndexSDNode *FIN = dyn_cast<FrameIndexSDNode>
+ (Addr.getOperand(0))) {
+ Base = CurDAG->getTargetFrameIndex(FIN->getIndex(), MVT::i32);
+ } else {
+ Base = Addr.getOperand(0);
+ }
+
+ Offset = CurDAG->getTargetConstant(CN->getZExtValue(), MVT::i32);
+ return true;
+ }
+ }
+ }
+
+ Base = Addr;
+ Offset = CurDAG->getTargetConstant(0, MVT::i32);
+ return true;
+}
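+// Example (illustrative): a load from a fixed stack object matches the
+// first case, giving Base = TargetFrameIndex and Offset = 0; (add r5, 8)
+// with a simm16 offset gives Base = r5 and Offset = 8; anything else
+// falls back to Base = Addr, Offset = 0.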
+
+/// Select - Select instructions not customized. Used for
+/// expanded, promoted and normal instructions.
+SDNode* MBlazeDAGToDAGISel::Select(SDNode *Node) {
+ unsigned Opcode = Node->getOpcode();
+ DebugLoc dl = Node->getDebugLoc();
+
+ // Dump information about the Node being selected
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
+
+ // If we have a custom node, we already have selected!
+ if (Node->isMachineOpcode()) {
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
+ return NULL;
+ }
+
+  ///
+  // Instruction selection not handled by the auto-generated
+  // tablegen selector should be handled here.
+  ///
+ switch(Opcode) {
+ default: break;
+
+ // Get target GOT address.
+ case ISD::GLOBAL_OFFSET_TABLE:
+ return getGlobalBaseReg();
+
+ case ISD::FrameIndex: {
+ SDValue imm = CurDAG->getTargetConstant(0, MVT::i32);
+    int FI = cast<FrameIndexSDNode>(Node)->getIndex();
+ EVT VT = Node->getValueType(0);
+ SDValue TFI = CurDAG->getTargetFrameIndex(FI, VT);
+ unsigned Opc = MBlaze::ADDI;
+ if (Node->hasOneUse())
+ return CurDAG->SelectNodeTo(Node, Opc, VT, TFI, imm);
+ return CurDAG->getMachineNode(Opc, dl, VT, TFI, imm);
+ }
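+  // The FrameIndex case above materializes the frame address as an ADDI of
+  // the still-abstract frame index to an immediate of 0; once frame indices
+  // are eliminated this becomes, e.g., "addi rD, r1, <offset>" (illustrative
+  // encoding).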
+
+
+  /// Handle direct and indirect calls when using PIC. With PIC, when the
+  /// GOT is smaller than about 64k (small code model) the GA target is
+  /// loaded with only one instruction. Otherwise the GA target must be
+  /// loaded with three instructions.
+ case MBlazeISD::JmpLink: {
+ if (TM.getRelocationModel() == Reloc::PIC_) {
+ SDValue Chain = Node->getOperand(0);
+ SDValue Callee = Node->getOperand(1);
+ SDValue R20Reg = CurDAG->getRegister(MBlaze::R20, MVT::i32);
+ SDValue InFlag(0, 0);
+
+ if ( (isa<GlobalAddressSDNode>(Callee)) ||
+ (isa<ExternalSymbolSDNode>(Callee)) )
+ {
+ /// Direct call for global addresses and external symbols
+ SDValue GPReg = CurDAG->getRegister(MBlaze::R15, MVT::i32);
+
+ // Use load to get GOT target
+ SDValue Ops[] = { Callee, GPReg, Chain };
+ SDValue Load = SDValue(CurDAG->getMachineNode(MBlaze::LW, dl,
+ MVT::i32, MVT::Other, Ops, 3), 0);
+ Chain = Load.getValue(1);
+
+        // The call target must be placed in R20
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Load, InFlag);
+ } else
+ /// Indirect call
+ Chain = CurDAG->getCopyToReg(Chain, dl, R20Reg, Callee, InFlag);
+
+ // Emit Jump and Link Register
+ SDNode *ResNode = CurDAG->getMachineNode(MBlaze::BRLID, dl, MVT::Other,
+ MVT::Flag, R20Reg, Chain);
+ Chain = SDValue(ResNode, 0);
+ InFlag = SDValue(ResNode, 1);
+ ReplaceUses(SDValue(Node, 0), Chain);
+ ReplaceUses(SDValue(Node, 1), InFlag);
+ return ResNode;
+ }
+ }
+ }
+
+ // Select the default instruction
+ SDNode *ResNode = SelectCode(Node);
+
+ DEBUG(errs() << "=> ");
+ if (ResNode == NULL || ResNode == Node)
+ DEBUG(Node->dump(CurDAG));
+ else
+ DEBUG(ResNode->dump(CurDAG));
+ DEBUG(errs() << "\n");
+ return ResNode;
+}
+
+/// createMBlazeISelDag - This pass converts a legalized DAG into a
+/// MBlaze-specific DAG, ready for instruction scheduling.
+FunctionPass *llvm::createMBlazeISelDag(MBlazeTargetMachine &TM) {
+ return new MBlazeDAGToDAGISel(TM);
+}
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.cpp b/lib/Target/MBlaze/MBlazeISelLowering.cpp
new file mode 100644
index 0000000..7790248
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeISelLowering.cpp
@@ -0,0 +1,881 @@
+//===-- MBlazeISelLowering.cpp - MBlaze DAG Lowering Implementation -------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-lower"
+#include "MBlazeISelLowering.h"
+#include "MBlazeMachineFunction.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/CallingConv.h"
+#include "llvm/CodeGen/CallingConvLower.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SelectionDAGISel.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+const char *MBlazeTargetLowering::getTargetNodeName(unsigned Opcode) const {
+ switch (Opcode) {
+ case MBlazeISD::JmpLink : return "MBlazeISD::JmpLink";
+ case MBlazeISD::GPRel : return "MBlazeISD::GPRel";
+ case MBlazeISD::Wrap : return "MBlazeISD::Wrap";
+ case MBlazeISD::ICmp : return "MBlazeISD::ICmp";
+ case MBlazeISD::Ret : return "MBlazeISD::Ret";
+ case MBlazeISD::Select_CC : return "MBlazeISD::Select_CC";
+ default : return NULL;
+ }
+}
+
+MBlazeTargetLowering::MBlazeTargetLowering(MBlazeTargetMachine &TM)
+ : TargetLowering(TM, new MBlazeTargetObjectFile()) {
+ Subtarget = &TM.getSubtarget<MBlazeSubtarget>();
+
+  // MBlaze does not have an i1 type, so use i32 for the results of
+  // setcc operations (slt, sgt, ...).
+ setBooleanContents(ZeroOrOneBooleanContent);
+
+ // Set up the register classes
+ addRegisterClass(MVT::i32, MBlaze::CPURegsRegisterClass);
+ if (Subtarget->hasFPU()) {
+ addRegisterClass(MVT::f32, MBlaze::FGR32RegisterClass);
+ setOperationAction(ISD::ConstantFP, MVT::f32, Legal);
+ }
+
+ // Floating point operations which are not supported
+ setOperationAction(ISD::FREM, MVT::f32, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i8, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i16, Expand);
+ setOperationAction(ISD::UINT_TO_FP, MVT::i32, Expand);
+ setOperationAction(ISD::FP_TO_UINT, MVT::i32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f32, Expand);
+ setOperationAction(ISD::FP_ROUND, MVT::f64, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOPYSIGN, MVT::f64, Expand);
+ setOperationAction(ISD::FSIN, MVT::f32, Expand);
+ setOperationAction(ISD::FCOS, MVT::f32, Expand);
+ setOperationAction(ISD::FPOWI, MVT::f32, Expand);
+ setOperationAction(ISD::FPOW, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG2, MVT::f32, Expand);
+ setOperationAction(ISD::FLOG10, MVT::f32, Expand);
+ setOperationAction(ISD::FEXP, MVT::f32, Expand);
+
+  // Extended load operations for i1 types must be promoted
+ setLoadExtAction(ISD::EXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::ZEXTLOAD, MVT::i1, Promote);
+ setLoadExtAction(ISD::SEXTLOAD, MVT::i1, Promote);
+
+ // MBlaze has no REM or DIVREM operations.
+ setOperationAction(ISD::UREM, MVT::i32, Expand);
+ setOperationAction(ISD::SREM, MVT::i32, Expand);
+ setOperationAction(ISD::SDIVREM, MVT::i32, Expand);
+ setOperationAction(ISD::UDIVREM, MVT::i32, Expand);
+
+ // If the processor doesn't support multiply then expand it
+ if (!Subtarget->hasMul()) {
+ setOperationAction(ISD::MUL, MVT::i32, Expand);
+ }
+
+ // If the processor doesn't support 64-bit multiply then expand
+ if (!Subtarget->hasMul() || !Subtarget->hasMul64()) {
+ setOperationAction(ISD::MULHS, MVT::i32, Expand);
+ setOperationAction(ISD::MULHS, MVT::i64, Expand);
+ setOperationAction(ISD::MULHU, MVT::i32, Expand);
+ setOperationAction(ISD::MULHU, MVT::i64, Expand);
+ }
+
+ // If the processor doesn't support division then expand
+ if (!Subtarget->hasDiv()) {
+ setOperationAction(ISD::UDIV, MVT::i32, Expand);
+ setOperationAction(ISD::SDIV, MVT::i32, Expand);
+ }
+
+ // Expand unsupported conversions
+ setOperationAction(ISD::BIT_CONVERT, MVT::f32, Expand);
+ setOperationAction(ISD::BIT_CONVERT, MVT::i32, Expand);
+
+ // Expand SELECT_CC
+ setOperationAction(ISD::SELECT_CC, MVT::Other, Expand);
+
+ // MBlaze doesn't have MUL_LOHI
+ setOperationAction(ISD::SMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i32, Expand);
+ setOperationAction(ISD::SMUL_LOHI, MVT::i64, Expand);
+ setOperationAction(ISD::UMUL_LOHI, MVT::i64, Expand);
+
+  // Used by legalize types to correctly generate the setcc result.
+  // Without this, every float setcc comes with an AND/OR on the result.
+  // We don't want that, since the fpcmp result goes to a flag register,
+  // which is used implicitly by brcond and select operations.
+ AddPromotedToType(ISD::SETCC, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT, MVT::i1, MVT::i32);
+ AddPromotedToType(ISD::SELECT_CC, MVT::i1, MVT::i32);
+
+ // MBlaze Custom Operations
+ setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
+ setOperationAction(ISD::GlobalTLSAddress, MVT::i32, Custom);
+ setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ setOperationAction(ISD::ConstantPool, MVT::i32, Custom);
+
+ // Operations not directly supported by MBlaze.
+ setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
+ setOperationAction(ISD::BR_JT, MVT::Other, Expand);
+ setOperationAction(ISD::BR_CC, MVT::Other, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i1, Expand);
+ setOperationAction(ISD::ROTL, MVT::i32, Expand);
+ setOperationAction(ISD::ROTR, MVT::i32, Expand);
+ setOperationAction(ISD::SHL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRA_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::SRL_PARTS, MVT::i32, Expand);
+ setOperationAction(ISD::CTLZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTTZ, MVT::i32, Expand);
+ setOperationAction(ISD::CTPOP, MVT::i32, Expand);
+ setOperationAction(ISD::BSWAP, MVT::i32, Expand);
+
+  // We don't support exception-handling labels yet.
+ setOperationAction(ISD::EH_LABEL, MVT::Other, Expand);
+
+ // Use the default for now
+ setOperationAction(ISD::STACKSAVE, MVT::Other, Expand);
+ setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+
+ // MBlaze doesn't have extending float->double load/store
+ setLoadExtAction(ISD::EXTLOAD, MVT::f32, Expand);
+ setTruncStoreAction(MVT::f64, MVT::f32, Expand);
+
+ setStackPointerRegisterToSaveRestore(MBlaze::R1);
+ computeRegisterProperties();
+}
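+// A minimal illustration of the Expand markings above (assumed behavior,
+// for exposition only): with Subtarget->hasMul() == false, a (mul i32 a, b)
+// node never reaches instruction selection; the legalizer replaces it with
+// a library call such as __mulsi3 or an equivalent shift-and-add sequence.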
+
+MVT::SimpleValueType MBlazeTargetLowering::getSetCCResultType(EVT VT) const {
+ return MVT::i32;
+}
+
+/// getFunctionAlignment - Return the Log2 alignment of this function.
+unsigned MBlazeTargetLowering::getFunctionAlignment(const Function *) const {
+ return 2;
+}
+
+SDValue MBlazeTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
+ switch (Op.getOpcode())
+ {
+ case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
+ case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
+ case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
+ case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
+ }
+ return SDValue();
+}
+
+//===----------------------------------------------------------------------===//
+// Lower helper functions
+//===----------------------------------------------------------------------===//
+MachineBasicBlock* MBlazeTargetLowering::
+EmitInstrWithCustomInserter(MachineInstr *MI, MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*,
+ MachineBasicBlock*> *EM) const {
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+
+ switch (MI->getOpcode()) {
+  default: llvm_unreachable("Unexpected instr type to insert");
+ case MBlaze::ShiftRL:
+ case MBlaze::ShiftRA:
+ case MBlaze::ShiftL: {
+ // To "insert" a shift left instruction, we actually have to insert a
+ // simple loop. The incoming instruction knows the destination vreg to
+ // set, the source vreg to operate over and the shift amount.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ // start:
+ // andi samt, samt, 31
+ // beqid samt, finish
+ // add dst, src, r0
+ // loop:
+ // addik samt, samt, -1
+ // sra dst, dst
+ // bneid samt, loop
+ // nop
+ // finish:
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &R = F->getRegInfo();
+ MachineBasicBlock *loop = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *finish = F->CreateMachineBasicBlock(LLVM_BB);
+
+ unsigned IAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ BuildMI(BB, dl, TII->get(MBlaze::ANDI), IAMT)
+ .addReg(MI->getOperand(2).getReg())
+ .addImm(31);
+
+ unsigned IVAL = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ BuildMI(BB, dl, TII->get(MBlaze::ADDI), IVAL)
+ .addReg(MI->getOperand(1).getReg())
+ .addImm(0);
+
+ BuildMI(BB, dl, TII->get(MBlaze::BEQID))
+ .addReg(IAMT)
+ .addMBB(finish);
+
+ F->insert(It, loop);
+ F->insert(It, finish);
+
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ // Also inform sdisel of the edge changes.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i) {
+ EM->insert(std::make_pair(*i, finish));
+ finish->addSuccessor(*i);
+ }
+
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(loop);
+ BB->addSuccessor(finish);
+
+ // Next, add the finish block as a successor of the loop block
+ loop->addSuccessor(finish);
+ loop->addSuccessor(loop);
+
+ unsigned DST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ unsigned NDST = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), DST)
+ .addReg(IVAL).addMBB(BB)
+ .addReg(NDST).addMBB(loop);
+
+ unsigned SAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ unsigned NAMT = R.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ BuildMI(loop, dl, TII->get(MBlaze::PHI), SAMT)
+ .addReg(IAMT).addMBB(BB)
+ .addReg(NAMT).addMBB(loop);
+
+ if (MI->getOpcode() == MBlaze::ShiftL)
+ BuildMI(loop, dl, TII->get(MBlaze::ADD), NDST).addReg(DST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRA)
+ BuildMI(loop, dl, TII->get(MBlaze::SRA), NDST).addReg(DST);
+ else if (MI->getOpcode() == MBlaze::ShiftRL)
+ BuildMI(loop, dl, TII->get(MBlaze::SRL), NDST).addReg(DST);
+ else
+ llvm_unreachable( "Cannot lower unknown shift instruction" );
+
+ BuildMI(loop, dl, TII->get(MBlaze::ADDI), NAMT)
+ .addReg(SAMT)
+ .addImm(-1);
+
+ BuildMI(loop, dl, TII->get(MBlaze::BNEID))
+ .addReg(NAMT)
+ .addMBB(loop);
+
+ BuildMI(finish, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(IVAL).addMBB(BB)
+ .addReg(NDST).addMBB(loop);
+
+ // The pseudo instruction is no longer needed so remove it
+ F->DeleteMachineInstr(MI);
+ return finish;
+ }
+
+ case MBlaze::Select_FCC:
+ case MBlaze::Select_CC: {
+ // To "insert" a SELECT_CC instruction, we actually have to insert the
+ // diamond control-flow pattern. The incoming instruction knows the
+ // destination vreg to set, the condition code register to branch on, the
+ // true/false values to select between, and a branch opcode to use.
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+    // BB:
+    // ...
+    // TrueVal = ...
+    // setcc r1, r2, r3
+    // bNE r1, r0, dneBB
+    // fallthrough --> flsBB
+ MachineFunction *F = BB->getParent();
+ MachineBasicBlock *flsBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *dneBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ unsigned Opc;
+ switch (MI->getOperand(4).getImm()) {
+ default: llvm_unreachable( "Unknown branch condition" );
+ case MBlazeCC::EQ: Opc = MBlaze::BNEID; break;
+ case MBlazeCC::NE: Opc = MBlaze::BEQID; break;
+ case MBlazeCC::GT: Opc = MBlaze::BLEID; break;
+ case MBlazeCC::LT: Opc = MBlaze::BGEID; break;
+ case MBlazeCC::GE: Opc = MBlaze::BLTID; break;
+ case MBlazeCC::LE: Opc = MBlaze::BGTID; break;
+ }
+
+ BuildMI(BB, dl, TII->get(Opc))
+ .addReg(MI->getOperand(3).getReg())
+ .addMBB(dneBB);
+
+ F->insert(It, flsBB);
+ F->insert(It, dneBB);
+
+ // Update machine-CFG edges by first adding all successors of the current
+ // block to the new block which will contain the Phi node for the select.
+ // Also inform sdisel of the edge changes.
+ for(MachineBasicBlock::succ_iterator i = BB->succ_begin(),
+ e = BB->succ_end(); i != e; ++i) {
+ EM->insert(std::make_pair(*i, dneBB));
+ dneBB->addSuccessor(*i);
+ }
+
+ // Next, remove all successors of the current block, and add the true
+ // and fallthrough blocks as its successors.
+ while(!BB->succ_empty())
+ BB->removeSuccessor(BB->succ_begin());
+ BB->addSuccessor(flsBB);
+ BB->addSuccessor(dneBB);
+ flsBB->addSuccessor(dneBB);
+
+    // dneBB:
+    //  %Result = phi [ %FalseValue, flsBB ], [ %TrueValue, BB ]
+    //  ...
+
+ BuildMI(dneBB, dl, TII->get(MBlaze::PHI), MI->getOperand(0).getReg())
+ .addReg(MI->getOperand(2).getReg()).addMBB(flsBB)
+ .addReg(MI->getOperand(1).getReg()).addMBB(BB);
+
+ F->DeleteMachineInstr(MI); // The pseudo instruction is gone now.
+ return dneBB;
+ }
+ }
+}
+
+//===----------------------------------------------------------------------===//
+// Misc Lower Operation implementation
+//===----------------------------------------------------------------------===//
+//
+
+SDValue MBlazeTargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ SDValue TrueVal = Op.getOperand(2);
+ SDValue FalseVal = Op.getOperand(3);
+ DebugLoc dl = Op.getDebugLoc();
+ unsigned Opc;
+
+ SDValue CompareFlag;
+ if (LHS.getValueType() == MVT::i32) {
+ Opc = MBlazeISD::Select_CC;
+ CompareFlag = DAG.getNode(MBlazeISD::ICmp, dl, MVT::i32, LHS, RHS)
+ .getValue(1);
+ } else {
+ llvm_unreachable( "Cannot lower select_cc with unknown type" );
+ }
+
+ return DAG.getNode(Opc, dl, TrueVal.getValueType(), TrueVal, FalseVal,
+ CompareFlag);
+}
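+// Illustrative result of the lowering above: (select_cc lhs, rhs, t, f, cc)
+// becomes MBlazeISD::Select_CC t, f, glue, where glue is produced by an
+// MBlazeISD::ICmp of lhs and rhs; the resulting Select_CC pseudo is later
+// expanded into a branch diamond by EmitInstrWithCustomInserter.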
+
+SDValue MBlazeTargetLowering::
+LowerGlobalAddress(SDValue Op, SelectionDAG &DAG) {
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ GlobalValue *GV = cast<GlobalAddressSDNode>(Op)->getGlobal();
+ SDValue GA = DAG.getTargetGlobalAddress(GV, MVT::i32);
+
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, GA);
+}
+
+SDValue MBlazeTargetLowering::
+LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG) {
+ llvm_unreachable("TLS not implemented for MicroBlaze.");
+ return SDValue(); // Not reached
+}
+
+SDValue MBlazeTargetLowering::
+LowerJumpTable(SDValue Op, SelectionDAG &DAG) {
+ // FIXME there isn't actually debug info here
+ DebugLoc dl = Op.getDebugLoc();
+ bool IsPIC = getTargetMachine().getRelocationModel() == Reloc::PIC_;
+ unsigned char OpFlag = IsPIC ? MBlazeII::MO_GOT : MBlazeII::MO_ABS_HILO;
+
+ EVT PtrVT = Op.getValueType();
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
+
+ SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT, OpFlag);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, JTI);
+}
+
+SDValue MBlazeTargetLowering::
+LowerConstantPool(SDValue Op, SelectionDAG &DAG) {
+  ConstantPoolSDNode *N = cast<ConstantPoolSDNode>(Op);
+  Constant *C = N->getConstVal();
+  // FIXME there isn't actually debug info here
+  DebugLoc dl = Op.getDebugLoc();
+
+ SDValue CP = DAG.getTargetConstantPool(C, MVT::i32, N->getAlignment(),
+ N->getOffset(), MBlazeII::MO_ABS_HILO);
+ return DAG.getNode(MBlazeISD::Wrap, dl, MVT::i32, CP);
+}
+
+//===----------------------------------------------------------------------===//
+// Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeGenCallingConv.inc"
+
+//===----------------------------------------------------------------------===//
+// Call Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerCall - Function arguments are copied from virtual registers to
+/// (physical registers)/(the stack frame); CALLSEQ_START and CALLSEQ_END
+/// are emitted. TODO: isVarArg, isTailCall.
+SDValue MBlazeTargetLowering::
+LowerCall(SDValue Chain, SDValue Callee, CallingConv::ID CallConv,
+ bool isVarArg, bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(), ArgLocs,
+ *DAG.getContext());
+ CCInfo.AnalyzeCallOperands(Outs, CC_MBlaze);
+
+ // Get a count of how many bytes are to be pushed on the stack.
+ unsigned NumBytes = CCInfo.getNextStackOffset();
+ Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(NumBytes, true));
+
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+ SmallVector<SDValue, 8> MemOpChains;
+
+  // FirstStackArgLoc/LastArgStackLoc contain the first/last
+  // "at stack" argument location.
+ int LastArgStackLoc = 0;
+ unsigned FirstStackArgLoc = 4;
+
+ // Walk the register/memloc assignments, inserting copies/loads.
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ EVT RegVT = VA.getLocVT();
+ SDValue Arg = Outs[i].Val;
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default: llvm_unreachable("Unknown loc info!");
+ case CCValAssign::Full: break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, dl, RegVT, Arg);
+ break;
+ case CCValAssign::BCvt:
+ Arg = DAG.getNode(ISD::BIT_CONVERT, dl, RegVT, Arg);
+ break;
+ }
+
+    // Arguments that can be passed in a register must be kept in the
+    // RegsToPass vector.
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ } else {
+      // Only memory locations can reach this point...
+ assert(VA.isMemLoc());
+
+      // Create the frame index object for this outgoing parameter
+ LastArgStackLoc = (FirstStackArgLoc + VA.getLocMemOffset());
+ int FI = MFI->CreateFixedObject(VA.getValVT().getSizeInBits()/8,
+ LastArgStackLoc, true, false);
+
+ SDValue PtrOff = DAG.getFrameIndex(FI,getPointerTy());
+
+      // Emit an ISD::STORE which stores the
+      // parameter value to a stack location.
+ MemOpChains.push_back(DAG.getStore(Chain, dl, Arg, PtrOff, NULL, 0,
+ false, false, 0));
+ }
+ }
+
+ // Transform all store nodes into one single node because all store
+ // nodes are independent of each other.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other,
+ &MemOpChains[0], MemOpChains.size());
+
+ // Build a sequence of copy-to-reg nodes chained together with token
+ // chain and flag operands which copy the outgoing args into registers.
+  // The InFlag is necessary since all emitted instructions must be
+  // stuck together.
+ SDValue InFlag;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, dl, RegsToPass[i].first,
+ RegsToPass[i].second, InFlag);
+ InFlag = Chain.getValue(1);
+ }
+
+ // If the callee is a GlobalAddress/ExternalSymbol node (quite common, every
+ // direct call is) turn it into a TargetGlobalAddress/TargetExternalSymbol
+ // node so that legalize doesn't hack it.
+ unsigned char OpFlag = MBlazeII::MO_NO_FLAG;
+ if (GlobalAddressSDNode *G = dyn_cast<GlobalAddressSDNode>(Callee))
+ Callee = DAG.getTargetGlobalAddress(G->getGlobal(),
+ getPointerTy(), 0, OpFlag);
+ else if (ExternalSymbolSDNode *S = dyn_cast<ExternalSymbolSDNode>(Callee))
+ Callee = DAG.getTargetExternalSymbol(S->getSymbol(),
+ getPointerTy(), OpFlag);
+
+ // MBlazeJmpLink = #chain, #target_address, #opt_in_flags...
+ // = Chain, Callee, Reg#1, Reg#2, ...
+ //
+ // Returns a chain & a flag for retval copy to use.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ Ops.push_back(Callee);
+
+ // Add argument registers to the end of the list so that they are
+ // known live into the call.
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+ }
+
+ if (InFlag.getNode())
+ Ops.push_back(InFlag);
+
+ Chain = DAG.getNode(MBlazeISD::JmpLink, dl, NodeTys, &Ops[0], Ops.size());
+ InFlag = Chain.getValue(1);
+
+ // Create the CALLSEQ_END node.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(NumBytes, true),
+ DAG.getIntPtrConstant(0, true), InFlag);
+ if (!Ins.empty())
+ InFlag = Chain.getValue(1);
+
+ // Handle result values, copying them out of physregs into vregs that we
+ // return.
+ return LowerCallResult(Chain, InFlag, CallConv, isVarArg,
+ Ins, dl, DAG, InVals);
+}
+
+/// LowerCallResult - Lower the result values of a call into the
+/// appropriate copies out of appropriate physical registers.
+SDValue MBlazeTargetLowering::
+LowerCallResult(SDValue Chain, SDValue InFlag, CallingConv::ID CallConv,
+ bool isVarArg, const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeCallResult(Ins, RetCC_MBlaze);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ Chain = DAG.getCopyFromReg(Chain, dl, RVLocs[i].getLocReg(),
+ RVLocs[i].getValVT(), InFlag).getValue(1);
+ InFlag = Chain.getValue(2);
+ InVals.push_back(Chain.getValue(0));
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Formal Arguments Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+/// LowerFormalArguments - Transform physical registers into
+/// virtual registers and generate load operations for
+/// arguments placed on the stack.
+/// TODO: isVarArg
+SDValue MBlazeTargetLowering::
+LowerFormalArguments(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+ unsigned StackReg = MF.getTarget().getRegisterInfo()->getFrameRegister(MF);
+
+ // Assign locations to all of the incoming arguments.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ ArgLocs, *DAG.getContext());
+
+ CCInfo.AnalyzeFormalArguments(Ins, CC_MBlaze);
+ SDValue StackPtr;
+
+ unsigned FirstStackArgLoc = 4;
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+
+    // Arguments stored in registers
+ if (VA.isRegLoc()) {
+ EVT RegVT = VA.getLocVT();
+ TargetRegisterClass *RC = 0;
+
+ if (RegVT == MVT::i32)
+ RC = MBlaze::CPURegsRegisterClass;
+ else if (RegVT == MVT::f32)
+ RC = MBlaze::FGR32RegisterClass;
+ else
+ llvm_unreachable("RegVT not supported by LowerFormalArguments");
+
+      // Transform the arguments stored in
+      // physical registers into virtual ones
+ unsigned Reg = MF.addLiveIn(VA.getLocReg(), RC);
+ SDValue ArgValue = DAG.getCopyFromReg(Chain, dl, Reg, RegVT);
+
+ // If this is an 8 or 16-bit value, it has been passed promoted
+ // to 32 bits. Insert an assert[sz]ext to capture this, then
+ // truncate to the right size.
+ if (VA.getLocInfo() != CCValAssign::Full) {
+ unsigned Opcode = 0;
+ if (VA.getLocInfo() == CCValAssign::SExt)
+ Opcode = ISD::AssertSext;
+ else if (VA.getLocInfo() == CCValAssign::ZExt)
+ Opcode = ISD::AssertZext;
+ if (Opcode)
+ ArgValue = DAG.getNode(Opcode, dl, RegVT, ArgValue,
+ DAG.getValueType(VA.getValVT()));
+ ArgValue = DAG.getNode(ISD::TRUNCATE, dl, VA.getValVT(), ArgValue);
+ }
+
+ InVals.push_back(ArgValue);
+
+      // To meet the ABI, when varargs are passed in registers, the
+      // registers must have their values written to the caller's stack frame.
+ if (isVarArg) {
+ if (StackPtr.getNode() == 0)
+ StackPtr = DAG.getRegister(StackReg, getPointerTy());
+
+        // The stack pointer offset is relative to the caller's stack frame.
+        // Since the real stack size is unknown here, a negative SPOffset is
+        // used so there's a way to adjust these offsets when the stack size
+        // becomes known (in EliminateFrameIndex). A dummy SPOffset is used
+        // instead of a direct negative address (which is recorded for use in
+        // emitPrologue) to avoid miscalculating the first stack offset in
+        // PEI::calculateFrameObjectOffsets.
+        // Arguments are always 32-bit.
+ int FI = MFI->CreateFixedObject(4, 0, true, false);
+ MBlazeFI->recordStoreVarArgsFI(FI, -(FirstStackArgLoc+(i*4)));
+ SDValue PtrOff = DAG.getFrameIndex(FI, getPointerTy());
+
+        // Emit an ISD::STORE which stores the
+        // parameter value to a stack location.
+ InVals.push_back(DAG.getStore(Chain, dl, ArgValue, PtrOff, NULL, 0,
+ false, false, 0));
+ }
+
+ } else { // VA.isRegLoc()
+
+ // sanity check
+ assert(VA.isMemLoc());
+
+      // The stack pointer offset is relative to the caller's stack frame.
+      // Since the real stack size is unknown here, a negative SPOffset is
+      // used so there's a way to adjust these offsets when the stack size
+      // becomes known (in EliminateFrameIndex). A dummy SPOffset is used
+      // instead of a direct negative address (which is recorded for use in
+      // emitPrologue) to avoid miscalculating the first stack offset in
+      // PEI::calculateFrameObjectOffsets.
+      // Arguments are always 32-bit.
+ unsigned ArgSize = VA.getLocVT().getSizeInBits()/8;
+ int FI = MFI->CreateFixedObject(ArgSize, 0, true, false);
+ MBlazeFI->recordLoadArgsFI(FI, -(ArgSize+
+ (FirstStackArgLoc + VA.getLocMemOffset())));
+
+ // Create load nodes to retrieve arguments from the stack
+ SDValue FIN = DAG.getFrameIndex(FI, getPointerTy());
+ InVals.push_back(DAG.getLoad(VA.getValVT(), dl, Chain, FIN, NULL, 0,
+ false, false, 0));
+ }
+ }
+
+ return Chain;
+}
+
+//===----------------------------------------------------------------------===//
+// Return Value Calling Convention Implementation
+//===----------------------------------------------------------------------===//
+
+SDValue MBlazeTargetLowering::
+LowerReturn(SDValue Chain, CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG) {
+  // CCValAssign - represents the assignment of
+  // the return value to a location
+ SmallVector<CCValAssign, 16> RVLocs;
+
+ // CCState - Info about the registers and stack slot.
+ CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
+ RVLocs, *DAG.getContext());
+
+  // Analyze return values.
+ CCInfo.AnalyzeReturn(Outs, RetCC_MBlaze);
+
+ // If this is the first return lowered for this function, add
+ // the regs to the liveout set for the function.
+ if (DAG.getMachineFunction().getRegInfo().liveout_empty()) {
+ for (unsigned i = 0; i != RVLocs.size(); ++i)
+ if (RVLocs[i].isRegLoc())
+ DAG.getMachineFunction().getRegInfo().addLiveOut(RVLocs[i].getLocReg());
+ }
+
+ SDValue Flag;
+
+ // Copy the result values into the output registers.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ assert(VA.isRegLoc() && "Can only return in registers!");
+
+ Chain = DAG.getCopyToReg(Chain, dl, VA.getLocReg(),
+ Outs[i].Val, Flag);
+
+    // Guarantee that all emitted copies are stuck together by
+    // chaining through the flag value.
+ Flag = Chain.getValue(1);
+ }
+
+ // Return on MBlaze is always a "rtsd R15, 8"
+ if (Flag.getNode())
+ return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(MBlaze::R15, MVT::i32), Flag);
+ else // Return Void
+ return DAG.getNode(MBlazeISD::Ret, dl, MVT::Other,
+ Chain, DAG.getRegister(MBlaze::R15, MVT::i32));
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze Inline Assembly Support
+//===----------------------------------------------------------------------===//
+
+/// getConstraintType - Given a constraint letter, return the type of
+/// constraint it is for this target.
+MBlazeTargetLowering::ConstraintType MBlazeTargetLowering::
+getConstraintType(const std::string &Constraint) const
+{
+  // MBlaze-specific constraints
+ //
+ // 'd' : An address register. Equivalent to r.
+ // 'y' : Equivalent to r; retained for
+ // backwards compatibility.
+ // 'f' : Floating Point registers.
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ default : break;
+ case 'd':
+ case 'y':
+ case 'f':
+ return C_RegisterClass;
+ }
+ }
+ return TargetLowering::getConstraintType(Constraint);
+}
+
+/// getRegForInlineAsmConstraint - Given a constraint letter (e.g. "r"),
+/// return a register of 0 and the register class that can be used to
+/// satisfy the constraint. This should only be used for C_RegisterClass
+/// constraints.
+std::pair<unsigned, const TargetRegisterClass*> MBlazeTargetLowering::
+getRegForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ if (Constraint.size() == 1) {
+ switch (Constraint[0]) {
+ case 'r':
+ return std::make_pair(0U, MBlaze::CPURegsRegisterClass);
+ case 'f':
+ if (VT == MVT::f32)
+ return std::make_pair(0U, MBlaze::FGR32RegisterClass);
+ }
+ }
+ return TargetLowering::getRegForInlineAsmConstraint(Constraint, VT);
+}
+
+/// Given a register class constraint, like 'r', return a list of registers
+/// that can be used to satisfy the constraint.
+std::vector<unsigned> MBlazeTargetLowering::
+getRegClassForInlineAsmConstraint(const std::string &Constraint, EVT VT) const {
+ if (Constraint.size() != 1)
+ return std::vector<unsigned>();
+
+ switch (Constraint[0]) {
+ default : break;
+ case 'r':
+ // GCC MBlaze Constraint Letters
+ case 'd':
+ case 'y':
+ return make_vector<unsigned>(
+ MBlaze::R3, MBlaze::R4, MBlaze::R5, MBlaze::R6,
+ MBlaze::R7, MBlaze::R9, MBlaze::R10, MBlaze::R11,
+ MBlaze::R12, MBlaze::R19, MBlaze::R20, MBlaze::R21,
+ MBlaze::R22, MBlaze::R23, MBlaze::R24, MBlaze::R25,
+ MBlaze::R26, MBlaze::R27, MBlaze::R28, MBlaze::R29,
+ MBlaze::R30, MBlaze::R31, 0);
+
+ case 'f':
+ return make_vector<unsigned>(
+ MBlaze::F3, MBlaze::F4, MBlaze::F5, MBlaze::F6,
+ MBlaze::F7, MBlaze::F9, MBlaze::F10, MBlaze::F11,
+ MBlaze::F12, MBlaze::F19, MBlaze::F20, MBlaze::F21,
+ MBlaze::F22, MBlaze::F23, MBlaze::F24, MBlaze::F25,
+ MBlaze::F26, MBlaze::F27, MBlaze::F28, MBlaze::F29,
+ MBlaze::F30, MBlaze::F31, 0);
+ }
+ return std::vector<unsigned>();
+}
+
+bool MBlazeTargetLowering::
+isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const {
+ // The MBlaze target isn't yet aware of offsets.
+ return false;
+}
+
+bool MBlazeTargetLowering::isFPImmLegal(const APFloat &Imm, EVT VT) const {
+ return VT != MVT::f32;
+}
diff --git a/lib/Target/MBlaze/MBlazeISelLowering.h b/lib/Target/MBlaze/MBlazeISelLowering.h
new file mode 100644
index 0000000..75d2552
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeISelLowering.h
@@ -0,0 +1,146 @@
+//===-- MBlazeISelLowering.h - MBlaze DAG Lowering Interface ----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the interfaces that MBlaze uses to lower LLVM code into a
+// selection DAG.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBlazeISELLOWERING_H
+#define MBlazeISELLOWERING_H
+
+#include "llvm/CodeGen/SelectionDAG.h"
+#include "llvm/Target/TargetLowering.h"
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+
+namespace llvm {
+ namespace MBlazeCC {
+ enum CC {
+ FIRST = 0,
+ EQ,
+ NE,
+ GT,
+ LT,
+ GE,
+ LE
+ };
+ }
+
+ namespace MBlazeISD {
+ enum NodeType {
+ // Start the numbering from where ISD NodeType finishes.
+ FIRST_NUMBER = ISD::BUILTIN_OP_END,
+
+ // Jump and link (call)
+ JmpLink,
+
+ // Handle gp_rel (small data/bss sections) relocation.
+ GPRel,
+
+ // Select CC Pseudo Instruction
+ Select_CC,
+
+ // Wrap up multiple types of instructions
+ Wrap,
+
+ // Integer Compare
+ ICmp,
+
+ // Return
+ Ret
+ };
+ }
+
+ //===--------------------------------------------------------------------===//
+ // TargetLowering Implementation
+ //===--------------------------------------------------------------------===//
+
+ class MBlazeTargetLowering : public TargetLowering {
+ public:
+
+ explicit MBlazeTargetLowering(MBlazeTargetMachine &TM);
+
+ /// LowerOperation - Provide custom lowering hooks for some operations.
+ virtual SDValue LowerOperation(SDValue Op, SelectionDAG &DAG);
+
+ /// getTargetNodeName - This method returns the name of a target specific
+    /// DAG node.
+ virtual const char *getTargetNodeName(unsigned Opcode) const;
+
+ /// getSetCCResultType - get the ISD::SETCC result ValueType
+ MVT::SimpleValueType getSetCCResultType(EVT VT) const;
+
+ virtual unsigned getFunctionAlignment(const Function *F) const;
+ private:
+ // Subtarget Info
+ const MBlazeSubtarget *Subtarget;
+
+
+ // Lower Operand helpers
+ SDValue LowerCallResult(SDValue Chain, SDValue InFlag,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ // Lower Operand specifics
+ SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
+
+ virtual SDValue
+ LowerFormalArguments(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerCall(SDValue Chain, SDValue Callee,
+ CallingConv::ID CallConv, bool isVarArg,
+ bool &isTailCall,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ const SmallVectorImpl<ISD::InputArg> &Ins,
+ DebugLoc dl, SelectionDAG &DAG,
+ SmallVectorImpl<SDValue> &InVals);
+
+ virtual SDValue
+ LowerReturn(SDValue Chain,
+ CallingConv::ID CallConv, bool isVarArg,
+ const SmallVectorImpl<ISD::OutputArg> &Outs,
+ DebugLoc dl, SelectionDAG &DAG);
+
+ virtual MachineBasicBlock *EmitInstrWithCustomInserter(MachineInstr *MI,
+ MachineBasicBlock *MBB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+
+ // Inline asm support
+ ConstraintType getConstraintType(const std::string &Constraint) const;
+
+ std::pair<unsigned, const TargetRegisterClass*>
+ getRegForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ std::vector<unsigned>
+ getRegClassForInlineAsmConstraint(const std::string &Constraint,
+ EVT VT) const;
+
+ virtual bool isOffsetFoldingLegal(const GlobalAddressSDNode *GA) const;
+
+ /// isFPImmLegal - Returns true if the target can instruction select the
+ /// specified FP immediate natively. If false, the legalizer will
+ /// materialize the FP immediate as a load from a constant pool.
+ virtual bool isFPImmLegal(const APFloat &Imm, EVT VT) const;
+ };
+}
+
+#endif // MBlazeISELLOWERING_H
diff --git a/lib/Target/MBlaze/MBlazeInstrFPU.td b/lib/Target/MBlaze/MBlazeInstrFPU.td
new file mode 100644
index 0000000..a48a8c9
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrFPU.td
@@ -0,0 +1,223 @@
+//===- MBlazeInstrFPU.td - MBlaze FPU Instruction defs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze profiles and nodes
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+class LoadFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs FGR32:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set FGR32:$dst, (OpNode xaddr:$addr))], IILoad>;
+
+class LoadFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TAI<op, (outs FGR32:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set FGR32:$dst, (OpNode iaddr:$addr))], IILoad>;
+
+class StoreFM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs), (ins FGR32:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode FGR32:$dst, xaddr:$addr)], IIStore>;
+
+class StoreFMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+  TAI<op, (outs), (ins FGR32:$dst, memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode FGR32:$dst, iaddr:$addr)], IIStore>;
+
+class ArithF<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>;
+
+class CmpFN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs CPURegs:$dst), (ins FGR32:$b, FGR32:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithFR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set FGR32:$dst, (OpNode FGR32:$b, FGR32:$c))], itin>;
+
+class ArithF2<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TF<op, flags, (outs FGR32:$dst), (ins FGR32:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+class ArithIF<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TF<op, flags, (outs FGR32:$dst), (ins CPURegs:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+class ArithFI<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TF<op, flags, (outs CPURegs:$dst), (ins FGR32:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [], itin>;
+
+class LogicF<bits<6> op, string instr_asm> :
+ TAI<op, (outs FGR32:$dst), (ins FGR32:$b, FGR32:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [],
+ IIAlu>;
+
+class LogicFI<bits<6> op, string instr_asm> :
+ TAI<op, (outs FGR32:$dst), (ins FGR32:$b, fimm:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [],
+ IIAlu>;
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FPU Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+let Predicates=[HasFPU] in {
+ def FOR : LogicF<0x28, "or ">;
+ def FORI : LogicFI<0x28, "ori ">;
+ def FADD : ArithF<0x16, 0x000, "fadd ", fadd, IIAlu>;
+ def FRSUB : ArithFR<0x16, 0x080, "frsub ", fsub, IIAlu>;
+ def FMUL : ArithF<0x16, 0x100, "fmul ", fmul, IIAlu>;
+ def FDIV : ArithF<0x16, 0x180, "fdiv ", fdiv, IIAlu>;
+
+ def LWF : LoadFM<0x32, "lw ", load>;
+ def LWFI : LoadFMI<0x32, "lwi ", load>;
+
+ def SWF : StoreFM<0x32, "sw ", store>;
+ def SWFI : StoreFMI<0x32, "swi ", store>;
+}
+
+let Predicates=[HasFPU,HasSqrt] in {
+ def FLT : ArithIF<0x16, 0x280, "flt ", IIAlu>;
+ def FINT : ArithFI<0x16, 0x300, "fint ", IIAlu>;
+  def FSQRT : ArithF2<0x16, 0x380, "fsqrt ", IIAlu>;
+}
+
+let isAsCheapAsAMove = 1 in {
+ def FCMP_UN : CmpFN<0x16, 0x200, "fcmp.un", IIAlu>;
+ def FCMP_LT : CmpFN<0x16, 0x210, "fcmp.lt", IIAlu>;
+ def FCMP_EQ : CmpFN<0x16, 0x220, "fcmp.eq", IIAlu>;
+ def FCMP_LE : CmpFN<0x16, 0x230, "fcmp.le", IIAlu>;
+ def FCMP_GT : CmpFN<0x16, 0x240, "fcmp.gt", IIAlu>;
+ def FCMP_NE : CmpFN<0x16, 0x250, "fcmp.ne", IIAlu>;
+ def FCMP_GE : CmpFN<0x16, 0x260, "fcmp.ge", IIAlu>;
+}
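+// Note (assumed fcmp semantics): each fcmp.* above writes an integer truth
+// value into a CPU register; the Select_CC patterns below test that register
+// against zero using the MBlazeCC condition-code immediate (1 = EQ, 2 = NE).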
+
+
+let usesCustomInserter = 1 in {
+ def Select_FCC : MBlazePseudo<(outs FGR32:$dst),
+ (ins FGR32:$T, FGR32:$F, CPURegs:$CMP, i32imm:$CC),
+ "; SELECT_FCC PSEUDO!",
+ []>;
+}
+
+// Floating point conversions
+let Predicates=[HasFPU] in {
+ def : Pat<(sint_to_fp CPURegs:$V), (FLT CPURegs:$V)>;
+ def : Pat<(fp_to_sint FGR32:$V), (FINT FGR32:$V)>;
+ def : Pat<(fsqrt FGR32:$V), (FSQRT FGR32:$V)>;
+}
+
+// SET_CC operations
+let Predicates=[HasFPU] in {
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETEQ),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_EQ FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETNE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_EQ FGR32:$L, FGR32:$R), 1)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETOEQ),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_EQ FGR32:$L, FGR32:$R), 2)>;
+  def : Pat<(setcc FGR32:$L, FGR32:$R, SETONE),
+            (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+                       (OR (FCMP_UN FGR32:$L, FGR32:$R),
+                           (FCMP_EQ FGR32:$L, FGR32:$R)), 1)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETGT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_GT FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETLT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_LT FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETGE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_GE FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETLE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_LE FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_GT FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_LT FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETOGE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_GE FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETOLE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_LE FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETUEQ),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_EQ FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETUNE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_NE FGR32:$L, FGR32:$R), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_GT FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETULT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_LT FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETUGE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_GE FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETULE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (OR (FCMP_UN FGR32:$L, FGR32:$R),
+ (FCMP_LE FGR32:$L, FGR32:$R)), 2)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETO),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_UN FGR32:$L, FGR32:$R), 1)>;
+ def : Pat<(setcc FGR32:$L, FGR32:$R, SETUO),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (FCMP_UN FGR32:$L, FGR32:$R), 2)>;
+}
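+// Each pattern above materializes the boolean as a select between
+// (ADDI R0, 1) and (ADDI R0, 0), i.e. the constants 1 and 0, keyed on the
+// fcmp result, so a floating-point setcc produces a plain i32 0/1 value.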
+
+// SELECT operations
+def : Pat<(select CPURegs:$C, FGR32:$T, FGR32:$F),
+ (Select_FCC FGR32:$T, FGR32:$F, CPURegs:$C, 2)>;
+
+//===----------------------------------------------------------------------===//
+// Patterns for Floating Point Instructions
+//===----------------------------------------------------------------------===//
+def : Pat<(f32 fpimm:$imm), (FORI F0, fpimm:$imm)>;
diff --git a/lib/Target/MBlaze/MBlazeInstrFSL.td b/lib/Target/MBlaze/MBlazeInstrFSL.td
new file mode 100644
index 0000000..b59999e
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrFSL.td
@@ -0,0 +1,153 @@
+//===- MBlazeInstrFSL.td - MBlaze FSL Instruction defs ----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// FSL Instruction Formats
+//===----------------------------------------------------------------------===//
+class FSLGetD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b))], IIAlu>;
+
+class FSLGet<bits<6> op, string instr_asm, Intrinsic OpNode> :
+ TAI<op, (outs CPURegs:$dst), (ins fslimm:$b),
+ !strconcat(instr_asm, " $dst, $b"),
+ [(set CPURegs:$dst, (OpNode immZExt4:$b))], IIAlu>;
+
+class FSLPutD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
+ TA<op, flags, (outs), (ins CPURegs:$v, CPURegs:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode CPURegs:$v, CPURegs:$b)], IIAlu>;
+
+class FSLPut<bits<6> op, string instr_asm, Intrinsic OpNode> :
+ TAI<op, (outs), (ins CPURegs:$v, fslimm:$b),
+ !strconcat(instr_asm, " $v, $b"),
+ [(OpNode CPURegs:$v, immZExt4:$b)], IIAlu>;
+
+class FSLPutTD<bits<6> op, bits<11> flags, string instr_asm, Intrinsic OpNode> :
+ TA<op, flags, (outs), (ins CPURegs:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode CPURegs:$b)], IIAlu>;
+
+class FSLPutT<bits<6> op, string instr_asm, Intrinsic OpNode> :
+ TAI<op, (outs), (ins fslimm:$b),
+ !strconcat(instr_asm, " $b"),
+ [(OpNode immZExt4:$b)], IIAlu>;
+
+//===----------------------------------------------------------------------===//
+// FSL Get Instructions
+//===----------------------------------------------------------------------===//
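+// The mnemonic letters compose as described in the MicroBlaze reference
+// (roughly: n = non-blocking, c = control, e = exception-generating,
+// a = atomic, t = test-only); each variant maps onto its own intrinsic.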
+def GET : FSLGet<0x1B, "get ", int_mblaze_fsl_get>;
+def AGET : FSLGet<0x1B, "aget ", int_mblaze_fsl_aget>;
+def CGET : FSLGet<0x1B, "cget ", int_mblaze_fsl_cget>;
+def CAGET : FSLGet<0x1B, "caget ", int_mblaze_fsl_caget>;
+def EGET : FSLGet<0x1B, "eget ", int_mblaze_fsl_eget>;
+def EAGET : FSLGet<0x1B, "eaget ", int_mblaze_fsl_eaget>;
+def ECGET : FSLGet<0x1B, "ecget ", int_mblaze_fsl_ecget>;
+def ECAGET : FSLGet<0x1B, "ecaget ", int_mblaze_fsl_ecaget>;
+def NGET : FSLGet<0x1B, "nget ", int_mblaze_fsl_nget>;
+def NAGET : FSLGet<0x1B, "naget ", int_mblaze_fsl_naget>;
+def NCGET : FSLGet<0x1B, "ncget ", int_mblaze_fsl_ncget>;
+def NCAGET : FSLGet<0x1B, "ncaget ", int_mblaze_fsl_ncaget>;
+def NEGET : FSLGet<0x1B, "neget ", int_mblaze_fsl_neget>;
+def NEAGET : FSLGet<0x1B, "neaget ", int_mblaze_fsl_neaget>;
+def NECGET : FSLGet<0x1B, "necget ", int_mblaze_fsl_necget>;
+def NECAGET : FSLGet<0x1B, "necaget ", int_mblaze_fsl_necaget>;
+def TGET : FSLGet<0x1B, "tget ", int_mblaze_fsl_tget>;
+def TAGET : FSLGet<0x1B, "taget ", int_mblaze_fsl_taget>;
+def TCGET : FSLGet<0x1B, "tcget ", int_mblaze_fsl_tcget>;
+def TCAGET : FSLGet<0x1B, "tcaget ", int_mblaze_fsl_tcaget>;
+def TEGET : FSLGet<0x1B, "teget ", int_mblaze_fsl_teget>;
+def TEAGET : FSLGet<0x1B, "teaget ", int_mblaze_fsl_teaget>;
+def TECGET : FSLGet<0x1B, "tecget ", int_mblaze_fsl_tecget>;
+def TECAGET : FSLGet<0x1B, "tecaget ", int_mblaze_fsl_tecaget>;
+def TNGET : FSLGet<0x1B, "tnget ", int_mblaze_fsl_tnget>;
+def TNAGET : FSLGet<0x1B, "tnaget ", int_mblaze_fsl_tnaget>;
+def TNCGET : FSLGet<0x1B, "tncget ", int_mblaze_fsl_tncget>;
+def TNCAGET : FSLGet<0x1B, "tncaget ", int_mblaze_fsl_tncaget>;
+def TNEGET : FSLGet<0x1B, "tneget ", int_mblaze_fsl_tneget>;
+def TNEAGET : FSLGet<0x1B, "tneaget ", int_mblaze_fsl_tneaget>;
+def TNECGET : FSLGet<0x1B, "tnecget ", int_mblaze_fsl_tnecget>;
+def TNECAGET : FSLGet<0x1B, "tnecaget ", int_mblaze_fsl_tnecaget>;
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Get Instructions
+//===----------------------------------------------------------------------===//
+def GETD : FSLGetD<0x1B, 0x00, "getd ", int_mblaze_fsl_get>;
+def AGETD : FSLGetD<0x1B, 0x00, "agetd ", int_mblaze_fsl_aget>;
+def CGETD : FSLGetD<0x1B, 0x00, "cgetd ", int_mblaze_fsl_cget>;
+def CAGETD : FSLGetD<0x1B, 0x00, "cagetd ", int_mblaze_fsl_caget>;
+def EGETD : FSLGetD<0x1B, 0x00, "egetd ", int_mblaze_fsl_eget>;
+def EAGETD : FSLGetD<0x1B, 0x00, "eagetd ", int_mblaze_fsl_eaget>;
+def ECGETD : FSLGetD<0x1B, 0x00, "ecgetd ", int_mblaze_fsl_ecget>;
+def ECAGETD : FSLGetD<0x1B, 0x00, "ecagetd ", int_mblaze_fsl_ecaget>;
+def NGETD : FSLGetD<0x1B, 0x00, "ngetd ", int_mblaze_fsl_nget>;
+def NAGETD : FSLGetD<0x1B, 0x00, "nagetd ", int_mblaze_fsl_naget>;
+def NCGETD : FSLGetD<0x1B, 0x00, "ncgetd ", int_mblaze_fsl_ncget>;
+def NCAGETD : FSLGetD<0x1B, 0x00, "ncagetd ", int_mblaze_fsl_ncaget>;
+def NEGETD : FSLGetD<0x1B, 0x00, "negetd ", int_mblaze_fsl_neget>;
+def NEAGETD : FSLGetD<0x1B, 0x00, "neagetd ", int_mblaze_fsl_neaget>;
+def NECGETD : FSLGetD<0x1B, 0x00, "necgetd ", int_mblaze_fsl_necget>;
+def NECAGETD : FSLGetD<0x1B, 0x00, "necagetd ", int_mblaze_fsl_necaget>;
+def TGETD : FSLGetD<0x1B, 0x00, "tgetd ", int_mblaze_fsl_tget>;
+def TAGETD : FSLGetD<0x1B, 0x00, "tagetd ", int_mblaze_fsl_taget>;
+def TCGETD : FSLGetD<0x1B, 0x00, "tcgetd ", int_mblaze_fsl_tcget>;
+def TCAGETD : FSLGetD<0x1B, 0x00, "tcagetd ", int_mblaze_fsl_tcaget>;
+def TEGETD : FSLGetD<0x1B, 0x00, "tegetd ", int_mblaze_fsl_teget>;
+def TEAGETD : FSLGetD<0x1B, 0x00, "teagetd ", int_mblaze_fsl_teaget>;
+def TECGETD : FSLGetD<0x1B, 0x00, "tecgetd ", int_mblaze_fsl_tecget>;
+def TECAGETD : FSLGetD<0x1B, 0x00, "tecagetd ", int_mblaze_fsl_tecaget>;
+def TNGETD : FSLGetD<0x1B, 0x00, "tngetd ", int_mblaze_fsl_tnget>;
+def TNAGETD : FSLGetD<0x1B, 0x00, "tnagetd ", int_mblaze_fsl_tnaget>;
+def TNCGETD : FSLGetD<0x1B, 0x00, "tncgetd ", int_mblaze_fsl_tncget>;
+def TNCAGETD : FSLGetD<0x1B, 0x00, "tncagetd ", int_mblaze_fsl_tncaget>;
+def TNEGETD : FSLGetD<0x1B, 0x00, "tnegetd ", int_mblaze_fsl_tneget>;
+def TNEAGETD : FSLGetD<0x1B, 0x00, "tneagetd ", int_mblaze_fsl_tneaget>;
+def TNECGETD : FSLGetD<0x1B, 0x00, "tnecgetd ", int_mblaze_fsl_tnecget>;
+def TNECAGETD : FSLGetD<0x1B, 0x00, "tnecagetd", int_mblaze_fsl_tnecaget>;
+
+//===----------------------------------------------------------------------===//
+// FSL Put Instructions
+//===----------------------------------------------------------------------===//
+def PUT : FSLPut<0x1B, "put ", int_mblaze_fsl_put>;
+def APUT : FSLPut<0x1B, "aput ", int_mblaze_fsl_aput>;
+def CPUT : FSLPut<0x1B, "cput ", int_mblaze_fsl_cput>;
+def CAPUT : FSLPut<0x1B, "caput ", int_mblaze_fsl_caput>;
+def NPUT : FSLPut<0x1B, "nput ", int_mblaze_fsl_nput>;
+def NAPUT : FSLPut<0x1B, "naput ", int_mblaze_fsl_naput>;
+def NCPUT : FSLPut<0x1B, "ncput ", int_mblaze_fsl_ncput>;
+def NCAPUT : FSLPut<0x1B, "ncaput ", int_mblaze_fsl_ncaput>;
+def TPUT : FSLPutT<0x1B, "tput ", int_mblaze_fsl_tput>;
+def TAPUT : FSLPutT<0x1B, "taput ", int_mblaze_fsl_taput>;
+def TCPUT : FSLPutT<0x1B, "tcput ", int_mblaze_fsl_tcput>;
+def TCAPUT : FSLPutT<0x1B, "tcaput ", int_mblaze_fsl_tcaput>;
+def TNPUT : FSLPutT<0x1B, "tnput ", int_mblaze_fsl_tnput>;
+def TNAPUT : FSLPutT<0x1B, "tnaput ", int_mblaze_fsl_tnaput>;
+def TNCPUT : FSLPutT<0x1B, "tncput ", int_mblaze_fsl_tncput>;
+def TNCAPUT : FSLPutT<0x1B, "tncaput ", int_mblaze_fsl_tncaput>;
+
+//===----------------------------------------------------------------------===//
+// FSL Dynamic Put Instructions
+//===----------------------------------------------------------------------===//
+def PUTD : FSLPutD<0x1B, 0x00, "putd ", int_mblaze_fsl_put>;
+def APUTD : FSLPutD<0x1B, 0x00, "aputd ", int_mblaze_fsl_aput>;
+def CPUTD : FSLPutD<0x1B, 0x00, "cputd ", int_mblaze_fsl_cput>;
+def CAPUTD : FSLPutD<0x1B, 0x00, "caputd ", int_mblaze_fsl_caput>;
+def NPUTD : FSLPutD<0x1B, 0x00, "nputd ", int_mblaze_fsl_nput>;
+def NAPUTD : FSLPutD<0x1B, 0x00, "naputd ", int_mblaze_fsl_naput>;
+def NCPUTD : FSLPutD<0x1B, 0x00, "ncputd ", int_mblaze_fsl_ncput>;
+def NCAPUTD : FSLPutD<0x1B, 0x00, "ncaputd ", int_mblaze_fsl_ncaput>;
+def TPUTD : FSLPutTD<0x1B, 0x00, "tputd ", int_mblaze_fsl_tput>;
+def TAPUTD : FSLPutTD<0x1B, 0x00, "taputd ", int_mblaze_fsl_taput>;
+def TCPUTD : FSLPutTD<0x1B, 0x00, "tcputd ", int_mblaze_fsl_tcput>;
+def TCAPUTD : FSLPutTD<0x1B, 0x00, "tcaputd ", int_mblaze_fsl_tcaput>;
+def TNPUTD : FSLPutTD<0x1B, 0x00, "tnputd ", int_mblaze_fsl_tnput>;
+def TNAPUTD : FSLPutTD<0x1B, 0x00, "tnaputd ", int_mblaze_fsl_tnaput>;
+def TNCPUTD : FSLPutTD<0x1B, 0x00, "tncputd ", int_mblaze_fsl_tncput>;
+def TNCAPUTD : FSLPutTD<0x1B, 0x00, "tncaputd ", int_mblaze_fsl_tncaput>;
diff --git a/lib/Target/MBlaze/MBlazeInstrFormats.td b/lib/Target/MBlaze/MBlazeInstrFormats.td
new file mode 100644
index 0000000..7d65543
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrFormats.td
@@ -0,0 +1,246 @@
+//===- MBlazeInstrFormats.td - MB Instruction defs --------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Describe MBlaze instructions format
+//
+// CPU INSTRUCTION FORMATS
+//
+// opcode - operation code.
+// rd - dst reg.
+// ra - first src. reg.
+// rb - second src. reg.
+// imm16 - 16-bit immediate value.
+//
+//===----------------------------------------------------------------------===//
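+// For example, a Type A instruction such as "add rd, ra, rb" is packed as
+// |opcode(6)|rd(5)|ra(5)|rb(5)|flags(11)|; bit 0 in the classes below
+// denotes the most significant bit of the 32-bit word.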
+
+// Generic MBlaze Format
+class MBlazeInst<dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> : Instruction
+{
+ field bits<32> Inst;
+
+ let Namespace = "MBlaze";
+
+ bits<6> opcode;
+
+ // Top 6 bits are the 'opcode' field
+ let Inst{0-5} = opcode;
+
+ dag OutOperandList = outs;
+ dag InOperandList = ins;
+
+ let AsmString = asmstr;
+ let Pattern = pattern;
+ let Itinerary = itin;
+}
+
+//===----------------------------------------------------------------------===//
+// Pseudo instruction class
+//===----------------------------------------------------------------------===//
+class MBlazePseudo<dag outs, dag ins, string asmstr, list<dag> pattern>:
+ MBlazeInst<outs, ins, asmstr, pattern, IIPseudo>;
+
+//===----------------------------------------------------------------------===//
+// Type A instruction class in MBlaze : <|opcode|rd|ra|rb|flags|>
+//===----------------------------------------------------------------------===//
+
+class TA<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<5> rb;
+
+ let opcode = op;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21-31} = flags;
+}
+
+class TAI<bits<6> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
+
+class TIMM<bits<6> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-15} = 0;
+ let Inst{16-31} = imm16;
+}
+
+class TADDR<bits<6> op, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<26> addr;
+
+ let opcode = op;
+
+ let Inst{6-31} = addr;
+}
+
+//===----------------------------------------------------------------------===//
+// Type B instruction class in MBlaze : <|opcode|rd|ra|immediate|>
+//===----------------------------------------------------------------------===//
+
+class TB<bits<6> op, dag outs, dag ins, string asmstr, list<dag> pattern,
+ InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
+
+//===----------------------------------------------------------------------===//
+// Float instruction class in MBlaze : <|opcode|rd|ra|flags|>
+//===----------------------------------------------------------------------===//
+
+class TF<bits<6> op, bits<11> flags, dag outs, dag ins, string asmstr,
+ list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> rd;
+ bits<5> ra;
+
+ let opcode = op;
+
+ let Inst{6-10} = rd;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = 0;
+ let Inst{21-31} = flags;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch instruction class in MBlaze : <|opcode|rd|br|ra|flags|>
+//===----------------------------------------------------------------------===//
+
+class TBR<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+
+ let opcode = op;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = br;
+ let Inst{16-20} = ra;
+ let Inst{21-31} = flags;
+}
+
+class TBRC<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+ bits<5> rb;
+
+ let opcode = op;
+
+ let Inst{6-10} = br;
+ let Inst{11-15} = ra;
+ let Inst{16-20} = rb;
+ let Inst{21-31} = flags;
+}
+
+class TBRL<bits<6> op, bits<5> br, bits<11> flags, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+
+ let opcode = op;
+
+ let Inst{6-10} = 0xF;
+ let Inst{11-15} = br;
+ let Inst{16-20} = ra;
+ let Inst{21-31} = flags;
+}
+
+class TBRI<bits<6> op, bits<5> br, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = 0;
+ let Inst{11-15} = br;
+ let Inst{16-31} = imm16;
+}
+
+class TBRLI<bits<6> op, bits<5> br, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = 0xF;
+ let Inst{11-15} = br;
+ let Inst{16-31} = imm16;
+}
+
+class TBRCI<bits<6> op, bits<5> br, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = br;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
+
+class TRET<bits<6> op, dag outs, dag ins,
+ string asmstr, list<dag> pattern, InstrItinClass itin> :
+ MBlazeInst<outs, ins, asmstr, pattern, itin>
+{
+ bits<5> ra;
+ bits<16> imm16;
+
+ let opcode = op;
+
+ let Inst{6-10} = 0x10;
+ let Inst{11-15} = ra;
+ let Inst{16-31} = imm16;
+}
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.cpp b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
new file mode 100644
index 0000000..a7e8eb7
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.cpp
@@ -0,0 +1,222 @@
+//===- MBlazeInstrInfo.cpp - MBlaze Instruction Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeInstrInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/ADT/STLExtras.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "MBlazeGenInstrInfo.inc"
+
+using namespace llvm;
+
+MBlazeInstrInfo::MBlazeInstrInfo(MBlazeTargetMachine &tm)
+ : TargetInstrInfoImpl(MBlazeInsts, array_lengthof(MBlazeInsts)),
+ TM(tm), RI(*TM.getSubtargetImpl(), *this) {}
+
+static bool isZeroImm(const MachineOperand &op) {
+ return op.isImm() && op.getImm() == 0;
+}
+
+/// Return true if the instruction is a register to register move and
+/// return the source and dest operands in the passed parameters.
+bool MBlazeInstrInfo::
+isMoveInstr(const MachineInstr &MI, unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const {
+ SrcSubIdx = DstSubIdx = 0; // No sub-registers.
+
+  // add $dst, $src, $zero || add $dst, $zero, $src
+ // or $dst, $src, $zero || or $dst, $zero, $src
+ if ((MI.getOpcode() == MBlaze::ADD) || (MI.getOpcode() == MBlaze::OR)) {
+ if (MI.getOperand(1).isReg() && MI.getOperand(1).getReg() == MBlaze::R0) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(2).getReg();
+ return true;
+ } else if (MI.getOperand(2).isReg() &&
+ MI.getOperand(2).getReg() == MBlaze::R0) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ }
+
+ // addi $dst, $src, 0
+ // ori $dst, $src, 0
+ if ((MI.getOpcode() == MBlaze::ADDI) || (MI.getOpcode() == MBlaze::ORI)) {
+ if ((MI.getOperand(1).isReg()) && (isZeroImm(MI.getOperand(2)))) {
+ DstReg = MI.getOperand(0).getReg();
+ SrcReg = MI.getOperand(1).getReg();
+ return true;
+ }
+ }
+
+ return false;
+}
+
+/// isLoadFromStackSlot - If the specified machine instruction is a direct
+/// load from a stack slot, return the virtual or physical register number of
+/// the destination along with the FrameIndex of the loaded stack slot. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than loading from the stack slot.
+unsigned MBlazeInstrInfo::
+isLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::LWI) {
+    if ((MI->getOperand(2).isFI()) &&     // is a stack slot
+        (MI->getOperand(1).isImm()) &&    // has an immediate offset
+        (isZeroImm(MI->getOperand(1)))) { // and the offset is zero
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+
+ return 0;
+}
+
+/// isStoreToStackSlot - If the specified machine instruction is a direct
+/// store to a stack slot, return the virtual or physical register number of
+/// the source reg along with the FrameIndex of the stack slot stored to. If
+/// not, return 0. This predicate must return 0 if the instruction has
+/// any side effects other than storing to the stack slot.
+unsigned MBlazeInstrInfo::
+isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const {
+ if (MI->getOpcode() == MBlaze::SWI) {
+    if ((MI->getOperand(2).isFI()) &&     // is a stack slot
+        (MI->getOperand(1).isImm()) &&    // has an immediate offset
+        (isZeroImm(MI->getOperand(1)))) { // and the offset is zero
+ FrameIndex = MI->getOperand(2).getIndex();
+ return MI->getOperand(0).getReg();
+ }
+ }
+ return 0;
+}
+
+/// insertNoop - If a data hazard condition is found, insert the target nop
+/// instruction.
+void MBlazeInstrInfo::
+insertNoop(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI) const {
+ DebugLoc DL = DebugLoc::getUnknownLoc();
+ if (MI != MBB.end()) DL = MI->getDebugLoc();
+ BuildMI(MBB, MI, DL, get(MBlaze::NOP));
+}
+
+bool MBlazeInstrInfo::
+copyRegToReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const {
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ llvm::BuildMI(MBB, I, dl, get(MBlaze::ADD), DestReg)
+ .addReg(SrcReg).addReg(MBlaze::R0);
+ return true;
+}
+
+void MBlazeInstrInfo::
+storeRegToStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned SrcReg, bool isKill, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ BuildMI(MBB, I, dl, get(MBlaze::SWI)).addReg(SrcReg,getKillRegState(isKill))
+ .addImm(0).addFrameIndex(FI);
+}
+
+void MBlazeInstrInfo::
+loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
+ unsigned DestReg, int FI,
+ const TargetRegisterClass *RC) const {
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+ BuildMI(MBB, I, dl, get(MBlaze::LWI), DestReg)
+ .addImm(0).addFrameIndex(FI);
+}
+
+MachineInstr *MBlazeInstrInfo::
+foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops, int FI) const {
+ if (Ops.size() != 1) return NULL;
+
+ MachineInstr *NewMI = NULL;
+
+ switch (MI->getOpcode()) {
+ case MBlaze::OR:
+ case MBlaze::ADD:
+ if ((MI->getOperand(0).isReg()) &&
+ (MI->getOperand(2).isReg()) &&
+ (MI->getOperand(2).getReg() == MBlaze::R0) &&
+ (MI->getOperand(1).isReg())) {
+ if (Ops[0] == 0) { // COPY -> STORE
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ bool isKill = MI->getOperand(1).isKill();
+ bool isUndef = MI->getOperand(1).isUndef();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::SW))
+ .addReg(SrcReg, getKillRegState(isKill) | getUndefRegState(isUndef))
+ .addImm(0).addFrameIndex(FI);
+ } else { // COPY -> LOAD
+ unsigned DstReg = MI->getOperand(0).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isUndef = MI->getOperand(0).isUndef();
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(MBlaze::LW))
+ .addReg(DstReg, RegState::Define | getDeadRegState(isDead) |
+ getUndefRegState(isUndef))
+ .addImm(0).addFrameIndex(FI);
+ }
+ }
+ break;
+ }
+
+ return NewMI;
+}
+
+//===----------------------------------------------------------------------===//
+// Branch Analysis
+//===----------------------------------------------------------------------===//
+unsigned MBlazeInstrInfo::
+InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const {
+ DebugLoc dl = DebugLoc::getUnknownLoc();
+
+ // Can only insert uncond branches so far.
+ assert(Cond.empty() && !FBB && TBB && "Can only handle uncond branches!");
+ BuildMI(&MBB, dl, get(MBlaze::BRI)).addMBB(TBB);
+ return 1;
+}
+
+/// getGlobalBaseReg - Return a virtual register initialized with the
+/// global base register value. Output instructions required to
+/// initialize the register in the function entry block, if necessary.
+///
+unsigned MBlazeInstrInfo::getGlobalBaseReg(MachineFunction *MF) const {
+ MBlazeFunctionInfo *MBlazeFI = MF->getInfo<MBlazeFunctionInfo>();
+ unsigned GlobalBaseReg = MBlazeFI->getGlobalBaseReg();
+ if (GlobalBaseReg != 0)
+ return GlobalBaseReg;
+
+ // Insert the set of GlobalBaseReg into the first MBB of the function
+ MachineBasicBlock &FirstMBB = MF->front();
+ MachineBasicBlock::iterator MBBI = FirstMBB.begin();
+ MachineRegisterInfo &RegInfo = MF->getRegInfo();
+ const TargetInstrInfo *TII = MF->getTarget().getInstrInfo();
+
+ GlobalBaseReg = RegInfo.createVirtualRegister(MBlaze::CPURegsRegisterClass);
+ bool Ok = TII->copyRegToReg(FirstMBB, MBBI, GlobalBaseReg, MBlaze::R20,
+ MBlaze::CPURegsRegisterClass,
+ MBlaze::CPURegsRegisterClass);
+ assert(Ok && "Couldn't assign to global base register!");
+ Ok = Ok; // Silence warning when assertions are turned off.
+ RegInfo.addLiveIn(MBlaze::R20);
+
+ MBlazeFI->setGlobalBaseReg(GlobalBaseReg);
+ return GlobalBaseReg;
+}
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.h b/lib/Target/MBlaze/MBlazeInstrInfo.h
new file mode 100644
index 0000000..4f79f1c
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.h
@@ -0,0 +1,242 @@
+//===- MBlazeInstrInfo.h - MBlaze Instruction Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetInstrInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEINSTRUCTIONINFO_H
+#define MBLAZEINSTRUCTIONINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "MBlazeRegisterInfo.h"
+
+namespace llvm {
+
+namespace MBlaze {
+
+ // MBlaze Branch Codes
+ enum FPBranchCode {
+ BRANCH_F,
+ BRANCH_T,
+ BRANCH_FL,
+ BRANCH_TL,
+ BRANCH_INVALID
+ };
+
+ // MBlaze Condition Codes
+ enum CondCode {
+ // To be used with float branch True
+ FCOND_F,
+ FCOND_UN,
+ FCOND_EQ,
+ FCOND_UEQ,
+ FCOND_OLT,
+ FCOND_ULT,
+ FCOND_OLE,
+ FCOND_ULE,
+ FCOND_SF,
+ FCOND_NGLE,
+ FCOND_SEQ,
+ FCOND_NGL,
+ FCOND_LT,
+ FCOND_NGE,
+ FCOND_LE,
+ FCOND_NGT,
+
+ // To be used with float branch False
+    // These conditions have the same mnemonics as the
+    // ones above, but are used with a branch False.
+ FCOND_T,
+ FCOND_OR,
+ FCOND_NEQ,
+ FCOND_OGL,
+ FCOND_UGE,
+ FCOND_OGE,
+ FCOND_UGT,
+ FCOND_OGT,
+ FCOND_ST,
+ FCOND_GLE,
+ FCOND_SNE,
+ FCOND_GL,
+ FCOND_NLT,
+ FCOND_GE,
+ FCOND_NLE,
+ FCOND_GT,
+
+ // Only integer conditions
+ COND_E,
+ COND_GZ,
+ COND_GEZ,
+ COND_LZ,
+ COND_LEZ,
+ COND_NE,
+ COND_INVALID
+ };
+
+ // Turn condition code into conditional branch opcode.
+ unsigned GetCondBranchFromCond(CondCode CC);
+
+ /// GetOppositeBranchCondition - Return the inverse of the specified cond,
+ /// e.g. turning COND_E to COND_NE.
+ CondCode GetOppositeBranchCondition(MBlaze::CondCode CC);
+
+  /// MBlazeFCCToString - Map each FP condition code to its string
+ inline static const char *MBlazeFCCToString(MBlaze::CondCode CC)
+ {
+ switch (CC) {
+ default: llvm_unreachable("Unknown condition code");
+ case FCOND_F:
+ case FCOND_T: return "f";
+ case FCOND_UN:
+ case FCOND_OR: return "un";
+ case FCOND_EQ:
+ case FCOND_NEQ: return "eq";
+ case FCOND_UEQ:
+ case FCOND_OGL: return "ueq";
+ case FCOND_OLT:
+ case FCOND_UGE: return "olt";
+ case FCOND_ULT:
+ case FCOND_OGE: return "ult";
+ case FCOND_OLE:
+ case FCOND_UGT: return "ole";
+ case FCOND_ULE:
+ case FCOND_OGT: return "ule";
+ case FCOND_SF:
+ case FCOND_ST: return "sf";
+ case FCOND_NGLE:
+ case FCOND_GLE: return "ngle";
+ case FCOND_SEQ:
+ case FCOND_SNE: return "seq";
+ case FCOND_NGL:
+ case FCOND_GL: return "ngl";
+ case FCOND_LT:
+ case FCOND_NLT: return "lt";
+ case FCOND_NGE:
+ case FCOND_GE: return "ge";
+ case FCOND_LE:
+ case FCOND_NLE: return "nle";
+ case FCOND_NGT:
+ case FCOND_GT: return "gt";
+ }
+ }
+}
+
+/// MBlazeII - This namespace holds all of the target specific flags that
+/// instruction info tracks.
+///
+namespace MBlazeII {
+ /// Target Operand Flag enum.
+ enum TOF {
+ //===------------------------------------------------------------------===//
+ // MBlaze Specific MachineOperand flags.
+ MO_NO_FLAG,
+
+ /// MO_GOT - Represents the offset into the global offset table at which
+    /// the address of the relocation entry symbol resides during execution.
+ MO_GOT,
+
+ /// MO_GOT_CALL - Represents the offset into the global offset table at
+ /// which the address of a call site relocation entry symbol resides
+ /// during execution. This is different from the above since this flag
+ /// can only be present in call instructions.
+ MO_GOT_CALL,
+
+ /// MO_GPREL - Represents the offset from the current gp value to be used
+ /// for the relocatable object file being produced.
+ MO_GPREL,
+
+ /// MO_ABS_HILO - Represents the hi or low part of an absolute symbol
+ /// address.
+ MO_ABS_HILO
+
+ };
+}
+
+class MBlazeInstrInfo : public TargetInstrInfoImpl {
+ MBlazeTargetMachine &TM;
+ const MBlazeRegisterInfo RI;
+public:
+ explicit MBlazeInstrInfo(MBlazeTargetMachine &TM);
+
+  /// getRegisterInfo - TargetInstrInfo is a superset of TargetRegisterInfo. As
+ /// such, whenever a client has an instance of instruction info, it should
+ /// always be able to get register info as well (through this method).
+ ///
+ virtual const MBlazeRegisterInfo &getRegisterInfo() const { return RI; }
+
+ /// Return true if the instruction is a register to register move and return
+ /// the source and dest operands and their sub-register indices by reference.
+ virtual bool isMoveInstr(const MachineInstr &MI,
+ unsigned &SrcReg, unsigned &DstReg,
+ unsigned &SrcSubIdx, unsigned &DstSubIdx) const;
+
+ /// isLoadFromStackSlot - If the specified machine instruction is a direct
+ /// load from a stack slot, return the virtual or physical register number of
+ /// the destination along with the FrameIndex of the loaded stack slot. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than loading from the stack slot.
+ virtual unsigned isLoadFromStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// isStoreToStackSlot - If the specified machine instruction is a direct
+ /// store to a stack slot, return the virtual or physical register number of
+  /// the source reg along with the FrameIndex of the stack slot stored to. If
+ /// not, return 0. This predicate must return 0 if the instruction has
+ /// any side effects other than storing to the stack slot.
+ virtual unsigned isStoreToStackSlot(const MachineInstr *MI,
+ int &FrameIndex) const;
+
+ /// Branch Analysis
+ virtual unsigned InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
+ MachineBasicBlock *FBB,
+ const SmallVectorImpl<MachineOperand> &Cond) const;
+ virtual bool copyRegToReg(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I,
+ unsigned DestReg, unsigned SrcReg,
+ const TargetRegisterClass *DestRC,
+ const TargetRegisterClass *SrcRC) const;
+ virtual void storeRegToStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned SrcReg, bool isKill, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual void loadRegFromStackSlot(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MBBI,
+ unsigned DestReg, int FrameIndex,
+ const TargetRegisterClass *RC) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ int FrameIndex) const;
+
+ virtual MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
+ MachineInstr* MI,
+ const SmallVectorImpl<unsigned> &Ops,
+ MachineInstr* LoadMI) const {
+ return 0;
+ }
+
+  /// Insert a nop instruction when a hazard condition is found
+ virtual void insertNoop(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator MI) const;
+
+ /// getGlobalBaseReg - Return a virtual register initialized with the
+  /// global base register value. Output instructions required to
+ /// initialize the register in the function entry block, if necessary.
+ ///
+ unsigned getGlobalBaseReg(MachineFunction *MF) const;
+};
+
+}
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeInstrInfo.td b/lib/Target/MBlaze/MBlazeInstrInfo.td
new file mode 100644
index 0000000..3c406dd
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeInstrInfo.td
@@ -0,0 +1,672 @@
+//===- MBlazeInstrInfo.td - MBlaze Instruction defs -------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Instruction format superclass
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFormats.td"
+
+//===----------------------------------------------------------------------===//
+// MBlaze profiles and nodes
+//===----------------------------------------------------------------------===//
+def SDT_MBlazeRet : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_MBlazeJmpLink : SDTypeProfile<0, 1, [SDTCisVT<0, i32>]>;
+
+// Call
+def MBlazeJmpLink : SDNode<"MBlazeISD::JmpLink",SDT_MBlazeJmpLink,
+ [SDNPHasChain,SDNPOptInFlag,SDNPOutFlag]>;
+
+// Return
+def MBlazeRet : SDNode<"MBlazeISD::Ret", SDT_MBlazeRet,
+ [SDNPHasChain, SDNPOptInFlag]>;
+
+// The MBWrapper and MBlazeGPRel nodes are used to handle global addresses.
+// They are used in MBlazeISelLowering to lower nodes such as GlobalAddress
+// and ExternalSymbol in the static code model.
+def MBWrapper : SDNode<"MBlazeISD::Wrap", SDTIntUnaryOp>;
+def MBlazeGPRel : SDNode<"MBlazeISD::GPRel", SDTIntUnaryOp>;
+
+def SDT_MBCallSeqStart : SDCallSeqStart<[SDTCisVT<0, i32>]>;
+def SDT_MBCallSeqEnd : SDCallSeqEnd<[SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+// These are target-independent nodes, but have target-specific formats.
+def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_MBCallSeqStart,
+ [SDNPHasChain, SDNPOutFlag]>;
+def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_MBCallSeqEnd,
+ [SDNPHasChain, SDNPOptInFlag, SDNPOutFlag]>;
+
+def SDTMBlazeSelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>]>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Instruction Predicate Definitions.
+//===----------------------------------------------------------------------===//
+def HasPipe3 : Predicate<"Subtarget.hasPipe3()">;
+def HasBarrel : Predicate<"Subtarget.hasBarrel()">;
+def NoBarrel : Predicate<"!Subtarget.hasBarrel()">;
+def HasDiv : Predicate<"Subtarget.hasDiv()">;
+def HasMul : Predicate<"Subtarget.hasMul()">;
+def HasFSL : Predicate<"Subtarget.hasFSL()">;
+def HasEFSL : Predicate<"Subtarget.hasEFSL()">;
+def HasMSRSet : Predicate<"Subtarget.hasMSRSet()">;
+def HasException : Predicate<"Subtarget.hasException()">;
+def HasPatCmp : Predicate<"Subtarget.hasPatCmp()">;
+def HasFPU : Predicate<"Subtarget.hasFPU()">;
+def HasESR : Predicate<"Subtarget.hasESR()">;
+def HasPVR : Predicate<"Subtarget.hasPVR()">;
+def HasMul64 : Predicate<"Subtarget.hasMul64()">;
+def HasSqrt : Predicate<"Subtarget.hasSqrt()">;
+def HasMMU : Predicate<"Subtarget.hasMMU()">;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Operand, Complex Patterns and Transformations Definitions.
+//===----------------------------------------------------------------------===//
+
+// Instruction operand types
+def brtarget : Operand<OtherVT>;
+def calltarget : Operand<i32>;
+def simm16 : Operand<i32>;
+def uimm5 : Operand<i32>;
+def fimm : Operand<f32>;
+
+// Unsigned Operand
+def uimm16 : Operand<i32> {
+ let PrintMethod = "printUnsignedImm";
+}
+
+// FSL Operand
+def fslimm : Operand<i32> {
+ let PrintMethod = "printFSLImm";
+}
+
+// Address operand
+def memri : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops simm16, CPURegs);
+}
+
+def memrr : Operand<i32> {
+ let PrintMethod = "printMemOperand";
+ let MIOperandInfo = (ops CPURegs, CPURegs);
+}
+
+// Transformation Function - get the lower 16 bits.
+def LO16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() & 0xFFFF);
+}]>;
+
+// Transformation Function - get the higher 16 bits.
+def HI16 : SDNodeXForm<imm, [{
+ return getI32Imm((unsigned)N->getZExtValue() >> 16);
+}]>;
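+// For example, given the immediate 0x12345678, LO16 yields 0x5678 and
+// HI16 yields 0x1234.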
+
+// Node immediate fits as 16-bit sign extended on target immediate.
+// e.g. addi, muli
+def immSExt16  : PatLeaf<(imm), [{
+  int64_t Val = N->getSExtValue();
+  return Val >= -32768 && Val <= 32767;
+}]>;
+
+// Node immediate fits as 16-bit zero extended on target immediate.
+// The LO16 param means that only the lower 16 bits of the node
+// immediate are used.
+// e.g. andi, ori
+def immZExt16 : PatLeaf<(imm), [{
+ return (N->getZExtValue() >> 16) == 0;
+}], LO16>;
+
+// FSL immediate field must fit in 4 bits.
+def immZExt4 : PatLeaf<(imm), [{
+  return N->getZExtValue() == (N->getZExtValue() & 0xf);
+}]>;
+
+// shamt field must fit in 5 bits.
+def immZExt5 : PatLeaf<(imm), [{
+  return N->getZExtValue() == (N->getZExtValue() & 0x1f);
+}]>;
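+// For example, FSL port numbers 0-15 satisfy immZExt4 and shift amounts
+// 0-31 satisfy immZExt5.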
+
+// MBlaze Address Mode! SDNode frameindex could possibly be a match
+// since stack load and store instructions use it.
+def iaddr : ComplexPattern<i32, 2, "SelectAddrRegImm", [frameindex], []>;
+def xaddr : ComplexPattern<i32, 2, "SelectAddrRegReg", [], []>;
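+// iaddr matches a register+immediate address (the memri operand); xaddr
+// matches a register+register address (the memrr operand).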
+
+//===----------------------------------------------------------------------===//
+// Pseudo instructions
+//===----------------------------------------------------------------------===//
+
+// As stack alignment is always done with addi, we need a 16-bit immediate
+let Defs = [R1], Uses = [R1] in {
+def ADJCALLSTACKDOWN : MBlazePseudo<(outs), (ins simm16:$amt),
+ "${:comment} ADJCALLSTACKDOWN $amt",
+ [(callseq_start timm:$amt)]>;
+def ADJCALLSTACKUP : MBlazePseudo<(outs),
+ (ins uimm16:$amt1, simm16:$amt2),
+ "${:comment} ADJCALLSTACKUP $amt1",
+ [(callseq_end timm:$amt1, timm:$amt2)]>;
+}
+
+// Some assembly macros need to avoid pseudoinstructions and automatic
+// assembler reordering; we should reorder ourselves.
+def MACRO : MBlazePseudo<(outs), (ins), ".set macro", []>;
+def REORDER : MBlazePseudo<(outs), (ins), ".set reorder", []>;
+def NOMACRO : MBlazePseudo<(outs), (ins), ".set nomacro", []>;
+def NOREORDER : MBlazePseudo<(outs), (ins), ".set noreorder", []>;
+
+// When handling PIC code the assembler needs .cpload and .cprestore
+// directives. If the real instructions corresponding to these directives
+// are used, we get the same behavior, but also a bunch of warnings
+// from the assembler.
+def CPLOAD : MBlazePseudo<(outs), (ins CPURegs:$reg), ".cpload $reg", []>;
+def CPRESTORE : MBlazePseudo<(outs), (ins uimm16:$l), ".cprestore $l\n", []>;
+
+//===----------------------------------------------------------------------===//
+// Instructions specific format
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+class Arith<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+
+class ArithI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, imm_type:$c))], IIAlu>;
+
+class ArithR<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode,
+ InstrItinClass itin> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], itin>;
+
+class ArithRI<bits<6> op, string instr_asm, SDNode OpNode,
+ Operand Od, PatLeaf imm_type> :
+ TAI<op, (outs CPURegs:$dst), (ins Od:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $c, $b"),
+ [(set CPURegs:$dst, (OpNode imm_type:$b, CPURegs:$c))], IIAlu>;
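+// The ArithR/ArithRI classes print their operands swapped: MicroBlaze rsub
+// computes rd = rb - ra, so the value being subtracted is printed in the
+// ra slot.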
+
+class ArithN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, Od:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIAlu>;
+
+class ArithRN<bits<6> op, bits<11> flags, string instr_asm,
+ InstrItinClass itin> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$c, CPURegs:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], itin>;
+
+class ArithRNI<bits<6> op, string instr_asm,Operand Od, PatLeaf imm_type> :
+ TAI<op, (outs CPURegs:$dst), (ins Od:$c, CPURegs:$b),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [], IIAlu>;
+
+//===----------------------------------------------------------------------===//
+// Misc Arithmetic Instructions
+//===----------------------------------------------------------------------===//
+
+class Logic<bits<6> op, bits<11> flags, string instr_asm, SDNode OpNode> :
+ TA<op, flags, (outs CPURegs:$dst), (ins CPURegs:$b, CPURegs:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, CPURegs:$c))], IIAlu>;
+
+class LogicI<bits<6> op, string instr_asm, SDNode OpNode> :
+ TAI<op, (outs CPURegs:$dst), (ins CPURegs:$b, uimm16:$c),
+ !strconcat(instr_asm, " $dst, $b, $c"),
+ [(set CPURegs:$dst, (OpNode CPURegs:$b, immZExt16:$c))],
+ IIAlu>;
+
+class EffectiveAddress<string instr_asm> :
+ TAI<0x08, (outs CPURegs:$dst), (ins memri:$addr),
+ instr_asm, [(set CPURegs:$dst, iaddr:$addr)], IIAlu>;
+
+//===----------------------------------------------------------------------===//
+// Memory Access Instructions
+//===----------------------------------------------------------------------===//
+class LoadM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs CPURegs:$dst), (ins memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set CPURegs:$dst, (OpNode xaddr:$addr))], IILoad>;
+
+class LoadMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TAI<op, (outs CPURegs:$dst), (ins memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(set CPURegs:$dst, (OpNode iaddr:$addr))], IILoad>;
+
+class StoreM<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TA<op, 0x000, (outs), (ins CPURegs:$dst, memrr:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode CPURegs:$dst, xaddr:$addr)], IIStore>;
+
+class StoreMI<bits<6> op, string instr_asm, PatFrag OpNode> :
+ TAI<op, (outs), (ins CPURegs:$dst, memri:$addr),
+ !strconcat(instr_asm, " $dst, $addr"),
+ [(OpNode CPURegs:$dst, iaddr:$addr)], IIStore>;
+
+//===----------------------------------------------------------------------===//
+// Branch Instructions
+//===----------------------------------------------------------------------===//
+class Branch<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TBR<op, br, flags, (outs), (ins CPURegs:$target),
+ !strconcat(instr_asm, " $target"),
+ [(brind CPURegs:$target)], IIBranch>;
+
+class BranchI<bits<6> op, bits<5> brf, string instr_asm> :
+ TBRI<op, brf, (outs), (ins brtarget:$target),
+ !strconcat(instr_asm, " $target"),
+ [(br bb:$target)], IIBranch>;
+
+//===----------------------------------------------------------------------===//
+// Branch and Link Instructions
+//===----------------------------------------------------------------------===//
+class BranchL<bits<6> op, bits<5> br, bits<11> flags, string instr_asm> :
+ TBRL<op, br, flags, (outs), (ins CPURegs:$target),
+ !strconcat(instr_asm, " r15, $target"),
+ [], IIBranch>;
+
+class BranchLI<bits<6> op, bits<5> br, string instr_asm> :
+ TBRLI<op, br, (outs), (ins calltarget:$target),
+ !strconcat(instr_asm, " r15, $target"),
+ [], IIBranch>;
+
+//===----------------------------------------------------------------------===//
+// Conditional Branch Instructions
+//===----------------------------------------------------------------------===//
+class BranchC<bits<6> op, bits<5> br, bits<11> flags, string instr_asm,
+ PatFrag cond_op> :
+ TBRC<op, br, flags, (outs),
+ (ins CPURegs:$a, CPURegs:$b, brtarget:$offset),
+ !strconcat(instr_asm, " $a, $b, $offset"),
+ [], IIBranch>;
+ //(brcond (cond_op CPURegs:$a, CPURegs:$b), bb:$offset)],
+ //IIBranch>;
+
+class BranchCI<bits<6> op, bits<5> br, string instr_asm, PatFrag cond_op> :
+ TBRCI<op, br, (outs), (ins CPURegs:$a, brtarget:$offset),
+ !strconcat(instr_asm, " $a, $offset"),
+ [], IIBranch>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isCommutable = 1, isAsCheapAsAMove = 1 in {
+ def ADD : Arith<0x00, 0x000, "add ", add, IIAlu>;
+ def ADDC : Arith<0x02, 0x000, "addc ", adde, IIAlu>;
+ def ADDK : Arith<0x04, 0x000, "addk ", addc, IIAlu>;
+ def ADDKC : ArithN<0x06, 0x000, "addkc ", IIAlu>;
+ def AND : Logic<0x21, 0x000, "and ", and>;
+ def OR : Logic<0x20, 0x000, "or ", or>;
+ def XOR : Logic<0x22, 0x000, "xor ", xor>;
+}
+
+let isAsCheapAsAMove = 1 in {
+ def ANDN : ArithN<0x23, 0x000, "andn ", IIAlu>;
+ def CMP : ArithN<0x05, 0x001, "cmp ", IIAlu>;
+ def CMPU : ArithN<0x05, 0x003, "cmpu ", IIAlu>;
+ def RSUB : ArithR<0x01, 0x000, "rsub ", sub, IIAlu>;
+ def RSUBC : ArithR<0x03, 0x000, "rsubc ", sube, IIAlu>;
+ def RSUBK : ArithR<0x05, 0x000, "rsubk ", subc, IIAlu>;
+ def RSUBKC : ArithRN<0x07, 0x000, "rsubkc ", IIAlu>;
+}
+
+let isCommutable = 1, Predicates=[HasMul] in {
+ def MUL : Arith<0x10, 0x000, "mul ", mul, IIAlu>;
+}
+
+let isCommutable = 1, Predicates=[HasMul,HasMul64] in {
+ def MULH : Arith<0x10, 0x001, "mulh ", mulhs, IIAlu>;
+ def MULHU : Arith<0x10, 0x003, "mulhu ", mulhu, IIAlu>;
+}
+
+let Predicates=[HasMul,HasMul64] in {
+ def MULHSU : ArithN<0x10, 0x002, "mulhsu ", IIAlu>;
+}
+
+let Predicates=[HasBarrel] in {
+ def BSRL : Arith<0x11, 0x000, "bsrl ", srl, IIAlu>;
+ def BSRA : Arith<0x11, 0x200, "bsra ", sra, IIAlu>;
+ def BSLL : Arith<0x11, 0x400, "bsll ", shl, IIAlu>;
+ def BSRLI : ArithI<0x11, "bsrli ", srl, uimm5, immZExt5>;
+ def BSRAI : ArithI<0x11, "bsrai ", sra, uimm5, immZExt5>;
+ def BSLLI : ArithI<0x11, "bslli ", shl, uimm5, immZExt5>;
+}
+
+let Predicates=[HasDiv] in {
+ def IDIV : Arith<0x12, 0x000, "idiv ", sdiv, IIAlu>;
+ def IDIVU : Arith<0x12, 0x002, "idivu ", udiv, IIAlu>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze immediate mode arithmetic instructions
+//===----------------------------------------------------------------------===//
+
+let isAsCheapAsAMove = 1 in {
+ def ADDI : ArithI<0x08, "addi ", add, simm16, immSExt16>;
+ def ADDIC : ArithNI<0x0A, "addic ", simm16, immSExt16>;
+ def ADDIK : ArithNI<0x0C, "addik ", simm16, immSExt16>;
+ def ADDIKC : ArithI<0x0E, "addikc ", addc, simm16, immSExt16>;
+ def RSUBI : ArithRI<0x09, "rsubi ", sub, simm16, immSExt16>;
+  def RSUBIC  : ArithRNI<0x0B, "rsubic ", simm16, immSExt16>;
+  def RSUBIK  : ArithRNI<0x0D, "rsubik ", simm16, immSExt16>;
+ def RSUBIKC : ArithRI<0x0F, "rsubikc", subc, simm16, immSExt16>;
+ def ANDNI : ArithNI<0x2B, "andni ", uimm16, immZExt16>;
+ def ANDI : LogicI<0x29, "andi ", and>;
+ def ORI : LogicI<0x28, "ori ", or>;
+ def XORI : LogicI<0x2A, "xori ", xor>;
+}
+
+let Predicates=[HasMul] in {
+ def MULI : ArithI<0x18, "muli ", mul, simm16, immSExt16>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze memory access instructions
+//===----------------------------------------------------------------------===//
+
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
+ def LBU : LoadM<0x30, "lbu ", zextloadi8>;
+ def LHU : LoadM<0x31, "lhu ", zextloadi16>;
+ def LW : LoadM<0x32, "lw ", load>;
+
+ def LBUI : LoadMI<0x30, "lbui ", zextloadi8>;
+ def LHUI : LoadMI<0x31, "lhui ", zextloadi16>;
+ def LWI : LoadMI<0x32, "lwi ", load>;
+}
+
+ def SB : StoreM<0x34, "sb ", truncstorei8>;
+ def SH : StoreM<0x35, "sh ", truncstorei16>;
+ def SW : StoreM<0x36, "sw ", store>;
+
+ def SBI : StoreMI<0x34, "sbi ", truncstorei8>;
+ def SHI : StoreMI<0x35, "shi ", truncstorei16>;
+ def SWI : StoreMI<0x36, "swi ", store>;
+
+//===----------------------------------------------------------------------===//
+// MBlaze branch instructions
+//===----------------------------------------------------------------------===//
+
+let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BRI : BranchI<0x2E, 0x00, "bri ">;
+ def BRAI : BranchI<0x2E, 0x08, "brai ">;
+ def BEQI : BranchCI<0x2F, 0x00, "beqi ", seteq>;
+ def BNEI : BranchCI<0x2F, 0x01, "bnei ", setne>;
+ def BLTI : BranchCI<0x2F, 0x02, "blti ", setlt>;
+ def BLEI : BranchCI<0x2F, 0x03, "blei ", setle>;
+ def BGTI : BranchCI<0x2F, 0x04, "bgti ", setgt>;
+ def BGEI : BranchCI<0x2F, 0x05, "bgei ", setge>;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1, hasCtrlDep = 1 in {
+ def BR : Branch<0x26, 0x00, 0x000, "br ">;
+ def BRA : Branch<0x26, 0x08, 0x000, "bra ">;
+ def BEQ : BranchC<0x27, 0x00, 0x000, "beq ", seteq>;
+ def BNE : BranchC<0x27, 0x01, 0x000, "bne ", setne>;
+ def BLT : BranchC<0x27, 0x02, 0x000, "blt ", setlt>;
+ def BLE : BranchC<0x27, 0x03, 0x000, "ble ", setle>;
+ def BGT : BranchC<0x27, 0x04, 0x000, "bgt ", setgt>;
+ def BGE : BranchC<0x27, 0x05, 0x000, "bge ", setge>;
+}
+
+let isBranch = 1, isTerminator = 1, hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BRID : BranchI<0x2E, 0x10, "brid ">;
+ def BRAID : BranchI<0x2E, 0x18, "braid ">;
+ def BEQID : BranchCI<0x2F, 0x10, "beqid ", seteq>;
+ def BNEID : BranchCI<0x2F, 0x11, "bneid ", setne>;
+ def BLTID : BranchCI<0x2F, 0x12, "bltid ", setlt>;
+ def BLEID : BranchCI<0x2F, 0x13, "bleid ", setle>;
+ def BGTID : BranchCI<0x2F, 0x14, "bgtid ", setgt>;
+ def BGEID : BranchCI<0x2F, 0x15, "bgeid ", setge>;
+}
+
+let isBranch = 1, isIndirectBranch = 1, isTerminator = 1,
+ hasDelaySlot = 1, hasCtrlDep = 1 in {
+ def BRD : Branch<0x26, 0x10, 0x000, "brd ">;
+ def BRAD : Branch<0x26, 0x18, 0x000, "brad ">;
+ def BEQD : BranchC<0x27, 0x10, 0x000, "beqd ", seteq>;
+ def BNED : BranchC<0x27, 0x11, 0x000, "bned ", setne>;
+ def BLTD : BranchC<0x27, 0x12, 0x000, "bltd ", setlt>;
+ def BLED : BranchC<0x27, 0x13, 0x000, "bled ", setle>;
+ def BGTD : BranchC<0x27, 0x14, 0x000, "bgtd ", setgt>;
+ def BGED : BranchC<0x27, 0x15, 0x000, "bged ", setge>;
+}
+
+let isCall = 1, hasCtrlDep = 1, isIndirectBranch = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
+ Uses = [R1,R5,R6,R7,R8,R9,R10] in {
+ def BRL : BranchL<0x26, 0x04, 0x000, "brl ">;
+ def BRAL : BranchL<0x26, 0x0C, 0x000, "bral ">;
+}
+
+let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
+ Uses = [R1,R5,R6,R7,R8,R9,R10] in {
+ def BRLID : BranchLI<0x2E, 0x14, "brlid ">;
+ def BRALID : BranchLI<0x2E, 0x1C, "bralid ">;
+}
+
+let isCall = 1, hasDelaySlot = 1, hasCtrlDep = 1, isIndirectBranch = 1,
+ Defs = [R3,R4,R5,R6,R7,R8,R9,R10,R11,R12],
+ Uses = [R1,R5,R6,R7,R8,R9,R10] in {
+ def BRLD : BranchL<0x26, 0x14, 0x000, "brld ">;
+ def BRALD : BranchL<0x26, 0x1C, 0x000, "brald ">;
+}
+
+let isReturn=1, isTerminator=1, hasDelaySlot=1,
+ isBarrier=1, hasCtrlDep=1, imm16=0x8 in {
+ def RTSD : TRET<0x2D, (outs), (ins CPURegs:$target),
+ "rtsd $target, 8",
+ [(MBlazeRet CPURegs:$target)],
+ IIBranch>;
+}
+
+//===----------------------------------------------------------------------===//
+// MBlaze misc instructions
+//===----------------------------------------------------------------------===//
+
+let addr = 0 in {
+ def NOP : TADDR<0x00, (outs), (ins), "nop ", [], IIAlu>;
+}
+
+let usesCustomInserter = 1 in {
+ //class PseudoSelCC<RegisterClass RC, string asmstr>:
+ // MBlazePseudo<(outs RC:$D), (ins RC:$T, RC:$F, CPURegs:$CMP), asmstr,
+ // [(set RC:$D, (MBlazeSelectCC RC:$T, RC:$F, CPURegs:$CMP))]>;
+ //def Select_CC : PseudoSelCC<CPURegs, "# MBlazeSelect_CC">;
+
+ def Select_CC : MBlazePseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$T, CPURegs:$F, CPURegs:$CMP, i32imm:$CC),
+ "; SELECT_CC PSEUDO!",
+ []>;
+
+ def ShiftL : MBlazePseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$L, CPURegs:$R),
+ "; ShiftL PSEUDO!",
+ []>;
+
+ def ShiftRA : MBlazePseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$L, CPURegs:$R),
+ "; ShiftRA PSEUDO!",
+ []>;
+
+ def ShiftRL : MBlazePseudo<(outs CPURegs:$dst),
+ (ins CPURegs:$L, CPURegs:$R),
+ "; ShiftRL PSEUDO!",
+ []>;
+}
+
+
+let rb = 0 in {
+ def SEXT16 : TA<0x24, 0x061, (outs CPURegs:$dst), (ins CPURegs:$src),
+ "sext16 $dst, $src", [], IIAlu>;
+ def SEXT8 : TA<0x24, 0x060, (outs CPURegs:$dst), (ins CPURegs:$src),
+ "sext8 $dst, $src", [], IIAlu>;
+ def SRL : TA<0x24, 0x041, (outs CPURegs:$dst), (ins CPURegs:$src),
+ "srl $dst, $src", [], IIAlu>;
+ def SRA : TA<0x24, 0x001, (outs CPURegs:$dst), (ins CPURegs:$src),
+ "sra $dst, $src", [], IIAlu>;
+ def SRC : TA<0x24, 0x021, (outs CPURegs:$dst), (ins CPURegs:$src),
+ "src $dst, $src", [], IIAlu>;
+}
+
+def LEA_ADDI : EffectiveAddress<"addi $dst, ${addr:stackloc}">;
+
+//===----------------------------------------------------------------------===//
+// Arbitrary patterns that map to one or more instructions
+//===----------------------------------------------------------------------===//
+
+// Small immediates
+def : Pat<(i32 0), (ADD R0, R0)>;
+def : Pat<(i32 immSExt16:$imm), (ADDI R0, imm:$imm)>;
+def : Pat<(i32 immZExt16:$imm), (ORI R0, imm:$imm)>;
+
+// Arbitrary immediates
+def : Pat<(i32 imm:$imm), (ADDI R0, imm:$imm)>;
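+// Note: addi encodes only a 16-bit immediate; constants needing more bits
+// would require the MicroBlaze IMM prefix instruction, which is not
+// modeled here.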
+
+// In register sign extension
+def : Pat<(sext_inreg CPURegs:$src, i16), (SEXT16 CPURegs:$src)>;
+def : Pat<(sext_inreg CPURegs:$src, i8), (SEXT8 CPURegs:$src)>;
+
+// Call
+def : Pat<(MBlazeJmpLink (i32 tglobaladdr:$dst)), (BRLID tglobaladdr:$dst)>;
+def : Pat<(MBlazeJmpLink (i32 texternalsym:$dst)),(BRLID texternalsym:$dst)>;
+def : Pat<(MBlazeJmpLink CPURegs:$dst), (BRLD CPURegs:$dst)>;
+
+// Shift Instructions
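+// The Shift* pseudos are expanded through the usesCustomInserter hook
+// (EmitInstrWithCustomInserter) during instruction emission.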
+def : Pat<(shl CPURegs:$L, CPURegs:$R), (ShiftL CPURegs:$L, CPURegs:$R)>;
+def : Pat<(sra CPURegs:$L, CPURegs:$R), (ShiftRA CPURegs:$L, CPURegs:$R)>;
+def : Pat<(srl CPURegs:$L, CPURegs:$R), (ShiftRL CPURegs:$L, CPURegs:$R)>;
+
+// SET_CC operations
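+// The final operand of Select_CC encodes how the comparison result is
+// tested: 1 = eq (zero), 2 = ne, 3 = gt, 4 = lt, 5 = ge, 6 = le.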
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETEQ),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 1)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETNE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 2)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 3)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 4)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETGE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 5)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETLE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMP CPURegs:$L, CPURegs:$R), 6)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMPU CPURegs:$L, CPURegs:$R), 3)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULT),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMPU CPURegs:$L, CPURegs:$R), 4)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETUGE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMPU CPURegs:$L, CPURegs:$R), 5)>;
+def : Pat<(setcc CPURegs:$L, CPURegs:$R, SETULE),
+ (Select_CC (ADDI R0, 1), (ADDI R0, 0),
+ (CMPU CPURegs:$L, CPURegs:$R), 6)>;
+
+// SELECT operations
+def : Pat<(select CPURegs:$C, CPURegs:$T, CPURegs:$F),
+ (Select_CC CPURegs:$T, CPURegs:$F, CPURegs:$C, 2)>;
+
+// SELECT_CC
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETEQ),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 1)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETNE),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 2)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGT),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 3)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLT),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 4)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETGE),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 5)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETLE),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMP CPURegs:$L, CPURegs:$R), 6)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGT),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 3)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULT),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 4)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETUGE),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 5)>;
+def : Pat<(selectcc CPURegs:$L, CPURegs:$R, CPURegs:$T, CPURegs:$F, SETULE),
+ (Select_CC CPURegs:$T, CPURegs:$F, (CMPU CPURegs:$L, CPURegs:$R), 6)>;
+
+// BRCOND instructions
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETEQ), bb:$T),
+ (BEQID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETNE), bb:$T),
+ (BNEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGT), bb:$T),
+ (BGTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLT), bb:$T),
+ (BLTID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETGE), bb:$T),
+ (BGEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETLE), bb:$T),
+ (BLEID (CMP CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGT), bb:$T),
+ (BGTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULT), bb:$T),
+ (BLTID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETUGE), bb:$T),
+ (BGEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond (setcc CPURegs:$L, CPURegs:$R, SETULE), bb:$T),
+ (BLEID (CMPU CPURegs:$R, CPURegs:$L), bb:$T)>;
+def : Pat<(brcond CPURegs:$C, bb:$T),
+ (BNEID CPURegs:$C, bb:$T)>;
+
+// Jump tables, global addresses, and constant pools
+def : Pat<(MBWrapper tglobaladdr:$in), (ORI R0, tglobaladdr:$in)>;
+def : Pat<(MBWrapper tjumptable:$in), (ORI R0, tjumptable:$in)>;
+def : Pat<(MBWrapper tconstpool:$in), (ORI R0, tconstpool:$in)>;
+
+// Misc instructions
+def : Pat<(and CPURegs:$lh, (not CPURegs:$rh)),(ANDN CPURegs:$lh, CPURegs:$rh)>;
+
+// Arithmetic with immediates
+def : Pat<(add CPURegs:$in, imm:$imm),(ADDI CPURegs:$in, imm:$imm)>;
+def : Pat<(or CPURegs:$in, imm:$imm),(ORI CPURegs:$in, imm:$imm)>;
+def : Pat<(xor CPURegs:$in, imm:$imm),(XORI CPURegs:$in, imm:$imm)>;
+
+// extended load and stores
+def : Pat<(extloadi1 iaddr:$src), (LBUI iaddr:$src)>;
+def : Pat<(extloadi8 iaddr:$src), (LBUI iaddr:$src)>;
+def : Pat<(extloadi16 iaddr:$src), (LHUI iaddr:$src)>;
+def : Pat<(extloadi1 xaddr:$src), (LBU xaddr:$src)>;
+def : Pat<(extloadi8 xaddr:$src), (LBU xaddr:$src)>;
+def : Pat<(extloadi16 xaddr:$src), (LHU xaddr:$src)>;
+
+def : Pat<(sextloadi1 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>;
+def : Pat<(sextloadi8 iaddr:$src), (SEXT8 (LBUI iaddr:$src))>;
+def : Pat<(sextloadi16 iaddr:$src), (SEXT16 (LHUI iaddr:$src))>;
+def : Pat<(sextloadi1 xaddr:$src), (SEXT8 (LBU xaddr:$src))>;
+def : Pat<(sextloadi8 xaddr:$src), (SEXT8 (LBU xaddr:$src))>;
+def : Pat<(sextloadi16 xaddr:$src), (SEXT16 (LHU xaddr:$src))>;
+
+// peepholes
+def : Pat<(store (i32 0), iaddr:$dst), (SWI R0, iaddr:$dst)>;
+
+//===----------------------------------------------------------------------===//
+// Floating Point Support
+//===----------------------------------------------------------------------===//
+include "MBlazeInstrFSL.td"
+include "MBlazeInstrFPU.td"
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
new file mode 100644
index 0000000..c8faffc
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp
@@ -0,0 +1,109 @@
+//===- MBlazeIntrinsicInfo.cpp - Intrinsic Information ----------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeIntrinsicInfo.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/Function.h"
+#include "llvm/Intrinsics.h"
+#include "llvm/Module.h"
+#include "llvm/Type.h"
+#include "llvm/Support/raw_ostream.h"
+#include <cstring>
+
+using namespace llvm;
+
+namespace mblazeIntrinsic {
+
+ enum ID {
+ last_non_mblaze_intrinsic = Intrinsic::num_intrinsics-1,
+#define GET_INTRINSIC_ENUM_VALUES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ENUM_VALUES
+ , num_mblaze_intrinsics
+ };
+
+#define GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_LLVM_INTRINSIC_FOR_GCC_BUILTIN
+}
+
+std::string MBlazeIntrinsicInfo::getName(unsigned IntrID, const Type **Tys,
+ unsigned numTys) const {
+ static const char *const names[] = {
+#define GET_INTRINSIC_NAME_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_NAME_TABLE
+ };
+
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ if (IntrID < Intrinsic::num_intrinsics)
+    return "";  // not an MBlaze intrinsic (returning 0 here would construct
+                // a std::string from a null pointer, which is undefined)
+ assert(IntrID < mblazeIntrinsic::num_mblaze_intrinsics &&
+ "Invalid intrinsic ID");
+
+ std::string Result(names[IntrID - Intrinsic::num_intrinsics]);
+ return Result;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupName(const char *Name, unsigned Len) const {
+#define GET_FUNCTION_RECOGNIZER
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_FUNCTION_RECOGNIZER
+ return 0;
+}
+
+unsigned MBlazeIntrinsicInfo::
+lookupGCCName(const char *Name) const {
+ return mblazeIntrinsic::getIntrinsicForGCCBuiltin("mblaze",Name);
+}
+
+bool MBlazeIntrinsicInfo::isOverloaded(unsigned IntrID) const {
+ // Overload Table
+ const bool OTable[] = {
+#define GET_INTRINSIC_OVERLOAD_TABLE
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_OVERLOAD_TABLE
+ };
+  // Guard against IDs outside the MBlaze range; indexing OTable with a
+  // standard intrinsic ID would underflow.
+  if (IntrID < Intrinsic::num_intrinsics)
+    return false;
+  return OTable[IntrID - Intrinsic::num_intrinsics];
+}
+
+/// This defines the "getAttributes(ID id)" method.
+#define GET_INTRINSIC_ATTRIBUTES
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_ATTRIBUTES
+
+static const FunctionType *getType(LLVMContext &Context, unsigned id) {
+ const Type *ResultTy = NULL;
+ std::vector<const Type*> ArgTys;
+ bool IsVarArg = false;
+
+#define GET_INTRINSIC_GENERATOR
+#include "MBlazeGenIntrinsics.inc"
+#undef GET_INTRINSIC_GENERATOR
+
+ return FunctionType::get(ResultTy, ArgTys, IsVarArg);
+}
+
+Function *MBlazeIntrinsicInfo::getDeclaration(Module *M, unsigned IntrID,
+ const Type **Tys,
+ unsigned numTy) const {
+ assert(!isOverloaded(IntrID) && "MBlaze intrinsics are not overloaded");
+ AttrListPtr AList = getAttributes((mblazeIntrinsic::ID) IntrID);
+ return cast<Function>(M->getOrInsertFunction(getName(IntrID),
+ getType(M->getContext(), IntrID),
+ AList));
+}
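Taken together, lookupName and getDeclaration let a front end materialize one of these intrinsics by name. A minimal sketch of that flow, assuming a Module to insert into and assuming the usual llvm.<target>.* name that tblgen derives from the int_mblaze_* records (both are illustrative, not taken from this patch):

    #include "MBlazeIntrinsicInfo.h"
    #include "llvm/Module.h"
    #include <cstring>

    using namespace llvm;

    // Hypothetical helper: resolve an MBlaze intrinsic by name and fetch
    // (or create) its declaration in module M. lookupName returns 0 for
    // names that are not MBlaze intrinsics.
    static Function *getMBlazeIntrinsic(Module *M, const char *Name) {
      MBlazeIntrinsicInfo III;
      unsigned ID = III.lookupName(Name, strlen(Name));
      if (ID == 0)
        return 0;
      return III.getDeclaration(M, ID);
    }

    // Usage: Function *F = getMBlazeIntrinsic(M, "llvm.mblaze.fsl.get");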
diff --git a/lib/Target/MBlaze/MBlazeIntrinsicInfo.h b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
new file mode 100644
index 0000000..9804c77
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeIntrinsicInfo.h
@@ -0,0 +1,33 @@
+//===- MBlazeIntrinsicInfo.h - MBlaze Intrinsic Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of TargetIntrinsicInfo.
+//
+//===----------------------------------------------------------------------===//
+#ifndef MBLAZEINTRINSICS_H
+#define MBLAZEINTRINSICS_H
+
+#include "llvm/Target/TargetIntrinsicInfo.h"
+
+namespace llvm {
+
+ class MBlazeIntrinsicInfo : public TargetIntrinsicInfo {
+ public:
+ std::string getName(unsigned IntrID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ unsigned lookupName(const char *Name, unsigned Len) const;
+ unsigned lookupGCCName(const char *Name) const;
+ bool isOverloaded(unsigned IID) const;
+ Function *getDeclaration(Module *M, unsigned ID, const Type **Tys = 0,
+ unsigned numTys = 0) const;
+ };
+
+}
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeIntrinsics.td b/lib/Target/MBlaze/MBlazeIntrinsics.td
new file mode 100644
index 0000000..76eb563
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeIntrinsics.td
@@ -0,0 +1,137 @@
+//===- MBlazeIntrinsics.td - Defines MBlaze intrinsics -----*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines all of the MicroBlaze-specific intrinsics.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Definitions for all MBlaze intrinsics.
+//
+
+// MBlaze intrinsic classes.
+let TargetPrefix = "mblaze", isTarget = 1 in {
+ class MBFSL_Get_Intrinsic : Intrinsic<[llvm_i32_ty],
+ [llvm_i32_ty],
+ [IntrWriteMem]>;
+
+ class MBFSL_Put_Intrinsic : Intrinsic<[llvm_void_ty],
+ [llvm_i32_ty, llvm_i32_ty],
+ [IntrWriteMem]>;
+
+ class MBFSL_PutT_Intrinsic : Intrinsic<[llvm_void_ty],
+ [llvm_i32_ty],
+ [IntrWriteMem]>;
+}
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Get Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_get : GCCBuiltin<"__builtin_mblaze_fsl_get">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_aget : GCCBuiltin<"__builtin_mblaze_fsl_aget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_cget : GCCBuiltin<"__builtin_mblaze_fsl_cget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_caget : GCCBuiltin<"__builtin_mblaze_fsl_caget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eget : GCCBuiltin<"__builtin_mblaze_fsl_eget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_eaget : GCCBuiltin<"__builtin_mblaze_fsl_eaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecget : GCCBuiltin<"__builtin_mblaze_fsl_ecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ecaget : GCCBuiltin<"__builtin_mblaze_fsl_ecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_nget : GCCBuiltin<"__builtin_mblaze_fsl_nget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_naget : GCCBuiltin<"__builtin_mblaze_fsl_naget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncget : GCCBuiltin<"__builtin_mblaze_fsl_ncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_ncaget : GCCBuiltin<"__builtin_mblaze_fsl_ncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neget : GCCBuiltin<"__builtin_mblaze_fsl_neget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_neaget : GCCBuiltin<"__builtin_mblaze_fsl_neaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necget : GCCBuiltin<"__builtin_mblaze_fsl_necget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_necaget : GCCBuiltin<"__builtin_mblaze_fsl_necaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tget : GCCBuiltin<"__builtin_mblaze_fsl_tget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_taget : GCCBuiltin<"__builtin_mblaze_fsl_taget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcget : GCCBuiltin<"__builtin_mblaze_fsl_tcget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tcaget : GCCBuiltin<"__builtin_mblaze_fsl_tcaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teget : GCCBuiltin<"__builtin_mblaze_fsl_teget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_teaget : GCCBuiltin<"__builtin_mblaze_fsl_teaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecget : GCCBuiltin<"__builtin_mblaze_fsl_tecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tecaget : GCCBuiltin<"__builtin_mblaze_fsl_tecaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnget : GCCBuiltin<"__builtin_mblaze_fsl_tnget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnaget : GCCBuiltin<"__builtin_mblaze_fsl_tnaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncget : GCCBuiltin<"__builtin_mblaze_fsl_tncget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tncaget : GCCBuiltin<"__builtin_mblaze_fsl_tncaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneget : GCCBuiltin<"__builtin_mblaze_fsl_tneget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tneaget : GCCBuiltin<"__builtin_mblaze_fsl_tneaget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecget : GCCBuiltin<"__builtin_mblaze_fsl_tnecget">,
+ MBFSL_Get_Intrinsic;
+def int_mblaze_fsl_tnecaget : GCCBuiltin<"__builtin_mblaze_fsl_tnecaget">,
+ MBFSL_Get_Intrinsic;
+
+//===----------------------------------------------------------------------===//
+// MicroBlaze FSL Put Intrinsic Definitions.
+//
+
+def int_mblaze_fsl_put : GCCBuiltin<"__builtin_mblaze_fsl_put">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_aput : GCCBuiltin<"__builtin_mblaze_fsl_aput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_cput : GCCBuiltin<"__builtin_mblaze_fsl_cput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_caput : GCCBuiltin<"__builtin_mblaze_fsl_caput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_nput : GCCBuiltin<"__builtin_mblaze_fsl_nput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_naput : GCCBuiltin<"__builtin_mblaze_fsl_naput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncput : GCCBuiltin<"__builtin_mblaze_fsl_ncput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_ncaput : GCCBuiltin<"__builtin_mblaze_fsl_ncaput">,
+ MBFSL_Put_Intrinsic;
+def int_mblaze_fsl_tput : GCCBuiltin<"__builtin_mblaze_fsl_tput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_taput : GCCBuiltin<"__builtin_mblaze_fsl_taput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcput : GCCBuiltin<"__builtin_mblaze_fsl_tcput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tcaput : GCCBuiltin<"__builtin_mblaze_fsl_tcaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnput : GCCBuiltin<"__builtin_mblaze_fsl_tnput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tnaput : GCCBuiltin<"__builtin_mblaze_fsl_tnaput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncput : GCCBuiltin<"__builtin_mblaze_fsl_tncput">,
+ MBFSL_PutT_Intrinsic;
+def int_mblaze_fsl_tncaput : GCCBuiltin<"__builtin_mblaze_fsl_tncaput">,
+ MBFSL_PutT_Intrinsic;
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
new file mode 100644
index 0000000..7ae465d
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.cpp
@@ -0,0 +1,27 @@
+//===-- MBlazeMCAsmInfo.cpp - MBlaze asm properties -----------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declarations of the MBlazeMCAsmInfo properties.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeMCAsmInfo.h"
+using namespace llvm;
+
+MBlazeMCAsmInfo::MBlazeMCAsmInfo(const Target &T, const StringRef &TT) {
+ AlignmentIsInBytes = false;
+ Data16bitsDirective = "\t.half\t";
+ Data32bitsDirective = "\t.word\t";
+ Data64bitsDirective = 0;
+ PrivateGlobalPrefix = "$";
+ CommentString = "#";
+ ZeroDirective = "\t.space\t";
+ GPRel32Directive = "\t.gpword\t";
+ HasSetDirective = false;
+}
diff --git a/lib/Target/MBlaze/MBlazeMCAsmInfo.h b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
new file mode 100644
index 0000000..bccb418
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMCAsmInfo.h
@@ -0,0 +1,30 @@
+//=====-- MBlazeMCAsmInfo.h - MBlaze asm properties -----------*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of the MBlazeMCAsmInfo class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZETARGETASMINFO_H
+#define MBLAZETARGETASMINFO_H
+
+#include "llvm/MC/MCAsmInfo.h"
+
+namespace llvm {
+ class Target;
+ class StringRef;
+
+ class MBlazeMCAsmInfo : public MCAsmInfo {
+ public:
+ explicit MBlazeMCAsmInfo(const Target &T, const StringRef &TT);
+ };
+
+} // namespace llvm
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeMachineFunction.h b/lib/Target/MBlaze/MBlazeMachineFunction.h
new file mode 100644
index 0000000..08d4dca
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeMachineFunction.h
@@ -0,0 +1,136 @@
+//===-- MBlazeMachineFunction.h - Private data ------------------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of MachineFunctionInfo.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_MACHINE_FUNCTION_INFO_H
+#define MBLAZE_MACHINE_FUNCTION_INFO_H
+
+#include "llvm/ADT/SmallVector.h"
+#include "llvm/ADT/VectorExtras.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+
+namespace llvm {
+
+/// MBlazeFunctionInfo - This class is derived from MachineFunctionInfo and
+/// contains private MBlaze target-specific information for each
+/// MachineFunction.
+class MBlazeFunctionInfo : public MachineFunctionInfo {
+
+private:
+  /// Holds the stack offset at which the Frame Pointer must be saved for
+  /// this function. Used by the prologue and epilogue to emit the FP
+  /// save/restore.
+ int FPStackOffset;
+
+  /// Holds the stack offset at which the Return Address must be saved for
+  /// this function. Used by the prologue and epilogue to emit the RA
+  /// save/restore.
+ int RAStackOffset;
+
+  /// At each function entry a special bitmask directive must be emitted
+  /// to help debug the CPU callee-saved registers. It needs a negative
+  /// offset from the final stack size, relative to the highest saved
+  /// register location on the stack.
+ int CPUTopSavedRegOff;
+
+  /// MBlazeFIHolder - Holds a FrameIndex and its Stack Pointer Offset
+ struct MBlazeFIHolder {
+
+ int FI;
+ int SPOffset;
+
+ MBlazeFIHolder(int FrameIndex, int StackPointerOffset)
+ : FI(FrameIndex), SPOffset(StackPointerOffset) {}
+ };
+
+  /// When PIC is used the GP must be saved on the stack in the function
+  /// prologue and must be reloaded from this stack location after every
+  /// call. A reference to its stack location and frame index must be kept
+  /// for use in emitPrologue and processFunctionBeforeFrameFinalized.
+ MBlazeFIHolder GPHolder;
+
+  /// In LowerFormalArguments the stack size is not yet known, so the
+  /// stack pointer offset calculation for arguments not passed in
+  /// registers must be postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 16> FnLoadArgs;
+ bool HasLoadArgs;
+
+  // For vararg functions we must write the argument registers back to the
+  // caller's stack, preserving the register arguments. Since the stack
+  // size is unknown in LowerFormalArguments, the stack pointer offset
+  // calculation must be postponed to emitPrologue.
+ SmallVector<MBlazeFIHolder, 4> FnStoreVarArgs;
+ bool HasStoreVarArgs;
+
+ /// SRetReturnReg - Some subtargets require that sret lowering includes
+ /// returning the value of the returned struct in a register. This field
+ /// holds the virtual register into which the sret argument is passed.
+ unsigned SRetReturnReg;
+
+ /// GlobalBaseReg - keeps track of the virtual register initialized for
+ /// use as the global base register. This is used for PIC in some PIC
+ /// relocation models.
+ unsigned GlobalBaseReg;
+
+public:
+ MBlazeFunctionInfo(MachineFunction& MF)
+ : FPStackOffset(0), RAStackOffset(0), CPUTopSavedRegOff(0),
+ GPHolder(-1,-1), HasLoadArgs(false), HasStoreVarArgs(false),
+ SRetReturnReg(0), GlobalBaseReg(0)
+ {}
+
+ int getFPStackOffset() const { return FPStackOffset; }
+ void setFPStackOffset(int Off) { FPStackOffset = Off; }
+
+ int getRAStackOffset() const { return RAStackOffset; }
+ void setRAStackOffset(int Off) { RAStackOffset = Off; }
+
+ int getCPUTopSavedRegOff() const { return CPUTopSavedRegOff; }
+ void setCPUTopSavedRegOff(int Off) { CPUTopSavedRegOff = Off; }
+
+ int getGPStackOffset() const { return GPHolder.SPOffset; }
+ int getGPFI() const { return GPHolder.FI; }
+ void setGPStackOffset(int Off) { GPHolder.SPOffset = Off; }
+ void setGPFI(int FI) { GPHolder.FI = FI; }
+ bool needGPSaveRestore() const { return GPHolder.SPOffset != -1; }
+
+ bool hasLoadArgs() const { return HasLoadArgs; }
+ bool hasStoreVarArgs() const { return HasStoreVarArgs; }
+
+ void recordLoadArgsFI(int FI, int SPOffset) {
+ if (!HasLoadArgs) HasLoadArgs=true;
+ FnLoadArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+ void recordStoreVarArgsFI(int FI, int SPOffset) {
+ if (!HasStoreVarArgs) HasStoreVarArgs=true;
+ FnStoreVarArgs.push_back(MBlazeFIHolder(FI, SPOffset));
+ }
+
+ void adjustLoadArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasLoadArgs()) return;
+ for (unsigned i = 0, e = FnLoadArgs.size(); i != e; ++i)
+ MFI->setObjectOffset( FnLoadArgs[i].FI, FnLoadArgs[i].SPOffset );
+ }
+ void adjustStoreVarArgsFI(MachineFrameInfo *MFI) const {
+ if (!hasStoreVarArgs()) return;
+ for (unsigned i = 0, e = FnStoreVarArgs.size(); i != e; ++i)
+ MFI->setObjectOffset( FnStoreVarArgs[i].FI, FnStoreVarArgs[i].SPOffset );
+ }
+
+ unsigned getSRetReturnReg() const { return SRetReturnReg; }
+ void setSRetReturnReg(unsigned Reg) { SRetReturnReg = Reg; }
+
+ unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
+ void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
+};
+
+} // end of namespace llvm
+
+#endif // MBLAZE_MACHINE_FUNCTION_INFO_H
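The record/adjust pairs above implement a two-phase protocol: frame indexes are recorded with provisional offsets while the final stack size is still unknown, then replayed into the MachineFrameInfo once the layout is fixed. A minimal sketch of that protocol (the frame index and offset values are made up for illustration):

    #include "MBlazeMachineFunction.h"

    using namespace llvm;

    // Phase 1 runs in LowerFormalArguments, phase 2 in
    // adjustMBlazeStackFrame; see MBlazeRegisterInfo.cpp below.
    static void frameIndexProtocol(MBlazeFunctionInfo &MBlazeFI,
                                   MachineFrameInfo *MFI) {
      // Phase 1: remember the frame index with a provisional SP offset.
      MBlazeFI.recordLoadArgsFI(/*FI=*/3, /*SPOffset=*/-8);

      // Phase 2: once the final stack size is known, replay each recorded
      // entry via MFI->setObjectOffset.
      MBlazeFI.adjustLoadArgsFI(MFI);
    }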
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.cpp b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
new file mode 100644
index 0000000..9067f8b
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.cpp
@@ -0,0 +1,378 @@
+//===- MBlazeRegisterInfo.cpp - MBlaze Register Information -----*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "mblaze-reg-info"
+
+#include "MBlaze.h"
+#include "MBlazeSubtarget.h"
+#include "MBlazeRegisterInfo.h"
+#include "MBlazeMachineFunction.h"
+#include "llvm/Constants.h"
+#include "llvm/Type.h"
+#include "llvm/Function.h"
+#include "llvm/CodeGen/ValueTypes.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineLocation.h"
+#include "llvm/Target/TargetFrameInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetOptions.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Support/CommandLine.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/ADT/BitVector.h"
+#include "llvm/ADT/STLExtras.h"
+
+using namespace llvm;
+
+MBlazeRegisterInfo::
+MBlazeRegisterInfo(const MBlazeSubtarget &ST, const TargetInstrInfo &tii)
+ : MBlazeGenRegisterInfo(MBlaze::ADJCALLSTACKDOWN, MBlaze::ADJCALLSTACKUP),
+ Subtarget(ST), TII(tii) {}
+
+/// getRegisterNumbering - Given the enum value for some register, e.g.
+/// MBlaze::R0, return the number that it corresponds to (e.g. 0).
+unsigned MBlazeRegisterInfo::getRegisterNumbering(unsigned RegEnum) {
+ switch (RegEnum) {
+ case MBlaze::R0 : case MBlaze::F0 : return 0;
+ case MBlaze::R1 : case MBlaze::F1 : return 1;
+ case MBlaze::R2 : case MBlaze::F2 : return 2;
+ case MBlaze::R3 : case MBlaze::F3 : return 3;
+ case MBlaze::R4 : case MBlaze::F4 : return 4;
+ case MBlaze::R5 : case MBlaze::F5 : return 5;
+ case MBlaze::R6 : case MBlaze::F6 : return 6;
+ case MBlaze::R7 : case MBlaze::F7 : return 7;
+ case MBlaze::R8 : case MBlaze::F8 : return 8;
+ case MBlaze::R9 : case MBlaze::F9 : return 9;
+ case MBlaze::R10 : case MBlaze::F10 : return 10;
+ case MBlaze::R11 : case MBlaze::F11 : return 11;
+ case MBlaze::R12 : case MBlaze::F12 : return 12;
+ case MBlaze::R13 : case MBlaze::F13 : return 13;
+ case MBlaze::R14 : case MBlaze::F14 : return 14;
+ case MBlaze::R15 : case MBlaze::F15 : return 15;
+ case MBlaze::R16 : case MBlaze::F16 : return 16;
+ case MBlaze::R17 : case MBlaze::F17 : return 17;
+ case MBlaze::R18 : case MBlaze::F18 : return 18;
+ case MBlaze::R19 : case MBlaze::F19 : return 19;
+ case MBlaze::R20 : case MBlaze::F20 : return 20;
+ case MBlaze::R21 : case MBlaze::F21 : return 21;
+ case MBlaze::R22 : case MBlaze::F22 : return 22;
+ case MBlaze::R23 : case MBlaze::F23 : return 23;
+ case MBlaze::R24 : case MBlaze::F24 : return 24;
+ case MBlaze::R25 : case MBlaze::F25 : return 25;
+ case MBlaze::R26 : case MBlaze::F26 : return 26;
+ case MBlaze::R27 : case MBlaze::F27 : return 27;
+ case MBlaze::R28 : case MBlaze::F28 : return 28;
+ case MBlaze::R29 : case MBlaze::F29 : return 29;
+ case MBlaze::R30 : case MBlaze::F30 : return 30;
+ case MBlaze::R31 : case MBlaze::F31 : return 31;
+ default: llvm_unreachable("Unknown register number!");
+ }
+ return 0; // Not reached
+}
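The switch is needed because the tblgen-assigned enum values are arbitrary constants that need not equal the architectural register numbers. A trivial usage sketch:

    #include "MBlazeRegisterInfo.h"
    #include <cassert>

    using namespace llvm;

    void registerNumberingExample() {
      // MBlaze::R15 and MBlaze::F15 are tblgen enum constants; the mapping
      // recovers the architectural number 15 for both.
      assert(MBlazeRegisterInfo::getRegisterNumbering(MBlaze::R15) == 15);
      assert(MBlazeRegisterInfo::getRegisterNumbering(MBlaze::F15) == 15);
    }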
+
+unsigned MBlazeRegisterInfo::getPICCallReg() {
+ return MBlaze::R20;
+}
+
+//===----------------------------------------------------------------------===//
+// Callee Saved Registers methods
+//===----------------------------------------------------------------------===//
+
+/// MBlaze Callee Saved Registers
+const unsigned* MBlazeRegisterInfo::
+getCalleeSavedRegs(const MachineFunction *MF) const {
+ // MBlaze callee-save register range is R20 - R31
+ static const unsigned CalleeSavedRegs[] = {
+ MBlaze::R20, MBlaze::R21, MBlaze::R22, MBlaze::R23,
+ MBlaze::R24, MBlaze::R25, MBlaze::R26, MBlaze::R27,
+ MBlaze::R28, MBlaze::R29, MBlaze::R30, MBlaze::R31,
+ 0
+ };
+
+ return CalleeSavedRegs;
+}
+
+/// MBlaze Callee Saved Register Classes
+const TargetRegisterClass* const* MBlazeRegisterInfo::
+getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ static const TargetRegisterClass * const CalleeSavedRC[] = {
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ &MBlaze::CPURegsRegClass, &MBlaze::CPURegsRegClass,
+ 0
+ };
+
+ return CalleeSavedRC;
+}
+
+BitVector MBlazeRegisterInfo::
+getReservedRegs(const MachineFunction &MF) const {
+ BitVector Reserved(getNumRegs());
+ Reserved.set(MBlaze::R0);
+ Reserved.set(MBlaze::R1);
+ Reserved.set(MBlaze::R2);
+ Reserved.set(MBlaze::R13);
+ Reserved.set(MBlaze::R14);
+ Reserved.set(MBlaze::R15);
+ Reserved.set(MBlaze::R16);
+ Reserved.set(MBlaze::R17);
+ Reserved.set(MBlaze::R18);
+ Reserved.set(MBlaze::R19);
+ return Reserved;
+}
+
+//===----------------------------------------------------------------------===//
+//
+// Stack Frame Processing methods
+// +----------------------------+
+//
+// The stack is allocated by decrementing the stack pointer on
+// the first instruction of a function prologue. Once decremented,
+// all stack references are done through a positive offset
+// from the stack/frame pointer, so the stack is considered
+// to grow up.
+//
+//===----------------------------------------------------------------------===//
+
+void MBlazeRegisterInfo::adjustMBlazeStackFrame(MachineFunction &MF) const {
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+
+  // See the description in MBlazeMachineFunction.h.
+ int TopCPUSavedRegOff = -1;
+
+  // Adjust the CPU callee-saved registers area. Registers RA and FP must
+  // be saved in this area when there is the need. The whole area must
+  // be aligned to the default stack alignment requirements.
+ unsigned StackOffset = MFI->getStackSize();
+ unsigned RegSize = 4;
+
+  // Replace the dummy '0' SPOffsets with the real negative offsets, as
+  // explained in LowerFormalArguments. Leaving them at '0' for a while is
+  // necessary to keep calculateFrameObjectOffsets from laying these
+  // objects out in the stack frame.
+ MBlazeFI->adjustLoadArgsFI(MFI);
+ MBlazeFI->adjustStoreVarArgsFI(MFI);
+
+ if (hasFP(MF)) {
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
+ StackOffset);
+ MBlazeFI->setFPStackOffset(StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += RegSize;
+ }
+
+ if (MFI->hasCalls()) {
+ MFI->setObjectOffset(MFI->CreateStackObject(RegSize, RegSize, true),
+ StackOffset);
+ MBlazeFI->setRAStackOffset(StackOffset);
+ TopCPUSavedRegOff = StackOffset;
+ StackOffset += RegSize;
+ }
+
+ // Update frame info
+ MFI->setStackSize(StackOffset);
+
+  // Recalculate the final top offset. The final value must be '0' if
+  // there isn't a callee-saved register for the CPU or FPU, otherwise
+  // a negative offset is needed.
+ if (TopCPUSavedRegOff >= 0)
+ MBlazeFI->setCPUTopSavedRegOff(TopCPUSavedRegOff-StackOffset);
+}
+
+// hasFP - Return true if the specified function should have a dedicated frame
+// pointer register. This is true if the function has variable sized allocas or
+// if frame pointer elimination is disabled.
+bool MBlazeRegisterInfo::hasFP(const MachineFunction &MF) const {
+ const MachineFrameInfo *MFI = MF.getFrameInfo();
+ return NoFramePointerElim || MFI->hasVarSizedObjects();
+}
+
+// This function eliminates the ADJCALLSTACKDOWN and
+// ADJCALLSTACKUP pseudo instructions.
+void MBlazeRegisterInfo::
+eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const {
+ // Simply discard ADJCALLSTACKDOWN, ADJCALLSTACKUP instructions.
+ MBB.erase(I);
+}
+
+// FrameIndex operands represent objects inside an abstract stack.
+// We must replace each FrameIndex with a direct stack/frame
+// pointer reference.
+unsigned MBlazeRegisterInfo::
+eliminateFrameIndex(MachineBasicBlock::iterator II, int SPAdj,
+ int *Value, RegScavenger *RS) const {
+ MachineInstr &MI = *II;
+ MachineFunction &MF = *MI.getParent()->getParent();
+
+ unsigned i = 0;
+ while (!MI.getOperand(i).isFI()) {
+ ++i;
+ assert(i < MI.getNumOperands() &&
+ "Instr doesn't have FrameIndex operand!");
+ }
+
+ unsigned oi = i == 2 ? 1 : 2;
+
+ DEBUG(errs() << "\nFunction : " << MF.getFunction()->getName() << "\n";
+ errs() << "<--------->\n" << MI);
+
+ int FrameIndex = MI.getOperand(i).getIndex();
+ int stackSize = MF.getFrameInfo()->getStackSize();
+ int spOffset = MF.getFrameInfo()->getObjectOffset(FrameIndex);
+
+ DEBUG(errs() << "FrameIndex : " << FrameIndex << "\n"
+ << "spOffset : " << spOffset << "\n"
+ << "stackSize : " << stackSize << "\n");
+
+  // As explained in LowerFormalArguments, detect negative offsets
+  // and adjust the SPOffsets considering the final stack size.
+ int Offset = ((spOffset < 0) ? (stackSize + (-(spOffset+4))) : (spOffset));
+ Offset += MI.getOperand(oi).getImm();
+
+ DEBUG(errs() << "Offset : " << Offset << "\n" << "<--------->\n");
+
+ MI.getOperand(oi).ChangeToImmediate(Offset);
+ MI.getOperand(i).ChangeToRegister(getFrameRegister(MF), false);
+ return 0;
+}
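The offset rewrite is easiest to see with concrete numbers. A self-contained mirror of the computation above (the values are made up; the real code reads them from the MachineFrameInfo and the instruction's immediate operand):

    #include <cassert>

    // Mirror of the offset rewrite in eliminateFrameIndex, for
    // illustration only.
    static int rewriteOffset(int stackSize, int spOffset, int imm) {
      int Offset = (spOffset < 0) ? (stackSize + (-(spOffset + 4)))
                                  : spOffset;
      return Offset + imm;
    }

    int main() {
      // Positive object offsets are used as-is.
      assert(rewriteOffset(40, 8, 0) == 8);
      // Negative offsets (the markers recorded in LowerFormalArguments)
      // are rebased against the final stack size: 40 + (-(-8 + 4)) = 44.
      assert(rewriteOffset(40, -8, 0) == 44);
      return 0;
    }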
+
+void MBlazeRegisterInfo::
+emitPrologue(MachineFunction &MF) const {
+ MachineBasicBlock &MBB = MF.front();
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ MachineBasicBlock::iterator MBBI = MBB.begin();
+ DebugLoc dl = (MBBI != MBB.end() ?
+ MBBI->getDebugLoc() : DebugLoc::getUnknownLoc());
+
+ // Get the right frame order for MBlaze.
+ adjustMBlazeStackFrame(MF);
+
+ // Get the number of bytes to allocate from the FrameInfo.
+ unsigned StackSize = MFI->getStackSize();
+
+ // No need to allocate space on the stack.
+ if (StackSize == 0 && !MFI->hasCalls()) return;
+
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+ // Adjust stack : addi R1, R1, -imm
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(-StackSize);
+
+  // Save the return address only if the function isn't a leaf one.
+ // swi R15, R1, stack_loc
+ if (MFI->hasCalls()) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R15).addImm(RAOffset).addReg(MBlaze::R1);
+ }
+
+  // If the frame pointer is enabled, save it and set it
+  // to point to the stack pointer.
+ if (hasFP(MF)) {
+ // swi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::SWI))
+ .addReg(MBlaze::R19).addImm(FPOffset).addReg(MBlaze::R1);
+
+ // add R19, R1, R0
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R19)
+ .addReg(MBlaze::R1).addReg(MBlaze::R0);
+ }
+}
+
+void MBlazeRegisterInfo::
+emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
+ MachineBasicBlock::iterator MBBI = prior(MBB.end());
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ DebugLoc dl = MBBI->getDebugLoc();
+
+ // Get the number of bytes from FrameInfo
+ int NumBytes = (int) MFI->getStackSize();
+
+ // Get the FI's where RA and FP are saved.
+ int FPOffset = MBlazeFI->getFPStackOffset();
+ int RAOffset = MBlazeFI->getRAStackOffset();
+
+  // If the frame pointer is enabled, restore it and restore the
+  // stack pointer.
+ if (hasFP(MF)) {
+ // add R1, R19, R0
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADD), MBlaze::R1)
+ .addReg(MBlaze::R19).addReg(MBlaze::R0);
+
+ // lwi R19, R1, stack_loc
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R19)
+ .addImm(FPOffset).addReg(MBlaze::R1);
+ }
+
+  // Restore the return address only if the function isn't a leaf one.
+ // lwi R15, R1, stack_loc
+ if (MFI->hasCalls()) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::LWI), MBlaze::R15)
+ .addImm(RAOffset).addReg(MBlaze::R1);
+ }
+
+ // adjust stack.
+ // addi R1, R1, imm
+ if (NumBytes) {
+ BuildMI(MBB, MBBI, dl, TII.get(MBlaze::ADDI), MBlaze::R1)
+ .addReg(MBlaze::R1).addImm(NumBytes);
+ }
+}
+
+
+void MBlazeRegisterInfo::
+processFunctionBeforeFrameFinalized(MachineFunction &MF) const {
+ // Set the stack offset where GP must be saved/loaded from.
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MBlazeFunctionInfo *MBlazeFI = MF.getInfo<MBlazeFunctionInfo>();
+ if (MBlazeFI->needGPSaveRestore())
+ MFI->setObjectOffset(MBlazeFI->getGPFI(), MBlazeFI->getGPStackOffset());
+}
+
+unsigned MBlazeRegisterInfo::getRARegister() const {
+ return MBlaze::R15;
+}
+
+unsigned MBlazeRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
+ return hasFP(MF) ? MBlaze::R19 : MBlaze::R1;
+}
+
+unsigned MBlazeRegisterInfo::getEHExceptionRegister() const {
+ llvm_unreachable("What is the exception register");
+ return 0;
+}
+
+unsigned MBlazeRegisterInfo::getEHHandlerRegister() const {
+ llvm_unreachable("What is the exception handler register");
+ return 0;
+}
+
+int MBlazeRegisterInfo::getDwarfRegNum(unsigned RegNum, bool isEH) const {
+ llvm_unreachable("What is the dwarf register number");
+ return -1;
+}
+
+#include "MBlazeGenRegisterInfo.inc"
+
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.h b/lib/Target/MBlaze/MBlazeRegisterInfo.h
new file mode 100644
index 0000000..4847f1e
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.h
@@ -0,0 +1,90 @@
+//===- MBlazeRegisterInfo.h - MBlaze Register Information Impl --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the MBlaze implementation of the TargetRegisterInfo
+// class.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZEREGISTERINFO_H
+#define MBLAZEREGISTERINFO_H
+
+#include "MBlaze.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "MBlazeGenRegisterInfo.h.inc"
+
+namespace llvm {
+class MBlazeSubtarget;
+class TargetInstrInfo;
+class Type;
+
+namespace MBlaze {
+ /// SubregIndex - The index of various sized subregister classes. Note that
+ /// these indices must be kept in sync with the class indices in the
+ /// MBlazeRegisterInfo.td file.
+ enum SubregIndex {
+ SUBREG_FPEVEN = 1, SUBREG_FPODD = 2
+ };
+}
+
+struct MBlazeRegisterInfo : public MBlazeGenRegisterInfo {
+ const MBlazeSubtarget &Subtarget;
+ const TargetInstrInfo &TII;
+
+ MBlazeRegisterInfo(const MBlazeSubtarget &Subtarget,
+ const TargetInstrInfo &tii);
+
+  /// getRegisterNumbering - Given the enum value for some register, e.g.
+  /// MBlaze::R15, return the number that it corresponds to (e.g. 15).
+ static unsigned getRegisterNumbering(unsigned RegEnum);
+
+ /// Get PIC indirect call register
+ static unsigned getPICCallReg();
+
+ /// Adjust the MBlaze stack frame.
+ void adjustMBlazeStackFrame(MachineFunction &MF) const;
+
+ /// Code Generation virtual methods...
+ const unsigned *getCalleeSavedRegs(const MachineFunction* MF = 0) const;
+
+ const TargetRegisterClass* const*
+ getCalleeSavedRegClasses(const MachineFunction* MF = 0) const;
+
+ BitVector getReservedRegs(const MachineFunction &MF) const;
+
+ bool hasFP(const MachineFunction &MF) const;
+
+ void eliminateCallFramePseudoInstr(MachineFunction &MF,
+ MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator I) const;
+
+ /// Stack Frame Processing Methods
+ unsigned eliminateFrameIndex(MachineBasicBlock::iterator II,
+ int SPAdj, int *Value = NULL,
+ RegScavenger *RS = NULL) const;
+
+ void processFunctionBeforeFrameFinalized(MachineFunction &MF) const;
+
+ void emitPrologue(MachineFunction &MF) const;
+ void emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const;
+
+ /// Debug information queries.
+ unsigned getRARegister() const;
+ unsigned getFrameRegister(const MachineFunction &MF) const;
+
+ /// Exception handling queries.
+ unsigned getEHExceptionRegister() const;
+ unsigned getEHHandlerRegister() const;
+
+ int getDwarfRegNum(unsigned RegNum, bool isEH) const;
+};
+
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeRegisterInfo.td b/lib/Target/MBlaze/MBlazeRegisterInfo.td
new file mode 100644
index 0000000..96a5c98
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeRegisterInfo.td
@@ -0,0 +1,186 @@
+//===- MBlazeRegisterInfo.td - MBlaze Register defs --------*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Declarations that describe the MicroBlaze register file
+//===----------------------------------------------------------------------===//
+
+// We have banks of 32 registers each.
+class MBlazeReg<string n> : Register<n> {
+ field bits<5> Num;
+ let Namespace = "MBlaze";
+}
+
+class MBlazeRegWithSubRegs<string n, list<Register> subregs>
+ : RegisterWithSubRegs<n, subregs> {
+ field bits<5> Num;
+ let Namespace = "MBlaze";
+}
+
+// MBlaze CPU Registers
+class MBlazeGPRReg<bits<5> num, string n> : MBlazeReg<n> {
+ let Num = num;
+}
+
+// MBlaze 32-bit (aliased) FPU Registers
+class FPR<bits<5> num, string n, list<Register> subregs>
+ : MBlazeRegWithSubRegs<n, subregs> {
+ let Num = num;
+}
+
+//===----------------------------------------------------------------------===//
+// Registers
+//===----------------------------------------------------------------------===//
+
+let Namespace = "MBlaze" in {
+
+ // General Purpose Registers
+ def R0 : MBlazeGPRReg< 0, "r0">, DwarfRegNum<[0]>;
+ def R1 : MBlazeGPRReg< 1, "r1">, DwarfRegNum<[1]>;
+ def R2 : MBlazeGPRReg< 2, "r2">, DwarfRegNum<[2]>;
+ def R3 : MBlazeGPRReg< 3, "r3">, DwarfRegNum<[3]>;
+  def R4  : MBlazeGPRReg< 4,  "r4">,  DwarfRegNum<[4]>;
+ def R5 : MBlazeGPRReg< 5, "r5">, DwarfRegNum<[5]>;
+ def R6 : MBlazeGPRReg< 6, "r6">, DwarfRegNum<[6]>;
+ def R7 : MBlazeGPRReg< 7, "r7">, DwarfRegNum<[7]>;
+ def R8 : MBlazeGPRReg< 8, "r8">, DwarfRegNum<[8]>;
+ def R9 : MBlazeGPRReg< 9, "r9">, DwarfRegNum<[9]>;
+ def R10 : MBlazeGPRReg< 10, "r10">, DwarfRegNum<[10]>;
+ def R11 : MBlazeGPRReg< 11, "r11">, DwarfRegNum<[11]>;
+ def R12 : MBlazeGPRReg< 12, "r12">, DwarfRegNum<[12]>;
+ def R13 : MBlazeGPRReg< 13, "r13">, DwarfRegNum<[13]>;
+ def R14 : MBlazeGPRReg< 14, "r14">, DwarfRegNum<[14]>;
+ def R15 : MBlazeGPRReg< 15, "r15">, DwarfRegNum<[15]>;
+ def R16 : MBlazeGPRReg< 16, "r16">, DwarfRegNum<[16]>;
+ def R17 : MBlazeGPRReg< 17, "r17">, DwarfRegNum<[17]>;
+ def R18 : MBlazeGPRReg< 18, "r18">, DwarfRegNum<[18]>;
+ def R19 : MBlazeGPRReg< 19, "r19">, DwarfRegNum<[19]>;
+ def R20 : MBlazeGPRReg< 20, "r20">, DwarfRegNum<[20]>;
+ def R21 : MBlazeGPRReg< 21, "r21">, DwarfRegNum<[21]>;
+ def R22 : MBlazeGPRReg< 22, "r22">, DwarfRegNum<[22]>;
+ def R23 : MBlazeGPRReg< 23, "r23">, DwarfRegNum<[23]>;
+ def R24 : MBlazeGPRReg< 24, "r24">, DwarfRegNum<[24]>;
+ def R25 : MBlazeGPRReg< 25, "r25">, DwarfRegNum<[25]>;
+ def R26 : MBlazeGPRReg< 26, "r26">, DwarfRegNum<[26]>;
+ def R27 : MBlazeGPRReg< 27, "r27">, DwarfRegNum<[27]>;
+ def R28 : MBlazeGPRReg< 28, "r28">, DwarfRegNum<[28]>;
+ def R29 : MBlazeGPRReg< 29, "r29">, DwarfRegNum<[29]>;
+ def R30 : MBlazeGPRReg< 30, "r30">, DwarfRegNum<[30]>;
+ def R31 : MBlazeGPRReg< 31, "r31">, DwarfRegNum<[31]>;
+
+  /// MBlaze single-precision FPU registers
+ def F0 : FPR< 0, "r0", [R0]>, DwarfRegNum<[32]>;
+ def F1 : FPR< 1, "r1", [R1]>, DwarfRegNum<[33]>;
+ def F2 : FPR< 2, "r2", [R2]>, DwarfRegNum<[34]>;
+ def F3 : FPR< 3, "r3", [R3]>, DwarfRegNum<[35]>;
+ def F4 : FPR< 4, "r4", [R4]>, DwarfRegNum<[36]>;
+ def F5 : FPR< 5, "r5", [R5]>, DwarfRegNum<[37]>;
+ def F6 : FPR< 6, "r6", [R6]>, DwarfRegNum<[38]>;
+ def F7 : FPR< 7, "r7", [R7]>, DwarfRegNum<[39]>;
+ def F8 : FPR< 8, "r8", [R8]>, DwarfRegNum<[40]>;
+ def F9 : FPR< 9, "r9", [R9]>, DwarfRegNum<[41]>;
+ def F10 : FPR<10, "r10", [R10]>, DwarfRegNum<[42]>;
+ def F11 : FPR<11, "r11", [R11]>, DwarfRegNum<[43]>;
+ def F12 : FPR<12, "r12", [R12]>, DwarfRegNum<[44]>;
+ def F13 : FPR<13, "r13", [R13]>, DwarfRegNum<[45]>;
+ def F14 : FPR<14, "r14", [R14]>, DwarfRegNum<[46]>;
+ def F15 : FPR<15, "r15", [R15]>, DwarfRegNum<[47]>;
+ def F16 : FPR<16, "r16", [R16]>, DwarfRegNum<[48]>;
+ def F17 : FPR<17, "r17", [R17]>, DwarfRegNum<[49]>;
+ def F18 : FPR<18, "r18", [R18]>, DwarfRegNum<[50]>;
+ def F19 : FPR<19, "r19", [R19]>, DwarfRegNum<[51]>;
+ def F20 : FPR<20, "r20", [R20]>, DwarfRegNum<[52]>;
+ def F21 : FPR<21, "r21", [R21]>, DwarfRegNum<[53]>;
+ def F22 : FPR<22, "r22", [R22]>, DwarfRegNum<[54]>;
+ def F23 : FPR<23, "r23", [R23]>, DwarfRegNum<[55]>;
+ def F24 : FPR<24, "r24", [R24]>, DwarfRegNum<[56]>;
+ def F25 : FPR<25, "r25", [R25]>, DwarfRegNum<[57]>;
+ def F26 : FPR<26, "r26", [R26]>, DwarfRegNum<[58]>;
+ def F27 : FPR<27, "r27", [R27]>, DwarfRegNum<[59]>;
+ def F28 : FPR<28, "r28", [R28]>, DwarfRegNum<[60]>;
+ def F29 : FPR<29, "r29", [R29]>, DwarfRegNum<[61]>;
+ def F30 : FPR<30, "r30", [R30]>, DwarfRegNum<[62]>;
+ def F31 : FPR<31, "r31", [R31]>, DwarfRegNum<[63]>;
+}
+
+//===----------------------------------------------------------------------===//
+// Register Classes
+//===----------------------------------------------------------------------===//
+
+def CPURegs : RegisterClass<"MBlaze", [i32], 32,
+ [
+ // Return Values and Arguments
+ R3, R4, R5, R6, R7, R8, R9, R10,
+
+ // Not preserved across procedure calls
+ R11, R12,
+
+ // Callee save
+ R20, R21, R22, R23, R24, R25, R26, R27, R28, R29, R30, R31,
+
+ // Reserved
+ R0, // Always zero
+ R1, // The stack pointer
+ R2, // Read-only small data area anchor
+ R13, // Read-write small data area anchor
+ R14, // Return address for interrupts
+ R15, // Return address for sub-routines
+ R16, // Return address for trap
+ R17, // Return address for exceptions
+ R18, // Reserved for assembler
+ R19 // The frame-pointer
+ ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ CPURegsClass::iterator
+ CPURegsClass::allocation_order_end(const MachineFunction &MF) const {
+ // The last 10 registers on the list above are reserved
+ return end()-10;
+ }
+ }];
+}
+
+def FGR32 : RegisterClass<"MBlaze", [f32], 32,
+ [
+ // Return Values and Arguments
+ F3, F4, F5, F6, F7, F8, F9, F10,
+
+ // Not preserved across procedure calls
+ F11, F12,
+
+ // Callee save
+ F20, F21, F22, F23, F24, F25, F26, F27, F28, F29, F30, F31,
+
+ // Reserved
+ F0, // Always zero
+ F1, // The stack pointer
+ F2, // Read-only small data area anchor
+ F13, // Read-write small data area anchor
+ F14, // Return address for interrupts
+ F15, // Return address for sub-routines
+ F16, // Return address for trap
+ F17, // Return address for exceptions
+ F18, // Reserved for assembler
+ F19 // The frame pointer
+ ]>
+{
+ let MethodProtos = [{
+ iterator allocation_order_end(const MachineFunction &MF) const;
+ }];
+ let MethodBodies = [{
+ FGR32Class::iterator
+ FGR32Class::allocation_order_end(const MachineFunction &MF) const {
+ // The last 10 registers on the list above are reserved
+ return end()-10;
+ }
+ }];
+}
diff --git a/lib/Target/MBlaze/MBlazeSchedule.td b/lib/Target/MBlaze/MBlazeSchedule.td
new file mode 100644
index 0000000..6a94491
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSchedule.td
@@ -0,0 +1,63 @@
+//===- MBlazeSchedule.td - MBlaze Scheduling Definitions ---*- tablegen -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+//===----------------------------------------------------------------------===//
+// Functional units across MBlaze chip sets. Based on GCC/MBlaze backend files.
+//===----------------------------------------------------------------------===//
+def ALU : FuncUnit;
+def IMULDIV : FuncUnit;
+
+//===----------------------------------------------------------------------===//
+// Instruction Itinerary classes used for MBlaze
+//===----------------------------------------------------------------------===//
+def IIAlu : InstrItinClass;
+def IILoad : InstrItinClass;
+def IIStore : InstrItinClass;
+def IIXfer : InstrItinClass;
+def IIBranch : InstrItinClass;
+def IIHiLo : InstrItinClass;
+def IIImul : InstrItinClass;
+def IIIdiv : InstrItinClass;
+def IIFcvt : InstrItinClass;
+def IIFmove : InstrItinClass;
+def IIFcmp : InstrItinClass;
+def IIFadd : InstrItinClass;
+def IIFmulSingle : InstrItinClass;
+def IIFmulDouble : InstrItinClass;
+def IIFdivSingle : InstrItinClass;
+def IIFdivDouble : InstrItinClass;
+def IIFsqrtSingle : InstrItinClass;
+def IIFsqrtDouble : InstrItinClass;
+def IIFrecipFsqrtStep : InstrItinClass;
+def IIPseudo : InstrItinClass;
+
+//===----------------------------------------------------------------------===//
+// MBlaze Generic instruction itineraries.
+//===----------------------------------------------------------------------===//
+def MBlazeGenericItineraries : ProcessorItineraries<[
+ InstrItinData<IIAlu , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IILoad , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIStore , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIXfer , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIBranch , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIHiLo , [InstrStage<1, [IMULDIV]>]>,
+ InstrItinData<IIImul , [InstrStage<17, [IMULDIV]>]>,
+ InstrItinData<IIIdiv , [InstrStage<38, [IMULDIV]>]>,
+ InstrItinData<IIFcvt , [InstrStage<1, [ALU]>]>,
+ InstrItinData<IIFmove , [InstrStage<2, [ALU]>]>,
+ InstrItinData<IIFcmp , [InstrStage<3, [ALU]>]>,
+ InstrItinData<IIFadd , [InstrStage<4, [ALU]>]>,
+ InstrItinData<IIFmulSingle , [InstrStage<7, [ALU]>]>,
+ InstrItinData<IIFmulDouble , [InstrStage<8, [ALU]>]>,
+ InstrItinData<IIFdivSingle , [InstrStage<23, [ALU]>]>,
+ InstrItinData<IIFdivDouble , [InstrStage<36, [ALU]>]>,
+ InstrItinData<IIFsqrtSingle , [InstrStage<54, [ALU]>]>,
+ InstrItinData<IIFsqrtDouble , [InstrStage<12, [ALU]>]>,
+ InstrItinData<IIFrecipFsqrtStep , [InstrStage<5, [ALU]>]>
+]>;
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.cpp b/lib/Target/MBlaze/MBlazeSubtarget.cpp
new file mode 100644
index 0000000..3440521
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSubtarget.cpp
@@ -0,0 +1,31 @@
+//===- MBlazeSubtarget.cpp - MBlaze Subtarget Information -------*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MBlaze specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeSubtarget.h"
+#include "MBlaze.h"
+#include "MBlazeGenSubtarget.inc"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+MBlazeSubtarget::MBlazeSubtarget(const std::string &TT, const std::string &FS):
+ HasPipe3(false), HasBarrel(false), HasDiv(false), HasMul(false),
+ HasFSL(false), HasEFSL(false), HasMSRSet(false), HasException(false),
+ HasPatCmp(false), HasFPU(false), HasESR(false), HasPVR(false),
+ HasMul64(false), HasSqrt(false), HasMMU(false)
+{
+ std::string CPU = "v400";
+ MBlazeArchVersion = V400;
+
+ // Parse features string.
+ ParseSubtargetFeatures(FS, CPU);
+}
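ParseSubtargetFeatures is generated by tblgen from the feature records in MBlaze.td, which this hunk does not show, so the feature names below are assumptions for illustration only:

    #include "MBlazeSubtarget.h"

    using namespace llvm;

    // Sketch: construct a subtarget from a triple and a feature string.
    // "+mul,+fpu" is a hypothetical feature string; the real names come
    // from the MBlaze.td feature definitions.
    void subtargetExample() {
      MBlazeSubtarget ST("mblaze-unknown-elf", "+mul,+fpu");
      if (ST.hasMul() && ST.hasFPU()) {
        // Instruction selection may now use hardware multiply and the FPU.
      }
    }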
diff --git a/lib/Target/MBlaze/MBlazeSubtarget.h b/lib/Target/MBlaze/MBlazeSubtarget.h
new file mode 100644
index 0000000..bebb3f7
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeSubtarget.h
@@ -0,0 +1,79 @@
+//=====-- MBlazeSubtarget.h - Define Subtarget for the MBlaze -*- C++ -*--====//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetSubtarget.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZESUBTARGET_H
+#define MBLAZESUBTARGET_H
+
+#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Target/TargetMachine.h"
+
+#include <string>
+
+namespace llvm {
+
+class MBlazeSubtarget : public TargetSubtarget {
+
+protected:
+
+ enum MBlazeArchEnum {
+ V400, V500, V600, V700, V710
+ };
+
+ // MBlaze architecture version
+ MBlazeArchEnum MBlazeArchVersion;
+
+ bool HasPipe3;
+ bool HasBarrel;
+ bool HasDiv;
+ bool HasMul;
+ bool HasFSL;
+ bool HasEFSL;
+ bool HasMSRSet;
+ bool HasException;
+ bool HasPatCmp;
+ bool HasFPU;
+ bool HasESR;
+ bool HasPVR;
+ bool HasMul64;
+ bool HasSqrt;
+ bool HasMMU;
+
+ InstrItineraryData InstrItins;
+
+public:
+
+  /// This constructor initializes the data members to match those
+  /// of the specified triple.
+ MBlazeSubtarget(const std::string &TT, const std::string &FS);
+
+ /// ParseSubtargetFeatures - Parses features string setting specified
+ /// subtarget options. Definition of function is auto generated by tblgen.
+ std::string ParseSubtargetFeatures(const std::string &FS,
+ const std::string &CPU);
+
+ bool hasFPU() const { return HasFPU; }
+ bool hasSqrt() const { return HasSqrt; }
+ bool hasMul() const { return HasMul; }
+ bool hasMul64() const { return HasMul64; }
+ bool hasDiv() const { return HasDiv; }
+ bool hasBarrel() const { return HasBarrel; }
+
+ bool isV400() const { return MBlazeArchVersion == V400; }
+ bool isV500() const { return MBlazeArchVersion == V500; }
+ bool isV600() const { return MBlazeArchVersion == V600; }
+ bool isV700() const { return MBlazeArchVersion == V700; }
+ bool isV710() const { return MBlazeArchVersion == V710; }
+};
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.cpp b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
new file mode 100644
index 0000000..9eba2b3
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.cpp
@@ -0,0 +1,66 @@
+//===-- MBlazeTargetMachine.cpp - Define TargetMachine for MBlaze ---------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// Implements the info about MBlaze target spec.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "MBlazeMCAsmInfo.h"
+#include "MBlazeTargetMachine.h"
+#include "llvm/PassManager.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+extern "C" void LLVMInitializeMBlazeTarget() {
+ // Register the target.
+ RegisterTargetMachine<MBlazeTargetMachine> X(TheMBlazeTarget);
+ RegisterAsmInfo<MBlazeMCAsmInfo> A(TheMBlazeTarget);
+}
+
+// DataLayout --> Big-endian, 32-bit pointer/ABI/alignment
+// The stack is always 8-byte aligned.
+// In the function prologue, the stack frame is created by decrementing
+// the stack pointer. Once decremented, all references are done with a
+// positive offset from the stack/frame pointer; using StackGrowsUp makes
+// this easier to handle.
+MBlazeTargetMachine::
+MBlazeTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS):
+ LLVMTargetMachine(T, TT),
+ Subtarget(TT, FS),
+ DataLayout("E-p:32:32-i8:8:8-i16:16:16-i64:32:32-"
+ "f64:32:32-v64:32:32-v128:32:32-n32"),
+ InstrInfo(*this),
+ FrameInfo(TargetFrameInfo::StackGrowsUp, 8, 0),
+ TLInfo(*this) {
+ if (getRelocationModel() == Reloc::Default) {
+ setRelocationModel(Reloc::Static);
+ }
+
+ if (getCodeModel() == CodeModel::Default)
+ setCodeModel(CodeModel::Small);
+}
+
+// Install an instruction selector pass using
+// the ISelDag to gen MBlaze code.
+bool MBlazeTargetMachine::
+addInstSelector(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ PM.add(createMBlazeISelDag(*this));
+ return false;
+}
+
+// Implemented by targets that want to run passes immediately before
+// machine code is emitted. Return true if -print-machineinstrs should
+// print out the code after the passes.
+bool MBlazeTargetMachine::
+addPreEmitPass(PassManagerBase &PM, CodeGenOpt::Level OptLevel) {
+ PM.add(createMBlazeDelaySlotFillerPass(*this));
+ return true;
+}
diff --git a/lib/Target/MBlaze/MBlazeTargetMachine.h b/lib/Target/MBlaze/MBlazeTargetMachine.h
new file mode 100644
index 0000000..85c975c
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeTargetMachine.h
@@ -0,0 +1,69 @@
+//===-- MBlazeTargetMachine.h - Define TargetMachine for MBlaze -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares the MBlaze specific subclass of TargetMachine.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef MBLAZE_TARGETMACHINE_H
+#define MBLAZE_TARGETMACHINE_H
+
+#include "MBlazeSubtarget.h"
+#include "MBlazeInstrInfo.h"
+#include "MBlazeISelLowering.h"
+#include "MBlazeIntrinsicInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
+
+namespace llvm {
+ class formatted_raw_ostream;
+
+ class MBlazeTargetMachine : public LLVMTargetMachine {
+ MBlazeSubtarget Subtarget;
+ const TargetData DataLayout; // Calculates type size & alignment
+ MBlazeInstrInfo InstrInfo;
+ TargetFrameInfo FrameInfo;
+ MBlazeTargetLowering TLInfo;
+ MBlazeIntrinsicInfo IntrinsicInfo;
+ public:
+ MBlazeTargetMachine(const Target &T, const std::string &TT,
+ const std::string &FS);
+
+ virtual const MBlazeInstrInfo *getInstrInfo() const
+ { return &InstrInfo; }
+
+ virtual const TargetFrameInfo *getFrameInfo() const
+ { return &FrameInfo; }
+
+ virtual const MBlazeSubtarget *getSubtargetImpl() const
+ { return &Subtarget; }
+
+ virtual const TargetData *getTargetData() const
+ { return &DataLayout;}
+
+ virtual const MBlazeRegisterInfo *getRegisterInfo() const
+ { return &InstrInfo.getRegisterInfo(); }
+
+ virtual MBlazeTargetLowering *getTargetLowering() const
+ { return const_cast<MBlazeTargetLowering*>(&TLInfo); }
+
+ const TargetIntrinsicInfo *getIntrinsicInfo() const
+ { return &IntrinsicInfo; }
+
+ // Pass Pipeline Configuration
+ virtual bool addInstSelector(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+
+ virtual bool addPreEmitPass(PassManagerBase &PM,
+ CodeGenOpt::Level OptLevel);
+ };
+} // End llvm namespace
+
+#endif
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
new file mode 100644
index 0000000..79c9494
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.cpp
@@ -0,0 +1,88 @@
+//===-- MBlazeTargetObjectFile.cpp - MBlaze object files ------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlazeTargetObjectFile.h"
+#include "MBlazeSubtarget.h"
+#include "llvm/DerivedTypes.h"
+#include "llvm/GlobalVariable.h"
+#include "llvm/MC/MCSectionELF.h"
+#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Support/CommandLine.h"
+using namespace llvm;
+
+void MBlazeTargetObjectFile::
+Initialize(MCContext &Ctx, const TargetMachine &TM) {
+ TargetLoweringObjectFileELF::Initialize(Ctx, TM);
+
+ SmallDataSection =
+ getELFSection(".sdata", MCSectionELF::SHT_PROGBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getDataRel());
+
+ SmallBSSSection =
+ getELFSection(".sbss", MCSectionELF::SHT_NOBITS,
+ MCSectionELF::SHF_WRITE | MCSectionELF::SHF_ALLOC,
+ SectionKind::getBSS());
+
+}
+
+// An address must be loaded from a small section if its size is less than
+// the small section size threshold. Data in this section must be addressed
+// using the gp_rel operator.
+static bool IsInSmallSection(uint64_t Size) {
+ return Size > 0 && Size <= 8;
+}
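The threshold means only scalar-sized globals land in .sdata/.sbss. A standalone illustration of the cut-off (the helper simply mirrors IsInSmallSection above):

    #include <cassert>
    #include <cstdint>

    // Mirror of the small-section predicate, for illustration only.
    static bool isInSmallSection(uint64_t Size) {
      return Size > 0 && Size <= 8;
    }

    int main() {
      assert(isInSmallSection(4));    // an i32 global -> .sdata/.sbss
      assert(isInSmallSection(8));    // an i64 global -> .sdata/.sbss
      assert(!isInSmallSection(16));  // [4 x i32]     -> regular .data/.bss
      assert(!isInSmallSection(0));   // zero-sized objects are excluded
      return 0;
    }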
+
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM) const {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage())
+ return false;
+
+ return IsGlobalInSmallSection(GV, TM, getKindForGlobal(GV, TM));
+}
+
+/// IsGlobalInSmallSection - Return true if this global address should be
+/// placed into small data/bss section.
+bool MBlazeTargetObjectFile::
+IsGlobalInSmallSection(const GlobalValue *GV, const TargetMachine &TM,
+ SectionKind Kind) const {
+ // Only global variables, not functions.
+ const GlobalVariable *GVA = dyn_cast<GlobalVariable>(GV);
+ if (!GVA)
+ return false;
+
+ // We can only do this for datarel or BSS objects for now.
+ if (!Kind.isBSS() && !Kind.isDataRel())
+ return false;
+
+ // If this is an internal constant string, there is a special
+ // section for it, but not in small data/bss.
+ if (Kind.isMergeable1ByteCString())
+ return false;
+
+ const Type *Ty = GV->getType()->getElementType();
+ return IsInSmallSection(TM.getTargetData()->getTypeAllocSize(Ty));
+}
+
+const MCSection *MBlazeTargetObjectFile::
+SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind,
+ Mangler *Mang, const TargetMachine &TM) const {
+ // TODO: Could also support "weak" symbols with ".gnu.linkonce.s.*"
+ // sections?
+
+ // Handle Small Section classification here.
+ if (Kind.isBSS() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallBSSSection;
+ if (Kind.isDataNoRel() && IsGlobalInSmallSection(GV, TM, Kind))
+ return SmallDataSection;
+
+ // Otherwise, we work the same as ELF.
+ return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM);
+}
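+
+// Example (illustrative): a small zero-initialized global (BSS kind) lands in
+// .sbss and a small non-relocatable data global in .sdata; anything over the
+// 8-byte threshold falls through to the default ELF section selection.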
diff --git a/lib/Target/MBlaze/MBlazeTargetObjectFile.h b/lib/Target/MBlaze/MBlazeTargetObjectFile.h
new file mode 100644
index 0000000..20e7702
--- /dev/null
+++ b/lib/Target/MBlaze/MBlazeTargetObjectFile.h
@@ -0,0 +1,41 @@
+//===-- llvm/Target/MBlazeTargetObjectFile.h - MBlaze Obj. Info -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+#define LLVM_TARGET_MBLAZE_TARGETOBJECTFILE_H
+
+#include "llvm/CodeGen/TargetLoweringObjectFileImpl.h"
+
+namespace llvm {
+
+ class MBlazeTargetObjectFile : public TargetLoweringObjectFileELF {
+ const MCSection *SmallDataSection;
+ const MCSection *SmallBSSSection;
+ public:
+
+ void Initialize(MCContext &Ctx, const TargetMachine &TM);
+
+ /// IsGlobalInSmallSection - Return true if this global address should be
+ /// placed into small data/bss section.
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM,
+ SectionKind Kind) const;
+
+ bool IsGlobalInSmallSection(const GlobalValue *GV,
+ const TargetMachine &TM) const;
+
+ const MCSection *SelectSectionForGlobal(const GlobalValue *GV,
+ SectionKind Kind,
+ Mangler *Mang,
+ const TargetMachine &TM) const;
+ };
+} // end namespace llvm
+
+#endif
diff --git a/lib/Target/MBlaze/Makefile b/lib/Target/MBlaze/Makefile
new file mode 100644
index 0000000..19e508c
--- /dev/null
+++ b/lib/Target/MBlaze/Makefile
@@ -0,0 +1,23 @@
+##===- lib/Target/MBlaze/Makefile --------------------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../..
+LIBRARYNAME = LLVMMBlazeCodeGen
+TARGET = MBlaze
+
+# Make sure that tblgen is run, first thing.
+BUILT_SOURCES = MBlazeGenRegisterInfo.h.inc MBlazeGenRegisterNames.inc \
+ MBlazeGenRegisterInfo.inc MBlazeGenInstrNames.inc \
+ MBlazeGenInstrInfo.inc MBlazeGenAsmWriter.inc \
+ MBlazeGenDAGISel.inc MBlazeGenCallingConv.inc \
+ MBlazeGenSubtarget.inc MBlazeGenIntrinsics.inc
+
+DIRS = AsmPrinter TargetInfo
+
+include $(LEVEL)/Makefile.common
+
diff --git a/lib/Target/MBlaze/TargetInfo/CMakeLists.txt b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
new file mode 100644
index 0000000..5afb14d
--- /dev/null
+++ b/lib/Target/MBlaze/TargetInfo/CMakeLists.txt
@@ -0,0 +1,7 @@
+include_directories( ${CMAKE_CURRENT_BINARY_DIR}/.. ${CMAKE_CURRENT_SOURCE_DIR}/.. )
+
+add_llvm_library(LLVMMBlazeInfo
+ MBlazeTargetInfo.cpp
+ )
+
+add_dependencies(LLVMMBlazeInfo MBlazeCodeGenTable_gen)
diff --git a/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
new file mode 100644
index 0000000..16e01db
--- /dev/null
+++ b/lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp
@@ -0,0 +1,19 @@
+//===-- MBlazeTargetInfo.cpp - MBlaze Target Implementation ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "MBlaze.h"
+#include "llvm/Module.h"
+#include "llvm/Target/TargetRegistry.h"
+using namespace llvm;
+
+Target llvm::TheMBlazeTarget;
+
+extern "C" void LLVMInitializeMBlazeTargetInfo() {
+ RegisterTarget<Triple::mblaze> X(TheMBlazeTarget, "mblaze", "MBlaze");
+}
diff --git a/lib/Target/MBlaze/TargetInfo/Makefile b/lib/Target/MBlaze/TargetInfo/Makefile
new file mode 100644
index 0000000..fb7ea11
--- /dev/null
+++ b/lib/Target/MBlaze/TargetInfo/Makefile
@@ -0,0 +1,15 @@
+##===- lib/Target/MBlaze/TargetInfo/Makefile ---------------*- Makefile -*-===##
+#
+# The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+LEVEL = ../../../..
+LIBRARYNAME = LLVMMBlazeInfo
+
+# Hack: we need to include 'main' target directory to grab private headers
+CPPFLAGS = -I$(PROJ_OBJ_DIR)/.. -I$(PROJ_SRC_DIR)/..
+
+include $(LEVEL)/Makefile.common
diff --git a/lib/Target/MSIL/MSILWriter.cpp b/lib/Target/MSIL/MSILWriter.cpp
index 3330d09..ac41cc8 100644
--- a/lib/Target/MSIL/MSILWriter.cpp
+++ b/lib/Target/MSIL/MSILWriter.cpp
@@ -38,7 +38,8 @@ namespace llvm {
virtual bool addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &Out,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel);
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify);
virtual const TargetData *getTargetData() const { return 0; }
};
@@ -57,7 +58,7 @@ bool MSILModule::runOnModule(Module &M) {
TypeSymbolTable& Table = M.getTypeSymbolTable();
std::set<const Type *> Types = getAnalysis<FindUsedTypes>().getTypes();
for (TypeSymbolTable::iterator I = Table.begin(), E = Table.end(); I!=E; ) {
- if (!isa<StructType>(I->second) && !isa<OpaqueType>(I->second))
+ if (!I->second->isStructTy() && !I->second->isOpaqueTy())
Table.remove(I++);
else {
std::set<const Type *>::iterator T = Types.find(I->second);
@@ -1459,7 +1460,7 @@ void MSILWriter::printDeclarations(const TypeSymbolTable& ST) {
for (std::set<const Type*>::const_iterator
UI = UsedTypes->begin(), UE = UsedTypes->end(); UI!=UE; ++UI) {
const Type* Ty = *UI;
- if (isa<ArrayType>(Ty) || isa<VectorType>(Ty) || isa<StructType>(Ty))
+ if (Ty->isArrayTy() || Ty->isVectorTy() || Ty->isStructTy())
Name = getTypeName(Ty, false, true);
// Type with no need to declare.
else continue;
@@ -1688,7 +1689,8 @@ void MSILWriter::printExternals() {
bool MSILTarget::addPassesToEmitWholeFile(PassManager &PM,
formatted_raw_ostream &o,
CodeGenFileType FileType,
- CodeGenOpt::Level OptLevel)
+ CodeGenOpt::Level OptLevel,
+ bool DisableVerify)
{
if (FileType != TargetMachine::CGFT_AssemblyFile) return true;
MSILWriter* Writer = new MSILWriter(o);
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index a8c5e0a..911cfcb 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -26,26 +26,12 @@
#include "llvm/CodeGen/SelectionDAG.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Target/TargetLowering.h"
-#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
-#include "llvm/ADT/Statistic.h"
-
using namespace llvm;
-#ifndef NDEBUG
-static cl::opt<bool>
-ViewRMWDAGs("view-msp430-rmw-dags", cl::Hidden,
- cl::desc("Pop up a window to show isel dags after RMW preprocess"));
-#else
-static const bool ViewRMWDAGs = false;
-#endif
-
-STATISTIC(NumLoadMoved, "Number of loads moved below TokenFactor");
-
-
namespace {
struct MSP430ISelAddressMode {
enum {
@@ -123,8 +109,6 @@ namespace {
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "MSP430 DAG->DAG Pattern Instruction Selection";
}
@@ -133,8 +117,6 @@ namespace {
bool MatchWrapper(SDValue N, MSP430ISelAddressMode &AM);
bool MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM);
- bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const;
-
virtual bool
SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
std::vector<SDValue> &OutOps);
@@ -143,18 +125,12 @@ namespace {
#include "MSP430GenDAGISel.inc"
private:
- DenseMap<SDNode*, SDNode*> RMWStores;
- void PreprocessForRMW();
SDNode *Select(SDNode *N);
SDNode *SelectIndexedLoad(SDNode *Op);
SDNode *SelectIndexedBinOp(SDNode *Op, SDValue N1, SDValue N2,
unsigned Opc8, unsigned Opc16);
bool SelectAddr(SDNode *Op, SDValue Addr, SDValue &Base, SDValue &Disp);
-
- #ifndef NDEBUG
- unsigned Indent;
- #endif
};
} // end anonymous namespace
@@ -216,10 +192,7 @@ bool MSP430DAGToDAGISel::MatchAddressBase(SDValue N, MSP430ISelAddressMode &AM)
}
bool MSP430DAGToDAGISel::MatchAddress(SDValue N, MSP430ISelAddressMode &AM) {
- DEBUG({
- errs() << "MatchAddress: ";
- AM.dump();
- });
+ DEBUG(errs() << "MatchAddress: "; AM.dump());
switch (N.getOpcode()) {
default: break;
@@ -335,270 +308,6 @@ SelectInlineAsmMemoryOperand(const SDValue &Op, char ConstraintCode,
return false;
}
-bool MSP430DAGToDAGISel::IsLegalToFold(SDValue N, SDNode *U,
- SDNode *Root) const {
- if (OptLevel == CodeGenOpt::None) return false;
-
- /// RMW preprocessing creates the following code:
- /// [Load1]
- /// ^ ^
- /// / |
- /// / |
- /// [Load2] |
- /// ^ ^ |
- /// | | |
- /// | \-|
- /// | |
- /// | [Op]
- /// | ^
- /// | |
- /// \ /
- /// \ /
- /// [Store]
- ///
- /// The path Store => Load2 => Load1 is via chain. Note that in general it is
- /// not allowed to fold Load1 into Op (and Store) since it will creates a
- /// cycle. However, this is perfectly legal for the loads moved below the
- /// TokenFactor by PreprocessForRMW. Query the map Store => Load1 (created
- /// during preprocessing) to determine whether it's legal to introduce such
- /// "cycle" for a moment.
- DenseMap<SDNode*, SDNode*>::const_iterator I = RMWStores.find(Root);
- if (I != RMWStores.end() && I->second == N.getNode())
- return true;
-
- // Proceed to 'generic' cycle finder code
- return SelectionDAGISel::IsLegalToFold(N, U, Root);
-}
-
-
-/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
-/// and move load below the TokenFactor. Replace store's chain operand with
-/// load's chain result.
-static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
- SDValue Store, SDValue TF) {
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
- if (Load.getNode() == TF.getOperand(i).getNode())
- Ops.push_back(Load.getOperand(0));
- else
- Ops.push_back(TF.getOperand(i));
- SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
- SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
- Load.getOperand(1),
- Load.getOperand(2));
- CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
- Store.getOperand(2), Store.getOperand(3));
-}
-
-/// MoveBelowTokenFactor2 - Replace TokenFactor operand with load's chain operand
-/// and move load below the TokenFactor. Replace store's chain operand with
-/// load's chain result. This a version which sinks two loads below token factor.
-/// Look into PreprocessForRMW comments for explanation of transform.
-static void MoveBelowTokenFactor2(SelectionDAG *CurDAG,
- SDValue Load1, SDValue Load2,
- SDValue Store, SDValue TF) {
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i) {
- SDNode* N = TF.getOperand(i).getNode();
- if (Load2.getNode() == N)
- Ops.push_back(Load2.getOperand(0));
- else if (Load1.getNode() != N)
- Ops.push_back(TF.getOperand(i));
- }
-
- SDValue NewTF = SDValue(CurDAG->MorphNodeTo(TF.getNode(),
- TF.getOpcode(),
- TF.getNode()->getVTList(),
- &Ops[0], Ops.size()), TF.getResNo());
- SDValue NewLoad2 = CurDAG->UpdateNodeOperands(Load2, NewTF,
- Load2.getOperand(1),
- Load2.getOperand(2));
-
- SDValue NewLoad1 = CurDAG->UpdateNodeOperands(Load1, NewLoad2.getValue(1),
- Load1.getOperand(1),
- Load1.getOperand(2));
-
- CurDAG->UpdateNodeOperands(Store,
- NewLoad1.getValue(1),
- Store.getOperand(1),
- Store.getOperand(2), Store.getOperand(3));
-}
-
-/// isAllowedToSink - return true if N a load which can be moved below token
-/// factor. Basically, the load should be non-volatile and has single use.
-static bool isLoadAllowedToSink(SDValue N, SDValue Chain) {
- if (N.getOpcode() == ISD::BIT_CONVERT)
- N = N.getOperand(0);
-
- LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
- if (!LD || LD->isVolatile())
- return false;
- if (LD->getAddressingMode() != ISD::UNINDEXED)
- return false;
-
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
- return false;
-
- return (N.hasOneUse() &&
- LD->hasNUsesOfValue(1, 1) &&
- LD->isOperandOf(Chain.getNode()));
-}
-
-
-/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG.
-/// The chain produced by the load must only be used by the store's chain
-/// operand, otherwise this may produce a cycle in the DAG.
-static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
- SDValue &Load) {
- if (isLoadAllowedToSink(N, Chain) &&
- N.getOperand(1) == Address) {
- Load = N;
- return true;
- }
- return false;
-}
-
-/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
-/// This is only run if not in -O0 mode.
-/// This allows the instruction selector to pick more read-modify-write
-/// instructions. This is a common case:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [Load]
-/// ^ ^
-/// | |
-/// / \-
-/// / |
-/// [TokenFactor] [Op]
-/// ^ ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-///
-/// The fact the store's chain operand != load's chain will prevent the
-/// (store (op (load))) instruction from being selected. We can transform it to:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [TokenFactor]
-/// ^
-/// |
-/// [Load]
-/// ^ ^
-/// | |
-/// | \-
-/// | |
-/// | [Op]
-/// | ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-///
-/// We also recognize the case where second operand of Op is load as well and
-/// move it below token factor as well creating DAG as follows:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [TokenFactor]
-/// ^
-/// |
-/// [Load1]
-/// ^ ^
-/// / |
-/// / |
-/// [Load2] |
-/// ^ ^ |
-/// | | |
-/// | \-|
-/// | |
-/// | [Op]
-/// | ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-///
-/// This allows selection of mem-mem instructions. Yay!
-
-void MSP430DAGToDAGISel::PreprocessForRMW() {
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
- if (!ISD::isNON_TRUNCStore(I))
- continue;
- SDValue Chain = I->getOperand(0);
-
- if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
- continue;
-
- SDValue N1 = I->getOperand(1);
- SDValue N2 = I->getOperand(2);
- if ((N1.getValueType().isFloatingPoint() &&
- !N1.getValueType().isVector()) ||
- !N1.hasOneUse())
- continue;
-
- unsigned RModW = 0;
- SDValue Load1, Load2;
- unsigned Opcode = N1.getNode()->getOpcode();
- switch (Opcode) {
- case ISD::ADD:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- case ISD::ADDC:
- case ISD::ADDE: {
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
- if (isRMWLoad(N10, Chain, N2, Load1)) {
- if (isLoadAllowedToSink(N11, Chain)) {
- Load2 = N11;
- RModW = 2;
- } else
- RModW = 1;
- } else if (isRMWLoad(N11, Chain, N2, Load1)) {
- if (isLoadAllowedToSink(N10, Chain)) {
- Load2 = N10;
- RModW = 2;
- } else
- RModW = 1;
- }
- break;
- }
- case ISD::SUB:
- case ISD::SUBC:
- case ISD::SUBE: {
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
- if (isRMWLoad(N10, Chain, N2, Load1)) {
- if (isLoadAllowedToSink(N11, Chain)) {
- Load2 = N11;
- RModW = 2;
- } else
- RModW = 1;
- }
- break;
- }
- }
-
- NumLoadMoved += RModW;
- if (RModW == 1)
- MoveBelowTokenFactor(CurDAG, Load1, SDValue(I, 0), Chain);
- else if (RModW == 2) {
- MoveBelowTokenFactor2(CurDAG, Load1, Load2, SDValue(I, 0), Chain);
- SDNode* Store = I;
- RMWStores[Store] = Load2.getNode();
- }
- }
-}
-
-
static bool isValidIndexedLoad(const LoadSDNode *LD) {
ISD::MemIndexedMode AM = LD->getAddressingMode();
if (AM != ISD::POST_INC || LD->getExtensionType() != ISD::NON_EXTLOAD)
@@ -681,46 +390,19 @@ SDNode *MSP430DAGToDAGISel::SelectIndexedBinOp(SDNode *Op,
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void MSP430DAGToDAGISel::InstructionSelect() {
- std::string BlockName;
- if (ViewRMWDAGs)
- BlockName = MF->getFunction()->getNameStr() + ":" +
- BB->getBasicBlock()->getNameStr();
-
- PreprocessForRMW();
-
- if (ViewRMWDAGs) CurDAG->viewGraph("RMW preprocessed:" + BlockName);
-
- DEBUG(errs() << "Selection DAG after RMW preprocessing:\n");
- DEBUG(CurDAG->dump());
-
- // Codegen the basic block.
- DEBUG(errs() << "===== Instruction selection begins:\n");
- DEBUG(Indent = 0);
- SelectRoot(*CurDAG);
- DEBUG(errs() << "===== Instruction selection ends:\n");
-
- CurDAG->RemoveDeadNodes();
- RMWStores.clear();
-}
-
SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
- DEBUG(errs().indent(Indent) << "Selecting: ");
+ DEBUG(errs() << "Selecting: ");
DEBUG(Node->dump(CurDAG));
DEBUG(errs() << "\n");
- DEBUG(Indent += 2);
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
- DEBUG(errs().indent(Indent-2) << "== ";
+ DEBUG(errs() << "== ";
Node->dump(CurDAG);
errs() << "\n");
- DEBUG(Indent -= 2);
return NULL;
}
@@ -808,13 +490,12 @@ SDNode *MSP430DAGToDAGISel::Select(SDNode *Node) {
// Select the default instruction
SDNode *ResNode = SelectCode(Node);
- DEBUG(errs() << std::string(Indent-2, ' ') << "=> ");
+ DEBUG(errs() << "=> ");
if (ResNode == NULL || ResNode == Node)
DEBUG(Node->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
DEBUG(errs() << "\n");
- DEBUG(Indent -= 2);
return ResNode;
}
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 7281b37..e6c7e1e 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -795,18 +795,15 @@ SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
if (andCC) {
// C = ~Z, thus Res = SRW & 1, no processing is required
} else {
- // Res = (SRW >> 1) & 1
+ // Res = ~((SRW >> 1) & 1)
Shift = true;
+ Invert = true;
}
break;
case MSP430CC::COND_E:
- if (andCC) {
- // C = ~Z, thus Res = ~(SRW & 1)
- } else {
- // Res = ~((SRW >> 1) & 1)
- Shift = true;
- }
- Invert = true;
+ Shift = true;
+ // C = ~Z for the AND instruction, thus we could use Res = ~(SRW & 1);
+ // however, Res = (SRW >> 1) & 1 is one word shorter.
break;
}
EVT VT = Op.getValueType();
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index bb06f7b..144ba26 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -250,7 +250,7 @@ def MOV16ri : I16ri<0x0,
[(set GR16:$dst, imm:$src)]>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOV8rm : I8rm<0x0,
(outs GR8:$dst), (ins memsrc:$src),
"mov.b\t{$src, $dst}",
diff --git a/lib/Target/Mips/MipsISelDAGToDAG.cpp b/lib/Target/Mips/MipsISelDAGToDAG.cpp
index f1d4a67..c4746db 100644
--- a/lib/Target/Mips/MipsISelDAGToDAG.cpp
+++ b/lib/Target/Mips/MipsISelDAGToDAG.cpp
@@ -59,8 +59,6 @@ public:
SelectionDAGISel(tm),
TM(tm), Subtarget(tm.getSubtarget<MipsSubtarget>()) {}
- virtual void InstructionSelect();
-
// Pass Name
virtual const char *getPassName() const {
return "MIPS DAG->DAG Pattern Instruction Selection";
@@ -98,29 +96,10 @@ private:
inline SDValue getI32Imm(unsigned Imm) {
return CurDAG->getTargetConstant(Imm, MVT::i32);
}
-
-
- #ifndef NDEBUG
- unsigned Indent;
- #endif
};
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void MipsDAGToDAGISel::InstructionSelect() {
- // Codegen the basic block.
- DEBUG(errs() << "===== Instruction selection begins:\n");
- DEBUG(Indent = 0);
-
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
-
- DEBUG(errs() << "===== Instruction selection ends:\n");
-
- CurDAG->RemoveDeadNodes();
-}
/// getGlobalBaseReg - Output the instructions required to put the
/// GOT address into a register.
@@ -329,17 +308,11 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
DebugLoc dl = Node->getDebugLoc();
// Dump information about the Node being selected
- DEBUG(errs().indent(Indent) << "Selecting: ";
- Node->dump(CurDAG);
- errs() << "\n");
- DEBUG(Indent += 2);
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
- DEBUG(errs().indent(Indent-2) << "== ";
- Node->dump(CurDAG);
- errs() << "\n");
- DEBUG(Indent -= 2);
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
return NULL;
}
@@ -547,14 +520,12 @@ SDNode* MipsDAGToDAGISel::Select(SDNode *Node) {
// Select the default instruction
SDNode *ResNode = SelectCode(Node);
- DEBUG(errs().indent(Indent-2) << "=> ");
+ DEBUG(errs() << "=> ");
if (ResNode == NULL || ResNode == Node)
DEBUG(Node->dump(CurDAG));
else
DEBUG(ResNode->dump(CurDAG));
DEBUG(errs() << "\n");
- DEBUG(Indent -= 2);
-
return ResNode;
}
diff --git a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td
index f16a805..cef3697 100644
--- a/lib/Target/Mips/MipsInstrInfo.td
+++ b/lib/Target/Mips/MipsInstrInfo.td
@@ -300,9 +300,8 @@ class JumpFR<bits<6> op, bits<6> func, string instr_asm>:
// Jump and Link (Call)
let isCall=1, hasDelaySlot=1,
// All calls clobber the non-callee saved registers...
- Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
- K0, K1, F0, F1, F2, F3, F4, F5, F6, F7, F8, F9, F10, F11, F12, F13,
- F14, F15, F16, F17, F18, F19], Uses = [GP] in {
+ Defs = [AT, V0, V1, A0, A1, A2, A3, T0, T1, T2, T3, T4, T5, T6, T7, T8, T9,
+ K0, K1, D0, D1, D2, D3, D4, D5, D6, D7, D8, D9], Uses = [GP] in {
class JumpLink<bits<6> op, string instr_asm>:
FJ< op,
(outs),
@@ -593,8 +592,8 @@ def : Pat<(MipsJmpLink (i32 tglobaladdr:$dst)),
(JAL tglobaladdr:$dst)>;
def : Pat<(MipsJmpLink (i32 texternalsym:$dst)),
(JAL texternalsym:$dst)>;
-def : Pat<(MipsJmpLink CPURegs:$dst),
- (JALR CPURegs:$dst)>;
+//def : Pat<(MipsJmpLink CPURegs:$dst),
+// (JALR CPURegs:$dst)>;
// hi/lo relocs
def : Pat<(MipsHi tglobaladdr:$in), (LUi tglobaladdr:$in)>;
diff --git a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
index b015edd..44a6cc0 100644
--- a/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
+++ b/lib/Target/PIC16/AsmPrinter/PIC16AsmPrinter.cpp
@@ -158,6 +158,7 @@ bool PIC16AsmPrinter::runOnMachineFunction(MachineFunction &MF) {
// printOperand - print operand of insn.
void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
const MachineOperand &MO = MI->getOperand(opNum);
+ const Function *F = MI->getParent()->getParent()->getFunction();
switch (MO.getType()) {
case MachineOperand::MO_Register:
@@ -190,19 +191,18 @@ void PIC16AsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
}
case MachineOperand::MO_ExternalSymbol: {
const char *Sname = MO.getSymbolName();
+ std::string Printname = Sname;
- // If its a libcall name, record it to decls section.
- if (PAN::getSymbolTag(Sname) == PAN::LIBCALL)
- LibcallDecls.push_back(Sname);
-
- // Record a call to intrinsic to print the extern declaration for it.
- std::string Sym = Sname;
- if (PAN::isMemIntrinsic(Sym)) {
- Sym = PAN::addPrefix(Sym);
- LibcallDecls.push_back(createESName(Sym));
+ // Intrinsic stuff needs to be renamed if we are printing an IL function.
+ if (PAN::isIntrinsicStuff(Printname)) {
+ if (PAN::isISR(F->getSection())) {
+ Printname = PAN::Rename(Sname);
+ }
+ // Record these decls; we need to print them as extern in the asm.
+ LibcallDecls.push_back(createESName(Printname));
}
- O << Sym;
+ O << Printname;
break;
}
case MachineOperand::MO_MachineBasicBlock:
@@ -248,8 +248,6 @@ void PIC16AsmPrinter::printLibcallDecls() {
for (std::list<const char*>::const_iterator I = LibcallDecls.begin();
I != LibcallDecls.end(); I++) {
O << MAI->getExternDirective() << *I << "\n";
- O << MAI->getExternDirective() << PAN::getArgsLabel(*I) << "\n";
- O << MAI->getExternDirective() << PAN::getRetvalLabel(*I) << "\n";
}
O << MAI->getCommentString() << "External decls for libcalls - END." <<"\n";
}
diff --git a/lib/Target/PIC16/PIC16ABINames.h b/lib/Target/PIC16/PIC16ABINames.h
index b0b9318..4c1a8da 100644
--- a/lib/Target/PIC16/PIC16ABINames.h
+++ b/lib/Target/PIC16/PIC16ABINames.h
@@ -178,18 +178,21 @@ namespace llvm {
return Func1 + tag;
}
+ // Get the retval label for the given function.
static std::string getRetvalLabel(const std::string &Func) {
std::string Func1 = addPrefix(Func);
std::string tag = getTagName(RET_LABEL);
return Func1 + tag;
}
+ // Get the argument label for the given function.
static std::string getArgsLabel(const std::string &Func) {
std::string Func1 = addPrefix(Func);
std::string tag = getTagName(ARGS_LABEL);
return Func1 + tag;
}
+ // Get the tempdata label for the given function.
static std::string getTempdataLabel(const std::string &Func) {
std::string Func1 = addPrefix(Func);
std::string tag = getTagName(TEMPS_LABEL);
@@ -263,6 +266,7 @@ namespace llvm {
return false;
}
+
inline static bool isMemIntrinsic (const std::string &Name) {
if (Name.compare("@memcpy") == 0 || Name.compare("@memset") == 0 ||
Name.compare("@memmove") == 0) {
@@ -272,6 +276,41 @@ namespace llvm {
return false;
}
+ // Currently names of libcalls are assigned during TargetLowering
+ // object construction. There is no provision to change them when the
+ // code for an IL function is being generated.
+ // So we have to change these names while printing assembly.
+ // We need to do that mainly for names related to intrinsics. This
+ // function returns true if a name needs to be cloned.
+ inline static bool isIntrinsicStuff(const std::string &Name) {
+ // Return true if the name contains the LIBCALL marker or is a mem
+ // intrinsic; these are mainly ARGS_LABEL, RET_LABEL, and the LIBCALL
+ // name itself.
+ if ((Name.find(getTagName(LIBCALL)) != std::string::npos)
+ || isMemIntrinsic(Name))
+ return true;
+
+ return false;
+ }
+
+ // Rename the name for IL.
+ inline static std::string Rename(const std::string &Name) {
+ std::string Newname;
+ // If it's a label (LIBCALL+Func+LABEL), change it to
+ // (LIBCALL+Func+IL+LABEL).
+ TAGS id = getSymbolTag(Name);
+ if (id == ARGS_LABEL || id == RET_LABEL) {
+ std::size_t pos = Name.find(getTagName(id));
+ Newname = Name.substr(0, pos) + ".IL" + getTagName(id);
+ return Newname;
+ }
+
+ // Else, just append IL to name.
+ return Name + ".IL";
+ }
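+
+ // For example, with an illustrative ARGS_LABEL tag spelling of ".args",
+ // "foo.args" is renamed to "foo.IL.args"; a name with no such tag, say
+ // "memcpy", simply becomes "memcpy.IL".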
+
inline static bool isLocalToFunc (std::string &Func, std::string &Var) {
if (! isLocalName(Var)) return false;
@@ -338,6 +377,22 @@ namespace llvm {
inline static std::string getISRAddr(void) {
return "0x4";
}
+
+ // Returns the name of the clone of a function.
+ static std::string getCloneFnName(const std::string &Func) {
+ return (Func + ".IL");
+ }
+
+ // Returns the name of the clone of a variable.
+ static std::string getCloneVarName(const std::string &Fn,
+ const std::string &Var) {
+ std::string cloneVarName = Var;
+ // These vars are named like fun.auto.var.
+ // Just replace the function name with the clone function name.
+ std::string cloneFnName = getCloneFnName(Fn);
+ cloneVarName.replace(cloneVarName.find(Fn), Fn.length(), cloneFnName);
+ return cloneVarName;
+ }
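+ // For example, for Fn = "foo" and Var = "foo.auto.i" (illustrative names),
+ // the returned clone variable name is "foo.IL.auto.i".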
}; // class PAN.
} // end namespace llvm;
diff --git a/lib/Target/PIC16/PIC16DebugInfo.cpp b/lib/Target/PIC16/PIC16DebugInfo.cpp
index c517b1b..877e4ff 100644
--- a/lib/Target/PIC16/PIC16DebugInfo.cpp
+++ b/lib/Target/PIC16/PIC16DebugInfo.cpp
@@ -419,7 +419,7 @@ void PIC16DbgInfo::EmitAuxEntry(const std::string VarName, int Aux[], int Num,
if (TagName != "")
O << ", " << TagName;
for (int i = 0; i<Num; i++)
- O << "," << Aux[i];
+ O << "," << (Aux[i] & 0xff);
}
/// EmitSymbol - Emit .def for a symbol. Value is offset for the member.
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
index 82197ae..6cbd002 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.cpp
@@ -14,10 +14,7 @@
#define DEBUG_TYPE "pic16-isel"
#include "llvm/Support/ErrorHandling.h"
-#include "llvm/Support/raw_ostream.h"
#include "PIC16ISelDAGToDAG.h"
-#include "llvm/Support/Debug.h"
-
using namespace llvm;
/// createPIC16ISelDag - This pass converts a legalized DAG into a
@@ -27,13 +24,6 @@ FunctionPass *llvm::createPIC16ISelDag(PIC16TargetMachine &TM) {
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void PIC16DAGToDAGISel::InstructionSelect() {
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
/// Select - Select instructions not customized! Used for
/// expanded, promoted and normal instructions.
SDNode* PIC16DAGToDAGISel::Select(SDNode *N) {
diff --git a/lib/Target/PIC16/PIC16ISelDAGToDAG.h b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
index 813a540..8ed5bf7 100644
--- a/lib/Target/PIC16/PIC16ISelDAGToDAG.h
+++ b/lib/Target/PIC16/PIC16ISelDAGToDAG.h
@@ -19,6 +19,8 @@
#include "PIC16TargetMachine.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/Support/Compiler.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Debug.h"
#include "llvm/Intrinsics.h"
using namespace llvm;
@@ -46,8 +48,6 @@ public:
return "PIC16 DAG->DAG Pattern Instruction Selection";
}
- virtual void InstructionSelect();
-
private:
// Include the pieces autogenerated from the target description.
#include "PIC16GenDAGISel.inc"
diff --git a/lib/Target/PIC16/PIC16ISelLowering.cpp b/lib/Target/PIC16/PIC16ISelLowering.cpp
index d2fc8db..d17abb9 100644
--- a/lib/Target/PIC16/PIC16ISelLowering.cpp
+++ b/lib/Target/PIC16/PIC16ISelLowering.cpp
@@ -419,8 +419,7 @@ PIC16TargetLowering::MakePIC16Libcall(PIC16ISD::PIC16Libcall Call,
LowerCallTo(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false,
false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
- Callee, Args, DAG, dl,
- DAG.GetOrdering(DAG.getEntryNode().getNode()));
+ Callee, Args, DAG, dl);
return CallInfo.first;
}
@@ -1527,10 +1526,24 @@ bool PIC16TargetLowering::NeedToConvertToMemOp(SDValue Op, unsigned &MemOp,
return true;
if (isDirectLoad(Op.getOperand(1))) {
- if (Op.getOperand(1).hasOneUse())
- return false;
- else
- MemOp = 1;
+ if (Op.getOperand(1).hasOneUse()) {
+ // The legal and profitable folding check uses the NodeId of DAG nodes.
+ // This NodeId is assigned in topological order. Therefore first assign
+ // the topological order, then perform the legality and profitability
+ // check.
+ // Note: Though this ordering is done before beginning legalization,
+ // nodes newly added during legalization have NodeId = -1 (NewNode), so
+ // proper ordering of the nodes is required before performing any check.
+ DAG.AssignTopologicalOrder();
+
+ // Direct load operands are folded into binary operations. But before
+ // folding, verify that the folding is legal. Fold only if it is legal;
+ // otherwise convert this direct load to a separate memory operation.
+ if (ISel->IsLegalToFold(Op.getOperand(1), Op.getNode(), Op.getNode()))
+ return false;
+ else
+ MemOp = 1;
+ }
}
return true;
}
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
new file mode 100644
index 0000000..865da35
--- /dev/null
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.cpp
@@ -0,0 +1,299 @@
+//===-- PIC16Cloner.cpp - PIC16 LLVM Cloner for shared functions -*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains code to clone all functions that are shared between
+// the main line code (ML) and interrupt line code (IL). It clones all such
+// shared functions and their automatic global vars by adding the .IL suffix.
+//
+// This pass is supposed to be run on the linked .bc module.
+// It traverses the module call graph twice: first starting from the main
+// function and marking each reached function as "ML"; then starting from the
+// ISR and cloning any reachable function that was marked as "ML". After
+// cloning a function, it remaps all the call sites in IL functions to call
+// the cloned function.
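+//
+// For example (illustrative): if both main() and the ISR call foo(), foo is
+// first marked "ML"; then a clone foo.IL is created and the ISR-side call
+// sites are retargeted to foo.IL.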
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/CallGraph.h"
+#include "llvm/Pass.h"
+#include "llvm/Module.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/Utils/Cloning.h"
+#include "PIC16Cloner.h"
+#include "../PIC16ABINames.h"
+#include <vector>
+
+using namespace llvm;
+using std::vector;
+using std::string;
+using std::map;
+
+namespace llvm {
+ char PIC16Cloner::ID = 0;
+
+ ModulePass *createPIC16ClonerPass() { return new PIC16Cloner(); }
+}
+
+// We currently intend to run these passes in opt, which does not have any
+// diagnostic support, so use these functions for now. In the future we
+// will probably write our own driver tool.
+//
+void PIC16Cloner::reportError(string ErrorString) {
+ errs() << "ERROR : " << ErrorString << "\n";
+ exit(1);
+}
+
+void PIC16Cloner::
+reportError (string ErrorString, vector<string> &Values) {
+ unsigned ValCount = Values.size();
+ string TargetString;
+ for (unsigned i=0; i<ValCount; ++i) {
+ TargetString = "%";
+ TargetString += ((char)i + '0');
+ ErrorString.replace(ErrorString.find(TargetString), TargetString.length(),
+ Values[i]);
+ }
+ errs() << "ERROR : " << ErrorString << "\n";
+ exit(1);
+}
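+
+// For example (illustrative), given
+// ErrorString = "Interrupt function (%0) can't be called" and
+// Values = {"isr_fn"}, this prints:
+// ERROR : Interrupt function (isr_fn) can't be called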
+
+
+// Entry point
+//
+bool PIC16Cloner::runOnModule(Module &M) {
+ CallGraph &CG = getAnalysis<CallGraph>();
+
+ // Search for the "main" and "ISR" functions.
+ CallGraphNode *mainCGN = NULL, *isrCGN = NULL;
+ for (CallGraph::iterator it = CG.begin(); it != CG.end(); ++it) {
+ // External calling node doesn't have any function associated with it.
+ if (! it->first)
+ continue;
+
+ if (it->first->getName().str() == "main") {
+ mainCGN = it->second;
+ }
+
+ if (PAN::isISR(it->first->getSection())) {
+ isrCGN = it->second;
+ }
+
+ // Don't search further if we've found both.
+ if (mainCGN && isrCGN)
+ break;
+ }
+
+ // We have nothing to do if either main or the ISR is missing.
+ if (! mainCGN || ! isrCGN) return false;
+
+ // Time for some diagnostics.
+ // If main itself is an interrupt function, report an error.
+ if (PAN::isISR(mainCGN->getFunction()->getSection())) {
+ reportError("Function 'main' can't be interrupt function");
+ }
+
+
+ // Mark all reachable functions from main as ML.
+ markCallGraph(mainCGN, "ML");
+
+ // And then all the functions reachable from ISR will be cloned.
+ cloneSharedFunctions(isrCGN);
+
+ return true;
+}
+
+// Mark all functions reachable from the given node with the given mark.
+//
+void PIC16Cloner::markCallGraph(CallGraphNode *CGN, string StringMark) {
+ // Mark the top node first.
+ Function *thisF = CGN->getFunction();
+
+ thisF->setSection(StringMark);
+
+ // Mark all the called functions
+ for (CallGraphNode::iterator cgn_it = CGN->begin();
+ cgn_it != CGN->end(); ++cgn_it) {
+ Function *CalledF = cgn_it->second->getFunction();
+
+ // If calling an external function, the CallGraphNode
+ // will not be associated with any function.
+ if (! CalledF)
+ continue;
+
+ // Issue diagnostic if interrupt function is being called.
+ if (PAN::isISR(CalledF->getSection())) {
+ vector<string> Values;
+ Values.push_back(CalledF->getName().str());
+ reportError("Interrupt function (%0) can't be called", Values);
+ }
+
+ // Has already been marked.
+ if (CalledF->getSection().find(StringMark) != string::npos) {
+ // Should we do anything here?
+ } else {
+ // Mark now
+ CalledF->setSection(StringMark);
+ }
+
+ // Before going any further, mark all the functions called by the
+ // current function.
+ markCallGraph(cgn_it->second ,StringMark);
+ } // end of loop of all called functions.
+}
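+
+// Illustrative example: for a call chain main -> f -> g, calling this with
+// mainCGN and "ML" marks main, f, and g as "ML".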
+
+
+// For PIC16, automatic variables of a function are emitted as globals.
+// Clone the auto variables of a function and put them in the ValueMap;
+// this ValueMap will be used while cloning the body of the function itself.
+//
+void PIC16Cloner::CloneAutos(Function *F) {
+ // We'll need to update the module's globals list as well, so keep a
+ // reference handy.
+ Module *M = F->getParent();
+ Module::GlobalListType &Globals = M->getGlobalList();
+
+ // Clear the leftovers in ValueMap by any previous cloning.
+ ValueMap.clear();
+
+ // Find the auto globals for this function, clone them, and put them
+ // in the ValueMap.
+ std::string FnName = F->getName().str();
+ std::string VarName, ClonedVarName;
+ for (Module::global_iterator I = M->global_begin(), E = M->global_end();
+ I != E; ++I) {
+ VarName = I->getName().str();
+ if (PAN::isLocalToFunc(FnName, VarName)) {
+ // Auto variable for current function found. Clone it.
+ GlobalVariable *GV = I;
+
+ const Type *InitTy = GV->getInitializer()->getType();
+ GlobalVariable *ClonedGV =
+ new GlobalVariable(InitTy, false, GV->getLinkage(),
+ GV->getInitializer());
+ ClonedGV->setName(PAN::getCloneVarName(FnName, VarName));
+ // Add these new globals to module's globals list.
+ Globals.push_back(ClonedGV);
+
+ // Update ValueMap.
+ ValueMap[GV] = ClonedGV;
+ }
+ }
+}
+
+
+// Clone all functions that are reachable from ISR and are already
+// marked as ML.
+//
+void PIC16Cloner::cloneSharedFunctions(CallGraphNode *CGN) {
+
+ // Check all the called functions from ISR.
+ for (CallGraphNode::iterator cgn_it = CGN->begin();
+ cgn_it != CGN->end(); ++cgn_it) {
+ Function *CalledF = cgn_it->second->getFunction();
+
+ // If calling an external function, the CallGraphNode
+ // will not be associated with any function.
+ if (!CalledF)
+ continue;
+
+ // Issue diagnostic if interrupt function is being called.
+ if (PAN::isISR(CalledF->getSection())) {
+ vector<string> Values;
+ Values.push_back(CalledF->getName().str());
+ reportError("Interrupt function (%0) can't be called", Values);
+ }
+
+ if (CalledF->getSection().find("ML") != string::npos) {
+ // Function is marked as "ML", so it should be a shared one.
+ // Create the IL copy and remap the caller's call sites to it.
+
+ // Before making the IL copy, first ensure that this function has a
+ // body. If the function does not have a body, it can't be cloned.
+ // Such a case may occur when the function has been declared in the
+ // C source code but its body exists in an assembly file.
+ if (!CalledF->isDeclaration()) {
+ Function *cf = cloneFunction(CalledF);
+ remapAllSites(CGN->getFunction(), CalledF, cf);
+ } else {
+ // It is called only from the ISR. Still mark it, as we need this
+ // info in codegen while calling intrinsics.
+ CalledF->setSection("IL");
+ }
+ }
+ // Before going any further, clone all the shared functions reachable
+ // from the current function.
+ cloneSharedFunctions(cgn_it->second);
+ } // end of loop of all called functions.
+}
+
+// Clone the given function and return it.
+// Note: it uses the ValueMap member of the class, which is populated by
+// CloneAutos before the actual cloning of the body happens.
+// FIXME: Should we just pass a reference to the ValueMap as a parameter
+// here, rather than keeping the ValueMap as a member?
+Function *
+PIC16Cloner::cloneFunction(Function *OrgF) {
+ Function *ClonedF;
+
+ // See if we already cloned it. Return that.
+ cloned_map_iterator cm_it = ClonedFunctionMap.find(OrgF);
+ if (cm_it != ClonedFunctionMap.end()) {
+ ClonedF = cm_it->second;
+ return ClonedF;
+ }
+
+ // Clone does not exist.
+ // First clone the autos, and populate ValueMap.
+ CloneAutos(OrgF);
+
+ // Now create the clone.
+ ClonedF = CloneFunction(OrgF, ValueMap);
+
+ // The new function is for the interrupt line, so its name gets the IL
+ // suffix and its section attribute is marked with IL.
+ ClonedF->setName(PAN::getCloneFnName(OrgF->getName()));
+ ClonedF->setSection("IL");
+
+ // Add the newly created function to the module.
+ OrgF->getParent()->getFunctionList().push_back(ClonedF);
+
+ // Update the ClonedFunctionMap to record this cloning activity.
+ ClonedFunctionMap[OrgF] = ClonedF;
+
+ return ClonedF;
+}
+
+
+// Remap the call sites of shared functions that are in IL:
+// change each IL call site of a shared function to its clone.
+//
+void PIC16Cloner::
+remapAllSites(Function *Caller, Function *OrgF, Function *Clone) {
+ // First find the caller to update. If the caller itself has been
+ // cloned, update the cloned caller; otherwise use the caller as is.
+ cloned_map_iterator cm_it = ClonedFunctionMap.find(Caller);
+ if (cm_it != ClonedFunctionMap.end())
+ Caller = cm_it->second;
+
+ // Lacking a better call-site finding mechanism, iterate over all
+ // instructions to find the uses of the original function.
+ for (Function::iterator BI = Caller->begin(); BI != Caller->end(); ++BI) {
+ BasicBlock &BB = *BI;
+ for (BasicBlock::iterator II = BB.begin(); II != BB.end(); ++II) {
+ if (II->getNumOperands() > 0 && II->getOperand(0) == OrgF)
+ II->setOperand(0, Clone);
+ }
+ }
+}
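+
+// For example (illustrative): after foo is cloned to foo.IL, a call to foo
+// appearing in the ISR-side caller is rewritten to call foo.IL instead.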
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
new file mode 100644
index 0000000..24c1152
--- /dev/null
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Cloner.h
@@ -0,0 +1,83 @@
+//===-- PIC16Cloner.h - PIC16 LLVM Cloner for shared functions --*- C++ -*-===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file contains the declaration of a cloner class that clones all
+// functions shared between the main line code (ML) and interrupt line
+// code (IL).
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef PIC16CLONER_H
+#define PIC16CLONER_H
+
+#include "llvm/ADT/DenseMap.h"
+
+using namespace llvm;
+using std::vector;
+using std::string;
+using std::map;
+
+namespace llvm {
+ // forward classes.
+ class Value;
+ class Function;
+ class Module;
+ class ModulePass;
+ class CallGraph;
+ class CallGraphNode;
+ class AnalysisUsage;
+
+ class PIC16Cloner : public ModulePass {
+ public:
+ static char ID; // Class identification
+ PIC16Cloner() : ModulePass(&ID) {}
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.addRequired<CallGraph>();
+ }
+ virtual bool runOnModule(Module &M);
+
+ private: // Functions
+ // Mark reachable functions for the MainLine or InterruptLine.
+ void markCallGraph(CallGraphNode *CGN, string StringMark);
+
+ // Clone auto variables of function specified.
+ void CloneAutos(Function *F);
+
+ // Clone the body of a function.
+ Function *cloneFunction(Function *F);
+
+ // Clone all shared functions.
+ void cloneSharedFunctions(CallGraphNode *isrCGN);
+
+ // Remap all call sites to the shared function.
+ void remapAllSites(Function *Caller, Function *OrgF, Function *Clone);
+
+ // Error reporting for PIC16Pass
+ void reportError(string ErrorString, vector<string> &Values);
+ void reportError(string ErrorString);
+
+ private: //data
+ // Records if the interrupt function has already been found.
+ // If more than one interrupt function is found then an error
+ // should be thrown.
+ bool foundISR;
+
+ // This ValueMap maps the auto variables of the original function to
+ // the corresponding cloned auto variables of the cloned function.
+ // This value map is passed during the function cloning so that all
+ // uses of the auto variables are updated properly.
+ DenseMap<const Value*, Value*> ValueMap;
+
+ // Map of already cloned functions.
+ map<Function *, Function *> ClonedFunctionMap;
+ typedef map<Function *, Function *>::iterator cloned_map_iterator;
+ };
+} // End of llvm namespace
+
+#endif
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
index 197c398..5ecb6aa 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.cpp
@@ -24,27 +24,27 @@
using namespace llvm;
namespace llvm {
- char PIC16FrameOverlay::ID = 0;
- ModulePass *createPIC16OverlayPass() { return new PIC16FrameOverlay(); }
+ char PIC16Overlay::ID = 0;
+ ModulePass *createPIC16OverlayPass() { return new PIC16Overlay(); }
}
-void PIC16FrameOverlay::getAnalysisUsage(AnalysisUsage &AU) const {
+void PIC16Overlay::getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
AU.addRequired<CallGraph>();
}
-void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) {
+void PIC16Overlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) {
// Do not set any color for external calling node.
if (Depth != 0 && CGN->getFunction()) {
unsigned Color = getColor(CGN->getFunction());
// Handle indirectly called functions
- if (Color >= PIC16Overlay::StartIndirectCallColor ||
- Depth >= PIC16Overlay::StartIndirectCallColor) {
+ if (Color >= PIC16OVERLAY::StartIndirectCallColor ||
+ Depth >= PIC16OVERLAY::StartIndirectCallColor) {
// All functions called from an indirectly called function are given
// an unique color.
- if (Color < PIC16Overlay::StartIndirectCallColor &&
- Depth >= PIC16Overlay::StartIndirectCallColor)
+ if (Color < PIC16OVERLAY::StartIndirectCallColor &&
+ Depth >= PIC16OVERLAY::StartIndirectCallColor)
setColor(CGN->getFunction(), Depth);
for (unsigned int i = 0; i < CGN->size(); i++)
@@ -65,7 +65,7 @@ void PIC16FrameOverlay::DFSTraverse(CallGraphNode *CGN, unsigned Depth) {
DFSTraverse((*CGN)[i], Depth+1);
}
-unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN,
+unsigned PIC16Overlay::ModifyDepthForInterrupt(CallGraphNode *CGN,
unsigned Depth) {
Function *Fn = CGN->getFunction();
@@ -81,7 +81,7 @@ unsigned PIC16FrameOverlay::ModifyDepthForInterrupt(CallGraphNode *CGN,
return Depth;
}
-void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) {
+void PIC16Overlay::setColor(Function *Fn, unsigned Color) {
std::string Section = "";
if (Fn->hasSection())
Section = Fn->getSection();
@@ -119,7 +119,7 @@ void PIC16FrameOverlay::setColor(Function *Fn, unsigned Color) {
Fn->setSection(Section);
}
-unsigned PIC16FrameOverlay::getColor(Function *Fn) {
+unsigned PIC16Overlay::getColor(Function *Fn) {
int Color = 0;
if (!Fn->hasSection())
return 0;
@@ -150,7 +150,7 @@ unsigned PIC16FrameOverlay::getColor(Function *Fn) {
return Color;
}
-bool PIC16FrameOverlay::runOnModule(Module &M) {
+bool PIC16Overlay::runOnModule(Module &M) {
CallGraph &CG = getAnalysis<CallGraph>();
CallGraphNode *ECN = CG.getExternalCallingNode();
@@ -164,7 +164,7 @@ bool PIC16FrameOverlay::runOnModule(Module &M) {
return false;
}
-void PIC16FrameOverlay::MarkIndirectlyCalledFunctions(Module &M) {
+void PIC16Overlay::MarkIndirectlyCalledFunctions(Module &M) {
// If the use of a function is not a call instruction then this
// function might be called indirectly. In that case give it
// an unique color.
diff --git a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
index d70c4e7..5a2551f 100644
--- a/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
+++ b/lib/Target/PIC16/PIC16Passes/PIC16Overlay.h
@@ -1,4 +1,4 @@
-//===-- PIC16FrameOverlay.h - Interface for PIC16 Frame Overlay -*- C++ -*-===//
+//===-- PIC16Overlay.h - Interface for PIC16 Frame Overlay ------*- C++ -*-===//
//
// The LLVM Compiler Infrastructure
//
@@ -14,30 +14,35 @@
#ifndef PIC16FRAMEOVERLAY_H
#define PIC16FRAMEOVERLAY_H
-#include "llvm/Analysis/CallGraph.h"
-#include "llvm/Pass.h"
-#include "llvm/CallGraphSCCPass.h"
using std::string;
using namespace llvm;
namespace llvm {
- namespace PIC16Overlay {
+ // Forward declarations.
+ class Function;
+ class Module;
+ class ModulePass;
+ class AnalysisUsage;
+ class CallGraphNode;
+ class CallGraph;
+
+ namespace PIC16OVERLAY {
enum OverlayConsts {
StartInterruptColor = 200,
StartIndirectCallColor = 300
};
}
- class PIC16FrameOverlay : public ModulePass {
+ class PIC16Overlay : public ModulePass {
std::string OverlayStr;
unsigned InterruptDepth;
unsigned IndirectCallColor;
public:
static char ID; // Class identification
- PIC16FrameOverlay() : ModulePass(&ID) {
+ PIC16Overlay() : ModulePass(&ID) {
OverlayStr = "Overlay=";
- InterruptDepth = PIC16Overlay::StartInterruptColor;
- IndirectCallColor = PIC16Overlay::StartIndirectCallColor;
+ InterruptDepth = PIC16OVERLAY::StartInterruptColor;
+ IndirectCallColor = PIC16OVERLAY::StartIndirectCallColor;
}
virtual void getAnalysisUsage(AnalysisUsage &AU) const;
diff --git a/lib/Target/PowerPC/PPCHazardRecognizers.cpp b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
index 3a15f7e..66dfd4b 100644
--- a/lib/Target/PowerPC/PPCHazardRecognizers.cpp
+++ b/lib/Target/PowerPC/PPCHazardRecognizers.cpp
@@ -257,7 +257,7 @@ void PPCHazardRecognizer970::EmitInstruction(SUnit *SU) {
case PPC::STWX: case PPC::STWX8:
case PPC::STWUX:
case PPC::STW: case PPC::STW8:
- case PPC::STWU: case PPC::STWU8:
+ case PPC::STWU:
case PPC::STVEWX:
case PPC::STFIWX:
case PPC::STWBRX:
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index 004997f..9d79c0d 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -156,10 +156,6 @@ namespace {
SDValue BuildSDIVSequence(SDNode *N);
SDValue BuildUDIVSequence(SDNode *N);
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
-
void InsertVRSaveCode(MachineFunction &MF);
virtual const char *getPassName() const {
@@ -184,14 +180,6 @@ private:
};
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void PPCDAGToDAGISel::InstructionSelect() {
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
/// InsertVRSaveCode - Once the entire function has been instruction selected,
/// all virtual registers are created and all machine instructions are built,
/// check to see if we need to save/restore VRSAVE. If so, do it.
diff --git a/lib/Target/PowerPC/PPCISelLowering.cpp b/lib/Target/PowerPC/PPCISelLowering.cpp
index e73af56..3d81afa 100644
--- a/lib/Target/PowerPC/PPCISelLowering.cpp
+++ b/lib/Target/PowerPC/PPCISelLowering.cpp
@@ -1334,7 +1334,7 @@ SDValue PPCTargetLowering::LowerTRAMPOLINE(SDValue Op, SelectionDAG &DAG) {
false, false, false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__trampoline_setup", PtrVT),
- Args, DAG, dl, DAG.GetOrdering(Chain.getNode()));
+ Args, DAG, dl);
SDValue Ops[] =
{ CallResult.first, CallResult.second };
diff --git a/lib/Target/PowerPC/PPCInstr64Bit.td b/lib/Target/PowerPC/PPCInstr64Bit.td
index 219efb9..a0781b9 100644
--- a/lib/Target/PowerPC/PPCInstr64Bit.td
+++ b/lib/Target/PowerPC/PPCInstr64Bit.td
@@ -366,7 +366,7 @@ def ADDE8 : XOForm_1<31, 138, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
[(set G8RC:$rT, (adde G8RC:$rA, G8RC:$rB))]>;
def ADDME8 : XOForm_3<31, 234, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (adde G8RC:$rA, immAllOnes))]>;
+ [(set G8RC:$rT, (adde G8RC:$rA, -1))]>;
def ADDZE8 : XOForm_3<31, 202, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"addze $rT, $rA", IntGeneral,
[(set G8RC:$rT, (adde G8RC:$rA, 0))]>;
@@ -375,7 +375,7 @@ def SUBFE8 : XOForm_1<31, 136, 0, (outs G8RC:$rT), (ins G8RC:$rA, G8RC:$rB),
[(set G8RC:$rT, (sube G8RC:$rB, G8RC:$rA))]>;
def SUBFME8 : XOForm_3<31, 232, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set G8RC:$rT, (sube immAllOnes, G8RC:$rA))]>;
+ [(set G8RC:$rT, (sube -1, G8RC:$rA))]>;
def SUBFZE8 : XOForm_3<31, 200, 0, (outs G8RC:$rT), (ins G8RC:$rA),
"subfze $rT, $rA", IntGeneral,
[(set G8RC:$rT, (sube 0, G8RC:$rA))]>;
@@ -635,13 +635,6 @@ def STHU8 : DForm_1<45, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
(pre_truncsti16 G8RC:$rS, ptr_rc:$ptrreg,
iaddroff:$ptroff))]>,
RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-def STWU8 : DForm_1<37, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
- symbolLo:$ptroff, ptr_rc:$ptrreg),
- "stwu $rS, $ptroff($ptrreg)", LdStGeneral,
- [(set ptr_rc:$ea_res, (pre_store G8RC:$rS, ptr_rc:$ptrreg,
- iaddroff:$ptroff))]>,
- RegConstraint<"$ptrreg = $ea_res">, NoEncode<"$ea_res">;
-
def STDU : DSForm_1<62, 1, (outs ptr_rc:$ea_res), (ins G8RC:$rS,
s16immX4:$ptroff, ptr_rc:$ptrreg),
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 3db623a..9895bea 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -19,6 +19,7 @@
#include "PPCTargetMachine.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
@@ -73,8 +74,7 @@ bool PPCInstrInfo::isMoveInstr(const MachineInstr& MI,
destReg = MI.getOperand(0).getReg();
return true;
}
- } else if (oc == PPC::FMRS || oc == PPC::FMRD ||
- oc == PPC::FMRSD) { // fmr r1, r2
+ } else if (oc == PPC::FMR || oc == PPC::FMRSD) { // fmr r1, r2
assert(MI.getNumOperands() >= 2 &&
MI.getOperand(0).isReg() &&
MI.getOperand(1).isReg() &&
@@ -344,10 +344,9 @@ bool PPCInstrInfo::copyRegToReg(MachineBasicBlock &MBB,
BuildMI(MBB, MI, DL, get(PPC::OR), DestReg).addReg(SrcReg).addReg(SrcReg);
} else if (DestRC == PPC::G8RCRegisterClass) {
BuildMI(MBB, MI, DL, get(PPC::OR8), DestReg).addReg(SrcReg).addReg(SrcReg);
- } else if (DestRC == PPC::F4RCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::FMRS), DestReg).addReg(SrcReg);
- } else if (DestRC == PPC::F8RCRegisterClass) {
- BuildMI(MBB, MI, DL, get(PPC::FMRD), DestReg).addReg(SrcReg);
+ } else if (DestRC == PPC::F4RCRegisterClass ||
+ DestRC == PPC::F8RCRegisterClass) {
+ BuildMI(MBB, MI, DL, get(PPC::FMR), DestReg).addReg(SrcReg);
} else if (DestRC == PPC::CRRCRegisterClass) {
BuildMI(MBB, MI, DL, get(PPC::MCRF), DestReg).addReg(SrcReg);
} else if (DestRC == PPC::VRRCRegisterClass) {
@@ -688,33 +687,21 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
getUndefRegState(isUndef)),
FrameIndex);
}
- } else if (Opc == PPC::FMRD) {
- if (OpNum == 0) { // move -> store
- unsigned InReg = MI->getOperand(1).getReg();
- bool isKill = MI->getOperand(1).isKill();
- bool isUndef = MI->getOperand(1).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFD))
- .addReg(InReg,
- getKillRegState(isKill) |
- getUndefRegState(isUndef)),
- FrameIndex);
- } else { // move -> load
- unsigned OutReg = MI->getOperand(0).getReg();
- bool isDead = MI->getOperand(0).isDead();
- bool isUndef = MI->getOperand(0).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFD))
- .addReg(OutReg,
- RegState::Define |
- getDeadRegState(isDead) |
- getUndefRegState(isUndef)),
- FrameIndex);
- }
- } else if (Opc == PPC::FMRS) {
+ } else if (Opc == PPC::FMR || Opc == PPC::FMRSD) {
+ // The register may be F4RC or F8RC, and that determines the memory op.
+ unsigned OrigReg = MI->getOperand(OpNum).getReg();
+ // We cannot tell the register class from a physreg alone.
+ if (TargetRegisterInfo::isPhysicalRegister(OrigReg))
+ return NULL;
+ const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(OrigReg);
+ const bool is64 = RC == PPC::F8RCRegisterClass;
+
if (OpNum == 0) { // move -> store
unsigned InReg = MI->getOperand(1).getReg();
bool isKill = MI->getOperand(1).isKill();
bool isUndef = MI->getOperand(1).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::STFS))
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(),
+ get(is64 ? PPC::STFD : PPC::STFS))
.addReg(InReg,
getKillRegState(isKill) |
getUndefRegState(isUndef)),
@@ -723,7 +710,8 @@ MachineInstr *PPCInstrInfo::foldMemoryOperandImpl(MachineFunction &MF,
unsigned OutReg = MI->getOperand(0).getReg();
bool isDead = MI->getOperand(0).isDead();
bool isUndef = MI->getOperand(0).isUndef();
- NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(), get(PPC::LFS))
+ NewMI = addFrameReference(BuildMI(MF, MI->getDebugLoc(),
+ get(is64 ? PPC::LFD : PPC::LFS))
.addReg(OutReg,
RegState::Define |
getDeadRegState(isDead) |
@@ -749,7 +737,7 @@ bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
else if ((Opc == PPC::OR8 &&
MI->getOperand(1).getReg() == MI->getOperand(2).getReg()))
return true;
- else if (Opc == PPC::FMRD || Opc == PPC::FMRS)
+ else if (Opc == PPC::FMR || Opc == PPC::FMRSD)
return true;
return false;
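
The fold above must now derive the 4- or 8-byte memory opcode from the register class of the operand being spilled or reloaded, which is also why it bails out on physical registers. A minimal standalone sketch of that decision, with toy enums standing in for the LLVM opcode and register-class types (not the real API):

#include <cassert>

enum class RegClass { F4RC, F8RC };
enum class Opc { STFS, STFD, LFS, LFD };

// OpNum == 0 folds the fmr into a store of its source operand; any other
// operand number folds it into a load of the destination operand.
Opc pickMemOpcode(RegClass RC, unsigned OpNum) {
  const bool is64 = RC == RegClass::F8RC;
  if (OpNum == 0)                       // move -> store
    return is64 ? Opc::STFD : Opc::STFS;
  return is64 ? Opc::LFD : Opc::LFS;    // move -> load
}

int main() {
  assert(pickMemOpcode(RegClass::F8RC, 0) == Opc::STFD);
  assert(pickMemOpcode(RegClass::F4RC, 1) == Opc::LFS);
}
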
diff --git a/lib/Target/PowerPC/PPCInstrInfo.td b/lib/Target/PowerPC/PPCInstrInfo.td
index 842f8ee..845cd8f 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.td
+++ b/lib/Target/PowerPC/PPCInstrInfo.td
@@ -1019,20 +1019,16 @@ let Uses = [RM] in {
}
}
-/// FMR is split into 3 versions, one for 4/8 byte FP, and one for extending.
+/// FMR is split into 2 versions, one for 4/8 byte FP, and one for extending.
///
/// Note that these are defined as pseudo-ops on the PPC970 because they are
/// often coalesced away and we don't want the dispatch group builder to think
/// that they will fill slots (which could cause the load of a LSU reject to
/// sneak into a d-group with a store).
-def FMRS : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
- "fmr $frD, $frB", FPGeneral,
- []>, // (set F4RC:$frD, F4RC:$frB)
- PPC970_Unit_Pseudo;
-def FMRD : XForm_26<63, 72, (outs F8RC:$frD), (ins F8RC:$frB),
- "fmr $frD, $frB", FPGeneral,
- []>, // (set F8RC:$frD, F8RC:$frB)
- PPC970_Unit_Pseudo;
+def FMR : XForm_26<63, 72, (outs F4RC:$frD), (ins F4RC:$frB),
+ "fmr $frD, $frB", FPGeneral,
+ []>, // (set F4RC:$frD, F4RC:$frB)
+ PPC970_Unit_Pseudo;
def FMRSD : XForm_26<63, 72, (outs F8RC:$frD), (ins F4RC:$frB),
"fmr $frD, $frB", FPGeneral,
[(set F8RC:$frD, (fextend F4RC:$frB))]>,
@@ -1215,7 +1211,7 @@ def ADDE : XOForm_1<31, 138, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
[(set GPRC:$rT, (adde GPRC:$rA, GPRC:$rB))]>;
def ADDME : XOForm_3<31, 234, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (adde GPRC:$rA, immAllOnes))]>;
+ [(set GPRC:$rT, (adde GPRC:$rA, -1))]>;
def ADDZE : XOForm_3<31, 202, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"addze $rT, $rA", IntGeneral,
[(set GPRC:$rT, (adde GPRC:$rA, 0))]>;
@@ -1224,7 +1220,7 @@ def SUBFE : XOForm_1<31, 136, 0, (outs GPRC:$rT), (ins GPRC:$rA, GPRC:$rB),
[(set GPRC:$rT, (sube GPRC:$rB, GPRC:$rA))]>;
def SUBFME : XOForm_3<31, 232, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfme $rT, $rA", IntGeneral,
- [(set GPRC:$rT, (sube immAllOnes, GPRC:$rA))]>;
+ [(set GPRC:$rT, (sube -1, GPRC:$rA))]>;
def SUBFZE : XOForm_3<31, 200, 0, (outs GPRC:$rT), (ins GPRC:$rA),
"subfze $rT, $rA", IntGeneral,
[(set GPRC:$rT, (sube 0, GPRC:$rA))]>;
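
The immAllOnes -> -1 change in ADDME/SUBFME above only respells the all-ones constant inside the selection patterns; the instructions themselves are unchanged. As a reminder of what ADDME ("add to minus one extended") computes, a small self-contained C++ sketch, independent of the TableGen pattern DSL:

#include <cassert>
#include <cstdint>

// ADDME: rT = rA + (-1) + CA, where -1 is the all-ones 32-bit value.
uint32_t addme(uint32_t rA, unsigned CA) {
  return rA + 0xFFFFFFFFu + CA;
}

int main() {
  assert(addme(5, 1) == 5);  // carry set: rA - 1 + 1
  assert(addme(5, 0) == 4);  // carry clear: rA - 1
}
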
@@ -1480,11 +1476,11 @@ def : Pat<(extloadf32 xaddr:$src),
(FMRSD (LFSX xaddr:$src))>;
// Memory barriers
-def : Pat<(membarrier (i32 imm:$ll),
- (i32 imm:$ls),
- (i32 imm:$sl),
- (i32 imm:$ss),
- (i32 imm:$device)),
+def : Pat<(membarrier (i32 imm /*ll*/),
+ (i32 imm /*ls*/),
+ (i32 imm /*sl*/),
+ (i32 imm /*ss*/),
+ (i32 imm /*device*/)),
(SYNC)>;
include "PPCInstrAltivec.td"
diff --git a/lib/Target/PowerPC/PPCRegisterInfo.td b/lib/Target/PowerPC/PPCRegisterInfo.td
index 049e893..1cb7340 100644
--- a/lib/Target/PowerPC/PPCRegisterInfo.td
+++ b/lib/Target/PowerPC/PPCRegisterInfo.td
@@ -287,10 +287,8 @@ def GPRC : RegisterClass<"PPC", [i32], 32,
GPRCClass::allocation_order_begin(const MachineFunction &MF) const {
// 32-bit SVR4 ABI: r2 is reserved for the OS.
// 64-bit SVR4 ABI: r2 is reserved for the TOC pointer.
- if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
- return begin()+1;
-
- return begin();
+    // Darwin: r2 is reserved for the CR save/restore sequence.
+ return begin()+1;
}
GPRCClass::iterator
GPRCClass::allocation_order_end(const MachineFunction &MF) const {
@@ -325,10 +323,8 @@ def G8RC : RegisterClass<"PPC", [i64], 64,
G8RCClass::iterator
G8RCClass::allocation_order_begin(const MachineFunction &MF) const {
// 64-bit SVR4 ABI: r2 is reserved for the TOC pointer.
- if (!MF.getTarget().getSubtarget<PPCSubtarget>().isDarwin())
- return begin()+1;
-
- return begin();
+    // Darwin: r2 is reserved for the CR save/restore sequence.
+ return begin()+1;
}
G8RCClass::iterator
G8RCClass::allocation_order_end(const MachineFunction &MF) const {
diff --git a/lib/Target/PowerPC/README.txt b/lib/Target/PowerPC/README.txt
index e49bda0..3465779 100644
--- a/lib/Target/PowerPC/README.txt
+++ b/lib/Target/PowerPC/README.txt
@@ -889,7 +889,7 @@ entry:
; recognize a more elaborate tree than a simple SETxx.
define double @test_FNEG_sel(double %A, double %B, double %C) {
- %D = sub double -0.000000e+00, %A ; <double> [#uses=1]
+ %D = fsub double -0.000000e+00, %A ; <double> [#uses=1]
%Cond = fcmp ugt double %D, -0.000000e+00 ; <i1> [#uses=1]
%E = select i1 %Cond, double %B, double %C ; <double> [#uses=1]
ret double %E
diff --git a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
index 9a2ce6b..d6b45be 100644
--- a/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
+++ b/lib/Target/Sparc/AsmPrinter/SparcAsmPrinter.cpp
@@ -56,6 +56,9 @@ namespace {
unsigned AsmVariant, const char *ExtraCode);
bool printGetPCX(const MachineInstr *MI, unsigned OpNo);
+
+ virtual bool isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const;
};
} // end of anonymous namespace
@@ -140,18 +143,19 @@ bool SparcAsmPrinter::printGetPCX(const MachineInstr *MI, unsigned opNum) {
break;
}
+ unsigned mfNum = MI->getParent()->getParent()->getFunctionNumber();
unsigned bbNum = MI->getParent()->getNumber();
- O << '\n' << ".LLGETPCH" << bbNum << ":\n";
- O << "\tcall\t.LLGETPC" << bbNum << '\n' ;
+ O << '\n' << ".LLGETPCH" << mfNum << '_' << bbNum << ":\n";
+ O << "\tcall\t.LLGETPC" << mfNum << '_' << bbNum << '\n' ;
O << "\t sethi\t"
- << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), "
+ << "%hi(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), "
<< operand << '\n' ;
- O << ".LLGETPC" << bbNum << ":\n" ;
+ O << ".LLGETPC" << mfNum << '_' << bbNum << ":\n" ;
O << "\tor\t" << operand
- << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << bbNum << ")), "
+ << ", %lo(_GLOBAL_OFFSET_TABLE_+(.-.LLGETPCH" << mfNum << '_' << bbNum << ")), "
<< operand << '\n';
O << "\tadd\t" << operand << ", %o7, " << operand << '\n';
@@ -197,6 +201,38 @@ bool SparcAsmPrinter::PrintAsmMemoryOperand(const MachineInstr *MI,
return false;
}
+/// isBlockOnlyReachableByFallthrough - Return true if the basic block has
+/// exactly one predecessor and the control transfer mechanism between
+/// the predecessor and this block is a fall-through.
+/// Override the AsmPrinter implementation to handle delay slots.
+bool SparcAsmPrinter::isBlockOnlyReachableByFallthrough(const MachineBasicBlock *MBB)
+ const {
+ // If this is a landing pad, it isn't a fall through. If it has no preds,
+ // then nothing falls through to it.
+ if (MBB->isLandingPad() || MBB->pred_empty())
+ return false;
+
+ // If there isn't exactly one predecessor, it can't be a fall through.
+ MachineBasicBlock::const_pred_iterator PI = MBB->pred_begin(), PI2 = PI;
+ ++PI2;
+ if (PI2 != MBB->pred_end())
+ return false;
+
+ // The predecessor has to be immediately before this block.
+ const MachineBasicBlock *Pred = *PI;
+
+ if (!Pred->isLayoutSuccessor(MBB))
+ return false;
+
+ // Check if the last terminator is an unconditional branch
+ MachineBasicBlock::const_iterator I = Pred->end();
+  while (I != Pred->begin() && !(--I)->getDesc().isTerminator())
+ ; /* Noop */
+ return I == Pred->end() || !I->getDesc().isBarrier();
+}
+
// Force static initialization.
extern "C" void LLVMInitializeSparcAsmPrinter() {
RegisterAsmPrinter<SparcAsmPrinter> X(TheSparcTarget);
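
The backwards scan in the new override is the subtle part: the predecessor still falls through unless its last terminator is a barrier (an unconditional branch). A standalone model of that loop, assuming a toy instruction type in which non-terminators are never barriers:

#include <cassert>
#include <vector>

struct Inst { bool isTerminator; bool isBarrier; };

// Walk backwards to the last terminator; the block falls through if there
// is no terminator at all, or if the last one is not a barrier.
bool predFallsThrough(const std::vector<Inst> &Pred) {
  auto I = Pred.end();
  while (I != Pred.begin() && !(--I)->isTerminator)
    ; // no-op
  return I == Pred.end() || !I->isBarrier;
}

int main() {
  assert(predFallsThrough({{false, false}}));                // no terminator
  assert(predFallsThrough({{true, false}}));                 // conditional branch
  assert(!predFallsThrough({{true, false}, {true, true}}));  // ends in a jump
}
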
diff --git a/lib/Target/Sparc/README.txt b/lib/Target/Sparc/README.txt
index cc24abf..b4991fe 100644
--- a/lib/Target/Sparc/README.txt
+++ b/lib/Target/Sparc/README.txt
@@ -56,3 +56,4 @@ int %t1(int %a, int %b) {
leaf fns.
* Fill delay slots
+* Implement JIT support
diff --git a/lib/Target/Sparc/SparcISelDAGToDAG.cpp b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
index e1b3299..a7d1805 100644
--- a/lib/Target/Sparc/SparcISelDAGToDAG.cpp
+++ b/lib/Target/Sparc/SparcISelDAGToDAG.cpp
@@ -35,7 +35,6 @@ class SparcDAGToDAGISel : public SelectionDAGISel {
/// make the right decision when generating code for different targets.
const SparcSubtarget &Subtarget;
SparcTargetMachine& TM;
- MachineBasicBlock *CurBB;
public:
explicit SparcDAGToDAGISel(SparcTargetMachine &tm)
: SelectionDAGISel(tm),
@@ -56,10 +55,6 @@ public:
char ConstraintCode,
std::vector<SDValue> &OutOps);
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "SPARC DAG->DAG Pattern Instruction Selection";
}
@@ -72,17 +67,8 @@ private:
};
} // end anonymous namespace
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void SparcDAGToDAGISel::InstructionSelect() {
- CurBB = BB;
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
- CurDAG->RemoveDeadNodes();
-}
-
SDNode* SparcDAGToDAGISel::getGlobalBaseReg() {
- MachineFunction *MF = CurBB->getParent();
+ MachineFunction *MF = BB->getParent();
unsigned GlobalBaseReg = TM.getInstrInfo()->getGlobalBaseReg(MF);
return CurDAG->getRegister(GlobalBaseReg, TLI.getPointerTy()).getNode();
}
diff --git a/lib/Target/Sparc/SparcMachineFunctionInfo.h b/lib/Target/Sparc/SparcMachineFunctionInfo.h
index e457235..56d8708 100644
--- a/lib/Target/Sparc/SparcMachineFunctionInfo.h
+++ b/lib/Target/Sparc/SparcMachineFunctionInfo.h
@@ -22,7 +22,7 @@ namespace llvm {
unsigned GlobalBaseReg;
public:
SparcMachineFunctionInfo() : GlobalBaseReg(0) {}
- SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {}
+ explicit SparcMachineFunctionInfo(MachineFunction &MF) : GlobalBaseReg(0) {}
unsigned getGlobalBaseReg() const { return GlobalBaseReg; }
void setGlobalBaseReg(unsigned Reg) { GlobalBaseReg = Reg; }
diff --git a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
index 7f0d9fb..8152e1d 100644
--- a/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
+++ b/lib/Target/SystemZ/SystemZISelDAGToDAG.cpp
@@ -100,8 +100,6 @@ namespace {
Lowering(*TM.getTargetLowering()),
Subtarget(*TM.getSubtargetImpl()) { }
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "SystemZ DAG->DAG Pattern Instruction Selection";
}
@@ -152,10 +150,6 @@ namespace {
bool MatchAddressBase(SDValue N, SystemZRRIAddressMode &AM);
bool MatchAddressRI(SDValue N, SystemZRRIAddressMode &AM,
bool is12Bit);
-
- #ifndef NDEBUG
- unsigned Indent;
- #endif
};
} // end anonymous namespace
@@ -599,35 +593,17 @@ bool SystemZDAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
return false;
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void SystemZDAGToDAGISel::InstructionSelect() {
- // Codegen the basic block.
- DEBUG(errs() << "===== Instruction selection begins:\n");
- DEBUG(Indent = 0);
- SelectRoot(*CurDAG);
- DEBUG(errs() << "===== Instruction selection ends:\n");
-
- CurDAG->RemoveDeadNodes();
-}
-
SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
EVT NVT = Node->getValueType(0);
DebugLoc dl = Node->getDebugLoc();
unsigned Opcode = Node->getOpcode();
// Dump information about the Node being selected
- DEBUG(errs().indent(Indent) << "Selecting: ";
- Node->dump(CurDAG);
- errs() << "\n");
- DEBUG(Indent += 2);
+ DEBUG(errs() << "Selecting: "; Node->dump(CurDAG); errs() << "\n");
// If we have a custom node, we already have selected!
if (Node->isMachineOpcode()) {
- DEBUG(errs().indent(Indent-2) << "== ";
- Node->dump(CurDAG);
- errs() << "\n");
- DEBUG(Indent -= 2);
+ DEBUG(errs() << "== "; Node->dump(CurDAG); errs() << "\n");
return NULL; // Already selected.
}
@@ -693,9 +669,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
MVT::i32));
ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
- DEBUG(errs().indent(Indent-2) << "=> ";
- Result->dump(CurDAG);
- errs() << "\n");
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
}
// Copy the remainder (even subreg) result, if it is needed.
@@ -708,15 +682,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
MVT::i32));
ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
- DEBUG(errs().indent(Indent-2) << "=> ";
- Result->dump(CurDAG);
- errs() << "\n");
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
}
-#ifndef NDEBUG
- Indent -= 2;
-#endif
-
return NULL;
}
case ISD::UDIVREM: {
@@ -782,9 +750,7 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
CurDAG->getTargetConstant(SubRegIdx,
MVT::i32));
ReplaceUses(SDValue(Node, 0), SDValue(Div, 0));
- DEBUG(errs().indent(Indent-2) << "=> ";
- Result->dump(CurDAG);
- errs() << "\n");
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
}
// Copy the remainder (even subreg) result, if it is needed.
@@ -796,15 +762,9 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
CurDAG->getTargetConstant(SubRegIdx,
MVT::i32));
ReplaceUses(SDValue(Node, 1), SDValue(Rem, 0));
- DEBUG(errs().indent(Indent-2) << "=> ";
- Result->dump(CurDAG);
- errs() << "\n");
+ DEBUG(errs() << "=> "; Result->dump(CurDAG); errs() << "\n");
}
-#ifndef NDEBUG
- Indent -= 2;
-#endif
-
return NULL;
}
}
@@ -812,14 +772,12 @@ SDNode *SystemZDAGToDAGISel::Select(SDNode *Node) {
// Select the default instruction
SDNode *ResNode = SelectCode(Node);
- DEBUG(errs().indent(Indent-2) << "=> ";
+ DEBUG(errs() << "=> ";
if (ResNode == NULL || ResNode == Node)
Node->dump(CurDAG);
else
ResNode->dump(CurDAG);
errs() << "\n";
);
- DEBUG(Indent -= 2);
-
return ResNode;
}
diff --git a/lib/Target/SystemZ/SystemZInstrFP.td b/lib/Target/SystemZ/SystemZInstrFP.td
index 336e20e..f46840c 100644
--- a/lib/Target/SystemZ/SystemZInstrFP.td
+++ b/lib/Target/SystemZ/SystemZInstrFP.td
@@ -58,7 +58,7 @@ def FMOV64rr : Pseudo<(outs FP64:$dst), (ins FP64:$src),
[]>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def FMOV32rm : Pseudo<(outs FP32:$dst), (ins rriaddr12:$src),
"le\t{$dst, $src}",
[(set FP32:$dst, (load rriaddr12:$src))]>;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.td b/lib/Target/SystemZ/SystemZInstrInfo.td
index 1891bba..a44f6d9 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.td
+++ b/lib/Target/SystemZ/SystemZInstrInfo.td
@@ -257,7 +257,7 @@ def MOV64rihi32 : RILI<0xEC0, (outs GR64:$dst), (ins i64imm:$src),
[(set GR64:$dst, i64hi32:$src)]>;
}
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOV32rm : RXI<0x58,
(outs GR32:$dst), (ins rriaddr12:$src),
"l\t{$dst, $src}",
diff --git a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
index e47d419..fd6e330 100644
--- a/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
+++ b/lib/Target/SystemZ/SystemZMachineFunctionInfo.h
@@ -33,7 +33,8 @@ class SystemZMachineFunctionInfo : public MachineFunctionInfo {
public:
SystemZMachineFunctionInfo() : CalleeSavedFrameSize(0) {}
- SystemZMachineFunctionInfo(MachineFunction &MF) : CalleeSavedFrameSize(0) {}
+ explicit SystemZMachineFunctionInfo(MachineFunction &MF)
+ : CalleeSavedFrameSize(0) {}
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index 295b30f..9a16808 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -580,7 +580,7 @@ const IntegerType *TargetData::getIntPtrType(LLVMContext &C) const {
uint64_t TargetData::getIndexedOffset(const Type *ptrTy, Value* const* Indices,
unsigned NumIndices) const {
const Type *Ty = ptrTy;
- assert(isa<PointerType>(Ty) && "Illegal argument for getIndexedOffset()");
+ assert(Ty->isPointerTy() && "Illegal argument for getIndexedOffset()");
uint64_t Result = 0;
generic_gep_type_iterator<Value* const*>
diff --git a/lib/Target/TargetLoweringObjectFile.cpp b/lib/Target/TargetLoweringObjectFile.cpp
index 0c105e9..82619c7 100644
--- a/lib/Target/TargetLoweringObjectFile.cpp
+++ b/lib/Target/TargetLoweringObjectFile.cpp
@@ -307,7 +307,7 @@ getSymbolForDwarfReference(const MCSymbol *Sym, MachineModuleInfo *MMI,
switch (Encoding & 0xF0) {
default:
- llvm_report_error("Do not support this DWARF encoding yet!");
+ llvm_report_error("We do not support this DWARF encoding yet!");
break;
case dwarf::DW_EH_PE_absptr:
// Do nothing special
diff --git a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
index 1a35a49..734a545 100644
--- a/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
+++ b/lib/Target/X86/AsmPrinter/X86ATTInstPrinter.cpp
@@ -73,7 +73,7 @@ void X86ATTInstPrinter::printOperand(const MCInst *MI, unsigned OpNo) {
O << '$' << Op.getImm();
if (CommentStream && (Op.getImm() > 255 || Op.getImm() < -256))
- *CommentStream << format("imm = 0x%X\n", Op.getImm());
+ *CommentStream << format("imm = 0x%llX\n", (long long)Op.getImm());
} else {
assert(Op.isExpr() && "unknown operand kind in printOperand");
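
The cast matters because passing a 64-bit value where "%X" expects an unsigned int is undefined behavior in a varargs call; widening both the format and the argument is the portable fix. A minimal reproduction of the corrected call:

#include <cstdint>
#include <cstdio>

int main() {
  int64_t Imm = 0x1122334455667788;
  // "%llX" plus an explicit (long long) cast keeps the format and the
  // argument in agreement no matter how int64_t is defined.
  std::printf("imm = 0x%llX\n", (long long)Imm);
}
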
diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt
index 61f26a7..eed3b45 100644
--- a/lib/Target/X86/CMakeLists.txt
+++ b/lib/Target/X86/CMakeLists.txt
@@ -15,6 +15,7 @@ tablegen(X86GenCallingConv.inc -gen-callingconv)
tablegen(X86GenSubtarget.inc -gen-subtarget)
set(sources
+ X86AsmBackend.cpp
X86CodeEmitter.cpp
X86COFFMachineModuleInfo.cpp
X86ELFWriterInfo.cpp
diff --git a/lib/Target/X86/README-SSE.txt b/lib/Target/X86/README-SSE.txt
index 19eb05e..e5f84e8 100644
--- a/lib/Target/X86/README-SSE.txt
+++ b/lib/Target/X86/README-SSE.txt
@@ -67,8 +67,8 @@ no_exit.i7: ; preds = %no_exit.i7, %build_tree.exit
[ %tmp.34.i18, %no_exit.i7 ]
%tmp.0.0.0.i10 = phi double [ 0.000000e+00, %build_tree.exit ],
[ %tmp.28.i16, %no_exit.i7 ]
- %tmp.28.i16 = add double %tmp.0.0.0.i10, 0.000000e+00
- %tmp.34.i18 = add double %tmp.0.1.0.i9, 0.000000e+00
+ %tmp.28.i16 = fadd double %tmp.0.0.0.i10, 0.000000e+00
+ %tmp.34.i18 = fadd double %tmp.0.1.0.i9, 0.000000e+00
br i1 false, label %Compute_Tree.exit23, label %no_exit.i7
Compute_Tree.exit23: ; preds = %no_exit.i7
@@ -97,7 +97,7 @@ pcmp/pand/pandn/por to do a selection instead of a conditional branch:
double %X(double %Y, double %Z, double %A, double %B) {
%C = setlt double %A, %B
- %z = add double %Z, 0.0 ;; select operand is not a load
+ %z = fadd double %Z, 0.0 ;; select operand is not a load
%D = select bool %C, double %Y, double %z
ret double %D
}
@@ -545,7 +545,7 @@ eliminates a constant pool load. For example, consider:
define i64 @ccosf(float %z.0, float %z.1) nounwind readonly {
entry:
- %tmp6 = sub float -0.000000e+00, %z.1 ; <float> [#uses=1]
+ %tmp6 = fsub float -0.000000e+00, %z.1 ; <float> [#uses=1]
%tmp20 = tail call i64 @ccoshf( float %tmp6, float %z.0 ) nounwind readonly
ret i64 %tmp20
}
diff --git a/lib/Target/X86/README.txt b/lib/Target/X86/README.txt
index 3c6138b..d4545a6 100644
--- a/lib/Target/X86/README.txt
+++ b/lib/Target/X86/README.txt
@@ -227,11 +227,6 @@ lambda, siod, optimizer-eval, ackermann, hash2, nestedloop, strcat, and Treesor.
//===---------------------------------------------------------------------===//
-Teach the coalescer to coalesce vregs of different register classes. e.g. FR32 /
-FR64 to VR128.
-
-//===---------------------------------------------------------------------===//
-
Adding to the list of cmp / test poor codegen issues:
int test(__m128 *A, __m128 *B) {
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index 1a1e447..ba0ee6c 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -19,13 +19,15 @@
namespace llvm {
-class X86TargetMachine;
class FunctionPass;
-class MachineCodeEmitter;
+class JITCodeEmitter;
+class MCAssembler;
class MCCodeEmitter;
class MCContext;
-class JITCodeEmitter;
+class MachineCodeEmitter;
class Target;
+class TargetAsmBackend;
+class X86TargetMachine;
class formatted_raw_ostream;
/// createX86ISelDag - This pass converts a legalized DAG into a
@@ -55,6 +57,9 @@ MCCodeEmitter *createX86_32MCCodeEmitter(const Target &, TargetMachine &TM,
MCCodeEmitter *createX86_64MCCodeEmitter(const Target &, TargetMachine &TM,
MCContext &Ctx);
+TargetAsmBackend *createX86_32AsmBackend(const Target &, MCAssembler &);
+TargetAsmBackend *createX86_64AsmBackend(const Target &, MCAssembler &);
+
/// createX86EmitCodeToMemory - Returns a pass that converts a register
/// allocated function into raw machine code in a dynamically
/// allocated chunk of memory.
diff --git a/lib/Target/X86/X86AsmBackend.cpp b/lib/Target/X86/X86AsmBackend.cpp
new file mode 100644
index 0000000..e6654ef
--- /dev/null
+++ b/lib/Target/X86/X86AsmBackend.cpp
@@ -0,0 +1,34 @@
+//===-- X86AsmBackend.cpp - X86 Assembler Backend -------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Target/TargetAsmBackend.h"
+#include "X86.h"
+#include "llvm/Target/TargetRegistry.h"
+#include "llvm/Target/TargetAsmBackend.h"
+using namespace llvm;
+
+namespace {
+
+class X86AsmBackend : public TargetAsmBackend {
+public:
+ X86AsmBackend(const Target &T, MCAssembler &A)
+ : TargetAsmBackend(T) {}
+};
+
+}
+
+TargetAsmBackend *llvm::createX86_32AsmBackend(const Target &T,
+ MCAssembler &A) {
+ return new X86AsmBackend(T, A);
+}
+
+TargetAsmBackend *llvm::createX86_64AsmBackend(const Target &T,
+ MCAssembler &A) {
+ return new X86AsmBackend(T, A);
+}
diff --git a/lib/Target/X86/X86FastISel.cpp b/lib/Target/X86/X86FastISel.cpp
index 69a9d60..17366ee 100644
--- a/lib/Target/X86/X86FastISel.cpp
+++ b/lib/Target/X86/X86FastISel.cpp
@@ -1161,6 +1161,8 @@ bool X86FastISel::X86VisitIntrinsicCall(IntrinsicInst &I) {
if (!X86SelectAddress(DI->getAddress(), AM))
return false;
const TargetInstrDesc &II = TII.get(TargetOpcode::DBG_VALUE);
+    // FIXME: may need to add RegState::Debug to any registers produced,
+ // although ESP/EBP should be the only ones at the moment.
addFullAddress(BuildMI(MBB, DL, II), AM).addImm(0).
addMetadata(DI->getVariable());
return true;
diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp
index 7b349f6..08030e0 100644
--- a/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -12,15 +12,6 @@
//
//===----------------------------------------------------------------------===//
-// Force NDEBUG on in any optimized build on Darwin.
-//
-// FIXME: This is a huge hack, to work around ridiculously awful compile times
-// on this file with gcc-4.2 on Darwin, in Release mode.
-#if (!defined(__llvm__) && defined(__APPLE__) && \
- defined(__OPTIMIZE__) && !defined(NDEBUG))
-#define NDEBUG
-#endif
-
#define DEBUG_TYPE "x86-isel"
#include "X86.h"
#include "X86InstrBuilder.h"
@@ -177,15 +168,11 @@ namespace {
return "X86 DAG->DAG Instruction Selection";
}
- /// InstructionSelect - This callback is invoked by
- /// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
- virtual void InstructionSelect();
-
virtual void EmitFunctionEntryCode(Function &Fn, MachineFunction &MF);
virtual bool IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const;
- virtual bool IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const;
+ virtual void PreprocessISelDAG();
// Include the pieces autogenerated from the target description.
#include "X86GenDAGISel.inc"
@@ -209,18 +196,17 @@ namespace {
SDValue &Scale, SDValue &Index, SDValue &Disp);
bool SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index, SDValue &Disp);
- bool SelectScalarSSELoad(SDNode *Op, SDValue Pred,
- SDValue N, SDValue &Base, SDValue &Scale,
+ bool SelectScalarSSELoad(SDNode *Root, SDValue N,
+ SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment,
- SDValue &InChain, SDValue &OutChain);
+ SDValue &NodeWithChain);
+
bool TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment);
- void PreprocessForRMW();
- void PreprocessForFPConvert();
-
+
/// SelectInlineAsmMemoryOperand - Implement addressing mode selection for
/// inline asm expressions.
virtual bool SelectInlineAsmMemoryOperand(const SDValue &Op,
@@ -296,10 +282,6 @@ namespace {
const X86InstrInfo *getInstrInfo() {
return getTargetMachine().getInstrInfo();
}
-
-#ifndef NDEBUG
- unsigned Indent;
-#endif
};
}
@@ -367,65 +349,6 @@ X86DAGToDAGISel::IsProfitableToFold(SDValue N, SDNode *U, SDNode *Root) const {
return true;
}
-
-bool X86DAGToDAGISel::IsLegalToFold(SDValue N, SDNode *U, SDNode *Root) const {
- if (OptLevel == CodeGenOpt::None) return false;
-
- // Proceed to 'generic' cycle finder code
- return SelectionDAGISel::IsLegalToFold(N, U, Root);
-}
-
-/// MoveBelowTokenFactor - Replace TokenFactor operand with load's chain operand
-/// and move load below the TokenFactor. Replace store's chain operand with
-/// load's chain result.
-static void MoveBelowTokenFactor(SelectionDAG *CurDAG, SDValue Load,
- SDValue Store, SDValue TF) {
- SmallVector<SDValue, 4> Ops;
- for (unsigned i = 0, e = TF.getNode()->getNumOperands(); i != e; ++i)
- if (Load.getNode() == TF.getOperand(i).getNode())
- Ops.push_back(Load.getOperand(0));
- else
- Ops.push_back(TF.getOperand(i));
- SDValue NewTF = CurDAG->UpdateNodeOperands(TF, &Ops[0], Ops.size());
- SDValue NewLoad = CurDAG->UpdateNodeOperands(Load, NewTF,
- Load.getOperand(1),
- Load.getOperand(2));
- CurDAG->UpdateNodeOperands(Store, NewLoad.getValue(1), Store.getOperand(1),
- Store.getOperand(2), Store.getOperand(3));
-}
-
-/// isRMWLoad - Return true if N is a load that's part of RMW sub-DAG. The
-/// chain produced by the load must only be used by the store's chain operand,
-/// otherwise this may produce a cycle in the DAG.
-///
-static bool isRMWLoad(SDValue N, SDValue Chain, SDValue Address,
- SDValue &Load) {
- if (N.getOpcode() == ISD::BIT_CONVERT) {
- if (!N.hasOneUse())
- return false;
- N = N.getOperand(0);
- }
-
- LoadSDNode *LD = dyn_cast<LoadSDNode>(N);
- if (!LD || LD->isVolatile())
- return false;
- if (LD->getAddressingMode() != ISD::UNINDEXED)
- return false;
-
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType != ISD::NON_EXTLOAD && ExtType != ISD::EXTLOAD)
- return false;
-
- if (N.hasOneUse() &&
- LD->hasNUsesOfValue(1, 1) &&
- N.getOperand(1) == Address &&
- LD->isOperandOf(Chain.getNode())) {
- Load = N;
- return true;
- }
- return false;
-}
-
/// MoveBelowCallSeqStart - Replace CALLSEQ_START operand with load's chain
/// operand and move load below the call's chain operand.
static void MoveBelowCallSeqStart(SelectionDAG *CurDAG, SDValue Load,
@@ -489,51 +412,14 @@ static bool isCalleeLoad(SDValue Callee, SDValue &Chain) {
return false;
}
-
-/// PreprocessForRMW - Preprocess the DAG to make instruction selection better.
-/// This is only run if not in -O0 mode.
-/// This allows the instruction selector to pick more read-modify-write
-/// instructions. This is a common case:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [Load]
-/// ^ ^
-/// | |
-/// / \-
-/// / |
-/// [TokenFactor] [Op]
-/// ^ ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-///
-/// The fact the store's chain operand != load's chain will prevent the
-/// (store (op (load))) instruction from being selected. We can transform it to:
-///
-/// [Load chain]
-/// ^
-/// |
-/// [TokenFactor]
-/// ^
-/// |
-/// [Load]
-/// ^ ^
-/// | |
-/// | \-
-/// | |
-/// | [Op]
-/// | ^
-/// | |
-/// \ /
-/// \ /
-/// [Store]
-void X86DAGToDAGISel::PreprocessForRMW() {
+void X86DAGToDAGISel::PreprocessISelDAG() {
+ OptForSize = MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize);
+
for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ++I) {
- if (I->getOpcode() == X86ISD::CALL) {
+ E = CurDAG->allnodes_end(); I != E; ) {
+ SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+ if (OptLevel != CodeGenOpt::None && N->getOpcode() == X86ISD::CALL) {
/// Also try moving call address load from outside callseq_start to just
/// before the call to allow it to be folded.
///
@@ -553,85 +439,23 @@ void X86DAGToDAGISel::PreprocessForRMW() {
/// \ /
/// \ /
/// [CALL]
- SDValue Chain = I->getOperand(0);
- SDValue Load = I->getOperand(1);
+ SDValue Chain = N->getOperand(0);
+ SDValue Load = N->getOperand(1);
if (!isCalleeLoad(Load, Chain))
continue;
- MoveBelowCallSeqStart(CurDAG, Load, SDValue(I, 0), Chain);
+ MoveBelowCallSeqStart(CurDAG, Load, SDValue(N, 0), Chain);
++NumLoadMoved;
continue;
}
-
- if (!ISD::isNON_TRUNCStore(I))
- continue;
- SDValue Chain = I->getOperand(0);
-
- if (Chain.getNode()->getOpcode() != ISD::TokenFactor)
- continue;
-
- SDValue N1 = I->getOperand(1);
- SDValue N2 = I->getOperand(2);
- if ((N1.getValueType().isFloatingPoint() &&
- !N1.getValueType().isVector()) ||
- !N1.hasOneUse())
- continue;
-
- bool RModW = false;
- SDValue Load;
- unsigned Opcode = N1.getNode()->getOpcode();
- switch (Opcode) {
- case ISD::ADD:
- case ISD::MUL:
- case ISD::AND:
- case ISD::OR:
- case ISD::XOR:
- case ISD::ADDC:
- case ISD::ADDE:
- case ISD::VECTOR_SHUFFLE: {
- SDValue N10 = N1.getOperand(0);
- SDValue N11 = N1.getOperand(1);
- RModW = isRMWLoad(N10, Chain, N2, Load);
- if (!RModW)
- RModW = isRMWLoad(N11, Chain, N2, Load);
- break;
- }
- case ISD::SUB:
- case ISD::SHL:
- case ISD::SRA:
- case ISD::SRL:
- case ISD::ROTL:
- case ISD::ROTR:
- case ISD::SUBC:
- case ISD::SUBE:
- case X86ISD::SHLD:
- case X86ISD::SHRD: {
- SDValue N10 = N1.getOperand(0);
- RModW = isRMWLoad(N10, Chain, N2, Load);
- break;
- }
- }
-
- if (RModW) {
- MoveBelowTokenFactor(CurDAG, Load, SDValue(I, 0), Chain);
- ++NumLoadMoved;
- checkForCycles(I);
- }
- }
-}
-
-
-/// PreprocessForFPConvert - Walk over the dag lowering fpround and fpextend
-/// nodes that target the FP stack to be store and load to the stack. This is a
-/// gross hack. We would like to simply mark these as being illegal, but when
-/// we do that, legalize produces these when it expands calls, then expands
-/// these in the same legalize pass. We would like dag combine to be able to
-/// hack on these between the call expansion and the node legalization. As such
-/// this pass basically does "really late" legalization of these inline with the
-/// X86 isel pass.
-void X86DAGToDAGISel::PreprocessForFPConvert() {
- for (SelectionDAG::allnodes_iterator I = CurDAG->allnodes_begin(),
- E = CurDAG->allnodes_end(); I != E; ) {
- SDNode *N = I++; // Preincrement iterator to avoid invalidation issues.
+
+    // Lower fpround and fpextend nodes that target the FP stack to be a store
+    // and load to the stack. This is a gross hack. We would like to simply mark
+ // these as being illegal, but when we do that, legalize produces these when
+ // it expands calls, then expands these in the same legalize pass. We would
+ // like dag combine to be able to hack on these between the call expansion
+ // and the node legalization. As such this pass basically does "really
+ // late" legalization of these inline with the X86 isel pass.
+ // FIXME: This should only happen when not compiled with -O0.
if (N->getOpcode() != ISD::FP_ROUND && N->getOpcode() != ISD::FP_EXTEND)
continue;
@@ -687,30 +511,6 @@ void X86DAGToDAGISel::PreprocessForFPConvert() {
}
}
-/// InstructionSelectBasicBlock - This callback is invoked by SelectionDAGISel
-/// when it has created a SelectionDAG for us to codegen.
-void X86DAGToDAGISel::InstructionSelect() {
- const Function *F = MF->getFunction();
- OptForSize = F->hasFnAttr(Attribute::OptimizeForSize);
-
- if (OptLevel != CodeGenOpt::None)
- PreprocessForRMW();
-
- // FIXME: This should only happen when not compiled with -O0.
- PreprocessForFPConvert();
-
- // Codegen the basic block.
-#ifndef NDEBUG
- DEBUG(dbgs() << "===== Instruction selection begins:\n");
- Indent = 0;
-#endif
- SelectRoot(*CurDAG);
-#ifndef NDEBUG
- DEBUG(dbgs() << "===== Instruction selection ends:\n");
-#endif
-
- CurDAG->RemoveDeadNodes();
-}
/// EmitSpecialCodeForMain - Emit any code that needs to be executed only in
/// the main function.
@@ -1317,22 +1117,24 @@ bool X86DAGToDAGISel::SelectAddr(SDNode *Op, SDValue N, SDValue &Base,
/// SelectScalarSSELoad - Match a scalar SSE load. In particular, we want to
/// match a load whose top elements are either undef or zeros. The load flavor
/// is derived from the type of N, which is either v4f32 or v2f64.
-bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred,
+///
+/// We also return:
+///   PatternNodeWithChain: this is the matched node that has a chain input and
+/// output.
+bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Root,
SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp, SDValue &Segment,
- SDValue &InChain,
- SDValue &OutChain) {
+ SDValue &PatternNodeWithChain) {
if (N.getOpcode() == ISD::SCALAR_TO_VECTOR) {
- InChain = N.getOperand(0).getValue(1);
- if (ISD::isNON_EXTLoad(InChain.getNode()) &&
- InChain.getValue(0).hasOneUse() &&
- IsProfitableToFold(N, Pred.getNode(), Op) &&
- IsLegalToFold(N, Pred.getNode(), Op)) {
- LoadSDNode *LD = cast<LoadSDNode>(InChain);
- if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ PatternNodeWithChain = N.getOperand(0);
+ if (ISD::isNON_EXTLoad(PatternNodeWithChain.getNode()) &&
+ PatternNodeWithChain.hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root)) {
+ LoadSDNode *LD = cast<LoadSDNode>(PatternNodeWithChain);
+ if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp,Segment))
return false;
- OutChain = LD->getChain();
return true;
}
}
@@ -1344,13 +1146,14 @@ bool X86DAGToDAGISel::SelectScalarSSELoad(SDNode *Op, SDValue Pred,
N.getOperand(0).getOpcode() == ISD::SCALAR_TO_VECTOR &&
N.getOperand(0).getNode()->hasOneUse() &&
ISD::isNON_EXTLoad(N.getOperand(0).getOperand(0).getNode()) &&
- N.getOperand(0).getOperand(0).hasOneUse()) {
+ N.getOperand(0).getOperand(0).hasOneUse() &&
+ IsProfitableToFold(N.getOperand(0), N.getNode(), Root) &&
+ IsLegalToFold(N.getOperand(0), N.getNode(), Root)) {
// Okay, this is a zero extending load. Fold it.
LoadSDNode *LD = cast<LoadSDNode>(N.getOperand(0).getOperand(0));
- if (!SelectAddr(Op, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
+ if (!SelectAddr(Root, LD->getBasePtr(), Base, Scale, Index, Disp, Segment))
return false;
- OutChain = LD->getChain();
- InChain = SDValue(LD, 1);
+ PatternNodeWithChain = SDValue(LD, 0);
return true;
}
return false;
@@ -1424,7 +1227,6 @@ bool X86DAGToDAGISel::SelectLEAAddr(SDNode *Op, SDValue N,
bool X86DAGToDAGISel::SelectTLSADDRAddr(SDNode *Op, SDValue N, SDValue &Base,
SDValue &Scale, SDValue &Index,
SDValue &Disp) {
- assert(Op->getOpcode() == X86ISD::TLSADDR);
assert(N.getOpcode() == ISD::TargetGlobalTLSAddress);
const GlobalAddressSDNode *GA = cast<GlobalAddressSDNode>(N);
@@ -1451,11 +1253,12 @@ bool X86DAGToDAGISel::TryFoldLoad(SDNode *P, SDValue N,
SDValue &Base, SDValue &Scale,
SDValue &Index, SDValue &Disp,
SDValue &Segment) {
- if (ISD::isNON_EXTLoad(N.getNode()) &&
- IsProfitableToFold(N, P, P) &&
- IsLegalToFold(N, P, P))
- return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
- return false;
+ if (!ISD::isNON_EXTLoad(N.getNode()) ||
+ !IsProfitableToFold(N, P, P) ||
+ !IsLegalToFold(N, P, P))
+ return false;
+
+ return SelectAddr(P, N.getOperand(1), Base, Scale, Index, Disp, Segment);
}
/// getGlobalBaseReg - Return an SDNode that returns the value of
@@ -1558,7 +1361,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_DEC16m;
else if (isSub) {
if (isCN) {
- if (Predicate_i16immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_SUB16mi8;
else
Opc = X86::LOCK_SUB16mi;
@@ -1566,7 +1369,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_SUB16mr;
} else {
if (isCN) {
- if (Predicate_i16immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_ADD16mi8;
else
Opc = X86::LOCK_ADD16mi;
@@ -1581,7 +1384,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_DEC32m;
else if (isSub) {
if (isCN) {
- if (Predicate_i32immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_SUB32mi8;
else
Opc = X86::LOCK_SUB32mi;
@@ -1589,7 +1392,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
Opc = X86::LOCK_SUB32mr;
} else {
if (isCN) {
- if (Predicate_i32immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_ADD32mi8;
else
Opc = X86::LOCK_ADD32mi;
@@ -1605,7 +1408,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
else if (isSub) {
Opc = X86::LOCK_SUB64mr;
if (isCN) {
- if (Predicate_i64immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_SUB64mi8;
else if (Predicate_i64immSExt32(Val.getNode()))
Opc = X86::LOCK_SUB64mi32;
@@ -1613,7 +1416,7 @@ SDNode *X86DAGToDAGISel::SelectAtomicLoadAdd(SDNode *Node, EVT NVT) {
} else {
Opc = X86::LOCK_ADD64mr;
if (isCN) {
- if (Predicate_i64immSExt8(Val.getNode()))
+ if (Predicate_immSext8(Val.getNode()))
Opc = X86::LOCK_ADD64mi8;
else if (Predicate_i64immSExt32(Val.getNode()))
Opc = X86::LOCK_ADD64mi32;
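
All of these sites now share a single predicate for the sign-extended 8-bit immediate forms (the *mi8 opcodes). What that predicate tests, as a standalone sketch rather than the TableGen-generated code:

#include <cassert>
#include <cstdint>

// True if the immediate survives a round trip through a sign-extended
// 8-bit encoding, i.e. it fits the compact "mi8" instruction forms.
bool fitsSExt8(int64_t V) { return V == static_cast<int8_t>(V); }

int main() {
  assert(fitsSExt8(127) && fitsSExt8(-128));
  assert(!fitsSExt8(128) && !fitsSExt8(-129));
}
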
@@ -1710,24 +1513,10 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
unsigned Opcode = Node->getOpcode();
DebugLoc dl = Node->getDebugLoc();
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent, ' ') << "Selecting: ";
- Node->dump(CurDAG);
- dbgs() << '\n';
- });
- Indent += 2;
-#endif
+ DEBUG(dbgs() << "Selecting: "; Node->dump(CurDAG); dbgs() << '\n');
if (Node->isMachineOpcode()) {
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "== ";
- Node->dump(CurDAG);
- dbgs() << '\n';
- });
- Indent -= 2;
-#endif
+ DEBUG(dbgs() << "== "; Node->dump(CurDAG); dbgs() << '\n');
return NULL; // Already selected.
}
@@ -1823,13 +1612,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the high half of the result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
@@ -1852,19 +1635,9 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
InFlag = Result.getValue(2);
}
ReplaceUses(SDValue(Node, 1), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-#ifndef NDEBUG
- Indent -= 2;
-#endif
-
return NULL;
}
@@ -1979,13 +1752,7 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
LoReg, NVT, InFlag);
InFlag = Result.getValue(2);
ReplaceUses(SDValue(Node, 0), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
// Copy the remainder (high) result, if it is needed.
if (!SDValue(Node, 1).use_empty()) {
@@ -2009,19 +1776,8 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
InFlag = Result.getValue(2);
}
ReplaceUses(SDValue(Node, 1), Result);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- Result.getNode()->dump(CurDAG);
- dbgs() << '\n';
- });
-#endif
+ DEBUG(dbgs() << "=> "; Result.getNode()->dump(CurDAG); dbgs() << '\n');
}
-
-#ifndef NDEBUG
- Indent -= 2;
-#endif
-
return NULL;
}
@@ -2134,17 +1890,12 @@ SDNode *X86DAGToDAGISel::Select(SDNode *Node) {
SDNode *ResNode = SelectCode(Node);
-#ifndef NDEBUG
- DEBUG({
- dbgs() << std::string(Indent-2, ' ') << "=> ";
- if (ResNode == NULL || ResNode == Node)
- Node->dump(CurDAG);
- else
- ResNode->dump(CurDAG);
- dbgs() << '\n';
- });
- Indent -= 2;
-#endif
+ DEBUG(dbgs() << "=> ";
+ if (ResNode == NULL || ResNode == Node)
+ Node->dump(CurDAG);
+ else
+ ResNode->dump(CurDAG);
+ dbgs() << '\n');
return ResNode;
}
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 9974d8c..e2b8193 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -73,7 +73,7 @@ static TargetLoweringObjectFile *createTLOF(X86TargetMachine &TM) {
case X86Subtarget::isDarwin:
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return new X8664_MachoTargetObjectFile();
- return new X8632_MachoTargetObjectFile();
+ return new TargetLoweringObjectFileMachO();
case X86Subtarget::isELF:
if (TM.getSubtarget<X86Subtarget>().is64Bit())
return new X8664_ELFTargetObjectFile(TM);
@@ -990,6 +990,7 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setTargetDAGCombine(ISD::VECTOR_SHUFFLE);
setTargetDAGCombine(ISD::BUILD_VECTOR);
setTargetDAGCombine(ISD::SELECT);
+ setTargetDAGCombine(ISD::AND);
setTargetDAGCombine(ISD::SHL);
setTargetDAGCombine(ISD::SRA);
setTargetDAGCombine(ISD::SRL);
@@ -1743,7 +1744,7 @@ EmitTailCallStoreRetAddr(SelectionDAG & DAG, MachineFunction &MF,
// Calculate the new stack slot for the return address.
int SlotSize = Is64Bit ? 8 : 4;
int NewReturnAddrFI =
- MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, true,false);
+ MF.getFrameInfo()->CreateFixedObject(SlotSize, FPDiff-SlotSize, false, false);
EVT VT = Is64Bit ? MVT::i64 : MVT::i32;
SDValue NewRetAddrFrIdx = DAG.getFrameIndex(NewReturnAddrFI, VT);
Chain = DAG.getStore(Chain, dl, RetAddrFrIdx, NewRetAddrFrIdx,
@@ -2376,7 +2377,7 @@ SDValue X86TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
// Set up a frame object for the return address.
uint64_t SlotSize = TD->getPointerSize();
ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
- true, false);
+ false, false);
FuncInfo->setRAIndex(ReturnAddrIndex);
}
@@ -4816,8 +4817,16 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT_SSE4(SDValue Op, SelectionDAG &DAG){
if ((EltVT.getSizeInBits() == 8 || EltVT.getSizeInBits() == 16) &&
isa<ConstantSDNode>(N2)) {
- unsigned Opc = (EltVT.getSizeInBits() == 8) ? X86ISD::PINSRB
- : X86ISD::PINSRW;
+ unsigned Opc;
+ if (VT == MVT::v8i16)
+ Opc = X86ISD::PINSRW;
+ else if (VT == MVT::v4i16)
+ Opc = X86ISD::MMX_PINSRW;
+    else
+      Opc = X86ISD::PINSRB;
+
     // Transform it so it matches pinsr{b,w} which expects a GR32 as its second
// argument.
if (N1.getValueType() != MVT::i32)
@@ -4868,7 +4877,8 @@ X86TargetLowering::LowerINSERT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG) {
N1 = DAG.getNode(ISD::ANY_EXTEND, dl, MVT::i32, N1);
if (N2.getValueType() != MVT::i32)
N2 = DAG.getIntPtrConstant(cast<ConstantSDNode>(N2)->getZExtValue());
- return DAG.getNode(X86ISD::PINSRW, dl, VT, N0, N1, N2);
+ return DAG.getNode(VT == MVT::v8i16 ? X86ISD::PINSRW : X86ISD::MMX_PINSRW,
+ dl, VT, N0, N1, N2);
}
return SDValue();
}
@@ -5244,7 +5254,7 @@ SDValue X86TargetLowering::LowerShift(SDValue Op, SelectionDAG &DAG) {
SDValue AndNode = DAG.getNode(ISD::AND, dl, MVT::i8, ShAmt,
DAG.getConstant(VTBits, MVT::i8));
- SDValue Cond = DAG.getNode(X86ISD::CMP, dl, VT,
+ SDValue Cond = DAG.getNode(X86ISD::CMP, dl, MVT::i32,
AndNode, DAG.getConstant(0, MVT::i8));
SDValue Hi, Lo;
@@ -5873,26 +5883,31 @@ SDValue X86TargetLowering::EmitCmp(SDValue Op0, SDValue Op1, unsigned X86CC,
/// LowerToBT - Result of 'and' is compared against zero. Turn it into a BT node
/// if it's possible.
-static SDValue LowerToBT(SDValue Op0, ISD::CondCode CC,
+static SDValue LowerToBT(SDValue And, ISD::CondCode CC,
DebugLoc dl, SelectionDAG &DAG) {
+ SDValue Op0 = And.getOperand(0);
+ SDValue Op1 = And.getOperand(1);
+ if (Op0.getOpcode() == ISD::TRUNCATE)
+ Op0 = Op0.getOperand(0);
+ if (Op1.getOpcode() == ISD::TRUNCATE)
+ Op1 = Op1.getOperand(0);
+
SDValue LHS, RHS;
- if (Op0.getOperand(1).getOpcode() == ISD::SHL) {
- if (ConstantSDNode *Op010C =
- dyn_cast<ConstantSDNode>(Op0.getOperand(1).getOperand(0)))
- if (Op010C->getZExtValue() == 1) {
- LHS = Op0.getOperand(0);
- RHS = Op0.getOperand(1).getOperand(1);
+ if (Op1.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *And10C = dyn_cast<ConstantSDNode>(Op1.getOperand(0)))
+ if (And10C->getZExtValue() == 1) {
+ LHS = Op0;
+ RHS = Op1.getOperand(1);
}
- } else if (Op0.getOperand(0).getOpcode() == ISD::SHL) {
- if (ConstantSDNode *Op000C =
- dyn_cast<ConstantSDNode>(Op0.getOperand(0).getOperand(0)))
- if (Op000C->getZExtValue() == 1) {
- LHS = Op0.getOperand(1);
- RHS = Op0.getOperand(0).getOperand(1);
+ } else if (Op0.getOpcode() == ISD::SHL) {
+ if (ConstantSDNode *And00C = dyn_cast<ConstantSDNode>(Op0.getOperand(0)))
+ if (And00C->getZExtValue() == 1) {
+ LHS = Op1;
+ RHS = Op0.getOperand(1);
}
- } else if (Op0.getOperand(1).getOpcode() == ISD::Constant) {
- ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op0.getOperand(1));
- SDValue AndLHS = Op0.getOperand(0);
+ } else if (Op1.getOpcode() == ISD::Constant) {
+ ConstantSDNode *AndRHS = cast<ConstantSDNode>(Op1);
+ SDValue AndLHS = Op0;
if (AndRHS->getZExtValue() == 1 && AndLHS.getOpcode() == ISD::SRL) {
LHS = AndLHS.getOperand(0);
RHS = AndLHS.getOperand(1);
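
All three shapes this function recognizes are spellings of "test bit b of x", which is exactly what BT computes (CF = (x >> b) & 1). A quick standalone check of the equivalences:

#include <cassert>
#include <cstdint>

// BT semantics: CF = (x >> b) & 1.
bool bt(uint32_t x, unsigned b) { return (x >> b) & 1; }

int main() {
  uint32_t x = 0x5a;
  unsigned b = 3;
  assert(((x & (1u << b)) != 0) == bt(x, b));   // and x, (shl 1, b)
  assert((((1u << b) & x) != 0) == bt(x, b));   // and (shl 1, b), x
  assert((((x >> b) & 1u) != 0) == bt(x, b));   // and (srl x, b), 1
}
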
@@ -5942,6 +5957,21 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
return NewSetCC;
}
+ // Look for "(setcc) == / != 1" to avoid unncessary setcc.
+ if (Op0.getOpcode() == X86ISD::SETCC &&
+ Op1.getOpcode() == ISD::Constant &&
+ (cast<ConstantSDNode>(Op1)->getZExtValue() == 1 ||
+ cast<ConstantSDNode>(Op1)->isNullValue()) &&
+ (CC == ISD::SETEQ || CC == ISD::SETNE)) {
+ X86::CondCode CCode = (X86::CondCode)Op0.getConstantOperandVal(0);
+ bool Invert = (CC == ISD::SETNE) ^
+ cast<ConstantSDNode>(Op1)->isNullValue();
+ if (Invert)
+ CCode = X86::GetOppositeBranchCondition(CCode);
+ return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
+ DAG.getConstant(CCode, MVT::i8), Op0.getOperand(1));
+ }
+
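
The Invert computation above packs a small truth table: comparing a setcc result against 1 with SETEQ (or against 0 with SETNE) keeps the condition, and the other two cases flip it. Spelled out as a standalone check:

#include <cassert>

// True when "(setcc CCode) CC constant" requires the opposite condition.
bool invertsCondition(bool IsSetNE, bool RHSIsZero) {
  return IsSetNE ^ RHSIsZero;
}

int main() {
  assert(!invertsCondition(false, false));  // (setcc) == 1
  assert( invertsCondition(false, true));   // (setcc) == 0
  assert( invertsCondition(true,  false));  // (setcc) != 1
  assert(!invertsCondition(true,  true));   // (setcc) != 0
}
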
bool isFP = Op.getOperand(1).getValueType().isFloatingPoint();
unsigned X86CC = TranslateX86CC(CC, isFP, Op0, Op1, DAG);
if (X86CC == X86::COND_INVALID)
@@ -6444,8 +6474,7 @@ X86TargetLowering::EmitTargetCodeForMemset(SelectionDAG &DAG, DebugLoc dl,
LowerCallTo(Chain, Type::getVoidTy(*DAG.getContext()),
false, false, false, false,
0, CallingConv::C, false, /*isReturnValueUsed=*/false,
- DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl,
- DAG.GetOrdering(Chain.getNode()));
+ DAG.getExternalSymbol(bzeroEntry, IntPtr), Args, DAG, dl);
return CallResult.second;
}
@@ -7662,6 +7691,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::INSERTPS: return "X86ISD::INSERTPS";
case X86ISD::PINSRB: return "X86ISD::PINSRB";
case X86ISD::PINSRW: return "X86ISD::PINSRW";
+ case X86ISD::MMX_PINSRW: return "X86ISD::MMX_PINSRW";
case X86ISD::PSHUFB: return "X86ISD::PSHUFB";
case X86ISD::FMAX: return "X86ISD::FMAX";
case X86ISD::FMIN: return "X86ISD::FMIN";
@@ -7769,7 +7799,7 @@ bool X86TargetLowering::isTruncateFree(const Type *Ty1, const Type *Ty2) const {
unsigned NumBits2 = Ty2->getPrimitiveSizeInBits();
if (NumBits1 <= NumBits2)
return false;
- return Subtarget->is64Bit() || NumBits1 < 64;
+ return true;
}
bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
@@ -7779,7 +7809,7 @@ bool X86TargetLowering::isTruncateFree(EVT VT1, EVT VT2) const {
unsigned NumBits2 = VT2.getSizeInBits();
if (NumBits1 <= NumBits2)
return false;
- return Subtarget->is64Bit() || NumBits1 < 64;
+ return true;
}
bool X86TargetLowering::isZExtFree(const Type *Ty1, const Type *Ty2) const {
@@ -8792,10 +8822,9 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
SDValue RHS = N->getOperand(2);
// If we have SSE[12] support, try to form min/max nodes. SSE min/max
- // instructions have the peculiarity that if either operand is a NaN,
- // they chose what we call the RHS operand (and as such are not symmetric).
- // It happens that this matches the semantics of the common C idiom
- // x<y?x:y and related forms, so we can recognize these cases.
+ // instructions match the semantics of the common C idiom x<y?x:y but not
+ // x<=y?x:y, because of how they handle negative zero (which can be
+ // ignored in unsafe-math mode).
if (Subtarget->hasSSE2() &&
(LHS.getValueType() == MVT::f32 || LHS.getValueType() == MVT::f64) &&
Cond.getOpcode() == ISD::SETCC) {
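
The asymmetry the rewritten comments keep returning to is easy to state: MINSS computes dst = a < b ? a : b, so the second operand wins both on NaN and on a +0.0/-0.0 tie. A standalone demonstration of why operand order matters, modeling MINSS in scalar C++:

#include <cassert>
#include <cmath>

float sse_min(float a, float b) { return a < b ? a : b; }

int main() {
  float nan = std::nanf("");
  assert(std::isnan(sse_min(1.0f, nan)));  // NaN in b: compare is false, b wins
  assert(sse_min(nan, 1.0f) == 1.0f);      // NaN in a: b still wins
  assert(std::signbit(sse_min(-0.0f, 0.0f)) !=
         std::signbit(sse_min(0.0f, -0.0f)));  // -0.0 vs +0.0 is order-sensitive
}
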
@@ -8803,36 +8832,34 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
unsigned Opcode = 0;
// Check for x CC y ? x : y.
- if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
+ if (DAG.isEqualTo(LHS, Cond.getOperand(0)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(1))) {
switch (CC) {
default: break;
case ISD::SETULT:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a min would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETOLE:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETULE:
- // This can be a min, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
@@ -8841,32 +8868,29 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
case ISD::SETOGE:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!UnsafeFPMath &&
+          !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS))
+ break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGT:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a max would handle NaNs incorrectly, and swapping
+ // the operands would cause it to handle comparisons between positive
+ // and negative zero incorrectly.
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS))) {
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETUGE:
- // This can be a max, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOGT:
case ISD::SETGT:
@@ -8875,36 +8899,33 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
}
// Check for x CC y ? y : x -- a min/max with reversed arms.
- } else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
+ } else if (DAG.isEqualTo(LHS, Cond.getOperand(1)) &&
+ DAG.isEqualTo(RHS, Cond.getOperand(0))) {
switch (CC) {
default: break;
case ISD::SETOGE:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a min would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !(DAG.isKnownNeverZero(LHS) || DAG.isKnownNeverZero(RHS))) {
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGT:
- // This can be a min if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a min would handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ break;
Opcode = X86ISD::FMIN;
break;
case ISD::SETUGE:
- // This can be a min, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a min would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOGT:
case ISD::SETGT:
@@ -8913,32 +8934,28 @@ static SDValue PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
break;
case ISD::SETULT:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(LHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(RHS))
- break;
- }
+ // Converting this to a max would handle NaNs incorrectly.
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
+ break;
Opcode = X86ISD::FMAX;
break;
case ISD::SETOLE:
- // This can be a max if we can prove that at least one of the operands
- // is not a nan.
- if (!FiniteOnlyFPMath()) {
- if (DAG.isKnownNeverNaN(RHS)) {
- // Put the potential NaN in the RHS so that SSE will preserve it.
- std::swap(LHS, RHS);
- } else if (!DAG.isKnownNeverNaN(LHS))
+ // Converting this to a max would handle comparisons between positive
+ // and negative zero incorrectly, and swapping the operands would
+ // cause it to handle NaNs incorrectly.
+ if (!UnsafeFPMath &&
+ !DAG.isKnownNeverZero(LHS) && !DAG.isKnownNeverZero(RHS)) {
+ if (!FiniteOnlyFPMath() &&
+ (!DAG.isKnownNeverNaN(LHS) || !DAG.isKnownNeverNaN(RHS)))
break;
+ std::swap(LHS, RHS);
}
Opcode = X86ISD::FMAX;
break;
case ISD::SETULE:
- // This can be a max, but if either operand is a NaN we need it to
- // preserve the original LHS.
+ // Converting this to a max would handle both negative zeros and NaNs
+ // incorrectly, but we can swap the operands to fix both.
std::swap(LHS, RHS);
case ISD::SETOLT:
case ISD::SETLT:
@@ -9157,16 +9174,64 @@ static SDValue PerformCMOVCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+/// PerformANDCombine - Look for SSE 'and' instructions of this form:
+/// (and x, (build_vector signbit,signbit,signbit,signbit)). If there
+/// exists a use of a build_vector that's the bitwise complement of the mask,
+/// then transform the node to
+/// (and (xor x, (build_vector -1,-1,-1,-1)), (build_vector ~sb,~sb,~sb,~sb)).
+static SDValue PerformANDCombine(SDNode *N, SelectionDAG &DAG,
+ TargetLowering::DAGCombinerInfo &DCI) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() || !VT.isInteger())
+ return SDValue();
+
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ if (N0.getOpcode() == ISD::XOR || !N1.hasOneUse())
+ return SDValue();
+
+ if (N1.getOpcode() == ISD::BUILD_VECTOR) {
+ unsigned NumElts = VT.getVectorNumElements();
+ EVT EltVT = VT.getVectorElementType();
+ SmallVector<SDValue, 8> Mask;
+ Mask.reserve(NumElts);
+ for (unsigned i = 0; i != NumElts; ++i) {
+ SDValue Arg = N1.getOperand(i);
+ if (Arg.getOpcode() == ISD::UNDEF) {
+ Mask.push_back(Arg);
+ continue;
+ }
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Arg);
+ if (!C)
+ return SDValue();
+ if (!C->getAPIntValue().isSignBit() &&
+ !C->getAPIntValue().isMaxSignedValue())
+ return SDValue();
+ Mask.push_back(DAG.getConstant(~C->getAPIntValue(), EltVT));
+ }
+ N1 = DAG.getNode(ISD::BUILD_VECTOR, N1.getDebugLoc(), VT,
+ &Mask[0], NumElts);
+ if (!N1.use_empty()) {
+ unsigned Bits = EltVT.getSizeInBits();
+ Mask.clear();
+ for (unsigned i = 0; i != NumElts; ++i)
+ Mask.push_back(DAG.getConstant(APInt::getAllOnesValue(Bits), EltVT));
+ SDValue NewMask = DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(),
+ VT, &Mask[0], NumElts);
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::XOR, N->getDebugLoc(), VT,
+ N0, NewMask), N1);
+ }
+ }
+
+ return SDValue();
+}
/// PerformMulCombine - Optimize a single multiply with constant into two
/// in order to implement it with two cheaper instructions, e.g.
/// LEA + SHL, LEA + LEA.
static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
TargetLowering::DAGCombinerInfo &DCI) {
- if (DAG.getMachineFunction().
- getFunction()->hasFnAttr(Attribute::OptimizeForSize))
- return SDValue();
-
if (DCI.isBeforeLegalize() || DCI.isCalledByLegalizer())
return SDValue();
@@ -9305,7 +9370,7 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
}
} else if (InVec.getOpcode() == ISD::INSERT_VECTOR_ELT) {
if (ConstantSDNode *C = dyn_cast<ConstantSDNode>(InVec.getOperand(2))) {
- unsigned SplatIdx = cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
+ unsigned SplatIdx= cast<ShuffleVectorSDNode>(ShAmtOp)->getSplatIndex();
if (C->getZExtValue() == SplatIdx)
BaseShAmt = InVec.getOperand(1);
}
@@ -9690,6 +9755,7 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N,
case ISD::VECTOR_SHUFFLE: return PerformShuffleCombine(N, DAG, *this);
case ISD::SELECT: return PerformSELECTCombine(N, DAG, Subtarget);
case X86ISD::CMOV: return PerformCMOVCombine(N, DAG, DCI);
+ case ISD::AND: return PerformANDCombine(N, DAG, DCI);
case ISD::MUL: return PerformMulCombine(N, DAG, DCI);
case ISD::SHL:
case ISD::SRA:
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index cf0eb40..ffaf1cf 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -180,7 +180,7 @@ namespace llvm {
/// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
/// corresponds to X86::PINSRW.
- PINSRW,
+ PINSRW, MMX_PINSRW,
/// PSHUFB - Shuffle 16 8-bit values within a vector.
PSHUFB,
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index 4ea3739..8462255 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -59,10 +59,11 @@ def tls64addr : ComplexPattern<i64, 4, "SelectTLSADDRAddr",
// Pattern fragments.
//
-def i64immSExt8 : PatLeaf<(i64 imm), [{
- // i64immSExt8 predicate - True if the 64-bit immediate fits in a 8-bit
- // sign extended field.
- return (int64_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+def i64immSExt8 : PatLeaf<(i64 immSext8)>;
+
+def GetLo32XForm : SDNodeXForm<imm, [{
+ // Transformation function: get the low 32 bits.
+ return getI32Imm((unsigned)N->getZExtValue());
}]>;
def i64immSExt32 : PatLeaf<(i64 imm), [{
@@ -71,6 +72,7 @@ def i64immSExt32 : PatLeaf<(i64 imm), [{
return (int64_t)N->getZExtValue() == (int32_t)N->getZExtValue();
}]>;
+
def i64immZExt32 : PatLeaf<(i64 imm), [{
// i64immZExt32 predicate - True if the 64-bit immediate fits in a 32-bit
// zero extended field.
@@ -325,7 +327,7 @@ def MOV64ri32 : RIi32<0xC7, MRM0r, (outs GR64:$dst), (ins i64i32imm:$src),
def MOV64rr_REV : RI<0x8B, MRMSrcReg, (outs GR64:$dst), (ins GR64:$src),
"mov{q}\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV64rm : RI<0x8B, MRMSrcMem, (outs GR64:$dst), (ins i64mem:$src),
"mov{q}\t{$src, $dst|$dst, $src}",
[(set GR64:$dst, (load addr:$src))]>;
@@ -556,7 +558,7 @@ def ADC64mi8 : RIi8<0x83, MRM2m, (outs), (ins i64mem:$dst, i64i8imm :$src2),
addr:$dst)]>;
def ADC64mi32 : RIi32<0x81, MRM2m, (outs), (ins i64mem:$dst, i64i32imm:$src2),
"adc{q}\t{$src2, $dst|$dst, $src2}",
- [(store (adde (load addr:$dst), i64immSExt8:$src2),
+ [(store (adde (load addr:$dst), i64immSExt32:$src2),
addr:$dst)]>;
} // Uses = [EFLAGS]
@@ -1981,7 +1983,7 @@ def : Pat<(and GR64:$src, i64immZExt32:$imm),
(i64 0),
(AND32ri
(EXTRACT_SUBREG GR64:$src, x86_subreg_32bit),
- imm:$imm),
+ (i32 (GetLo32XForm imm:$imm))),
x86_subreg_32bit)>;
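The GetLo32XForm pattern above is sound because a mask that zero-extends from 32 bits makes the 64-bit AND expressible as a 32-bit AND of the low halves: x86 32-bit operations implicitly zero the upper 32 bits, supplying the zeros the full-width AND would have produced. A standalone check of that identity, independent of TableGen:

#include <cassert>
#include <cstdint>

// If Imm zero-extends from 32 bits, (Src & Imm) has zero upper bits, so
// it equals the 32-bit AND of the low halves zero-extended back to 64
// bits -- which is what AND32ri on the low subregister produces.
static uint64_t and64ViaAnd32(uint64_t Src, uint32_t LoImm) {
  return static_cast<uint32_t>(Src) & LoImm;
}

int main() {
  uint64_t Src = 0x123456789abcdef0ULL;
  uint32_t LoImm = 0x00ffff00U;
  assert(and64ViaAnd32(Src, LoImm) == (Src & static_cast<uint64_t>(LoImm)));
  return 0;
}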
// r & (2^32-1) ==> movz
@@ -2105,34 +2107,34 @@ def : Pat<(store (i8 (trunc_su (srl_su GR16:$src, (i8 8)))), addr:$dst),
def : Pat<(shl GR64:$src1, (i8 1)), (ADD64rr GR64:$src1, GR64:$src1)>;
// (shl x (and y, 63)) ==> (shl x, y)
-def : Pat<(shl GR64:$src1, (and CL:$amt, 63)),
+def : Pat<(shl GR64:$src1, (and CL, 63)),
(SHL64rCL GR64:$src1)>;
-def : Pat<(store (shl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+def : Pat<(store (shl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHL64mCL addr:$dst)>;
-def : Pat<(srl GR64:$src1, (and CL:$amt, 63)),
+def : Pat<(srl GR64:$src1, (and CL, 63)),
(SHR64rCL GR64:$src1)>;
-def : Pat<(store (srl (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+def : Pat<(store (srl (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SHR64mCL addr:$dst)>;
-def : Pat<(sra GR64:$src1, (and CL:$amt, 63)),
+def : Pat<(sra GR64:$src1, (and CL, 63)),
(SAR64rCL GR64:$src1)>;
-def : Pat<(store (sra (loadi64 addr:$dst), (and CL:$amt, 63)), addr:$dst),
+def : Pat<(store (sra (loadi64 addr:$dst), (and CL, 63)), addr:$dst),
(SAR64mCL addr:$dst)>;
// Double shift patterns
-def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+def : Pat<(shrd GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)),
(SHRD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shrd (loadi64 addr:$dst), (i8 imm:$amt1),
- GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR64:$src2, (i8 imm)), addr:$dst),
(SHRD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
-def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm:$amt2)),
+def : Pat<(shld GR64:$src1, (i8 imm:$amt1), GR64:$src2, (i8 imm)),
(SHLD64rri8 GR64:$src1, GR64:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shld (loadi64 addr:$dst), (i8 imm:$amt1),
- GR64:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR64:$src2, (i8 imm)), addr:$dst),
(SHLD64mri8 addr:$dst, GR64:$src2, (i8 imm:$amt1))>;
// (or x1, x2) -> (add x1, x2) if two operands are known not to share bits.
diff --git a/lib/Target/X86/X86InstrFPStack.td b/lib/Target/X86/X86InstrFPStack.td
index e22a903..ae24bfb 100644
--- a/lib/Target/X86/X86InstrFPStack.td
+++ b/lib/Target/X86/X86InstrFPStack.td
@@ -397,7 +397,7 @@ def CMOVNP_F : FPI<0xD8, AddRegFrm, (outs RST:$op), (ins),
let canFoldAsLoad = 1 in {
def LD_Fp32m : FpIf32<(outs RFP32:$dst), (ins f32mem:$src), ZeroArgFP,
[(set RFP32:$dst, (loadf32 addr:$src))]>;
-let isReMaterializable = 1, mayHaveSideEffects = 1 in
+let isReMaterializable = 1 in
def LD_Fp64m : FpIf64<(outs RFP64:$dst), (ins f64mem:$src), ZeroArgFP,
[(set RFP64:$dst, (loadf64 addr:$src))]>;
def LD_Fp80m : FpI_<(outs RFP80:$dst), (ins f80mem:$src), ZeroArgFP,
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a0d0312..39bda04 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -276,11 +276,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVDQArr, X86::MOVDQAmr, 0, 16 },
{ X86::MOVPDI2DIrr, X86::MOVPDI2DImr, 0, 0 },
{ X86::MOVPQIto64rr,X86::MOVPQI2QImr, 0, 0 },
- { X86::MOVPS2SSrr, X86::MOVPS2SSmr, 0, 0 },
- { X86::MOVSDrr, X86::MOVSDmr, 0, 0 },
{ X86::MOVSDto64rr, X86::MOVSDto64mr, 0, 0 },
{ X86::MOVSS2DIrr, X86::MOVSS2DImr, 0, 0 },
- { X86::MOVSSrr, X86::MOVSSmr, 0, 0 },
{ X86::MOVUPDrr, X86::MOVUPDmr, 0, 0 },
{ X86::MOVUPSrr, X86::MOVUPSmr, 0, 0 },
{ X86::MUL16r, X86::MUL16m, 1, 0 },
@@ -389,12 +386,8 @@ X86InstrInfo::X86InstrInfo(X86TargetMachine &tm)
{ X86::MOVDI2PDIrr, X86::MOVDI2PDIrm, 0 },
{ X86::MOVDI2SSrr, X86::MOVDI2SSrm, 0 },
{ X86::MOVDQArr, X86::MOVDQArm, 16 },
- { X86::MOVSD2PDrr, X86::MOVSD2PDrm, 0 },
- { X86::MOVSDrr, X86::MOVSDrm, 0 },
{ X86::MOVSHDUPrr, X86::MOVSHDUPrm, 16 },
{ X86::MOVSLDUPrr, X86::MOVSLDUPrm, 16 },
- { X86::MOVSS2PSrr, X86::MOVSS2PSrm, 0 },
- { X86::MOVSSrr, X86::MOVSSrm, 0 },
{ X86::MOVSX16rr8, X86::MOVSX16rm8, 0 },
{ X86::MOVSX32rr16, X86::MOVSX32rm16, 0 },
{ X86::MOVSX32rr8, X86::MOVSX32rm8, 0 },
@@ -682,23 +675,20 @@ bool X86InstrInfo::isMoveInstr(const MachineInstr& MI,
case X86::MOV16rr:
case X86::MOV32rr:
case X86::MOV64rr:
- case X86::MOVSSrr:
- case X86::MOVSDrr:
// FP Stack register class copies
case X86::MOV_Fp3232: case X86::MOV_Fp6464: case X86::MOV_Fp8080:
case X86::MOV_Fp3264: case X86::MOV_Fp3280:
case X86::MOV_Fp6432: case X86::MOV_Fp8032:
-
+
+ // Note that MOVSSrr and MOVSDrr are not considered copies. FR32 and FR64
+ // copies are done with FsMOVAPSrr and FsMOVAPDrr.
+
case X86::FsMOVAPSrr:
case X86::FsMOVAPDrr:
case X86::MOVAPSrr:
case X86::MOVAPDrr:
case X86::MOVDQArr:
- case X86::MOVSS2PSrr:
- case X86::MOVSD2PDrr:
- case X86::MOVPS2SSrr:
- case X86::MOVPD2SDrr:
case X86::MMX_MOVQ64rr:
assert(MI.getNumOperands() >= 2 &&
MI.getOperand(0).isReg() &&
@@ -1083,7 +1073,7 @@ void X86InstrInfo::reMaterialize(MachineBasicBlock &MBB,
case X86::MOV8r0: Opc = X86::MOV8ri; break;
case X86::MOV16r0: Opc = X86::MOV16ri; break;
case X86::MOV32r0: Opc = X86::MOV32ri; break;
- case X86::MOV64r0: Opc = X86::MOV64ri; break;
+ case X86::MOV64r0: Opc = X86::MOV64ri64i32; break;
}
Clone = false;
}
@@ -1860,7 +1850,7 @@ bool X86InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
CommonRC = SrcRC;
else if (!DestRC->hasSubClass(SrcRC)) {
// Neither of GR64_NOREX or GR64_NOSP is a superclass of the other,
- // but we want to copy then as GR64. Similarly, for GR32_NOREX and
+ // but we want to copy them as GR64. Similarly, for GR32_NOREX and
// GR32_NOSP, copy as GR32.
if (SrcRC->hasSuperClass(&X86::GR64RegClass) &&
DestRC->hasSuperClass(&X86::GR64RegClass))
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 25cd297..cfe71a5 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -343,18 +343,37 @@ def X86_COND_O : PatLeaf<(i8 13)>;
def X86_COND_P : PatLeaf<(i8 14)>; // alt. COND_PE
def X86_COND_S : PatLeaf<(i8 15)>;
-def i16immSExt8 : PatLeaf<(i16 imm), [{
- // i16immSExt8 predicate - True if the 16-bit immediate fits in a 8-bit
- // sign extended field.
- return (int16_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+def immSext8 : PatLeaf<(imm), [{
+ return N->getSExtValue() == (int8_t)N->getSExtValue();
}]>;
-def i32immSExt8 : PatLeaf<(i32 imm), [{
- // i32immSExt8 predicate - True if the 32-bit immediate fits in a 8-bit
- // sign extended field.
- return (int32_t)N->getZExtValue() == (int8_t)N->getZExtValue();
+def i16immSExt8 : PatLeaf<(i16 immSext8)>;
+def i32immSExt8 : PatLeaf<(i32 immSext8)>;
+
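The shared immSext8 leaf reduces to a truncate-and-widen round trip; a standalone equivalent of the predicate, sketched outside SelectionDAG:

#include <cassert>
#include <cstdint>

// Standalone equivalent of immSext8: an immediate fits an 8-bit
// sign-extended field iff truncating to int8_t and widening back
// reproduces the original value.
static bool fitsSExt8(int64_t Imm) {
  return Imm == static_cast<int8_t>(Imm);
}

int main() {
  assert(fitsSExt8(127) && fitsSExt8(-128) && fitsSExt8(0));
  assert(!fitsSExt8(128) && !fitsSExt8(-129));
  return 0;
}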
+/// Load patterns: these constrain the match to the right address space.
+def dsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ if (PT->getAddressSpace() > 255)
+ return false;
+ return true;
+}]>;
+
+def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return PT->getAddressSpace() == 256;
+ return false;
+}]>;
+
+def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
+ if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
+ if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
+ return PT->getAddressSpace() == 257;
+ return false;
}]>;
+
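These fragments encode the X86 convention that address space 256 is %gs-relative and 257 is %fs-relative, with everything above 255 excluded from the default-segment patterns; as plain logic (X86Segment is a name invented for this sketch):

// Plain-logic mirror of dsload/gsload/fsload. The address-space numbers
// (256 = %gs, 257 = %fs, > 255 reserved) come from the fragments above.
enum class X86Segment { Default, GS, FS, Reserved };

static X86Segment classifyAddrSpace(unsigned AS) {
  if (AS == 256) return X86Segment::GS;
  if (AS == 257) return X86Segment::FS;
  return AS > 255 ? X86Segment::Reserved : X86Segment::Default;
}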
// Helper fragments for loads.
// It's always safe to treat an anyext i16 load as an i32 load if the i16 is
// known to be 32-bit aligned or better. Ditto for i8 to i16.
@@ -372,8 +391,7 @@ def loadi16 : PatFrag<(ops node:$ptr), (i16 (unindexedload node:$ptr)), [{
return false;
}]>;
-def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),
-[{
+def loadi16_anyext : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)),[{
LoadSDNode *LD = cast<LoadSDNode>(N);
if (const Value *Src = LD->getSrcValue())
if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
@@ -399,72 +417,11 @@ def loadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
return false;
}]>;
-def nvloadi32 : PatFrag<(ops node:$ptr), (i32 (unindexedload node:$ptr)), [{
- LoadSDNode *LD = cast<LoadSDNode>(N);
- if (const Value *Src = LD->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- if (LD->isVolatile())
- return false;
- ISD::LoadExtType ExtType = LD->getExtensionType();
- if (ExtType == ISD::NON_EXTLOAD)
- return true;
- if (ExtType == ISD::EXTLOAD)
- return LD->getAlignment() >= 4;
- return false;
-}]>;
-
-def gsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- return PT->getAddressSpace() == 256;
- return false;
-}]>;
-
-def fsload : PatFrag<(ops node:$ptr), (load node:$ptr), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- return PT->getAddressSpace() == 257;
- return false;
-}]>;
-
-def loadi8 : PatFrag<(ops node:$ptr), (i8 (load node:$ptr)), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
-def loadi64 : PatFrag<(ops node:$ptr), (i64 (load node:$ptr)), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
-
-def loadf32 : PatFrag<(ops node:$ptr), (f32 (load node:$ptr)), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
-def loadf64 : PatFrag<(ops node:$ptr), (f64 (load node:$ptr)), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
-def loadf80 : PatFrag<(ops node:$ptr), (f80 (load node:$ptr)), [{
- if (const Value *Src = cast<LoadSDNode>(N)->getSrcValue())
- if (const PointerType *PT = dyn_cast<PointerType>(Src->getType()))
- if (PT->getAddressSpace() > 255)
- return false;
- return true;
-}]>;
+def loadi8 : PatFrag<(ops node:$ptr), (i8 (dsload node:$ptr))>;
+def loadi64 : PatFrag<(ops node:$ptr), (i64 (dsload node:$ptr))>;
+def loadf32 : PatFrag<(ops node:$ptr), (f32 (dsload node:$ptr))>;
+def loadf64 : PatFrag<(ops node:$ptr), (f64 (dsload node:$ptr))>;
+def loadf80 : PatFrag<(ops node:$ptr), (f80 (dsload node:$ptr))>;
def sextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (sextloadi8 node:$ptr))>;
def sextloadi32i8 : PatFrag<(ops node:$ptr), (i32 (sextloadi8 node:$ptr))>;
@@ -1037,7 +994,7 @@ def MOV16rr_REV : I<0x8B, MRMSrcReg, (outs GR16:$dst), (ins GR16:$src),
def MOV32rr_REV : I<0x8B, MRMSrcReg, (outs GR32:$dst), (ins GR32:$src),
"mov{l}\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in {
+let canFoldAsLoad = 1, isReMaterializable = 1 in {
def MOV8rm : I<0x8A, MRMSrcMem, (outs GR8 :$dst), (ins i8mem :$src),
"mov{b}\t{$src, $dst|$dst, $src}",
[(set GR8:$dst, (loadi8 addr:$src))]>;
@@ -1071,7 +1028,7 @@ def MOV8mr_NOREX : I<0x88, MRMDestMem,
(outs), (ins i8mem_NOREX:$dst, GR8_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
let mayLoad = 1,
- canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+ canFoldAsLoad = 1, isReMaterializable = 1 in
def MOV8rm_NOREX : I<0x8A, MRMSrcMem,
(outs GR8_NOREX:$dst), (ins i8mem_NOREX:$src),
"mov{b}\t{$src, $dst|$dst, $src} # NOREX", []>;
@@ -1156,7 +1113,7 @@ def IMUL32m : I<0xF7, MRM5m, (outs), (ins i32mem:$src),
} // neverHasSideEffects
// unsigned division/remainder
-let Defs = [AL,AH,EFLAGS], Uses = [AX] in
+let Defs = [AX,EFLAGS], Uses = [AX] in
def DIV8r : I<0xF6, MRM6r, (outs), (ins GR8:$src), // AX/r8 = AL,AH
"div{b}\t$src", []>;
let Defs = [AX,DX,EFLAGS], Uses = [AX,DX] in
@@ -4442,12 +4399,6 @@ def : Pat<(i16 (anyext GR8 :$src)), (MOVZX16rr8 GR8 :$src)>;
def : Pat<(i32 (anyext GR8 :$src)), (MOVZX32rr8 GR8 :$src)>;
def : Pat<(i32 (anyext GR16:$src)), (MOVZX32rr16 GR16:$src)>;
-// (and (i32 load), 255) -> (zextload i8)
-def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 255))),
- (MOVZX32rm8 addr:$src)>;
-def : Pat<(i32 (and (nvloadi32 addr:$src), (i32 65535))),
- (MOVZX32rm16 addr:$src)>;
-
//===----------------------------------------------------------------------===//
// Some peepholes
//===----------------------------------------------------------------------===//
@@ -4543,43 +4494,43 @@ def : Pat<(shl GR16:$src1, (i8 1)), (ADD16rr GR16:$src1, GR16:$src1)>;
def : Pat<(shl GR32:$src1, (i8 1)), (ADD32rr GR32:$src1, GR32:$src1)>;
// (shl x (and y, 31)) ==> (shl x, y)
-def : Pat<(shl GR8:$src1, (and CL:$amt, 31)),
+def : Pat<(shl GR8:$src1, (and CL, 31)),
(SHL8rCL GR8:$src1)>;
-def : Pat<(shl GR16:$src1, (and CL:$amt, 31)),
+def : Pat<(shl GR16:$src1, (and CL, 31)),
(SHL16rCL GR16:$src1)>;
-def : Pat<(shl GR32:$src1, (and CL:$amt, 31)),
+def : Pat<(shl GR32:$src1, (and CL, 31)),
(SHL32rCL GR32:$src1)>;
-def : Pat<(store (shl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (shl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
(SHL8mCL addr:$dst)>;
-def : Pat<(store (shl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (shl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
(SHL16mCL addr:$dst)>;
-def : Pat<(store (shl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (shl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
(SHL32mCL addr:$dst)>;
-def : Pat<(srl GR8:$src1, (and CL:$amt, 31)),
+def : Pat<(srl GR8:$src1, (and CL, 31)),
(SHR8rCL GR8:$src1)>;
-def : Pat<(srl GR16:$src1, (and CL:$amt, 31)),
+def : Pat<(srl GR16:$src1, (and CL, 31)),
(SHR16rCL GR16:$src1)>;
-def : Pat<(srl GR32:$src1, (and CL:$amt, 31)),
+def : Pat<(srl GR32:$src1, (and CL, 31)),
(SHR32rCL GR32:$src1)>;
-def : Pat<(store (srl (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (srl (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
(SHR8mCL addr:$dst)>;
-def : Pat<(store (srl (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (srl (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
(SHR16mCL addr:$dst)>;
-def : Pat<(store (srl (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (srl (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
(SHR32mCL addr:$dst)>;
-def : Pat<(sra GR8:$src1, (and CL:$amt, 31)),
+def : Pat<(sra GR8:$src1, (and CL, 31)),
(SAR8rCL GR8:$src1)>;
-def : Pat<(sra GR16:$src1, (and CL:$amt, 31)),
+def : Pat<(sra GR16:$src1, (and CL, 31)),
(SAR16rCL GR16:$src1)>;
-def : Pat<(sra GR32:$src1, (and CL:$amt, 31)),
+def : Pat<(sra GR32:$src1, (and CL, 31)),
(SAR32rCL GR32:$src1)>;
-def : Pat<(store (sra (loadi8 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (sra (loadi8 addr:$dst), (and CL, 31)), addr:$dst),
(SAR8mCL addr:$dst)>;
-def : Pat<(store (sra (loadi16 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (sra (loadi16 addr:$dst), (and CL, 31)), addr:$dst),
(SAR16mCL addr:$dst)>;
-def : Pat<(store (sra (loadi32 addr:$dst), (and CL:$amt, 31)), addr:$dst),
+def : Pat<(store (sra (loadi32 addr:$dst), (and CL, 31)), addr:$dst),
(SAR32mCL addr:$dst)>;
// (or (x >> c) | (y << (32 - c))) ==> (shrd32 x, y, c)
@@ -4600,11 +4551,11 @@ def : Pat<(store (or (srl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
addr:$dst),
(SHRD32mrCL addr:$dst, GR32:$src2)>;
-def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+def : Pat<(shrd GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)),
(SHRD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shrd (loadi32 addr:$dst), (i8 imm:$amt1),
- GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
(SHRD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
// (or (x << c) | (y >> (32 - c))) ==> (shld32 x, y, c)
@@ -4625,11 +4576,11 @@ def : Pat<(store (or (shl (loadi32 addr:$dst), (i8 (trunc ECX:$amt))),
addr:$dst),
(SHLD32mrCL addr:$dst, GR32:$src2)>;
-def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm:$amt2)),
+def : Pat<(shld GR32:$src1, (i8 imm:$amt1), GR32:$src2, (i8 imm/*:$amt2*/)),
(SHLD32rri8 GR32:$src1, GR32:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shld (loadi32 addr:$dst), (i8 imm:$amt1),
- GR32:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR32:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
(SHLD32mri8 addr:$dst, GR32:$src2, (i8 imm:$amt1))>;
// (or (x >> c) | (y << (16 - c))) ==> (shrd16 x, y, c)
@@ -4650,11 +4601,11 @@ def : Pat<(store (or (srl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
addr:$dst),
(SHRD16mrCL addr:$dst, GR16:$src2)>;
-def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+def : Pat<(shrd GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)),
(SHRD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shrd (loadi16 addr:$dst), (i8 imm:$amt1),
- GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
(SHRD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
// (or (x << c) | (y >> (16 - c))) ==> (shld16 x, y, c)
@@ -4675,11 +4626,11 @@ def : Pat<(store (or (shl (loadi16 addr:$dst), (i8 (trunc CX:$amt))),
addr:$dst),
(SHLD16mrCL addr:$dst, GR16:$src2)>;
-def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm:$amt2)),
+def : Pat<(shld GR16:$src1, (i8 imm:$amt1), GR16:$src2, (i8 imm/*:$amt2*/)),
(SHLD16rri8 GR16:$src1, GR16:$src2, (i8 imm:$amt1))>;
def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
- GR16:$src2, (i8 imm:$amt2)), addr:$dst),
+ GR16:$src2, (i8 imm/*:$amt2*/)), addr:$dst),
(SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
// (anyext (setcc_carry)) -> (setcc_carry)
diff --git a/lib/Target/X86/X86InstrMMX.td b/lib/Target/X86/X86InstrMMX.td
index 89f020c..c8e0723 100644
--- a/lib/Target/X86/X86InstrMMX.td
+++ b/lib/Target/X86/X86InstrMMX.td
@@ -141,7 +141,7 @@ def MMX_MOVD64rrv164 : MMXI<0x6E, MRMSrcReg, (outs VR64:$dst), (ins GR64:$src),
let neverHasSideEffects = 1 in
def MMX_MOVQ64rr : MMXI<0x6F, MRMSrcReg, (outs VR64:$dst), (ins VR64:$src),
"movq\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MMX_MOVQ64rm : MMXI<0x6F, MRMSrcMem, (outs VR64:$dst), (ins i64mem:$src),
"movq\t{$src, $dst|$dst, $src}",
[(set VR64:$dst, (load_mmx addr:$src))]>;
@@ -426,13 +426,15 @@ def MMX_CVTTPS2PIrm : MMXI<0x2C, MRMSrcMem, (outs VR64:$dst), (ins f64mem:$src),
// Extract / Insert
-def MMX_X86pextrw : SDNode<"X86ISD::PEXTRW", SDTypeProfile<1, 2, []>, []>;
-def MMX_X86pinsrw : SDNode<"X86ISD::PINSRW", SDTypeProfile<1, 3, []>, []>;
+def MMX_X86pinsrw : SDNode<"X86ISD::MMX_PINSRW",
+ SDTypeProfile<1, 3, [SDTCisVT<0, v4i16>, SDTCisSameAs<0,1>,
+ SDTCisVT<2, i32>, SDTCisPtrTy<3>]>>;
+
def MMX_PEXTRWri : MMXIi8<0xC5, MRMSrcReg,
(outs GR32:$dst), (ins VR64:$src1, i16i8imm:$src2),
"pextrw\t{$src2, $src1, $dst|$dst, $src1, $src2}",
- [(set GR32:$dst, (MMX_X86pextrw (v4i16 VR64:$src1),
+ [(set GR32:$dst, (X86pextrw (v4i16 VR64:$src1),
(iPTR imm:$src2)))]>;
let Constraints = "$src1 = $dst" in {
def MMX_PINSRWrri : MMXIi8<0xC4, MRMSrcReg,
@@ -597,13 +599,6 @@ let AddedComplexity = 10 in {
(MMX_PUNPCKHDQrr VR64:$src, VR64:$src)>;
}
-// Patterns to perform vector shuffling with a zeroed out vector.
-let AddedComplexity = 20 in {
- def : Pat<(bc_v2i32 (mmx_unpckl immAllZerosV,
- (v2i32 (scalar_to_vector (load_mmx addr:$src))))),
- (MMX_PUNPCKLDQrm VR64:$src, VR64:$src)>;
-}
-
// Some special case PANDN patterns.
// FIXME: Get rid of these.
def : Pat<(v1i64 (and (xor VR64:$src1, (bc_v1i64 (v2i32 immAllOnesV))),
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index 9b2140f..2743dba 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -160,6 +160,32 @@ def memopv4i16 : PatFrag<(ops node:$ptr), (v4i16 (memop64 node:$ptr))>;
def memopv8i16 : PatFrag<(ops node:$ptr), (v8i16 (memop64 node:$ptr))>;
def memopv2i32 : PatFrag<(ops node:$ptr), (v2i32 (memop64 node:$ptr))>;
+// MOVNT Support
+// Like 'store', but requires the non-temporal bit to be set
+def nontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal();
+ return false;
+}]>;
+
+def alignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() && !ST->isTruncatingStore() &&
+ ST->getAddressingMode() == ISD::UNINDEXED &&
+ ST->getAlignment() >= 16;
+ return false;
+}]>;
+
+def unalignednontemporalstore : PatFrag<(ops node:$val, node:$ptr),
+ (st node:$val, node:$ptr), [{
+ if (StoreSDNode *ST = dyn_cast<StoreSDNode>(N))
+ return ST->isNonTemporal() &&
+ ST->getAlignment() < 16;
+ return false;
+}]>;
+
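The three store fragments can be summarized as predicates over the fields they query; StoreInfo below is a hypothetical stand-in for the StoreSDNode accessors, sketched apart from the PatFrag machinery:

// StoreInfo is a hypothetical stand-in for the StoreSDNode fields the
// fragments query; the predicates restate their selection conditions.
struct StoreInfo {
  bool NonTemporal;
  bool Truncating;
  bool Indexed;       // true unless the addressing mode is UNINDEXED
  unsigned Alignment;
};

static bool isNTStore(const StoreInfo &S) { return S.NonTemporal; }

static bool isAlignedNTStore(const StoreInfo &S) {
  return S.NonTemporal && !S.Truncating && !S.Indexed && S.Alignment >= 16;
}

static bool isUnalignedNTStore(const StoreInfo &S) {
  return S.NonTemporal && S.Alignment < 16;
}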
def bc_v4f32 : PatFrag<(ops node:$in), (v4f32 (bitconvert node:$in))>;
def bc_v2f64 : PatFrag<(ops node:$in), (v2f64 (bitconvert node:$in))>;
def bc_v16i8 : PatFrag<(ops node:$in), (v16i8 (bitconvert node:$in))>;
@@ -344,18 +370,56 @@ let Uses = [EFLAGS], usesCustomInserter = 1 in {
// SSE1 Instructions
//===----------------------------------------------------------------------===//
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVSSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
- "movss\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+// Move Instructions. Register-to-register movss is not used for FR32
+// register copies because it's a partial register update; FsMOVAPSrr is
+// used instead. Register-to-register movss is not modeled as an INSERT_SUBREG
+// because INSERT_SUBREG requires that the insert be implementable in terms of
+// a copy, and, as just mentioned, we don't use movss for copies.
+let Constraints = "$src1 = $dst" in
+def MOVSSrr : SSI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
+ "movss\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (movl VR128:$src1, (scalar_to_vector FR32:$src2)))]>;
+
+// Extract the low 32-bit value from one vector and insert it into another.
+let AddedComplexity = 15 in
+def : Pat<(v4f32 (movl VR128:$src1, VR128:$src2)),
+ (MOVSSrr VR128:$src1,
+ (EXTRACT_SUBREG (v4f32 VR128:$src2), x86_subreg_ss))>;
+
+// Implicitly promote a 32-bit scalar to a vector.
+def : Pat<(v4f32 (scalar_to_vector FR32:$src)),
+ (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FR32:$src, x86_subreg_ss)>;
+
+// Loading from memory automatically zeroing upper bits.
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVSSrm : SSI<0x10, MRMSrcMem, (outs FR32:$dst), (ins f32mem:$src),
"movss\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (loadf32 addr:$src))]>;
+
+// MOVSSrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+let AddedComplexity = 20 in {
+def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector (loadf32 addr:$src))))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+def : Pat<(v4f32 (scalar_to_vector (loadf32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
+ (SUBREG_TO_REG (i32 0), (MOVSSrm addr:$src), x86_subreg_ss)>;
+}
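The zeroing of the upper lanes that these SUBREG_TO_REG patterns model is the documented movss-from-memory behavior, also exposed by the _mm_load_ss intrinsic; a small check, assuming an SSE-capable target:

#include <cassert>
#include <xmmintrin.h>

int main() {
  float In = 3.0f, Out[4];
  __m128 V = _mm_load_ss(&In);  // movss from memory: lane 0 = In, lanes 1-3 = 0
  _mm_storeu_ps(Out, V);
  assert(Out[0] == 3.0f && Out[1] == 0.0f && Out[2] == 0.0f && Out[3] == 0.0f);
  return 0;
}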
+
+// Store scalar value to memory.
def MOVSSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, FR32:$src),
"movss\t{$src, $dst|$dst, $src}",
[(store FR32:$src, addr:$dst)]>;
+// Extract and store.
+def : Pat<(store (f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSSmr addr:$dst,
+ (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
+
// Conversion instructions
def CVTTSS2SIrr : SSI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR32:$src),
"cvttss2si\t{$src, $dst|$dst, $src}",
@@ -518,7 +582,7 @@ def FsMOVAPSrr : PSI<0x28, MRMSrcReg, (outs FR32:$dst), (ins FR32:$src),
// Alias instruction to load FR32 from f128mem using movaps. Upper bits are
// disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def FsMOVAPSrm : PSI<0x28, MRMSrcMem, (outs FR32:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(set FR32:$dst, (alignedloadfsf32 addr:$src))]>;
@@ -715,7 +779,7 @@ defm MIN : sse1_fp_binop_rm<0x5D, "min", X86fmin,
let neverHasSideEffects = 1 in
def MOVAPSrr : PSI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movaps\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVAPSrm : PSI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movaps\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (alignedloadv4f32 addr:$src))]>;
@@ -727,7 +791,7 @@ def MOVAPSmr : PSI<0x29, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
let neverHasSideEffects = 1 in
def MOVUPSrr : PSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movups\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVUPSrm : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (loadv4f32 addr:$src))]>;
@@ -736,7 +800,7 @@ def MOVUPSmr : PSI<0x11, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
[(store (v4f32 VR128:$src), addr:$dst)]>;
// Intrinsic forms of MOVUPS load and store
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVUPSrm_Int : PSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movups\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (int_x86_sse_loadu_ps addr:$src))]>;
@@ -796,9 +860,9 @@ def MOVHLPSrr : PSI<0x12, MRMSrcReg, (outs VR128:$dst),
let AddedComplexity = 20 in {
def : Pat<(v4f32 (movddup VR128:$src, (undef))),
- (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+ (MOVLHPSrr VR128:$src, VR128:$src)>;
def : Pat<(v2i64 (movddup VR128:$src, (undef))),
- (MOVLHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+ (MOVLHPSrr VR128:$src, VR128:$src)>;
}
@@ -1013,10 +1077,33 @@ def PREFETCHNTA : PSI<0x18, MRM0m, (outs), (ins i8mem:$src),
"prefetchnta\t$src", [(prefetch addr:$src, imm, (i32 0))]>;
// Non-temporal stores
-def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+def MOVNTPSmr_Int : PSI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
"movntps\t{$src, $dst|$dst, $src}",
[(int_x86_sse_movnt_ps addr:$dst, VR128:$src)]>;
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPSmr : PSI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntps\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQ_64mr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+def : Pat<(alignednontemporalstore (v2i64 VR128:$src), addr:$dst),
+ (MOVNTDQ_64mr VR128:$src, addr:$dst)>;
+
+def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i32 GR32:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+
+def MOVNTI_64mr : RI<0xC3, MRMDestMem, (outs), (ins i64mem:$dst, GR64:$src),
+ "movnti\t{$src, $dst|$dst, $src}",
+ [(nontemporalstore (i64 GR64:$src), addr:$dst)]>,
+ TB, Requires<[HasSSE2]>;
+}
+
// Load, store, and memory fence
def SFENCE : PSI<0xAE, MRM7r, (outs), (ins), "sfence", [(int_x86_sse_sfence)]>;
@@ -1035,84 +1122,73 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
def V_SET0 : PSI<0x57, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllZerosV))]>;
-let Predicates = [HasSSE1] in {
- def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
- def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
- def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
- def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
- def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
-}
-
-// FR32 to 128-bit vector conversion.
-let isAsCheapAsAMove = 1 in
-def MOVSS2PSrr : SSI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR32:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4f32 (scalar_to_vector FR32:$src)))]>;
-def MOVSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v4f32 (scalar_to_vector (loadf32 addr:$src))))]>;
-
-// FIXME: may not be able to eliminate this movss with coalescing the src and
-// dest register classes are different. We really want to write this pattern
-// like this:
-// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-// (f32 FR32:$src)>;
-let isAsCheapAsAMove = 1 in
-def MOVPS2SSrr : SSI<0x10, MRMSrcReg, (outs FR32:$dst), (ins VR128:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set FR32:$dst, (vector_extract (v4f32 VR128:$src),
- (iPTR 0)))]>;
-def MOVPS2SSmr : SSI<0x11, MRMDestMem, (outs), (ins f32mem:$dst, VR128:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(store (f32 (vector_extract (v4f32 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
-
-
-// Move to lower bits of a VR128, leaving upper bits alone.
-// Three operand (but two address) aliases.
-let Constraints = "$src1 = $dst" in {
-let neverHasSideEffects = 1 in
- def MOVLSS2PSrr : SSI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, FR32:$src2),
- "movss\t{$src2, $dst|$dst, $src2}", []>;
-
- let AddedComplexity = 15 in
- def MOVLPSrr : SSI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "movss\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v4f32 (movl VR128:$src1, VR128:$src2)))]>;
-}
+def : Pat<(v2i64 immAllZerosV), (V_SET0)>;
+def : Pat<(v8i16 immAllZerosV), (V_SET0)>;
+def : Pat<(v16i8 immAllZerosV), (V_SET0)>;
+def : Pat<(v2f64 immAllZerosV), (V_SET0)>;
+def : Pat<(v4f32 immAllZerosV), (V_SET0)>;
-// Move to lower bits of a VR128 and zeroing upper bits.
-// Loading from memory automatically zeroing upper bits.
-let AddedComplexity = 20 in
-def MOVZSS2PSrm : SSI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f32mem:$src),
- "movss\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst, (v4f32 (X86vzmovl (v4f32 (scalar_to_vector
- (loadf32 addr:$src))))))]>;
-
-def : Pat<(v4f32 (X86vzmovl (loadv4f32 addr:$src))),
- (MOVZSS2PSrm addr:$src)>;
+def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss))>;
//===---------------------------------------------------------------------===//
// SSE2 Instructions
//===---------------------------------------------------------------------===//
-// Move Instructions
-let neverHasSideEffects = 1 in
-def MOVSDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+// Move Instructions. Register-to-register movsd is not used for FR64
+// register copies because it's a partial register update; FsMOVAPDrr is
+// used instead. Register-to-register movsd is not modeled as an INSERT_SUBREG
+// because INSERT_SUBREG requires that the insert be implementable in terms of
+// a copy, and, as just mentioned, we don't use movsd for copies.
+let Constraints = "$src1 = $dst" in
+def MOVSDrr : SDI<0x10, MRMSrcReg,
+ (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
+ "movsd\t{$src2, $dst|$dst, $src2}",
+ [(set VR128:$dst,
+ (movl VR128:$src1, (scalar_to_vector FR64:$src2)))]>;
+
+// Extract the low 64-bit value from one vector and insert it into another.
+let AddedComplexity = 15 in
+def : Pat<(v2f64 (movl VR128:$src1, VR128:$src2)),
+ (MOVSDrr VR128:$src1,
+ (EXTRACT_SUBREG (v2f64 VR128:$src2), x86_subreg_sd))>;
+
+// Implicitly promote a 64-bit scalar to a vector.
+def : Pat<(v2f64 (scalar_to_vector FR64:$src)),
+ (INSERT_SUBREG (v2f64 (IMPLICIT_DEF)), FR64:$src, x86_subreg_sd)>;
+
+// Loading from memory automatically zeroing upper bits.
+let canFoldAsLoad = 1, isReMaterializable = 1, AddedComplexity = 20 in
def MOVSDrm : SDI<0x10, MRMSrcMem, (outs FR64:$dst), (ins f64mem:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (loadf64 addr:$src))]>;
+
+// MOVSDrm zeros the high parts of the register; represent this
+// with SUBREG_TO_REG.
+let AddedComplexity = 20 in {
+def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector (loadf64 addr:$src))))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (scalar_to_vector (loadf64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+def : Pat<(v2f64 (X86vzload addr:$src)),
+ (SUBREG_TO_REG (i64 0), (MOVSDrm addr:$src), x86_subreg_sd)>;
+}
+
+// Store scalar value to memory.
def MOVSDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, FR64:$src),
"movsd\t{$src, $dst|$dst, $src}",
[(store FR64:$src, addr:$dst)]>;
+// Extract and store.
+def : Pat<(store (f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ addr:$dst),
+ (MOVSDmr addr:$dst,
+ (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+
// Conversion instructions
def CVTTSD2SIrr : SDI<0x2C, MRMSrcReg, (outs GR32:$dst), (ins FR64:$src),
"cvttsd2si\t{$src, $dst|$dst, $src}",
@@ -1166,7 +1242,8 @@ def CVTSS2SDrm : I<0x5A, MRMSrcMem, (outs FR64:$dst), (ins f32mem:$src),
Requires<[HasSSE2, OptForSize]>;
def : Pat<(extloadf32 addr:$src),
- (CVTSS2SDrr (MOVSSrm addr:$src))>, Requires<[HasSSE2, OptForSpeed]>;
+ (CVTSS2SDrr (MOVSSrm addr:$src))>,
+ Requires<[HasSSE2, OptForSpeed]>;
// Match intrinsics which expect XMM operand(s).
def Int_CVTSD2SIrr : SDI<0x2D, MRMSrcReg, (outs GR32:$dst), (ins VR128:$src),
@@ -1285,7 +1362,7 @@ def FsMOVAPDrr : PDI<0x28, MRMSrcReg, (outs FR64:$dst), (ins FR64:$src),
// Alias instruction to load FR64 from f128mem using movapd. Upper bits are
// disregarded.
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def FsMOVAPDrm : PDI<0x28, MRMSrcMem, (outs FR64:$dst), (ins f128mem:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(set FR64:$dst, (alignedloadfsf64 addr:$src))]>;
@@ -1483,7 +1560,7 @@ defm MIN : sse2_fp_binop_rm<0x5D, "min", X86fmin,
let neverHasSideEffects = 1 in
def MOVAPDrr : PDI<0x28, MRMSrcReg, (outs VR128:$dst), (ins VR128:$src),
"movapd\t{$src, $dst|$dst, $src}", []>;
-let canFoldAsLoad = 1, isReMaterializable = 1, mayHaveSideEffects = 1 in
+let canFoldAsLoad = 1, isReMaterializable = 1 in
def MOVAPDrm : PDI<0x28, MRMSrcMem, (outs VR128:$dst), (ins f128mem:$src),
"movapd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst, (alignedloadv2f64 addr:$src))]>;
@@ -2298,17 +2375,30 @@ def MASKMOVDQU64 : PDI<0xF7, MRMSrcReg, (outs), (ins VR128:$src, VR128:$mask),
[(int_x86_sse2_maskmov_dqu VR128:$src, VR128:$mask, RDI)]>;
// Non-temporal stores
-def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
- "movntpd\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
-def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
- "movntdq\t{$src, $dst|$dst, $src}",
- [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
-def MOVNTImr : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
+def MOVNTPDmr_Int : PDI<0x2B, MRMDestMem, (outs), (ins i128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_pd addr:$dst, VR128:$src)]>;
+def MOVNTDQmr_Int : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(int_x86_sse2_movnt_dq addr:$dst, VR128:$src)]>;
+def MOVNTImr_Int : I<0xC3, MRMDestMem, (outs), (ins i32mem:$dst, GR32:$src),
"movnti\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_movnt_i addr:$dst, GR32:$src)]>,
TB, Requires<[HasSSE2]>;
+let AddedComplexity = 400 in { // Prefer non-temporal versions
+def MOVNTPDmr : PDI<0x2B, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntpd\t{$src, $dst|$dst, $src}",
+                    [(alignednontemporalstore (v2f64 VR128:$src), addr:$dst)]>;
+
+def MOVNTDQmr : PDI<0xE7, MRMDestMem, (outs), (ins f128mem:$dst, VR128:$src),
+ "movntdq\t{$src, $dst|$dst, $src}",
+ [(alignednontemporalstore (v4f32 VR128:$src), addr:$dst)]>;
+
+def : Pat<(alignednontemporalstore (v4i32 VR128:$src), addr:$dst),
+ (MOVNTDQmr VR128:$src, addr:$dst)>;
+}
+
// Flush cache
def CLFLUSH : I<0xAE, MRM7m, (outs), (ins i8mem:$src),
"clflush\t$src", [(int_x86_sse2_clflush addr:$src)]>,
@@ -2321,11 +2411,11 @@ def MFENCE : I<0xAE, MRM_F0, (outs), (ins),
"mfence", [(int_x86_sse2_mfence)]>, TB, Requires<[HasSSE2]>;
//TODO: custom lower this so as to never even generate the noop
-def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
(i8 0)), (NOOP)>;
def : Pat<(membarrier (i8 0), (i8 0), (i8 0), (i8 1), (i8 1)), (SFENCE)>;
def : Pat<(membarrier (i8 1), (i8 0), (i8 0), (i8 0), (i8 1)), (LFENCE)>;
-def : Pat<(membarrier (i8 imm:$ll), (i8 imm:$ls), (i8 imm:$sl), (i8 imm:$ss),
+def : Pat<(membarrier (i8 imm), (i8 imm), (i8 imm), (i8 imm),
(i8 1)), (MFENCE)>;
// Alias instructions that map zero vector to pxor / xorp* for sse.
@@ -2337,17 +2427,6 @@ let isReMaterializable = 1, isAsCheapAsAMove = 1, canFoldAsLoad = 1,
def V_SETALLONES : PDI<0x76, MRMInitReg, (outs VR128:$dst), (ins), "",
[(set VR128:$dst, (v4i32 immAllOnesV))]>;
-// FR64 to 128-bit vector conversion.
-let isAsCheapAsAMove = 1 in
-def MOVSD2PDrr : SDI<0x10, MRMSrcReg, (outs VR128:$dst), (ins FR64:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (scalar_to_vector FR64:$src)))]>;
-def MOVSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (scalar_to_vector (loadf64 addr:$src))))]>;
-
def MOVDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set VR128:$dst,
@@ -2376,20 +2455,9 @@ def MOVPQI2QImr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
[(store (i64 (vector_extract (v2i64 VR128:$src),
(iPTR 0))), addr:$dst)]>;
-// FIXME: may not be able to eliminate this movss with coalescing the src and
-// dest register classes are different. We really want to write this pattern
-// like this:
-// def : Pat<(f32 (vector_extract (v4f32 VR128:$src), (iPTR 0))),
-// (f32 FR32:$src)>;
-let isAsCheapAsAMove = 1 in
-def MOVPD2SDrr : SDI<0x10, MRMSrcReg, (outs FR64:$dst), (ins VR128:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set FR64:$dst, (vector_extract (v2f64 VR128:$src),
- (iPTR 0)))]>;
-def MOVPD2SDmr : SDI<0x11, MRMDestMem, (outs), (ins f64mem:$dst, VR128:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(store (f64 (vector_extract (v2f64 VR128:$src),
- (iPTR 0))), addr:$dst)]>;
+def : Pat<(f64 (vector_extract (v2f64 VR128:$src), (iPTR 0))),
+ (f64 (EXTRACT_SUBREG (v2f64 VR128:$src), x86_subreg_sd))>;
+
def MOVPDI2DIrr : PDI<0x7E, MRMDestReg, (outs GR32:$dst), (ins VR128:$src),
"movd\t{$src, $dst|$dst, $src}",
[(set GR32:$dst, (vector_extract (v4i32 VR128:$src),
@@ -2406,44 +2474,11 @@ def MOVSS2DImr : PDI<0x7E, MRMDestMem, (outs), (ins i32mem:$dst, FR32:$src),
"movd\t{$src, $dst|$dst, $src}",
[(store (i32 (bitconvert FR32:$src)), addr:$dst)]>;
-
-// Move to lower bits of a VR128, leaving upper bits alone.
-// Three operand (but two address) aliases.
-let Constraints = "$src1 = $dst" in {
- let neverHasSideEffects = 1 in
- def MOVLSD2PDrr : SDI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, FR64:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}", []>;
-
- let AddedComplexity = 15 in
- def MOVLPDrr : SDI<0x10, MRMSrcReg,
- (outs VR128:$dst), (ins VR128:$src1, VR128:$src2),
- "movsd\t{$src2, $dst|$dst, $src2}",
- [(set VR128:$dst,
- (v2f64 (movl VR128:$src1, VR128:$src2)))]>;
-}
-
// Store / copy lower 64-bits of a XMM register.
def MOVLQ128mr : PDI<0xD6, MRMDestMem, (outs), (ins i64mem:$dst, VR128:$src),
"movq\t{$src, $dst|$dst, $src}",
[(int_x86_sse2_storel_dq addr:$dst, VR128:$src)]>;
-// Move to lower bits of a VR128 and zeroing upper bits.
-// Loading from memory automatically zeroing upper bits.
-let AddedComplexity = 20 in {
-def MOVZSD2PDrm : SDI<0x10, MRMSrcMem, (outs VR128:$dst), (ins f64mem:$src),
- "movsd\t{$src, $dst|$dst, $src}",
- [(set VR128:$dst,
- (v2f64 (X86vzmovl (v2f64 (scalar_to_vector
- (loadf64 addr:$src))))))]>;
-
-def : Pat<(v2f64 (X86vzmovl (loadv2f64 addr:$src))),
- (MOVZSD2PDrm addr:$src)>;
-def : Pat<(v2f64 (X86vzmovl (bc_v2f64 (loadv4f32 addr:$src)))),
- (MOVZSD2PDrm addr:$src)>;
-def : Pat<(v2f64 (X86vzload addr:$src)), (MOVZSD2PDrm addr:$src)>;
-}
-
// movd / movq to XMM register zero-extends
let AddedComplexity = 15 in {
def MOVZDI2PDIrr : PDI<0x6E, MRMSrcReg, (outs VR128:$dst), (ins GR32:$src),
@@ -2989,13 +3024,15 @@ let Predicates = [HasSSE2] in {
let AddedComplexity = 15 in {
// Zeroing a VR128 then do a MOVS{S|D} to the lower bits.
def : Pat<(v2f64 (X86vzmovl (v2f64 (scalar_to_vector FR64:$src)))),
- (MOVLSD2PDrr (V_SET0), FR64:$src)>, Requires<[HasSSE2]>;
+ (MOVSDrr (v2f64 (V_SET0)), FR64:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 (scalar_to_vector FR32:$src)))),
- (MOVLSS2PSrr (V_SET0), FR32:$src)>, Requires<[HasSSE1]>;
+ (MOVSSrr (v4f32 (V_SET0)), FR32:$src)>;
def : Pat<(v4f32 (X86vzmovl (v4f32 VR128:$src))),
- (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
+ (MOVSSrr (v4f32 (V_SET0)),
+ (f32 (EXTRACT_SUBREG (v4f32 VR128:$src), x86_subreg_ss)))>;
def : Pat<(v4i32 (X86vzmovl (v4i32 VR128:$src))),
- (MOVLPSrr (V_SET0), VR128:$src)>, Requires<[HasSSE1]>;
+ (MOVSSrr (v4i32 (V_SET0)),
+ (EXTRACT_SUBREG (v4i32 VR128:$src), x86_subreg_ss))>;
}
// Splat v2f64 / v2i64
@@ -3013,8 +3050,7 @@ def : Pat<(unpckh (v2i64 VR128:$src), (undef)),
// Special unary SHUFPSrri case.
def : Pat<(v4f32 (pshufd:$src3 VR128:$src1, (undef))),
(SHUFPSrri VR128:$src1, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>,
- Requires<[HasSSE1]>;
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
let AddedComplexity = 5 in
def : Pat<(v4f32 (pshufd:$src2 VR128:$src1, (undef))),
(PSHUFDri VR128:$src1, (SHUFFLE_get_shuf_imm VR128:$src2))>,
@@ -3060,13 +3096,13 @@ def : Pat<(v4f32 (unpckl_undef:$src2 VR128:$src, (undef))),
}
let AddedComplexity = 10 in {
def : Pat<(v4f32 (unpckl_undef VR128:$src, (undef))),
- (UNPCKLPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+ (UNPCKLPSrr VR128:$src, VR128:$src)>;
def : Pat<(v16i8 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKLBWrr VR128:$src, VR128:$src)>;
def : Pat<(v8i16 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKLWDrr VR128:$src, VR128:$src)>;
def : Pat<(v4i32 (unpckl_undef VR128:$src, (undef))),
- (PUNPCKLDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKLDQrr VR128:$src, VR128:$src)>;
}
// vector_shuffle v1, <undef>, <2, 2, 3, 3, ...>
@@ -3080,13 +3116,13 @@ def : Pat<(v4f32 (unpckh_undef:$src2 VR128:$src, (undef))),
}
let AddedComplexity = 10 in {
def : Pat<(v4f32 (unpckh_undef VR128:$src, (undef))),
- (UNPCKHPSrr VR128:$src, VR128:$src)>, Requires<[HasSSE1]>;
+ (UNPCKHPSrr VR128:$src, VR128:$src)>;
def : Pat<(v16i8 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHBWrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKHBWrr VR128:$src, VR128:$src)>;
def : Pat<(v8i16 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHWDrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKHWDrr VR128:$src, VR128:$src)>;
def : Pat<(v4i32 (unpckh_undef VR128:$src, (undef))),
- (PUNPCKHDQrr VR128:$src, VR128:$src)>, Requires<[HasSSE2]>;
+ (PUNPCKHDQrr VR128:$src, VR128:$src)>;
}
let AddedComplexity = 20 in {
@@ -3108,45 +3144,49 @@ def : Pat<(v4i32 (movhlps_undef VR128:$src1, (undef))),
let AddedComplexity = 20 in {
// vector_shuffle v1, (load v2) <4, 5, 2, 3> using MOVLPS
def : Pat<(v4f32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE1]>;
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v2f64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
def : Pat<(v4i32 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPSrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+ (MOVLPSrm VR128:$src1, addr:$src2)>;
def : Pat<(v2i64 (movlp VR128:$src1, (load addr:$src2))),
- (MOVLPDrm VR128:$src1, addr:$src2)>, Requires<[HasSSE2]>;
+ (MOVLPDrm VR128:$src1, addr:$src2)>;
}
// (store (vector_shuffle (load addr), v2, <4, 5, 2, 3>), addr) using MOVLPS
def : Pat<(store (v4f32 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v2f64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v4i32 (movlp (bc_v4i32 (loadv2i64 addr:$src1)), VR128:$src2)),
addr:$src1),
- (MOVLPSmr addr:$src1, VR128:$src2)>, Requires<[HasSSE1]>;
+ (MOVLPSmr addr:$src1, VR128:$src2)>;
def : Pat<(store (v2i64 (movlp (load addr:$src1), VR128:$src2)), addr:$src1),
- (MOVLPDmr addr:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVLPDmr addr:$src1, VR128:$src2)>;
let AddedComplexity = 15 in {
// Setting the lowest element in the vector.
def : Pat<(v4i32 (movl VR128:$src1, VR128:$src2)),
- (MOVLPSrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVSSrr (v4i32 VR128:$src1),
+ (EXTRACT_SUBREG (v4i32 VR128:$src2), x86_subreg_ss))>;
def : Pat<(v2i64 (movl VR128:$src1, VR128:$src2)),
- (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVSDrr (v2i64 VR128:$src1),
+ (EXTRACT_SUBREG (v2i64 VR128:$src2), x86_subreg_sd))>;
-// vector_shuffle v1, v2 <4, 5, 2, 3> using MOVLPDrr (movsd)
+// vector_shuffle v1, v2 <4, 5, 2, 3> using movsd
def : Pat<(v4f32 (movlp VR128:$src1, VR128:$src2)),
- (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+ Requires<[HasSSE2]>;
def : Pat<(v4i32 (movlp VR128:$src1, VR128:$src2)),
- (MOVLPDrr VR128:$src1, VR128:$src2)>, Requires<[HasSSE2]>;
+ (MOVSDrr VR128:$src1, (EXTRACT_SUBREG VR128:$src2, x86_subreg_sd))>,
+ Requires<[HasSSE2]>;
}
// vector_shuffle v1, v2 <4, 5, 2, 3> using SHUFPSrri (we prefer movsd, but
// fall back to this for SSE1)
def : Pat<(v4f32 (movlp:$src3 VR128:$src1, (v4f32 VR128:$src2))),
(SHUFPSrri VR128:$src2, VR128:$src1,
- (SHUFFLE_get_shuf_imm VR128:$src3))>, Requires<[HasSSE1]>;
+ (SHUFFLE_get_shuf_imm VR128:$src3))>;
// Set lowest element and zero upper elements.
let AddedComplexity = 15 in
@@ -3188,30 +3228,30 @@ def : Pat<(v2i32 (fp_to_sint (v2f64 VR128:$src))),
// Use movaps / movups for SSE integer load / store (one byte shorter).
def : Pat<(alignedloadv4i32 addr:$src),
- (MOVAPSrm addr:$src)>, Requires<[HasSSE1]>;
+ (MOVAPSrm addr:$src)>;
def : Pat<(loadv4i32 addr:$src),
- (MOVUPSrm addr:$src)>, Requires<[HasSSE1]>;
+ (MOVUPSrm addr:$src)>;
def : Pat<(alignedloadv2i64 addr:$src),
- (MOVAPSrm addr:$src)>, Requires<[HasSSE2]>;
+ (MOVAPSrm addr:$src)>;
def : Pat<(loadv2i64 addr:$src),
- (MOVUPSrm addr:$src)>, Requires<[HasSSE2]>;
+ (MOVUPSrm addr:$src)>;
def : Pat<(alignedstore (v2i64 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v4i32 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v8i16 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(alignedstore (v16i8 VR128:$src), addr:$dst),
- (MOVAPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVAPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v2i64 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v4i32 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v8i16 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVUPSmr addr:$dst, VR128:$src)>;
def : Pat<(store (v16i8 VR128:$src), addr:$dst),
- (MOVUPSmr addr:$dst, VR128:$src)>, Requires<[HasSSE2]>;
+ (MOVUPSmr addr:$dst, VR128:$src)>;
//===----------------------------------------------------------------------===//
// SSE4.1 Instructions
@@ -3400,7 +3440,7 @@ let Constraints = "$src1 = $dst" in {
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
[(set VR128:$dst,
- (OpNode VR128:$src1, (memop addr:$src2)))]>, OpSize;
+ (OpVT (OpNode VR128:$src1, (memop addr:$src2))))]>, OpSize;
def rm_int : SS48I<opc, MRMSrcMem, (outs VR128:$dst),
(ins VR128:$src1, i128mem:$src2),
!strconcat(OpcodeStr, "\t{$src2, $dst|$dst, $src2}"),
diff --git a/lib/Target/X86/X86MCAsmInfo.cpp b/lib/Target/X86/X86MCAsmInfo.cpp
index 91c0fbb..250634f 100644
--- a/lib/Target/X86/X86MCAsmInfo.cpp
+++ b/lib/Target/X86/X86MCAsmInfo.cpp
@@ -55,6 +55,11 @@ X86MCAsmInfoDarwin::X86MCAsmInfoDarwin(const Triple &Triple) {
if (!is64Bit)
Data64bitsDirective = 0; // we can't emit a 64-bit unit
+ // Use ## as a comment string so that .s files generated by llvm can go
+ // through the GCC preprocessor without causing an error. This is needed
+ // because "clang foo.s" runs the C preprocessor, which is usually reserved
+ // for .S files on other systems. Perhaps this is because the file system
+ // wasn't always case preserving or something.
CommentString = "##";
PCSymbol = ".";
@@ -70,6 +75,8 @@ X86ELFMCAsmInfo::X86ELFMCAsmInfo(const Triple &Triple) {
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
+ TextAlignFillValue = 0x90;
+
PrivateGlobalPrefix = ".L";
WeakRefDirective = "\t.weak\t";
PCSymbol = ".";
@@ -94,4 +101,6 @@ MCSection *X86ELFMCAsmInfo::getNonexecutableStackSection(MCContext &Ctx) const {
X86MCAsmInfoCOFF::X86MCAsmInfoCOFF(const Triple &Triple) {
AsmTransCBE = x86_asm_table;
AssemblerDialect = AsmWriterFlavor;
-}
\ No newline at end of file
+
+ TextAlignFillValue = 0x90;
+}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 8524236..946d6b2 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -191,6 +191,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR16_NOREXRegClass;
else if (A == &X86::GR16_ABCDRegClass)
return &X86::GR16_ABCDRegClass;
+ } else if (B == &X86::FR32RegClass) {
+ return A;
}
break;
case 2:
@@ -207,6 +209,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
else if (A == &X86::GR16RegClass || A == &X86::GR16_ABCDRegClass ||
A == &X86::GR16_NOREXRegClass)
return &X86::GR16_ABCDRegClass;
+ } else if (B == &X86::FR64RegClass) {
+ return A;
}
break;
case 3:
@@ -234,6 +238,8 @@ X86RegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A,
return &X86::GR32_NOREXRegClass;
else if (A == &X86::GR32_ABCDRegClass)
return &X86::GR64_ABCDRegClass;
+ } else if (B == &X86::VR128RegClass) {
+ return A;
}
break;
case 4:
@@ -446,8 +452,10 @@ bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {
bool X86RegisterInfo::needsStackRealignment(const MachineFunction &MF) const {
const MachineFrameInfo *MFI = MF.getFrameInfo();
+ const Function *F = MF.getFunction();
bool requiresRealignment =
- RealignStack && (MFI->getMaxAlignment() > StackAlign);
+ RealignStack && ((MFI->getMaxAlignment() > StackAlign) ||
+ F->hasFnAttr(Attribute::StackAlignment));
// FIXME: Currently we don't support stack realignment for functions with
// variable-sized allocas.
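The realignment predicate after this change, restated as a standalone function (a sketch with the LLVM types stripped out, not backend code): realignment is now forced either by an over-aligned stack object or by an explicit stack-alignment attribute on the function.

    // Sketch of needsStackRealignment's new condition.
    bool needsRealignment(bool RealignStack, unsigned MaxObjectAlign,
                          unsigned StackAlign, bool HasStackAlignAttr) {
      return RealignStack &&
             (MaxObjectAlign > StackAlign || HasStackAlignAttr);
    }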
diff --git a/lib/Target/X86/X86RegisterInfo.h b/lib/Target/X86/X86RegisterInfo.h
index 8fb5e92..e4bdb4e 100644
--- a/lib/Target/X86/X86RegisterInfo.h
+++ b/lib/Target/X86/X86RegisterInfo.h
@@ -35,7 +35,8 @@ namespace X86 {
/// these indices must be kept in sync with the class indices in the
/// X86RegisterInfo.td file.
enum SubregIndex {
- SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4
+ SUBREG_8BIT = 1, SUBREG_8BIT_HI = 2, SUBREG_16BIT = 3, SUBREG_32BIT = 4,
+ SUBREG_SS = 1, SUBREG_SD = 2, SUBREG_XMM = 3
};
}
diff --git a/lib/Target/X86/X86RegisterInfo.td b/lib/Target/X86/X86RegisterInfo.td
index 1559bf7..ed2ce6c 100644
--- a/lib/Target/X86/X86RegisterInfo.td
+++ b/lib/Target/X86/X86RegisterInfo.td
@@ -158,22 +158,22 @@ let Namespace = "X86" in {
def XMM15: Register<"xmm15">, DwarfRegNum<[32, -2, -2]>;
// YMM Registers, used by AVX instructions
- def YMM0: Register<"ymm0">, DwarfRegNum<[17, 21, 21]>;
- def YMM1: Register<"ymm1">, DwarfRegNum<[18, 22, 22]>;
- def YMM2: Register<"ymm2">, DwarfRegNum<[19, 23, 23]>;
- def YMM3: Register<"ymm3">, DwarfRegNum<[20, 24, 24]>;
- def YMM4: Register<"ymm4">, DwarfRegNum<[21, 25, 25]>;
- def YMM5: Register<"ymm5">, DwarfRegNum<[22, 26, 26]>;
- def YMM6: Register<"ymm6">, DwarfRegNum<[23, 27, 27]>;
- def YMM7: Register<"ymm7">, DwarfRegNum<[24, 28, 28]>;
- def YMM8: Register<"ymm8">, DwarfRegNum<[25, -2, -2]>;
- def YMM9: Register<"ymm9">, DwarfRegNum<[26, -2, -2]>;
- def YMM10: Register<"ymm10">, DwarfRegNum<[27, -2, -2]>;
- def YMM11: Register<"ymm11">, DwarfRegNum<[28, -2, -2]>;
- def YMM12: Register<"ymm12">, DwarfRegNum<[29, -2, -2]>;
- def YMM13: Register<"ymm13">, DwarfRegNum<[30, -2, -2]>;
- def YMM14: Register<"ymm14">, DwarfRegNum<[31, -2, -2]>;
- def YMM15: Register<"ymm15">, DwarfRegNum<[32, -2, -2]>;
+ def YMM0: RegisterWithSubRegs<"ymm0", [XMM0]>, DwarfRegNum<[17, 21, 21]>;
+ def YMM1: RegisterWithSubRegs<"ymm1", [XMM1]>, DwarfRegNum<[18, 22, 22]>;
+ def YMM2: RegisterWithSubRegs<"ymm2", [XMM2]>, DwarfRegNum<[19, 23, 23]>;
+ def YMM3: RegisterWithSubRegs<"ymm3", [XMM3]>, DwarfRegNum<[20, 24, 24]>;
+ def YMM4: RegisterWithSubRegs<"ymm4", [XMM4]>, DwarfRegNum<[21, 25, 25]>;
+ def YMM5: RegisterWithSubRegs<"ymm5", [XMM5]>, DwarfRegNum<[22, 26, 26]>;
+ def YMM6: RegisterWithSubRegs<"ymm6", [XMM6]>, DwarfRegNum<[23, 27, 27]>;
+ def YMM7: RegisterWithSubRegs<"ymm7", [XMM7]>, DwarfRegNum<[24, 28, 28]>;
+ def YMM8: RegisterWithSubRegs<"ymm8", [XMM8]>, DwarfRegNum<[25, -2, -2]>;
+ def YMM9: RegisterWithSubRegs<"ymm9", [XMM9]>, DwarfRegNum<[26, -2, -2]>;
+ def YMM10: RegisterWithSubRegs<"ymm10", [XMM10]>, DwarfRegNum<[27, -2, -2]>;
+ def YMM11: RegisterWithSubRegs<"ymm11", [XMM11]>, DwarfRegNum<[28, -2, -2]>;
+ def YMM12: RegisterWithSubRegs<"ymm12", [XMM12]>, DwarfRegNum<[29, -2, -2]>;
+ def YMM13: RegisterWithSubRegs<"ymm13", [XMM13]>, DwarfRegNum<[30, -2, -2]>;
+ def YMM14: RegisterWithSubRegs<"ymm14", [XMM14]>, DwarfRegNum<[31, -2, -2]>;
+ def YMM15: RegisterWithSubRegs<"ymm15", [XMM15]>, DwarfRegNum<[32, -2, -2]>;
// Floating point stack registers
def ST0 : Register<"st(0)">, DwarfRegNum<[33, 12, 11]>;
@@ -238,6 +238,10 @@ def x86_subreg_8bit_hi : PatLeaf<(i32 2)>;
def x86_subreg_16bit : PatLeaf<(i32 3)>;
def x86_subreg_32bit : PatLeaf<(i32 4)>;
+def x86_subreg_ss : PatLeaf<(i32 1)>;
+def x86_subreg_sd : PatLeaf<(i32 2)>;
+def x86_subreg_xmm : PatLeaf<(i32 3)>;
+
def : SubRegSet<1, [AX, CX, DX, BX, SP, BP, SI, DI,
R8W, R9W, R10W, R11W, R12W, R13W, R14W, R15W],
[AL, CL, DL, BL, SPL, BPL, SIL, DIL,
@@ -277,11 +281,31 @@ def : SubRegSet<4, [RAX, RCX, RDX, RBX, RSP, RBP, RSI, RDI,
[EAX, ECX, EDX, EBX, ESP, EBP, ESI, EDI,
R8D, R9D, R10D, R11D, R12D, R13D, R14D, R15D]>;
-def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+def : SubRegSet<1, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<2, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
+ YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<3, [YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11, YMM12, YMM13, YMM14, YMM15],
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+def : SubRegSet<1, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
+def : SubRegSet<2, [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15],
+ [XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
+ XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15]>;
+
//===----------------------------------------------------------------------===//
// Register Class Definitions... now that we have all of the pieces, define the
// top-level register classes. The order specified in the register list is
@@ -793,6 +817,7 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
[XMM0, XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7,
XMM8, XMM9, XMM10, XMM11,
XMM12, XMM13, XMM14, XMM15]> {
+ let SubRegClassList = [FR32, FR64];
let MethodProtos = [{
iterator allocation_order_end(const MachineFunction &MF) const;
}];
@@ -811,7 +836,9 @@ def VR128 : RegisterClass<"X86", [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],128,
def VR256 : RegisterClass<"X86", [ v8i32, v4i64, v8f32, v4f64],256,
[YMM0, YMM1, YMM2, YMM3, YMM4, YMM5, YMM6, YMM7,
YMM8, YMM9, YMM10, YMM11,
- YMM12, YMM13, YMM14, YMM15]>;
+ YMM12, YMM13, YMM14, YMM15]> {
+ let SubRegClassList = [FR32, FR64, VR128];
+}
// Status flags registers.
def CCR : RegisterClass<"X86", [i32], 32, [EFLAGS]> {
diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h
index 5e05c2f..594a470 100644
--- a/lib/Target/X86/X86Subtarget.h
+++ b/lib/Target/X86/X86Subtarget.h
@@ -20,9 +20,9 @@
namespace llvm {
class GlobalValue;
class TargetMachine;
-
+
/// PICStyles - The X86 backend supports a number of different styles of PIC.
-///
+///
namespace PICStyles {
enum Style {
StubPIC, // Used on i386-darwin in -fPIC mode.
@@ -46,7 +46,7 @@ protected:
/// PICStyle - Which PIC style to use
///
PICStyles::Style PICStyle;
-
+
/// X86SSELevel - MMX, SSE1, SSE2, SSE3, SSSE3, SSE41, SSE42, or
/// none supported.
X86SSEEnum X86SSELevel;
@@ -58,7 +58,7 @@ protected:
/// HasCMov - True if this processor has conditional move instructions
/// (generally pentium pro+).
bool HasCMov;
-
+
/// HasX86_64 - True if the processor supports X86-64 instructions.
///
bool HasX86_64;
@@ -78,8 +78,9 @@ protected:
/// IsBTMemSlow - True if BT (bit test) of memory instructions are slow.
bool IsBTMemSlow;
- /// HasVectorUAMem - True if SIMD operations can have unaligned memory operands.
- /// This may require setting a feature bit in the processor.
+ /// HasVectorUAMem - True if SIMD operations can have unaligned memory
+ /// operands. This may require setting a feature bit in the
+ /// processor.
bool HasVectorUAMem;
/// DarwinVers - Nonzero if this is a darwin platform: the numeric
@@ -150,20 +151,20 @@ public:
bool isTargetDarwin() const { return TargetType == isDarwin; }
bool isTargetELF() const { return TargetType == isELF; }
-
+
bool isTargetWindows() const { return TargetType == isWindows; }
bool isTargetMingw() const { return TargetType == isMingw; }
bool isTargetCygwin() const { return TargetType == isCygwin; }
bool isTargetCygMing() const {
return TargetType == isMingw || TargetType == isCygwin;
}
-
+
/// isTargetCOFF - Return true if this is any COFF/Windows target variant.
bool isTargetCOFF() const {
return TargetType == isMingw || TargetType == isCygwin ||
TargetType == isWindows;
}
-
+
bool isTargetWin64() const {
return Is64Bit && (TargetType == isMingw || TargetType == isWindows);
}
@@ -196,11 +197,11 @@ public:
bool isPICStyleStubAny() const {
return PICStyle == PICStyles::StubDynamicNoPIC ||
PICStyle == PICStyles::StubPIC; }
-
+
/// getDarwinVers - Return the darwin version number, 8 = Tiger, 9 = Leopard,
/// 10 = Snow Leopard, etc.
unsigned getDarwinVers() const { return DarwinVers; }
-
+
/// ClassifyGlobalReference - Classify a global variable reference for the
/// current subtarget according to how we should reference it in a non-pcrel
/// context.
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 7802f98..56ddaf8 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -51,6 +51,12 @@ extern "C" void LLVMInitializeX86Target() {
createX86_32MCCodeEmitter);
TargetRegistry::RegisterCodeEmitter(TheX86_64Target,
createX86_64MCCodeEmitter);
+
+ // Register the asm backend.
+ TargetRegistry::RegisterAsmBackend(TheX86_32Target,
+ createX86_32AsmBackend);
+ TargetRegistry::RegisterAsmBackend(TheX86_64Target,
+ createX86_64AsmBackend);
}
diff --git a/lib/Target/X86/X86TargetObjectFile.cpp b/lib/Target/X86/X86TargetObjectFile.cpp
index d1ee3fc..29a0be5 100644
--- a/lib/Target/X86/X86TargetObjectFile.cpp
+++ b/lib/Target/X86/X86TargetObjectFile.cpp
@@ -18,38 +18,6 @@
using namespace llvm;
using namespace dwarf;
-const MCExpr *X8632_MachoTargetObjectFile::
-getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding) const {
- // The mach-o version of this method defaults to returning a stub reference.
-
- if (Encoding & DW_EH_PE_indirect) {
- MachineModuleInfoMachO &MachOMMI =
- MMI->getObjFileInfo<MachineModuleInfoMachO>();
-
- SmallString<128> Name;
- Mang->getNameWithPrefix(Name, GV, true);
- Name += "$non_lazy_ptr";
-
- // Add information about the stub reference to MachOMMI so that the stub
- // gets emitted by the asmprinter.
- MCSymbol *Sym = getContext().GetOrCreateSymbol(Name.str());
- MCSymbol *&StubSym = MachOMMI.getGVStubEntry(Sym);
- if (StubSym == 0) {
- Name.clear();
- Mang->getNameWithPrefix(Name, GV, false);
- StubSym = getContext().GetOrCreateSymbol(Name.str());
- }
-
- return TargetLoweringObjectFile::
- getSymbolForDwarfReference(Sym, MMI,
- Encoding & ~dwarf::DW_EH_PE_indirect);
- }
-
- return TargetLoweringObjectFileMachO::
- getSymbolForDwarfGlobalReference(GV, Mang, MMI, Encoding);
-}
-
const MCExpr *X8664_MachoTargetObjectFile::
getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding) const {
@@ -148,35 +116,3 @@ unsigned X8664_ELFTargetObjectFile::getTTypeEncoding() const {
return DW_EH_PE_absptr;
}
-
-unsigned X8632_MachoTargetObjectFile::getPersonalityEncoding() const {
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8632_MachoTargetObjectFile::getLSDAEncoding() const {
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8632_MachoTargetObjectFile::getFDEEncoding() const {
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8632_MachoTargetObjectFile::getTTypeEncoding() const {
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8664_MachoTargetObjectFile::getPersonalityEncoding() const {
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8664_MachoTargetObjectFile::getLSDAEncoding() const {
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8664_MachoTargetObjectFile::getFDEEncoding() const {
- return DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
-
-unsigned X8664_MachoTargetObjectFile::getTTypeEncoding() const {
- return DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4;
-}
diff --git a/lib/Target/X86/X86TargetObjectFile.h b/lib/Target/X86/X86TargetObjectFile.h
index 0fff194..0444417 100644
--- a/lib/Target/X86/X86TargetObjectFile.h
+++ b/lib/Target/X86/X86TargetObjectFile.h
@@ -17,20 +17,6 @@
namespace llvm {
class X86TargetMachine;
- /// X8632_MachoTargetObjectFile - This TLOF implementation is used for
- /// Darwin/x86-32.
- class X8632_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
- public:
-
- virtual const MCExpr *
- getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
- MachineModuleInfo *MMI, unsigned Encoding) const;
- virtual unsigned getPersonalityEncoding() const;
- virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding() const;
- virtual unsigned getTTypeEncoding() const;
- };
-
/// X8664_MachoTargetObjectFile - This TLOF implementation is used for
/// Darwin/x86-64.
class X8664_MachoTargetObjectFile : public TargetLoweringObjectFileMachO {
@@ -39,17 +25,13 @@ namespace llvm {
virtual const MCExpr *
getSymbolForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang,
MachineModuleInfo *MMI, unsigned Encoding) const;
- virtual unsigned getPersonalityEncoding() const;
- virtual unsigned getLSDAEncoding() const;
- virtual unsigned getFDEEncoding() const;
- virtual unsigned getTTypeEncoding() const;
};
class X8632_ELFTargetObjectFile : public TargetLoweringObjectFileELF {
const X86TargetMachine &TM;
public:
X8632_ELFTargetObjectFile(const X86TargetMachine &tm)
- :TM(tm) { };
+ :TM(tm) { }
virtual unsigned getPersonalityEncoding() const;
virtual unsigned getLSDAEncoding() const;
virtual unsigned getFDEEncoding() const;
@@ -60,7 +42,7 @@ namespace llvm {
const X86TargetMachine &TM;
public:
X8664_ELFTargetObjectFile(const X86TargetMachine &tm)
- :TM(tm) { };
+ :TM(tm) { }
virtual unsigned getPersonalityEncoding() const;
virtual unsigned getLSDAEncoding() const;
virtual unsigned getFDEEncoding() const;
diff --git a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
index d18f55d..82e23a1 100644
--- a/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
+++ b/lib/Target/XCore/AsmPrinter/XCoreAsmPrinter.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineConstantPool.h"
#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/MC/MCStreamer.h"
#include "llvm/MC/MCSymbol.h"
#include "llvm/Target/TargetData.h"
@@ -62,6 +63,11 @@ namespace {
}
void printMemOperand(const MachineInstr *MI, int opNum);
+ void printInlineJT(const MachineInstr *MI, int opNum,
+ const std::string &directive = ".jmptable");
+ void printInlineJT32(const MachineInstr *MI, int opNum) {
+ printInlineJT(MI, opNum, ".jmptable32");
+ }
void printOperand(const MachineInstr *MI, int opNum);
bool PrintAsmOperand(const MachineInstr *MI, unsigned OpNo,
unsigned AsmVariant, const char *ExtraCode);
@@ -257,6 +263,23 @@ void XCoreAsmPrinter::printMemOperand(const MachineInstr *MI, int opNum)
printOperand(MI, opNum+1);
}
+void XCoreAsmPrinter::
+printInlineJT(const MachineInstr *MI, int opNum, const std::string &directive)
+{
+ unsigned JTI = MI->getOperand(opNum).getIndex();
+ const MachineFunction *MF = MI->getParent()->getParent();
+ const MachineJumpTableInfo *MJTI = MF->getJumpTableInfo();
+ const std::vector<MachineJumpTableEntry> &JT = MJTI->getJumpTables();
+ const std::vector<MachineBasicBlock*> &JTBBs = JT[JTI].MBBs;
+ O << "\t" << directive << " ";
+ for (unsigned i = 0, e = JTBBs.size(); i != e; ++i) {
+ MachineBasicBlock *MBB = JTBBs[i];
+ if (i > 0)
+ O << ",";
+ O << *MBB->getSymbol(OutContext);
+ }
+}
+
void XCoreAsmPrinter::printOperand(const MachineInstr *MI, int opNum) {
const MachineOperand &MO = MI->getOperand(opNum);
switch (MO.getType()) {
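For reference, printInlineJT above joins the jump table's basic-block labels into one directive line. A standalone rendering of that loop, with hypothetical label names standing in for MBB->getSymbol():

    #include <cstdio>

    int main() {
      const char *JTBBs[] = {".LBB0_1", ".LBB0_2", ".LBB0_3"}; // hypothetical
      printf("\t%s ", ".jmptable");
      for (unsigned i = 0; i != 3; ++i)
        printf("%s%s", i ? "," : "", JTBBs[i]);
      printf("\n"); // emits: "\t.jmptable .LBB0_1,.LBB0_2,.LBB0_3"
      return 0;
    }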
diff --git a/lib/Target/XCore/XCoreISelDAGToDAG.cpp b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
index 383fd91..b1ab132 100644
--- a/lib/Target/XCore/XCoreISelDAGToDAG.cpp
+++ b/lib/Target/XCore/XCoreISelDAGToDAG.cpp
@@ -65,8 +65,6 @@ namespace {
bool SelectADDRcpii(SDNode *Op, SDValue Addr, SDValue &Base,
SDValue &Offset);
- virtual void InstructionSelect();
-
virtual const char *getPassName() const {
return "XCore DAG->DAG Pattern Instruction Selection";
}
@@ -147,15 +145,6 @@ bool XCoreDAGToDAGISel::SelectADDRcpii(SDNode *Op, SDValue Addr,
return false;
}
-/// InstructionSelect - This callback is invoked by
-/// SelectionDAGISel when it has created a SelectionDAG for us to codegen.
-void XCoreDAGToDAGISel::InstructionSelect() {
- // Select target instructions for the DAG.
- SelectRoot(*CurDAG);
-
- CurDAG->RemoveDeadNodes();
-}
-
SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
DebugLoc dl = N->getDebugLoc();
EVT NVT = N->getValueType(0);
@@ -164,7 +153,11 @@ SDNode *XCoreDAGToDAGISel::Select(SDNode *N) {
default: break;
case ISD::Constant: {
if (Predicate_immMskBitp(N)) {
- SDValue MskSize = Transform_msksize_xform(N);
+ // Transformation function: get the size of a mask
+ int64_t MaskVal = cast<ConstantSDNode>(N)->getZExtValue();
+ assert(isMask_32(MaskVal));
+ // The mask width is one past the most significant set bit
+ SDValue MskSize = getI32Imm(32 - CountLeadingZeros_32(MaskVal));
return CurDAG->getMachineNode(XCore::MKMSK_rus, dl,
MVT::i32, MskSize);
}
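The inlined msksize transform above turns a contiguous low-bit mask into its width: 32 - CountLeadingZeros_32(Mask) counts the set bits, which is the operand MKMSK needs to rebuild the mask. A self-contained check of that arithmetic, with local stand-ins for the MathExtras.h helpers:

    #include <cassert>
    #include <cstdint>

    static bool isMask32(uint32_t V) { return V != 0 && ((V + 1) & V) == 0; }

    static unsigned countLeadingZeros32(uint32_t V) {
      unsigned N = 0;
      for (uint32_t Bit = 1u << 31; Bit != 0 && !(V & Bit); Bit >>= 1)
        ++N;
      return N;
    }

    int main() {
      assert(isMask32(0xFF) && 32 - countLeadingZeros32(0xFF) == 8); // MKMSK 8
      assert(isMask32(0x1) && 32 - countLeadingZeros32(0x1) == 1);   // MKMSK 1
      return 0;
    }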
diff --git a/lib/Target/XCore/XCoreISelLowering.cpp b/lib/Target/XCore/XCoreISelLowering.cpp
index 57fd43b..e6515d8 100644
--- a/lib/Target/XCore/XCoreISelLowering.cpp
+++ b/lib/Target/XCore/XCoreISelLowering.cpp
@@ -29,6 +29,7 @@
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineJumpTableInfo.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
@@ -53,6 +54,8 @@ getTargetNodeName(unsigned Opcode) const
case XCoreISD::RETSP : return "XCoreISD::RETSP";
case XCoreISD::LADD : return "XCoreISD::LADD";
case XCoreISD::LSUB : return "XCoreISD::LSUB";
+ case XCoreISD::BR_JT : return "XCoreISD::BR_JT";
+ case XCoreISD::BR_JT32 : return "XCoreISD::BR_JT32";
default : return NULL;
}
}
@@ -106,9 +109,8 @@ XCoreTargetLowering::XCoreTargetLowering(XCoreTargetMachine &XTM)
setOperationAction(ISD::TRAP, MVT::Other, Legal);
- // Expand jump tables for now
- setOperationAction(ISD::BR_JT, MVT::Other, Expand);
- setOperationAction(ISD::JumpTable, MVT::i32, Custom);
+ // Jump tables.
+ setOperationAction(ISD::BR_JT, MVT::Other, Custom);
setOperationAction(ISD::GlobalAddress, MVT::i32, Custom);
setOperationAction(ISD::BlockAddress, MVT::i32 , Custom);
@@ -157,7 +159,7 @@ LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::GlobalTLSAddress: return LowerGlobalTLSAddress(Op, DAG);
case ISD::BlockAddress: return LowerBlockAddress(Op, DAG);
case ISD::ConstantPool: return LowerConstantPool(Op, DAG);
- case ISD::JumpTable: return LowerJumpTable(Op, DAG);
+ case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::LOAD: return LowerLOAD(Op, DAG);
case ISD::STORE: return LowerSTORE(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
@@ -315,14 +317,27 @@ LowerConstantPool(SDValue Op, SelectionDAG &DAG)
}
SDValue XCoreTargetLowering::
-LowerJumpTable(SDValue Op, SelectionDAG &DAG)
+LowerBR_JT(SDValue Op, SelectionDAG &DAG)
{
- // FIXME there isn't really debug info here
+ SDValue Chain = Op.getOperand(0);
+ SDValue Table = Op.getOperand(1);
+ SDValue Index = Op.getOperand(2);
DebugLoc dl = Op.getDebugLoc();
- EVT PtrVT = Op.getValueType();
- JumpTableSDNode *JT = cast<JumpTableSDNode>(Op);
- SDValue JTI = DAG.getTargetJumpTable(JT->getIndex(), PtrVT);
- return DAG.getNode(XCoreISD::DPRelativeWrapper, dl, MVT::i32, JTI);
+ JumpTableSDNode *JT = cast<JumpTableSDNode>(Table);
+ unsigned JTI = JT->getIndex();
+ MachineFunction &MF = DAG.getMachineFunction();
+ const MachineJumpTableInfo *MJTI = MF.getJumpTableInfo();
+ SDValue TargetJT = DAG.getTargetJumpTable(JT->getIndex(), MVT::i32);
+
+ unsigned NumEntries = MJTI->getJumpTables()[JTI].MBBs.size();
+ if (NumEntries <= 32) {
+ return DAG.getNode(XCoreISD::BR_JT, dl, MVT::Other, Chain, TargetJT, Index);
+ }
+ assert((NumEntries >> 31) == 0);
+ SDValue ScaledIndex = DAG.getNode(ISD::SHL, dl, MVT::i32, Index,
+ DAG.getConstant(1, MVT::i32));
+ return DAG.getNode(XCoreISD::BR_JT32, dl, MVT::Other, Chain, TargetJT,
+ ScaledIndex);
}
static bool
@@ -458,7 +473,7 @@ LowerLOAD(SDValue Op, SelectionDAG &DAG)
false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_load", getPointerTy()),
- Args, DAG, dl, DAG.GetOrdering(Chain.getNode()));
+ Args, DAG, dl);
SDValue Ops[] =
{ CallResult.first, CallResult.second };
@@ -521,7 +536,7 @@ LowerSTORE(SDValue Op, SelectionDAG &DAG)
false, false, 0, CallingConv::C, false,
/*isReturnValueUsed=*/true,
DAG.getExternalSymbol("__misaligned_store", getPointerTy()),
- Args, DAG, dl, DAG.GetOrdering(Chain.getNode()));
+ Args, DAG, dl);
return CallResult.second;
}
@@ -1146,10 +1161,8 @@ static inline bool isImmUs4(int64_t val)
bool
XCoreTargetLowering::isLegalAddressingMode(const AddrMode &AM,
const Type *Ty) const {
- // Be conservative with void
- // FIXME: Can we be more aggressive?
if (Ty->getTypeID() == Type::VoidTyID)
- return false;
+ return AM.Scale == 0 && isImmUs(AM.BaseOffs) && isImmUs4(AM.BaseOffs);
const TargetData *TD = TM.getTargetData();
unsigned Size = TD->getTypeAllocSize(Ty);
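The BR_JT lowering above picks a pseudo-instruction by table size: up to 32 entries the index is used directly, while larger tables shift the index left by one, presumably because each long-form entry is twice the size of a short one. A sketch of just that decision (illustrative helper, not backend code):

    #include <cstdint>

    enum JumpTableForm { ShortEntries, LongEntries };

    // Mirrors the size test in LowerBR_JT; Index is scaled in place.
    JumpTableForm chooseForm(uint32_t NumEntries, uint32_t &Index) {
      if (NumEntries <= 32)
        return ShortEntries; // BR_JT: index entries directly
      Index <<= 1;           // BR_JT32: two slots per 32-bit entry
      return LongEntries;
    }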
diff --git a/lib/Target/XCore/XCoreISelLowering.h b/lib/Target/XCore/XCoreISelLowering.h
index 5095f36..0c638af 100644
--- a/lib/Target/XCore/XCoreISelLowering.h
+++ b/lib/Target/XCore/XCoreISelLowering.h
@@ -52,7 +52,13 @@ namespace llvm {
LADD,
// Corresponds to LSUB instruction
- LSUB
+ LSUB,
+
+ // Jumptable branch.
+ BR_JT,
+
+ // Jumptable branch using long branches for each entry.
+ BR_JT32
};
}
@@ -122,7 +128,7 @@ namespace llvm {
SDValue LowerGlobalTLSAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerBlockAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerConstantPool(SDValue Op, SelectionDAG &DAG);
- SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG);
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
SDValue LowerVAARG(SDValue Op, SelectionDAG &DAG);
SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index 5a54844..722e747 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -145,6 +145,11 @@ static inline bool IsCondBranch(unsigned BrOpc) {
return IsBRF(BrOpc) || IsBRT(BrOpc);
}
+static inline bool IsBR_JT(unsigned BrOpc) {
+ return BrOpc == XCore::BR_JT
+ || BrOpc == XCore::BR_JT32;
+}
+
/// GetCondFromBranchOpc - Return the XCore CC that matches
/// the correspondent Branch instruction opcode.
static XCore::CondCode GetCondFromBranchOpc(unsigned BrOpc)
@@ -271,6 +276,14 @@ XCoreInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, MachineBasicBlock *&TBB,
return false;
}
+ // Likewise if it ends with a branch table followed by an unconditional branch.
+ if (IsBR_JT(SecondLastInst->getOpcode()) && IsBRU(LastInst->getOpcode())) {
+ I = LastInst;
+ if (AllowModify)
+ I->eraseFromParent();
+ return true;
+ }
+
// Otherwise, can't handle this.
return true;
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.td b/lib/Target/XCore/XCoreInstrInfo.td
index 10dc18c..46805d5 100644
--- a/lib/Target/XCore/XCoreInstrInfo.td
+++ b/lib/Target/XCore/XCoreInstrInfo.td
@@ -34,6 +34,15 @@ def XCoreBranchLink : SDNode<"XCoreISD::BL",SDT_XCoreBranchLink,
def XCoreRetsp : SDNode<"XCoreISD::RETSP", SDTNone,
[SDNPHasChain, SDNPOptInFlag]>;
+def SDT_XCoreBR_JT : SDTypeProfile<0, 2,
+ [SDTCisVT<0, i32>, SDTCisVT<1, i32>]>;
+
+def XCoreBR_JT : SDNode<"XCoreISD::BR_JT", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
+def XCoreBR_JT32 : SDNode<"XCoreISD::BR_JT32", SDT_XCoreBR_JT,
+ [SDNPHasChain]>;
+
def SDT_XCoreAddress : SDTypeProfile<1, 1,
[SDTCisSameAs<0, 1>, SDTCisPtrTy<0>]>;
@@ -185,6 +194,15 @@ def MEMii : Operand<i32> {
let MIOperandInfo = (ops i32imm, i32imm);
}
+// Jump tables.
+def InlineJT : Operand<i32> {
+ let PrintMethod = "printInlineJT";
+}
+
+def InlineJT32 : Operand<i32> {
+ let PrintMethod = "printInlineJT32";
+}
+
//===----------------------------------------------------------------------===//
// Instruction Class Templates
//===----------------------------------------------------------------------===//
@@ -624,7 +642,7 @@ defm RETSP : FU6_LU6<"retsp", XCoreRetsp>;
// TODO extdp, kentsp, krestsp, blat, setsr
// clrsr, getsr, kalli
-let isBranch = 1, isTerminator = 1 in {
+let isBranch = 1, isTerminator = 1, isBarrier = 1 in {
def BRBU_u6 : _FU6<
(outs),
(ins brtarget:$target),
@@ -756,24 +774,34 @@ def CLZ_l2r : _FL2R<(outs GRRegs:$dst), (ins GRRegs:$src),
// One operand short
// TODO edu, eeu, waitet, waitef, freer, tstart, msync, mjoin, syncr, clrtp
-// bru, setdp, setcp, setv, setev, kcall
+// setdp, setcp, setv, setev, kcall
// dgetreg
-let isBranch=1, isIndirectBranch=1, isTerminator=1 in
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
def BAU_1r : _F1R<(outs), (ins GRRegs:$addr),
"bau $addr",
[(brind GRRegs:$addr)]>;
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT : PseudoInstXCore<(outs), (ins InlineJT:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT tjumptable:$t, GRRegs:$i)]>;
+
+let isBranch=1, isIndirectBranch=1, isTerminator=1, isBarrier = 1 in
+def BR_JT32 : PseudoInstXCore<(outs), (ins InlineJT32:$t, GRRegs:$i),
+ "bru $i\n$t",
+ [(XCoreBR_JT32 tjumptable:$t, GRRegs:$i)]>;
+
let Defs=[SP], neverHasSideEffects=1 in
def SETSP_1r : _F1R<(outs), (ins GRRegs:$src),
"set sp, $src",
[]>;
-let isBarrier = 1, hasCtrlDep = 1 in
+let hasCtrlDep = 1 in
def ECALLT_1r : _F1R<(outs), (ins GRRegs:$src),
"ecallt $src",
[]>;
-let isBarrier = 1, hasCtrlDep = 1 in
+let hasCtrlDep = 1 in
def ECALLF_1r : _F1R<(outs), (ins GRRegs:$src),
"ecallf $src",
[]>;
diff --git a/lib/Transforms/Hello/Hello.cpp b/lib/Transforms/Hello/Hello.cpp
index eac4e17..37d7a00 100644
--- a/lib/Transforms/Hello/Hello.cpp
+++ b/lib/Transforms/Hello/Hello.cpp
@@ -15,7 +15,6 @@
#define DEBUG_TYPE "hello"
#include "llvm/Pass.h"
#include "llvm/Function.h"
-#include "llvm/ADT/StringExtras.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
diff --git a/lib/Transforms/IPO/ArgumentPromotion.cpp b/lib/Transforms/IPO/ArgumentPromotion.cpp
index 325d353..7cb1367 100644
--- a/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -124,7 +124,7 @@ CallGraphNode *ArgPromotion::PromoteArguments(CallGraphNode *CGN) {
unsigned ArgNo = 0;
for (Function::arg_iterator I = F->arg_begin(), E = F->arg_end();
I != E; ++I, ++ArgNo)
- if (isa<PointerType>(I->getType()))
+ if (I->getType()->isPointerTy())
PointerArgs.push_back(std::pair<Argument*, unsigned>(I, ArgNo));
if (PointerArgs.empty()) return 0;
@@ -317,7 +317,7 @@ bool ArgPromotion::isSafeToPromoteArgument(Argument *Arg, bool isByVal) const {
GEPIndicesSet ToPromote;
// If the pointer is always valid, any load with first index 0 is valid.
- if(isByVal || AllCalleesPassInValidPointerForArgument(Arg))
+ if (isByVal || AllCalleesPassInValidPointerForArgument(Arg))
SafeToUnconditionallyLoad.insert(IndicesVector(1, 0));
// First, iterate the entry block and mark loads of (geps of) arguments as
@@ -673,7 +673,7 @@ CallGraphNode *ArgPromotion::DoPromotion(Function *F,
IE = SI->end(); II != IE; ++II) {
// Use i32 to index structs, and i64 for others (pointers/arrays).
// This satisfies GEP constraints.
- const Type *IdxTy = (isa<StructType>(ElTy) ?
+ const Type *IdxTy = (ElTy->isStructTy() ?
Type::getInt32Ty(F->getContext()) :
Type::getInt64Ty(F->getContext()));
Ops.push_back(ConstantInt::get(IdxTy, *II));
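Many hunks from here on make the same mechanical substitution: isa<PointerType>(T) becomes T->isPointerTy(), and likewise for struct, array, and vector types. The two spellings are equivalent; the member predicates simply skip the cast machinery. A minimal equivalence sketch (includes are approximate for this era of the tree):

    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // Old and new spellings of the same test.
    static bool isPtrOld(const Type *T) { return isa<PointerType>(T); }
    static bool isPtrNew(const Type *T) { return T->isPointerTy(); }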
diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 1749b1e..f386ed7 100644
--- a/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -796,7 +796,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
// Replace by null for now.
Call->replaceAllUsesWith(Constant::getNullValue(Call->getType()));
} else {
- assert(isa<StructType>(RetTy) &&
+ assert(RetTy->isStructTy() &&
"Return type changed, but not into a void. The old return type"
" must have been a struct!");
Instruction *InsertPt = Call;
@@ -870,7 +870,7 @@ bool DAE::RemoveDeadStuffFromFunction(Function *F) {
if (NFTy->getReturnType() == Type::getVoidTy(F->getContext())) {
RetVal = 0;
} else {
- assert (isa<StructType>(RetTy));
+ assert (RetTy->isStructTy());
// The original return value was a struct, insert
// extractvalue/insertvalue chains to extract only the values we need
// to return and insert them into our new result.
diff --git a/lib/Transforms/IPO/FunctionAttrs.cpp b/lib/Transforms/IPO/FunctionAttrs.cpp
index 64a6d78..298d5cf 100644
--- a/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -175,7 +175,7 @@ bool FunctionAttrs::AddReadAttrs(const std::vector<CallGraphNode *> &SCC) {
for (CallSite::arg_iterator CI = CS.arg_begin(), CE = CS.arg_end();
CI != CE; ++CI) {
Value *Arg = *CI;
- if (isa<PointerType>(Arg->getType()) && !PointsToLocalMemory(Arg))
+ if (Arg->getType()->isPointerTy() && !PointsToLocalMemory(Arg))
// Writes memory. Just give up.
return false;
}
@@ -257,7 +257,7 @@ bool FunctionAttrs::AddNoCaptureAttrs(const std::vector<CallGraphNode *> &SCC) {
continue;
for (Function::arg_iterator A = F->arg_begin(), E = F->arg_end(); A!=E; ++A)
- if (isa<PointerType>(A->getType()) && !A->hasNoCaptureAttr() &&
+ if (A->getType()->isPointerTy() && !A->hasNoCaptureAttr() &&
!PointerMayBeCaptured(A, true, /*StoreCaptures=*/false)) {
A->addAttr(Attribute::NoCapture);
++NumNoCapture;
@@ -362,7 +362,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
// We annotate noalias return values, which are only applicable to
// pointer types.
- if (!isa<PointerType>(F->getReturnType()))
+ if (!F->getReturnType()->isPointerTy())
continue;
if (!IsFunctionMallocLike(F, SCCNodes))
@@ -372,7 +372,7 @@ bool FunctionAttrs::AddNoAliasAttrs(const std::vector<CallGraphNode *> &SCC) {
bool MadeChange = false;
for (unsigned i = 0, e = SCC.size(); i != e; ++i) {
Function *F = SCC[i]->getFunction();
- if (F->doesNotAlias(0) || !isa<PointerType>(F->getReturnType()))
+ if (F->doesNotAlias(0) || !F->getReturnType()->isPointerTy())
continue;
F->setDoesNotAlias(0);
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index df060eb..7b1e9c0 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -303,7 +303,7 @@ static bool CleanupConstantGlobalUsers(Value *V, Constant *Init) {
SubInit = ConstantFoldLoadThroughGEPConstantExpr(Init, CE);
Changed |= CleanupConstantGlobalUsers(CE, SubInit);
} else if (CE->getOpcode() == Instruction::BitCast &&
- isa<PointerType>(CE->getType())) {
+ CE->getType()->isPointerTy()) {
// Pointer cast, delete any stores and memsets to the global.
Changed |= CleanupConstantGlobalUsers(CE, 0);
}
@@ -431,7 +431,7 @@ static bool IsUserOfGlobalSafeForSRA(User *U, GlobalValue *GV) {
else if (const VectorType *SubVectorTy = dyn_cast<VectorType>(*GEPI))
NumElements = SubVectorTy->getNumElements();
else {
- assert(isa<StructType>(*GEPI) &&
+ assert((*GEPI)->isStructTy() &&
"Indexed GEP type is not array, vector, or struct!");
continue;
}
@@ -543,7 +543,7 @@ static GlobalVariable *SRAGlobal(GlobalVariable *GV, const TargetData &TD) {
if (NewGlobals.empty())
return 0;
-
+
DEBUG(dbgs() << "PERFORMING GLOBAL SRA ON: " << *GV);
Constant *NullInt =Constant::getNullValue(Type::getInt32Ty(GV->getContext()));
@@ -642,7 +642,7 @@ static bool AllUsesOfValueWillTrapIfNull(Value *V,
return false;
} else if (isa<ICmpInst>(*UI) &&
isa<ConstantPointerNull>(UI->getOperand(1))) {
- // Ignore setcc X, null
+ // Ignore icmp X, null
} else {
//cerr << "NONTRAPPING USE: " << **UI;
return false;
@@ -813,57 +813,47 @@ static void ConstantPropUsersOf(Value *V) {
static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
CallInst *CI,
const Type *AllocTy,
- Value* NElems,
+ ConstantInt *NElements,
TargetData* TD) {
- DEBUG(dbgs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
-
- const Type *IntPtrTy = TD->getIntPtrType(GV->getContext());
+ DEBUG(errs() << "PROMOTING GLOBAL: " << *GV << " CALL = " << *CI << '\n');
- // CI has either 0 or 1 bitcast uses (getMallocType() would otherwise have
- // returned NULL and we would not be here).
- BitCastInst *BCI = NULL;
- for (Value::use_iterator UI = CI->use_begin(), E = CI->use_end(); UI != E; )
- if ((BCI = dyn_cast<BitCastInst>(cast<Instruction>(*UI++))))
- break;
-
- ConstantInt *NElements = cast<ConstantInt>(NElems);
- if (NElements->getZExtValue() != 1) {
- // If we have an array allocation, transform it to a single element
- // allocation to make the code below simpler.
- Type *NewTy = ArrayType::get(AllocTy, NElements->getZExtValue());
- unsigned TypeSize = TD->getTypeAllocSize(NewTy);
- if (const StructType *ST = dyn_cast<StructType>(NewTy))
- TypeSize = TD->getStructLayout(ST)->getSizeInBytes();
- Instruction *NewCI = CallInst::CreateMalloc(CI, IntPtrTy, NewTy,
- ConstantInt::get(IntPtrTy, TypeSize));
- Value* Indices[2];
- Indices[0] = Indices[1] = Constant::getNullValue(IntPtrTy);
- Value *NewGEP = GetElementPtrInst::Create(NewCI, Indices, Indices + 2,
- NewCI->getName()+".el0", CI);
- Value *Cast = new BitCastInst(NewGEP, CI->getType(), "el0", CI);
- if (BCI) BCI->replaceAllUsesWith(NewGEP);
- CI->replaceAllUsesWith(Cast);
- if (BCI) BCI->eraseFromParent();
- CI->eraseFromParent();
- BCI = dyn_cast<BitCastInst>(NewCI);
- CI = BCI ? extractMallocCallFromBitCast(BCI) : cast<CallInst>(NewCI);
- }
+ const Type *GlobalType;
+ if (NElements->getZExtValue() == 1)
+ GlobalType = AllocTy;
+ else
+ // If we have an array allocation, the global variable is an array of that type.
+ GlobalType = ArrayType::get(AllocTy, NElements->getZExtValue());
// Create the new global variable. The contents of the malloc'd memory are
// undefined, so initialize with an undef value.
- const Type *MAT = getMallocAllocatedType(CI);
- Constant *Init = UndefValue::get(MAT);
GlobalVariable *NewGV = new GlobalVariable(*GV->getParent(),
- MAT, false,
- GlobalValue::InternalLinkage, Init,
+ GlobalType, false,
+ GlobalValue::InternalLinkage,
+ UndefValue::get(GlobalType),
GV->getName()+".body",
GV,
GV->isThreadLocal());
- // Anything that used the malloc or its bitcast now uses the global directly.
- if (BCI) BCI->replaceAllUsesWith(NewGV);
- CI->replaceAllUsesWith(new BitCastInst(NewGV, CI->getType(), "newgv", CI));
-
+ // If there are bitcast users of the malloc (which is typical; usually we have
+ // a malloc + bitcast) then replace them with uses of the new global. Update
+ // other users to use the global as well.
+ BitCastInst *TheBC = 0;
+ while (!CI->use_empty()) {
+ Instruction *User = cast<Instruction>(CI->use_back());
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(User)) {
+ if (BCI->getType() == NewGV->getType()) {
+ BCI->replaceAllUsesWith(NewGV);
+ BCI->eraseFromParent();
+ } else {
+ BCI->setOperand(0, NewGV);
+ }
+ } else {
+ if (TheBC == 0)
+ TheBC = new BitCastInst(NewGV, CI->getType(), "newgv", CI);
+ User->replaceUsesOfWith(CI, TheBC);
+ }
+ }
+
Constant *RepValue = NewGV;
if (NewGV->getType() != GV->getType()->getElementType())
RepValue = ConstantExpr::getBitCast(RepValue,
@@ -879,60 +869,60 @@ static GlobalVariable *OptimizeGlobalAddressOfMalloc(GlobalVariable *GV,
bool InitBoolUsed = false;
// Loop over all uses of GV, processing them in turn.
- std::vector<StoreInst*> Stores;
- while (!GV->use_empty())
- if (LoadInst *LI = dyn_cast<LoadInst>(GV->use_back())) {
- while (!LI->use_empty()) {
- Use &LoadUse = LI->use_begin().getUse();
- if (!isa<ICmpInst>(LoadUse.getUser()))
- LoadUse = RepValue;
- else {
- ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
- // Replace the cmp X, 0 with a use of the bool value.
- Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
- InitBoolUsed = true;
- switch (ICI->getPredicate()) {
- default: llvm_unreachable("Unknown ICmp Predicate!");
- case ICmpInst::ICMP_ULT:
- case ICmpInst::ICMP_SLT: // X < null -> always false
- LV = ConstantInt::getFalse(GV->getContext());
- break;
- case ICmpInst::ICMP_ULE:
- case ICmpInst::ICMP_SLE:
- case ICmpInst::ICMP_EQ:
- LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
- break;
- case ICmpInst::ICMP_NE:
- case ICmpInst::ICMP_UGE:
- case ICmpInst::ICMP_SGE:
- case ICmpInst::ICMP_UGT:
- case ICmpInst::ICMP_SGT:
- break; // no change.
- }
- ICI->replaceAllUsesWith(LV);
- ICI->eraseFromParent();
- }
- }
- LI->eraseFromParent();
- } else {
- StoreInst *SI = cast<StoreInst>(GV->use_back());
+ while (!GV->use_empty()) {
+ if (StoreInst *SI = dyn_cast<StoreInst>(GV->use_back())) {
// The global is initialized when the store to it occurs.
new StoreInst(ConstantInt::getTrue(GV->getContext()), InitBool, SI);
SI->eraseFromParent();
+ continue;
}
+
+ LoadInst *LI = cast<LoadInst>(GV->use_back());
+ while (!LI->use_empty()) {
+ Use &LoadUse = LI->use_begin().getUse();
+ if (!isa<ICmpInst>(LoadUse.getUser())) {
+ LoadUse = RepValue;
+ continue;
+ }
+
+ ICmpInst *ICI = cast<ICmpInst>(LoadUse.getUser());
+ // Replace the cmp X, 0 with a use of the bool value.
+ Value *LV = new LoadInst(InitBool, InitBool->getName()+".val", ICI);
+ InitBoolUsed = true;
+ switch (ICI->getPredicate()) {
+ default: llvm_unreachable("Unknown ICmp Predicate!");
+ case ICmpInst::ICMP_ULT:
+ case ICmpInst::ICMP_SLT: // X < null -> always false
+ LV = ConstantInt::getFalse(GV->getContext());
+ break;
+ case ICmpInst::ICMP_ULE:
+ case ICmpInst::ICMP_SLE:
+ case ICmpInst::ICMP_EQ:
+ LV = BinaryOperator::CreateNot(LV, "notinit", ICI);
+ break;
+ case ICmpInst::ICMP_NE:
+ case ICmpInst::ICMP_UGE:
+ case ICmpInst::ICMP_SGE:
+ case ICmpInst::ICMP_UGT:
+ case ICmpInst::ICMP_SGT:
+ break; // no change.
+ }
+ ICI->replaceAllUsesWith(LV);
+ ICI->eraseFromParent();
+ }
+ LI->eraseFromParent();
+ }
// If the initialization boolean was used, insert it, otherwise delete it.
if (!InitBoolUsed) {
while (!InitBool->use_empty()) // Delete initializations
- cast<Instruction>(InitBool->use_back())->eraseFromParent();
+ cast<StoreInst>(InitBool->use_back())->eraseFromParent();
delete InitBool;
} else
GV->getParent()->getGlobalList().insert(GV, InitBool);
-
- // Now the GV is dead, nuke it and the malloc (both CI and BCI).
+ // Now the GV is dead, nuke it and the malloc.
GV->eraseFromParent();
- if (BCI) BCI->eraseFromParent();
CI->eraseFromParent();
// To further other optimizations, loop over all users of NewGV and try to
@@ -1303,9 +1293,7 @@ static GlobalVariable *PerformHeapAllocSRoA(GlobalVariable *GV, CallInst *CI,
ConstantInt::get(IntPtrTy, TypeSize),
NElems,
CI->getName() + ".f" + Twine(FieldNo));
- CallInst *NCI = dyn_cast<BitCastInst>(NMI) ?
- extractMallocCallFromBitCast(NMI) : cast<CallInst>(NMI);
- FieldMallocs.push_back(NCI);
+ FieldMallocs.push_back(NMI);
new StoreInst(NMI, NGV, CI);
}
@@ -1497,7 +1485,7 @@ static bool TryToOptimizeStoreOfMallocToGlobal(GlobalVariable *GV,
// something.
if (TD &&
NElements->getZExtValue() * TD->getTypeAllocSize(AllocTy) < 2048) {
- GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElems, TD);
+ GVI = OptimizeGlobalAddressOfMalloc(GV, CI, AllocTy, NElements, TD);
return true;
}
@@ -1556,7 +1544,7 @@ static bool OptimizeOnceStoredGlobal(GlobalVariable *GV, Value *StoredOnceVal,
// only has one (non-null) value stored into it, then we can optimize any
// users of the loaded value (often calls and loads) that would trap if the
// value was null.
- if (isa<PointerType>(GV->getInitializer()->getType()) &&
+ if (GV->getInitializer()->getType()->isPointerTy() &&
GV->getInitializer()->isNullValue()) {
if (Constant *SOVC = dyn_cast<Constant>(StoredOnceVal)) {
if (GV->getInitializer()->getType() != SOVC->getType())
@@ -1591,7 +1579,7 @@ static bool TryToShrinkGlobalToBoolean(GlobalVariable *GV, Constant *OtherVal) {
// where v1 and v2 both require constant pool loads, a big loss.
if (GVElType == Type::getInt1Ty(GV->getContext()) ||
GVElType->isFloatingPointTy() ||
- isa<PointerType>(GVElType) || isa<VectorType>(GVElType))
+ GVElType->isPointerTy() || GVElType->isVectorTy())
return false;
// Walk the use list of the global seeing if all the uses are load or store.
@@ -2148,7 +2136,7 @@ static Constant *EvaluateStoreInto(Constant *Init, Constant *Val,
Elts[CI->getZExtValue()] =
EvaluateStoreInto(Elts[CI->getZExtValue()], Val, Addr, OpNo+1);
- if (isa<ArrayType>(Init->getType()))
+ if (Init->getType()->isArrayTy())
return ConstantArray::get(cast<ArrayType>(InitTy), Elts);
else
return ConstantVector::get(&Elts[0], Elts.size());
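At the source level, the reworked OptimizeGlobalAddressOfMalloc above replaces a once-stored malloc with a global body (plus the InitBool flag when an uninitialized load is observable). A rough before/after sketch, assuming a single constant-sized allocation:

    #include <cstdlib>

    namespace before {
      int *G;
      void init() { G = static_cast<int *>(std::malloc(8 * sizeof(int))); }
    }

    namespace after {   // what the pass effectively produces
      int G_body[8];    // malloc'd memory promoted to a global; contents undef
      int *G = G_body;  // loads of G now see the body directly
      void init() {}    // the malloc and the store are gone
    }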
diff --git a/lib/Transforms/InstCombine/InstCombineAddSub.cpp b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
index 2da17f1..4d2c89e 100644
--- a/lib/Transforms/InstCombine/InstCombineAddSub.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAddSub.cpp
@@ -373,10 +373,10 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
if (CFP->getValueAPF().isPosZero() && CannotBeNegativeZero(LHS))
return ReplaceInstUsesWith(I, LHS);
- // Check for (add double (sitofp x), y), see if we can merge this into an
+ // Check for (fadd double (sitofp x), y), see if we can merge this into an
// integer add followed by a promotion.
if (SIToFPInst *LHSConv = dyn_cast<SIToFPInst>(LHS)) {
- // (add double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
+ // (fadd double (sitofp x), fpcst) --> (sitofp (add int x, intcst))
// ... if the constant fits in the integer value. This is useful for things
// like (double)(x & 1234) + 4.0 -> (double)((X & 1234)+4) which no longer
// requires a constant pool load, and generally allows the add to be better
@@ -394,7 +394,7 @@ Instruction *InstCombiner::visitFAdd(BinaryOperator &I) {
}
}
- // (add double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
+ // (fadd double (sitofp x), (sitofp y)) --> (sitofp (add int x, y))
if (SIToFPInst *RHSConv = dyn_cast<SIToFPInst>(RHS)) {
// Only do this if x/y have the same type, if at least one of them has a
// single use (so we don't increase the number of int->fp conversions),
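A quick numeric check of the fadd transform discussed above: when the FP constant is exactly representable as an integer and the integer add cannot overflow, folding the add into the integer domain preserves the value:

    #include <cassert>

    int main() {
      for (int X = 0; X < 4096; ++X) {
        double A = static_cast<double>(X & 1234) + 4.0; // fadd (sitofp x), 4.0
        double B = static_cast<double>((X & 1234) + 4); // sitofp (add x, 4)
        assert(A == B);
      }
      return 0;
    }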
diff --git a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
index 5e47953..86673f8 100644
--- a/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
+++ b/lib/Transforms/InstCombine/InstCombineAndOrXor.cpp
@@ -1464,8 +1464,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (Value *V = SimplifyOrInst(Op0, Op1, TD))
return ReplaceInstUsesWith(I, V);
-
-
+
// See if we can simplify any instructions used by the instruction whose sole
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
@@ -1474,7 +1473,9 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (ConstantInt *RHS = dyn_cast<ConstantInt>(Op1)) {
ConstantInt *C1 = 0; Value *X = 0;
// (X & C1) | C2 --> (X | C2) & (C1|C2)
+ // iff (C1 & C2) != 0.
if (match(Op0, m_And(m_Value(X), m_ConstantInt(C1))) &&
+ (RHS->getValue() & C1->getValue()) != 0 &&
Op0->hasOneUse()) {
Value *Or = Builder->CreateOr(X, RHS);
Or->takeName(Op0);
@@ -1497,6 +1498,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
if (SelectInst *SI = dyn_cast<SelectInst>(Op0))
if (Instruction *R = FoldOpIntoSelect(I, SI))
return R;
+
if (isa<PHINode>(Op0))
if (Instruction *NV = FoldOpIntoPhi(I))
return NV;
@@ -1618,7 +1620,7 @@ Instruction *InstCombiner::visitOr(BinaryOperator &I) {
// (A & (C0?-1:0)) | (B & ~(C0?-1:0)) -> C0 ? A : B, and commuted variants.
// Don't do this for vector select idioms, the code generator doesn't handle
// them well yet.
- if (!isa<VectorType>(I.getType())) {
+ if (!I.getType()->isVectorTy()) {
if (Instruction *Match = MatchSelectFromAndOr(A, B, C, D))
return Match;
if (Instruction *Match = MatchSelectFromAndOr(B, A, D, C))
@@ -1755,7 +1757,7 @@ Instruction *InstCombiner::visitXor(BinaryOperator &I) {
// purpose is to compute bits we don't care about.
if (SimplifyDemandedInstructionBits(I))
return &I;
- if (isa<VectorType>(I.getType()))
+ if (I.getType()->isVectorTy())
if (isa<ConstantAggregateZero>(Op1))
return ReplaceInstUsesWith(I, Op0); // X ^ <0,0> -> X
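The or-of-and rewrite above rests on an identity that holds for all constants; the new (C1 & C2) != 0 guard appears to gate profitability and termination rather than correctness. An exhaustive 8-bit check of the identity itself:

    #include <cassert>
    #include <cstdint>

    int main() {
      for (uint32_t X = 0; X < 256; ++X)
        for (uint32_t C1 = 0; C1 < 256; ++C1)
          for (uint32_t C2 = 0; C2 < 256; ++C2)
            assert(((X & C1) | C2) == ((X | C2) & (C1 | C2)));
      return 0;
    }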
diff --git a/lib/Transforms/InstCombine/InstCombineCalls.cpp b/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d7efdcf..835d149 100644
--- a/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -319,7 +319,7 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
if (GlobalVariable *GV = dyn_cast<GlobalVariable>(Op1)) {
if (GV->hasDefinitiveInitializer()) {
Constant *C = GV->getInitializer();
- size_t globalSize = TD->getTypeAllocSize(C->getType());
+ uint64_t globalSize = TD->getTypeAllocSize(C->getType());
return ReplaceInstUsesWith(CI, ConstantInt::get(ReturnTy, globalSize));
} else {
Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
@@ -341,16 +341,21 @@ Instruction *InstCombiner::visitCallInst(CallInst &CI) {
// Get what we're pointing to and its size.
const PointerType *BaseType =
cast<PointerType>(Operand->getType());
- size_t Size = TD->getTypeAllocSize(BaseType->getElementType());
+ uint64_t Size = TD->getTypeAllocSize(BaseType->getElementType());
// Get the current byte offset into the thing. Use the original
// operand in case we're looking through a bitcast.
SmallVector<Value*, 8> Ops(CE->op_begin()+1, CE->op_end());
const PointerType *OffsetType =
cast<PointerType>(GEP->getPointerOperand()->getType());
- size_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size());
+ uint64_t Offset = TD->getIndexedOffset(OffsetType, &Ops[0], Ops.size());
- assert(Size >= Offset);
+ if (Size < Offset) {
+ // Out-of-bounds reference? Negative index normalized to a large
+ // index? Just return "I don't know".
+ Constant *RetVal = ConstantInt::get(ReturnTy, Min ? 0 : -1ULL);
+ return ReplaceInstUsesWith(CI, RetVal);
+ }
Constant *RetVal = ConstantInt::get(ReturnTy, Size-Offset);
return ReplaceInstUsesWith(CI, RetVal);
@@ -831,7 +836,7 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
const Type *OldRetTy = Caller->getType();
const Type *NewRetTy = FT->getReturnType();
- if (isa<StructType>(NewRetTy))
+ if (NewRetTy->isStructTy())
return false; // TODO: Handle multiple return values.
// Check to see if we are changing the return type...
@@ -839,9 +844,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
if (Callee->isDeclaration() &&
// Conversion is ok if changing from one pointer type to another or from
// a pointer to an integer of the same size.
- !((isa<PointerType>(OldRetTy) || !TD ||
+ !((OldRetTy->isPointerTy() || !TD ||
OldRetTy == TD->getIntPtrType(Caller->getContext())) &&
- (isa<PointerType>(NewRetTy) || !TD ||
+ (NewRetTy->isPointerTy() || !TD ||
NewRetTy == TD->getIntPtrType(Caller->getContext()))))
return false; // Cannot transform this return value.
@@ -888,9 +893,9 @@ bool InstCombiner::transformConstExprCastCall(CallSite CS) {
// Converting from one pointer type to another or between a pointer and an
// integer of the same size is safe even if we do not have a body.
bool isConvertible = ActTy == ParamTy ||
- (TD && ((isa<PointerType>(ParamTy) ||
+ (TD && ((ParamTy->isPointerTy() ||
ParamTy == TD->getIntPtrType(Caller->getContext())) &&
- (isa<PointerType>(ActTy) ||
+ (ActTy->isPointerTy() ||
ActTy == TD->getIntPtrType(Caller->getContext()))));
if (Callee->isDeclaration() && !isConvertible) return false;
}
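The objectsize folding above computes the remaining bytes from a known allocation size and GEP offset, and with this change gives up instead of asserting when the offset runs past the object. The arithmetic, extracted into a standalone sketch:

    #include <cstdint>

    // Remaining size, or the "don't know" value when the (possibly
    // wrapped-negative) offset exceeds the object size.
    uint64_t foldObjectSize(uint64_t Size, uint64_t Offset, bool Min) {
      if (Size < Offset)
        return Min ? 0 : ~0ULL; // out of bounds: unknown
      return Size - Offset;     // e.g. Size 8, Offset 3 -> 5
    }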
diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp
index bb4a0e9..a68fc6d 100644
--- a/lib/Transforms/InstCombine/InstCombineCasts.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp
@@ -272,7 +272,7 @@ bool InstCombiner::ShouldOptimizeCast(Instruction::CastOps opc, const Value *V,
// If this is a vector sext from a compare, then we don't want to break the
// idiom where each element of the extended vector is either zero or all ones.
- if (opc == Instruction::SExt && isa<CmpInst>(V) && isa<VectorType>(Ty))
+ if (opc == Instruction::SExt && isa<CmpInst>(V) && Ty->isVectorTy())
return false;
return true;
@@ -303,8 +303,8 @@ Instruction *InstCombiner::commonCastTransforms(CastInst &CI) {
if (isa<PHINode>(Src)) {
// We don't do this if this would create a PHI node with an illegal type if
// it is currently legal.
- if (!isa<IntegerType>(Src->getType()) ||
- !isa<IntegerType>(CI.getType()) ||
+ if (!Src->getType()->isIntegerTy() ||
+ !CI.getType()->isIntegerTy() ||
ShouldChangeType(CI.getType(), Src->getType()))
if (Instruction *NV = FoldOpIntoPhi(CI))
return NV;
@@ -436,7 +436,7 @@ Instruction *InstCombiner::visitTrunc(TruncInst &CI) {
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateTruncated(Src, DestTy)) {
// If this cast is a truncate, evaluating in a different type always
@@ -728,7 +728,7 @@ Instruction *InstCombiner::visitZExt(ZExtInst &CI) {
// expression tree to something weird like i93 unless the source is also
// strange.
unsigned BitsToClear;
- if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateZExtd(Src, DestTy, BitsToClear)) {
assert(BitsToClear < SrcTy->getScalarSizeInBits() &&
"Unreasonable BitsToClear");
@@ -936,7 +936,7 @@ Instruction *InstCombiner::visitSExt(SExtInst &CI) {
// type. Only do this if the dest type is a simple type, don't convert the
// expression tree to something weird like i93 unless the source is also
// strange.
- if ((isa<VectorType>(DestTy) || ShouldChangeType(SrcTy, DestTy)) &&
+ if ((DestTy->isVectorTy() || ShouldChangeType(SrcTy, DestTy)) &&
CanEvaluateSExtd(Src, DestTy)) {
// Okay, we can transform this! Insert the new expression now.
DEBUG(dbgs() << "ICE: EvaluateInDifferentType converting expression type"
@@ -1289,7 +1289,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
Constant::getNullValue(Type::getInt32Ty(CI.getContext()));
unsigned NumZeros = 0;
while (SrcElTy != DstElTy &&
- isa<CompositeType>(SrcElTy) && !isa<PointerType>(SrcElTy) &&
+ isa<CompositeType>(SrcElTy) && !SrcElTy->isPointerTy() &&
SrcElTy->getNumContainedTypes() /* not "{}" */) {
SrcElTy = cast<CompositeType>(SrcElTy)->getTypeAtIndex(ZeroUInt);
++NumZeros;
@@ -1304,7 +1304,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
if (const VectorType *DestVTy = dyn_cast<VectorType>(DestTy)) {
- if (DestVTy->getNumElements() == 1 && !isa<VectorType>(SrcTy)) {
+ if (DestVTy->getNumElements() == 1 && !SrcTy->isVectorTy()) {
Value *Elem = Builder->CreateBitCast(Src, DestVTy->getElementType());
return InsertElementInst::Create(UndefValue::get(DestTy), Elem,
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
@@ -1313,7 +1313,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
if (const VectorType *SrcVTy = dyn_cast<VectorType>(SrcTy)) {
- if (SrcVTy->getNumElements() == 1 && !isa<VectorType>(DestTy)) {
+ if (SrcVTy->getNumElements() == 1 && !DestTy->isVectorTy()) {
Value *Elem =
Builder->CreateExtractElement(Src,
Constant::getNullValue(Type::getInt32Ty(CI.getContext())));
@@ -1324,7 +1324,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
if (ShuffleVectorInst *SVI = dyn_cast<ShuffleVectorInst>(Src)) {
// Okay, we have (bitcast (shuffle ..)). Check to see if this is
// a bitconvert to a vector with the same # elts.
- if (SVI->hasOneUse() && isa<VectorType>(DestTy) &&
+ if (SVI->hasOneUse() && DestTy->isVectorTy() &&
cast<VectorType>(DestTy)->getNumElements() ==
SVI->getType()->getNumElements() &&
SVI->getType()->getNumElements() ==
@@ -1346,7 +1346,7 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) {
}
}
- if (isa<PointerType>(SrcTy))
+ if (SrcTy->isPointerTy())
return commonPointerCastTransforms(CI);
return commonCastTransforms(CI);
}
diff --git a/lib/Transforms/InstCombine/InstCombineCompares.cpp b/lib/Transforms/InstCombine/InstCombineCompares.cpp
index 72af80f..518af74 100644
--- a/lib/Transforms/InstCombine/InstCombineCompares.cpp
+++ b/lib/Transforms/InstCombine/InstCombineCompares.cpp
@@ -1988,7 +1988,7 @@ Instruction *InstCombiner::visitICmpInst(ICmpInst &I) {
// values. If the ptr->ptr cast can be stripped off both arguments, we do so
// now.
if (BitCastInst *CI = dyn_cast<BitCastInst>(Op0)) {
- if (isa<PointerType>(Op0->getType()) &&
+ if (Op0->getType()->isPointerTy() &&
(isa<Constant>(Op1) || isa<BitCastInst>(Op1))) {
// We keep moving the cast from the left operand over to the right
// operand, where it can often be eliminated completely.
@@ -2458,17 +2458,17 @@ Instruction *InstCombiner::visitFCmpInst(FCmpInst &I) {
return SelectInst::Create(LHSI->getOperand(0), Op1, Op2);
break;
}
- case Instruction::Load:
- if (GetElementPtrInst *GEP =
- dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
- if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
- if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
- !cast<LoadInst>(LHSI)->isVolatile())
- if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
- return Res;
+ case Instruction::Load:
+ if (GetElementPtrInst *GEP =
+ dyn_cast<GetElementPtrInst>(LHSI->getOperand(0))) {
+ if (GlobalVariable *GV = dyn_cast<GlobalVariable>(GEP->getOperand(0)))
+ if (GV->isConstant() && GV->hasDefinitiveInitializer() &&
+ !cast<LoadInst>(LHSI)->isVolatile())
+ if (Instruction *Res = FoldCmpLoadFromIndexedGlobal(GEP, GV, I))
+ return Res;
+ }
+ break;
}
- break;
- }
}
return Changed ? &I : 0;
diff --git a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
index e6c59c7..0f2a24f 100644
--- a/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
+++ b/lib/Transforms/InstCombine/InstCombineLoadStoreAlloca.cpp
@@ -87,8 +87,8 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
const Type *SrcPTy = SrcTy->getElementType();
- if (DestPTy->isIntegerTy() || isa<PointerType>(DestPTy) ||
- isa<VectorType>(DestPTy)) {
+ if (DestPTy->isIntegerTy() || DestPTy->isPointerTy() ||
+ DestPTy->isVectorTy()) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
@@ -104,11 +104,11 @@ static Instruction *InstCombineLoadCast(InstCombiner &IC, LoadInst &LI,
}
if (IC.getTargetData() &&
- (SrcPTy->isIntegerTy() || isa<PointerType>(SrcPTy) ||
- isa<VectorType>(SrcPTy)) &&
+ (SrcPTy->isIntegerTy() || SrcPTy->isPointerTy() ||
+ SrcPTy->isVectorTy()) &&
// Do not allow turning this into a load of an integer, which is then
// casted to a pointer, this pessimizes pointer analysis a lot.
- (isa<PointerType>(SrcPTy) == isa<PointerType>(LI.getType())) &&
+ (SrcPTy->isPointerTy() == LI.getType()->isPointerTy()) &&
IC.getTargetData()->getTypeSizeInBits(SrcPTy) ==
IC.getTargetData()->getTypeSizeInBits(DestPTy)) {
@@ -243,7 +243,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
const Type *SrcPTy = SrcTy->getElementType();
- if (!DestPTy->isIntegerTy() && !isa<PointerType>(DestPTy))
+ if (!DestPTy->isIntegerTy() && !DestPTy->isPointerTy())
return 0;
/// NewGEPIndices - If SrcPTy is an aggregate type, we can emit a "noop gep"
@@ -255,7 +255,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
// If the source is an array, the code below will not succeed. Check to
// see if a trivial 'gep P, 0, 0' will help matters. Only do this for
// constants.
- if (isa<ArrayType>(SrcPTy) || isa<StructType>(SrcPTy)) {
+ if (SrcPTy->isArrayTy() || SrcPTy->isStructTy()) {
// Index through pointer.
Constant *Zero = Constant::getNullValue(Type::getInt32Ty(SI.getContext()));
NewGEPIndices.push_back(Zero);
@@ -277,7 +277,7 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
SrcTy = PointerType::get(SrcPTy, SrcTy->getAddressSpace());
}
- if (!SrcPTy->isIntegerTy() && !isa<PointerType>(SrcPTy))
+ if (!SrcPTy->isIntegerTy() && !SrcPTy->isPointerTy())
return 0;
// If the pointers point into different address spaces or if they point to
@@ -297,11 +297,11 @@ static Instruction *InstCombineStoreToCast(InstCombiner &IC, StoreInst &SI) {
Instruction::CastOps opcode = Instruction::BitCast;
const Type* CastSrcTy = SIOp0->getType();
const Type* CastDstTy = SrcPTy;
- if (isa<PointerType>(CastDstTy)) {
+ if (CastDstTy->isPointerTy()) {
if (CastSrcTy->isIntegerTy())
opcode = Instruction::IntToPtr;
- } else if (isa<IntegerType>(CastDstTy)) {
- if (isa<PointerType>(SIOp0->getType()))
+ } else if (CastDstTy->isIntegerTy()) {
+ if (SIOp0->getType()->isPointerTy())
opcode = Instruction::PtrToInt;
}
@@ -413,7 +413,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
// Don't count debug info directives, lest they affect codegen,
// and we skip pointer-to-pointer bitcasts, which are NOPs.
if (isa<DbgInfoIntrinsic>(BBI) ||
- (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
ScanInsts++;
continue;
}
@@ -483,7 +483,7 @@ Instruction *InstCombiner::visitStoreInst(StoreInst &SI) {
do {
++BBI;
} while (isa<DbgInfoIntrinsic>(BBI) ||
- (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType())));
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy()));
if (BranchInst *BI = dyn_cast<BranchInst>(BBI))
if (BI->isUnconditional())
if (SimplifyStoreAtEndOfBlock(SI))
@@ -544,7 +544,7 @@ bool InstCombiner::SimplifyStoreAtEndOfBlock(StoreInst &SI) {
--BBI;
// Skip over debugging info.
while (isa<DbgInfoIntrinsic>(BBI) ||
- (isa<BitCastInst>(BBI) && isa<PointerType>(BBI->getType()))) {
+ (isa<BitCastInst>(BBI) && BBI->getType()->isPointerTy())) {
if (BBI==OtherBB->begin())
return false;
--BBI;
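Most of the churn in this file, and in the hunks that follow, is one mechanical migration: isa<PointerType>(T) becomes T->isPointerTy(), and likewise for the integer, vector, array, and struct predicates. A minimal sketch of the equivalence, assuming the LLVM headers of this era:

    #include "llvm/Type.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // Both forms test the same property; the member predicate reads more
    // directly and spares callers an include of DerivedTypes.h.
    bool isPointer(const Type *T) {
      return T->isPointerTy();        // new style used throughout this commit
      // return isa<PointerType>(T);  // old style being replaced
    }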
diff --git a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
index 668c34f..b3974e8 100644
--- a/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
+++ b/lib/Transforms/InstCombine/InstCombineMulDivRem.cpp
@@ -76,7 +76,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
return BinaryOperator::CreateShl(Op0,
ConstantInt::get(Op0->getType(), Val.logBase2()));
}
- } else if (isa<VectorType>(Op1C->getType())) {
+ } else if (Op1C->getType()->isVectorTy()) {
if (Op1C->isNullValue())
return ReplaceInstUsesWith(I, Op1C);
@@ -173,7 +173,7 @@ Instruction *InstCombiner::visitMul(BinaryOperator &I) {
// If one of the operands of the multiply is a cast from a boolean value, then
// we know the bool is either zero or one, so this is a 'masking' multiply.
// X * Y (where Y is 0 or 1) -> X & (0-Y)
- if (!isa<VectorType>(I.getType())) {
+ if (!I.getType()->isVectorTy()) {
// -2 is "-1 << 1" so it is all bits set except the low one.
APInt Negative2(I.getType()->getPrimitiveSizeInBits(), (uint64_t)-2, true);
@@ -203,8 +203,8 @@ Instruction *InstCombiner::visitFMul(BinaryOperator &I) {
// "In IEEE floating point, x*1 is not equivalent to x for nans. However,
// ANSI says we can drop signals, so we can do this anyway." (from GCC)
if (Op1F->isExactlyValue(1.0))
- return ReplaceInstUsesWith(I, Op0); // Eliminate 'mul double %X, 1.0'
- } else if (isa<VectorType>(Op1C->getType())) {
+ return ReplaceInstUsesWith(I, Op0); // Eliminate 'fmul double %X, 1.0'
+ } else if (Op1C->getType()->isVectorTy()) {
if (ConstantVector *Op1V = dyn_cast<ConstantVector>(Op1C)) {
// As above, vector X*splat(1.0) -> X in all defined cases.
if (Constant *Splat = Op1V->getSplatValue()) {
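The masking-multiply comment above relies on a small identity: when Y is known to be 0 or 1, 0 - Y is either all-zeros or all-ones, so the multiply collapses to an AND. A standalone sketch:

    #include <cstdint>

    // For Y in {0, 1}: X * Y == X & (0 - Y), because 0u - Y is either
    // 0x00000000 (Y == 0) or 0xFFFFFFFF (Y == 1).
    uint32_t maskingMul(uint32_t X, uint32_t Y) {
      return X & (0u - Y);
    }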
diff --git a/lib/Transforms/InstCombine/InstCombinePHI.cpp b/lib/Transforms/InstCombine/InstCombinePHI.cpp
index bb7632f..fba8354 100644
--- a/lib/Transforms/InstCombine/InstCombinePHI.cpp
+++ b/lib/Transforms/InstCombine/InstCombinePHI.cpp
@@ -371,7 +371,7 @@ Instruction *InstCombiner::FoldPHIArgOpIntoPHI(PHINode &PN) {
// Be careful about transforming integer PHIs. We don't want to pessimize
// the code by turning an i32 into an i1293.
- if (isa<IntegerType>(PN.getType()) && isa<IntegerType>(CastSrcTy)) {
+ if (PN.getType()->isIntegerTy() && CastSrcTy->isIntegerTy()) {
if (!ShouldChangeType(PN.getType(), CastSrcTy))
return 0;
}
@@ -832,7 +832,7 @@ Instruction *InstCombiner::visitPHINode(PHINode &PN) {
// it is only used by trunc or trunc(lshr) operations. If so, we split the
// PHI into the various pieces being extracted. This sort of thing is
// introduced when SROA promotes an aggregate to a single large integer type.
- if (isa<IntegerType>(PN.getType()) && TD &&
+ if (PN.getType()->isIntegerTy() && TD &&
!TD->isLegalInteger(PN.getType()->getPrimitiveSizeInBits()))
if (Instruction *Res = SliceUpIllegalIntegerPHI(PN))
return Res;
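Both PHI hunks guard integer transforms behind a legality check so an i32 is never rewritten into something like an i1293. A rough sketch of that check, using the TargetData API the code above already relies on:

    #include "llvm/Target/TargetData.h"
    #include "llvm/Type.h"
    using namespace llvm;

    // Without TargetData there is no notion of a legal integer width, so
    // the pass refuses to change integer types at all.
    bool widthChangeIsSafe(const TargetData *TD, const Type *To) {
      if (!TD) return false;
      return TD->isLegalInteger(To->getPrimitiveSizeInBits());
    }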
diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp
index 7807d9a..2fc9325 100644
--- a/lib/Transforms/InstCombine/InstCombineSelect.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp
@@ -539,9 +539,18 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
!CFPf->getValueAPF().isZero()))
return ReplaceInstUsesWith(SI, FalseVal);
}
- // Transform (X != Y) ? X : Y -> X
- if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
+ // Transform (X une Y) ? X : Y -> X
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
return ReplaceInstUsesWith(SI, TrueVal);
+ }
// NOTE: if we wanted to, this is where to detect MIN/MAX
} else if (FCI->getOperand(0) == FalseVal && FCI->getOperand(1) == TrueVal){
@@ -557,9 +566,18 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) {
!CFPf->getValueAPF().isZero()))
return ReplaceInstUsesWith(SI, FalseVal);
}
- // Transform (X != Y) ? Y : X -> Y
- if (FCI->getPredicate() == FCmpInst::FCMP_ONE)
- return ReplaceInstUsesWith(SI, TrueVal);
+ // Transform (X une Y) ? Y : X -> Y
+ if (FCI->getPredicate() == FCmpInst::FCMP_UNE) {
+ // This is not safe in general for floating point:
+ // consider X== -0, Y== +0.
+ // It becomes safe if either operand is a nonzero constant.
+ ConstantFP *CFPt, *CFPf;
+ if (((CFPt = dyn_cast<ConstantFP>(TrueVal)) &&
+ !CFPt->getValueAPF().isZero()) ||
+ ((CFPf = dyn_cast<ConstantFP>(FalseVal)) &&
+ !CFPf->getValueAPF().isZero()))
+ return ReplaceInstUsesWith(SI, TrueVal);
+ }
// NOTE: if we wanted to, this is where to detect MIN/MAX
}
// NOTE: if we wanted to, this is where to detect ABS
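The signed-zero hazard these hunks guard against is easy to reproduce in plain C++: with X == -0.0 and Y == +0.0, folding (X != Y) ? X : Y down to X flips the sign of zero.

    #include <cstdio>

    int main() {
      double X = -0.0, Y = +0.0;
      double Sel = (X != Y) ? X : Y;   // -0.0 != +0.0 is false, so Sel = +0.0
      double Folded = X;               // the unguarded fold would yield -0.0
      std::printf("%g %g\n", 1.0 / Sel, 1.0 / Folded);   // prints: inf -inf
      return 0;
    }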
diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
index 5e9a52f..cd41844 100644
--- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
+++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp
@@ -104,7 +104,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
assert(Depth <= 6 && "Limit Search Depth");
uint32_t BitWidth = DemandedMask.getBitWidth();
const Type *VTy = V->getType();
- assert((TD || !isa<PointerType>(VTy)) &&
+ assert((TD || !VTy->isPointerTy()) &&
"SimplifyDemandedBits needs to know bit widths!");
assert((!TD || TD->getTypeSizeInBits(VTy->getScalarType()) == BitWidth) &&
(!VTy->isIntOrIntVectorTy() ||
@@ -413,7 +413,7 @@ Value *InstCombiner::SimplifyDemandedUseBits(Value *V, APInt DemandedMask,
} else
// Don't touch a scalar-to-vector bitcast.
return 0;
- } else if (isa<VectorType>(I->getOperand(0)->getType()))
+ } else if (I->getOperand(0)->getType()->isVectorTy())
// Don't touch a vector-to-scalar bitcast.
return 0;
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index 20fda1a..a58124d 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -78,7 +78,7 @@ static std::vector<unsigned> getShuffleMask(const ShuffleVectorInst *SVI) {
/// value is already around as a register, for example if it were inserted then
/// extracted from the vector.
static Value *FindScalarElement(Value *V, unsigned EltNo) {
- assert(isa<VectorType>(V->getType()) && "Not looking at a vector?");
+ assert(V->getType()->isVectorTy() && "Not looking at a vector?");
const VectorType *PTy = cast<VectorType>(V->getType());
unsigned Width = PTy->getNumElements();
if (EltNo >= Width) // Out of range access.
@@ -322,7 +322,7 @@ static bool CollectSingleShuffleElements(Value *V, Value *LHS, Value *RHS,
/// that computes V and the LHS value of the shuffle.
static Value *CollectShuffleElements(Value *V, std::vector<Constant*> &Mask,
Value *&RHS) {
- assert(isa<VectorType>(V->getType()) &&
+ assert(V->getType()->isVectorTy() &&
(RHS == 0 || V->getType() == RHS->getType()) &&
"Invalid shuffle!");
unsigned NumElts = cast<VectorType>(V->getType())->getNumElements();
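FindScalarElement recovers a scalar from insertelement/shufflevector chains without going through memory; an extract that hits the lane written by the most recent insert forwards the inserted value directly. A scalar model of that fold (the function is illustrative, not the real traversal):

    #include <cstddef>

    // %v1 = insertelement <4 x float> %v0, float %s, i32 Lane
    // %x  = extractelement <4 x float> %v1, i32 ExtractLane
    // When ExtractLane == Lane, %x is simply %s.
    float extractAfterInsert(const float (&v0)[4], float s,
                             std::size_t lane, std::size_t extractLane) {
      return extractLane == lane ? s : v0[extractLane];
    }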
diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp
index 96c0342..af9ec5c 100644
--- a/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -73,7 +73,7 @@ void InstCombiner::getAnalysisUsage(AnalysisUsage &AU) const {
/// from 'From' to 'To'. We don't want to convert from a legal to an illegal
/// type for example, or from a smaller to a larger illegal type.
bool InstCombiner::ShouldChangeType(const Type *From, const Type *To) const {
- assert(isa<IntegerType>(From) && isa<IntegerType>(To));
+ assert(From->isIntegerTy() && To->isIntegerTy());
// If we don't have TD, we don't know if the source/dest are legal.
if (!TD) return false;
@@ -478,7 +478,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
bool EndsWithSequential = false;
for (gep_type_iterator I = gep_type_begin(*Src), E = gep_type_end(*Src);
I != E; ++I)
- EndsWithSequential = !isa<StructType>(*I);
+ EndsWithSequential = !(*I)->isStructTy();
// Can we combine the two pointer arithmetics offsets?
if (EndsWithSequential) {
@@ -578,7 +578,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// into: %t1 = getelementptr [2 x i32]* %str, i32 0, i32 %V; bitcast
const Type *SrcElTy = StrippedPtrTy->getElementType();
const Type *ResElTy=cast<PointerType>(PtrOp->getType())->getElementType();
- if (TD && isa<ArrayType>(SrcElTy) &&
+ if (TD && SrcElTy->isArrayTy() &&
TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType()) ==
TD->getTypeAllocSize(ResElTy)) {
Value *Idx[2];
@@ -596,7 +596,7 @@ Instruction *InstCombiner::visitGetElementPtrInst(GetElementPtrInst &GEP) {
// (where tmp = 8*tmp2) into:
// getelementptr [100 x double]* %arr, i32 0, i32 %tmp2; bitcast
- if (TD && isa<ArrayType>(SrcElTy) && ResElTy->isIntegerTy(8)) {
+ if (TD && SrcElTy->isArrayTy() && ResElTy->isIntegerTy(8)) {
uint64_t ArrayEltSize =
TD->getTypeAllocSize(cast<ArrayType>(SrcElTy)->getElementType());
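The array-GEP rewrites sketched in the comments above are legal because, when the element sizes match, indexing the cast pointer and indexing the underlying array compute the same address. A C++ model of that equivalence (aliasing pedantry aside; the point is the address arithmetic):

    // %p = bitcast [10 x i32]* %arr to i32*; gep i32* %p, i32 %i
    int *viaCast(int (*arr)[10], long i) {
      return reinterpret_cast<int *>(arr) + i;
    }

    // gep [10 x i32]* %arr, i32 0, i32 %i
    int *viaArray(int (*arr)[10], long i) {
      return &(*arr)[i];
    }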
diff --git a/lib/Transforms/Scalar/ABCD.cpp b/lib/Transforms/Scalar/ABCD.cpp
index cf5e8c0..ea8e5c3 100644
--- a/lib/Transforms/Scalar/ABCD.cpp
+++ b/lib/Transforms/Scalar/ABCD.cpp
@@ -505,7 +505,7 @@ void ABCD::executeABCD(Function &F) {
continue;
ICmpInst *ICI = dyn_cast<ICmpInst>(TI->getOperand(0));
- if (!ICI || !isa<IntegerType>(ICI->getOperand(0)->getType()))
+ if (!ICI || !ICI->getOperand(0)->getType()->isIntegerTy())
continue;
createConstraintCmpInst(ICI, TI);
@@ -713,7 +713,7 @@ void ABCD::createConstraintCmpInst(ICmpInst *ICI, TerminatorInst *TI) {
Value *V_op1 = ICI->getOperand(0);
Value *V_op2 = ICI->getOperand(1);
- if (!isa<IntegerType>(V_op1->getType()))
+ if (!V_op1->getType()->isIntegerTy())
return;
Instruction *I_op1 = dyn_cast<Instruction>(V_op1);
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 21e6f89..7ceda1f 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -612,7 +612,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
// we'd end up sinking both muls.
if (AddrMode.BaseReg) {
Value *V = AddrMode.BaseReg;
- if (isa<PointerType>(V->getType()))
+ if (V->getType()->isPointerTy())
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
if (V->getType() != IntPtrTy)
V = CastInst::CreateIntegerCast(V, IntPtrTy, /*isSigned=*/true,
@@ -625,7 +625,7 @@ bool CodeGenPrepare::OptimizeMemoryInst(Instruction *MemoryInst, Value *Addr,
Value *V = AddrMode.ScaledReg;
if (V->getType() == IntPtrTy) {
// done.
- } else if (isa<PointerType>(V->getType())) {
+ } else if (V->getType()->isPointerTy()) {
V = new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
} else if (cast<IntegerType>(IntPtrTy)->getBitWidth() <
cast<IntegerType>(V->getType())->getBitWidth()) {
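Both CodeGenPrepare hunks normalize the sunken address parts the same way: pointer-typed registers are first converted to the target's integer pointer type so base and scaled registers can be combined with ordinary integer arithmetic. A condensed sketch of that step:

    #include "llvm/Instructions.h"
    using namespace llvm;

    // Pointers become integers of pointer width; integer registers of a
    // different width are widened or truncated by the callers.
    Value *toIntPtr(Value *V, const Type *IntPtrTy, Instruction *InsertPt) {
      if (V->getType()->isPointerTy())
        return new PtrToIntInst(V, IntPtrTy, "sunkaddr", InsertPt);
      return V;
    }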
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 3ce7482..fcb802a 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -662,11 +662,10 @@ namespace {
bool runOnFunction(Function &F);
public:
static char ID; // Pass identification, replacement for typeid
- explicit GVN(bool nopre = false, bool noloads = false)
- : FunctionPass(&ID), NoPRE(nopre), NoLoads(noloads), MD(0) { }
+ explicit GVN(bool noloads = false)
+ : FunctionPass(&ID), NoLoads(noloads), MD(0) { }
private:
- bool NoPRE;
bool NoLoads;
MemoryDependenceAnalysis *MD;
DominatorTree *DT;
@@ -674,6 +673,9 @@ namespace {
ValueTable VN;
DenseMap<BasicBlock*, ValueNumberScope*> localAvail;
+ // List of critical edges to be split between iterations.
+ SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
+
// This transformation requires dominator postdominator info
virtual void getAnalysisUsage(AnalysisUsage &AU) const {
AU.addRequired<DominatorTree>();
@@ -701,14 +703,15 @@ namespace {
Value *lookupNumber(BasicBlock *BB, uint32_t num);
void cleanupGlobalSets();
void verifyRemoved(const Instruction *I) const;
+ bool splitCriticalEdges();
};
char GVN::ID = 0;
}
// createGVNPass - The public interface to this file...
-FunctionPass *llvm::createGVNPass(bool NoPRE, bool NoLoads) {
- return new GVN(NoPRE, NoLoads);
+FunctionPass *llvm::createGVNPass(bool NoLoads) {
+ return new GVN(NoLoads);
}
static RegisterPass<GVN> X("gvn",
@@ -836,9 +839,9 @@ static bool CanCoerceMustAliasedValueToLoad(Value *StoredVal,
const TargetData &TD) {
// If the loaded or stored value is a first-class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
- if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy) ||
- isa<StructType>(StoredVal->getType()) ||
- isa<ArrayType>(StoredVal->getType()))
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy() ||
+ StoredVal->getType()->isStructTy() ||
+ StoredVal->getType()->isArrayTy())
return false;
// The store has to be at least as big as the load.
@@ -870,26 +873,26 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
// If the store and reload are the same size, we can always reuse it.
if (StoreSize == LoadSize) {
- if (isa<PointerType>(StoredValTy) && isa<PointerType>(LoadedTy)) {
+ if (StoredValTy->isPointerTy() && LoadedTy->isPointerTy()) {
// Pointer to Pointer -> use bitcast.
return new BitCastInst(StoredVal, LoadedTy, "", InsertPt);
}
// Convert source pointers to integers, which can be bitcast.
- if (isa<PointerType>(StoredValTy)) {
+ if (StoredValTy->isPointerTy()) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
const Type *TypeToCastTo = LoadedTy;
- if (isa<PointerType>(TypeToCastTo))
+ if (TypeToCastTo->isPointerTy())
TypeToCastTo = TD.getIntPtrType(StoredValTy->getContext());
if (StoredValTy != TypeToCastTo)
StoredVal = new BitCastInst(StoredVal, TypeToCastTo, "", InsertPt);
// Cast to pointer if the load needs a pointer type.
- if (isa<PointerType>(LoadedTy))
+ if (LoadedTy->isPointerTy())
StoredVal = new IntToPtrInst(StoredVal, LoadedTy, "", InsertPt);
return StoredVal;
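The coercion routines above and below reduce every case to integer bit manipulation on SSA values via bitcast, ptrtoint, and inttoptr; nothing goes through memory. As a rough memory-level model, and assuming little-endian layout, the wider-store case amounts to handing the load the low bits of the stored value:

    #include <cstdint>
    #include <cstring>

    // A 64-bit store can satisfy a 32-bit load of the same address by
    // forwarding the low half of the stored bits.
    uint32_t forwardLowHalf(uint64_t stored) {
      uint32_t loaded;
      std::memcpy(&loaded, &stored, sizeof loaded);   // low half on LE
      return loaded;
    }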
@@ -901,13 +904,13 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
assert(StoreSize >= LoadSize && "CanCoerceMustAliasedValueToLoad fail");
// Convert source pointers to integers, which can be manipulated.
- if (isa<PointerType>(StoredValTy)) {
+ if (StoredValTy->isPointerTy()) {
StoredValTy = TD.getIntPtrType(StoredValTy->getContext());
StoredVal = new PtrToIntInst(StoredVal, StoredValTy, "", InsertPt);
}
// Convert vectors and fp to integer, which can be manipulated.
- if (!isa<IntegerType>(StoredValTy)) {
+ if (!StoredValTy->isIntegerTy()) {
StoredValTy = IntegerType::get(StoredValTy->getContext(), StoreSize);
StoredVal = new BitCastInst(StoredVal, StoredValTy, "", InsertPt);
}
@@ -927,7 +930,7 @@ static Value *CoerceAvailableValueToLoadType(Value *StoredVal,
return StoredVal;
// If the result is a pointer, inttoptr.
- if (isa<PointerType>(LoadedTy))
+ if (LoadedTy->isPointerTy())
return new IntToPtrInst(StoredVal, LoadedTy, "inttoptr", InsertPt);
// Otherwise, bitcast.
@@ -989,7 +992,7 @@ static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
const TargetData &TD) {
// If the loaded or stored value is a first-class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
- if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy))
+ if (LoadTy->isStructTy() || LoadTy->isArrayTy())
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
@@ -1064,8 +1067,8 @@ static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
StoreInst *DepSI,
const TargetData &TD) {
// Cannot handle reading from store of first-class aggregate yet.
- if (isa<StructType>(DepSI->getOperand(0)->getType()) ||
- isa<ArrayType>(DepSI->getOperand(0)->getType()))
+ if (DepSI->getOperand(0)->getType()->isStructTy() ||
+ DepSI->getOperand(0)->getType()->isArrayTy())
return -1;
Value *StorePtr = DepSI->getPointerOperand();
@@ -1136,9 +1139,9 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
- if (isa<PointerType>(SrcVal->getType()))
+ if (SrcVal->getType()->isPointerTy())
SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
- if (!isa<IntegerType>(SrcVal->getType()))
+ if (!SrcVal->getType()->isIntegerTy())
SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
"tmp");
@@ -1323,7 +1326,7 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
Value *V = SSAUpdate.GetValueInMiddleOfBlock(LI->getParent());
// If new PHI nodes were created, notify alias analysis.
- if (isa<PointerType>(V->getType()))
+ if (V->getType()->isPointerTy())
for (unsigned i = 0, e = NewPHIs.size(); i != e; ++i)
AA->copyValue(LI, NewPHIs[i]);
@@ -1491,8 +1494,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
if (isa<PHINode>(V))
V->takeName(LI);
- if (isa<PointerType>(V->getType()))
+ if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
+ VN.erase(LI);
toErase.push_back(LI);
NumGVNLoad++;
return true;
@@ -1538,11 +1542,13 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// at least one of the values is LI. Since this means that we won't be able
// to eliminate LI even if we insert uses in the other predecessors, we will
// end up increasing code size. Reject this by scanning for LI.
- if (!EnableFullLoadPRE) {
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
- if (ValuesPerBlock[i].isSimpleValue() &&
- ValuesPerBlock[i].getSimpleValue() == LI)
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ if (ValuesPerBlock[i].isSimpleValue() &&
+ ValuesPerBlock[i].getSimpleValue() == LI) {
+ // Skip cases where LI is the only definition, even for EnableFullLoadPRE.
+ if (!EnableFullLoadPRE || e == 1)
return false;
+ }
}
// FIXME: It is extremely unclear what this loop is doing, other than
@@ -1576,6 +1582,7 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
for (unsigned i = 0, e = UnavailableBlocks.size(); i != e; ++i)
FullyAvailableBlocks[UnavailableBlocks[i]] = false;
+ bool NeedToSplitEdges = false;
for (pred_iterator PI = pred_begin(LoadBB), E = pred_end(LoadBB);
PI != E; ++PI) {
BasicBlock *Pred = *PI;
@@ -1583,13 +1590,20 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
continue;
}
PredLoads[Pred] = 0;
- // We don't currently handle critical edges :(
+
if (Pred->getTerminator()->getNumSuccessors() != 1) {
- DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF CRITICAL EDGE '"
- << Pred->getName() << "': " << *LI << '\n');
- return false;
+ if (isa<IndirectBrInst>(Pred->getTerminator())) {
+ DEBUG(dbgs() << "COULD NOT PRE LOAD BECAUSE OF INDBR CRITICAL EDGE '"
+ << Pred->getName() << "': " << *LI << '\n');
+ return false;
+ }
+ unsigned SuccNum = GetSuccessorNumber(Pred, LoadBB);
+ toSplit.push_back(std::make_pair(Pred->getTerminator(), SuccNum));
+ NeedToSplitEdges = true;
}
}
+ if (NeedToSplitEdges)
+ return false;
// Decide whether PRE is profitable for this load.
unsigned NumUnavailablePreds = PredLoads.size();
@@ -1623,13 +1637,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
*DT, NewInsts);
} else {
- Address.PHITranslateValue(LoadBB, UnavailablePred);
+ Address.PHITranslateValue(LoadBB, UnavailablePred, DT);
LoadPtr = Address.getAddr();
-
- // Make sure the value is live in the predecessor.
- if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr))
- if (!DT->dominates(Inst->getParent(), UnavailablePred))
- LoadPtr = 0;
}
// If we couldn't find or insert a computation of this phi translated value,
@@ -1697,6 +1706,8 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// Add the newly created load.
ValuesPerBlock.push_back(AvailableValueInBlock::get(UnavailablePred,
NewLoad));
+ MD->invalidateCachedPointerInfo(LoadPtr);
+ DEBUG(dbgs() << "GVN INSERTED " << *NewLoad << '\n');
}
// Perform PHI construction.
@@ -1705,8 +1716,9 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
LI->replaceAllUsesWith(V);
if (isa<PHINode>(V))
V->takeName(LI);
- if (isa<PointerType>(V->getType()))
+ if (V->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(V);
+ VN.erase(LI);
toErase.push_back(LI);
NumPRELoad++;
return true;
@@ -1765,8 +1777,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// Replace the load!
L->replaceAllUsesWith(AvailVal);
- if (isa<PointerType>(AvailVal->getType()))
+ if (AvailVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(AvailVal);
+ VN.erase(L);
toErase.push_back(L);
NumGVNLoad++;
return true;
@@ -1810,8 +1823,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// Remove it!
L->replaceAllUsesWith(StoredVal);
- if (isa<PointerType>(StoredVal->getType()))
+ if (StoredVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(StoredVal);
+ VN.erase(L);
toErase.push_back(L);
NumGVNLoad++;
return true;
@@ -1839,8 +1853,9 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// Remove it!
L->replaceAllUsesWith(AvailableVal);
- if (isa<PointerType>(DepLI->getType()))
+ if (DepLI->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(DepLI);
+ VN.erase(L);
toErase.push_back(L);
NumGVNLoad++;
return true;
@@ -1851,6 +1866,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// intervening stores, for example.
if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ VN.erase(L);
toErase.push_back(L);
NumGVNLoad++;
return true;
@@ -1861,6 +1877,7 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
+ VN.erase(L);
toErase.push_back(L);
NumGVNLoad++;
return true;
@@ -1943,7 +1960,7 @@ bool GVN::processInstruction(Instruction *I,
if (constVal) {
p->replaceAllUsesWith(constVal);
- if (MD && isa<PointerType>(constVal->getType()))
+ if (MD && constVal->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(constVal);
VN.erase(p);
@@ -1964,7 +1981,7 @@ bool GVN::processInstruction(Instruction *I,
// Remove it!
VN.erase(I);
I->replaceAllUsesWith(repl);
- if (MD && isa<PointerType>(repl->getType()))
+ if (MD && repl->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(repl);
toErase.push_back(I);
return true;
@@ -2004,6 +2021,8 @@ bool GVN::runOnFunction(Function& F) {
while (ShouldContinue) {
DEBUG(dbgs() << "GVN iteration: " << Iteration << "\n");
ShouldContinue = iterateOnFunction(F);
+ if (splitCriticalEdges())
+ ShouldContinue = true;
Changed |= ShouldContinue;
++Iteration;
}
@@ -2070,7 +2089,6 @@ bool GVN::processBlock(BasicBlock *BB) {
/// control flow patterns and attempts to perform simple PRE at the join point.
bool GVN::performPRE(Function &F) {
bool Changed = false;
- SmallVector<std::pair<TerminatorInst*, unsigned>, 4> toSplit;
DenseMap<BasicBlock*, Value*> predMap;
for (df_iterator<BasicBlock*> DI = df_begin(&F.getEntryBlock()),
DE = df_end(&F.getEntryBlock()); DI != DE; ++DI) {
@@ -2141,14 +2159,7 @@ bool GVN::performPRE(Function &F) {
// We can't do PRE safely on a critical edge, so instead we schedule
// the edge to be split and perform the PRE the next time we iterate
// on the function.
- unsigned SuccNum = 0;
- for (unsigned i = 0, e = PREPred->getTerminator()->getNumSuccessors();
- i != e; ++i)
- if (PREPred->getTerminator()->getSuccessor(i) == CurrentBlock) {
- SuccNum = i;
- break;
- }
-
+ unsigned SuccNum = GetSuccessorNumber(PREPred, CurrentBlock);
if (isCriticalEdge(PREPred->getTerminator(), SuccNum)) {
toSplit.push_back(std::make_pair(PREPred->getTerminator(), SuccNum));
continue;
@@ -2204,7 +2215,7 @@ bool GVN::performPRE(Function &F) {
localAvail[CurrentBlock]->table[ValNo] = Phi;
CurInst->replaceAllUsesWith(Phi);
- if (MD && isa<PointerType>(Phi->getType()))
+ if (MD && Phi->getType()->isPointerTy())
MD->invalidateCachedPointerInfo(Phi);
VN.erase(CurInst);
@@ -2216,11 +2227,23 @@ bool GVN::performPRE(Function &F) {
}
}
- for (SmallVector<std::pair<TerminatorInst*, unsigned>, 4>::iterator
- I = toSplit.begin(), E = toSplit.end(); I != E; ++I)
- SplitCriticalEdge(I->first, I->second, this);
+ if (splitCriticalEdges())
+ Changed = true;
- return Changed || toSplit.size();
+ return Changed;
+}
+
+/// splitCriticalEdges - Split critical edges found during the previous
+/// iteration that may enable further optimization.
+bool GVN::splitCriticalEdges() {
+ if (toSplit.empty())
+ return false;
+ do {
+ std::pair<TerminatorInst*, unsigned> Edge = toSplit.pop_back_val();
+ SplitCriticalEdge(Edge.first, Edge.second, this);
+ } while (!toSplit.empty());
+ if (MD) MD->invalidateCachedPredecessors();
+ return true;
}
/// iterateOnFunction - Executes one iteration of GVN
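The new toSplit/splitCriticalEdges machinery is a deferral pattern: edges are recorded while the CFG must stay stable, split in one batch at a safe point, and the pass body is re-run so load PRE can retry across the now-split edges. A minimal sketch of the flush step, assuming the usual BasicBlockUtils API:

    #include "llvm/Pass.h"
    #include "llvm/Instructions.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    #include <utility>
    using namespace llvm;

    typedef std::pair<TerminatorInst*, unsigned> EdgeToSplit;

    // Returns true if anything was split, telling the caller to iterate.
    bool flushPendingSplits(SmallVectorImpl<EdgeToSplit> &ToSplit, Pass *P) {
      if (ToSplit.empty()) return false;
      do {
        EdgeToSplit E = ToSplit.pop_back_val();
        SplitCriticalEdge(E.first, E.second, P);
      } while (!ToSplit.empty());
      return true;
    }

The real pass additionally invalidates MemoryDependenceAnalysis' cached predecessor lists after splitting, since the CFG has changed.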
diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp
index 5302fdc..cb563c3 100644
--- a/lib/Transforms/Scalar/IndVarSimplify.cpp
+++ b/lib/Transforms/Scalar/IndVarSimplify.cpp
@@ -103,11 +103,9 @@ namespace {
BasicBlock *ExitingBlock,
BranchInst *BI,
SCEVExpander &Rewriter);
- void RewriteLoopExitValues(Loop *L, const SCEV *BackedgeTakenCount,
- SCEVExpander &Rewriter);
+ void RewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter);
- void RewriteIVExpressions(Loop *L, const Type *LargestType,
- SCEVExpander &Rewriter);
+ void RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter);
void SinkUnusedInvariants(Loop *L);
@@ -190,7 +188,7 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
ICmpInst *Cond = new ICmpInst(BI, Opcode, CmpIndVar, ExitCnt, "exitcond");
- Instruction *OrigCond = cast<Instruction>(BI->getCondition());
+ Value *OrigCond = BI->getCondition();
// It's tempting to use replaceAllUsesWith here to fully replace the old
// comparison, but that's not immediately safe, since users of the old
// comparison may not be dominated by the new comparison. Instead, just
@@ -215,7 +213,6 @@ ICmpInst *IndVarSimplify::LinearFunctionTestReplace(Loop *L,
/// able to brute-force evaluate arbitrary instructions as long as they have
/// constant operands at the beginning of the loop.
void IndVarSimplify::RewriteLoopExitValues(Loop *L,
- const SCEV *BackedgeTakenCount,
SCEVExpander &Rewriter) {
// Verify the input to the pass in already in LCSSA form.
assert(L->isLCSSAForm());
@@ -241,15 +238,24 @@ void IndVarSimplify::RewriteLoopExitValues(Loop *L,
while ((PN = dyn_cast<PHINode>(BBI++))) {
if (PN->use_empty())
continue; // dead use, don't replace it
+
+ // SCEV only supports integer expressions for now.
+ if (!PN->getType()->isIntegerTy() && !PN->getType()->isPointerTy())
+ continue;
+
+ // It's necessary to tell ScalarEvolution about this explicitly so that
+ // it can walk the def-use list and forget all SCEVs, as it may not be
+ // watching the PHI itself. Once the new exit value is in place, there
+ // may not be a def-use connection between the loop and every instruction
+ // which got a SCEVAddRecExpr for that loop.
+ SE->forgetValue(PN);
+
// Iterate over all of the values in all the PHI nodes.
for (unsigned i = 0; i != NumPreds; ++i) {
// If the value being merged in is not integer or is not defined
// in the loop, skip it.
Value *InVal = PN->getIncomingValue(i);
- if (!isa<Instruction>(InVal) ||
- // SCEV only supports integer expressions for now.
- (!isa<IntegerType>(InVal->getType()) &&
- !isa<PointerType>(InVal->getType())))
+ if (!isa<Instruction>(InVal))
continue;
// If this pred is for a subloop, not L itself, skip it.
@@ -349,7 +355,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// the current expressions.
//
if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount))
- RewriteLoopExitValues(L, BackedgeTakenCount, Rewriter);
+ RewriteLoopExitValues(L, Rewriter);
// Compute the type of the largest recurrence expression, and decide whether
// a canonical induction variable should be inserted.
@@ -378,17 +384,18 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
// in this loop, insert a canonical induction variable of the largest size.
Value *IndVar = 0;
if (NeedCannIV) {
- // Check to see if the loop already has a canonical-looking induction
- // variable. If one is present and it's wider than the planned canonical
- // induction variable, temporarily remove it, so that the Rewriter
- // doesn't attempt to reuse it.
- PHINode *OldCannIV = L->getCanonicalInductionVariable();
- if (OldCannIV) {
+ // Check to see if the loop already has any canonical-looking induction
+ // variables. If any are present and wider than the planned canonical
+ // induction variable, temporarily remove them, so that the Rewriter
+ // doesn't attempt to reuse them.
+ SmallVector<PHINode *, 2> OldCannIVs;
+ while (PHINode *OldCannIV = L->getCanonicalInductionVariable()) {
if (SE->getTypeSizeInBits(OldCannIV->getType()) >
SE->getTypeSizeInBits(LargestType))
OldCannIV->removeFromParent();
else
- OldCannIV = 0;
+ break;
+ OldCannIVs.push_back(OldCannIV);
}
IndVar = Rewriter.getOrInsertCanonicalInductionVariable(L, LargestType);
@@ -398,17 +405,21 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
DEBUG(dbgs() << "INDVARS: New CanIV: " << *IndVar << '\n');
// Now that the official induction variable is established, reinsert
- // the old canonical-looking variable after it so that the IR remains
- // consistent. It will be deleted as part of the dead-PHI deletion at
+ // any old canonical-looking variables after it so that the IR remains
+ // consistent. They will be deleted as part of the dead-PHI deletion at
// the end of the pass.
- if (OldCannIV)
- OldCannIV->insertAfter(cast<Instruction>(IndVar));
+ while (!OldCannIVs.empty()) {
+ PHINode *OldCannIV = OldCannIVs.pop_back_val();
+ OldCannIV->insertBefore(L->getHeader()->getFirstNonPHI());
+ }
}
// If we have a trip count expression, rewrite the loop's exit condition
// using it. We can currently only handle loops with a single exit.
ICmpInst *NewICmp = 0;
- if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) && ExitingBlock) {
+ if (!isa<SCEVCouldNotCompute>(BackedgeTakenCount) &&
+ !BackedgeTakenCount->isZero() &&
+ ExitingBlock) {
assert(NeedCannIV &&
"LinearFunctionTestReplace requires a canonical induction variable");
// Can't rewrite non-branch yet.
@@ -418,7 +429,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
}
// Rewrite IV-derived expressions. Clears the rewriter cache.
- RewriteIVExpressions(L, LargestType, Rewriter);
+ RewriteIVExpressions(L, Rewriter);
// The Rewriter may not be used from this point on.
@@ -438,8 +449,7 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) {
return Changed;
}
-void IndVarSimplify::RewriteIVExpressions(Loop *L, const Type *LargestType,
- SCEVExpander &Rewriter) {
+void IndVarSimplify::RewriteIVExpressions(Loop *L, SCEVExpander &Rewriter) {
SmallVector<WeakVH, 16> DeadInsts;
// Rewrite all induction variable expressions in terms of the canonical
@@ -584,8 +594,8 @@ void IndVarSimplify::SinkUnusedInvariants(Loop *L) {
}
}
-/// Return true if it is OK to use SIToFPInst for an inducation variable
-/// with given inital and exit values.
+/// Return true if it is OK to use SIToFPInst for an induction variable
+/// with given initial and exit values.
static bool useSIToFPInst(ConstantFP &InitV, ConstantFP &ExitV,
uint64_t intIV, uint64_t intEV) {
@@ -638,7 +648,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
if (!convertToInt(InitValue->getValueAPF(), &newInitValue))
return;
- // Check IV increment. Reject this PH if increement operation is not
+ // Check IV increment. Reject this PH if increment operation is not
// an add or increment value can not be represented by an integer.
BinaryOperator *Incr =
dyn_cast<BinaryOperator>(PH->getIncomingValue(BackEdge));
@@ -674,7 +684,7 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
if (BI->getCondition() != EC) return;
}
- // Find exit value. If exit value can not be represented as an interger then
+ // Find exit value. If exit value can not be represented as an integer then
// do not handle this floating point PH.
ConstantFP *EV = NULL;
unsigned EVIndex = 1;
@@ -736,11 +746,11 @@ void IndVarSimplify::HandleFloatingPointIV(Loop *L, PHINode *PH) {
ICmpInst *NewEC = new ICmpInst(EC->getParent()->getTerminator(),
NewPred, LHS, RHS, EC->getName());
- // In the following deltions, PH may become dead and may be deleted.
+ // In the following deletions, PH may become dead and may be deleted.
// Use a WeakVH to observe whether this happens.
WeakVH WeakPH = PH;
- // Delete old, floating point, exit comparision instruction.
+ // Delete old, floating point, exit comparison instruction.
NewEC->takeName(EC);
EC->replaceAllUsesWith(NewEC);
RecursivelyDeleteTriviallyDeadInstructions(EC);
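The floating-point IV rewrite at the end of this file replaces a float counter whose initial value, step, and exit value are all exactly representable as integers. In C terms, a sketch of the before and after:

    // Before: float induction variable with integer-valued bounds.
    float sumBefore() {
      float s = 0.0f;
      for (float f = 0.0f; f != 100.0f; f += 1.0f)
        s += f;
      return s;
    }

    // After: the equivalent integer IV the pass rewrites it into.
    float sumAfter() {
      float s = 0.0f;
      for (int i = 0; i != 100; ++i)
        s += (float)i;
      return s;
    }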
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 8f21aac..a6489ec 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -201,7 +201,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
if (isa<DbgInfoIntrinsic>(I)) continue;
// If this is a pointer->pointer bitcast, it is free.
- if (isa<BitCastInst>(I) && isa<PointerType>(I->getType()))
+ if (isa<BitCastInst>(I) && I->getType()->isPointerTy())
continue;
// All other instructions count for at least one unit.
@@ -214,7 +214,7 @@ static unsigned getJumpThreadDuplicationCost(const BasicBlock *BB) {
if (const CallInst *CI = dyn_cast<CallInst>(I)) {
if (!isa<IntrinsicInst>(CI))
Size += 3;
- else if (!isa<VectorType>(CI->getType()))
+ else if (!CI->getType()->isVectorTy())
Size += 1;
}
}
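The duplication cost above follows a few fixed rules: debug intrinsics and pointer-to-pointer bitcasts are free, every other instruction costs one unit, and calls pay a surcharge (3 for ordinary calls, 1 for non-vector intrinsics). A standalone sketch, with flags standing in for the isa<> checks on the real Instruction:

    unsigned instCost(bool isDebugIntrinsic, bool isPtrToPtrBitCast,
                      bool isCall, bool isIntrinsic, bool hasVectorType) {
      if (isDebugIntrinsic || isPtrToPtrBitCast)
        return 0;                      // free: no code is generated for these
      unsigned Size = 1;               // everything else costs at least one
      if (isCall)
        Size += isIntrinsic ? (hasVectorType ? 0 : 1) : 3;
      return Size;
    }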
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 81f9ae6..d7ace342 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -678,7 +678,7 @@ void LICM::PromoteValuesInLoop() {
// If we are promoting a pointer value, update alias information for the
// inserted load.
Value *LoadValue = 0;
- if (isa<PointerType>(cast<PointerType>(Ptr->getType())->getElementType())) {
+ if (cast<PointerType>(Ptr->getType())->getElementType()->isPointerTy()) {
// Locate a load or store through the pointer, and assign the same value
// to LI as we are loading or storing. Since we know that the value is
// stored in this loop, this will always succeed.
@@ -751,7 +751,7 @@ void LICM::PromoteValuesInLoop() {
LoadInst *LI = new LoadInst(PromotedValues[i].first, "", InsertPos);
// If this is a pointer type, update alias info appropriately.
- if (isa<PointerType>(LI->getType()))
+ if (LI->getType()->isPointerTy())
CurAST->copyValue(PointerValueNumbers[PVN++], LI);
// Store into the memory we promoted.
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 240b298..f920dca 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -198,7 +198,7 @@ struct Formula {
}
-/// DoInitialMatch - Recurrsion helper for InitialMatch.
+/// DoInitialMatch - Recursion helper for InitialMatch.
static void DoInitialMatch(const SCEV *S, Loop *L,
SmallVectorImpl<const SCEV *> &Good,
SmallVectorImpl<const SCEV *> &Bad,
@@ -337,14 +337,42 @@ void Formula::dump() const {
print(errs()); errs() << '\n';
}
-/// getSDiv - Return an expression for LHS /s RHS, if it can be determined,
-/// or null otherwise. If IgnoreSignificantBits is true, expressions like
-/// (X * Y) /s Y are simplified to Y, ignoring that the multiplication may
-/// overflow, which is useful when the result will be used in a context where
-/// the most significant bits are ignored.
-static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS,
- ScalarEvolution &SE,
- bool IgnoreSignificantBits = false) {
+/// isAddRecSExtable - Return true if the given addrec can be sign-extended
+/// without changing its value.
+static bool isAddRecSExtable(const SCEVAddRecExpr *AR, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(AR->getType()) + 1);
+ return isa<SCEVAddRecExpr>(SE.getSignExtendExpr(AR, WideTy));
+}
+
+/// isAddSExtable - Return true if the given add can be sign-extended
+/// without changing its value.
+static bool isAddSExtable(const SCEVAddExpr *A, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(A->getType()) + 1);
+ return isa<SCEVAddExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// isMulSExtable - Return true if the given mul can be sign-extended
+/// without changing its value.
+static bool isMulSExtable(const SCEVMulExpr *A, ScalarEvolution &SE) {
+ const Type *WideTy =
+ IntegerType::get(SE.getContext(),
+ SE.getTypeSizeInBits(A->getType()) + 1);
+ return isa<SCEVMulExpr>(SE.getSignExtendExpr(A, WideTy));
+}
+
+/// getExactSDiv - Return an expression for LHS /s RHS, if it can be determined
+/// and if the remainder is known to be zero, or null otherwise. If
+/// IgnoreSignificantBits is true, expressions like (X * Y) /s Y are simplified
+/// to Y, ignoring that the multiplication may overflow, which is useful when
+/// the result will be used in a context where the most significant bits are
+/// ignored.
+static const SCEV *getExactSDiv(const SCEV *LHS, const SCEV *RHS,
+ ScalarEvolution &SE,
+ bool IgnoreSignificantBits = false) {
// Handle the trivial case, which works for any SCEV type.
if (LHS == RHS)
return SE.getIntegerSCEV(1, LHS->getType());
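All three new helpers use the same one-extra-bit trick: sign-extend the expression into a type one bit wider and check whether the extension distributes; if it does, the narrow operation cannot have wrapped. A scalar model of why one extra bit suffices for a two-operand add:

    #include <cstdint>

    // The wide sum is exact in 16 bits; it equals the sign-extended narrow
    // sum exactly when the i8 add did not overflow.
    bool i8AddIsSExtable(int8_t a, int8_t b) {
      int16_t wide = (int16_t)a + (int16_t)b;             // exact result
      int8_t narrow = (int8_t)((uint8_t)a + (uint8_t)b);  // wrapping i8 add
      return wide == (int16_t)narrow;
    }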
@@ -365,39 +393,44 @@ static const SCEV *getSDiv(const SCEV *LHS, const SCEV *RHS,
.sdiv(RC->getValue()->getValue()));
}
- // Distribute the sdiv over addrec operands.
+ // Distribute the sdiv over addrec operands, if the addrec doesn't overflow.
if (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(LHS)) {
- const SCEV *Start = getSDiv(AR->getStart(), RHS, SE,
- IgnoreSignificantBits);
- if (!Start) return 0;
- const SCEV *Step = getSDiv(AR->getStepRecurrence(SE), RHS, SE,
- IgnoreSignificantBits);
- if (!Step) return 0;
- return SE.getAddRecExpr(Start, Step, AR->getLoop());
+ if (IgnoreSignificantBits || isAddRecSExtable(AR, SE)) {
+ const SCEV *Start = getExactSDiv(AR->getStart(), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Start) return 0;
+ const SCEV *Step = getExactSDiv(AR->getStepRecurrence(SE), RHS, SE,
+ IgnoreSignificantBits);
+ if (!Step) return 0;
+ return SE.getAddRecExpr(Start, Step, AR->getLoop());
+ }
}
- // Distribute the sdiv over add operands.
+ // Distribute the sdiv over add operands, if the add doesn't overflow.
if (const SCEVAddExpr *Add = dyn_cast<SCEVAddExpr>(LHS)) {
- SmallVector<const SCEV *, 8> Ops;
- for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
- I != E; ++I) {
- const SCEV *Op = getSDiv(*I, RHS, SE,
- IgnoreSignificantBits);
- if (!Op) return 0;
- Ops.push_back(Op);
+ if (IgnoreSignificantBits || isAddSExtable(Add, SE)) {
+ SmallVector<const SCEV *, 8> Ops;
+ for (SCEVAddExpr::op_iterator I = Add->op_begin(), E = Add->op_end();
+ I != E; ++I) {
+ const SCEV *Op = getExactSDiv(*I, RHS, SE,
+ IgnoreSignificantBits);
+ if (!Op) return 0;
+ Ops.push_back(Op);
+ }
+ return SE.getAddExpr(Ops);
}
- return SE.getAddExpr(Ops);
}
// Check for a multiply operand that we can pull RHS out of.
if (const SCEVMulExpr *Mul = dyn_cast<SCEVMulExpr>(LHS))
- if (IgnoreSignificantBits || Mul->hasNoSignedWrap()) {
+ if (IgnoreSignificantBits || isMulSExtable(Mul, SE)) {
SmallVector<const SCEV *, 4> Ops;
bool Found = false;
for (SCEVMulExpr::op_iterator I = Mul->op_begin(), E = Mul->op_end();
I != E; ++I) {
if (!Found)
- if (const SCEV *Q = getSDiv(*I, RHS, SE, IgnoreSignificantBits)) {
+ if (const SCEV *Q = getExactSDiv(*I, RHS, SE,
+ IgnoreSignificantBits)) {
Ops.push_back(Q);
Found = true;
continue;
@@ -640,9 +673,9 @@ void Cost::RateRegister(const SCEV *Reg,
/// RatePrimaryRegister - Record this register in the set. If we haven't seen it
/// before, rate it.
void Cost::RatePrimaryRegister(const SCEV *Reg,
- SmallPtrSet<const SCEV *, 16> &Regs,
- const Loop *L,
- ScalarEvolution &SE, DominatorTree &DT) {
+ SmallPtrSet<const SCEV *, 16> &Regs,
+ const Loop *L,
+ ScalarEvolution &SE, DominatorTree &DT) {
if (Regs.insert(Reg))
RateRegister(Reg, Regs, L, SE, DT);
}
@@ -879,7 +912,7 @@ public:
MaxOffset(INT64_MIN),
AllFixupsOutsideLoop(true) {}
- bool InsertFormula(size_t LUIdx, const Formula &F);
+ bool InsertFormula(const Formula &F);
void check() const;
@@ -889,7 +922,7 @@ public:
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
-bool LSRUse::InsertFormula(size_t LUIdx, const Formula &F) {
+bool LSRUse::InsertFormula(const Formula &F) {
SmallVector<const SCEV *, 2> Key = F.BaseRegs;
if (F.ScaledReg) Key.push_back(F.ScaledReg);
// Unstable sort by host order ok, because this is only used for uniquifying.
@@ -925,7 +958,7 @@ void LSRUse::print(raw_ostream &OS) const {
case ICmpZero: OS << "ICmpZero"; break;
case Address:
OS << "Address of ";
- if (isa<PointerType>(AccessTy))
+ if (AccessTy->isPointerTy())
OS << "pointer"; // the full pointer type could be really verbose
else
OS << *AccessTy;
@@ -1024,8 +1057,7 @@ static bool isAlwaysFoldable(int64_t BaseOffs,
GlobalValue *BaseGV,
bool HasBaseReg,
LSRUse::KindType Kind, const Type *AccessTy,
- const TargetLowering *TLI,
- ScalarEvolution &SE) {
+ const TargetLowering *TLI) {
// Fast-path: zero is always foldable.
if (BaseOffs == 0 && !BaseGV) return true;
@@ -1153,7 +1185,7 @@ class LSRInstance {
const Type *AccessTy);
public:
- void InsertInitialFormula(const SCEV *S, Loop *L, LSRUse &LU, size_t LUIdx);
+ void InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void InsertSupplementalFormula(const SCEV *S, LSRUse &LU, size_t LUIdx);
void CountRegisters(const Formula &F, size_t LUIdx);
bool InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F);
@@ -1184,16 +1216,18 @@ public:
Value *Expand(const LSRFixup &LF,
const Formula &F,
- BasicBlock::iterator IP, Loop *L, Instruction *IVIncInsertPos,
+ BasicBlock::iterator IP,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution &SE, DominatorTree &DT) const;
+ SmallVectorImpl<WeakVH> &DeadInsts) const;
+ void RewriteForPHI(PHINode *PN, const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const;
void Rewrite(const LSRFixup &LF,
const Formula &F,
- Loop *L, Instruction *IVIncInsertPos,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution &SE, DominatorTree &DT,
Pass *P) const;
void ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
Pass *P);
@@ -1212,7 +1246,7 @@ public:
}
/// OptimizeShadowIV - If IV is used in a int-to-float cast
-/// inside the loop then try to eliminate the cast opeation.
+/// inside the loop then try to eliminate the cast operation.
void LSRInstance::OptimizeShadowIV() {
const SCEV *BackedgeTakenCount = SE.getBackedgeTakenCount(L);
if (isa<SCEVCouldNotCompute>(BackedgeTakenCount))
@@ -1522,7 +1556,7 @@ LSRInstance::OptimizeLoopTermCond() {
A = SE.getSignExtendExpr(A, B->getType());
}
if (const SCEVConstant *D =
- dyn_cast_or_null<SCEVConstant>(getSDiv(B, A, SE))) {
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(B, A, SE))) {
// Stride of one or negative one can have reuse with non-addresses.
if (D->getValue()->isOne() ||
D->getValue()->isAllOnesValue())
@@ -1615,12 +1649,12 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
// Conservatively assume HasBaseReg is true for now.
if (NewOffset < LU.MinOffset) {
if (!isAlwaysFoldable(LU.MaxOffset - NewOffset, 0, /*HasBaseReg=*/true,
- Kind, AccessTy, TLI, SE))
+ Kind, AccessTy, TLI))
return false;
NewMinOffset = NewOffset;
} else if (NewOffset > LU.MaxOffset) {
if (!isAlwaysFoldable(NewOffset - LU.MinOffset, 0, /*HasBaseReg=*/true,
- Kind, AccessTy, TLI, SE))
+ Kind, AccessTy, TLI))
return false;
NewMaxOffset = NewOffset;
}
@@ -1639,7 +1673,7 @@ LSRInstance::reconcileNewOffset(LSRUse &LU, int64_t NewOffset,
/// getUse - Return an LSRUse index and an offset value for a fixup which
/// needs the given expression, with the given kind and optional access type.
-/// Either reuse an exisitng use or create a new one, as needed.
+/// Either reuse an existing use or create a new one, as needed.
std::pair<size_t, int64_t>
LSRInstance::getUse(const SCEV *&Expr,
LSRUse::KindType Kind, const Type *AccessTy) {
@@ -1647,8 +1681,7 @@ LSRInstance::getUse(const SCEV *&Expr,
int64_t Offset = ExtractImmediate(Expr, SE);
// Basic uses can't accept any offset, for example.
- if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true,
- Kind, AccessTy, TLI, SE)) {
+ if (!isAlwaysFoldable(Offset, 0, /*HasBaseReg=*/true, Kind, AccessTy, TLI)) {
Expr = Copy;
Offset = 0;
}
@@ -1683,21 +1716,29 @@ LSRInstance::getUse(const SCEV *&Expr,
void LSRInstance::CollectInterestingTypesAndFactors() {
SmallSetVector<const SCEV *, 4> Strides;
- // Collect interesting types and factors.
+ // Collect interesting types and strides.
for (IVUsers::const_iterator UI = IU.begin(), E = IU.end(); UI != E; ++UI) {
const SCEV *Stride = UI->getStride();
// Collect interesting types.
Types.insert(SE.getEffectiveSCEVType(Stride->getType()));
- // Collect interesting factors.
+ // Add the stride for this loop.
+ Strides.insert(Stride);
+
+ // Add strides for other mentioned loops.
+ for (const SCEVAddRecExpr *AR = dyn_cast<SCEVAddRecExpr>(UI->getOffset());
+ AR; AR = dyn_cast<SCEVAddRecExpr>(AR->getStart()))
+ Strides.insert(AR->getStepRecurrence(SE));
+ }
+
+ // Compute interesting factors from the set of interesting strides.
+ for (SmallSetVector<const SCEV *, 4>::const_iterator
+ I = Strides.begin(), E = Strides.end(); I != E; ++I)
for (SmallSetVector<const SCEV *, 4>::const_iterator NewStrideIter =
- Strides.begin(), SEnd = Strides.end(); NewStrideIter != SEnd;
- ++NewStrideIter) {
- const SCEV *OldStride = Stride;
+ next(I); NewStrideIter != E; ++NewStrideIter) {
+ const SCEV *OldStride = *I;
const SCEV *NewStride = *NewStrideIter;
- if (OldStride == NewStride)
- continue;
if (SE.getTypeSizeInBits(OldStride->getType()) !=
SE.getTypeSizeInBits(NewStride->getType())) {
@@ -1708,19 +1749,18 @@ void LSRInstance::CollectInterestingTypesAndFactors() {
OldStride = SE.getSignExtendExpr(OldStride, NewStride->getType());
}
if (const SCEVConstant *Factor =
- dyn_cast_or_null<SCEVConstant>(getSDiv(NewStride, OldStride,
- SE, true))) {
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(NewStride, OldStride,
+ SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
} else if (const SCEVConstant *Factor =
- dyn_cast_or_null<SCEVConstant>(getSDiv(OldStride, NewStride,
- SE, true))) {
+ dyn_cast_or_null<SCEVConstant>(getExactSDiv(OldStride,
+ NewStride,
+ SE, true))) {
if (Factor->getValue()->getValue().getMinSignedBits() <= 64)
Factors.insert(Factor->getValue()->getValue().getSExtValue());
}
}
- Strides.insert(Stride);
- }
// If all uses use the same type, don't bother looking for truncation-based
// reuse.
@@ -1788,7 +1828,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
// If this is the first use of this LSRUse, give it a formula.
if (LU.Formulae.empty()) {
- InsertInitialFormula(S, L, LU, LF.LUIdx);
+ InsertInitialFormula(S, LU, LF.LUIdx);
CountRegisters(LU.Formulae.back(), LF.LUIdx);
}
}
@@ -1797,8 +1837,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() {
}
void
-LSRInstance::InsertInitialFormula(const SCEV *S, Loop *L,
- LSRUse &LU, size_t LUIdx) {
+LSRInstance::InsertInitialFormula(const SCEV *S, LSRUse &LU, size_t LUIdx) {
Formula F;
F.InitialMatch(S, L, SE, DT);
bool Inserted = InsertFormula(LU, LUIdx, F);
@@ -1828,7 +1867,7 @@ void LSRInstance::CountRegisters(const Formula &F, size_t LUIdx) {
/// InsertFormula - If the given formula has not yet been inserted, add it to
/// the list, and return true. Return false otherwise.
bool LSRInstance::InsertFormula(LSRUse &LU, unsigned LUIdx, const Formula &F) {
- if (!LU.InsertFormula(LUIdx, F))
+ if (!LU.InsertFormula(F))
return false;
CountRegisters(F, LUIdx);
@@ -1996,7 +2035,7 @@ void LSRInstance::GenerateReassociations(LSRUse &LU, unsigned LUIdx,
/// loop-dominating registers added into a single register.
void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
Formula Base) {
- // This method is only intersting on a plurality of registers.
+ // This method is only interesting on a plurality of registers.
if (Base.BaseRegs.size() <= 1) return;
Formula F = Base;
@@ -2015,7 +2054,7 @@ void LSRInstance::GenerateCombinations(LSRUse &LU, unsigned LUIdx,
const SCEV *Sum = SE.getAddExpr(Ops);
// TODO: If Sum is zero, it probably means ScalarEvolution missed an
// opportunity to fold something. For now, just ignore such cases
- // rather than procede with zero in a register.
+ // rather than proceed with zero in a register.
if (!Sum->isZero()) {
F.BaseRegs.push_back(Sum);
(void)InsertFormula(LU, LUIdx, F);
@@ -2105,14 +2144,18 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
Formula F = Base;
// Check that the multiplication doesn't overflow.
+ if (F.AM.BaseOffs == INT64_MIN && Factor == -1)
+ continue;
F.AM.BaseOffs = (uint64_t)Base.AM.BaseOffs * Factor;
- if ((int64_t)F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
+ if (F.AM.BaseOffs / Factor != Base.AM.BaseOffs)
continue;
// Check that multiplying with the use offset doesn't overflow.
int64_t Offset = LU.MinOffset;
+ if (Offset == INT64_MIN && Factor == -1)
+ continue;
Offset = (uint64_t)Offset * Factor;
- if ((int64_t)Offset / Factor != LU.MinOffset)
+ if (Offset / Factor != LU.MinOffset)
continue;
// Check that this scale is legal.
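The new INT64_MIN guards matter because negating INT64_MIN overflows, which would make the multiply-then-divide-back exactness check itself undefined. A scalar sketch of the guarded pattern (Factor is assumed nonzero, as it is here):

    #include <cstdint>

    bool scaleExactly(int64_t Offs, int64_t Factor, int64_t &Out) {
      if (Offs == INT64_MIN && Factor == -1)
        return false;                            // negation would overflow
      int64_t Scaled = (int64_t)((uint64_t)Offs * (uint64_t)Factor);
      if (Scaled / Factor != Offs)
        return false;                            // multiply overflowed
      Out = Scaled;
      return true;
    }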
@@ -2127,14 +2170,14 @@ void LSRInstance::GenerateICmpZeroScales(LSRUse &LU, unsigned LUIdx,
// Check that multiplying with each base register doesn't overflow.
for (size_t i = 0, e = F.BaseRegs.size(); i != e; ++i) {
F.BaseRegs[i] = SE.getMulExpr(F.BaseRegs[i], FactorS);
- if (getSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
+ if (getExactSDiv(F.BaseRegs[i], FactorS, SE) != Base.BaseRegs[i])
goto next;
}
// Check that multiplying with the scaled register doesn't overflow.
if (F.ScaledReg) {
F.ScaledReg = SE.getMulExpr(F.ScaledReg, FactorS);
- if (getSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
+ if (getExactSDiv(F.ScaledReg, FactorS, SE) != Base.ScaledReg)
continue;
}
@@ -2189,7 +2232,7 @@ void LSRInstance::GenerateScales(LSRUse &LU, unsigned LUIdx,
continue;
// Divide out the factor, ignoring high bits, since we'll be
// scaling the value back up in the end.
- if (const SCEV *Quotient = getSDiv(AR, FactorS, SE, true)) {
+ if (const SCEV *Quotient = getExactSDiv(AR, FactorS, SE, true)) {
// TODO: This could be optimized to avoid all the copying.
Formula F = Base;
F.ScaledReg = Quotient;
@@ -2358,7 +2401,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() {
const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm));
unsigned BitWidth = SE.getTypeSizeInBits(IntTy);
- // TODO: Use a more targetted data structure.
+ // TODO: Use a more targeted data structure.
for (size_t L = 0, LE = LU.Formulae.size(); L != LE; ++L) {
Formula F = LU.Formulae[L];
// Use the immediate in the scaled register.
@@ -2526,9 +2569,9 @@ void LSRInstance::FilterOutUndesirableDedicatedRegisters() {
});
}
-/// NarrowSearchSpaceUsingHeuristics - If there are an extrordinary number of
+/// NarrowSearchSpaceUsingHeuristics - If there are an extraordinary number of
/// formulae to choose from, use some rough heuristics to prune down the number
-/// of formulae. This keeps the main solver from taking an extrordinary amount
+/// of formulae. This keeps the main solver from taking an extraordinary amount
/// of time in some worst-case scenarios.
void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
// This is a rough guess that seems to work fairly well.
@@ -2578,7 +2621,7 @@ void LSRInstance::NarrowSearchSpaceUsingHeuristics() {
}
DEBUG(dbgs() << "Narrowing the search space by assuming " << *Best
- << " will yeild profitable reuse.\n");
+ << " will yield profitable reuse.\n");
Taken.insert(Best);
// In any use with formulae which references this register, delete formulae
@@ -2625,7 +2668,7 @@ void LSRInstance::SolveRecurse(SmallVectorImpl<const Formula *> &Solution,
// - sort the formula so that the most profitable solutions are found first
// - sort the uses too
// - search faster:
- // - dont compute a cost, and then compare. compare while computing a cost
+ // - don't compute a cost, and then compare. compare while computing a cost
// and bail early.
// - track register sets with SmallBitVector
@@ -2736,10 +2779,8 @@ static BasicBlock *getImmediateDominator(BasicBlock *BB, DominatorTree &DT) {
Value *LSRInstance::Expand(const LSRFixup &LF,
const Formula &F,
BasicBlock::iterator IP,
- Loop *L, Instruction *IVIncInsertPos,
SCEVExpander &Rewriter,
- SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution &SE, DominatorTree &DT) const {
+ SmallVectorImpl<WeakVH> &DeadInsts) const {
const LSRUse &LU = Uses[LF.LUIdx];
// Then, collect some instructions which we will remain dominated by when
@@ -2752,8 +2793,12 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (Instruction *I =
dyn_cast<Instruction>(cast<ICmpInst>(LF.UserInst)->getOperand(1)))
Inputs.push_back(I);
- if (LF.PostIncLoop && !L->contains(LF.UserInst))
- Inputs.push_back(L->getLoopLatch()->getTerminator());
+ if (LF.PostIncLoop) {
+ if (!L->contains(LF.UserInst))
+ Inputs.push_back(L->getLoopLatch()->getTerminator());
+ else
+ Inputs.push_back(IVIncInsertPos);
+ }
// Then, climb up the immediate dominator tree as far as we can go while
// still being dominated by the input positions.
@@ -2816,8 +2861,10 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
if (AR->getLoop() == LF.PostIncLoop) {
Reg = SE.getAddExpr(Reg, AR->getStepRecurrence(SE));
// If the user is inside the loop, insert the code after the increment
- // so that it is dominated by its operand.
- if (L->contains(LF.UserInst))
+ // so that it is dominated by its operand. If the original insert point
+ // was already dominated by the increment, keep it, because there may
+ // be loop-variant operands that need to be respected also.
+ if (L->contains(LF.UserInst) && !DT.dominates(IVIncInsertPos, IP))
IP = IVIncInsertPos;
break;
}
@@ -2827,6 +2874,13 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
Ops.push_back(SE.getUnknown(Rewriter.expandCodeFor(Reg, 0, IP)));
}
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ if (!Ops.empty()) {
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+
// Expand the ScaledReg portion.
Value *ICmpScaledV = 0;
if (F.AM.Scale != 0) {
@@ -2853,12 +2907,25 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
SE.getIntegerSCEV(F.AM.Scale,
ScaledS->getType()));
Ops.push_back(ScaledS);
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
}
}
- // Expand the immediate portions.
- if (F.AM.BaseGV)
- Ops.push_back(SE.getSCEV(F.AM.BaseGV));
+ // Expand the GV portion.
+ if (F.AM.BaseGV) {
+ Ops.push_back(SE.getUnknown(F.AM.BaseGV));
+
+ // Flush the operand list to suppress SCEVExpander hoisting.
+ Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
+ Ops.clear();
+ Ops.push_back(SE.getUnknown(FullV));
+ }
+
+ // Expand the immediate portion.
int64_t Offset = (uint64_t)F.AM.BaseOffs + LF.Offset;
if (Offset != 0) {
if (LU.Kind == LSRUse::ICmpZero) {
@@ -2873,7 +2940,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
} else {
// Just add the immediate values. These again are expected to be matched
// as part of the address.
- Ops.push_back(SE.getIntegerSCEV(Offset, IntTy));
+ Ops.push_back(SE.getUnknown(ConstantInt::getSigned(IntTy, Offset)));
}
}
@@ -2921,73 +2988,81 @@ Value *LSRInstance::Expand(const LSRFixup &LF,
return FullV;
}
+/// RewriteForPHI - Helper for Rewrite. PHI nodes are special because the use
+/// of their operands effectively happens in their predecessor blocks, so the
+/// expression may need to be expanded in multiple places.
+void LSRInstance::RewriteForPHI(PHINode *PN,
+ const LSRFixup &LF,
+ const Formula &F,
+ SCEVExpander &Rewriter,
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ Pass *P) const {
+ DenseMap<BasicBlock *, Value *> Inserted;
+ for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
+ if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
+ BasicBlock *BB = PN->getIncomingBlock(i);
+
+ // If this is a critical edge, split the edge so that we do not insert
+ // the code on all predecessor/successor paths. We do this unless this
+ // is the canonical backedge for this loop, which complicates post-inc
+ // users.
+ if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
+ !isa<IndirectBrInst>(BB->getTerminator()) &&
+ (PN->getParent() != L->getHeader() || !L->contains(BB))) {
+ // Split the critical edge.
+ BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
+
+ // If PN is outside of the loop and BB is in the loop, we want to
+ // move the block to be immediately before the PHI block, not
+ // immediately after BB.
+ if (L->contains(BB) && !L->contains(PN))
+ NewBB->moveBefore(PN->getParent());
+
+ // Splitting the edge can reduce the number of PHI entries we have.
+ e = PN->getNumIncomingValues();
+ BB = NewBB;
+ i = PN->getBasicBlockIndex(BB);
+ }
+
+ std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
+ Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
+ if (!Pair.second)
+ PN->setIncomingValue(i, Pair.first->second);
+ else {
+ Value *FullV = Expand(LF, F, BB->getTerminator(), Rewriter, DeadInsts);
+
+ // If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
+ if (FullV->getType() != OpTy)
+ FullV =
+ CastInst::Create(CastInst::getCastOpcode(FullV, false,
+ OpTy, false),
+ FullV, LF.OperandValToReplace->getType(),
+ "tmp", BB->getTerminator());
+
+ PN->setIncomingValue(i, FullV);
+ Pair.first->second = FullV;
+ }
+ }
+}
+
/// Rewrite - Emit instructions for the leading candidate expression for this
/// LSRUse (this is called "expanding"), and update the UserInst to reference
/// the newly expanded value.
void LSRInstance::Rewrite(const LSRFixup &LF,
const Formula &F,
- Loop *L, Instruction *IVIncInsertPos,
SCEVExpander &Rewriter,
SmallVectorImpl<WeakVH> &DeadInsts,
- ScalarEvolution &SE, DominatorTree &DT,
Pass *P) const {
- const Type *OpTy = LF.OperandValToReplace->getType();
-
// First, find an insertion point that dominates UserInst. For PHI nodes,
// find the nearest block which dominates all the relevant uses.
if (PHINode *PN = dyn_cast<PHINode>(LF.UserInst)) {
- DenseMap<BasicBlock *, Value *> Inserted;
- for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i)
- if (PN->getIncomingValue(i) == LF.OperandValToReplace) {
- BasicBlock *BB = PN->getIncomingBlock(i);
-
- // If this is a critical edge, split the edge so that we do not insert
- // the code on all predecessor/successor paths. We do this unless this
- // is the canonical backedge for this loop, which complicates post-inc
- // users.
- if (e != 1 && BB->getTerminator()->getNumSuccessors() > 1 &&
- !isa<IndirectBrInst>(BB->getTerminator()) &&
- (PN->getParent() != L->getHeader() || !L->contains(BB))) {
- // Split the critical edge.
- BasicBlock *NewBB = SplitCriticalEdge(BB, PN->getParent(), P);
-
- // If PN is outside of the loop and BB is in the loop, we want to
- // move the block to be immediately before the PHI block, not
- // immediately after BB.
- if (L->contains(BB) && !L->contains(PN))
- NewBB->moveBefore(PN->getParent());
-
- // Splitting the edge can reduce the number of PHI entries we have.
- e = PN->getNumIncomingValues();
- BB = NewBB;
- i = PN->getBasicBlockIndex(BB);
- }
-
- std::pair<DenseMap<BasicBlock *, Value *>::iterator, bool> Pair =
- Inserted.insert(std::make_pair(BB, static_cast<Value *>(0)));
- if (!Pair.second)
- PN->setIncomingValue(i, Pair.first->second);
- else {
- Value *FullV = Expand(LF, F, BB->getTerminator(), L, IVIncInsertPos,
- Rewriter, DeadInsts, SE, DT);
-
- // If this is reuse-by-noop-cast, insert the noop cast.
- if (FullV->getType() != OpTy)
- FullV =
- CastInst::Create(CastInst::getCastOpcode(FullV, false,
- OpTy, false),
- FullV, LF.OperandValToReplace->getType(),
- "tmp", BB->getTerminator());
-
- PN->setIncomingValue(i, FullV);
- Pair.first->second = FullV;
- }
- }
+ RewriteForPHI(PN, LF, F, Rewriter, DeadInsts, P);
} else {
- Value *FullV = Expand(LF, F, LF.UserInst, L, IVIncInsertPos,
- Rewriter, DeadInsts, SE, DT);
+ Value *FullV = Expand(LF, F, LF.UserInst, Rewriter, DeadInsts);
// If this is reuse-by-noop-cast, insert the noop cast.
+ const Type *OpTy = LF.OperandValToReplace->getType();
if (FullV->getType() != OpTy) {
Instruction *Cast =
CastInst::Create(CastInst::getCastOpcode(FullV, false, OpTy, false),
@@ -3024,8 +3099,7 @@ LSRInstance::ImplementSolution(const SmallVectorImpl<const Formula *> &Solution,
for (size_t i = 0, e = Fixups.size(); i != e; ++i) {
size_t LUIdx = Fixups[i].LUIdx;
- Rewrite(Fixups[i], *Solution[LUIdx], L, IVIncInsertPos, Rewriter,
- DeadInsts, SE, DT, P);
+ Rewrite(Fixups[i], *Solution[LUIdx], Rewriter, DeadInsts, P);
Changed = true;
}
@@ -3054,7 +3128,7 @@ LSRInstance::LSRInstance(const TargetLowering *tli, Loop *l, Pass *P)
dbgs() << ":\n");
/// OptimizeShadowIV - If IV is used in a int-to-float cast
- /// inside the loop then try to eliminate the cast opeation.
+ /// inside the loop then try to eliminate the cast operation.
OptimizeShadowIV();
// Change loop terminating condition to use the postinc iv when possible.
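
Taken together, the three Expand hunks above repeat a single idiom: after the register, scaled-register, and GV portions are each appended, the partial sum is expanded immediately and re-wrapped as a SCEVUnknown so that SCEVExpander cannot hoist it away from the chosen insert point. A minimal standalone sketch of that idiom follows, using the same Ops, Rewriter, SE, Ty, and IP names as Expand; the FlushOps helper itself is hypothetical and not part of the patch:

    // Hypothetical helper capturing the "flush to suppress hoisting" idiom.
    // Expanding the partial sum pins the computed value at IP; wrapping it
    // back up as a SCEVUnknown makes it opaque to later expansion steps.
    static void FlushOps(SmallVectorImpl<const SCEV *> &Ops,
                         SCEVExpander &Rewriter, ScalarEvolution &SE,
                         const Type *Ty, BasicBlock::iterator IP) {
      if (Ops.empty()) return;
      Value *FullV = Rewriter.expandCodeFor(SE.getAddExpr(Ops), Ty, IP);
      Ops.clear();
      Ops.push_back(SE.getUnknown(FullV));
    }

Each of the three call sites above is exactly this sequence inlined.
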
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 990e0c4..071e9b7 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -170,7 +170,7 @@ Pass *llvm::createLoopUnswitchPass(bool Os) {
/// Otherwise, return null.
static Value *FindLIVLoopCondition(Value *Cond, Loop *L, bool &Changed) {
// We can never unswitch on vector conditions.
- if (isa<VectorType>(Cond->getType()))
+ if (Cond->getType()->isVectorTy())
return 0;
// Constants should be folded, not unswitched on!
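
The isa<VectorType> change above is the first instance of the substitution that dominates the rest of this patch: isa<FooType>(T) becomes T->isFooTy(), a first-class Type predicate with identical semantics (a simple type-ID comparison) that avoids casting through DerivedTypes at the use site. A sketch of the new style applied to a whole prototype check, in the manner of the MemCpyOpt/MemMoveOpt hunks later in the patch; isMemCpyLikeSignature is a hypothetical name, and the isIntegerTy test on the third parameter simplifies the real TD->getIntPtrType comparison:

    // New-style predicates: equivalent to the old isa<PointerType>(...) and
    // isa<IntegerType>(...) queries, without the template casts.
    static bool isMemCpyLikeSignature(const FunctionType *FT) {
      return FT->getNumParams() == 3 &&
             FT->getReturnType() == FT->getParamType(0) &&
             FT->getParamType(0)->isPointerTy() &&
             FT->getParamType(1)->isPointerTy() &&
             FT->getParamType(2)->isIntegerTy();
    }
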
diff --git a/lib/Transforms/Scalar/Reassociate.cpp b/lib/Transforms/Scalar/Reassociate.cpp
index 187216a..12827b6 100644
--- a/lib/Transforms/Scalar/Reassociate.cpp
+++ b/lib/Transforms/Scalar/Reassociate.cpp
@@ -930,7 +930,7 @@ void Reassociate::ReassociateBB(BasicBlock *BB) {
// Reject cases where it is pointless to do this.
if (!isa<BinaryOperator>(BI) || BI->getType()->isFloatingPointTy() ||
- isa<VectorType>(BI->getType()))
+ BI->getType()->isVectorTy())
continue; // Floating point ops are not associative.
// Do not reassociate boolean (i1) expressions. We want to preserve the
diff --git a/lib/Transforms/Scalar/SCCP.cpp b/lib/Transforms/Scalar/SCCP.cpp
index 02b45a1..7e37938 100644
--- a/lib/Transforms/Scalar/SCCP.cpp
+++ b/lib/Transforms/Scalar/SCCP.cpp
@@ -295,7 +295,7 @@ public:
}
void markOverdefined(Value *V) {
- assert(!isa<StructType>(V->getType()) && "Should use other method");
+ assert(!V->getType()->isStructTy() && "Should use other method");
markOverdefined(ValueState[V], V);
}
@@ -321,12 +321,12 @@ private:
}
void markConstant(Value *V, Constant *C) {
- assert(!isa<StructType>(V->getType()) && "Should use other method");
+ assert(!V->getType()->isStructTy() && "Should use other method");
markConstant(ValueState[V], V, C);
}
void markForcedConstant(Value *V, Constant *C) {
- assert(!isa<StructType>(V->getType()) && "Should use other method");
+ assert(!V->getType()->isStructTy() && "Should use other method");
ValueState[V].markForcedConstant(C);
DEBUG(dbgs() << "markForcedConstant: " << *C << ": " << *V << '\n');
InstWorkList.push_back(V);
@@ -360,7 +360,7 @@ private:
}
void mergeInValue(Value *V, LatticeVal MergeWithV) {
- assert(!isa<StructType>(V->getType()) && "Should use other method");
+ assert(!V->getType()->isStructTy() && "Should use other method");
mergeInValue(ValueState[V], V, MergeWithV);
}
@@ -369,7 +369,7 @@ private:
/// value. This function handles the case when the value hasn't been seen yet
/// by properly seeding constants etc.
LatticeVal &getValueState(Value *V) {
- assert(!isa<StructType>(V->getType()) && "Should use getStructValueState");
+ assert(!V->getType()->isStructTy() && "Should use getStructValueState");
std::pair<DenseMap<Value*, LatticeVal>::iterator, bool> I =
ValueState.insert(std::make_pair(V, LatticeVal()));
@@ -392,7 +392,7 @@ private:
/// value/field pair. This function handles the case when the value hasn't
/// been seen yet by properly seeding constants etc.
LatticeVal &getStructValueState(Value *V, unsigned i) {
- assert(isa<StructType>(V->getType()) && "Should use getValueState");
+ assert(V->getType()->isStructTy() && "Should use getValueState");
assert(i < cast<StructType>(V->getType())->getNumElements() &&
"Invalid element #");
@@ -666,7 +666,7 @@ bool SCCPSolver::isEdgeFeasible(BasicBlock *From, BasicBlock *To) {
void SCCPSolver::visitPHINode(PHINode &PN) {
// If this PN returns a struct, just mark the result overdefined.
// TODO: We could do a lot better than this if code actually uses this.
- if (isa<StructType>(PN.getType()))
+ if (PN.getType()->isStructTy())
return markAnythingOverdefined(&PN);
if (getValueState(&PN).isOverdefined()) {
@@ -742,7 +742,7 @@ void SCCPSolver::visitReturnInst(ReturnInst &I) {
Value *ResultOp = I.getOperand(0);
// If we are tracking the return value of this function, merge it in.
- if (!TrackedRetVals.empty() && !isa<StructType>(ResultOp->getType())) {
+ if (!TrackedRetVals.empty() && !ResultOp->getType()->isStructTy()) {
DenseMap<Function*, LatticeVal>::iterator TFRVI =
TrackedRetVals.find(F);
if (TFRVI != TrackedRetVals.end()) {
@@ -787,7 +787,7 @@ void SCCPSolver::visitCastInst(CastInst &I) {
void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
  // If this returns a struct, mark all elements overdefined; we don't track
// structs in structs.
- if (isa<StructType>(EVI.getType()))
+ if (EVI.getType()->isStructTy())
return markAnythingOverdefined(&EVI);
// If this is extracting from more than one level of struct, we don't know.
@@ -795,7 +795,7 @@ void SCCPSolver::visitExtractValueInst(ExtractValueInst &EVI) {
return markOverdefined(&EVI);
Value *AggVal = EVI.getAggregateOperand();
- if (isa<StructType>(AggVal->getType())) {
+ if (AggVal->getType()->isStructTy()) {
unsigned i = *EVI.idx_begin();
LatticeVal EltVal = getStructValueState(AggVal, i);
mergeInValue(getValueState(&EVI), &EVI, EltVal);
@@ -828,7 +828,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
}
Value *Val = IVI.getInsertedValueOperand();
- if (isa<StructType>(Val->getType()))
+ if (Val->getType()->isStructTy())
// We don't track structs in structs.
markOverdefined(getStructValueState(&IVI, i), &IVI);
else {
@@ -841,7 +841,7 @@ void SCCPSolver::visitInsertValueInst(InsertValueInst &IVI) {
void SCCPSolver::visitSelectInst(SelectInst &I) {
// If this select returns a struct, just mark the result overdefined.
// TODO: We could do a lot better than this if code actually uses this.
- if (isa<StructType>(I.getType()))
+ if (I.getType()->isStructTy())
return markAnythingOverdefined(&I);
LatticeVal CondValue = getValueState(I.getCondition());
@@ -1166,7 +1166,7 @@ void SCCPSolver::visitGetElementPtrInst(GetElementPtrInst &I) {
void SCCPSolver::visitStoreInst(StoreInst &SI) {
// If this store is of a struct, ignore it.
- if (isa<StructType>(SI.getOperand(0)->getType()))
+ if (SI.getOperand(0)->getType()->isStructTy())
return;
if (TrackedGlobals.empty() || !isa<GlobalVariable>(SI.getOperand(1)))
@@ -1187,7 +1187,7 @@ void SCCPSolver::visitStoreInst(StoreInst &SI) {
// global, we can replace the load with the loaded constant value!
void SCCPSolver::visitLoadInst(LoadInst &I) {
// If this load is of a struct, just mark the result overdefined.
- if (isa<StructType>(I.getType()))
+ if (I.getType()->isStructTy())
return markAnythingOverdefined(&I);
LatticeVal PtrVal = getValueState(I.getOperand(0));
@@ -1241,7 +1241,7 @@ CallOverdefined:
  // Otherwise, if we have a single return value and the function is a
  // declaration, maybe we can constant fold it.
- if (F && F->isDeclaration() && !isa<StructType>(I->getType()) &&
+ if (F && F->isDeclaration() && !I->getType()->isStructTy() &&
canConstantFoldCallTo(F)) {
SmallVector<Constant*, 8> Operands;
@@ -1352,7 +1352,7 @@ void SCCPSolver::Solve() {
// since all of its users will have already been marked as overdefined.
// Update all of the users of this instruction's value.
//
- if (isa<StructType>(I->getType()) || !getValueState(I).isOverdefined())
+ if (I->getType()->isStructTy() || !getValueState(I).isOverdefined())
for (Value::use_iterator UI = I->use_begin(), E = I->use_end();
UI != E; ++UI)
if (Instruction *I = dyn_cast<Instruction>(*UI))
@@ -1418,7 +1418,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
if (!LV.isUndefined()) continue;
// No instructions using structs need disambiguation.
- if (isa<StructType>(I->getOperand(0)->getType()))
+ if (I->getOperand(0)->getType()->isStructTy())
continue;
// Get the lattice values of the first two operands for use below.
@@ -1426,7 +1426,7 @@ bool SCCPSolver::ResolvedUndefsIn(Function &F) {
LatticeVal Op1LV;
if (I->getNumOperands() == 2) {
// No instructions using structs need disambiguation.
- if (isa<StructType>(I->getOperand(1)->getType()))
+ if (I->getOperand(1)->getType()->isStructTy())
continue;
// If this is a two-operand instruction, and if both operands are
@@ -1656,7 +1656,7 @@ bool SCCP::runOnFunction(Function &F) {
continue;
// TODO: Reconstruct structs from their elements.
- if (isa<StructType>(Inst->getType()))
+ if (Inst->getType()->isStructTy())
continue;
LatticeVal IV = Solver.getLatticeValueFor(Inst);
@@ -1792,7 +1792,7 @@ bool IPSCCP::runOnModule(Module &M) {
if (Solver.isBlockExecutable(F->begin())) {
for (Function::arg_iterator AI = F->arg_begin(), E = F->arg_end();
AI != E; ++AI) {
- if (AI->use_empty() || isa<StructType>(AI->getType())) continue;
+ if (AI->use_empty() || AI->getType()->isStructTy()) continue;
// TODO: Could use getStructLatticeValueFor to find out if the entire
// result is a constant and replace it entirely if so.
@@ -1835,7 +1835,7 @@ bool IPSCCP::runOnModule(Module &M) {
for (BasicBlock::iterator BI = BB->begin(), E = BB->end(); BI != E; ) {
Instruction *Inst = BI++;
- if (Inst->getType()->isVoidTy() || isa<StructType>(Inst->getType()))
+ if (Inst->getType()->isVoidTy() || Inst->getType()->isStructTy())
continue;
// TODO: Could use getStructLatticeValueFor to find out if the entire
@@ -1918,6 +1918,14 @@ bool IPSCCP::runOnModule(Module &M) {
// all call uses with the inferred value. This means we don't need to bother
// actually returning anything from the function. Replace all return
// instructions with return undef.
+ //
+ // Do this in two stages: first identify the functions we should process, then
+ // actually zap their returns. This is important because we can only do this
+ // if the address of the function isn't taken. In cases where a return is the
+ // last use of a function, the order of processing functions would affect
+ // whether other functions are optimizable.
+ SmallVector<ReturnInst*, 8> ReturnsToZap;
+
// TODO: Process multiple value ret instructions also.
const DenseMap<Function*, LatticeVal> &RV = Solver.getTrackedRetVals();
for (DenseMap<Function*, LatticeVal>::const_iterator I = RV.begin(),
@@ -1933,7 +1941,13 @@ bool IPSCCP::runOnModule(Module &M) {
for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
if (!isa<UndefValue>(RI->getOperand(0)))
- RI->setOperand(0, UndefValue::get(F->getReturnType()));
+ ReturnsToZap.push_back(RI);
+ }
+
+  // Zap all returns which we've identified above as safe to change.
+ for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) {
+ Function *F = ReturnsToZap[i]->getParent()->getParent();
+ ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
}
  // If we inferred constant or undef values for global variables, we can delete
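
The ReturnsToZap change above is deliberately two-phase: mutating returns while the candidate scan is still running could change whether a later function's address appears taken, since a return can be the last use of a function. A self-contained sketch of the same collect-then-mutate shape follows; CanZap stands in, hypothetically, for the real eligibility test (address not taken, tracked return value resolved):

    // Collect every candidate return before rewriting any of them, so a
    // rewrite cannot influence a later function's eligibility check.
    static void ZapReturns(Module &M, bool (*CanZap)(Function &)) {
      SmallVector<ReturnInst *, 8> ReturnsToZap;
      for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) {
        if (F->isDeclaration() || !CanZap(*F))
          continue;
        for (Function::iterator BB = F->begin(), BE = F->end(); BB != BE; ++BB)
          if (ReturnInst *RI = dyn_cast<ReturnInst>(BB->getTerminator()))
            if (RI->getNumOperands() == 1 &&
                !isa<UndefValue>(RI->getOperand(0)))
              ReturnsToZap.push_back(RI);
      }
      // Phase two: only now is it safe to rewrite.
      for (unsigned i = 0, e = ReturnsToZap.size(); i != e; ++i) {
        Function *F = ReturnsToZap[i]->getParent()->getParent();
        ReturnsToZap[i]->setOperand(0, UndefValue::get(F->getReturnType()));
      }
    }
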
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index 822712e..bbe6270 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -302,7 +302,7 @@ bool SROA::performScalarRepl(Function &F) {
// random stuff that doesn't use vectors (e.g. <9 x double>) because then
// we just get a lot of insert/extracts. If at least one vector is
// involved, then we probably really do have a union of vector/array.
- if (VectorTy && isa<VectorType>(VectorTy) && HadAVector) {
+ if (VectorTy && VectorTy->isVectorTy() && HadAVector) {
DEBUG(dbgs() << "CONVERT TO VECTOR: " << *AI << "\n TYPE = "
<< *VectorTy << '\n');
@@ -449,7 +449,7 @@ void SROA::isSafeGEP(GetElementPtrInst *GEPI, AllocaInst *AI,
// into.
for (; GEPIt != E; ++GEPIt) {
// Ignore struct elements, no extra checking needed for these.
- if (isa<StructType>(*GEPIt))
+ if ((*GEPIt)->isStructTy())
continue;
ConstantInt *IdxVal = dyn_cast<ConstantInt>(GEPIt.getOperand());
@@ -480,7 +480,7 @@ void SROA::isSafeMemAccess(AllocaInst *AI, uint64_t Offset, uint64_t MemSize,
// (which are essentially the same as the MemIntrinsics, especially with
// regard to copying padding between elements), or references using the
// aggregate type of the alloca.
- if (!MemOpType || isa<IntegerType>(MemOpType) || UsesAggregateType) {
+ if (!MemOpType || MemOpType->isIntegerTy() || UsesAggregateType) {
if (!UsesAggregateType) {
if (isStore)
Info.isMemCpyDst = true;
@@ -565,7 +565,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
}
LI->replaceAllUsesWith(Insert);
DeadInsts.push_back(LI);
- } else if (isa<IntegerType>(LIType) &&
+ } else if (LIType->isIntegerTy() &&
TD->getTypeAllocSize(LIType) ==
TD->getTypeAllocSize(AI->getAllocatedType())) {
// If this is a load of the entire alloca to an integer, rewrite it.
@@ -588,7 +588,7 @@ void SROA::RewriteForScalarRepl(Instruction *I, AllocaInst *AI, uint64_t Offset,
new StoreInst(Extract, NewElts[i], SI);
}
DeadInsts.push_back(SI);
- } else if (isa<IntegerType>(SIType) &&
+ } else if (SIType->isIntegerTy() &&
TD->getTypeAllocSize(SIType) ==
TD->getTypeAllocSize(AI->getAllocatedType())) {
// If this is a store of the entire alloca from an integer, rewrite it.
@@ -833,7 +833,7 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *Inst,
// Convert the integer value to the appropriate type.
StoreVal = ConstantInt::get(Context, TotalVal);
- if (isa<PointerType>(ValTy))
+ if (ValTy->isPointerTy())
StoreVal = ConstantExpr::getIntToPtr(StoreVal, ValTy);
else if (ValTy->isFloatingPointTy())
StoreVal = ConstantExpr::getBitCast(StoreVal, ValTy);
@@ -939,7 +939,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
Value *DestField = NewElts[i];
if (EltVal->getType() == FieldTy) {
// Storing to an integer field of this size, just do it.
- } else if (FieldTy->isFloatingPointTy() || isa<VectorType>(FieldTy)) {
+ } else if (FieldTy->isFloatingPointTy() || FieldTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
EltVal = new BitCastInst(EltVal, FieldTy, "", SI);
} else {
@@ -984,7 +984,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
if (EltVal->getType() == ArrayEltTy) {
// Storing to an integer field of this size, just do it.
} else if (ArrayEltTy->isFloatingPointTy() ||
- isa<VectorType>(ArrayEltTy)) {
+ ArrayEltTy->isVectorTy()) {
// Bitcast to the right element type (for fp/vector values).
EltVal = new BitCastInst(EltVal, ArrayEltTy, "", SI);
} else {
@@ -1044,8 +1044,8 @@ void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
const IntegerType *FieldIntTy = IntegerType::get(LI->getContext(),
FieldSizeBits);
- if (!isa<IntegerType>(FieldTy) && !FieldTy->isFloatingPointTy() &&
- !isa<VectorType>(FieldTy))
+ if (!FieldTy->isIntegerTy() && !FieldTy->isFloatingPointTy() &&
+ !FieldTy->isVectorTy())
SrcField = new BitCastInst(SrcField,
PointerType::getUnqual(FieldIntTy),
"", LI);
@@ -1183,7 +1183,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
return;
}
} else if (In->isFloatTy() || In->isDoubleTy() ||
- (isa<IntegerType>(In) && In->getPrimitiveSizeInBits() >= 8 &&
+ (In->isIntegerTy() && In->getPrimitiveSizeInBits() >= 8 &&
isPowerOf2_32(In->getPrimitiveSizeInBits()))) {
// If we're accessing something that could be an element of a vector, see
// if the implied vector agrees with what we already have and if Offset is
@@ -1227,7 +1227,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
return false;
MergeInType(LI->getType(), Offset, VecTy,
AllocaSize, *TD, V->getContext());
- SawVec |= isa<VectorType>(LI->getType());
+ SawVec |= LI->getType()->isVectorTy();
continue;
}
@@ -1236,7 +1236,7 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
if (SI->getOperand(0) == V || SI->isVolatile()) return 0;
MergeInType(SI->getOperand(0)->getType(), Offset,
VecTy, AllocaSize, *TD, V->getContext());
- SawVec |= isa<VectorType>(SI->getOperand(0)->getType());
+ SawVec |= SI->getOperand(0)->getType()->isVectorTy();
continue;
}
@@ -1438,7 +1438,7 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
// If the result alloca is a vector type, this is either an element
// access or a bitcast to another vector type of the same size.
if (const VectorType *VTy = dyn_cast<VectorType>(FromVal->getType())) {
- if (isa<VectorType>(ToType))
+ if (ToType->isVectorTy())
return Builder.CreateBitCast(FromVal, ToType, "tmp");
// Otherwise it must be an element access.
@@ -1521,9 +1521,9 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
LIBitWidth), "tmp");
// If the result is an integer, this is a trunc or bitcast.
- if (isa<IntegerType>(ToType)) {
+ if (ToType->isIntegerTy()) {
// Should be done.
- } else if (ToType->isFloatingPointTy() || isa<VectorType>(ToType)) {
+ } else if (ToType->isFloatingPointTy() || ToType->isVectorTy()) {
// Just do a bitcast, we know the sizes match up.
FromVal = Builder.CreateBitCast(FromVal, ToType, "tmp");
} else {
@@ -1601,10 +1601,10 @@ Value *SROA::ConvertScalar_InsertValue(Value *SV, Value *Old,
unsigned DestWidth = TD->getTypeSizeInBits(AllocaType);
unsigned SrcStoreWidth = TD->getTypeStoreSizeInBits(SV->getType());
unsigned DestStoreWidth = TD->getTypeStoreSizeInBits(AllocaType);
- if (SV->getType()->isFloatingPointTy() || isa<VectorType>(SV->getType()))
+ if (SV->getType()->isFloatingPointTy() || SV->getType()->isVectorTy())
SV = Builder.CreateBitCast(SV,
IntegerType::get(SV->getContext(),SrcWidth), "tmp");
- else if (isa<PointerType>(SV->getType()))
+ else if (SV->getType()->isPointerTy())
SV = Builder.CreatePtrToInt(SV, TD->getIntPtrType(SV->getContext()), "tmp");
// Zero extend or truncate the value if needed.
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index 54b4380..cde214b 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -357,7 +357,7 @@ void LibCallOptimization::EmitFPutC(Value *Char, Value *File, IRBuilder<> &B) {
AWI[0] = AttributeWithIndex::get(2, Attribute::NoCapture);
AWI[1] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
Constant *F;
- if (isa<PointerType>(File->getType()))
+ if (File->getType()->isPointerTy())
F = M->getOrInsertFunction("fputc", AttrListPtr::get(AWI, 2),
Type::getInt32Ty(*Context),
Type::getInt32Ty(*Context), File->getType(),
@@ -384,7 +384,7 @@ void LibCallOptimization::EmitFPutS(Value *Str, Value *File, IRBuilder<> &B) {
AWI[1] = AttributeWithIndex::get(2, Attribute::NoCapture);
AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
Constant *F;
- if (isa<PointerType>(File->getType()))
+ if (File->getType()->isPointerTy())
F = M->getOrInsertFunction("fputs", AttrListPtr::get(AWI, 3),
Type::getInt32Ty(*Context),
Type::getInt8PtrTy(*Context),
@@ -409,7 +409,7 @@ void LibCallOptimization::EmitFWrite(Value *Ptr, Value *Size, Value *File,
AWI[1] = AttributeWithIndex::get(4, Attribute::NoCapture);
AWI[2] = AttributeWithIndex::get(~0u, Attribute::NoUnwind);
Constant *F;
- if (isa<PointerType>(File->getType()))
+ if (File->getType()->isPointerTy())
F = M->getOrInsertFunction("fwrite", AttrListPtr::get(AWI, 3),
TD->getIntPtrType(*Context),
Type::getInt8PtrTy(*Context),
@@ -548,7 +548,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet<PHINode*, 32> &PHIs) {
/// GetStringLength - If we can compute the length of the string pointed to by
/// the specified pointer, return 'len+1'. If we can't, return 0.
static uint64_t GetStringLength(Value *V) {
- if (!isa<PointerType>(V->getType())) return 0;
+ if (!V->getType()->isPointerTy()) return 0;
SmallPtrSet<PHINode*, 32> PHIs;
uint64_t Len = GetStringLengthH(V, PHIs);
@@ -638,7 +638,7 @@ struct StrNCatOpt : public StrCatOpt {
FT->getReturnType() != Type::getInt8PtrTy(*Context) ||
FT->getParamType(0) != FT->getReturnType() ||
FT->getParamType(1) != FT->getReturnType() ||
- !isa<IntegerType>(FT->getParamType(2)))
+ !FT->getParamType(2)->isIntegerTy())
return 0;
// Extract some information from the instruction
@@ -790,7 +790,7 @@ struct StrNCmpOpt : public LibCallOptimization {
!FT->getReturnType()->isIntegerTy(32) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
- !isa<IntegerType>(FT->getParamType(2)))
+ !FT->getParamType(2)->isIntegerTy())
return 0;
Value *Str1P = CI->getOperand(1), *Str2P = CI->getOperand(2);
@@ -866,7 +866,7 @@ struct StrNCpyOpt : public LibCallOptimization {
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
FT->getParamType(0) != FT->getParamType(1) ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
- !isa<IntegerType>(FT->getParamType(2)))
+ !FT->getParamType(2)->isIntegerTy())
return 0;
Value *Dst = CI->getOperand(1);
@@ -915,7 +915,7 @@ struct StrLenOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 1 ||
FT->getParamType(0) != Type::getInt8PtrTy(*Context) ||
- !isa<IntegerType>(FT->getReturnType()))
+ !FT->getReturnType()->isIntegerTy())
return 0;
Value *Src = CI->getOperand(1);
@@ -939,8 +939,8 @@ struct StrToOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if ((FT->getNumParams() != 2 && FT->getNumParams() != 3) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)))
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy())
return 0;
Value *EndPtr = CI->getOperand(2);
@@ -960,9 +960,9 @@ struct StrStrOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 2 ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !isa<PointerType>(FT->getReturnType()))
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isPointerTy())
return 0;
// fold strstr(x, x) -> x.
@@ -1006,8 +1006,8 @@ struct StrStrOpt : public LibCallOptimization {
struct MemCmpOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 3 || !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
+ if (FT->getNumParams() != 3 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
!FT->getReturnType()->isIntegerTy(32))
return 0;
@@ -1055,8 +1055,8 @@ struct MemCpyOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
@@ -1076,8 +1076,8 @@ struct MemMoveOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
@@ -1097,8 +1097,8 @@ struct MemSetOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<IntegerType>(FT->getParamType(1)) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
@@ -1124,9 +1124,9 @@ struct MemCpyChkOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(3)) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getParamType(3)->isIntegerTy() ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
@@ -1152,9 +1152,9 @@ struct MemSetChkOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<IntegerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(3)) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ !FT->getParamType(3)->isIntegerTy() ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
@@ -1182,9 +1182,9 @@ struct MemMoveChkOpt : public LibCallOptimization {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 4 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(3)) ||
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getParamType(3)->isIntegerTy() ||
FT->getParamType(2) != TD->getIntPtrType(*Context))
return 0;
@@ -1205,8 +1205,8 @@ struct StrCpyChkOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
if (FT->getNumParams() != 3 || FT->getReturnType() != FT->getParamType(0) ||
- !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)))
+ !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy())
return 0;
ConstantInt *SizeCI = dyn_cast<ConstantInt>(CI->getOperand(3));
@@ -1376,7 +1376,7 @@ struct FFSOpt : public LibCallOptimization {
// result type.
if (FT->getNumParams() != 1 ||
!FT->getReturnType()->isIntegerTy(32) ||
- !isa<IntegerType>(FT->getParamType(0)))
+ !FT->getParamType(0)->isIntegerTy())
return 0;
Value *Op = CI->getOperand(1);
@@ -1410,7 +1410,7 @@ struct IsDigitOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
- if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
!FT->getParamType(0)->isIntegerTy(32))
return 0;
@@ -1431,7 +1431,7 @@ struct IsAsciiOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(i32)
- if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
!FT->getParamType(0)->isIntegerTy(32))
return 0;
@@ -1450,7 +1450,7 @@ struct AbsOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
const FunctionType *FT = Callee->getFunctionType();
// We require integer(integer) where the types agree.
- if (FT->getNumParams() != 1 || !isa<IntegerType>(FT->getReturnType()) ||
+ if (FT->getNumParams() != 1 || !FT->getReturnType()->isIntegerTy() ||
FT->getParamType(0) != FT->getReturnType())
return 0;
@@ -1493,8 +1493,8 @@ struct PrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require one fixed pointer argument and an integer/void result.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() < 1 || !isa<PointerType>(FT->getParamType(0)) ||
- !(isa<IntegerType>(FT->getReturnType()) ||
+ if (FT->getNumParams() < 1 || !FT->getParamType(0)->isPointerTy() ||
+ !(FT->getReturnType()->isIntegerTy() ||
FT->getReturnType()->isVoidTy()))
return 0;
@@ -1534,7 +1534,7 @@ struct PrintFOpt : public LibCallOptimization {
// Optimize specific format strings.
// printf("%c", chr) --> putchar(*(i8*)dst)
if (FormatStr == "%c" && CI->getNumOperands() > 2 &&
- isa<IntegerType>(CI->getOperand(2)->getType())) {
+ CI->getOperand(2)->getType()->isIntegerTy()) {
Value *Res = EmitPutChar(CI->getOperand(2), B);
if (CI->use_empty()) return CI;
@@ -1543,7 +1543,7 @@ struct PrintFOpt : public LibCallOptimization {
// printf("%s\n", str) --> puts(str)
if (FormatStr == "%s\n" && CI->getNumOperands() > 2 &&
- isa<PointerType>(CI->getOperand(2)->getType()) &&
+ CI->getOperand(2)->getType()->isPointerTy() &&
CI->use_empty()) {
EmitPutS(CI->getOperand(2), B);
return CI;
@@ -1559,9 +1559,9 @@ struct SPrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require two fixed pointer arguments and an integer result.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getReturnType()))
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
return 0;
// Check for a fixed format string.
@@ -1595,7 +1595,7 @@ struct SPrintFOpt : public LibCallOptimization {
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// sprintf(dst, "%c", chr) --> *(i8*)dst = chr; *((i8*)dst+1) = 0
- if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
+ if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
Value *V = B.CreateTrunc(CI->getOperand(3),
Type::getInt8Ty(*Context), "char");
Value *Ptr = CastToCStr(CI->getOperand(1), B);
@@ -1612,7 +1612,7 @@ struct SPrintFOpt : public LibCallOptimization {
if (!TD) return 0;
// sprintf(dest, "%s", str) -> llvm.memcpy(dest, str, strlen(str)+1, 1)
- if (!isa<PointerType>(CI->getOperand(3)->getType())) return 0;
+ if (!CI->getOperand(3)->getType()->isPointerTy()) return 0;
Value *Len = EmitStrLen(CI->getOperand(3), B);
Value *IncLen = B.CreateAdd(Len,
@@ -1634,11 +1634,11 @@ struct FWriteOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
// Require a pointer, an integer, an integer, a pointer, returning integer.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 4 || !isa<PointerType>(FT->getParamType(0)) ||
- !isa<IntegerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getParamType(2)) ||
- !isa<PointerType>(FT->getParamType(3)) ||
- !isa<IntegerType>(FT->getReturnType()))
+ if (FT->getNumParams() != 4 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isIntegerTy() ||
+ !FT->getParamType(2)->isIntegerTy() ||
+ !FT->getParamType(3)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
return 0;
// Get the element size and count.
@@ -1672,8 +1672,8 @@ struct FPutsOpt : public LibCallOptimization {
    // Require two pointers. Also, we can't optimize if the return value is used.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
!CI->use_empty())
return 0;
@@ -1694,9 +1694,9 @@ struct FPrintFOpt : public LibCallOptimization {
virtual Value *CallOptimizer(Function *Callee, CallInst *CI, IRBuilder<> &B) {
    // Require two fixed parameters as pointers and an integer result.
const FunctionType *FT = Callee->getFunctionType();
- if (FT->getNumParams() != 2 || !isa<PointerType>(FT->getParamType(0)) ||
- !isa<PointerType>(FT->getParamType(1)) ||
- !isa<IntegerType>(FT->getReturnType()))
+ if (FT->getNumParams() != 2 || !FT->getParamType(0)->isPointerTy() ||
+ !FT->getParamType(1)->isPointerTy() ||
+ !FT->getReturnType()->isIntegerTy())
return 0;
// All the optimizations depend on the format string.
@@ -1728,14 +1728,14 @@ struct FPrintFOpt : public LibCallOptimization {
// Decode the second character of the format string.
if (FormatStr[1] == 'c') {
// fprintf(F, "%c", chr) --> *(i8*)dst = chr
- if (!isa<IntegerType>(CI->getOperand(3)->getType())) return 0;
+ if (!CI->getOperand(3)->getType()->isIntegerTy()) return 0;
EmitFPutC(CI->getOperand(3), CI->getOperand(1), B);
return ConstantInt::get(CI->getType(), 1);
}
if (FormatStr[1] == 's') {
// fprintf(F, "%s", str) -> fputs(str, F)
- if (!isa<PointerType>(CI->getOperand(3)->getType()) || !CI->use_empty())
+ if (!CI->getOperand(3)->getType()->isPointerTy() || !CI->use_empty())
return 0;
EmitFPutS(CI->getOperand(3), CI->getOperand(1), B);
return CI;
@@ -2000,7 +2000,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 's':
if (Name == "strlen") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
@@ -2018,14 +2018,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "strncpy" ||
Name == "strtoull") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "strxfrm") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2038,8 +2038,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "strcasecmp" ||
Name == "strncasecmp") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
@@ -2048,7 +2048,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
} else if (Name == "strstr" ||
Name == "strpbrk") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
@@ -2056,7 +2056,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
} else if (Name == "strtok" ||
Name == "strtok_r") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
@@ -2064,15 +2064,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "setbuf" ||
Name == "setvbuf") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "strdup" ||
Name == "strndup") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2082,31 +2082,31 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "sprintf" ||
Name == "statvfs") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "snprintf") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(2)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 3);
} else if (Name == "setitimer") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(1)) ||
- !isa<PointerType>(FTy->getParamType(2)))
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
setDoesNotCapture(F, 3);
} else if (Name == "system") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
// May throw; "system" is a valid pthread cancellation point.
setDoesNotCapture(F, 1);
@@ -2115,14 +2115,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'm':
if (Name == "malloc") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getReturnType()))
+ !FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
} else if (Name == "memcmp") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setOnlyReadsMemory(F);
setDoesNotThrow(F);
@@ -2141,18 +2141,18 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "memccpy" ||
Name == "memmove") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "memalign") {
- if (!isa<PointerType>(FTy->getReturnType()))
+ if (!FTy->getReturnType()->isPointerTy())
continue;
setDoesNotAlias(F, 0);
} else if (Name == "mkdir" ||
Name == "mktime") {
if (FTy->getNumParams() == 0 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2161,15 +2161,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'r':
if (Name == "realloc") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getReturnType()))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
} else if (Name == "read") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
// May throw; "read" is a valid pthread cancellation point.
setDoesNotCapture(F, 2);
@@ -2178,15 +2178,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "remove" ||
Name == "realpath") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "rename" ||
Name == "readlink") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2196,7 +2196,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'w':
if (Name == "write") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
// May throw; "write" is a valid pthread cancellation point.
setDoesNotCapture(F, 2);
@@ -2205,16 +2205,16 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'b':
if (Name == "bcopy") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "bcmp") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setOnlyReadsMemory(F);
@@ -2222,7 +2222,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotCapture(F, 2);
} else if (Name == "bzero") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2231,7 +2231,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'c':
if (Name == "calloc") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getReturnType()))
+ !FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2241,7 +2241,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "clearerr" ||
Name == "closedir") {
if (FTy->getNumParams() == 0 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2253,14 +2253,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "atof" ||
Name == "atoll") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setOnlyReadsMemory(F);
setDoesNotCapture(F, 1);
} else if (Name == "access") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2269,9 +2269,9 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'f':
if (Name == "fopen") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2279,8 +2279,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotCapture(F, 2);
} else if (Name == "fdopen") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2300,13 +2300,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "funlockfile" ||
Name == "ftrylockfile") {
if (FTy->getNumParams() == 0 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "ferror") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2318,22 +2318,22 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "frexpl" ||
Name == "fstatvfs") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "fgets") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(2)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 3);
} else if (Name == "fread" ||
Name == "fwrite") {
if (FTy->getNumParams() != 4 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(3)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(3)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2343,8 +2343,8 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "fprintf" ||
Name == "fgetpos") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2356,13 +2356,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "getlogin_r" ||
Name == "getc_unlocked") {
if (FTy->getNumParams() == 0 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "getenv") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setOnlyReadsMemory(F);
@@ -2372,13 +2372,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
} else if (Name == "getitimer") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "getpwnam") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2387,7 +2387,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'u':
if (Name == "ungetc") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
@@ -2395,15 +2395,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "unlink" ||
Name == "unsetenv") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "utime" ||
Name == "utimes") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2413,7 +2413,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'p':
if (Name == "putc") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
@@ -2421,14 +2421,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "printf" ||
Name == "perror") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "pread" ||
Name == "pwrite") {
if (FTy->getNumParams() != 4 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
// May throw; these are valid pthread cancellation points.
setDoesNotCapture(F, 2);
@@ -2436,9 +2436,9 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotThrow(F);
} else if (Name == "popen") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2446,7 +2446,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
setDoesNotCapture(F, 2);
} else if (Name == "pclose") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2455,43 +2455,43 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'v':
if (Name == "vscanf") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "vsscanf" ||
Name == "vfscanf") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(1)) ||
- !isa<PointerType>(FTy->getParamType(2)))
+ !FTy->getParamType(1)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "valloc") {
- if (!isa<PointerType>(FTy->getReturnType()))
+ if (!FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
} else if (Name == "vprintf") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "vfprintf" ||
Name == "vsprintf") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "vsnprintf") {
if (FTy->getNumParams() != 4 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(2)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(2)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2501,14 +2501,14 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'o':
if (Name == "open") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
// May throw; "open" is a valid pthread cancellation point.
setDoesNotCapture(F, 1);
} else if (Name == "opendir") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2517,13 +2517,13 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
break;
case 't':
if (Name == "tmpfile") {
- if (!isa<PointerType>(FTy->getReturnType()))
+ if (!FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
} else if (Name == "times") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2546,15 +2546,15 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'l':
if (Name == "lstat") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "lchown") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2563,7 +2563,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 'q':
if (Name == "qsort") {
if (FTy->getNumParams() != 4 ||
- !isa<PointerType>(FTy->getParamType(3)))
+ !FTy->getParamType(3)->isPointerTy())
continue;
// May throw; places call through function pointer.
setDoesNotCapture(F, 4);
@@ -2573,27 +2573,27 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
if (Name == "__strdup" ||
Name == "__strndup") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
setDoesNotCapture(F, 1);
} else if (Name == "__strtok_r") {
if (FTy->getNumParams() != 3 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "_IO_getc") {
if (FTy->getNumParams() != 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "_IO_putc") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
@@ -2602,7 +2602,7 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
case 1:
if (Name == "\1__isoc99_scanf") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
@@ -2611,17 +2611,17 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
Name == "\1statvfs64" ||
Name == "\1__isoc99_sscanf") {
if (FTy->getNumParams() < 1 ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
setDoesNotCapture(F, 2);
} else if (Name == "\1fopen64") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getReturnType()) ||
- !isa<PointerType>(FTy->getParamType(0)) ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getReturnType()->isPointerTy() ||
+ !FTy->getParamType(0)->isPointerTy() ||
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
@@ -2630,25 +2630,25 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
} else if (Name == "\1fseeko64" ||
Name == "\1ftello64") {
if (FTy->getNumParams() == 0 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 1);
} else if (Name == "\1tmpfile64") {
- if (!isa<PointerType>(FTy->getReturnType()))
+ if (!FTy->getReturnType()->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotAlias(F, 0);
} else if (Name == "\1fstat64" ||
Name == "\1fstatvfs64") {
if (FTy->getNumParams() != 2 ||
- !isa<PointerType>(FTy->getParamType(1)))
+ !FTy->getParamType(1)->isPointerTy())
continue;
setDoesNotThrow(F);
setDoesNotCapture(F, 2);
} else if (Name == "\1open64") {
if (FTy->getNumParams() < 2 ||
- !isa<PointerType>(FTy->getParamType(0)))
+ !FTy->getParamType(0)->isPointerTy())
continue;
// May throw; "open" is a valid pthread cancellation point.
setDoesNotCapture(F, 1);
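
Everything in the hunks above is the same mechanical rewrite: isa<PointerType>(T) becomes T->isPointerTy(); the two are equivalent predicates, the member form is simply the newer, terser spelling. A minimal sketch of the shape test this initializer performs, illustration only and not part of the diff (header path per the LLVM tree of this era):

    #include "llvm/DerivedTypes.h"  // FunctionType
    using namespace llvm;

    // The check applied above to "popen": exactly two params, pointer
    // return type, pointer params. Old spelling: isa<PointerType>(...).
    static bool looksLikePopen(const FunctionType *FTy) {
      return FTy->getNumParams() == 2 &&
             FTy->getReturnType()->isPointerTy() &&
             FTy->getParamType(0)->isPointerTy() &&
             FTy->getParamType(1)->isPointerTy();
    }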
diff --git a/lib/Transforms/Utils/AddrModeMatcher.cpp b/lib/Transforms/Utils/AddrModeMatcher.cpp
index 8c4aa59..be6b383 100644
--- a/lib/Transforms/Utils/AddrModeMatcher.cpp
+++ b/lib/Transforms/Utils/AddrModeMatcher.cpp
@@ -125,7 +125,7 @@ static bool MightBeFoldableInst(Instruction *I) {
// Don't touch identity bitcasts.
if (I->getType() == I->getOperand(0)->getType())
return false;
- return isa<PointerType>(I->getType()) || isa<IntegerType>(I->getType());
+ return I->getType()->isPointerTy() || I->getType()->isIntegerTy();
case Instruction::PtrToInt:
// PtrToInt is always a noop, as we know that the int type is pointer sized.
return true;
@@ -167,8 +167,8 @@ bool AddressingModeMatcher::MatchOperationAddr(User *AddrInst, unsigned Opcode,
case Instruction::BitCast:
// BitCast is always a noop, and we can handle it as long as it is
// int->int or pointer->pointer (we don't want int<->fp or something).
- if ((isa<PointerType>(AddrInst->getOperand(0)->getType()) ||
- isa<IntegerType>(AddrInst->getOperand(0)->getType())) &&
+ if ((AddrInst->getOperand(0)->getType()->isPointerTy() ||
+ AddrInst->getOperand(0)->getType()->isIntegerTy()) &&
// Don't touch identity bitcasts. These were probably put here by LSR,
// and we don't want to mess around with them. Assume it knows what it
// is doing.
@@ -569,7 +569,7 @@ IsProfitableToFoldIntoAddressingMode(Instruction *I, ExtAddrMode &AMBefore,
// Get the access type of this use. If the use isn't a pointer, we don't
// know what it accesses.
Value *Address = User->getOperand(OpNo);
- if (!isa<PointerType>(Address->getType()))
+ if (!Address->getType()->isPointerTy())
return false;
const Type *AddressAccessTy =
cast<PointerType>(Address->getType())->getElementType();
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 7bc4fcd..1f62dab 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -274,24 +274,31 @@ void llvm::RemoveSuccessor(TerminatorInst *TI, unsigned SuccNum) {
ReplaceInstWithInst(TI, NewTI);
}
-/// SplitEdge - Split the edge connecting specified block. Pass P must
-/// not be NULL.
-BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
- TerminatorInst *LatchTerm = BB->getTerminator();
- unsigned SuccNum = 0;
+/// GetSuccessorNumber - Search for the specified successor of basic block BB
+/// and return its position in the terminator instruction's list of
+/// successors. It is an error to call this with a block that is not a
+/// successor.
+unsigned llvm::GetSuccessorNumber(BasicBlock *BB, BasicBlock *Succ) {
+ TerminatorInst *Term = BB->getTerminator();
#ifndef NDEBUG
- unsigned e = LatchTerm->getNumSuccessors();
+ unsigned e = Term->getNumSuccessors();
#endif
for (unsigned i = 0; ; ++i) {
assert(i != e && "Didn't find edge?");
- if (LatchTerm->getSuccessor(i) == Succ) {
- SuccNum = i;
- break;
- }
+ if (Term->getSuccessor(i) == Succ)
+ return i;
}
+ return 0; // Not reached.
+}
+
+/// SplitEdge - Split the edge connecting the specified blocks. Pass P must
+/// not be NULL.
+BasicBlock *llvm::SplitEdge(BasicBlock *BB, BasicBlock *Succ, Pass *P) {
+ unsigned SuccNum = GetSuccessorNumber(BB, Succ);
// If this is a critical edge, let SplitCriticalEdge do it.
- if (SplitCriticalEdge(BB->getTerminator(), SuccNum, P))
+ TerminatorInst *LatchTerm = BB->getTerminator();
+ if (SplitCriticalEdge(LatchTerm, SuccNum, P))
return LatchTerm->getSuccessor(SuccNum);
// If the edge isn't critical, then BB has a single successor or Succ has a
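
A short usage sketch for the helper split out above, illustration only and not part of the diff; it assumes the matching GetSuccessorNumber declaration lands in BasicBlockUtils.h, and the SplitCriticalEdge return convention (the new block, or null when the edge was not critical) is inferred from its use in SplitEdge:

    #include "llvm/Transforms/Utils/BasicBlockUtils.h"
    using namespace llvm;

    // Split the BB -> Succ edge only when it is critical.
    static BasicBlock *splitOnlyIfCritical(BasicBlock *BB, BasicBlock *Succ,
                                           Pass *P) {
      unsigned SuccNum = GetSuccessorNumber(BB, Succ); // asserts if not a succ
      return SplitCriticalEdge(BB->getTerminator(), SuccNum, P);
    }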
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index 57ad459..d03f7a6 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -46,7 +46,7 @@ using namespace llvm;
static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
uint64_t &ByteOffset,
unsigned MaxLookup = 6) {
- if (!isa<PointerType>(V->getType()))
+ if (!V->getType()->isPointerTy())
return V;
for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
if (GEPOperator *GEP = dyn_cast<GEPOperator>(V)) {
@@ -65,7 +65,7 @@ static Value *getUnderlyingObjectWithOffset(Value *V, const TargetData *TD,
} else {
return V;
}
- assert(isa<PointerType>(V->getType()) && "Unexpected operand type!");
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
}
return V;
}
diff --git a/lib/Transforms/Utils/LoopSimplify.cpp b/lib/Transforms/Utils/LoopSimplify.cpp
index 57bab60..924b744 100644
--- a/lib/Transforms/Utils/LoopSimplify.cpp
+++ b/lib/Transforms/Utils/LoopSimplify.cpp
@@ -51,6 +51,7 @@
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Support/CFG.h"
+#include "llvm/Support/Debug.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
@@ -147,6 +148,11 @@ ReprocessLoop:
// Delete each unique out-of-loop (and thus dead) predecessor.
for (SmallPtrSet<BasicBlock *, 4>::iterator I = BadPreds.begin(),
E = BadPreds.end(); I != E; ++I) {
+
+ DEBUG(dbgs() << "LoopSimplify: Deleting edge from dead predecessor ";
+ WriteAsOperand(dbgs(), *I, false);
+ dbgs() << "\n");
+
// Inform each successor of each dead pred.
for (succ_iterator SI = succ_begin(*I), SE = succ_end(*I); SI != SE; ++SI)
(*SI)->removePredecessor(*I);
@@ -159,6 +165,27 @@ ReprocessLoop:
}
}
+ // If there are exiting blocks with branches on undef, resolve the undef in
+ // the direction which will exit the loop. This will help simplify loop
+ // trip count computations.
+ SmallVector<BasicBlock*, 8> ExitingBlocks;
+ L->getExitingBlocks(ExitingBlocks);
+ for (SmallVectorImpl<BasicBlock *>::iterator I = ExitingBlocks.begin(),
+ E = ExitingBlocks.end(); I != E; ++I)
+ if (BranchInst *BI = dyn_cast<BranchInst>((*I)->getTerminator()))
+ if (BI->isConditional()) {
+ if (UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition())) {
+
+ DEBUG(dbgs() << "LoopSimplify: Resolving \"br i1 undef\" to exit in ";
+ WriteAsOperand(dbgs(), *I, false);
+ dbgs() << "\n");
+
+ BI->setCondition(ConstantInt::get(Cond->getType(),
+ !L->contains(BI->getSuccessor(0))));
+ Changed = true;
+ }
+ }
+
// Does the loop already have a preheader? If so, don't insert one.
BasicBlock *Preheader = L->getLoopPreheader();
if (!Preheader) {
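
The undef-branch resolution added above, restated as a self-contained sketch (illustration only; the function name is hypothetical). The polarity is the key detail: an i1 condition of true selects successor 0, and the chosen value is true exactly when successor 0 lies outside the loop, so the resolved branch always exits:

    #include "llvm/Instructions.h"
    #include "llvm/Constants.h"
    #include "llvm/Analysis/LoopInfo.h"
    using namespace llvm;

    static bool resolveUndefExitBranch(Loop *L, BasicBlock *Exiting) {
      BranchInst *BI = dyn_cast<BranchInst>(Exiting->getTerminator());
      if (!BI || !BI->isConditional())
        return false;
      UndefValue *Cond = dyn_cast<UndefValue>(BI->getCondition());
      if (!Cond)
        return false;
      // true iff successor 0 leaves the loop; either way we branch out.
      BI->setCondition(ConstantInt::get(Cond->getType(),
                                        !L->contains(BI->getSuccessor(0))));
      return true;
    }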
@@ -250,8 +277,6 @@ ReprocessLoop:
break;
}
if (UniqueExit) {
- SmallVector<BasicBlock*, 8> ExitingBlocks;
- L->getExitingBlocks(ExitingBlocks);
for (unsigned i = 0, e = ExitingBlocks.size(); i != e; ++i) {
BasicBlock *ExitingBlock = ExitingBlocks[i];
if (!ExitingBlock->getSinglePredecessor()) continue;
@@ -282,6 +307,11 @@ ReprocessLoop:
// Success. The block is now dead, so remove it from the loop,
// update the dominator tree and dominance frontier, and delete it.
+
+ DEBUG(dbgs() << "LoopSimplify: Eliminating exiting block ";
+ WriteAsOperand(dbgs(), ExitingBlock, false);
+ dbgs() << "\n");
+
assert(pred_begin(ExitingBlock) == pred_end(ExitingBlock));
Changed = true;
LI->removeBlock(ExitingBlock);
@@ -335,6 +365,10 @@ BasicBlock *LoopSimplify::InsertPreheaderForLoop(Loop *L) {
SplitBlockPredecessors(Header, &OutsideBlocks[0], OutsideBlocks.size(),
".preheader", this);
+ DEBUG(dbgs() << "LoopSimplify: Creating pre-header ";
+ WriteAsOperand(dbgs(), NewBB, false);
+ dbgs() << "\n");
+
// Make sure that NewBB is put someplace intelligent, which doesn't mess up
// code layout too horribly.
PlaceSplitBlockCarefully(NewBB, OutsideBlocks, L);
@@ -360,6 +394,10 @@ BasicBlock *LoopSimplify::RewriteLoopExitBlock(Loop *L, BasicBlock *Exit) {
LoopBlocks.size(), ".loopexit",
this);
+ DEBUG(dbgs() << "LoopSimplify: Creating dedicated exit block ";
+ WriteAsOperand(dbgs(), NewBB, false);
+ dbgs() << "\n");
+
return NewBB;
}
@@ -480,6 +518,8 @@ Loop *LoopSimplify::SeparateNestedLoop(Loop *L, LPPassManager &LPM) {
OuterLoopPreds.push_back(PN->getIncomingBlock(i));
}
+ DEBUG(dbgs() << "LoopSimplify: Splitting out a new outer loop\n");
+
BasicBlock *Header = L->getHeader();
BasicBlock *NewBB = SplitBlockPredecessors(Header, &OuterLoopPreds[0],
OuterLoopPreds.size(),
@@ -574,6 +614,10 @@ LoopSimplify::InsertUniqueBackedgeBlock(Loop *L, BasicBlock *Preheader) {
Header->getName()+".backedge", F);
BranchInst *BETerminator = BranchInst::Create(Header, BEBlock);
+ DEBUG(dbgs() << "LoopSimplify: Inserting unique backedge block ";
+ WriteAsOperand(dbgs(), BEBlock, false);
+ dbgs() << "\n");
+
// Move the new backedge block to right after the last backedge block.
Function::iterator InsertPos = BackedgeBlocks.back(); ++InsertPos;
F->getBasicBlockList().splice(InsertPos, F->getBasicBlockList(), BEBlock);
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index 544e20b..4f5a70b 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -518,7 +518,7 @@ void PromoteMem2Reg::run() {
// If this PHI node merges one value and/or undefs, get the value.
if (Value *V = PN->hasConstantValue(&DT)) {
- if (AST && isa<PointerType>(PN->getType()))
+ if (AST && PN->getType()->isPointerTy())
AST->deleteValue(PN);
PN->replaceAllUsesWith(V);
PN->eraseFromParent();
@@ -780,7 +780,7 @@ void PromoteMem2Reg::RewriteSingleStoreAlloca(AllocaInst *AI,
if (ReplVal == LI)
ReplVal = UndefValue::get(LI->getType());
LI->replaceAllUsesWith(ReplVal);
- if (AST && isa<PointerType>(LI->getType()))
+ if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
LI->eraseFromParent();
LBI.deleteValue(LI);
@@ -838,7 +838,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
for (Value::use_iterator UI = AI->use_begin(), E = AI->use_end(); UI != E;)
if (LoadInst *LI = dyn_cast<LoadInst>(*UI++)) {
LI->replaceAllUsesWith(UndefValue::get(LI->getType()));
- if (AST && isa<PointerType>(LI->getType()))
+ if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
LBI.deleteValue(LI);
LI->eraseFromParent();
@@ -874,7 +874,7 @@ void PromoteMem2Reg::PromoteSingleBlockAlloca(AllocaInst *AI, AllocaInfo &Info,
// Otherwise, there was a store before this load; the load takes its value.
--I;
LI->replaceAllUsesWith(I->second->getOperand(0));
- if (AST && isa<PointerType>(LI->getType()))
+ if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
LI->eraseFromParent();
LBI.deleteValue(LI);
@@ -922,7 +922,7 @@ bool PromoteMem2Reg::QueuePhiNode(BasicBlock *BB, unsigned AllocaNo,
InsertedPHINodes.insert(PN);
- if (AST && isa<PointerType>(PN->getType()))
+ if (AST && PN->getType()->isPointerTy())
AST->copyValue(PointerAllocaValues[AllocaNo], PN);
return true;
@@ -996,7 +996,7 @@ NextIteration:
// Anything using the load now uses the current value.
LI->replaceAllUsesWith(V);
- if (AST && isa<PointerType>(LI->getType()))
+ if (AST && LI->getType()->isPointerTy())
AST->deleteValue(LI);
BB->getInstList().erase(LI);
} else if (StoreInst *SI = dyn_cast<StoreInst>(I)) {
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 2215059..f343c38 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -271,7 +271,7 @@ static bool DominatesMergePoint(Value *V, BasicBlock *BB,
ConstantInt *SimplifyCFGOpt::GetConstantInt(Value *V) {
// Normal constant int.
ConstantInt *CI = dyn_cast<ConstantInt>(V);
- if (CI || !TD || !isa<Constant>(V) || !isa<PointerType>(V->getType()))
+ if (CI || !TD || !isa<Constant>(V) || !V->getType()->isPointerTy())
return CI;
// This is some kind of pointer constant. Turn it into a pointer-sized
@@ -701,7 +701,7 @@ bool SimplifyCFGOpt::FoldValueComparisonIntoPredecessors(TerminatorInst *TI) {
AddPredecessorToBlock(NewSuccessors[i], Pred, BB);
// Convert pointer to int before we switch.
- if (isa<PointerType>(CV->getType())) {
+ if (CV->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without TargetData");
CV = new PtrToIntInst(CV, TD->getIntPtrType(CV->getContext()),
"magicptr", PTI);
@@ -915,7 +915,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
case Instruction::Add:
case Instruction::Sub:
// Not worth doing for vector ops.
- if (isa<VectorType>(HInst->getType()))
+ if (HInst->getType()->isVectorTy())
return false;
break;
case Instruction::And:
@@ -925,7 +925,7 @@ static bool SpeculativelyExecuteBB(BranchInst *BI, BasicBlock *BB1) {
case Instruction::LShr:
case Instruction::AShr:
// Don't mess with vector operations.
- if (isa<VectorType>(HInst->getType()))
+ if (HInst->getType()->isVectorTy())
return false;
break; // These are all cheap and non-trapping instructions.
}
@@ -2068,7 +2068,7 @@ bool SimplifyCFGOpt::run(BasicBlock *BB) {
if (!TrueWhenEqual) std::swap(DefaultBB, EdgeBB);
// Convert pointer to int before we switch.
- if (isa<PointerType>(CompVal->getType())) {
+ if (CompVal->getType()->isPointerTy()) {
assert(TD && "Cannot switch on pointer without TargetData");
CompVal = new PtrToIntInst(CompVal,
TD->getIntPtrType(CompVal->getContext()),
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index f5ba7e7..fd74241 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -17,6 +17,7 @@
#include "llvm/Assembly/Writer.h"
#include "llvm/Assembly/PrintModulePass.h"
#include "llvm/Assembly/AsmAnnotationWriter.h"
+#include "llvm/LLVMContext.h"
#include "llvm/CallingConv.h"
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
@@ -376,8 +377,8 @@ namespace {
return;
// If this is a structure or opaque type, add a name for the type.
- if (((isa<StructType>(Ty) && cast<StructType>(Ty)->getNumElements())
- || isa<OpaqueType>(Ty)) && !TP.hasTypeName(Ty)) {
+ if (((Ty->isStructTy() && cast<StructType>(Ty)->getNumElements())
+ || Ty->isOpaqueTy()) && !TP.hasTypeName(Ty)) {
TP.addTypeName(Ty, "%"+utostr(unsigned(NumberedTypes.size())));
NumberedTypes.push_back(Ty);
}
@@ -432,7 +433,7 @@ static void AddModuleTypesToPrinter(TypePrinting &TP,
if (const PointerType *PTy = dyn_cast<PointerType>(Ty)) {
const Type *PETy = PTy->getElementType();
if ((PETy->isPrimitiveType() || PETy->isIntegerTy()) &&
- !isa<OpaqueType>(PETy))
+ !PETy->isOpaqueTy())
continue;
}
@@ -1236,7 +1237,6 @@ class AssemblyWriter {
TypePrinting TypePrinter;
AssemblyAnnotationWriter *AnnotationWriter;
std::vector<const Type*> NumberedTypes;
- SmallVector<StringRef, 8> MDNames;
public:
inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
@@ -1244,8 +1244,6 @@ public:
AssemblyAnnotationWriter *AAW)
: Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
AddModuleTypesToPrinter(TypePrinter, NumberedTypes, M);
- if (M)
- M->getMDKindNames(MDNames);
}
void printMDNodeBody(const MDNode *MD);
@@ -1850,8 +1848,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
//
Out << ' ';
if (!FTy->isVarArg() &&
- (!isa<PointerType>(RetTy) ||
- !isa<FunctionType>(cast<PointerType>(RetTy)->getElementType()))) {
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
TypePrinter.print(RetTy, Out);
Out << ' ';
writeOperand(Operand, false);
@@ -1896,8 +1894,8 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
//
Out << ' ';
if (!FTy->isVarArg() &&
- (!isa<PointerType>(RetTy) ||
- !isa<FunctionType>(cast<PointerType>(RetTy)->getElementType()))) {
+ (!RetTy->isPointerTy() ||
+ !cast<PointerType>(RetTy)->getElementType()->isFunctionTy())) {
TypePrinter.print(RetTy, Out);
Out << ' ';
writeOperand(Operand, false);
@@ -1988,12 +1986,20 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
}
// Print Metadata info.
- if (!MDNames.empty()) {
- SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD;
- I.getAllMetadata(InstMD);
- for (unsigned i = 0, e = InstMD.size(); i != e; ++i)
- Out << ", !" << MDNames[InstMD[i].first]
- << " !" << Machine.getMetadataSlot(InstMD[i].second);
+ SmallVector<std::pair<unsigned, MDNode*>, 4> InstMD;
+ I.getAllMetadata(InstMD);
+ if (!InstMD.empty()) {
+ SmallVector<StringRef, 8> MDNames;
+ I.getType()->getContext().getMDKindNames(MDNames);
+ for (unsigned i = 0, e = InstMD.size(); i != e; ++i) {
+ unsigned Kind = InstMD[i].first;
+ if (Kind < MDNames.size()) {
+ Out << ", !" << MDNames[Kind];
+ } else {
+ Out << ", !<unknown kind #" << Kind << ">";
+ }
+ Out << " !" << Machine.getMetadataSlot(InstMD[i].second);
+ }
}
printInfoComment(I);
}
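
The rewrite above can drop the per-writer MDNames cache because metadata kind names belong to the LLVMContext, not to a module: querying the instruction's context at print time picks up kinds registered after the writer was constructed, and anything still unnamed falls back to the explicit !<unknown kind #N> placeholder. A small sketch of the context-level API, illustration only (the "dbg" kind name is an assumption):

    #include "llvm/LLVMContext.h"
    #include "llvm/ADT/SmallVector.h"
    #include "llvm/ADT/StringRef.h"
    using namespace llvm;

    static unsigned demoMDKinds(LLVMContext &Ctx) {
      unsigned DbgKind = Ctx.getMDKindID("dbg"); // registers or looks up "dbg"
      SmallVector<StringRef, 8> Names;
      Ctx.getMDKindNames(Names);                 // Names[DbgKind] == "dbg"
      return DbgKind;
    }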
diff --git a/lib/VMCore/Attributes.cpp b/lib/VMCore/Attributes.cpp
index ff0cc9b..a000aee 100644
--- a/lib/VMCore/Attributes.cpp
+++ b/lib/VMCore/Attributes.cpp
@@ -93,7 +93,7 @@ Attributes Attribute::typeIncompatible(const Type *Ty) {
// Attributes that only apply to integers.
Incompatible |= SExt | ZExt;
- if (!isa<PointerType>(Ty))
+ if (!Ty->isPointerTy())
// Attributes that only apply to pointers.
Incompatible |= ByVal | Nest | NoAlias | StructRet | NoCapture;
diff --git a/lib/VMCore/ConstantFold.cpp b/lib/VMCore/ConstantFold.cpp
index 78a45e8..194a6d4 100644
--- a/lib/VMCore/ConstantFold.cpp
+++ b/lib/VMCore/ConstantFold.cpp
@@ -112,7 +112,7 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
IdxList.push_back(Zero);
} else if (const SequentialType *STy =
dyn_cast<SequentialType>(ElTy)) {
- if (isa<PointerType>(ElTy)) break; // Can't index into pointers!
+ if (ElTy->isPointerTy()) break; // Can't index into pointers!
ElTy = STy->getElementType();
IdxList.push_back(Zero);
} else {
@@ -189,7 +189,7 @@ static Constant *FoldBitCast(Constant *V, const Type *DestTy) {
///
static Constant *ExtractConstantBytes(Constant *C, unsigned ByteStart,
unsigned ByteSize) {
- assert(isa<IntegerType>(C->getType()) &&
+ assert(C->getType()->isIntegerTy() &&
(cast<IntegerType>(C->getType())->getBitWidth() & 7) == 0 &&
"Non-byte sized integer input");
unsigned CSize = cast<IntegerType>(C->getType())->getBitWidth()/8;
@@ -334,11 +334,7 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
return ConstantExpr::getNUWMul(E, N);
}
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- Constant *N = ConstantInt::get(DestTy, VTy->getNumElements());
- Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true);
- return ConstantExpr::getNUWMul(E, N);
- }
+
if (const StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked()) {
unsigned NumElems = STy->getNumElements();
@@ -361,6 +357,22 @@ static Constant *getFoldedSizeOf(const Type *Ty, const Type *DestTy,
}
}
+ if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
+ unsigned NumElems = UTy->getNumElements();
+ // Check for a union with all members having the same size.
+ Constant *MemberSize =
+ getFoldedSizeOf(UTy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberSize !=
+ getFoldedSizeOf(UTy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame)
+ return MemberSize;
+ }
+
// Pointer size doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
@@ -426,6 +438,24 @@ static Constant *getFoldedAlignOf(const Type *Ty, const Type *DestTy,
return MemberAlign;
}
+ if (const UnionType *UTy = dyn_cast<UnionType>(Ty)) {
+ // Union alignment is the maximum alignment of any member.
+ // Without target data, we can't compare much, but we can check to see
+ // if all the members have the same alignment.
+ unsigned NumElems = UTy->getNumElements();
+ // Check for a union with all members having the same alignment.
+ Constant *MemberAlign =
+ getFoldedAlignOf(UTy->getElementType(0), DestTy, true);
+ bool AllSame = true;
+ for (unsigned i = 1; i != NumElems; ++i)
+ if (MemberAlign != getFoldedAlignOf(UTy->getElementType(i), DestTy, true)) {
+ AllSame = false;
+ break;
+ }
+ if (AllSame)
+ return MemberAlign;
+ }
+
// Pointer alignment doesn't depend on the pointee type, so canonicalize them
// to an arbitrary pointee.
if (const PointerType *PTy = dyn_cast<PointerType>(Ty))
@@ -464,13 +494,7 @@ static Constant *getFoldedOffsetOf(const Type *Ty, Constant *FieldNo,
Constant *E = getFoldedSizeOf(ATy->getElementType(), DestTy, true);
return ConstantExpr::getNUWMul(E, N);
}
- if (const VectorType *VTy = dyn_cast<VectorType>(Ty)) {
- Constant *N = ConstantExpr::getCast(CastInst::getCastOpcode(FieldNo, false,
- DestTy, false),
- FieldNo, DestTy);
- Constant *E = getFoldedSizeOf(VTy->getElementType(), DestTy, true);
- return ConstantExpr::getNUWMul(E, N);
- }
+
if (const StructType *STy = dyn_cast<StructType>(Ty))
if (!STy->isPacked()) {
unsigned NumElems = STy->getNumElements();
@@ -551,7 +575,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
// operating on each element. In the case of bitcasts, the element
// count may be mismatched; don't attempt to handle that here.
if (ConstantVector *CV = dyn_cast<ConstantVector>(V))
- if (isa<VectorType>(DestTy) &&
+ if (DestTy->isVectorTy() &&
cast<VectorType>(DestTy)->getNumElements() ==
CV->getType()->getNumElements()) {
std::vector<Constant*> res;
@@ -634,7 +658,7 @@ Constant *llvm::ConstantFoldCastInstruction(unsigned opc, Constant *V,
}
}
// Handle an offsetof-like expression.
- if (isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)){
+ if (Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()){
if (Constant *C = getFoldedOffsetOf(Ty, CE->getOperand(2),
DestTy, false))
return C;
@@ -885,7 +909,7 @@ Constant *llvm::ConstantFoldInsertValueInstruction(Constant *Agg,
unsigned numOps;
if (const ArrayType *AR = dyn_cast<ArrayType>(AggTy))
numOps = AR->getNumElements();
- else if (isa<UnionType>(AggTy))
+ else if (AggTy->isUnionTy())
numOps = 1;
else
numOps = cast<StructType>(AggTy)->getNumElements();
@@ -1105,6 +1129,10 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
return ConstantExpr::getLShr(C1, C2);
break;
}
+ } else if (isa<ConstantInt>(C1)) {
+ // If C1 is a ConstantInt and C2 is not, swap the operands.
+ if (Instruction::isCommutative(Opcode))
+ return ConstantExpr::get(Opcode, C2, C1);
}
// At this point we know neither constant is an UndefValue.
@@ -1364,31 +1392,8 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
} else if (isa<ConstantExpr>(C2)) {
// If C2 is a constant expr and C1 isn't, flop them around and fold the
// other way if possible.
- switch (Opcode) {
- case Instruction::Add:
- case Instruction::FAdd:
- case Instruction::Mul:
- case Instruction::FMul:
- case Instruction::And:
- case Instruction::Or:
- case Instruction::Xor:
- // No change of opcode required.
+ if (Instruction::isCommutative(Opcode))
return ConstantFoldBinaryInstruction(Opcode, C2, C1);
-
- case Instruction::Shl:
- case Instruction::LShr:
- case Instruction::AShr:
- case Instruction::Sub:
- case Instruction::FSub:
- case Instruction::SDiv:
- case Instruction::UDiv:
- case Instruction::FDiv:
- case Instruction::URem:
- case Instruction::SRem:
- case Instruction::FRem:
- default: // These instructions cannot be flopped around.
- break;
- }
}
// i1 can be simplified in many cases.
@@ -1427,7 +1432,7 @@ Constant *llvm::ConstantFoldBinaryInstruction(unsigned Opcode,
/// isMaybeZeroSizedType - This type is zero sized if it's an array or
/// structure of zero sized types. The only leaf zero sized type is an empty
/// structure.
static bool isMaybeZeroSizedType(const Type *Ty) {
- if (isa<OpaqueType>(Ty)) return true; // Can't say.
+ if (Ty->isOpaqueTy()) return true; // Can't say.
if (const StructType *STy = dyn_cast<StructType>(Ty)) {
// If all of elements have zero size, this does too.
@@ -1667,7 +1672,7 @@ static ICmpInst::Predicate evaluateICmpRelation(Constant *V1, Constant *V2,
// If the cast is not actually changing bits, and the second operand is a
// null pointer, do the comparison with the pre-casted value.
if (V2->isNullValue() &&
- (isa<PointerType>(CE1->getType()) || CE1->getType()->isIntegerTy())) {
+ (CE1->getType()->isPointerTy() || CE1->getType()->isIntegerTy())) {
if (CE1->getOpcode() == Instruction::ZExt) isSigned = false;
if (CE1->getOpcode() == Instruction::SExt) isSigned = true;
return evaluateICmpRelation(CE1Op0,
@@ -1914,7 +1919,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
return ConstantInt::get(ResultTy, R==APFloat::cmpGreaterThan ||
R==APFloat::cmpEqual);
}
- } else if (isa<VectorType>(C1->getType())) {
+ } else if (C1->getType()->isVectorTy()) {
SmallVector<Constant*, 16> C1Elts, C2Elts;
C1->getVectorElements(C1Elts);
C2->getVectorElements(C2Elts);
@@ -2065,7 +2070,7 @@ Constant *llvm::ConstantFoldCompareInstruction(unsigned short pred,
if (ConstantExpr *CE2 = dyn_cast<ConstantExpr>(C2)) {
Constant *CE2Op0 = CE2->getOperand(0);
if (CE2->getOpcode() == Instruction::BitCast &&
- isa<VectorType>(CE2->getType())==isa<VectorType>(CE2Op0->getType())) {
+ CE2->getType()->isVectorTy()==CE2Op0->getType()->isVectorTy()) {
Constant *Inverse = ConstantExpr::getBitCast(C1, CE2Op0->getType());
return ConstantExpr::getICmp(pred, Inverse, CE2Op0);
}
@@ -2184,7 +2189,7 @@ Constant *llvm::ConstantFoldGetElementPtr(Constant *C,
I != E; ++I)
LastTy = *I;
- if ((LastTy && isa<ArrayType>(LastTy)) || Idx0->isNullValue()) {
+ if ((LastTy && LastTy->isArrayTy()) || Idx0->isNullValue()) {
SmallVector<Value*, 16> NewIndices;
NewIndices.reserve(NumIdx + CE->getNumOperands());
for (unsigned i = 1, e = CE->getNumOperands()-1; i != e; ++i)
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index 98040ea..10f8879 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -229,7 +229,7 @@ Constant::PossibleRelocationsTy Constant::getRelocationInfo() const {
/// This handles breaking down a vector undef into undef elements, etc. For
/// constant exprs and other cases we can't handle, we return an empty vector.
void Constant::getVectorElements(SmallVectorImpl<Constant*> &Elts) const {
- assert(isa<VectorType>(getType()) && "Not a vector constant!");
+ assert(getType()->isVectorTy() && "Not a vector constant!");
if (const ConstantVector *CV = dyn_cast<ConstantVector>(this)) {
for (unsigned i = 0, e = CV->getNumOperands(); i != e; ++i)
@@ -944,7 +944,7 @@ bool ConstantFP::isValueValidForType(const Type *Ty, const APFloat& Val) {
// Factory Function Implementation
ConstantAggregateZero* ConstantAggregateZero::get(const Type* Ty) {
- assert((isa<StructType>(Ty) || isa<ArrayType>(Ty) || isa<VectorType>(Ty)) &&
+ assert((Ty->isStructTy() || Ty->isArrayTy() || Ty->isVectorTy()) &&
"Cannot create an aggregate zero of non-aggregate type!");
LLVMContextImpl *pImpl = Ty->getContext().pImpl;
@@ -1239,8 +1239,8 @@ Constant *ConstantExpr::getTruncOrBitCast(Constant *C, const Type *Ty) {
}
Constant *ConstantExpr::getPointerCast(Constant *S, const Type *Ty) {
- assert(isa<PointerType>(S->getType()) && "Invalid cast");
- assert((Ty->isIntegerTy() || isa<PointerType>(Ty)) && "Invalid cast");
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) && "Invalid cast");
if (Ty->isIntegerTy())
return getCast(Instruction::PtrToInt, S, Ty);
@@ -1383,14 +1383,14 @@ Constant *ConstantExpr::getFPToSI(Constant *C, const Type *Ty) {
}
Constant *ConstantExpr::getPtrToInt(Constant *C, const Type *DstTy) {
- assert(isa<PointerType>(C->getType()) && "PtrToInt source must be pointer");
+ assert(C->getType()->isPointerTy() && "PtrToInt source must be pointer");
assert(DstTy->isIntegerTy() && "PtrToInt destination must be integral");
return getFoldedCast(Instruction::PtrToInt, C, DstTy);
}
Constant *ConstantExpr::getIntToPtr(Constant *C, const Type *DstTy) {
assert(C->getType()->isIntegerTy() && "IntToPtr source must be integral");
- assert(isa<PointerType>(DstTy) && "IntToPtr destination must be a pointer");
+ assert(DstTy->isPointerTy() && "IntToPtr destination must be a pointer");
return getFoldedCast(Instruction::IntToPtr, C, DstTy);
}
@@ -1592,7 +1592,7 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
(Constant**)Idxs, NumIdx))
return FC; // Fold a few common cases...
- assert(isa<PointerType>(C->getType()) &&
+ assert(C->getType()->isPointerTy() &&
"Non-pointer type for constant GetElementPtr expression");
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec;
@@ -1619,7 +1619,7 @@ Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy,
(Constant**)Idxs, NumIdx))
return FC; // Fold a few common cases...
- assert(isa<PointerType>(C->getType()) &&
+ assert(C->getType()->isPointerTy() &&
"Non-pointer type for constant GetElementPtr expression");
// Look up the constant in the table first to ensure uniqueness
std::vector<Constant*> ArgVec;
@@ -1727,7 +1727,7 @@ Constant *ConstantExpr::getExtractElementTy(const Type *ReqTy, Constant *Val,
}
Constant *ConstantExpr::getExtractElement(Constant *Val, Constant *Idx) {
- assert(isa<VectorType>(Val->getType()) &&
+ assert(Val->getType()->isVectorTy() &&
"Tried to create extractelement operation on non-vector type!");
assert(Idx->getType()->isIntegerTy(32) &&
"Extractelement index must be i32 type!");
@@ -1751,7 +1751,7 @@ Constant *ConstantExpr::getInsertElementTy(const Type *ReqTy, Constant *Val,
Constant *ConstantExpr::getInsertElement(Constant *Val, Constant *Elt,
Constant *Idx) {
- assert(isa<VectorType>(Val->getType()) &&
+ assert(Val->getType()->isVectorTy() &&
"Tried to create insertelement operation on non-vector type!");
assert(Elt->getType() == cast<VectorType>(Val->getType())->getElementType()
&& "Insertelement types must match!");
@@ -2113,7 +2113,52 @@ void ConstantStruct::replaceUsesOfWithOnConstant(Value *From, Value *To,
void ConstantUnion::replaceUsesOfWithOnConstant(Value *From, Value *To,
Use *U) {
- assert(false && "Implement replaceUsesOfWithOnConstant for unions");
+ assert(isa<Constant>(To) && "Cannot make Constant refer to non-constant!");
+ Constant *ToC = cast<Constant>(To);
+
+ assert(U == OperandList && "Union constants can only have one use!");
+ assert(getNumOperands() == 1 && "Union constants can only have one use!");
+ assert(getOperand(0) == From && "ReplaceAllUsesWith broken!");
+
+ std::pair<LLVMContextImpl::UnionConstantsTy::MapKey, ConstantUnion*> Lookup;
+ Lookup.first.first = getType();
+ Lookup.second = this;
+ Lookup.first.second = ToC;
+
+ LLVMContext &Context = getType()->getContext();
+ LLVMContextImpl *pImpl = Context.pImpl;
+
+ Constant *Replacement = 0;
+ if (ToC->isNullValue()) {
+ Replacement = ConstantAggregateZero::get(getType());
+ } else {
+ // Check to see if we already have this union constant.
+ bool Exists;
+ LLVMContextImpl::UnionConstantsTy::MapTy::iterator I =
+ pImpl->UnionConstants.InsertOrGetItem(Lookup, Exists);
+
+ if (Exists) {
+ Replacement = I->second;
+ } else {
+ // Okay, the new shape doesn't exist in the system yet. Instead of
+ // creating a new constant union, inserting it, replaceallusesof'ing the
+ // old with the new, then deleting the old... just update the current one
+ // in place!
+ pImpl->UnionConstants.MoveConstantToNewSlot(this, I);
+
+ // Update to the new value.
+ setOperand(0, ToC);
+ return;
+ }
+ }
+
+ assert(Replacement != this && "I didn't contain From!");
+
+ // Everyone using this now uses the replacement.
+ uncheckedReplaceAllUsesWith(Replacement);
+
+ // Delete the old constant!
+ destroyConstant();
}
void ConstantVector::replaceUsesOfWithOnConstant(Value *From, Value *To,
diff --git a/lib/VMCore/Core.cpp b/lib/VMCore/Core.cpp
index a044fc5..f4f65c5 100644
--- a/lib/VMCore/Core.cpp
+++ b/lib/VMCore/Core.cpp
@@ -55,6 +55,15 @@ void LLVMContextDispose(LLVMContextRef C) {
delete unwrap(C);
}
+unsigned LLVMGetMDKindIDInContext(LLVMContextRef C, const char* Name,
+ unsigned SLen) {
+ return unwrap(C)->getMDKindID(StringRef(Name, SLen));
+}
+
+unsigned LLVMGetMDKindID(const char* Name, unsigned SLen) {
+ return LLVMGetMDKindIDInContext(LLVMGetGlobalContext(), Name, SLen);
+}
+
/*===-- Operations on modules ---------------------------------------------===*/
@@ -425,6 +434,18 @@ void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
unwrap(OldVal)->replaceAllUsesWith(unwrap(NewVal));
}
+int LLVMHasMetadata(LLVMValueRef Inst) {
+ return unwrap<Instruction>(Inst)->hasMetadata();
+}
+
+LLVMValueRef LLVMGetMetadata(LLVMValueRef Inst, unsigned KindID) {
+ return wrap(unwrap<Instruction>(Inst)->getMetadata(KindID));
+}
+
+void LLVMSetMetadata(LLVMValueRef Inst, unsigned KindID, LLVMValueRef MD) {
+ unwrap<Instruction>(Inst)->setMetadata(KindID, MD? unwrap<MDNode>(MD) : NULL);
+}
+
/*--.. Conversion functions ................................................--*/
#define LLVM_DEFINE_VALUE_CAST(name) \
@@ -435,7 +456,7 @@ void LLVMReplaceAllUsesWith(LLVMValueRef OldVal, LLVMValueRef NewVal) {
LLVM_FOR_EACH_VALUE_SUBCLASS(LLVM_DEFINE_VALUE_CAST)
/*--.. Operations on Uses ..................................................--*/
-LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val) {
+LLVMUseRef LLVMGetFirstUse(LLVMValueRef Val) {
Value *V = unwrap(Val);
Value::use_iterator I = V->use_begin();
if (I == V->use_end())
@@ -443,16 +464,19 @@ LLVMUseIteratorRef LLVMGetFirstUse(LLVMValueRef Val) {
return wrap(&(I.getUse()));
}
-LLVMUseIteratorRef LLVMGetNextUse(LLVMUseIteratorRef UR) {
- return wrap(unwrap(UR)->getNext());
+LLVMUseRef LLVMGetNextUse(LLVMUseRef U) {
+ Use *Next = unwrap(U)->getNext();
+ if (Next)
+ return wrap(Next);
+ return 0;
}
-LLVMValueRef LLVMGetUser(LLVMUseIteratorRef UR) {
- return wrap(unwrap(UR)->getUser());
+LLVMValueRef LLVMGetUser(LLVMUseRef U) {
+ return wrap(unwrap(U)->getUser());
}
-LLVMValueRef LLVMGetUsedValue(LLVMUseIteratorRef UR) {
- return wrap(unwrap(UR)->get());
+LLVMValueRef LLVMGetUsedValue(LLVMUseRef U) {
+ return wrap(unwrap(U)->get());
}
/*--.. Operations on Users .................................................--*/
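
A sketch of walking a use list through the renamed entry points above, illustration only; LLVMGetFirstUse is assumed to return null for a value with no uses, matching the use_end() check, and LLVMGetNextUse now returns null at the end of the list rather than a wrapped end iterator:

    #include "llvm-c/Core.h"

    // Count the uses of Val; LLVMGetUser/LLVMGetUsedValue inspect each edge.
    static unsigned countUses(LLVMValueRef Val) {
      unsigned N = 0;
      for (LLVMUseRef U = LLVMGetFirstUse(Val); U; U = LLVMGetNextUse(U))
        ++N;
      return N;
    }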
@@ -493,6 +517,26 @@ LLVMValueRef LLVMConstPointerNull(LLVMTypeRef Ty) {
wrap(ConstantPointerNull::get(unwrap<PointerType>(Ty)));
}
+/*--.. Operations on metadata nodes ........................................--*/
+
+LLVMValueRef LLVMMDStringInContext(LLVMContextRef C, const char *Str,
+ unsigned SLen) {
+ return wrap(MDString::get(*unwrap(C), StringRef(Str, SLen)));
+}
+
+LLVMValueRef LLVMMDString(const char *Str, unsigned SLen) {
+ return LLVMMDStringInContext(LLVMGetGlobalContext(), Str, SLen);
+}
+
+LLVMValueRef LLVMMDNodeInContext(LLVMContextRef C, LLVMValueRef *Vals,
+ unsigned Count) {
+ return wrap(MDNode::get(*unwrap(C), unwrap<Value>(Vals, Count), Count));
+}
+
+LLVMValueRef LLVMMDNode(LLVMValueRef *Vals, unsigned Count) {
+ return LLVMMDNodeInContext(LLVMGetGlobalContext(), Vals, Count);
+}
+
/*--.. Operations on scalar constants ......................................--*/
LLVMValueRef LLVMConstInt(LLVMTypeRef IntTy, unsigned long long N,
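
Tying the new metadata entry points together (illustration only; the kind and string literals are arbitrary): build an MDNode in a context and attach it to an instruction under a named kind.

    #include "llvm-c/Core.h"

    static void tagInstruction(LLVMContextRef C, LLVMValueRef Inst,
                               LLVMValueRef Operand) {
      unsigned Kind = LLVMGetMDKindIDInContext(C, "mykind", 6);
      LLVMValueRef Ops[2] = { LLVMMDStringInContext(C, "note", 4), Operand };
      LLVMValueRef Node = LLVMMDNodeInContext(C, Ops, 2);
      LLVMSetMetadata(Inst, Kind, Node); // passing NULL instead would clear it
    }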
@@ -567,11 +611,13 @@ LLVMValueRef LLVMConstStruct(LLVMValueRef *ConstantVals, unsigned Count,
return LLVMConstStructInContext(LLVMGetGlobalContext(), ConstantVals, Count,
Packed);
}
-
LLVMValueRef LLVMConstVector(LLVMValueRef *ScalarConstantVals, unsigned Size) {
return wrap(ConstantVector::get(
unwrap<Constant>(ScalarConstantVals, Size), Size));
}
+LLVMValueRef LLVMConstUnion(LLVMTypeRef Ty, LLVMValueRef Val) {
+ return wrap(ConstantUnion::get(unwrap<UnionType>(Ty), unwrap<Constant>(Val)));
+}
/*--.. Constant expressions ................................................--*/
@@ -592,6 +638,17 @@ LLVMValueRef LLVMConstNeg(LLVMValueRef ConstantVal) {
unwrap<Constant>(ConstantVal)));
}
+LLVMValueRef LLVMConstNSWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNSWNeg(
+ unwrap<Constant>(ConstantVal)));
+}
+
+LLVMValueRef LLVMConstNUWNeg(LLVMValueRef ConstantVal) {
+ return wrap(ConstantExpr::getNUWNeg(
+ unwrap<Constant>(ConstantVal)));
+}
+
+
LLVMValueRef LLVMConstFNeg(LLVMValueRef ConstantVal) {
return wrap(ConstantExpr::getFNeg(
unwrap<Constant>(ConstantVal)));
@@ -615,6 +672,13 @@ LLVMValueRef LLVMConstNSWAdd(LLVMValueRef LHSConstant,
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstNUWAdd(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWAdd(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstFAdd(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getFAdd(
unwrap<Constant>(LHSConstant),
@@ -627,6 +691,20 @@ LLVMValueRef LLVMConstSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstNSWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWSub(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWSub(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWSub(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstFSub(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getFSub(unwrap<Constant>(LHSConstant),
unwrap<Constant>(RHSConstant)));
@@ -638,6 +716,20 @@ LLVMValueRef LLVMConstMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
unwrap<Constant>(RHSConstant)));
}
+LLVMValueRef LLVMConstNSWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNSWMul(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
+LLVMValueRef LLVMConstNUWMul(LLVMValueRef LHSConstant,
+ LLVMValueRef RHSConstant) {
+ return wrap(ConstantExpr::getNUWMul(
+ unwrap<Constant>(LHSConstant),
+ unwrap<Constant>(RHSConstant)));
+}
+
LLVMValueRef LLVMConstFMul(LLVMValueRef LHSConstant, LLVMValueRef RHSConstant) {
return wrap(ConstantExpr::getFMul(
unwrap<Constant>(LHSConstant),
@@ -924,6 +1016,10 @@ LLVMValueRef LLVMConstInlineAsm(LLVMTypeRef Ty, const char *AsmString,
Constraints, HasSideEffects, IsAlignStack));
}
+LLVMValueRef LLVMBlockAddress(LLVMValueRef F, LLVMBasicBlockRef BB) {
+ return wrap(BlockAddress::get(unwrap<Function>(F), unwrap(BB)));
+}
+
/*--.. Operations on global variables, functions, and aliases (globals) ....--*/
LLVMModuleRef LLVMGetGlobalParent(LLVMValueRef Global) {
@@ -1060,6 +1156,14 @@ LLVMValueRef LLVMAddGlobal(LLVMModuleRef M, LLVMTypeRef Ty, const char *Name) {
GlobalValue::ExternalLinkage, 0, Name));
}
+LLVMValueRef LLVMAddGlobalInAddressSpace(LLVMModuleRef M, LLVMTypeRef Ty,
+ const char *Name,
+ unsigned AddressSpace) {
+ return wrap(new GlobalVariable(*unwrap(M), unwrap(Ty), false,
+ GlobalValue::ExternalLinkage, 0, Name, 0,
+ false, AddressSpace));
+}
+
LLVMValueRef LLVMGetNamedGlobal(LLVMModuleRef M, const char *Name) {
return wrap(unwrap(M)->getNamedGlobal(Name));
}
@@ -1215,14 +1319,14 @@ void LLVMSetGC(LLVMValueRef Fn, const char *GC) {
void LLVMAddFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.addAttr(0, PA);
+ const AttrListPtr PALnew = PAL.addAttr(~0U, PA);
Func->setAttributes(PALnew);
}
void LLVMRemoveFunctionAttr(LLVMValueRef Fn, LLVMAttribute PA) {
Function *Func = unwrap<Function>(Fn);
const AttrListPtr PAL = Func->getAttributes();
- const AttrListPtr PALnew = PAL.removeAttr(0, PA);
+ const AttrListPtr PALnew = PAL.removeAttr(~0U, PA);
Func->setAttributes(PALnew);
}
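
The index change above matters because attribute-list slots are positional: slot 0 addresses the return value, parameters start at slot 1, and ~0U addresses the function itself, so the old code was attaching function attributes to the return value. A C-API sketch, illustration only (LLVMNoUnwindAttribute is one of the existing LLVMAttribute values):

    #include "llvm-c/Core.h"

    // With the fix, the attribute lands on the function itself (~0U),
    // not on its return value (slot 0).
    static void markNoUnwind(LLVMValueRef Fn) {
      LLVMAddFunctionAttr(Fn, LLVMNoUnwindAttribute);
    }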
@@ -1563,6 +1667,21 @@ void LLVMDisposeBuilder(LLVMBuilderRef Builder) {
delete unwrap(Builder);
}
+/*--.. Metadata builders ...................................................--*/
+
+void LLVMSetCurrentDebugLocation(LLVMBuilderRef Builder, LLVMValueRef L) {
+ unwrap(Builder)->SetCurrentDebugLocation(L? unwrap<MDNode>(L) : NULL);
+}
+
+LLVMValueRef LLVMGetCurrentDebugLocation(LLVMBuilderRef Builder) {
+ return wrap(unwrap(Builder)->getCurrentDebugLocation());
+}
+
+void LLVMSetInstDebugLocation(LLVMBuilderRef Builder, LLVMValueRef Inst) {
+ unwrap(Builder)->SetInstDebugLocation(unwrap<Instruction>(Inst));
+}
+
+
/*--.. Instruction builders ................................................--*/
LLVMValueRef LLVMBuildRetVoid(LLVMBuilderRef B) {
@@ -1592,6 +1711,11 @@ LLVMValueRef LLVMBuildSwitch(LLVMBuilderRef B, LLVMValueRef V,
return wrap(unwrap(B)->CreateSwitch(unwrap(V), unwrap(Else), NumCases));
}
+LLVMValueRef LLVMBuildIndirectBr(LLVMBuilderRef B, LLVMValueRef Addr,
+ unsigned NumDests) {
+ return wrap(unwrap(B)->CreateIndirectBr(unwrap(Addr), NumDests));
+}
+
LLVMValueRef LLVMBuildInvoke(LLVMBuilderRef B, LLVMValueRef Fn,
LLVMValueRef *Args, unsigned NumArgs,
LLVMBasicBlockRef Then, LLVMBasicBlockRef Catch,
@@ -1614,6 +1738,10 @@ void LLVMAddCase(LLVMValueRef Switch, LLVMValueRef OnVal,
unwrap<SwitchInst>(Switch)->addCase(unwrap<ConstantInt>(OnVal), unwrap(Dest));
}
+void LLVMAddDestination(LLVMValueRef IndirectBr, LLVMBasicBlockRef Dest) {
+ unwrap<IndirectBrInst>(IndirectBr)->addDestination(unwrap(Dest));
+}
+
/*--.. Arithmetic ..........................................................--*/
LLVMValueRef LLVMBuildAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
@@ -1626,6 +1754,11 @@ LLVMValueRef LLVMBuildNSWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RH
return wrap(unwrap(B)->CreateNSWAdd(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildNUWAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWAdd(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildFAdd(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateFAdd(unwrap(LHS), unwrap(RHS), Name));
@@ -1636,6 +1769,16 @@ LLVMValueRef LLVMBuildSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
return wrap(unwrap(B)->CreateSub(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildNSWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWSub(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildFSub(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateFSub(unwrap(LHS), unwrap(RHS), Name));
@@ -1646,6 +1789,16 @@ LLVMValueRef LLVMBuildMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
return wrap(unwrap(B)->CreateMul(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildNSWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
+LLVMValueRef LLVMBuildNUWMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWMul(unwrap(LHS), unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildFMul(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
const char *Name) {
return wrap(unwrap(B)->CreateFMul(unwrap(LHS), unwrap(RHS), Name));
@@ -1716,10 +1869,27 @@ LLVMValueRef LLVMBuildXor(LLVMBuilderRef B, LLVMValueRef LHS, LLVMValueRef RHS,
return wrap(unwrap(B)->CreateXor(unwrap(LHS), unwrap(RHS), Name));
}
+LLVMValueRef LLVMBuildBinOp(LLVMBuilderRef B, LLVMOpcode Op,
+ LLVMValueRef LHS, LLVMValueRef RHS,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateBinOp(Instruction::BinaryOps(Op), unwrap(LHS),
+ unwrap(RHS), Name));
+}
+
LLVMValueRef LLVMBuildNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
return wrap(unwrap(B)->CreateNeg(unwrap(V), Name));
}
+LLVMValueRef LLVMBuildNSWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNSWNeg(unwrap(V), Name));
+}
+
+LLVMValueRef LLVMBuildNUWNeg(LLVMBuilderRef B, LLVMValueRef V,
+ const char *Name) {
+ return wrap(unwrap(B)->CreateNUWNeg(unwrap(V), Name));
+}
+
LLVMValueRef LLVMBuildFNeg(LLVMBuilderRef B, LLVMValueRef V, const char *Name) {
return wrap(unwrap(B)->CreateFNeg(unwrap(V), Name));
}
@@ -1887,6 +2057,12 @@ LLVMValueRef LLVMBuildTruncOrBitCast(LLVMBuilderRef B, LLVMValueRef Val,
Name));
}
+LLVMValueRef LLVMBuildCast(LLVMBuilderRef B, LLVMOpcode Op, LLVMValueRef Val,
+ LLVMTypeRef DestTy, const char *Name) {
+ return wrap(unwrap(B)->CreateCast(Instruction::CastOps(Op), unwrap(Val),
+ unwrap(DestTy), Name));
+}
+
LLVMValueRef LLVMBuildPointerCast(LLVMBuilderRef B, LLVMValueRef Val,
LLVMTypeRef DestTy, const char *Name) {
return wrap(unwrap(B)->CreatePointerCast(unwrap(Val), unwrap(DestTy), Name));
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index f00f6ee..dbc283e 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -73,35 +73,35 @@ unsigned Argument::getArgNo() const {
/// hasByValAttr - Return true if this argument has the byval attribute on it
/// in its containing function.
bool Argument::hasByValAttr() const {
- if (!isa<PointerType>(getType())) return false;
+ if (!getType()->isPointerTy()) return false;
return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal);
}
/// hasNestAttr - Return true if this argument has the nest attribute on
/// it in its containing function.
bool Argument::hasNestAttr() const {
- if (!isa<PointerType>(getType())) return false;
+ if (!getType()->isPointerTy()) return false;
return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest);
}
/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
/// it in its containing function.
bool Argument::hasNoAliasAttr() const {
- if (!isa<PointerType>(getType())) return false;
+ if (!getType()->isPointerTy()) return false;
return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoAlias);
}
/// hasNoCaptureAttr - Return true if this argument has the nocapture attribute
/// on it in its containing function.
bool Argument::hasNoCaptureAttr() const {
- if (!isa<PointerType>(getType())) return false;
+ if (!getType()->isPointerTy()) return false;
return getParent()->paramHasAttr(getArgNo()+1, Attribute::NoCapture);
}
/// hasSRetAttr - Return true if this argument has the sret attribute on
/// it in its containing function.
bool Argument::hasStructRetAttr() const {
- if (!isa<PointerType>(getType())) return false;
+ if (!getType()->isPointerTy()) return false;
if (this != getParent()->arg_begin())
return false; // StructRet param must be first param
return getParent()->paramHasAttr(1, Attribute::StructRet);
@@ -155,7 +155,7 @@ Function::Function(const FunctionType *Ty, LinkageTypes Linkage,
: GlobalValue(PointerType::getUnqual(Ty),
Value::FunctionVal, 0, 0, Linkage, name) {
assert(FunctionType::isValidReturnType(getReturnType()) &&
- !isa<OpaqueType>(getReturnType()) && "invalid return type");
+ !getReturnType()->isOpaqueTy() && "invalid return type");
SymTab = new ValueSymbolTable();
// If the function has arguments, mark them as lazily built.
diff --git a/lib/VMCore/InlineAsm.cpp b/lib/VMCore/InlineAsm.cpp
index ec21773..6355834 100644
--- a/lib/VMCore/InlineAsm.cpp
+++ b/lib/VMCore/InlineAsm.cpp
@@ -220,7 +220,7 @@ bool InlineAsm::Verify(const FunctionType *Ty, StringRef ConstStr) {
if (!Ty->getReturnType()->isVoidTy()) return false;
break;
case 1:
- if (isa<StructType>(Ty->getReturnType())) return false;
+ if (Ty->getReturnType()->isStructTy()) return false;
break;
default:
const StructType *STy = dyn_cast<StructType>(Ty->getReturnType());
diff --git a/lib/VMCore/Instructions.cpp b/lib/VMCore/Instructions.cpp
index 9d5f7a5..8f4763f 100644
--- a/lib/VMCore/Instructions.cpp
+++ b/lib/VMCore/Instructions.cpp
@@ -562,7 +562,7 @@ static Instruction* createFree(Value* Source, Instruction *InsertBefore,
BasicBlock *InsertAtEnd) {
assert(((!InsertBefore && InsertAtEnd) || (InsertBefore && !InsertAtEnd)) &&
"createFree needs either InsertBefore or InsertAtEnd");
- assert(isa<PointerType>(Source->getType()) &&
+ assert(Source->getType()->isPointerTy() &&
"Can not free something of nonpointer type!");
BasicBlock* BB = InsertBefore ? InsertBefore->getParent() : InsertAtEnd;
@@ -989,7 +989,7 @@ bool AllocaInst::isStaticAlloca() const {
//===----------------------------------------------------------------------===//
void LoadInst::AssertOK() {
- assert(isa<PointerType>(getOperand(0)->getType()) &&
+ assert(getOperand(0)->getType()->isPointerTy() &&
"Ptr must have pointer type.");
}
@@ -1103,7 +1103,7 @@ void LoadInst::setAlignment(unsigned Align) {
void StoreInst::AssertOK() {
assert(getOperand(0) && getOperand(1) && "Both operands must be non-null!");
- assert(isa<PointerType>(getOperand(1)->getType()) &&
+ assert(getOperand(1)->getType()->isPointerTy() &&
"Ptr must have pointer type!");
assert(getOperand(0)->getType() ==
cast<PointerType>(getOperand(1)->getType())->getElementType()
@@ -1285,7 +1285,7 @@ static const Type* getIndexedTypeInternal(const Type *Ptr, IndexTy const *Idxs,
unsigned CurIdx = 1;
for (; CurIdx != NumIdx; ++CurIdx) {
const CompositeType *CT = dyn_cast<CompositeType>(Agg);
- if (!CT || isa<PointerType>(CT)) return 0;
+ if (!CT || CT->isPointerTy()) return 0;
IndexTy Index = Idxs[CurIdx];
if (!CT->indexValid(Index)) return 0;
Agg = CT->getTypeAtIndex(Index);
@@ -1391,7 +1391,7 @@ ExtractElementInst::ExtractElementInst(Value *Val, Value *Index,
bool ExtractElementInst::isValidOperands(const Value *Val, const Value *Index) {
- if (!isa<VectorType>(Val->getType()) || !Index->getType()->isIntegerTy(32))
+ if (!Val->getType()->isVectorTy() || !Index->getType()->isIntegerTy(32))
return false;
return true;
}
@@ -1432,7 +1432,7 @@ InsertElementInst::InsertElementInst(Value *Vec, Value *Elt, Value *Index,
bool InsertElementInst::isValidOperands(const Value *Vec, const Value *Elt,
const Value *Index) {
- if (!isa<VectorType>(Vec->getType()))
+ if (!Vec->getType()->isVectorTy())
return false; // First operand of insertelement must be vector type.
if (Elt->getType() != cast<VectorType>(Vec->getType())->getElementType())
@@ -1485,7 +1485,7 @@ ShuffleVectorInst::ShuffleVectorInst(Value *V1, Value *V2, Value *Mask,
bool ShuffleVectorInst::isValidOperands(const Value *V1, const Value *V2,
const Value *Mask) {
- if (!isa<VectorType>(V1->getType()) || V1->getType() != V2->getType())
+ if (!V1->getType()->isVectorTy() || V1->getType() != V2->getType())
return false;
const VectorType *MaskTy = dyn_cast<VectorType>(Mask->getType());
@@ -1602,7 +1602,7 @@ const Type* ExtractValueInst::getIndexedType(const Type *Agg,
unsigned CurIdx = 0;
for (; CurIdx != NumIdx; ++CurIdx) {
const CompositeType *CT = dyn_cast<CompositeType>(Agg);
- if (!CT || isa<PointerType>(CT) || isa<VectorType>(CT)) return 0;
+ if (!CT || CT->isPointerTy() || CT->isVectorTy()) return 0;
unsigned Index = Idxs[CurIdx];
if (!CT->indexValid(Index)) return 0;
Agg = CT->getTypeAtIndex(Index);
@@ -1693,7 +1693,7 @@ void BinaryOperator::init(BinaryOps iType) {
case SDiv:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isIntegerTy() || (isa<VectorType>(getType()) &&
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
"Incorrect operand type (not integer) for S/UDIV");
break;
@@ -1707,7 +1707,7 @@ void BinaryOperator::init(BinaryOps iType) {
case SRem:
assert(getType() == LHS->getType() &&
"Arithmetic operation should return same type as operands!");
- assert((getType()->isIntegerTy() || (isa<VectorType>(getType()) &&
+ assert((getType()->isIntegerTy() || (getType()->isVectorTy() &&
cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
"Incorrect operand type (not integer) for S/UREM");
break;
@@ -1723,7 +1723,7 @@ void BinaryOperator::init(BinaryOps iType) {
assert(getType() == LHS->getType() &&
"Shift operation should return same type as operands!");
assert((getType()->isIntegerTy() ||
- (isa<VectorType>(getType()) &&
+ (getType()->isVectorTy() &&
cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
"Tried to create a shift operation on a non-integral type!");
break;
@@ -1732,7 +1732,7 @@ void BinaryOperator::init(BinaryOps iType) {
assert(getType() == LHS->getType() &&
"Logical operation should return same type as operands!");
assert((getType()->isIntegerTy() ||
- (isa<VectorType>(getType()) &&
+ (getType()->isVectorTy() &&
cast<VectorType>(getType())->getElementType()->isIntegerTy())) &&
"Tried to create a logical operation on a non-integral type!");
break;
@@ -1977,8 +1977,8 @@ bool CastInst::isLosslessCast() const {
return true;
// Pointer to pointer is always lossless.
- if (isa<PointerType>(SrcTy))
- return isa<PointerType>(DstTy);
+ if (SrcTy->isPointerTy())
+ return DstTy->isPointerTy();
return false; // Other types have no identity values
}
@@ -2094,7 +2094,7 @@ unsigned CastInst::isEliminableCastPair(
// no-op cast in second op implies firstOp as long as the DestTy
// is integer and we are not converting between a vector and a
// non vector type.
- if (!isa<VectorType>(SrcTy) && DstTy->isIntegerTy())
+ if (!SrcTy->isVectorTy() && DstTy->isIntegerTy())
return firstOp;
return 0;
case 4:
@@ -2148,12 +2148,12 @@ unsigned CastInst::isEliminableCastPair(
case 11:
// bitcast followed by ptrtoint is allowed as long as the bitcast
// is a pointer to pointer cast.
- if (isa<PointerType>(SrcTy) && isa<PointerType>(MidTy))
+ if (SrcTy->isPointerTy() && MidTy->isPointerTy())
return secondOp;
return 0;
case 12:
// inttoptr, bitcast -> intptr if bitcast is a ptr to ptr cast
- if (isa<PointerType>(MidTy) && isa<PointerType>(DstTy))
+ if (MidTy->isPointerTy() && DstTy->isPointerTy())
return firstOp;
return 0;
case 13: {
@@ -2274,8 +2274,8 @@ CastInst *CastInst::CreateTruncOrBitCast(Value *S, const Type *Ty,
CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
const Twine &Name,
BasicBlock *InsertAtEnd) {
- assert(isa<PointerType>(S->getType()) && "Invalid cast");
- assert((Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Invalid cast");
if (Ty->isIntegerTy())
@@ -2287,8 +2287,8 @@ CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
CastInst *CastInst::CreatePointerCast(Value *S, const Type *Ty,
const Twine &Name,
Instruction *InsertBefore) {
- assert(isa<PointerType>(S->getType()) && "Invalid cast");
- assert((Ty->isIntegerTy() || isa<PointerType>(Ty)) &&
+ assert(S->getType()->isPointerTy() && "Invalid cast");
+ assert((Ty->isIntegerTy() || Ty->isPointerTy()) &&
"Invalid cast");
if (Ty->isIntegerTy())
@@ -2373,7 +2373,7 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
// Casting from vector
return DestBits == PTy->getBitWidth();
} else { // Casting from something else
- return isa<PointerType>(SrcTy);
+ return SrcTy->isPointerTy();
}
} else if (DestTy->isFloatingPointTy()) { // Casting to floating pt
if (SrcTy->isIntegerTy()) { // Casting from integral
@@ -2394,8 +2394,8 @@ bool CastInst::isCastable(const Type *SrcTy, const Type *DestTy) {
} else { // Casting from something else
return DestPTy->getBitWidth() == SrcBits;
}
- } else if (isa<PointerType>(DestTy)) { // Casting to pointer
- if (isa<PointerType>(SrcTy)) { // Casting from pointer
+ } else if (DestTy->isPointerTy()) { // Casting to pointer
+ if (SrcTy->isPointerTy()) { // Casting from pointer
return true;
} else if (SrcTy->isIntegerTy()) { // Casting from integral
return true;
@@ -2449,7 +2449,7 @@ CastInst::getCastOpcode(
PTy = NULL;
return BitCast; // Same size, no-op cast
} else {
- assert(isa<PointerType>(SrcTy) &&
+ assert(SrcTy->isPointerTy() &&
"Casting from a value that is not first-class type");
return PtrToInt; // ptr -> int
}
@@ -2486,8 +2486,8 @@ CastInst::getCastOpcode(
} else {
assert(!"Illegal cast to vector (wrong type or size)");
}
- } else if (isa<PointerType>(DestTy)) {
- if (isa<PointerType>(SrcTy)) {
+ } else if (DestTy->isPointerTy()) {
+ if (SrcTy->isPointerTy()) {
return BitCast; // ptr -> ptr
} else if (SrcTy->isIntegerTy()) {
return IntToPtr; // int -> ptr
@@ -2566,13 +2566,13 @@ CastInst::castIsValid(Instruction::CastOps op, Value *S, const Type *DstTy) {
}
return SrcTy->isFPOrFPVectorTy() && DstTy->isIntOrIntVectorTy();
case Instruction::PtrToInt:
- return isa<PointerType>(SrcTy) && DstTy->isIntegerTy();
+ return SrcTy->isPointerTy() && DstTy->isIntegerTy();
case Instruction::IntToPtr:
- return SrcTy->isIntegerTy() && isa<PointerType>(DstTy);
+ return SrcTy->isIntegerTy() && DstTy->isPointerTy();
case Instruction::BitCast:
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
- if (isa<PointerType>(SrcTy) != isa<PointerType>(DstTy))
+ if (SrcTy->isPointerTy() != DstTy->isPointerTy())
return false;
// Now we know we're not dealing with a pointer/non-pointer mismatch. In all
@@ -3150,7 +3150,7 @@ void SwitchInst::setSuccessorV(unsigned idx, BasicBlock *B) {
//===----------------------------------------------------------------------===//
void IndirectBrInst::init(Value *Address, unsigned NumDests) {
- assert(Address && isa<PointerType>(Address->getType()) &&
+ assert(Address && Address->getType()->isPointerTy() &&
"Address of indirectbr must be a pointer");
ReservedSpace = 1+NumDests;
NumOperands = 1;
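The dominant pattern in the VMCore hunks of this update, here and in the files that follow, is a mechanical conversion from isa<SomeType>(Ty) to the new Ty->isSomeTy() predicate methods. A minimal sketch of the idea, with a hypothetical Type modeled on (but not copied from) the LLVM API, assuming each predicate reduces to a comparison against the type's ID tag:

    // Hypothetical miniature of the predicate style: each query is a
    // direct type-ID comparison, so call sites no longer need the
    // headers that declare the concrete subclasses.
    struct Type {
      enum TypeID { IntegerTyID, PointerTyID, VectorTyID, StructTyID };
      TypeID ID;

      bool isIntegerTy() const { return ID == IntegerTyID; }
      bool isPointerTy() const { return ID == PointerTyID; }
      bool isVectorTy()  const { return ID == VectorTyID; }
      bool isStructTy()  const { return ID == StructTyID; }
    };

Behavior at every converted call site is unchanged; the practical win is terser code and fewer inclusions of DerivedTypes.h solely to perform a type test.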
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 62491d8..85bbe4a 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -229,13 +229,23 @@ public:
if (I->second->use_empty())
delete I->second;
}
- MDNodeSet.clear();
AlwaysOpaqueTy->dropRef();
for (OpaqueTypesTy::iterator I = OpaqueTypes.begin(), E = OpaqueTypes.end();
I != E; ++I) {
(*I)->AbstractTypeUsers.clear();
delete *I;
}
+ // Destroy MDNode operands first.
+ for (FoldingSetIterator<MDNode> I = MDNodeSet.begin(), E = MDNodeSet.end();
+ I != E;) {
+ MDNode *N = &(*I);
+ ++I;
+ N->replaceAllOperandsWithNull();
+ }
+ while (!MDNodeSet.empty()) {
+ MDNode *N = &(*MDNodeSet.begin());
+ N->destroy();
+ }
}
};
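The reordered teardown above is a use-after-free fix: MDNodes can reference one another, so every node first drops its references through replaceAllOperandsWithNull (defined in the Metadata.cpp hunk below), and only then are the nodes themselves destroyed. A self-contained sketch of the same two-phase pattern, with hypothetical Node/clearEdges names standing in for the MDNode machinery:

    #include <set>

    struct Node {
      std::set<Node *> Operands;            // edges into other nodes
      void clearEdges() { Operands.clear(); }
    };

    // Phase one severs every edge; phase two frees the nodes. Freeing
    // in a single pass could leave a surviving node holding a dangling
    // pointer into an already-deleted one.
    void destroyAll(std::set<Node *> &Nodes) {
      for (std::set<Node *>::iterator I = Nodes.begin(), E = Nodes.end();
           I != E; ++I)
        (*I)->clearEdges();                 // now no node references another
      while (!Nodes.empty()) {
        Node *N = *Nodes.begin();
        Nodes.erase(Nodes.begin());
        delete N;                           // safe: nothing points at N
      }
    }

Note how the real code advances the FoldingSet iterator before mutating each node: nulling operands changes the node's profile, which can reseat it in the set, so holding the old iterator across the call would not be safe.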
diff --git a/lib/VMCore/Metadata.cpp b/lib/VMCore/Metadata.cpp
index 07a5f3c..a08c454 100644
--- a/lib/VMCore/Metadata.cpp
+++ b/lib/VMCore/Metadata.cpp
@@ -257,6 +257,13 @@ void MDNode::Profile(FoldingSetNodeID &ID) const {
ID.AddPointer(getOperand(i));
}
+// replaceAllOperandsWithNull - This is used while destroying llvm context to
+// gracefully delete all nodes. This method replaces all operands with null.
+void MDNode::replaceAllOperandsWithNull() {
+ for (MDNodeOperand *Op = getOperandPtr(this, 0), *E = Op+NumOperands;
+ Op != E; ++Op)
+ replaceOperand(Op, 0);
+}
// Replace value from this node's operand list.
void MDNode::replaceOperand(MDNodeOperand *Op, Value *To) {
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index 8a3527e..c4dfe14 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -1118,6 +1118,7 @@ bool BBPassManager::runOnFunction(Function &F) {
for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I)
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
BasicBlockPass *BP = getContainedPass(Index);
+ bool LocalChanged = false;
dumpPassInfo(BP, EXECUTION_MSG, ON_BASICBLOCK_MSG, I->getName());
dumpRequiredSet(BP);
@@ -1129,11 +1130,12 @@ bool BBPassManager::runOnFunction(Function &F) {
PassManagerPrettyStackEntry X(BP, *I);
Timer *T = StartPassTimer(BP);
- Changed |= BP->runOnBasicBlock(*I);
+ LocalChanged |= BP->runOnBasicBlock(*I);
StopPassTimer(BP, T);
}
- if (Changed)
+ Changed |= LocalChanged;
+ if (LocalChanged)
dumpPassInfo(BP, MODIFICATION_MSG, ON_BASICBLOCK_MSG,
I->getName());
dumpPreservedSet(BP);
@@ -1334,6 +1336,7 @@ bool FPPassManager::runOnFunction(Function &F) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
FunctionPass *FP = getContainedPass(Index);
+ bool LocalChanged = false;
dumpPassInfo(FP, EXECUTION_MSG, ON_FUNCTION_MSG, F.getName());
dumpRequiredSet(FP);
@@ -1344,11 +1347,12 @@ bool FPPassManager::runOnFunction(Function &F) {
PassManagerPrettyStackEntry X(FP, F);
Timer *T = StartPassTimer(FP);
- Changed |= FP->runOnFunction(F);
+ LocalChanged |= FP->runOnFunction(F);
StopPassTimer(FP, T);
}
- if (Changed)
+ Changed |= LocalChanged;
+ if (LocalChanged)
dumpPassInfo(FP, MODIFICATION_MSG, ON_FUNCTION_MSG, F.getName());
dumpPreservedSet(FP);
@@ -1407,6 +1411,7 @@ MPPassManager::runOnModule(Module &M) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
ModulePass *MP = getContainedPass(Index);
+ bool LocalChanged = false;
dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
dumpRequiredSet(MP);
@@ -1416,11 +1421,12 @@ MPPassManager::runOnModule(Module &M) {
{
PassManagerPrettyStackEntry X(MP, M);
Timer *T = StartPassTimer(MP);
- Changed |= MP->runOnModule(M);
+ LocalChanged |= MP->runOnModule(M);
StopPassTimer(MP, T);
}
- if (Changed)
+ Changed |= LocalChanged;
+ if (LocalChanged)
dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
M.getModuleIdentifier());
dumpPreservedSet(MP);
@@ -1706,8 +1712,13 @@ LLVMPassManagerRef LLVMCreatePassManager() {
return wrap(new PassManager());
}
+LLVMPassManagerRef LLVMCreateFunctionPassManagerForModule(LLVMModuleRef M) {
+ return wrap(new FunctionPassManager(unwrap(M)));
+}
+
LLVMPassManagerRef LLVMCreateFunctionPassManager(LLVMModuleProviderRef P) {
- return wrap(new FunctionPassManager(unwrap(P)));
+ return LLVMCreateFunctionPassManagerForModule(
+ reinterpret_cast<LLVMModuleRef>(P));
}
LLVMBool LLVMRunPassManager(LLVMPassManagerRef PM, LLVMModuleRef M) {
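The recurring LocalChanged edit in the three pass managers above corrects the modification reporting: the banner used to be driven by the accumulated Changed flag, so once any earlier pass had modified the unit, every later pass was reported as a modifier too. Tracking a per-pass flag and folding it into the aggregate afterwards fixes that. A minimal self-contained sketch, where PassFn and the printf are hypothetical stand-ins for the real pass plumbing:

    #include <cstdio>
    #include <vector>

    typedef bool (*PassFn)();   // hypothetical: a pass returns "changed?"

    bool runAll(const std::vector<PassFn> &Passes) {
      bool Changed = false;
      for (unsigned Index = 0; Index < Passes.size(); ++Index) {
        bool LocalChanged = Passes[Index]();  // this pass alone
        Changed |= LocalChanged;              // fold into the aggregate
        if (LocalChanged)                     // report only true modifiers
          std::printf("pass %u modified the unit\n", Index);
      }
      return Changed;
    }

The C API change in the same file is independent: LLVMCreateFunctionPassManagerForModule builds a FunctionPassManager from a module directly, and the older provider-based entry point now forwards to it; the reinterpret_cast works because, with module providers on their way out, both opaque handles wrap the same underlying object.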
diff --git a/lib/VMCore/Type.cpp b/lib/VMCore/Type.cpp
index f4cd366..9b2c2ca 100644
--- a/lib/VMCore/Type.cpp
+++ b/lib/VMCore/Type.cpp
@@ -61,8 +61,8 @@ void Type::destroy() const {
// Structures and Functions allocate their contained types past the end of
// the type object itself. These need to be destroyed differently than the
// other types.
- if (isa<FunctionType>(this) || isa<StructType>(this) ||
- isa<UnionType>(this)) {
+ if (this->isFunctionTy() || this->isStructTy() ||
+ this->isUnionTy()) {
// First, make sure we destruct any PATypeHandles allocated by these
// subclasses. They must be manually destructed.
for (unsigned i = 0; i < NumContainedTys; ++i)
@@ -70,9 +70,9 @@ void Type::destroy() const {
// Now call the destructor for the subclass directly because we're going
// to delete this as an array of char.
- if (isa<FunctionType>(this))
+ if (this->isFunctionTy())
static_cast<const FunctionType*>(this)->FunctionType::~FunctionType();
- else if (isa<StructType>(this))
+ else if (this->isStructTy())
static_cast<const StructType*>(this)->StructType::~StructType();
else
static_cast<const UnionType*>(this)->UnionType::~UnionType();
@@ -176,8 +176,8 @@ bool Type::canLosslesslyBitCastTo(const Type *Ty) const {
// At this point we have only various mismatches of the first class types
// remaining and ptr->ptr. Just select the lossless conversions. Everything
// else is not lossless.
- if (isa<PointerType>(this))
- return isa<PointerType>(Ty);
+ if (this->isPointerTy())
+ return Ty->isPointerTy();
return false; // Other types have no identity values
}
@@ -220,7 +220,7 @@ int Type::getFPMantissaWidth() const {
/// iff all of the members of the type are sized as well. Since asking for
/// their size is relatively uncommon, move this operation out of line.
bool Type::isSizedDerivedType() const {
- if (isa<IntegerType>(this))
+ if (this->isIntegerTy())
return true;
if (const ArrayType *ATy = dyn_cast<ArrayType>(this))
@@ -229,7 +229,7 @@ bool Type::isSizedDerivedType() const {
if (const VectorType *PTy = dyn_cast<VectorType>(this))
return PTy->getElementType()->isSized();
- if (!isa<StructType>(this) && !isa<UnionType>(this))
+ if (!this->isStructTy() && !this->isUnionTy())
return false;
// Okay, our struct is sized if all of the elements are...
@@ -447,7 +447,7 @@ bool FunctionType::isValidReturnType(const Type *RetTy) {
/// isValidArgumentType - Return true if the specified type is valid as an
/// argument type.
bool FunctionType::isValidArgumentType(const Type *ArgTy) {
- return ArgTy->isFirstClassType() || isa<OpaqueType>(ArgTy);
+ return ArgTy->isFirstClassType() || ArgTy->isOpaqueTy();
}
FunctionType::FunctionType(const Type *Result,
@@ -613,7 +613,7 @@ void Type::PromoteAbstractToConcrete() {
// Concrete types are leaves in the tree. Since an SCC will either be all
// abstract or all concrete, we only need to check one type.
if (SCC[0]->isAbstract()) {
- if (isa<OpaqueType>(SCC[0]))
+ if (SCC[0]->isOpaqueTy())
return; // Not going to be concrete, sorry.
// If all of the children of all of the types in this SCC are concrete,
@@ -660,7 +660,7 @@ static bool TypesEqual(const Type *Ty, const Type *Ty2,
std::map<const Type *, const Type *> &EqTypes) {
if (Ty == Ty2) return true;
if (Ty->getTypeID() != Ty2->getTypeID()) return false;
- if (isa<OpaqueType>(Ty))
+ if (Ty->isOpaqueTy())
return false; // Two unequal opaque types are never equal
std::map<const Type*, const Type*>::iterator It = EqTypes.find(Ty);
@@ -888,7 +888,7 @@ ArrayType *ArrayType::get(const Type *ElementType, uint64_t NumElements) {
bool ArrayType::isValidElementType(const Type *ElemTy) {
return ElemTy->getTypeID() != VoidTyID && ElemTy->getTypeID() != LabelTyID &&
- ElemTy->getTypeID() != MetadataTyID && !isa<FunctionType>(ElemTy);
+ ElemTy->getTypeID() != MetadataTyID && !ElemTy->isFunctionTy();
}
VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
@@ -912,7 +912,7 @@ VectorType *VectorType::get(const Type *ElementType, unsigned NumElements) {
bool VectorType::isValidElementType(const Type *ElemTy) {
return ElemTy->isIntegerTy() || ElemTy->isFloatingPointTy() ||
- isa<OpaqueType>(ElemTy);
+ ElemTy->isOpaqueTy();
}
//===----------------------------------------------------------------------===//
@@ -955,7 +955,7 @@ StructType *StructType::get(LLVMContext &Context, const Type *type, ...) {
bool StructType::isValidElementType(const Type *ElemTy) {
return !ElemTy->isVoidTy() && !ElemTy->isLabelTy() &&
- !ElemTy->isMetadataTy() && !isa<FunctionType>(ElemTy);
+ !ElemTy->isMetadataTy() && !ElemTy->isFunctionTy();
}
@@ -1303,7 +1303,7 @@ void PointerType::typeBecameConcrete(const DerivedType *AbsTy) {
}
bool SequentialType::indexValid(const Value *V) const {
- if (isa<IntegerType>(V->getType()))
+ if (V->getType()->isIntegerTy())
return true;
return false;
}
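Type::destroy above is hand-rolled because FunctionType, StructType, and UnionType co-allocate their contained type handles past the end of the object itself, so the storage cannot be released with a plain delete: the destructor must be invoked explicitly and the allocation freed as raw memory. A minimal sketch of that idiom with a hypothetical Packed type (not the LLVM classes):

    #include <new>

    struct Packed {
      unsigned NumElts;
      // Trailing elements live directly after the object in the same
      // allocation, so sizeof(Packed) alone under-counts the storage.
      int *elts() { return reinterpret_cast<int *>(this + 1); }

      static Packed *create(unsigned N) {
        void *Mem = ::operator new(sizeof(Packed) + N * sizeof(int));
        Packed *P = new (Mem) Packed();
        P->NumElts = N;
        return P;
      }

      void destroy() {
        this->~Packed();          // run the destructor explicitly,
        ::operator delete(this);  // then free the raw allocation
      }
    };

LLVM's version additionally destructs each trailing PATypeHandle by hand before calling the subclass destructor, for the same reason: nothing else knows those handles exist.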
diff --git a/lib/VMCore/Value.cpp b/lib/VMCore/Value.cpp
index 3759b8a..a36d262 100644
--- a/lib/VMCore/Value.cpp
+++ b/lib/VMCore/Value.cpp
@@ -45,11 +45,11 @@ Value::Value(const Type *ty, unsigned scid)
UseList(0), Name(0) {
if (isa<CallInst>(this) || isa<InvokeInst>(this))
assert((VTy->isFirstClassType() || VTy->isVoidTy() ||
- isa<OpaqueType>(ty) || VTy->getTypeID() == Type::StructTyID) &&
+ ty->isOpaqueTy() || VTy->isStructTy()) &&
"invalid CallInst type!");
else if (!isa<Constant>(this) && !isa<BasicBlock>(this))
assert((VTy->isFirstClassType() || VTy->isVoidTy() ||
- isa<OpaqueType>(ty)) &&
+ ty->isOpaqueTy()) &&
"Cannot create non-first-class values except for constants!");
}
@@ -320,7 +320,7 @@ void Value::replaceAllUsesWith(Value *New) {
}
Value *Value::stripPointerCasts() {
- if (!isa<PointerType>(getType()))
+ if (!getType()->isPointerTy())
return this;
Value *V = this;
do {
@@ -337,12 +337,12 @@ Value *Value::stripPointerCasts() {
} else {
return V;
}
- assert(isa<PointerType>(V->getType()) && "Unexpected operand type!");
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
} while (1);
}
Value *Value::getUnderlyingObject(unsigned MaxLookup) {
- if (!isa<PointerType>(getType()))
+ if (!getType()->isPointerTy())
return this;
Value *V = this;
for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) {
@@ -357,7 +357,7 @@ Value *Value::getUnderlyingObject(unsigned MaxLookup) {
} else {
return V;
}
- assert(isa<PointerType>(V->getType()) && "Unexpected operand type!");
+ assert(V->getType()->isPointerTy() && "Unexpected operand type!");
}
return V;
}
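stripPointerCasts and getUnderlyingObject above share the same walk: start from a pointer-typed value and repeatedly step through operations that cannot change which object the pointer refers to. Below is a condensed paraphrase under the LLVM 2.7-era headers; the real functions also look through ConstantExprs (and getUnderlyingObject bounds the walk with MaxLookup), which this sketch omits:

    #include "llvm/Instructions.h"
    #include "llvm/Value.h"
    #include <cassert>
    using namespace llvm;

    // Follow no-op pointer casts and all-zero GEPs to the value behind
    // them; stop at anything that could introduce a real offset.
    Value *stripCasts(Value *V) {
      if (!V->getType()->isPointerTy())
        return V;
      for (;;) {
        if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(V)) {
          if (!GEP->hasAllZeroIndices())
            return V;                  // a real offset: stop stripping
          V = GEP->getOperand(0);
        } else if (BitCastInst *BC = dyn_cast<BitCastInst>(V)) {
          V = BC->getOperand(0);       // bitcasts never move the pointer
        } else {
          return V;
        }
        assert(V->getType()->isPointerTy() && "Unexpected operand type!");
      }
    }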
diff --git a/lib/VMCore/ValueTypes.cpp b/lib/VMCore/ValueTypes.cpp
index 62b9034..a092cd1 100644
--- a/lib/VMCore/ValueTypes.cpp
+++ b/lib/VMCore/ValueTypes.cpp
@@ -46,7 +46,7 @@ bool EVT::isExtendedInteger() const {
bool EVT::isExtendedVector() const {
assert(isExtended() && "Type is not extended!");
- return isa<VectorType>(LLVMTy);
+ return LLVMTy->isVectorTy();
}
bool EVT::isExtended64BitVector() const {
@@ -126,6 +126,7 @@ std::string EVT::getEVTString() const {
case MVT::v8f32: return "v8f32";
case MVT::v2f64: return "v2f64";
case MVT::v4f64: return "v4f64";
+ case MVT::Metadata:return "Metadata";
}
}
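The one-line addition above gives MVT::Metadata its own case in getEVTString, so the value-type printer can name metadata operands explicitly rather than relying on whatever fallback the switch provides. Keeping such string-mapping switches exhaustive also lets the compiler's -Wswitch warning flag future enumerators added without a name; a tiny sketch of the idiom with hypothetical enumerators:

    // With every enumerator handled and no default case, -Wswitch warns
    // when a new kind is added but not given a string here.
    enum SimpleVT { SVT_i32, SVT_f64, SVT_Metadata };

    const char *getName(SimpleVT VT) {
      switch (VT) {
      case SVT_i32:      return "i32";
      case SVT_f64:      return "f64";
      case SVT_Metadata: return "Metadata";
      }
      return "unknown";  // only reachable if the enum grows unhandled
    }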
diff --git a/lib/VMCore/Verifier.cpp b/lib/VMCore/Verifier.cpp
index 2b4892b..35625a5 100644
--- a/lib/VMCore/Verifier.cpp
+++ b/lib/VMCore/Verifier.cpp
@@ -433,7 +433,7 @@ void Verifier::visitGlobalValue(GlobalValue &GV) {
if (GV.hasAppendingLinkage()) {
GlobalVariable *GVar = dyn_cast<GlobalVariable>(&GV);
- Assert1(GVar && isa<ArrayType>(GVar->getType()->getElementType()),
+ Assert1(GVar && GVar->getType()->getElementType()->isArrayTy(),
"Only global arrays can have appending linkage!", GVar);
}
}
@@ -609,7 +609,7 @@ void Verifier::visitFunction(Function &F) {
&F, FT);
Assert1(F.getReturnType()->isFirstClassType() ||
F.getReturnType()->isVoidTy() ||
- isa<StructType>(F.getReturnType()),
+ F.getReturnType()->isStructTy(),
"Functions cannot return aggregate values!", &F);
Assert1(!F.hasStructRetAttr() || F.getReturnType()->isVoidTy(),
@@ -838,7 +838,7 @@ void Verifier::visitTruncInst(TruncInst &I) {
Assert1(SrcTy->isIntOrIntVectorTy(), "Trunc only operates on integer", &I);
Assert1(DestTy->isIntOrIntVectorTy(), "Trunc only produces integer", &I);
- Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
"trunc source and destination must both be a vector or neither", &I);
Assert1(SrcBitSize > DestBitSize,"DestTy too big for Trunc", &I);
@@ -853,7 +853,7 @@ void Verifier::visitZExtInst(ZExtInst &I) {
// Get the size of the types in bits, we'll need this later
Assert1(SrcTy->isIntOrIntVectorTy(), "ZExt only operates on integer", &I);
Assert1(DestTy->isIntOrIntVectorTy(), "ZExt only produces an integer", &I);
- Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
"zext source and destination must both be a vector or neither", &I);
unsigned SrcBitSize = SrcTy->getScalarSizeInBits();
unsigned DestBitSize = DestTy->getScalarSizeInBits();
@@ -874,7 +874,7 @@ void Verifier::visitSExtInst(SExtInst &I) {
Assert1(SrcTy->isIntOrIntVectorTy(), "SExt only operates on integer", &I);
Assert1(DestTy->isIntOrIntVectorTy(), "SExt only produces an integer", &I);
- Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
"sext source and destination must both be a vector or neither", &I);
Assert1(SrcBitSize < DestBitSize,"Type too small for SExt", &I);
@@ -891,7 +891,7 @@ void Verifier::visitFPTruncInst(FPTruncInst &I) {
Assert1(SrcTy->isFPOrFPVectorTy(),"FPTrunc only operates on FP", &I);
Assert1(DestTy->isFPOrFPVectorTy(),"FPTrunc only produces an FP", &I);
- Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
"fptrunc source and destination must both be a vector or neither",&I);
Assert1(SrcBitSize > DestBitSize,"DestTy too big for FPTrunc", &I);
@@ -909,7 +909,7 @@ void Verifier::visitFPExtInst(FPExtInst &I) {
Assert1(SrcTy->isFPOrFPVectorTy(),"FPExt only operates on FP", &I);
Assert1(DestTy->isFPOrFPVectorTy(),"FPExt only produces an FP", &I);
- Assert1(isa<VectorType>(SrcTy) == isa<VectorType>(DestTy),
+ Assert1(SrcTy->isVectorTy() == DestTy->isVectorTy(),
"fpext source and destination must both be a vector or neither", &I);
Assert1(SrcBitSize < DestBitSize,"DestTy too small for FPExt", &I);
@@ -921,8 +921,8 @@ void Verifier::visitUIToFPInst(UIToFPInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- bool SrcVec = isa<VectorType>(SrcTy);
- bool DstVec = isa<VectorType>(DestTy);
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
Assert1(SrcVec == DstVec,
"UIToFP source and dest must both be vector or scalar", &I);
@@ -944,8 +944,8 @@ void Verifier::visitSIToFPInst(SIToFPInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- bool SrcVec = isa<VectorType>(SrcTy);
- bool DstVec = isa<VectorType>(DestTy);
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
Assert1(SrcVec == DstVec,
"SIToFP source and dest must both be vector or scalar", &I);
@@ -967,8 +967,8 @@ void Verifier::visitFPToUIInst(FPToUIInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- bool SrcVec = isa<VectorType>(SrcTy);
- bool DstVec = isa<VectorType>(DestTy);
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
Assert1(SrcVec == DstVec,
"FPToUI source and dest must both be vector or scalar", &I);
@@ -990,8 +990,8 @@ void Verifier::visitFPToSIInst(FPToSIInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- bool SrcVec = isa<VectorType>(SrcTy);
- bool DstVec = isa<VectorType>(DestTy);
+ bool SrcVec = SrcTy->isVectorTy();
+ bool DstVec = DestTy->isVectorTy();
Assert1(SrcVec == DstVec,
"FPToSI source and dest must both be vector or scalar", &I);
@@ -1013,7 +1013,7 @@ void Verifier::visitPtrToIntInst(PtrToIntInst &I) {
const Type *SrcTy = I.getOperand(0)->getType();
const Type *DestTy = I.getType();
- Assert1(isa<PointerType>(SrcTy), "PtrToInt source must be pointer", &I);
+ Assert1(SrcTy->isPointerTy(), "PtrToInt source must be pointer", &I);
Assert1(DestTy->isIntegerTy(), "PtrToInt result must be integral", &I);
visitInstruction(I);
@@ -1025,7 +1025,7 @@ void Verifier::visitIntToPtrInst(IntToPtrInst &I) {
const Type *DestTy = I.getType();
Assert1(SrcTy->isIntegerTy(), "IntToPtr source must be an integral", &I);
- Assert1(isa<PointerType>(DestTy), "IntToPtr result must be a pointer",&I);
+ Assert1(DestTy->isPointerTy(), "IntToPtr result must be a pointer",&I);
visitInstruction(I);
}
@@ -1041,7 +1041,7 @@ void Verifier::visitBitCastInst(BitCastInst &I) {
// BitCast implies a no-op cast of type only. No bits change.
// However, you can't cast pointers to anything but pointers.
- Assert1(isa<PointerType>(DestTy) == isa<PointerType>(DestTy),
+ Assert1(SrcTy->isPointerTy() == DestTy->isPointerTy(),
"Bitcast requires both operands to be pointer or neither", &I);
Assert1(SrcBitSize == DestBitSize, "Bitcast requires types of same width",&I);
@@ -1084,11 +1084,11 @@ void Verifier::visitPHINode(PHINode &PN) {
void Verifier::VerifyCallSite(CallSite CS) {
Instruction *I = CS.getInstruction();
- Assert1(isa<PointerType>(CS.getCalledValue()->getType()),
+ Assert1(CS.getCalledValue()->getType()->isPointerTy(),
"Called function must be a pointer!", I);
const PointerType *FPTy = cast<PointerType>(CS.getCalledValue()->getType());
- Assert1(isa<FunctionType>(FPTy->getElementType()),
+ Assert1(FPTy->getElementType()->isFunctionTy(),
"Called function is not pointer to function type!", I);
const FunctionType *FTy = cast<FunctionType>(FPTy->getElementType());
@@ -1219,7 +1219,7 @@ void Verifier::visitICmpInst(ICmpInst& IC) {
Assert1(Op0Ty == Op1Ty,
"Both operands to ICmp instruction are not of the same type!", &IC);
// Check that the operands are the right type
- Assert1(Op0Ty->isIntOrIntVectorTy() || isa<PointerType>(Op0Ty),
+ Assert1(Op0Ty->isIntOrIntVectorTy() || Op0Ty->isPointerTy(),
"Invalid operand types for ICmp instruction", &IC);
visitInstruction(IC);
@@ -1286,7 +1286,7 @@ void Verifier::visitGetElementPtrInst(GetElementPtrInst &GEP) {
GetElementPtrInst::getIndexedType(GEP.getOperand(0)->getType(),
Idxs.begin(), Idxs.end());
Assert1(ElTy, "Invalid indices for GEP pointer type!", &GEP);
- Assert2(isa<PointerType>(GEP.getType()) &&
+ Assert2(GEP.getType()->isPointerTy() &&
cast<PointerType>(GEP.getType())->getElementType() == ElTy,
"GEP is not of right type for indices!", &GEP, ElTy);
visitInstruction(GEP);
@@ -1632,7 +1632,7 @@ void Verifier::visitIntrinsicFunctionCall(Intrinsic::ID ID, CallInst &CI) {
if (ID == Intrinsic::gcroot) {
AllocaInst *AI =
dyn_cast<AllocaInst>(CI.getOperand(1)->stripPointerCasts());
- Assert1(AI && isa<PointerType>(AI->getType()->getElementType()),
+ Assert1(AI && AI->getType()->getElementType()->isPointerTy(),
"llvm.gcroot parameter #1 must be a pointer alloca.", &CI);
Assert1(isa<Constant>(CI.getOperand(2)),
"llvm.gcroot parameter #2 must be a constant.", &CI);
@@ -1794,7 +1794,7 @@ bool Verifier::PerformTypeCheck(Intrinsic::ID ID, Function *F, const Type *Ty,
}
Suffix += ".v" + utostr(NumElts) + EVT::getEVT(EltTy).getEVTString();
} else if (VT == MVT::iPTR) {
- if (!isa<PointerType>(Ty)) {
+ if (!Ty->isPointerTy()) {
CheckFailed(IntrinsicParam(ArgNo, NumRets) + " is not a "
"pointer and a pointer is required.", F);
return false;
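A condensed sketch of the call-site typing rule VerifyCallSite enforces earlier in this file: the callee must be pointer-typed, and the pointee must be a function type, before argument checking can proceed. isValidCallee is a hypothetical free-function paraphrase of the two Assert1 checks, assuming the LLVM 2.7-era header layout:

    #include "llvm/DerivedTypes.h"
    #include "llvm/Value.h"
    using namespace llvm;

    // Mirror the verifier's two checks: reject a non-pointer callee,
    // then reject a pointee that is not a function type.
    bool isValidCallee(const Value *Callee) {
      if (!Callee->getType()->isPointerTy())
        return false;                        // "must be a pointer!"
      const PointerType *FPTy = cast<PointerType>(Callee->getType());
      return FPTy->getElementType()->isFunctionTy();
    }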