author      rdivacky <rdivacky@FreeBSD.org>        2009-12-15 18:09:07 +0000
committer   rdivacky <rdivacky@FreeBSD.org>        2009-12-15 18:09:07 +0000
commit      40a6fcdb85efd93fe0e36c9552cfb0b18b5eacd6 (patch)
tree        076117cdf3579003f07cad4cdf0593347ce58150 /lib
parent      e7908924d847e63b02bc82bfaa1709ab9c774dcd (diff)
download    FreeBSD-src-40a6fcdb85efd93fe0e36c9552cfb0b18b5eacd6.zip
            FreeBSD-src-40a6fcdb85efd93fe0e36c9552cfb0b18b5eacd6.tar.gz
Update LLVM to 91430.
Diffstat (limited to 'lib')
-rw-r--r-- lib/Analysis/CMakeLists.txt | 1
-rw-r--r-- lib/Analysis/CaptureTracking.cpp | 20
-rw-r--r-- lib/Analysis/ConstantFolding.cpp | 15
-rw-r--r-- lib/Analysis/DebugInfo.cpp | 68
-rw-r--r-- lib/Analysis/IPA/Andersens.cpp | 2
-rw-r--r-- lib/Analysis/IVUsers.cpp | 5
-rw-r--r-- lib/Analysis/LoopInfo.cpp | 6
-rw-r--r-- lib/Analysis/MemoryDependenceAnalysis.cpp | 525
-rw-r--r-- lib/Analysis/PHITransAddr.cpp | 432
-rw-r--r-- lib/Analysis/ProfileEstimatorPass.cpp | 146
-rw-r--r-- lib/Analysis/ProfileInfo.cpp | 999
-rw-r--r-- lib/Analysis/ProfileInfoLoaderPass.cpp | 56
-rw-r--r-- lib/Analysis/ProfileVerifierPass.cpp | 519
-rw-r--r-- lib/Analysis/ScalarEvolutionExpander.cpp | 6
-rw-r--r-- lib/Analysis/ValueTracking.cpp | 2
-rw-r--r-- lib/AsmParser/LLLexer.cpp | 1
-rw-r--r-- lib/AsmParser/LLParser.cpp | 55
-rw-r--r-- lib/AsmParser/LLParser.h | 4
-rw-r--r-- lib/AsmParser/LLToken.h | 1
-rw-r--r-- lib/Bitcode/Reader/Deserialize.cpp | 2
-rw-r--r-- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 27
-rw-r--r-- lib/CodeGen/AggressiveAntiDepBreaker.h | 6
-rw-r--r-- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 35
-rw-r--r-- lib/CodeGen/AsmPrinter/DIE.h | 2
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 411
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfDebug.h | 53
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfException.cpp | 13
-rw-r--r-- lib/CodeGen/AsmPrinter/DwarfException.h | 1
-rw-r--r-- lib/CodeGen/BranchFolding.cpp | 10
-rw-r--r-- lib/CodeGen/CMakeLists.txt | 3
-rw-r--r-- lib/CodeGen/CalcSpillWeights.cpp | 154
-rw-r--r-- lib/CodeGen/CodePlacementOpt.cpp | 9
-rw-r--r-- lib/CodeGen/CriticalAntiDepBreaker.cpp | 9
-rw-r--r-- lib/CodeGen/LLVMTargetMachine.cpp | 12
-rw-r--r-- lib/CodeGen/LiveInterval.cpp | 2
-rw-r--r-- lib/CodeGen/LiveIntervalAnalysis.cpp | 119
-rw-r--r-- lib/CodeGen/LiveVariables.cpp | 45
-rw-r--r-- lib/CodeGen/LowerSubregs.cpp | 2
-rw-r--r-- lib/CodeGen/MachineBasicBlock.cpp | 39
-rw-r--r-- lib/CodeGen/MachineFunction.cpp | 4
-rw-r--r-- lib/CodeGen/MachineInstr.cpp | 25
-rw-r--r-- lib/CodeGen/MachineLoopInfo.cpp | 6
-rw-r--r-- lib/CodeGen/MachineSSAUpdater.cpp | 393
-rw-r--r-- lib/CodeGen/MachineVerifier.cpp | 9
-rw-r--r-- lib/CodeGen/MaxStackAlignment.cpp | 70
-rw-r--r-- lib/CodeGen/PHIElimination.cpp | 56
-rw-r--r-- lib/CodeGen/PHIElimination.h | 6
-rw-r--r-- lib/CodeGen/PostRASchedulerList.cpp | 12
-rw-r--r-- lib/CodeGen/PreAllocSplitting.cpp | 6
-rw-r--r-- lib/CodeGen/PrologEpilogInserter.cpp | 6
-rw-r--r-- lib/CodeGen/RegAllocLinearScan.cpp | 115
-rw-r--r-- lib/CodeGen/RegAllocPBQP.cpp | 2
-rw-r--r-- lib/CodeGen/RegisterScavenging.cpp | 2
-rw-r--r-- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 134
-rw-r--r-- lib/CodeGen/SelectionDAG/FastISel.cpp | 12
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 15
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 94
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 4
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 3
-rw-r--r-- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 29
-rw-r--r-- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 6
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 366
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 5
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 196
-rw-r--r-- lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 24
-rw-r--r-- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 46
-rw-r--r-- lib/CodeGen/SimpleRegisterCoalescing.cpp | 177
-rw-r--r-- lib/CodeGen/SimpleRegisterCoalescing.h | 4
-rw-r--r-- lib/CodeGen/Spiller.cpp | 340
-rw-r--r-- lib/CodeGen/Spiller.h | 4
-rw-r--r-- lib/CodeGen/StackSlotColoring.cpp | 2
-rw-r--r-- lib/CodeGen/TailDuplication.cpp | 460
-rw-r--r-- lib/CodeGen/TargetInstrInfoImpl.cpp | 2
-rw-r--r-- lib/CodeGen/TwoAddressInstructionPass.cpp | 8
-rw-r--r-- lib/CodeGen/VirtRegRewriter.cpp | 28
-rw-r--r-- lib/ExecutionEngine/JIT/JIT.cpp | 4
-rw-r--r-- lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 1
-rw-r--r-- lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp | 20
-rw-r--r-- lib/Support/CMakeLists.txt | 1
-rw-r--r-- lib/Support/CommandLine.cpp | 3
-rw-r--r-- lib/Support/DeltaAlgorithm.cpp | 114
-rw-r--r-- lib/Support/MemoryBuffer.cpp | 6
-rw-r--r-- lib/Support/raw_ostream.cpp | 3
-rw-r--r-- lib/System/Atomic.cpp | 2
-rw-r--r-- lib/System/Host.cpp | 10
-rw-r--r-- lib/System/Unix/Path.inc | 2
-rw-r--r-- lib/Target/ARM/ARM.h | 1
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.cpp | 9
-rw-r--r-- lib/Target/ARM/ARMBaseInstrInfo.h | 5
-rw-r--r-- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 96
-rw-r--r-- lib/Target/ARM/ARMConstantIslandPass.cpp | 22
-rw-r--r-- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 2
-rw-r--r-- lib/Target/ARM/ARMISelLowering.cpp | 341
-rw-r--r-- lib/Target/ARM/ARMISelLowering.h | 12
-rw-r--r-- lib/Target/ARM/ARMInstrFormats.td | 52
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.cpp | 19
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.h | 3
-rw-r--r-- lib/Target/ARM/ARMInstrInfo.td | 198
-rw-r--r-- lib/Target/ARM/ARMInstrNEON.td | 4
-rw-r--r-- lib/Target/ARM/ARMInstrThumb.td | 29
-rw-r--r-- lib/Target/ARM/ARMInstrThumb2.td | 62
-rw-r--r-- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 6
-rw-r--r-- lib/Target/ARM/ARMTargetMachine.cpp | 2
-rw-r--r-- lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp | 1
-rw-r--r-- lib/Target/ARM/NEONMoveFix.cpp | 2
-rw-r--r-- lib/Target/ARM/NEONPreAllocPass.cpp | 2
-rw-r--r-- lib/Target/ARM/Thumb1InstrInfo.cpp | 19
-rw-r--r-- lib/Target/ARM/Thumb1InstrInfo.h | 3
-rw-r--r-- lib/Target/ARM/Thumb1RegisterInfo.cpp | 2
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.cpp | 24
-rw-r--r-- lib/Target/ARM/Thumb2InstrInfo.h | 3
-rw-r--r-- lib/Target/ARM/Thumb2SizeReduction.cpp | 2
-rw-r--r-- lib/Target/Alpha/AlphaInstrInfo.cpp | 12
-rw-r--r-- lib/Target/Alpha/AlphaInstrInfo.h | 1
-rw-r--r-- lib/Target/CBackend/CBackend.cpp | 4
-rw-r--r-- lib/Target/CellSPU/SPUInstrInfo.cpp | 4
-rw-r--r-- lib/Target/CellSPU/SPUInstrInfo.h | 3
-rw-r--r-- lib/Target/CppBackend/CPPBackend.cpp | 13
-rw-r--r-- lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 4
-rw-r--r-- lib/Target/MSP430/MSP430ISelLowering.cpp | 334
-rw-r--r-- lib/Target/MSP430/MSP430ISelLowering.h | 20
-rw-r--r-- lib/Target/MSP430/MSP430InstrInfo.cpp | 15
-rw-r--r-- lib/Target/MSP430/MSP430InstrInfo.h | 1
-rw-r--r-- lib/Target/MSP430/MSP430InstrInfo.td | 100
-rw-r--r-- lib/Target/MSP430/MSP430MachineFunctionInfo.h | 8
-rw-r--r-- lib/Target/MSP430/MSP430RegisterInfo.cpp | 33
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.cpp | 16
-rw-r--r-- lib/Target/Mips/MipsInstrInfo.h | 1
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.cpp | 12
-rw-r--r-- lib/Target/PowerPC/PPCInstrInfo.h | 1
-rw-r--r-- lib/Target/README.txt | 115
-rw-r--r-- lib/Target/SystemZ/SystemZInstrInfo.cpp | 16
-rw-r--r-- lib/Target/SystemZ/SystemZInstrInfo.h | 1
-rw-r--r-- lib/Target/SystemZ/SystemZRegisterInfo.cpp | 2
-rw-r--r-- lib/Target/TargetData.cpp | 126
-rw-r--r-- lib/Target/X86/X86.h | 5
-rw-r--r-- lib/Target/X86/X86COFFMachineModuleInfo.h | 1
-rw-r--r-- lib/Target/X86/X86CallingConv.td | 9
-rw-r--r-- lib/Target/X86/X86FloatingPoint.cpp | 2
-rw-r--r-- lib/Target/X86/X86ISelLowering.cpp | 190
-rw-r--r-- lib/Target/X86/X86ISelLowering.h | 8
-rw-r--r-- lib/Target/X86/X86Instr64bit.td | 9
-rw-r--r-- lib/Target/X86/X86InstrInfo.cpp | 305
-rw-r--r-- lib/Target/X86/X86InstrInfo.h | 25
-rw-r--r-- lib/Target/X86/X86InstrInfo.td | 24
-rw-r--r-- lib/Target/X86/X86InstrSSE.td | 2
-rw-r--r-- lib/Target/X86/X86RegisterInfo.cpp | 68
-rw-r--r-- lib/Target/X86/X86Subtarget.cpp | 2
-rw-r--r-- lib/Target/X86/X86TargetMachine.cpp | 2
-rw-r--r-- lib/Target/XCore/XCoreInstrInfo.cpp | 20
-rw-r--r-- lib/Target/XCore/XCoreInstrInfo.h | 2
-rw-r--r-- lib/Transforms/IPO/GlobalOpt.cpp | 41
-rw-r--r-- lib/Transforms/Instrumentation/MaximumSpanningTree.h | 13
-rw-r--r-- lib/Transforms/Scalar/CodeGenPrepare.cpp | 3
-rw-r--r-- lib/Transforms/Scalar/DeadStoreElimination.cpp | 42
-rw-r--r-- lib/Transforms/Scalar/GVN.cpp | 445
-rw-r--r-- lib/Transforms/Scalar/InstructionCombining.cpp | 46
-rw-r--r-- lib/Transforms/Scalar/JumpThreading.cpp | 5
-rw-r--r-- lib/Transforms/Scalar/LICM.cpp | 11
-rw-r--r-- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 102
-rw-r--r-- lib/Transforms/Scalar/LoopUnswitch.cpp | 3
-rw-r--r-- lib/Transforms/Scalar/SCCVN.cpp | 5
-rw-r--r-- lib/Transforms/Scalar/ScalarReplAggregates.cpp | 41
-rw-r--r-- lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp | 2
-rw-r--r-- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 34
-rw-r--r-- lib/Transforms/Utils/BasicBlockUtils.cpp | 1
-rw-r--r-- lib/Transforms/Utils/Local.cpp | 62
-rw-r--r-- lib/Transforms/Utils/LowerSwitch.cpp | 2
-rw-r--r-- lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 3
-rw-r--r-- lib/Transforms/Utils/SSAUpdater.cpp | 6
-rw-r--r-- lib/Transforms/Utils/SimplifyCFG.cpp | 63
-rw-r--r-- lib/VMCore/AsmWriter.cpp | 27
-rw-r--r-- lib/VMCore/BasicBlock.cpp | 2
-rw-r--r-- lib/VMCore/Constants.cpp | 2
-rw-r--r-- lib/VMCore/Function.cpp | 7
-rw-r--r-- lib/VMCore/LLVMContextImpl.h | 2
-rw-r--r-- lib/VMCore/Pass.cpp | 67
-rw-r--r-- lib/VMCore/PassManager.cpp | 15
178 files changed, 7255 insertions, 2888 deletions
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index 0a83c3d..17c9b86 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -27,6 +27,7 @@ add_llvm_library(LLVMAnalysis
LoopPass.cpp
MemoryBuiltins.cpp
MemoryDependenceAnalysis.cpp
+ PHITransAddr.cpp
PointerTracking.cpp
PostDominators.cpp
ProfileEstimatorPass.cpp
diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp
index a276c64..10a8b11 100644
--- a/lib/Analysis/CaptureTracking.cpp
+++ b/lib/Analysis/CaptureTracking.cpp
@@ -25,6 +25,16 @@
#include "llvm/Support/CallSite.h"
using namespace llvm;
+/// As its comment mentions, PointerMayBeCaptured can be expensive.
+/// However, it's not easy for BasicAA to cache the result, because
+/// it's an ImmutablePass. To work around this, bound queries at a
+/// fixed number of uses.
+///
+/// TODO: Write a new FunctionPass AliasAnalysis so that it can keep
+/// a cache. Then we can move the code from BasicAliasAnalysis into
+/// that path, and remove this threshold.
+static int const Threshold = 20;
+
/// PointerMayBeCaptured - Return true if this pointer value may be captured
/// by the enclosing function (which is required to exist). This routine can
/// be expensive, so consider caching the results. The boolean ReturnCaptures
@@ -35,11 +45,17 @@ using namespace llvm;
bool llvm::PointerMayBeCaptured(const Value *V,
bool ReturnCaptures, bool StoreCaptures) {
assert(isa<PointerType>(V->getType()) && "Capture is for pointers only!");
- SmallVector<Use*, 16> Worklist;
- SmallSet<Use*, 16> Visited;
+ SmallVector<Use*, Threshold> Worklist;
+ SmallSet<Use*, Threshold> Visited;
+ int Count = 0;
for (Value::use_const_iterator UI = V->use_begin(), UE = V->use_end();
UI != UE; ++UI) {
+ // If there are lots of uses, conservatively say that the value
+ // is captured to avoid taking too much compile time.
+ if (Count++ >= Threshold)
+ return true;
+
Use *U = &UI.getUse();
Visited.insert(U);
Worklist.push_back(U);
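
The bound introduced above only makes the analysis more conservative: once a value has more than Threshold (20) uses it is simply reported as captured, so a false answer remains a definitive no-capture result. A minimal caller sketch, assuming the usual CaptureTracking.h header; the helper name is illustrative and not part of this commit:

    #include "llvm/Analysis/CaptureTracking.h"
    using namespace llvm;

    // With the use-count bound, 'true' only means the pointer *may* be
    // captured (possibly just because it has many uses); only 'false' is a
    // definitive "does not escape the enclosing function".
    static bool definitelyNotCaptured(const Value *Ptr) {
      return !PointerMayBeCaptured(Ptr, /*ReturnCaptures=*/true,
                                   /*StoreCaptures=*/true);
    }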
diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp
index 96f738e..eaf90d0 100644
--- a/lib/Analysis/ConstantFolding.cpp
+++ b/lib/Analysis/ConstantFolding.cpp
@@ -432,7 +432,7 @@ Constant *llvm::ConstantFoldLoadFromConstPtr(Constant *C,
// Instead of loading constant c string, use corresponding integer value
// directly if string length is small enough.
std::string Str;
- if (TD && GetConstantStringInfo(CE->getOperand(0), Str) && !Str.empty()) {
+ if (TD && GetConstantStringInfo(CE, Str) && !Str.empty()) {
unsigned StrLen = Str.length();
const Type *Ty = cast<PointerType>(CE->getType())->getElementType();
unsigned NumBits = Ty->getPrimitiveSizeInBits();
@@ -569,9 +569,16 @@ static Constant *SymbolicallyEvaluateGEP(Constant *const *Ops, unsigned NumOps,
SmallVector<Constant*, 32> NewIdxs;
do {
if (const SequentialType *ATy = dyn_cast<SequentialType>(Ty)) {
- // The only pointer indexing we'll do is on the first index of the GEP.
- if (isa<PointerType>(ATy) && !NewIdxs.empty())
- break;
+ if (isa<PointerType>(ATy)) {
+ // The only pointer indexing we'll do is on the first index of the GEP.
+ if (!NewIdxs.empty())
+ break;
+
+ // Only handle pointers to sized types, not pointers to functions.
+ if (!ATy->getElementType()->isSized())
+ return 0;
+ }
+
// Determine which element of the array the offset points into.
APInt ElemSize(BitWidth, TD->getTypeAllocSize(ATy->getElementType()));
if (ElemSize == 0)
diff --git a/lib/Analysis/DebugInfo.cpp b/lib/Analysis/DebugInfo.cpp
index 41d803c..1c9f500 100644
--- a/lib/Analysis/DebugInfo.cpp
+++ b/lib/Analysis/DebugInfo.cpp
@@ -866,7 +866,9 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
DICompileUnit CompileUnit,
unsigned LineNo, DIType Type,
bool isLocalToUnit,
- bool isDefinition) {
+ bool isDefinition,
+ unsigned VK, unsigned VIndex,
+ DIType ContainingType) {
Value *Elts[] = {
GetTagConstant(dwarf::DW_TAG_subprogram),
@@ -879,9 +881,38 @@ DISubprogram DIFactory::CreateSubprogram(DIDescriptor Context,
ConstantInt::get(Type::getInt32Ty(VMContext), LineNo),
Type.getNode(),
ConstantInt::get(Type::getInt1Ty(VMContext), isLocalToUnit),
- ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition)
+ ConstantInt::get(Type::getInt1Ty(VMContext), isDefinition),
+ ConstantInt::get(Type::getInt32Ty(VMContext), (unsigned)VK),
+ ConstantInt::get(Type::getInt32Ty(VMContext), VIndex),
+ ContainingType.getNode()
};
- return DISubprogram(MDNode::get(VMContext, &Elts[0], 11));
+ return DISubprogram(MDNode::get(VMContext, &Elts[0], 14));
+}
+
+/// CreateSubprogramDefinition - Create new subprogram descriptor for the
+/// given declaration.
+DISubprogram DIFactory::CreateSubprogramDefinition(DISubprogram &SPDeclaration) {
+ if (SPDeclaration.isDefinition())
+ return DISubprogram(SPDeclaration.getNode());
+
+ MDNode *DeclNode = SPDeclaration.getNode();
+ Value *Elts[] = {
+ GetTagConstant(dwarf::DW_TAG_subprogram),
+ llvm::Constant::getNullValue(Type::getInt32Ty(VMContext)),
+ DeclNode->getElement(2), // Context
+ DeclNode->getElement(3), // Name
+ DeclNode->getElement(4), // DisplayName
+ DeclNode->getElement(5), // LinkageName
+ DeclNode->getElement(6), // CompileUnit
+ DeclNode->getElement(7), // LineNo
+ DeclNode->getElement(8), // Type
+ DeclNode->getElement(9), // isLocalToUnit
+ ConstantInt::get(Type::getInt1Ty(VMContext), true),
+ DeclNode->getElement(11), // Virtuality
+ DeclNode->getElement(12), // VIndex
+ DeclNode->getElement(13) // Containting Type
+ };
+ return DISubprogram(MDNode::get(VMContext, &Elts[0], 14));
}
/// CreateGlobalVariable - Create a new descriptor for the specified global.
@@ -1019,6 +1050,37 @@ Instruction *DIFactory::InsertDeclare(Value *Storage, DIVariable D,
return CallInst::Create(DeclareFn, Args, Args+2, "", InsertAtEnd);
}
+/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset,
+ DIVariable D,
+ Instruction *InsertBefore) {
+ assert(V && "no value passed to dbg.value");
+ assert(Offset->getType() == Type::getInt64Ty(V->getContext()) &&
+ "offset must be i64");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Elts[] = { V };
+ Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset,
+ D.getNode() };
+ return CallInst::Create(ValueFn, Args, Args+3, "", InsertBefore);
+}
+
+/// InsertDbgValueIntrinsic - Insert a new llvm.dbg.value intrinsic call.
+Instruction *DIFactory::InsertDbgValueIntrinsic(Value *V, Value *Offset,
+ DIVariable D,
+ BasicBlock *InsertAtEnd) {
+ assert(V && "no value passed to dbg.value");
+ assert(Offset->getType() == Type::getInt64Ty(V->getContext()) &&
+ "offset must be i64");
+ if (!ValueFn)
+ ValueFn = Intrinsic::getDeclaration(&M, Intrinsic::dbg_value);
+
+ Value *Elts[] = { V };
+ Value *Args[] = { MDNode::get(V->getContext(), Elts, 1), Offset,
+ D.getNode() };
+ return CallInst::Create(ValueFn, Args, Args+3, "", InsertAtEnd);
+}
//===----------------------------------------------------------------------===//
// DebugInfoFinder implementations.
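
For orientation, a hedged sketch of how a pass or frontend might call the InsertDbgValueIntrinsic helpers added above; the DIFactory, variable descriptor, and insertion point are assumed to exist and are not shown by this commit:

    #include "llvm/Analysis/DebugInfo.h"
    #include "llvm/Constants.h"
    #include "llvm/DerivedTypes.h"
    using namespace llvm;

    // Record that 'Var' is now available as the SSA value 'NewVal' by
    // emitting an llvm.dbg.value call before 'InsertPt'. The Offset operand
    // must be an i64 constant, as the helper asserts.
    static void emitDbgValue(DIFactory &Factory, Value *NewVal,
                             DIVariable Var, Instruction *InsertPt) {
      Value *Offset =
          ConstantInt::get(Type::getInt64Ty(NewVal->getContext()), 0);
      Factory.InsertDbgValueIntrinsic(NewVal, Offset, Var, InsertPt);
    }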
diff --git a/lib/Analysis/IPA/Andersens.cpp b/lib/Analysis/IPA/Andersens.cpp
index e12db81..4d5b312 100644
--- a/lib/Analysis/IPA/Andersens.cpp
+++ b/lib/Analysis/IPA/Andersens.cpp
@@ -121,8 +121,6 @@ namespace {
return *LHS == *RHS;
}
-
- static bool isPod() { return true; }
};
class Andersens : public ModulePass, public AliasAnalysis,
diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp
index 37747b6..627dbbb 100644
--- a/lib/Analysis/IVUsers.cpp
+++ b/lib/Analysis/IVUsers.cpp
@@ -53,7 +53,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
if (newLoop == L)
return false;
// if newLoop is an outer loop of L, this is OK.
- if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop))
+ if (newLoop->contains(L->getHeader()))
return false;
}
return true;
@@ -307,6 +307,7 @@ bool IVUsers::runOnLoop(Loop *l, LPPassManager &LPM) {
for (BasicBlock::iterator I = L->getHeader()->begin(); isa<PHINode>(I); ++I)
AddUsersIfInteresting(I);
+ Processed.clear();
return false;
}
@@ -369,7 +370,7 @@ void IVUsers::dump() const {
void IVUsers::releaseMemory() {
IVUsesByStride.clear();
StrideOrder.clear();
- Processed.clear();
+ IVUses.clear();
}
void IVStrideUse::deleted() {
diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp
index 4de756c..34089ee 100644
--- a/lib/Analysis/LoopInfo.cpp
+++ b/lib/Analysis/LoopInfo.cpp
@@ -316,12 +316,12 @@ bool Loop::hasDedicatedExits() const {
/// getUniqueExitBlocks - Return all unique successor blocks of this loop.
/// These are the blocks _outside of the current loop_ which are branched to.
-/// This assumes that loop is in canonical form.
+/// This assumes that loop exits are in canonical form.
///
void
Loop::getUniqueExitBlocks(SmallVectorImpl<BasicBlock *> &ExitBlocks) const {
- assert(isLoopSimplifyForm() &&
- "getUniqueExitBlocks assumes the loop is in canonical form!");
+ assert(hasDedicatedExits() &&
+ "getUniqueExitBlocks assumes the loop has canonical form exits!");
// Sort the blocks vector so that we can use binary search to do quick
// lookups.
diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp
index ae6f970..a0c7706 100644
--- a/lib/Analysis/MemoryDependenceAnalysis.cpp
+++ b/lib/Analysis/MemoryDependenceAnalysis.cpp
@@ -23,6 +23,7 @@
#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/InstructionSimplify.h"
#include "llvm/Analysis/MemoryBuiltins.h"
+#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/Support/PredIteratorCache.h"
@@ -172,7 +173,7 @@ MemDepResult MemoryDependenceAnalysis::
getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
BasicBlock::iterator ScanIt, BasicBlock *BB) {
- Value *invariantTag = 0;
+ Value *InvariantTag = 0;
// Walk backwards through the basic block, looking for dependencies.
while (ScanIt != BB->begin()) {
@@ -180,34 +181,36 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// If we're in an invariant region, no dependencies can be found before
// we pass an invariant-begin marker.
- if (invariantTag == Inst) {
- invariantTag = 0;
+ if (InvariantTag == Inst) {
+ InvariantTag = 0;
continue;
- } else if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ }
+
+ if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
+ // Debug intrinsics don't cause dependences.
+ if (isa<DbgInfoIntrinsic>(Inst)) continue;
+
// If we pass an invariant-end marker, then we've just entered an
// invariant region and can start ignoring dependencies.
if (II->getIntrinsicID() == Intrinsic::invariant_end) {
- uint64_t invariantSize = ~0ULL;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(2)))
- invariantSize = CI->getZExtValue();
-
- AliasAnalysis::AliasResult R =
- AA->alias(II->getOperand(3), invariantSize, MemPtr, MemSize);
+ // FIXME: This only considers queries directly on the invariant-tagged
+ // pointer, not on query pointers that are indexed off of them. It'd
+ // be nice to handle that at some point.
+ AliasAnalysis::AliasResult R =
+ AA->alias(II->getOperand(3), ~0U, MemPtr, ~0U);
if (R == AliasAnalysis::MustAlias) {
- invariantTag = II->getOperand(1);
+ InvariantTag = II->getOperand(1);
continue;
}
// If we reach a lifetime begin or end marker, then the query ends here
// because the value is undefined.
- } else if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- uint64_t invariantSize = ~0ULL;
- if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(1)))
- invariantSize = CI->getZExtValue();
-
+ } else if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
+ // FIXME: This only considers queries directly on the invariant-tagged
+ // pointer, not on query pointers that are indexed off of them. It'd
+ // be nice to handle that at some point.
AliasAnalysis::AliasResult R =
- AA->alias(II->getOperand(2), invariantSize, MemPtr, MemSize);
+ AA->alias(II->getOperand(2), ~0U, MemPtr, ~0U);
if (R == AliasAnalysis::MustAlias)
return MemDepResult::getDef(II);
}
@@ -215,10 +218,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
// If we're querying on a load and we're in an invariant region, we're done
// at this point. Nothing a load depends on can live in an invariant region.
- if (isLoad && invariantTag) continue;
-
- // Debug intrinsics don't cause dependences.
- if (isa<DbgInfoIntrinsic>(Inst)) continue;
+ if (isLoad && InvariantTag) continue;
// Values depend on loads if the pointers are must aliased. This means that
// a load depends on another must aliased load from the same value.
@@ -243,7 +243,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
if (StoreInst *SI = dyn_cast<StoreInst>(Inst)) {
// There can't be stores to the value we care about inside an
// invariant region.
- if (invariantTag) continue;
+ if (InvariantTag) continue;
// If alias analysis can tell that this store is guaranteed to not modify
// the query pointer, ignore it. Use getModRefInfo to handle cases where
@@ -292,7 +292,7 @@ getPointerDependencyFrom(Value *MemPtr, uint64_t MemSize, bool isLoad,
case AliasAnalysis::Mod:
// If we're in an invariant region, we can ignore calls that ONLY
// modify the pointer.
- if (invariantTag) continue;
+ if (InvariantTag) continue;
return MemDepResult::getClobber(Inst);
case AliasAnalysis::Ref:
// If the call is known to never store to the pointer, and if this is a
@@ -374,21 +374,22 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) {
IntrinsicID = II->getIntrinsicID();
switch (IntrinsicID) {
- case Intrinsic::lifetime_start:
- case Intrinsic::lifetime_end:
- case Intrinsic::invariant_start:
- MemPtr = QueryInst->getOperand(2);
- MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue();
- break;
- case Intrinsic::invariant_end:
- MemPtr = QueryInst->getOperand(3);
- MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue();
- break;
- default:
- CallSite QueryCS = CallSite::get(QueryInst);
- bool isReadOnly = AA->onlyReadsMemory(QueryCS);
- LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
- QueryParent);
+ case Intrinsic::lifetime_start:
+ case Intrinsic::lifetime_end:
+ case Intrinsic::invariant_start:
+ MemPtr = QueryInst->getOperand(2);
+ MemSize = cast<ConstantInt>(QueryInst->getOperand(1))->getZExtValue();
+ break;
+ case Intrinsic::invariant_end:
+ MemPtr = QueryInst->getOperand(3);
+ MemSize = cast<ConstantInt>(QueryInst->getOperand(2))->getZExtValue();
+ break;
+ default:
+ CallSite QueryCS = CallSite::get(QueryInst);
+ bool isReadOnly = AA->onlyReadsMemory(QueryCS);
+ LocalCache = getCallSiteDependencyFrom(QueryCS, isReadOnly, ScanPos,
+ QueryParent);
+ break;
}
} else {
// Non-memory instruction.
@@ -421,7 +422,7 @@ static void AssertSorted(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
if (Count == 0) return;
for (unsigned i = 1; i != unsigned(Count); ++i)
- assert(Cache[i-1] <= Cache[i] && "Cache isn't sorted!");
+ assert(!(Cache[i] < Cache[i-1]) && "Cache isn't sorted!");
}
#endif
@@ -462,8 +463,8 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
// determine what is dirty, seeding our initial DirtyBlocks worklist.
for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end();
I != E; ++I)
- if (I->second.isDirty())
- DirtyBlocks.push_back(I->first);
+ if (I->getResult().isDirty())
+ DirtyBlocks.push_back(I->getBB());
// Sort the cache so that we can do fast binary search lookups below.
std::sort(Cache.begin(), Cache.end());
@@ -501,27 +502,27 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
DEBUG(AssertSorted(Cache, NumSortedEntries));
NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries,
- std::make_pair(DirtyBB, MemDepResult()));
- if (Entry != Cache.begin() && prior(Entry)->first == DirtyBB)
+ NonLocalDepEntry(DirtyBB));
+ if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB)
--Entry;
- MemDepResult *ExistingResult = 0;
+ NonLocalDepEntry *ExistingResult = 0;
if (Entry != Cache.begin()+NumSortedEntries &&
- Entry->first == DirtyBB) {
+ Entry->getBB() == DirtyBB) {
// If we already have an entry, and if it isn't already dirty, the block
// is done.
- if (!Entry->second.isDirty())
+ if (!Entry->getResult().isDirty())
continue;
// Otherwise, remember this slot so we can update the value.
- ExistingResult = &Entry->second;
+ ExistingResult = &*Entry;
}
// If the dirty entry has a pointer, start scanning from it so we don't have
// to rescan the entire block.
BasicBlock::iterator ScanPos = DirtyBB->end();
if (ExistingResult) {
- if (Instruction *Inst = ExistingResult->getInst()) {
+ if (Instruction *Inst = ExistingResult->getResult().getInst()) {
ScanPos = Inst;
// We're removing QueryInst's use of Inst.
RemoveFromReverseMap(ReverseNonLocalDeps, Inst,
@@ -545,9 +546,9 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) {
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
if (ExistingResult)
- *ExistingResult = Dep;
+ ExistingResult->setResult(Dep, 0);
else
- Cache.push_back(std::make_pair(DirtyBB, Dep));
+ Cache.push_back(NonLocalDepEntry(DirtyBB, Dep, 0));
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the association!
@@ -587,17 +588,20 @@ getNonLocalPointerDependency(Value *Pointer, bool isLoad, BasicBlock *FromBB,
const Type *EltTy = cast<PointerType>(Pointer->getType())->getElementType();
uint64_t PointeeSize = AA->getTypeStoreSize(EltTy);
+ PHITransAddr Address(Pointer, TD);
+
// This is the set of blocks we've inspected, and the pointer we consider in
// each block. Because of critical edges, we currently bail out if querying
// a block with multiple different pointers. This can happen during PHI
// translation.
DenseMap<BasicBlock*, Value*> Visited;
- if (!getNonLocalPointerDepFromBB(Pointer, PointeeSize, isLoad, FromBB,
+ if (!getNonLocalPointerDepFromBB(Address, PointeeSize, isLoad, FromBB,
Result, Visited, true))
return;
Result.clear();
- Result.push_back(std::make_pair(FromBB,
- MemDepResult::getClobber(FromBB->begin())));
+ Result.push_back(NonLocalDepEntry(FromBB,
+ MemDepResult::getClobber(FromBB->begin()),
+ Pointer));
}
/// GetNonLocalInfoForBlock - Compute the memdep value for BB with
@@ -613,30 +617,30 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
// the cache set. If so, find it.
NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache->begin(), Cache->begin()+NumSortedEntries,
- std::make_pair(BB, MemDepResult()));
- if (Entry != Cache->begin() && prior(Entry)->first == BB)
+ NonLocalDepEntry(BB));
+ if (Entry != Cache->begin() && (Entry-1)->getBB() == BB)
--Entry;
- MemDepResult *ExistingResult = 0;
- if (Entry != Cache->begin()+NumSortedEntries && Entry->first == BB)
- ExistingResult = &Entry->second;
+ NonLocalDepEntry *ExistingResult = 0;
+ if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB)
+ ExistingResult = &*Entry;
// If we have a cached entry, and it is non-dirty, use it as the value for
// this dependency.
- if (ExistingResult && !ExistingResult->isDirty()) {
+ if (ExistingResult && !ExistingResult->getResult().isDirty()) {
++NumCacheNonLocalPtr;
- return *ExistingResult;
+ return ExistingResult->getResult();
}
// Otherwise, we have to scan for the value. If we have a dirty cache
// entry, start scanning from its position, otherwise we scan from the end
// of the block.
BasicBlock::iterator ScanPos = BB->end();
- if (ExistingResult && ExistingResult->getInst()) {
- assert(ExistingResult->getInst()->getParent() == BB &&
+ if (ExistingResult && ExistingResult->getResult().getInst()) {
+ assert(ExistingResult->getResult().getInst()->getParent() == BB &&
"Instruction invalidated?");
++NumCacheDirtyNonLocalPtr;
- ScanPos = ExistingResult->getInst();
+ ScanPos = ExistingResult->getResult().getInst();
// Eliminating the dirty entry from 'Cache', so update the reverse info.
ValueIsLoadPair CacheKey(Pointer, isLoad);
@@ -652,9 +656,9 @@ GetNonLocalInfoForBlock(Value *Pointer, uint64_t PointeeSize,
// If we had a dirty entry for the block, update it. Otherwise, just add
// a new entry.
if (ExistingResult)
- *ExistingResult = Dep;
+ ExistingResult->setResult(Dep, Pointer);
else
- Cache->push_back(std::make_pair(BB, Dep));
+ Cache->push_back(NonLocalDepEntry(BB, Dep, Pointer));
// If the block has a dependency (i.e. it isn't completely transparent to
// the value), remember the reverse association because we just added it
@@ -683,7 +687,7 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
break;
case 2: {
// Two new entries, insert the last one into place.
- MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+ NonLocalDepEntry Val = Cache.back();
Cache.pop_back();
MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.end()-1, Val);
@@ -693,7 +697,7 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
case 1:
// One new entry, Just insert the new value at the appropriate position.
if (Cache.size() != 1) {
- MemoryDependenceAnalysis::NonLocalDepEntry Val = Cache.back();
+ NonLocalDepEntry Val = Cache.back();
Cache.pop_back();
MemoryDependenceAnalysis::NonLocalDepInfo::iterator Entry =
std::upper_bound(Cache.begin(), Cache.end(), Val);
@@ -707,275 +711,6 @@ SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache,
}
}
-/// isPHITranslatable - Return true if the specified computation is derived from
-/// a PHI node in the current block and if it is simple enough for us to handle.
-static bool isPHITranslatable(Instruction *Inst) {
- if (isa<PHINode>(Inst))
- return true;
-
- // We can handle bitcast of a PHI, but the PHI needs to be in the same block
- // as the bitcast.
- if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
- Instruction *OpI = dyn_cast<Instruction>(BC->getOperand(0));
- if (OpI == 0 || OpI->getParent() != Inst->getParent())
- return true;
- return isPHITranslatable(OpI);
- }
-
- // We can translate a GEP if all of its operands defined in this block are phi
- // translatable.
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
- for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
- Instruction *OpI = dyn_cast<Instruction>(GEP->getOperand(i));
- if (OpI == 0 || OpI->getParent() != Inst->getParent())
- continue;
-
- if (!isPHITranslatable(OpI))
- return false;
- }
- return true;
- }
-
- if (Inst->getOpcode() == Instruction::Add &&
- isa<ConstantInt>(Inst->getOperand(1))) {
- Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0));
- if (OpI == 0 || OpI->getParent() != Inst->getParent())
- return true;
- return isPHITranslatable(OpI);
- }
-
- // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
- // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
- // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
-
- return false;
-}
-
-/// GetPHITranslatedValue - Given a computation that satisfied the
-/// isPHITranslatable predicate, see if we can translate the computation into
-/// the specified predecessor block. If so, return that value.
-Value *MemoryDependenceAnalysis::
-GetPHITranslatedValue(Value *InVal, BasicBlock *CurBB, BasicBlock *Pred,
- const TargetData *TD) const {
- // If the input value is not an instruction, or if it is not defined in CurBB,
- // then we don't need to phi translate it.
- Instruction *Inst = dyn_cast<Instruction>(InVal);
- if (Inst == 0 || Inst->getParent() != CurBB)
- return InVal;
-
- if (PHINode *PN = dyn_cast<PHINode>(Inst))
- return PN->getIncomingValueForBlock(Pred);
-
- // Handle bitcast of PHI.
- if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
- // PHI translate the input operand.
- Value *PHIIn = GetPHITranslatedValue(BC->getOperand(0), CurBB, Pred, TD);
- if (PHIIn == 0) return 0;
-
- // Constants are trivial to phi translate.
- if (Constant *C = dyn_cast<Constant>(PHIIn))
- return ConstantExpr::getBitCast(C, BC->getType());
-
- // Otherwise we have to see if a bitcasted version of the incoming pointer
- // is available. If so, we can use it, otherwise we have to fail.
- for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
- UI != E; ++UI) {
- if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
- if (BCI->getType() == BC->getType())
- return BCI;
- }
- return 0;
- }
-
- // Handle getelementptr with at least one PHI translatable operand.
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
- SmallVector<Value*, 8> GEPOps;
- BasicBlock *CurBB = GEP->getParent();
- for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
- Value *GEPOp = GEP->getOperand(i);
- // No PHI translation is needed of operands whose values are live in to
- // the predecessor block.
- if (!isa<Instruction>(GEPOp) ||
- cast<Instruction>(GEPOp)->getParent() != CurBB) {
- GEPOps.push_back(GEPOp);
- continue;
- }
-
- // If the operand is a phi node, do phi translation.
- Value *InOp = GetPHITranslatedValue(GEPOp, CurBB, Pred, TD);
- if (InOp == 0) return 0;
-
- GEPOps.push_back(InOp);
- }
-
- // Simplify the GEP to handle 'gep x, 0' -> x etc.
- if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD))
- return V;
-
- // Scan to see if we have this GEP available.
- Value *APHIOp = GEPOps[0];
- for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
- UI != E; ++UI) {
- if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
- if (GEPI->getType() == GEP->getType() &&
- GEPI->getNumOperands() == GEPOps.size() &&
- GEPI->getParent()->getParent() == CurBB->getParent()) {
- bool Mismatch = false;
- for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
- if (GEPI->getOperand(i) != GEPOps[i]) {
- Mismatch = true;
- break;
- }
- if (!Mismatch)
- return GEPI;
- }
- }
- return 0;
- }
-
- // Handle add with a constant RHS.
- if (Inst->getOpcode() == Instruction::Add &&
- isa<ConstantInt>(Inst->getOperand(1))) {
- // PHI translate the LHS.
- Value *LHS;
- Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
- Instruction *OpI = dyn_cast<Instruction>(Inst->getOperand(0));
- bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
- bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
-
- if (OpI == 0 || OpI->getParent() != Inst->getParent())
- LHS = Inst->getOperand(0);
- else {
- LHS = GetPHITranslatedValue(Inst->getOperand(0), CurBB, Pred, TD);
- if (LHS == 0)
- return 0;
- }
-
- // If the PHI translated LHS is an add of a constant, fold the immediates.
- if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
- if (BOp->getOpcode() == Instruction::Add)
- if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
- LHS = BOp->getOperand(0);
- RHS = ConstantExpr::getAdd(RHS, CI);
- isNSW = isNUW = false;
- }
-
- // See if the add simplifies away.
- if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD))
- return Res;
-
- // Otherwise, see if we have this add available somewhere.
- for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
- UI != E; ++UI) {
- if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
- if (BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
- BO->getParent()->getParent() == CurBB->getParent())
- return BO;
- }
-
- return 0;
- }
-
- return 0;
-}
-
-/// GetAvailablePHITranslatePointer - Return the value computed by
-/// PHITranslatePointer if it dominates PredBB, otherwise return null.
-Value *MemoryDependenceAnalysis::
-GetAvailablePHITranslatedValue(Value *V,
- BasicBlock *CurBB, BasicBlock *PredBB,
- const TargetData *TD,
- const DominatorTree &DT) const {
- // See if PHI translation succeeds.
- V = GetPHITranslatedValue(V, CurBB, PredBB, TD);
- if (V == 0) return 0;
-
- // Make sure the value is live in the predecessor.
- if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
- if (!DT.dominates(Inst->getParent(), PredBB))
- return 0;
- return V;
-}
-
-
-/// InsertPHITranslatedPointer - Insert a computation of the PHI translated
-/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
-/// block. All newly created instructions are added to the NewInsts list.
-///
-Value *MemoryDependenceAnalysis::
-InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB,
- BasicBlock *PredBB, const TargetData *TD,
- const DominatorTree &DT,
- SmallVectorImpl<Instruction*> &NewInsts) const {
- // See if we have a version of this value already available and dominating
- // PredBB. If so, there is no need to insert a new copy.
- if (Value *Res = GetAvailablePHITranslatedValue(InVal, CurBB, PredBB, TD, DT))
- return Res;
-
- // If we don't have an available version of this value, it must be an
- // instruction.
- Instruction *Inst = cast<Instruction>(InVal);
-
- // Handle bitcast of PHI translatable value.
- if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
- Value *OpVal = InsertPHITranslatedPointer(BC->getOperand(0),
- CurBB, PredBB, TD, DT, NewInsts);
- if (OpVal == 0) return 0;
-
- // Otherwise insert a bitcast at the end of PredBB.
- BitCastInst *New = new BitCastInst(OpVal, InVal->getType(),
- InVal->getName()+".phi.trans.insert",
- PredBB->getTerminator());
- NewInsts.push_back(New);
- return New;
- }
-
- // Handle getelementptr with at least one PHI operand.
- if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
- SmallVector<Value*, 8> GEPOps;
- BasicBlock *CurBB = GEP->getParent();
- for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
- Value *OpVal = InsertPHITranslatedPointer(GEP->getOperand(i),
- CurBB, PredBB, TD, DT, NewInsts);
- if (OpVal == 0) return 0;
- GEPOps.push_back(OpVal);
- }
-
- GetElementPtrInst *Result =
- GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
- InVal->getName()+".phi.trans.insert",
- PredBB->getTerminator());
- Result->setIsInBounds(GEP->isInBounds());
- NewInsts.push_back(Result);
- return Result;
- }
-
-#if 0
- // FIXME: This code works, but it is unclear that we actually want to insert
- // a big chain of computation in order to make a value available in a block.
- // This needs to be evaluated carefully to consider its cost trade offs.
-
- // Handle add with a constant RHS.
- if (Inst->getOpcode() == Instruction::Add &&
- isa<ConstantInt>(Inst->getOperand(1))) {
- // PHI translate the LHS.
- Value *OpVal = InsertPHITranslatedPointer(Inst->getOperand(0),
- CurBB, PredBB, TD, DT, NewInsts);
- if (OpVal == 0) return 0;
-
- BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
- InVal->getName()+".phi.trans.insert",
- PredBB->getTerminator());
- Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
- Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
- NewInsts.push_back(Res);
- return Res;
- }
-#endif
-
- return 0;
-}
-
/// getNonLocalPointerDepFromBB - Perform a dependency query based on
/// pointer/pointeesize starting at the end of StartBB. Add any clobber/def
/// results to the results vector and keep track of which blocks are visited in
@@ -989,14 +724,14 @@ InsertPHITranslatedPointer(Value *InVal, BasicBlock *CurBB,
/// not compute dependence information for some reason. This should be treated
/// as a clobber dependence on the first instruction in the predecessor block.
bool MemoryDependenceAnalysis::
-getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
+getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, uint64_t PointeeSize,
bool isLoad, BasicBlock *StartBB,
SmallVectorImpl<NonLocalDepEntry> &Result,
DenseMap<BasicBlock*, Value*> &Visited,
bool SkipFirstBlock) {
// Look up the cached info for Pointer.
- ValueIsLoadPair CacheKey(Pointer, isLoad);
+ ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad);
std::pair<BBSkipFirstBlockPair, NonLocalDepInfo> *CacheInfo =
&NonLocalPointerDeps[CacheKey];
@@ -1013,8 +748,9 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
if (!Visited.empty()) {
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
I != E; ++I) {
- DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->first);
- if (VI == Visited.end() || VI->second == Pointer) continue;
+ DenseMap<BasicBlock*, Value*>::iterator VI = Visited.find(I->getBB());
+ if (VI == Visited.end() || VI->second == Pointer.getAddr())
+ continue;
// We have a pointer mismatch in a block. Just return clobber, saying
// that something was clobbered in this result. We could also do a
@@ -1025,8 +761,8 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end();
I != E; ++I) {
- Visited.insert(std::make_pair(I->first, Pointer));
- if (!I->second.isNonLocal())
+ Visited.insert(std::make_pair(I->getBB(), Pointer.getAddr()));
+ if (!I->getResult().isNonLocal())
Result.push_back(*I);
}
++NumCacheCompleteNonLocalPtr;
@@ -1065,30 +801,27 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
// Get the dependency info for Pointer in BB. If we have cached
// information, we will use it, otherwise we compute it.
DEBUG(AssertSorted(*Cache, NumSortedEntries));
- MemDepResult Dep = GetNonLocalInfoForBlock(Pointer, PointeeSize, isLoad,
- BB, Cache, NumSortedEntries);
+ MemDepResult Dep = GetNonLocalInfoForBlock(Pointer.getAddr(), PointeeSize,
+ isLoad, BB, Cache,
+ NumSortedEntries);
// If we got a Def or Clobber, add this to the list of results.
if (!Dep.isNonLocal()) {
- Result.push_back(NonLocalDepEntry(BB, Dep));
+ Result.push_back(NonLocalDepEntry(BB, Dep, Pointer.getAddr()));
continue;
}
}
// If 'Pointer' is an instruction defined in this block, then we need to do
// phi translation to change it into a value live in the predecessor block.
- // If phi translation fails, then we can't continue dependence analysis.
- Instruction *PtrInst = dyn_cast<Instruction>(Pointer);
- bool NeedsPHITranslation = PtrInst && PtrInst->getParent() == BB;
-
- // If no PHI translation is needed, just add all the predecessors of this
- // block to scan them as well.
- if (!NeedsPHITranslation) {
+ // If not, we just add the predecessors to the worklist and scan them with
+ // the same Pointer.
+ if (!Pointer.NeedsPHITranslationFromBlock(BB)) {
SkipFirstBlock = false;
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
// Verify that we haven't looked at this block yet.
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
- InsertRes = Visited.insert(std::make_pair(*PI, Pointer));
+ InsertRes = Visited.insert(std::make_pair(*PI, Pointer.getAddr()));
if (InsertRes.second) {
// First time we've looked at *PI.
Worklist.push_back(*PI);
@@ -1098,16 +831,17 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
// If we have seen this block before, but it was with a different
// pointer then we have a phi translation failure and we have to treat
// this as a clobber.
- if (InsertRes.first->second != Pointer)
+ if (InsertRes.first->second != Pointer.getAddr())
goto PredTranslationFailure;
}
continue;
}
- // If we do need to do phi translation, then there are a bunch of different
- // cases, because we have to find a Value* live in the predecessor block. We
- // know that PtrInst is defined in this block at least.
-
+ // We do need to do phi translation, if we know ahead of time we can't phi
+ // translate this value, don't even try.
+ if (!Pointer.IsPotentiallyPHITranslatable())
+ goto PredTranslationFailure;
+
// We may have added values to the cache list before this PHI translation.
// If so, we haven't done anything to ensure that the cache remains sorted.
// Sort it now (if needed) so that recursive invocations of
@@ -1117,19 +851,17 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
SortNonLocalDepInfoCache(*Cache, NumSortedEntries);
NumSortedEntries = Cache->size();
}
-
- // If this is a computation derived from a PHI node, use the suitably
- // translated incoming values for each pred as the phi translated version.
- if (!isPHITranslatable(PtrInst))
- goto PredTranslationFailure;
-
Cache = 0;
-
+
for (BasicBlock **PI = PredCache->GetPreds(BB); *PI; ++PI) {
BasicBlock *Pred = *PI;
- // Get the PHI translated pointer in this predecessor. This can fail and
- // return null if not translatable.
- Value *PredPtr = GetPHITranslatedValue(PtrInst, BB, Pred, TD);
+
+ // Get the PHI translated pointer in this predecessor. This can fail if
+ // not translatable, in which case the getAddr() returns null.
+ PHITransAddr PredPointer(Pointer);
+ PredPointer.PHITranslateValue(BB, Pred);
+
+ Value *PredPtrVal = PredPointer.getAddr();
// Check to see if we have already visited this pred block with another
// pointer. If so, we can't do this lookup. This failure can occur
@@ -1137,12 +869,12 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
// the successor translates to a pointer value different than the
// pointer the block was first analyzed with.
std::pair<DenseMap<BasicBlock*,Value*>::iterator, bool>
- InsertRes = Visited.insert(std::make_pair(Pred, PredPtr));
+ InsertRes = Visited.insert(std::make_pair(Pred, PredPtrVal));
if (!InsertRes.second) {
// If the predecessor was visited with PredPtr, then we already did
// the analysis and can ignore it.
- if (InsertRes.first->second == PredPtr)
+ if (InsertRes.first->second == PredPtrVal)
continue;
// Otherwise, the block was previously analyzed with a different
@@ -1155,10 +887,11 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
// predecessor, then we have to assume that the pointer is clobbered in
// that predecessor. We can still do PRE of the load, which would insert
// a computation of the pointer in this predecessor.
- if (PredPtr == 0) {
+ if (PredPtrVal == 0) {
// Add the entry to the Result list.
NonLocalDepEntry Entry(Pred,
- MemDepResult::getClobber(Pred->getTerminator()));
+ MemDepResult::getClobber(Pred->getTerminator()),
+ PredPtrVal);
Result.push_back(Entry);
// Add it to the cache for this CacheKey so that subsequent queries get
@@ -1167,27 +900,27 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
MemoryDependenceAnalysis::NonLocalDepInfo::iterator It =
std::upper_bound(Cache->begin(), Cache->end(), Entry);
- if (It != Cache->begin() && prior(It)->first == Pred)
+ if (It != Cache->begin() && (It-1)->getBB() == Pred)
--It;
- if (It == Cache->end() || It->first != Pred) {
+ if (It == Cache->end() || It->getBB() != Pred) {
Cache->insert(It, Entry);
// Add it to the reverse map.
ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
- } else if (!It->second.isDirty()) {
+ } else if (!It->getResult().isDirty()) {
// noop
- } else if (It->second.getInst() == Pred->getTerminator()) {
+ } else if (It->getResult().getInst() == Pred->getTerminator()) {
// Same instruction, clear the dirty marker.
- It->second = Entry.second;
- } else if (It->second.getInst() == 0) {
+ It->setResult(Entry.getResult(), PredPtrVal);
+ } else if (It->getResult().getInst() == 0) {
// Dirty, with no instruction, just add this.
- It->second = Entry.second;
+ It->setResult(Entry.getResult(), PredPtrVal);
ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
} else {
// Otherwise, dirty with a different instruction.
- RemoveFromReverseMap(ReverseNonLocalPtrDeps, It->second.getInst(),
- CacheKey);
- It->second = Entry.second;
+ RemoveFromReverseMap(ReverseNonLocalPtrDeps,
+ It->getResult().getInst(), CacheKey);
+ It->setResult(Entry.getResult(),PredPtrVal);
ReverseNonLocalPtrDeps[Pred->getTerminator()].insert(CacheKey);
}
Cache = 0;
@@ -1201,7 +934,7 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
// If we have a problem phi translating, fall through to the code below
// to handle the failure condition.
- if (getNonLocalPointerDepFromBB(PredPtr, PointeeSize, isLoad, Pred,
+ if (getNonLocalPointerDepFromBB(PredPointer, PointeeSize, isLoad, Pred,
Result, Visited))
goto PredTranslationFailure;
}
@@ -1245,12 +978,12 @@ getNonLocalPointerDepFromBB(Value *Pointer, uint64_t PointeeSize,
for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) {
assert(I != Cache->rend() && "Didn't find current block??");
- if (I->first != BB)
+ if (I->getBB() != BB)
continue;
- assert(I->second.isNonLocal() &&
+ assert(I->getResult().isNonLocal() &&
"Should only be here with transparent block");
- I->second = MemDepResult::getClobber(BB->begin());
+ I->setResult(MemDepResult::getClobber(BB->begin()), Pointer.getAddr());
ReverseNonLocalPtrDeps[BB->begin()].insert(CacheKey);
Result.push_back(*I);
break;
@@ -1276,9 +1009,9 @@ RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) {
NonLocalDepInfo &PInfo = It->second.second;
for (unsigned i = 0, e = PInfo.size(); i != e; ++i) {
- Instruction *Target = PInfo[i].second.getInst();
+ Instruction *Target = PInfo[i].getResult().getInst();
if (Target == 0) continue; // Ignore non-local dep results.
- assert(Target->getParent() == PInfo[i].first);
+ assert(Target->getParent() == PInfo[i].getBB());
// Eliminating the dirty entry from 'Cache', so update the reverse info.
RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P);
@@ -1315,7 +1048,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
NonLocalDepInfo &BlockMap = NLDI->second.first;
for (NonLocalDepInfo::iterator DI = BlockMap.begin(), DE = BlockMap.end();
DI != DE; ++DI)
- if (Instruction *Inst = DI->second.getInst())
+ if (Instruction *Inst = DI->getResult().getInst())
RemoveFromReverseMap(ReverseNonLocalDeps, Inst, RemInst);
NonLocalDeps.erase(NLDI);
}
@@ -1403,10 +1136,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
for (NonLocalDepInfo::iterator DI = INLD.first.begin(),
DE = INLD.first.end(); DI != DE; ++DI) {
- if (DI->second.getInst() != RemInst) continue;
+ if (DI->getResult().getInst() != RemInst) continue;
// Convert to a dirty entry for the subsequent instruction.
- DI->second = NewDirtyVal;
+ DI->setResult(NewDirtyVal, DI->getAddress());
if (Instruction *NextI = NewDirtyVal.getInst())
ReverseDepsToAdd.push_back(std::make_pair(NextI, *I));
@@ -1445,10 +1178,10 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) {
// Update any entries for RemInst to use the instruction after it.
for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end();
DI != DE; ++DI) {
- if (DI->second.getInst() != RemInst) continue;
+ if (DI->getResult().getInst() != RemInst) continue;
// Convert to a dirty entry for the subsequent instruction.
- DI->second = NewDirtyVal;
+ DI->setResult(NewDirtyVal, DI->getAddress());
if (Instruction *NewDirtyInst = NewDirtyVal.getInst())
ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P));
@@ -1489,7 +1222,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
const NonLocalDepInfo &Val = I->second.second;
for (NonLocalDepInfo::const_iterator II = Val.begin(), E = Val.end();
II != E; ++II)
- assert(II->second.getInst() != D && "Inst occurs as NLPD value");
+ assert(II->getResult().getInst() != D && "Inst occurs as NLPD value");
}
for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(),
@@ -1498,7 +1231,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const {
const PerInstNLInfo &INLD = I->second;
for (NonLocalDepInfo::const_iterator II = INLD.first.begin(),
EE = INLD.first.end(); II != EE; ++II)
- assert(II->second.getInst() != D && "Inst occurs in data structures");
+ assert(II->getResult().getInst() != D && "Inst occurs in data structures");
}
for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(),
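
The MemoryDependenceAnalysis changes above replace the old GetPHITranslatedValue/InsertPHITranslatedPointer helpers with the PHITransAddr class added in the new file below. A hedged sketch of the translation pattern the pass now follows; names are illustrative, and per the code above a failed translation leaves getAddr() null:

    #include "llvm/Analysis/PHITransAddr.h"
    #include "llvm/Target/TargetData.h"
    using namespace llvm;

    // Translate a pointer from CurBB into predecessor Pred, returning the
    // address usable in Pred or null if PHI translation fails.
    static Value *translateAcrossEdge(Value *Ptr, const TargetData *TD,
                                      BasicBlock *CurBB, BasicBlock *Pred) {
      PHITransAddr Addr(Ptr, TD);
      if (Addr.NeedsPHITranslationFromBlock(CurBB) &&
          !Addr.IsPotentiallyPHITranslatable())
        return 0;                      // known ahead of time to be hopeless
      Addr.PHITranslateValue(CurBB, Pred);
      return Addr.getAddr();           // may be null on failure
    }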
diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp
new file mode 100644
index 0000000..07e2919
--- /dev/null
+++ b/lib/Analysis/PHITransAddr.cpp
@@ -0,0 +1,432 @@
+//===- PHITransAddr.cpp - PHI Translation for Addresses -------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the PHITransAddr class.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Analysis/PHITransAddr.h"
+#include "llvm/Analysis/Dominators.h"
+#include "llvm/Analysis/InstructionSimplify.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+static bool CanPHITrans(Instruction *Inst) {
+ if (isa<PHINode>(Inst) ||
+ isa<BitCastInst>(Inst) ||
+ isa<GetElementPtrInst>(Inst))
+ return true;
+
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1)))
+ return true;
+
+ // cerr << "MEMDEP: Could not PHI translate: " << *Pointer;
+ // if (isa<BitCastInst>(PtrInst) || isa<GetElementPtrInst>(PtrInst))
+ // cerr << "OP:\t\t\t\t" << *PtrInst->getOperand(0);
+ return false;
+}
+
+void PHITransAddr::dump() const {
+ if (Addr == 0) {
+ errs() << "PHITransAddr: null\n";
+ return;
+ }
+ errs() << "PHITransAddr: " << *Addr << "\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ errs() << " Input #" << i << " is " << *InstInputs[i] << "\n";
+}
+
+
+static bool VerifySubExpr(Value *Expr,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ // If this is a non-instruction value, there is nothing to do.
+ Instruction *I = dyn_cast<Instruction>(Expr);
+ if (I == 0) return true;
+
+ // If it's an instruction, it is either in Tmp or its operands recursively
+ // are.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return true;
+ }
+
+ // If it isn't in the InstInputs list it is a subexpr incorporated into the
+ // address. Sanity check that it is phi translatable.
+ if (!CanPHITrans(I)) {
+ errs() << "Non phi translatable instruction found in PHITransAddr, either "
+ "something is missing from InstInputs or CanPHITrans is wrong:\n";
+ errs() << *I << '\n';
+ return false;
+ }
+
+ // Validate the operands of the instruction.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+ if (!VerifySubExpr(I->getOperand(i), InstInputs))
+ return false;
+
+ return true;
+}
+
+/// Verify - Check internal consistency of this data structure. If the
+/// structure is valid, it returns true. If invalid, it prints errors and
+/// returns false.
+bool PHITransAddr::Verify() const {
+ if (Addr == 0) return true;
+
+ SmallVector<Instruction*, 8> Tmp(InstInputs.begin(), InstInputs.end());
+
+ if (!VerifySubExpr(Addr, Tmp))
+ return false;
+
+ if (!Tmp.empty()) {
+ errs() << "PHITransAddr inconsistent, contains extra instructions:\n";
+ for (unsigned i = 0, e = InstInputs.size(); i != e; ++i)
+ errs() << " InstInput #" << i << " is " << *InstInputs[i] << "\n";
+ return false;
+ }
+
+ // a-ok.
+ return true;
+}
+
+
+/// IsPotentiallyPHITranslatable - If this needs PHI translation, return true
+/// if we have some hope of doing it. This should be used as a filter to
+/// avoid calling PHITranslateValue in hopeless situations.
+bool PHITransAddr::IsPotentiallyPHITranslatable() const {
+ // If the input value is not an instruction, or if it is not defined in CurBB,
+ // then we don't need to phi translate it.
+ Instruction *Inst = dyn_cast<Instruction>(Addr);
+ return Inst == 0 || CanPHITrans(Inst);
+}
+
+
+static void RemoveInstInputs(Value *V,
+ SmallVectorImpl<Instruction*> &InstInputs) {
+ Instruction *I = dyn_cast<Instruction>(V);
+ if (I == 0) return;
+
+ // If the instruction is in the InstInputs list, remove it.
+ SmallVectorImpl<Instruction*>::iterator Entry =
+ std::find(InstInputs.begin(), InstInputs.end(), I);
+ if (Entry != InstInputs.end()) {
+ InstInputs.erase(Entry);
+ return;
+ }
+
+ assert(!isa<PHINode>(I) && "Error, removing something that isn't an input");
+
+ // Otherwise, it must have instruction inputs itself. Zap them recursively.
+ for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
+ if (Instruction *Op = dyn_cast<Instruction>(I->getOperand(i)))
+ RemoveInstInputs(Op, InstInputs);
+ }
+}
+
+Value *PHITransAddr::PHITranslateSubExpr(Value *V, BasicBlock *CurBB,
+ BasicBlock *PredBB) {
+ // If this is a non-instruction value, it can't require PHI translation.
+ Instruction *Inst = dyn_cast<Instruction>(V);
+ if (Inst == 0) return V;
+
+ // Determine whether 'Inst' is an input to our PHI translatable expression.
+ bool isInput = std::count(InstInputs.begin(), InstInputs.end(), Inst);
+
+  // Handle input instructions if needed.
+ if (isInput) {
+ if (Inst->getParent() != CurBB) {
+ // If it is an input defined in a different block, then it remains an
+ // input.
+ return Inst;
+ }
+
+ // If 'Inst' is defined in this block and is an input that needs to be phi
+ // translated, we need to incorporate the value into the expression or fail.
+
+ // In either case, the instruction itself isn't an input any longer.
+ InstInputs.erase(std::find(InstInputs.begin(), InstInputs.end(), Inst));
+
+ // If this is a PHI, go ahead and translate it.
+ if (PHINode *PN = dyn_cast<PHINode>(Inst))
+ return AddAsInput(PN->getIncomingValueForBlock(PredBB));
+
+ // If this is a non-phi value, and it is analyzable, we can incorporate it
+ // into the expression by making all instruction operands be inputs.
+ if (!CanPHITrans(Inst))
+ return 0;
+
+    // All instruction operands are now inputs (and, of course, they may also be
+    // defined in this block, so they may need to be phi translated themselves).
+ for (unsigned i = 0, e = Inst->getNumOperands(); i != e; ++i)
+ if (Instruction *Op = dyn_cast<Instruction>(Inst->getOperand(i)))
+ InstInputs.push_back(Op);
+ }
+
+ // Ok, it must be an intermediate result (either because it started that way
+ // or because we just incorporated it into the expression). See if its
+ // operands need to be phi translated, and if so, reconstruct it.
+
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
+ Value *PHIIn = PHITranslateSubExpr(BC->getOperand(0), CurBB, PredBB);
+ if (PHIIn == 0) return 0;
+ if (PHIIn == BC->getOperand(0))
+ return BC;
+
+ // Find an available version of this cast.
+
+ // Constants are trivial to find.
+ if (Constant *C = dyn_cast<Constant>(PHIIn))
+ return AddAsInput(ConstantExpr::getBitCast(C, BC->getType()));
+
+ // Otherwise we have to see if a bitcasted version of the incoming pointer
+ // is available. If so, we can use it, otherwise we have to fail.
+ for (Value::use_iterator UI = PHIIn->use_begin(), E = PHIIn->use_end();
+ UI != E; ++UI) {
+ if (BitCastInst *BCI = dyn_cast<BitCastInst>(*UI))
+ if (BCI->getType() == BC->getType())
+ return BCI;
+ }
+ return 0;
+ }
+
+ // Handle getelementptr with at least one PHI translatable operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ bool AnyChanged = false;
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *GEPOp = PHITranslateSubExpr(GEP->getOperand(i), CurBB, PredBB);
+ if (GEPOp == 0) return 0;
+
+ AnyChanged |= GEPOp != GEP->getOperand(i);
+ GEPOps.push_back(GEPOp);
+ }
+
+ if (!AnyChanged)
+ return GEP;
+
+ // Simplify the GEP to handle 'gep x, 0' -> x etc.
+ if (Value *V = SimplifyGEPInst(&GEPOps[0], GEPOps.size(), TD)) {
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ RemoveInstInputs(GEPOps[i], InstInputs);
+
+ return AddAsInput(V);
+ }
+
+ // Scan to see if we have this GEP available.
+ Value *APHIOp = GEPOps[0];
+ for (Value::use_iterator UI = APHIOp->use_begin(), E = APHIOp->use_end();
+ UI != E; ++UI) {
+ if (GetElementPtrInst *GEPI = dyn_cast<GetElementPtrInst>(*UI))
+ if (GEPI->getType() == GEP->getType() &&
+ GEPI->getNumOperands() == GEPOps.size() &&
+ GEPI->getParent()->getParent() == CurBB->getParent()) {
+ bool Mismatch = false;
+ for (unsigned i = 0, e = GEPOps.size(); i != e; ++i)
+ if (GEPI->getOperand(i) != GEPOps[i]) {
+ Mismatch = true;
+ break;
+ }
+ if (!Mismatch)
+ return GEPI;
+ }
+ }
+ return 0;
+ }
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Constant *RHS = cast<ConstantInt>(Inst->getOperand(1));
+ bool isNSW = cast<BinaryOperator>(Inst)->hasNoSignedWrap();
+ bool isNUW = cast<BinaryOperator>(Inst)->hasNoUnsignedWrap();
+
+ Value *LHS = PHITranslateSubExpr(Inst->getOperand(0), CurBB, PredBB);
+ if (LHS == 0) return 0;
+
+ // If the PHI translated LHS is an add of a constant, fold the immediates.
+ if (BinaryOperator *BOp = dyn_cast<BinaryOperator>(LHS))
+ if (BOp->getOpcode() == Instruction::Add)
+ if (ConstantInt *CI = dyn_cast<ConstantInt>(BOp->getOperand(1))) {
+ LHS = BOp->getOperand(0);
+ RHS = ConstantExpr::getAdd(RHS, CI);
+ isNSW = isNUW = false;
+
+ // If the old 'LHS' was an input, add the new 'LHS' as an input.
+ if (std::count(InstInputs.begin(), InstInputs.end(), BOp)) {
+ RemoveInstInputs(BOp, InstInputs);
+ AddAsInput(LHS);
+ }
+ }
+
+ // See if the add simplifies away.
+ if (Value *Res = SimplifyAddInst(LHS, RHS, isNSW, isNUW, TD)) {
+ // If we simplified the operands, the LHS is no longer an input, but Res
+ // is.
+ RemoveInstInputs(LHS, InstInputs);
+ return AddAsInput(Res);
+ }
+
+ // If we didn't modify the add, just return it.
+ if (LHS == Inst->getOperand(0) && RHS == Inst->getOperand(1))
+ return Inst;
+
+ // Otherwise, see if we have this add available somewhere.
+ for (Value::use_iterator UI = LHS->use_begin(), E = LHS->use_end();
+ UI != E; ++UI) {
+ if (BinaryOperator *BO = dyn_cast<BinaryOperator>(*UI))
+ if (BO->getOpcode() == Instruction::Add &&
+ BO->getOperand(0) == LHS && BO->getOperand(1) == RHS &&
+ BO->getParent()->getParent() == CurBB->getParent())
+ return BO;
+ }
+
+ return 0;
+ }
+
+ // Otherwise, we failed.
+ return 0;
+}
+
+
+/// PHITranslateValue - PHI translate the current address up the CFG from
+/// CurBB to PredBB, updating our state to reflect any needed changes.  This
+/// returns true on failure and sets Addr to null.
+bool PHITransAddr::PHITranslateValue(BasicBlock *CurBB, BasicBlock *PredBB) {
+ assert(Verify() && "Invalid PHITransAddr!");
+ Addr = PHITranslateSubExpr(Addr, CurBB, PredBB);
+ assert(Verify() && "Invalid PHITransAddr!");
+ return Addr == 0;
+}
+
+/// GetAvailablePHITranslatedSubExpr - Return the value computed by
+/// PHITranslateSubExpr if it dominates PredBB, otherwise return null.
+Value *PHITransAddr::
+GetAvailablePHITranslatedSubExpr(Value *V, BasicBlock *CurBB,BasicBlock *PredBB,
+ const DominatorTree &DT) const {
+ PHITransAddr Tmp(V, TD);
+ Tmp.PHITranslateValue(CurBB, PredBB);
+
+ // See if PHI translation succeeds.
+ V = Tmp.getAddr();
+
+ // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(V))
+ if (!DT.dominates(Inst->getParent(), PredBB))
+ return 0;
+ return V;
+}
+
+
+/// PHITranslateWithInsertion - PHI translate this value into the specified
+/// predecessor block, inserting a computation of the value if it is
+/// unavailable.
+///
+/// All newly created instructions are added to the NewInsts list. This
+/// returns null on failure.
+///
+Value *PHITransAddr::
+PHITranslateWithInsertion(BasicBlock *CurBB, BasicBlock *PredBB,
+ const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ unsigned NISize = NewInsts.size();
+
+ // Attempt to PHI translate with insertion.
+ Addr = InsertPHITranslatedSubExpr(Addr, CurBB, PredBB, DT, NewInsts);
+
+ // If successful, return the new value.
+ if (Addr) return Addr;
+
+ // If not, destroy any intermediate instructions inserted.
+ while (NewInsts.size() != NISize)
+ NewInsts.pop_back_val()->eraseFromParent();
+ return 0;
+}
+
+
+/// InsertPHITranslatedSubExpr - Insert a computation of the PHI translated
+/// version of 'V' for the edge PredBB->CurBB into the end of the PredBB
+/// block. All newly created instructions are added to the NewInsts list.
+/// This returns null on failure.
+///
+Value *PHITransAddr::
+InsertPHITranslatedSubExpr(Value *InVal, BasicBlock *CurBB,
+ BasicBlock *PredBB, const DominatorTree &DT,
+ SmallVectorImpl<Instruction*> &NewInsts) {
+ // See if we have a version of this value already available and dominating
+ // PredBB. If so, there is no need to insert a new instance of it.
+ if (Value *Res = GetAvailablePHITranslatedSubExpr(InVal, CurBB, PredBB, DT))
+ return Res;
+
+ // If we don't have an available version of this value, it must be an
+ // instruction.
+ Instruction *Inst = cast<Instruction>(InVal);
+
+ // Handle bitcast of PHI translatable value.
+ if (BitCastInst *BC = dyn_cast<BitCastInst>(Inst)) {
+ Value *OpVal = InsertPHITranslatedSubExpr(BC->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ // Otherwise insert a bitcast at the end of PredBB.
+ BitCastInst *New = new BitCastInst(OpVal, InVal->getType(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ NewInsts.push_back(New);
+ return New;
+ }
+
+ // Handle getelementptr with at least one PHI operand.
+ if (GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(Inst)) {
+ SmallVector<Value*, 8> GEPOps;
+ BasicBlock *CurBB = GEP->getParent();
+ for (unsigned i = 0, e = GEP->getNumOperands(); i != e; ++i) {
+ Value *OpVal = InsertPHITranslatedSubExpr(GEP->getOperand(i),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+ GEPOps.push_back(OpVal);
+ }
+
+ GetElementPtrInst *Result =
+ GetElementPtrInst::Create(GEPOps[0], GEPOps.begin()+1, GEPOps.end(),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Result->setIsInBounds(GEP->isInBounds());
+ NewInsts.push_back(Result);
+ return Result;
+ }
+
+#if 0
+ // FIXME: This code works, but it is unclear that we actually want to insert
+ // a big chain of computation in order to make a value available in a block.
+ // This needs to be evaluated carefully to consider its cost trade offs.
+
+ // Handle add with a constant RHS.
+ if (Inst->getOpcode() == Instruction::Add &&
+ isa<ConstantInt>(Inst->getOperand(1))) {
+ // PHI translate the LHS.
+ Value *OpVal = InsertPHITranslatedSubExpr(Inst->getOperand(0),
+ CurBB, PredBB, DT, NewInsts);
+ if (OpVal == 0) return 0;
+
+ BinaryOperator *Res = BinaryOperator::CreateAdd(OpVal, Inst->getOperand(1),
+ InVal->getName()+".phi.trans.insert",
+ PredBB->getTerminator());
+ Res->setHasNoSignedWrap(cast<BinaryOperator>(Inst)->hasNoSignedWrap());
+ Res->setHasNoUnsignedWrap(cast<BinaryOperator>(Inst)->hasNoUnsignedWrap());
+ NewInsts.push_back(Res);
+ return Res;
+ }
+#endif
+
+ return 0;
+}
diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp
index e767891..8148429 100644
--- a/lib/Analysis/ProfileEstimatorPass.cpp
+++ b/lib/Analysis/ProfileEstimatorPass.cpp
@@ -35,6 +35,7 @@ namespace {
LoopInfo *LI;
std::set<BasicBlock*> BBToVisit;
std::map<Loop*,double> LoopExitWeights;
+ std::map<Edge,double> MinimalWeight;
public:
static char ID; // Class identification, replacement for typeinfo
explicit ProfileEstimatorPass(const double execcount = 0)
@@ -91,7 +92,7 @@ static void inline printEdgeError(ProfileInfo::Edge e, const char *M) {
void inline ProfileEstimatorPass::printEdgeWeight(Edge E) {
DEBUG(errs() << "-- Weight of Edge " << E << ":"
- << format("%g", getEdgeWeight(E)) << "\n");
+ << format("%20.20g", getEdgeWeight(E)) << "\n");
}
// recurseBasicBlock() - This calculates the ProfileInfo estimation for a
@@ -174,6 +175,12 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
double w = getEdgeWeight(*ei);
if (w == MissingValue) {
Edges.push_back(*ei);
+ // Check if there is a necessary minimal weight, if yes, subtract it
+ // from weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ incoming -= MinimalWeight[*ei];
+ DEBUG(errs() << "Reserving " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
} else {
incoming -= w;
}
@@ -191,11 +198,43 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
printEdgeWeight(edge);
}
}
- // Distribute remaining weight onto the exit edges.
+
+  // Distribute remaining weight to the exiting edges. To prevent fractions
+  // from building up and provoking precision problems, the weight which is to
+  // be distributed is split and rounded; the last edge gets a somewhat
+  // bigger value, but we are close enough for an estimation.
+ double fraction = floor(incoming/Edges.size());
for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
ei != ee; ++ei) {
- EdgeInformation[BB->getParent()][*ei] += incoming/Edges.size();
+ double w = 0;
+ if (ei != (ee-1)) {
+ w = fraction;
+ incoming -= fraction;
+ } else {
+ w = incoming;
+ }
+ EdgeInformation[BB->getParent()][*ei] += w;
+ // Read necessary minimal weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+ DEBUG(errs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
printEdgeWeight(*ei);
+
+      // Add minimal weight to the paths to all exit edges; this is used to
+      // ensure that enough flow reaches these edges.
+ Path p;
+ const BasicBlock *Dest = GetPath(BB, (*ei).first, p, GetPathToDest);
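+      // Walk the path found by GetPath backwards, from the exit edge's source
+      // to BB, and reserve weight w on every edge along the way.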
+ while (Dest != BB) {
+ const BasicBlock *Parent = p.find(Dest)->second;
+ Edge e = getEdge(Parent, Dest);
+ if (MinimalWeight.find(e) == MinimalWeight.end()) {
+ MinimalWeight[e] = 0;
+ }
+ MinimalWeight[e] += w;
+ DEBUG(errs() << "Minimal Weight for " << e << ": " << format("%.20g",MinimalWeight[e]) << "\n");
+ Dest = Parent;
+ }
}
// Increase flow into the loop.
BBWeight *= (ExecCount+1);
@@ -203,7 +242,7 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
BlockInformation[BB->getParent()][BB] = BBWeight;
// Up until now we considered only the loop exiting edges, now we have a
- // definite block weight and must ditribute this onto the outgoing edges.
+ // definite block weight and must distribute this onto the outgoing edges.
   // Since there may already be flow attached to some of the edges, read this
   // flow first and remember the edges that still have no flow attached.
Edges.clear();
@@ -225,15 +264,32 @@ void ProfileEstimatorPass::recurseBasicBlock(BasicBlock *BB) {
BBWeight -= getEdgeWeight(edge);
} else {
Edges.push_back(edge);
+        // If a minimal weight is necessary, reserve it by subtracting it from
+        // the block weight; it is re-added later on.
+ if (MinimalWeight.find(edge) != MinimalWeight.end()) {
+ BBWeight -= MinimalWeight[edge];
+ DEBUG(errs() << "Reserving " << format("%.20g",MinimalWeight[edge]) << " at " << edge << "\n");
+ }
}
}
}
+ double fraction = floor(BBWeight/Edges.size());
// Finally we know what flow is still not leaving the block, distribute this
// flow onto the empty edges.
for (SmallVector<Edge, 8>::iterator ei = Edges.begin(), ee = Edges.end();
ei != ee; ++ei) {
- EdgeInformation[BB->getParent()][*ei] += BBWeight/Edges.size();
+ if (ei != (ee-1)) {
+ EdgeInformation[BB->getParent()][*ei] += fraction;
+ BBWeight -= fraction;
+ } else {
+ EdgeInformation[BB->getParent()][*ei] += BBWeight;
+ }
+      // Re-add the minimal necessary weight.
+ if (MinimalWeight.find(*ei) != MinimalWeight.end()) {
+ EdgeInformation[BB->getParent()][*ei] += MinimalWeight[*ei];
+ DEBUG(errs() << "Additionally " << format("%.20g",MinimalWeight[*ei]) << " at " << (*ei) << "\n");
+ }
printEdgeWeight(*ei);
}
@@ -260,20 +316,24 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) {
for (Function::iterator bi = F.begin(), be = F.end(); bi != be; ++bi)
BBToVisit.insert(bi);
+ // Clear Minimal Edges.
+ MinimalWeight.clear();
+
DEBUG(errs() << "Working on function " << F.getNameStr() << "\n");
// Since the entry block is the first one and has no predecessors, the edge
// (0,entry) is inserted with the starting weight of 1.
BasicBlock *entry = &F.getEntryBlock();
- BlockInformation[&F][entry] = 1;
+ BlockInformation[&F][entry] = pow(2.0, 32.0);
Edge edge = getEdge(0,entry);
- EdgeInformation[&F][edge] = 1;
+ EdgeInformation[&F][edge] = BlockInformation[&F][entry];
printEdgeWeight(edge);
   // Since recurseBasicBlock() may return with a block which was not fully
- // estimated, use recurseBasicBlock() until everything is calculated.
+ // estimated, use recurseBasicBlock() until everything is calculated.
+ bool cleanup = false;
recurseBasicBlock(entry);
- while (BBToVisit.size() > 0) {
+ while (BBToVisit.size() > 0 && !cleanup) {
// Remember number of open blocks, this is later used to check if progress
// was made.
unsigned size = BBToVisit.size();
@@ -287,21 +347,65 @@ bool ProfileEstimatorPass::runOnFunction(Function &F) {
if (BBToVisit.size() < size) break;
}
- // If there was not a single block resovled, make some assumptions.
+ // If there was not a single block resolved, make some assumptions.
if (BBToVisit.size() == size) {
- BasicBlock *BB = *(BBToVisit.begin());
- // Since this BB was not calculated because of missing incoming edges,
- // set these edges to zero.
- for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi) {
- Edge e = getEdge(*bbi,BB);
- double w = getEdgeWeight(e);
- if (w == MissingValue) {
- EdgeInformation[&F][e] = 0;
- DEBUG(errs() << "Assuming edge weight: ");
- printEdgeWeight(e);
+ bool found = false;
+ for (std::set<BasicBlock*>::iterator BBI = BBToVisit.begin(), BBE = BBToVisit.end();
+ (BBI != BBE) && (!found); ++BBI) {
+ BasicBlock *BB = *BBI;
+      // Try each predecessor to see whether its edge weight can be assumed.
+ for (pred_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ (bbi != bbe) && (!found); ++bbi) {
+ Edge e = getEdge(*bbi,BB);
+ double w = getEdgeWeight(e);
+ // Check that edge from predecessor is still free.
+ if (w == MissingValue) {
+          // Check if there is a cycle from this block to the predecessor.
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, *bbi, P, GetPathToDest);
+ if (Dest != *bbi) {
+            // If there is no cycle, just set the edge weight to 0.
+ EdgeInformation[&F][e] = 0;
+ DEBUG(errs() << "Assuming edge weight: ");
+ printEdgeWeight(e);
+ found = true;
+ }
+ }
}
}
+ if (!found) {
+ cleanup = true;
+        DEBUG(errs() << "No assumption possible in Function " << F.getName() << ", setting all to zero\n");
+ }
+ }
+ }
+  // In case there was no safe way to assume edges, as a last measure set
+  // _everything_ to zero.
+ if (cleanup) {
+ FunctionInformation[&F] = 0;
+ BlockInformation[&F].clear();
+ EdgeInformation[&F].clear();
+ for (Function::const_iterator FI = F.begin(), FE = F.end(); FI != FE; ++FI) {
+ const BasicBlock *BB = &(*FI);
+ BlockInformation[&F][BB] = 0;
+ pred_const_iterator predi = pred_begin(BB), prede = pred_end(BB);
+ if (predi == prede) {
+ Edge e = getEdge(0,BB);
+ setEdgeWeight(e,0);
+ }
+ for (;predi != prede; ++predi) {
+ Edge e = getEdge(*predi,BB);
+ setEdgeWeight(e,0);
+ }
+ succ_const_iterator succi = succ_begin(BB), succe = succ_end(BB);
+ if (succi == succe) {
+ Edge e = getEdge(BB,0);
+ setEdgeWeight(e,0);
+ }
+ for (;succi != succe; ++succi) {
+ Edge e = getEdge(*succi,BB);
+ setEdgeWeight(e,0);
+ }
}
}
diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp
index 7f24f5a..c49c6e1 100644
--- a/lib/Analysis/ProfileInfo.cpp
+++ b/lib/Analysis/ProfileInfo.cpp
@@ -11,26 +11,52 @@
// "no profile" implementation.
//
//===----------------------------------------------------------------------===//
-
+#define DEBUG_TYPE "profile-info"
#include "llvm/Analysis/Passes.h"
#include "llvm/Analysis/ProfileInfo.h"
+#include "llvm/CodeGen/MachineBasicBlock.h"
+#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/Pass.h"
#include "llvm/Support/CFG.h"
-#include "llvm/Support/Debug.h"
-#include "llvm/Support/raw_ostream.h"
-#include "llvm/Support/Format.h"
+#include "llvm/ADT/SmallSet.h"
#include <set>
+#include <queue>
+#include <limits>
using namespace llvm;
// Register the ProfileInfo interface, providing a nice name to refer to.
static RegisterAnalysisGroup<ProfileInfo> Z("Profile Information");
-char ProfileInfo::ID = 0;
-ProfileInfo::~ProfileInfo() {}
+namespace llvm {
+
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::ProfileInfoT() {}
+template <>
+ProfileInfoT<MachineFunction, MachineBasicBlock>::~ProfileInfoT() {}
+
+template <>
+ProfileInfoT<Function, BasicBlock>::ProfileInfoT() {
+ MachineProfile = 0;
+}
+template <>
+ProfileInfoT<Function, BasicBlock>::~ProfileInfoT() {
+ if (MachineProfile) delete MachineProfile;
+}
+
+template<>
+char ProfileInfoT<Function,BasicBlock>::ID = 0;
+
+template<>
+char ProfileInfoT<MachineFunction, MachineBasicBlock>::ID = 0;
+
+template<>
+const double ProfileInfoT<Function,BasicBlock>::MissingValue = -1;
-const double ProfileInfo::MissingValue = -1;
+template<> const
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::MissingValue = -1;
-double ProfileInfo::getExecutionCount(const BasicBlock *BB) {
+template<> double
+ProfileInfoT<Function,BasicBlock>::getExecutionCount(const BasicBlock *BB) {
std::map<const Function*, BlockCounts>::iterator J =
BlockInformation.find(BB->getParent());
if (J != BlockInformation.end()) {
@@ -39,36 +65,74 @@ double ProfileInfo::getExecutionCount(const BasicBlock *BB) {
return I->second;
}
+ double Count = MissingValue;
+
pred_const_iterator PI = pred_begin(BB), PE = pred_end(BB);
// Are there zero predecessors of this block?
if (PI == PE) {
- // If this is the entry block, look for the Null -> Entry edge.
- if (BB == &BB->getParent()->getEntryBlock())
- return getEdgeWeight(getEdge(0, BB));
- else
- return 0; // Otherwise, this is a dead block.
+ Edge e = getEdge(0,BB);
+ Count = getEdgeWeight(e);
+ } else {
+ // Otherwise, if there are predecessors, the execution count of this block is
+ // the sum of the edge frequencies from the incoming edges.
+ std::set<const BasicBlock*> ProcessedPreds;
+ Count = 0;
+ for (; PI != PE; ++PI)
+ if (ProcessedPreds.insert(*PI).second) {
+ double w = getEdgeWeight(getEdge(*PI, BB));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
}
- // Otherwise, if there are predecessors, the execution count of this block is
- // the sum of the edge frequencies from the incoming edges.
- std::set<const BasicBlock*> ProcessedPreds;
- double Count = 0;
- for (; PI != PE; ++PI)
- if (ProcessedPreds.insert(*PI).second) {
- double w = getEdgeWeight(getEdge(*PI, BB));
- if (w == MissingValue) {
- Count = MissingValue;
- break;
- }
- Count += w;
+ // If the predecessors did not suffice to get block weight, try successors.
+ if (Count == MissingValue) {
+
+ succ_const_iterator SI = succ_begin(BB), SE = succ_end(BB);
+
+ // Are there zero successors of this block?
+ if (SI == SE) {
+ Edge e = getEdge(BB,0);
+ Count = getEdgeWeight(e);
+ } else {
+ std::set<const BasicBlock*> ProcessedSuccs;
+ Count = 0;
+ for (; SI != SE; ++SI)
+ if (ProcessedSuccs.insert(*SI).second) {
+ double w = getEdgeWeight(getEdge(BB, *SI));
+ if (w == MissingValue) {
+ Count = MissingValue;
+ break;
+ }
+ Count += w;
+ }
}
+ }
if (Count != MissingValue) BlockInformation[BB->getParent()][BB] = Count;
return Count;
}
-double ProfileInfo::getExecutionCount(const Function *F) {
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ getExecutionCount(const MachineBasicBlock *MBB) {
+ std::map<const MachineFunction*, BlockCounts>::iterator J =
+ BlockInformation.find(MBB->getParent());
+ if (J != BlockInformation.end()) {
+ BlockCounts::iterator I = J->second.find(MBB);
+ if (I != J->second.end())
+ return I->second;
+ }
+
+ return MissingValue;
+}
+
+template<>
+double ProfileInfoT<Function,BasicBlock>::getExecutionCount(const Function *F) {
std::map<const Function*, double>::iterator J =
FunctionInformation.find(F);
if (J != FunctionInformation.end())
@@ -83,35 +147,211 @@ double ProfileInfo::getExecutionCount(const Function *F) {
return Count;
}
+template<>
+double ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ getExecutionCount(const MachineFunction *MF) {
+ std::map<const MachineFunction*, double>::iterator J =
+ FunctionInformation.find(MF);
+ if (J != FunctionInformation.end())
+ return J->second;
+
+ double Count = getExecutionCount(&MF->front());
+ if (Count != MissingValue) FunctionInformation[MF] = Count;
+ return Count;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ setExecutionCount(const BasicBlock *BB, double w) {
+ DEBUG(errs() << "Creating Block " << BB->getName()
+ << " (weight: " << format("%.20g",w) << ")\n");
+ BlockInformation[BB->getParent()][BB] = w;
+}
+
+template<>
+void ProfileInfoT<MachineFunction, MachineBasicBlock>::
+ setExecutionCount(const MachineBasicBlock *MBB, double w) {
+ DEBUG(errs() << "Creating Block " << MBB->getBasicBlock()->getName()
+ << " (weight: " << format("%.20g",w) << ")\n");
+ BlockInformation[MBB->getParent()][MBB] = w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::addEdgeWeight(Edge e, double w) {
+ double oldw = getEdgeWeight(e);
+ assert (oldw != MissingValue && "Adding weight to Edge with no previous weight");
+ DEBUG(errs() << "Adding to Edge " << e
+ << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+ EdgeInformation[getFunction(e)][e] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ addExecutionCount(const BasicBlock *BB, double w) {
+ double oldw = getExecutionCount(BB);
+ assert (oldw != MissingValue && "Adding weight to Block with no previous weight");
+ DEBUG(errs() << "Adding to Block " << BB->getName()
+ << " (new weight: " << format("%.20g",oldw + w) << ")\n");
+ BlockInformation[BB->getParent()][BB] = oldw + w;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeBlock(const BasicBlock *BB) {
+ std::map<const Function*, BlockCounts>::iterator J =
+ BlockInformation.find(BB->getParent());
+ if (J == BlockInformation.end()) return;
+
+ DEBUG(errs() << "Deleting " << BB->getName() << "\n");
+ J->second.erase(BB);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::removeEdge(Edge e) {
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(getFunction(e));
+ if (J == EdgeInformation.end()) return;
+
+  DEBUG(errs() << "Deleting " << e << "\n");
+ J->second.erase(e);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ replaceEdge(const Edge &oldedge, const Edge &newedge) {
+ double w;
+ if ((w = getEdgeWeight(newedge)) == MissingValue) {
+ w = getEdgeWeight(oldedge);
+ DEBUG(errs() << "Replacing " << oldedge << " with " << newedge << "\n");
+ } else {
+ w += getEdgeWeight(oldedge);
+ DEBUG(errs() << "Adding " << oldedge << " to " << newedge << "\n");
+ }
+ setEdgeWeight(newedge,w);
+ removeEdge(oldedge);
+}
+
+template<>
+const BasicBlock *ProfileInfoT<Function,BasicBlock>::
+ GetPath(const BasicBlock *Src, const BasicBlock *Dest,
+ Path &P, unsigned Mode) {
+ const BasicBlock *BB = 0;
+ bool hasFoundPath = false;
+
+ std::queue<const BasicBlock *> BFS;
+ BFS.push(Src);
+
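+  // Breadth-first search from Src. For every block that is reached, P records
+  // the block it was reached from, so callers can reconstruct the discovered
+  // path by walking backwards through P; a block without successors is
+  // additionally recorded under the null key (P[0]).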
+ while(BFS.size() && !hasFoundPath) {
+ BB = BFS.front();
+ BFS.pop();
+
+ succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+ if (Succ == End) {
+ P[0] = BB;
+ if (Mode & GetPathToExit) {
+ hasFoundPath = true;
+ BB = 0;
+ }
+ }
+ for(;Succ != End; ++Succ) {
+ if (P.find(*Succ) != P.end()) continue;
+ Edge e = getEdge(BB,*Succ);
+ if ((Mode & GetPathWithNewEdges) && (getEdgeWeight(e) != MissingValue)) continue;
+ P[*Succ] = BB;
+ BFS.push(*Succ);
+ if ((Mode & GetPathToDest) && *Succ == Dest) {
+ hasFoundPath = true;
+ BB = *Succ;
+ break;
+ }
+ if ((Mode & GetPathToValue) && (getExecutionCount(*Succ) != MissingValue)) {
+ hasFoundPath = true;
+ BB = *Succ;
+ break;
+ }
+ }
+ }
+
+ return BB;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ divertFlow(const Edge &oldedge, const Edge &newedge) {
+ DEBUG(errs() << "Diverting " << oldedge << " via " << newedge );
+
+ // First check if the old edge was taken, if not, just delete it...
+ if (getEdgeWeight(oldedge) == 0) {
+ removeEdge(oldedge);
+ return;
+ }
+
+ Path P;
+ P[newedge.first] = 0;
+ P[newedge.second] = newedge.first;
+ const BasicBlock *BB = GetPath(newedge.second,oldedge.second,P,GetPathToExit | GetPathToDest);
+
+ double w = getEdgeWeight (oldedge);
+ DEBUG(errs() << ", Weight: " << format("%.20g",w) << "\n");
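+  // Walk the path recorded in P backwards, from the block where the search
+  // stopped to the source of the new edge, adding the diverted weight to every
+  // edge along the way and updating the intermediate block counts.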
+ do {
+ const BasicBlock *Parent = P.find(BB)->second;
+ Edge e = getEdge(Parent,BB);
+ double oldw = getEdgeWeight(e);
+ double oldc = getExecutionCount(e.first);
+ setEdgeWeight(e, w+oldw);
+ if (Parent != oldedge.first) {
+ setExecutionCount(e.first, w+oldc);
+ }
+ BB = Parent;
+ } while (BB != newedge.first);
+ removeEdge(oldedge);
+}
+
 /// Replaces all occurrences of RmBB in the ProfilingInfo with DestBB.
 /// This checks all edges of the function the blocks reside in and replaces the
 /// occurrences of RmBB with DestBB.
-void ProfileInfo::replaceAllUses(const BasicBlock *RmBB,
- const BasicBlock *DestBB) {
- DEBUG(errs() << "Replacing " << RmBB->getNameStr()
- << " with " << DestBB->getNameStr() << "\n");
+template<>
+void ProfileInfoT<Function,BasicBlock>::
+ replaceAllUses(const BasicBlock *RmBB, const BasicBlock *DestBB) {
+ DEBUG(errs() << "Replacing " << RmBB->getName()
+ << " with " << DestBB->getName() << "\n");
const Function *F = DestBB->getParent();
std::map<const Function*, EdgeWeights>::iterator J =
EdgeInformation.find(F);
if (J == EdgeInformation.end()) return;
- for (EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
- I != E; ++I) {
- Edge e = I->first;
- Edge newedge; bool foundedge = false;
+ Edge e, newedge;
+ bool erasededge = false;
+ EdgeWeights::iterator I = J->second.begin(), E = J->second.end();
+ while(I != E) {
+ e = (I++)->first;
+ bool foundedge = false; bool eraseedge = false;
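+    // An edge between RmBB and DestBB collapses when RmBB is replaced: the
+    // first such edge is simply dropped, any further one is folded into a
+    // self edge on DestBB.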
if (e.first == RmBB) {
- newedge = getEdge(DestBB, e.second);
- foundedge = true;
+ if (e.second == DestBB) {
+ eraseedge = true;
+ } else {
+ newedge = getEdge(DestBB, e.second);
+ foundedge = true;
+ }
}
if (e.second == RmBB) {
- newedge = getEdge(e.first, DestBB);
- foundedge = true;
+ if (e.first == DestBB) {
+ eraseedge = true;
+ } else {
+ newedge = getEdge(e.first, DestBB);
+ foundedge = true;
+ }
}
if (foundedge) {
- double w = getEdgeWeight(e);
- EdgeInformation[F][newedge] = w;
- DEBUG(errs() << "Replacing " << e << " with " << newedge << "\n");
- J->second.erase(e);
+ replaceEdge(e, newedge);
+ }
+ if (eraseedge) {
+ if (erasededge) {
+ Edge newedge = getEdge(DestBB, DestBB);
+ replaceEdge(e, newedge);
+ } else {
+ removeEdge(e);
+ erasededge = true;
+ }
}
}
}
@@ -119,10 +359,11 @@ void ProfileInfo::replaceAllUses(const BasicBlock *RmBB,
/// Splits an edge in the ProfileInfo and redirects flow over NewBB.
 /// Since it's possible that there is more than one edge in the CFG from FirstBB
 /// to SecondBB it's necessary to redirect the flow proportionally.
-void ProfileInfo::splitEdge(const BasicBlock *FirstBB,
- const BasicBlock *SecondBB,
- const BasicBlock *NewBB,
- bool MergeIdenticalEdges) {
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitEdge(const BasicBlock *FirstBB,
+ const BasicBlock *SecondBB,
+ const BasicBlock *NewBB,
+ bool MergeIdenticalEdges) {
const Function *F = FirstBB->getParent();
std::map<const Function*, EdgeWeights>::iterator J =
EdgeInformation.find(F);
@@ -153,7 +394,7 @@ void ProfileInfo::splitEdge(const BasicBlock *FirstBB,
// We know now how many edges there are from FirstBB to SecondBB, reroute a
// proportional part of the edge weight over NewBB.
- double neww = w / succ_count;
+ double neww = floor(w / succ_count);
ECs[n1] += neww;
ECs[n2] += neww;
BlockInformation[F][NewBB] += neww;
@@ -164,14 +405,672 @@ void ProfileInfo::splitEdge(const BasicBlock *FirstBB,
}
}
-raw_ostream& llvm::operator<<(raw_ostream &O, ProfileInfo::Edge E) {
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *Old,
+ const BasicBlock* New) {
+ const Function *F = Old->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(errs() << "Splitting " << Old->getName() << " to " << New->getName() << "\n");
+
+ std::set<Edge> Edges;
+ for (EdgeWeights::iterator ewi = J->second.begin(), ewe = J->second.end();
+ ewi != ewe; ++ewi) {
+ Edge old = ewi->first;
+ if (old.first == Old) {
+ Edges.insert(old);
+ }
+ }
+ for (std::set<Edge>::iterator EI = Edges.begin(), EE = Edges.end();
+ EI != EE; ++EI) {
+ Edge newedge = getEdge(New, EI->second);
+ replaceEdge(*EI, newedge);
+ }
+
+ double w = getExecutionCount(Old);
+ setEdgeWeight(getEdge(Old, New), w);
+ setExecutionCount(New, w);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::splitBlock(const BasicBlock *BB,
+ const BasicBlock* NewBB,
+ BasicBlock *const *Preds,
+ unsigned NumPreds) {
+ const Function *F = BB->getParent();
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(F);
+ if (J == EdgeInformation.end()) return;
+
+ DEBUG(errs() << "Splitting " << NumPreds << " Edges from " << BB->getName()
+ << " to " << NewBB->getName() << "\n");
+
+ // Collect weight that was redirected over NewBB.
+ double newweight = 0;
+
+ std::set<const BasicBlock *> ProcessedPreds;
+  // For all requested predecessors.
+ for (unsigned pred = 0; pred < NumPreds; ++pred) {
+ const BasicBlock * Pred = Preds[pred];
+ if (ProcessedPreds.insert(Pred).second) {
+ // Create edges and read old weight.
+ Edge oldedge = getEdge(Pred, BB);
+ Edge newedge = getEdge(Pred, NewBB);
+
+ // Remember how much weight was redirected.
+ newweight += getEdgeWeight(oldedge);
+
+ replaceEdge(oldedge,newedge);
+ }
+ }
+
+ Edge newedge = getEdge(NewBB,BB);
+ setEdgeWeight(newedge, newweight);
+ setExecutionCount(NewBB, newweight);
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::transfer(const Function *Old,
+ const Function *New) {
+ DEBUG(errs() << "Replacing Function " << Old->getName() << " with "
+ << New->getName() << "\n");
+ std::map<const Function*, EdgeWeights>::iterator J =
+ EdgeInformation.find(Old);
+ if(J != EdgeInformation.end()) {
+ EdgeInformation[New] = J->second;
+ }
+ EdgeInformation.erase(Old);
+ BlockInformation.erase(Old);
+ FunctionInformation.erase(Old);
+}
+
+static double readEdgeOrRemember(ProfileInfo::Edge edge, double w,
+ ProfileInfo::Edge &tocalc, unsigned &uncalc) {
+ if (w == ProfileInfo::MissingValue) {
+ tocalc = edge;
+ uncalc++;
+ return 0;
+ } else {
+ return w;
+ }
+}
+
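+// Try to derive a single missing edge weight around BB from flow conservation:
+// if exactly one incident edge has no value it is set to the difference of the
+// incoming and outgoing weights and reported via 'removed'; a remaining self
+// edge is handled heuristically when assumeEmptySelf is set. Returns true if
+// no incident edge is missing or the missing one could be computed.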
+template<>
+bool ProfileInfoT<Function,BasicBlock>::
+ CalculateMissingEdge(const BasicBlock *BB, Edge &removed,
+ bool assumeEmptySelf) {
+ Edge edgetocalc;
+ unsigned uncalculated = 0;
+
+  // Collect weights of all incoming and outgoing edges; remember edges that
+  // have no value.
+ double incount = 0;
+ SmallSet<const BasicBlock*,8> pred_visited;
+ pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi==bbe) {
+ Edge e = getEdge(0,BB);
+ incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+ }
+ for (;bbi != bbe; ++bbi) {
+ if (pred_visited.insert(*bbi)) {
+ Edge e = getEdge(*bbi,BB);
+ incount += readEdgeOrRemember(e, getEdgeWeight(e) ,edgetocalc,uncalculated);
+ }
+ }
+
+ double outcount = 0;
+ SmallSet<const BasicBlock*,8> succ_visited;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi==sbbe) {
+ Edge e = getEdge(BB,0);
+ if (getEdgeWeight(e) == MissingValue) {
+ double w = getExecutionCount(BB);
+ if (w != MissingValue) {
+ setEdgeWeight(e,w);
+ removed = e;
+ }
+ }
+ outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+ }
+ for (;sbbi != sbbe; ++sbbi) {
+ if (succ_visited.insert(*sbbi)) {
+ Edge e = getEdge(BB,*sbbi);
+ outcount += readEdgeOrRemember(e, getEdgeWeight(e), edgetocalc, uncalculated);
+ }
+ }
+
+  // If exactly one edge weight was missing, calculate it and remove it from
+  // the spanning tree.
+ if (uncalculated == 0 ) {
+ return true;
+ } else
+ if (uncalculated == 1) {
+ if (incount < outcount) {
+ EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
+ } else {
+ EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
+ }
+ DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": "
+ << format("%.20g", getEdgeWeight(edgetocalc)) << "\n");
+ removed = edgetocalc;
+ return true;
+ } else
+ if (uncalculated == 2 && assumeEmptySelf && edgetocalc.first == edgetocalc.second && incount == outcount) {
+ setEdgeWeight(edgetocalc, incount * 10);
+ removed = edgetocalc;
+ return true;
+ } else {
+ return false;
+ }
+}
+
+static void readEdge(ProfileInfo *PI, ProfileInfo::Edge e, double &calcw, std::set<ProfileInfo::Edge> &misscount) {
+ double w = PI->getEdgeWeight(e);
+ if (w != ProfileInfo::MissingValue) {
+ calcw += w;
+ } else {
+ misscount.insert(e);
+ }
+}
+
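+// Distribute the known flow around BB over the incident edges that still have
+// no value: when only outgoing edges are missing they share the incoming
+// weight evenly; when only incoming edges are missing and no flow leaves the
+// block they are set to zero. Returns false if no safe estimate is possible.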
+template<>
+bool ProfileInfoT<Function,BasicBlock>::EstimateMissingEdges(const BasicBlock *BB) {
+ bool hasNoSuccessors = false;
+
+ double inWeight = 0;
+ std::set<Edge> inMissing;
+ std::set<const BasicBlock*> ProcessedPreds;
+ pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ if (bbi == bbe) {
+ readEdge(this,getEdge(0,BB),inWeight,inMissing);
+ }
+ for( ; bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ readEdge(this,getEdge(*bbi,BB),inWeight,inMissing);
+ }
+ }
+
+ double outWeight = 0;
+ std::set<Edge> outMissing;
+ std::set<const BasicBlock*> ProcessedSuccs;
+ succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
+ if (sbbi == sbbe) {
+ readEdge(this,getEdge(BB,0),outWeight,outMissing);
+ hasNoSuccessors = true;
+ }
+ for ( ; sbbi != sbbe; ++sbbi ) {
+ if (ProcessedSuccs.insert(*sbbi).second) {
+ readEdge(this,getEdge(BB,*sbbi),outWeight,outMissing);
+ }
+ }
+
+ double share;
+ std::set<Edge>::iterator ei,ee;
+ if (inMissing.size() == 0 && outMissing.size() > 0) {
+ ei = outMissing.begin();
+ ee = outMissing.end();
+ share = inWeight/outMissing.size();
+ setExecutionCount(BB,inWeight);
+ } else
+ if (inMissing.size() > 0 && outMissing.size() == 0 && outWeight == 0) {
+ ei = inMissing.begin();
+ ee = inMissing.end();
+ share = 0;
+ setExecutionCount(BB,0);
+ } else
+ if (inMissing.size() == 0 && outMissing.size() == 0) {
+ setExecutionCount(BB,outWeight);
+ return true;
+ } else {
+ return false;
+ }
+ for ( ; ei != ee; ++ei ) {
+ setEdgeWeight(*ei,share);
+ }
+ return true;
+}
+
+template<>
+void ProfileInfoT<Function,BasicBlock>::repair(const Function *F) {
+// if (getExecutionCount(&(F->getEntryBlock())) == 0) {
+// for (Function::const_iterator FI = F->begin(), FE = F->end();
+// FI != FE; ++FI) {
+// const BasicBlock* BB = &(*FI);
+// {
+// pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+// if (NBB == End) {
+// setEdgeWeight(getEdge(0,BB),0);
+// }
+// for(;NBB != End; ++NBB) {
+// setEdgeWeight(getEdge(*NBB,BB),0);
+// }
+// }
+// {
+// succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+// if (NBB == End) {
+// setEdgeWeight(getEdge(0,BB),0);
+// }
+// for(;NBB != End; ++NBB) {
+// setEdgeWeight(getEdge(*NBB,BB),0);
+// }
+// }
+// }
+// return;
+// }
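+  // Repair the profile for F: first collect, via a BFS, all blocks, the return
+  // edges, and the flow that already leaves the function, then repeatedly
+  // apply a cascade of increasingly speculative strategies (flow conservation
+  // per block, proportional estimates, paths to known values, self loops,
+  // backedges, and finally zeroing edges) until every block is resolved.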
+ // The set of BasicBlocks that are still unvisited.
+ std::set<const BasicBlock*> Unvisited;
+
+ // The set of return edges (Edges with no successors).
+ std::set<Edge> ReturnEdges;
+ double ReturnWeight = 0;
+
+ // First iterate over the whole function and collect:
+ // 1) The blocks in this function in the Unvisited set.
+ // 2) The return edges in the ReturnEdges set.
+ // 3) The flow that is leaving the function already via return edges.
+
+ // Data structure for searching the function.
+ std::queue<const BasicBlock *> BFS;
+ const BasicBlock *BB = &(F->getEntryBlock());
+ BFS.push(BB);
+ Unvisited.insert(BB);
+
+ while (BFS.size()) {
+ BB = BFS.front(); BFS.pop();
+ succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ if (NBB == End) {
+ Edge e = getEdge(BB,0);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ // If the return edge has no value, try to read value from block.
+ double bw = getExecutionCount(BB);
+ if (bw != MissingValue) {
+ setEdgeWeight(e,bw);
+ ReturnWeight += bw;
+ } else {
+ // If both return edge and block provide no value, collect edge.
+ ReturnEdges.insert(e);
+ }
+ } else {
+ // If the return edge has a proper value, collect it.
+ ReturnWeight += w;
+ }
+ }
+ for (;NBB != End; ++NBB) {
+ if (Unvisited.insert(*NBB).second) {
+ BFS.push(*NBB);
+ }
+ }
+ }
+
+ while (Unvisited.size() > 0) {
+ unsigned oldUnvisitedCount = Unvisited.size();
+ bool FoundPath = false;
+
+ // If there is only one edge left, calculate it.
+ if (ReturnEdges.size() == 1) {
+ ReturnWeight = getExecutionCount(&(F->getEntryBlock())) - ReturnWeight;
+
+ Edge e = *ReturnEdges.begin();
+ setEdgeWeight(e,ReturnWeight);
+ setExecutionCount(e.first,ReturnWeight);
+
+ Unvisited.erase(e.first);
+ ReturnEdges.erase(e);
+ continue;
+ }
+
+    // Calculate all blocks where only one edge is missing; this may also
+    // resolve further return edges.
+ std::set<const BasicBlock *>::iterator FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ Edge e;
+ if(CalculateMissingEdge(BB,e,true)) {
+ if (BlockInformation[F].find(BB) == BlockInformation[F].end()) {
+ setExecutionCount(BB,getExecutionCount(BB));
+ }
+ Unvisited.erase(BB);
+ if (e.first != 0 && e.second == 0) {
+ ReturnEdges.erase(e);
+ ReturnWeight += getEdgeWeight(e);
+ }
+ }
+ }
+ if (oldUnvisitedCount > Unvisited.size()) continue;
+
+ // Estimate edge weights by dividing the flow proportionally.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ const BasicBlock *Dest = 0;
+ bool AllEdgesHaveSameReturn = true;
+      // Check each successor; these must all end up in the same or an empty
+      // return block, otherwise it is dangerous to do an estimation on them.
+ for (succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB);
+ Succ != End; ++Succ) {
+ Path P;
+ GetPath(*Succ, 0, P, GetPathToExit);
+ if (Dest && Dest != P[0]) {
+ AllEdgesHaveSameReturn = false;
+ }
+ Dest = P[0];
+ }
+ if (AllEdgesHaveSameReturn) {
+ if(EstimateMissingEdges(BB)) {
+ Unvisited.erase(BB);
+ break;
+ }
+ }
+ }
+ if (oldUnvisitedCount > Unvisited.size()) continue;
+
+    // Check if there is a path to a block that has a known value and redirect
+    // flow accordingly.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ // Fetch path.
+ const BasicBlock *BB = *FI; ++FI;
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToValue);
+
+ // Calculate incoming flow.
+ double iw = 0; unsigned inmissing = 0; unsigned incount = 0; unsigned invalid = 0;
+ std::set<const BasicBlock *> Processed;
+ for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ invalid++;
+ } else {
+            // If the path contains the successor, this means it is a backedge;
+            // do not count it as missing.
+ if (P.find(*NBB) == P.end())
+ inmissing++;
+ }
+ incount++;
+ }
+ }
+ if (inmissing == incount) continue;
+ if (invalid == 0) continue;
+
+ // Subtract (already) outgoing flow.
+ Processed.clear();
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(BB, *NBB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw -= ew;
+ }
+ }
+ }
+ if (iw < 0) continue;
+
+      // Check whether the receiving end of the path can handle the flow.
+ double ow = getExecutionCount(Dest);
+ Processed.clear();
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(BB, *NBB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ ow -= ew;
+ }
+ }
+ }
+ if (ow < 0) continue;
+
+ // Determine how much flow shall be used.
+ double ew = getEdgeWeight(getEdge(P[Dest],Dest));
+ if (ew != MissingValue) {
+ ew = ew<ow?ew:ow;
+ ew = ew<iw?ew:iw;
+ } else {
+ if (inmissing == 0)
+ ew = iw;
+ }
+
+ // Create flow.
+ if (ew != MissingValue) {
+ do {
+ Edge e = getEdge(P[Dest],Dest);
+ if (getEdgeWeight(e) == MissingValue) {
+ setEdgeWeight(e,ew);
+ FoundPath = true;
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Calculate a block with self loop.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+ bool SelfEdgeFound = false;
+ for (succ_const_iterator NBB = succ_begin(BB), End = succ_end(BB);
+ NBB != End; ++NBB) {
+ if (*NBB == BB) {
+ SelfEdgeFound = true;
+ break;
+ }
+ }
+ if (SelfEdgeFound) {
+ Edge e = getEdge(BB,BB);
+ if (getEdgeWeight(e) == MissingValue) {
+ double iw = 0;
+ std::set<const BasicBlock *> Processed;
+ for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ }
+ }
+ }
+ setEdgeWeight(e,iw * 10);
+ FoundPath = true;
+ }
+ }
+ }
+ if (FoundPath) continue;
+
+ // Determine backedges, set them to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+ const BasicBlock *Dest;
+ Path P;
+ bool BackEdgeFound = false;
+ for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ Dest = GetPath(BB, *NBB, P, GetPathToDest | GetPathWithNewEdges);
+ if (Dest == *NBB) {
+ BackEdgeFound = true;
+ break;
+ }
+ }
+ if (BackEdgeFound) {
+ Edge e = getEdge(Dest,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ }
+ do {
+ Edge e = getEdge(P[Dest], Dest);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Channel flow to return block.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+
+ Path P;
+ const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges);
+ Dest = P[0];
+ if (!Dest) continue;
+
+ if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) {
+ // Calculate incoming flow.
+ double iw = 0;
+ std::set<const BasicBlock *> Processed;
+ for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ if (Processed.insert(*NBB).second) {
+ Edge e = getEdge(*NBB, BB);
+ double ew = getEdgeWeight(e);
+ if (ew != MissingValue) {
+ iw += ew;
+ }
+ }
+ }
+ do {
+ Edge e = getEdge(P[Dest], Dest);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,iw);
+ FoundPath = true;
+ } else {
+ assert(0 && "Edge should not have value already!");
+ }
+ Dest = P[Dest];
+ } while (Dest != BB);
+ }
+ }
+ if (FoundPath) continue;
+
+ // Speculatively set edges to zero.
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE && !FoundPath) {
+ const BasicBlock *BB = *FI; ++FI;
+
+ for (pred_const_iterator NBB = pred_begin(BB), End = pred_end(BB);
+ NBB != End; ++NBB) {
+ Edge e = getEdge(*NBB,BB);
+ double w = getEdgeWeight(e);
+ if (w == MissingValue) {
+ setEdgeWeight(e,0);
+ FoundPath = true;
+ break;
+ }
+ }
+ }
+ if (FoundPath) continue;
+
+ errs() << "{";
+ FI = Unvisited.begin(), FE = Unvisited.end();
+ while(FI != FE) {
+ const BasicBlock *BB = *FI; ++FI;
+ errs() << BB->getName();
+ if (FI != FE)
+ errs() << ",";
+ }
+ errs() << "}";
+
+ errs() << "ASSERT: could not repair function";
+ assert(0 && "could not repair function");
+ }
+
+ EdgeWeights J = EdgeInformation[F];
+ for (EdgeWeights::iterator EI = J.begin(), EE = J.end(); EI != EE; ++EI) {
+ Edge e = EI->first;
+
+ bool SuccFound = false;
+ if (e.first != 0) {
+ succ_const_iterator NBB = succ_begin(e.first), End = succ_end(e.first);
+ if (NBB == End) {
+ if (0 == e.second) {
+ SuccFound = true;
+ }
+ }
+ for (;NBB != End; ++NBB) {
+ if (*NBB == e.second) {
+ SuccFound = true;
+ break;
+ }
+ }
+ if (!SuccFound) {
+ removeEdge(e);
+ }
+ }
+ }
+}
+
+raw_ostream& operator<<(raw_ostream &O, const Function *F) {
+ return O << F->getName();
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineFunction *MF) {
+ return O << MF->getFunction()->getName() << "(MF)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, const BasicBlock *BB) {
+ return O << BB->getName();
+}
+
+raw_ostream& operator<<(raw_ostream &O, const MachineBasicBlock *MBB) {
+ return O << MBB->getBasicBlock()->getName() << "(MB)";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const BasicBlock *, const BasicBlock *> E) {
O << "(";
- O << (E.first ? E.first->getNameStr() : "0");
+
+ if (E.first)
+ O << E.first;
+ else
+ O << "0";
+
+ O << ",";
+
+ if (E.second)
+ O << E.second;
+ else
+ O << "0";
+
+ return O << ")";
+}
+
+raw_ostream& operator<<(raw_ostream &O, std::pair<const MachineBasicBlock *, const MachineBasicBlock *> E) {
+ O << "(";
+
+ if (E.first)
+ O << E.first;
+ else
+ O << "0";
+
O << ",";
- O << (E.second ? E.second->getNameStr() : "0");
+
+ if (E.second)
+ O << E.second;
+ else
+ O << "0";
+
return O << ")";
}
+} // namespace llvm
+
//===----------------------------------------------------------------------===//
// NoProfile ProfileInfo implementation
//
diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp
index 9e1dfb6..cbd0430 100644
--- a/lib/Analysis/ProfileInfoLoaderPass.cpp
+++ b/lib/Analysis/ProfileInfoLoaderPass.cpp
@@ -74,6 +74,8 @@ X("profile-loader", "Load profile information from llvmprof.out", false, true);
static RegisterAnalysisGroup<ProfileInfo> Y(X);
+const PassInfo *llvm::ProfileLoaderPassID = &X;
+
ModulePass *llvm::createProfileLoaderPass() { return new LoaderPass(); }
/// createProfileLoaderPass - This function returns a Pass that loads the
@@ -112,46 +114,9 @@ void LoaderPass::recurseBasicBlock(const BasicBlock *BB) {
recurseBasicBlock(*bbi);
}
- Edge edgetocalc;
- unsigned uncalculated = 0;
-
- // collect weights of all incoming and outgoing edges, rememer edges that
- // have no value
- double incount = 0;
- SmallSet<const BasicBlock*,8> pred_visited;
- pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- if (bbi==bbe) {
- readEdgeOrRemember(getEdge(0, BB),edgetocalc,uncalculated,incount);
- }
- for (;bbi != bbe; ++bbi) {
- if (pred_visited.insert(*bbi)) {
- readEdgeOrRemember(getEdge(*bbi, BB),edgetocalc,uncalculated,incount);
- }
- }
-
- double outcount = 0;
- SmallSet<const BasicBlock*,8> succ_visited;
- succ_const_iterator sbbi = succ_begin(BB), sbbe = succ_end(BB);
- if (sbbi==sbbe) {
- readEdgeOrRemember(getEdge(BB, 0),edgetocalc,uncalculated,outcount);
- }
- for (;sbbi != sbbe; ++sbbi) {
- if (succ_visited.insert(*sbbi)) {
- readEdgeOrRemember(getEdge(BB, *sbbi),edgetocalc,uncalculated,outcount);
- }
- }
-
- // if exactly one edge weight was missing, calculate it and remove it from
- // spanning tree
- if (uncalculated == 1) {
- if (incount < outcount) {
- EdgeInformation[BB->getParent()][edgetocalc] = outcount-incount;
- } else {
- EdgeInformation[BB->getParent()][edgetocalc] = incount-outcount;
- }
- DEBUG(errs() << "--Calc Edge Counter for " << edgetocalc << ": "
- << format("%g", getEdgeWeight(edgetocalc)) << "\n");
- SpanningTree.erase(edgetocalc);
+ Edge tocalc;
+ if (CalculateMissingEdge(BB, tocalc)) {
+ SpanningTree.erase(tocalc);
}
}
@@ -219,9 +184,9 @@ bool LoaderPass::runOnModule(Module &M) {
}
}
while (SpanningTree.size() > 0) {
-#if 0
+
unsigned size = SpanningTree.size();
-#endif
+
BBisUnvisited.clear();
for (std::set<Edge>::iterator ei = SpanningTree.begin(),
ee = SpanningTree.end(); ei != ee; ++ei) {
@@ -231,17 +196,16 @@ bool LoaderPass::runOnModule(Module &M) {
while (BBisUnvisited.size() > 0) {
recurseBasicBlock(*BBisUnvisited.begin());
}
-#if 0
+
if (SpanningTree.size() == size) {
DEBUG(errs()<<"{");
for (std::set<Edge>::iterator ei = SpanningTree.begin(),
ee = SpanningTree.end(); ei != ee; ++ei) {
- DEBUG(errs()<<"("<<(ei->first?ei->first->getName():"0")<<","
- <<(ei->second?ei->second->getName():"0")<<"),");
+ DEBUG(errs()<< *ei <<",");
}
assert(0 && "No edge calculated!");
}
-#endif
+
}
}
if (ReadCount != Counters.size()) {
diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp
index 5f36294..36a80ba 100644
--- a/lib/Analysis/ProfileVerifierPass.cpp
+++ b/lib/Analysis/ProfileVerifierPass.cpp
@@ -21,6 +21,7 @@
#include "llvm/Support/CFG.h"
#include "llvm/Support/InstIterator.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Support/Format.h"
#include "llvm/Support/Debug.h"
#include <set>
using namespace llvm;
@@ -29,44 +30,45 @@ static cl::opt<bool,false>
ProfileVerifierDisableAssertions("profile-verifier-noassert",
cl::desc("Disable assertions"));
-namespace {
- class ProfileVerifierPass : public FunctionPass {
+namespace llvm {
+ template<class FType, class BType>
+ class ProfileVerifierPassT : public FunctionPass {
struct DetailedBlockInfo {
- const BasicBlock *BB;
- double BBWeight;
- double inWeight;
- int inCount;
- double outWeight;
- int outCount;
+ const BType *BB;
+ double BBWeight;
+ double inWeight;
+ int inCount;
+ double outWeight;
+ int outCount;
};
- ProfileInfo *PI;
- std::set<const BasicBlock*> BBisVisited;
- std::set<const Function*> FisVisited;
+ ProfileInfoT<FType, BType> *PI;
+ std::set<const BType*> BBisVisited;
+ std::set<const FType*> FisVisited;
bool DisableAssertions;
// When debugging is enabled, the verifier prints a whole slew of debug
     // information, otherwise it's just the assert. These are all the helper
// functions.
bool PrintedDebugTree;
- std::set<const BasicBlock*> BBisPrinted;
+ std::set<const BType*> BBisPrinted;
void debugEntry(DetailedBlockInfo*);
- void printDebugInfo(const BasicBlock *BB);
+ void printDebugInfo(const BType *BB);
public:
static char ID; // Class identification, replacement for typeinfo
- explicit ProfileVerifierPass () : FunctionPass(&ID) {
+ explicit ProfileVerifierPassT () : FunctionPass(&ID) {
DisableAssertions = ProfileVerifierDisableAssertions;
}
- explicit ProfileVerifierPass (bool da) : FunctionPass(&ID),
- DisableAssertions(da) {
+ explicit ProfileVerifierPassT (bool da) : FunctionPass(&ID),
+ DisableAssertions(da) {
}
void getAnalysisUsage(AnalysisUsage &AU) const {
AU.setPreservesAll();
- AU.addRequired<ProfileInfo>();
+ AU.addRequired<ProfileInfoT<FType, BType> >();
}
const char *getPassName() const {
@@ -74,271 +76,302 @@ namespace {
}
/// run - Verify the profile information.
- bool runOnFunction(Function &F);
- void recurseBasicBlock(const BasicBlock*);
+ bool runOnFunction(FType &F);
+ void recurseBasicBlock(const BType*);
- bool exitReachable(const Function*);
- double ReadOrAssert(ProfileInfo::Edge);
+ bool exitReachable(const FType*);
+ double ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge);
void CheckValue(bool, const char*, DetailedBlockInfo*);
};
-} // End of anonymous namespace
-
-char ProfileVerifierPass::ID = 0;
-static RegisterPass<ProfileVerifierPass>
-X("profile-verifier", "Verify profiling information", false, true);
-namespace llvm {
- FunctionPass *createProfileVerifierPass() {
- return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
- }
-}
-
-void ProfileVerifierPass::printDebugInfo(const BasicBlock *BB) {
-
- if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
-
- double BBWeight = PI->getExecutionCount(BB);
- if (BBWeight == ProfileInfo::MissingValue) { BBWeight = 0; }
- double inWeight = 0;
- int inCount = 0;
- std::set<const BasicBlock*> ProcessedPreds;
- for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
- bbi != bbe; ++bbi ) {
- if (ProcessedPreds.insert(*bbi).second) {
- ProfileInfo::Edge E = PI->getEdge(*bbi,BB);
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; }
- errs() << "calculated in-edge " << E << ": " << EdgeWeight << "\n";
- inWeight += EdgeWeight;
- inCount++;
+ typedef ProfileVerifierPassT<Function, BasicBlock> ProfileVerifierPass;
+
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::printDebugInfo(const BType *BB) {
+
+ if (BBisPrinted.find(BB) != BBisPrinted.end()) return;
+
+ double BBWeight = PI->getExecutionCount(BB);
+ if (BBWeight == ProfileInfoT<FType, BType>::MissingValue) { BBWeight = 0; }
+ double inWeight = 0;
+ int inCount = 0;
+ std::set<const BType*> ProcessedPreds;
+ for ( pred_const_iterator bbi = pred_begin(BB), bbe = pred_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedPreds.insert(*bbi).second) {
+ typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(*bbi,BB);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+ errs() << "calculated in-edge " << E << ": "
+ << format("%20.20g",EdgeWeight) << "\n";
+ inWeight += EdgeWeight;
+ inCount++;
+ }
}
- }
- double outWeight = 0;
- int outCount = 0;
- std::set<const BasicBlock*> ProcessedSuccs;
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- if (ProcessedSuccs.insert(*bbi).second) {
- ProfileInfo::Edge E = PI->getEdge(BB,*bbi);
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfo::MissingValue) { EdgeWeight = 0; }
- errs() << "calculated out-edge " << E << ": " << EdgeWeight << "\n";
- outWeight += EdgeWeight;
- outCount++;
+ double outWeight = 0;
+ int outCount = 0;
+ std::set<const BType*> ProcessedSuccs;
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ typename ProfileInfoT<FType, BType>::Edge E = PI->getEdge(BB,*bbi);
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) { EdgeWeight = 0; }
+ errs() << "calculated out-edge " << E << ": "
+ << format("%20.20g",EdgeWeight) << "\n";
+ outWeight += EdgeWeight;
+ outCount++;
+ }
+ }
+ errs() << "Block " << BB->getNameStr() << " in "
+ << BB->getParent()->getNameStr() << ":"
+ << "BBWeight=" << format("%20.20g",BBWeight) << ","
+ << "inWeight=" << format("%20.20g",inWeight) << ","
+ << "inCount=" << inCount << ","
+ << "outWeight=" << format("%20.20g",outWeight) << ","
+ << "outCount" << outCount << "\n";
+
+ // mark as visited and recurse into subnodes
+ BBisPrinted.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ printDebugInfo(*bbi);
}
}
- errs()<<"Block "<<BB->getNameStr()<<" in "<<BB->getParent()->getNameStr()
- <<",BBWeight="<<BBWeight<<",inWeight="<<inWeight<<",inCount="<<inCount
- <<",outWeight="<<outWeight<<",outCount"<<outCount<<"\n";
-
- // mark as visited and recurse into subnodes
- BBisPrinted.insert(BB);
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- printDebugInfo(*bbi);
- }
-}
-void ProfileVerifierPass::debugEntry (DetailedBlockInfo *DI) {
- errs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
- << DI->BB->getParent()->getNameStr() << ":";
- errs() << "BBWeight=" << DI->BBWeight << ",";
- errs() << "inWeight=" << DI->inWeight << ",";
- errs() << "inCount=" << DI->inCount << ",";
- errs() << "outWeight=" << DI->outWeight << ",";
- errs() << "outCount=" << DI->outCount << "\n";
- if (!PrintedDebugTree) {
- PrintedDebugTree = true;
- printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::debugEntry (DetailedBlockInfo *DI) {
+ errs() << "TROUBLE: Block " << DI->BB->getNameStr() << " in "
+ << DI->BB->getParent()->getNameStr() << ":"
+ << "BBWeight=" << format("%20.20g",DI->BBWeight) << ","
+ << "inWeight=" << format("%20.20g",DI->inWeight) << ","
+ << "inCount=" << DI->inCount << ","
+ << "outWeight=" << format("%20.20g",DI->outWeight) << ","
+ << "outCount=" << DI->outCount << "\n";
+ if (!PrintedDebugTree) {
+ PrintedDebugTree = true;
+ printDebugInfo(&(DI->BB->getParent()->getEntryBlock()));
+ }
}
-}
-// This compares A and B but considering maybe small differences.
-static bool Equals(double A, double B) {
- double maxRelativeError = 0.0000001;
- if (A == B)
- return true;
- double relativeError;
- if (fabs(B) > fabs(A))
- relativeError = fabs((A - B) / B);
- else
- relativeError = fabs((A - B) / A);
- if (relativeError <= maxRelativeError) return true;
- return false;
-}
+ // This compares A and B for equality.
+ static bool Equals(double A, double B) {
+ return A == B;
+ }
-// This checks if the function "exit" is reachable from an given function
-// via calls, this is necessary to check if a profile is valid despite the
-// counts not fitting exactly.
-bool ProfileVerifierPass::exitReachable(const Function *F) {
- if (!F) return false;
+ // This checks if the function "exit" is reachable from a given function
+ // via calls; this is necessary to check if a profile is valid despite the
+ // counts not fitting exactly.
+ template<class FType, class BType>
+ bool ProfileVerifierPassT<FType, BType>::exitReachable(const FType *F) {
+ if (!F) return false;
- if (FisVisited.count(F)) return false;
+ if (FisVisited.count(F)) return false;
- Function *Exit = F->getParent()->getFunction("exit");
- if (Exit == F) {
- return true;
- }
+ FType *Exit = F->getParent()->getFunction("exit");
+ if (Exit == F) {
+ return true;
+ }
- FisVisited.insert(F);
- bool exits = false;
- for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
- exits |= exitReachable(CI->getCalledFunction());
- if (exits) break;
+ FisVisited.insert(F);
+ bool exits = false;
+ for (const_inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*I)) {
+ FType *F = CI->getCalledFunction();
+ if (F) {
+ exits |= exitReachable(F);
+ } else {
+ // This is a call to a pointer, all bets are off...
+ exits = true;
+ }
+ if (exits) break;
+ }
}
+ return exits;
}
- return exits;
-}
-#define ASSERTMESSAGE(M) \
- errs() << (M) << "\n"; \
- if (!DisableAssertions) assert(0 && (M));
-
-double ProfileVerifierPass::ReadOrAssert(ProfileInfo::Edge E) {
- double EdgeWeight = PI->getEdgeWeight(E);
- if (EdgeWeight == ProfileInfo::MissingValue) {
- errs() << "Edge " << E << " in Function "
- << ProfileInfo::getFunction(E)->getNameStr() << ": ";
- ASSERTMESSAGE("ASSERT:Edge has missing value");
- return 0;
- } else {
- return EdgeWeight;
+ #define ASSERTMESSAGE(M) \
+ { errs() << "ASSERT:" << (M) << "\n"; \
+ if (!DisableAssertions) assert(0 && (M)); }
+
+ template<class FType, class BType>
+ double ProfileVerifierPassT<FType, BType>::ReadOrAssert(typename ProfileInfoT<FType, BType>::Edge E) {
+ double EdgeWeight = PI->getEdgeWeight(E);
+ if (EdgeWeight == ProfileInfoT<FType, BType>::MissingValue) {
+ errs() << "Edge " << E << " in Function "
+ << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ ASSERTMESSAGE("Edge has missing value");
+ return 0;
+ } else {
+ if (EdgeWeight < 0) {
+ errs() << "Edge " << E << " in Function "
+ << ProfileInfoT<FType, BType>::getFunction(E)->getNameStr() << ": ";
+ ASSERTMESSAGE("Edge has negative value");
+ }
+ return EdgeWeight;
+ }
}
-}
-void ProfileVerifierPass::CheckValue(bool Error, const char *Message,
- DetailedBlockInfo *DI) {
- if (Error) {
- DEBUG(debugEntry(DI));
- errs() << "Block " << DI->BB->getNameStr() << " in Function "
- << DI->BB->getParent()->getNameStr() << ": ";
- ASSERTMESSAGE(Message);
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::CheckValue(bool Error,
+ const char *Message,
+ DetailedBlockInfo *DI) {
+ if (Error) {
+ DEBUG(debugEntry(DI));
+ errs() << "Block " << DI->BB->getNameStr() << " in Function "
+ << DI->BB->getParent()->getNameStr() << ": ";
+ ASSERTMESSAGE(Message);
+ }
+ return;
}
- return;
-}
-// This calculates the Information for a block and then recurses into the
-// successors.
-void ProfileVerifierPass::recurseBasicBlock(const BasicBlock *BB) {
-
- // Break the recursion by remembering all visited blocks.
- if (BBisVisited.find(BB) != BBisVisited.end()) return;
-
- // Use a data structure to store all the information, this can then be handed
- // to debug printers.
- DetailedBlockInfo DI;
- DI.BB = BB;
- DI.outCount = DI.inCount = 0;
- DI.inWeight = DI.outWeight = 0.0;
-
- // Read predecessors.
- std::set<const BasicBlock*> ProcessedPreds;
- pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
- // If there are none, check for (0,BB) edge.
- if (bpi == bpe) {
- DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
- DI.inCount++;
- }
- for (;bpi != bpe; ++bpi) {
- if (ProcessedPreds.insert(*bpi).second) {
- DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+ // This calculates the Information for a block and then recurses into the
+ // successors.
+ template<class FType, class BType>
+ void ProfileVerifierPassT<FType, BType>::recurseBasicBlock(const BType *BB) {
+
+ // Break the recursion by remembering all visited blocks.
+ if (BBisVisited.find(BB) != BBisVisited.end()) return;
+
+ // Use a data structure to store all the information; this can then be handed
+ // to the debug printers.
+ DetailedBlockInfo DI;
+ DI.BB = BB;
+ DI.outCount = DI.inCount = 0;
+ DI.inWeight = DI.outWeight = 0;
+
+ // Read predecessors.
+ std::set<const BType*> ProcessedPreds;
+ pred_const_iterator bpi = pred_begin(BB), bpe = pred_end(BB);
+ // If there are none, check for (0,BB) edge.
+ if (bpi == bpe) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(0,BB));
DI.inCount++;
}
- }
+ for (;bpi != bpe; ++bpi) {
+ if (ProcessedPreds.insert(*bpi).second) {
+ DI.inWeight += ReadOrAssert(PI->getEdge(*bpi,BB));
+ DI.inCount++;
+ }
+ }
- // Read successors.
- std::set<const BasicBlock*> ProcessedSuccs;
- succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- // If there is an (0,BB) edge, consider it too. (This is done not only when
- // there are no successors, but every time; not every function contains
- // return blocks with no successors (think loop latch as return block)).
- double w = PI->getEdgeWeight(PI->getEdge(BB,0));
- if (w != ProfileInfo::MissingValue) {
- DI.outWeight += w;
- DI.outCount++;
- }
- for (;bbi != bbe; ++bbi) {
- if (ProcessedSuccs.insert(*bbi).second) {
- DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+ // Read successors.
+ std::set<const BType*> ProcessedSuccs;
+ succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ // If there is a (0,BB) edge, consider it too. (This is done not only when
+ // there are no successors, but every time; not every function contains
+ // return blocks with no successors (think loop latch as return block)).
+ double w = PI->getEdgeWeight(PI->getEdge(BB,0));
+ if (w != ProfileInfoT<FType, BType>::MissingValue) {
+ DI.outWeight += w;
DI.outCount++;
}
- }
+ for (;bbi != bbe; ++bbi) {
+ if (ProcessedSuccs.insert(*bbi).second) {
+ DI.outWeight += ReadOrAssert(PI->getEdge(BB,*bbi));
+ DI.outCount++;
+ }
+ }
- // Read block weight.
- DI.BBWeight = PI->getExecutionCount(BB);
- CheckValue(DI.BBWeight == ProfileInfo::MissingValue,
- "ASSERT:BasicBlock has missing value", &DI);
-
- // Check if this block is a setjmp target.
- bool isSetJmpTarget = false;
- if (DI.outWeight > DI.inWeight) {
- for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end();
- i != ie; ++i) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
- Function *F = CI->getCalledFunction();
- if (F && (F->getNameStr() == "_setjmp")) {
- isSetJmpTarget = true; break;
+ // Read block weight.
+ DI.BBWeight = PI->getExecutionCount(BB);
+ CheckValue(DI.BBWeight == ProfileInfoT<FType, BType>::MissingValue,
+ "BasicBlock has missing value", &DI);
+ CheckValue(DI.BBWeight < 0,
+ "BasicBlock has negative value", &DI);
+
+ // Check if this block is a setjmp target.
+ bool isSetJmpTarget = false;
+ if (DI.outWeight > DI.inWeight) {
+ for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FType *F = CI->getCalledFunction();
+ if (F && (F->getNameStr() == "_setjmp")) {
+ isSetJmpTarget = true; break;
+ }
}
}
}
- }
- // Check if this block is eventually reaching exit.
- bool isExitReachable = false;
- if (DI.inWeight > DI.outWeight) {
- for (BasicBlock::const_iterator i = BB->begin(), ie = BB->end();
- i != ie; ++i) {
- if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
- FisVisited.clear();
- isExitReachable |= exitReachable(CI->getCalledFunction());
- if (isExitReachable) break;
+ // Check if this block is eventually reaching exit.
+ bool isExitReachable = false;
+ if (DI.inWeight > DI.outWeight) {
+ for (typename BType::const_iterator i = BB->begin(), ie = BB->end();
+ i != ie; ++i) {
+ if (const CallInst *CI = dyn_cast<CallInst>(&*i)) {
+ FType *F = CI->getCalledFunction();
+ if (F) {
+ FisVisited.clear();
+ isExitReachable |= exitReachable(F);
+ } else {
+ // This is a call to a pointer, all bets are off...
+ isExitReachable = true;
+ }
+ if (isExitReachable) break;
+ }
}
}
- }
- if (DI.inCount > 0 && DI.outCount == 0) {
- // If this is a block with no successors.
- if (!isSetJmpTarget) {
- CheckValue(!Equals(DI.inWeight,DI.BBWeight),
- "ASSERT:inWeight and BBWeight do not match", &DI);
+ if (DI.inCount > 0 && DI.outCount == 0) {
+ // If this is a block with no successors.
+ if (!isSetJmpTarget) {
+ CheckValue(!Equals(DI.inWeight,DI.BBWeight),
+ "inWeight and BBWeight do not match", &DI);
+ }
+ } else if (DI.inCount == 0 && DI.outCount > 0) {
+ // If this is a block with no predecessors.
+ if (!isExitReachable)
+ CheckValue(!Equals(DI.BBWeight,DI.outWeight),
+ "BBWeight and outWeight do not match", &DI);
+ } else {
+ // If this block has successors and predecessors.
+ if (DI.inWeight > DI.outWeight && !isExitReachable)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "inWeight and outWeight do not match", &DI);
+ if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
+ CheckValue(!Equals(DI.inWeight,DI.outWeight),
+ "inWeight and outWeight do not match", &DI);
}
- } else if (DI.inCount == 0 && DI.outCount > 0) {
- // If this is a block with no predecessors.
- if (!isExitReachable)
- CheckValue(!Equals(DI.BBWeight,DI.outWeight),
- "ASSERT:BBWeight and outWeight do not match", &DI);
- } else {
- // If this block has successors and predecessors.
- if (DI.inWeight > DI.outWeight && !isExitReachable)
- CheckValue(!Equals(DI.inWeight,DI.outWeight),
- "ASSERT:inWeight and outWeight do not match", &DI);
- if (DI.inWeight < DI.outWeight && !isSetJmpTarget)
- CheckValue(!Equals(DI.inWeight,DI.outWeight),
- "ASSERT:inWeight and outWeight do not match", &DI);
- }
- // Mark this block as visited, rescurse into successors.
- BBisVisited.insert(BB);
- for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
- bbi != bbe; ++bbi ) {
- recurseBasicBlock(*bbi);
+ // Mark this block as visited, then recurse into successors.
+ BBisVisited.insert(BB);
+ for ( succ_const_iterator bbi = succ_begin(BB), bbe = succ_end(BB);
+ bbi != bbe; ++bbi ) {
+ recurseBasicBlock(*bbi);
+ }
}
-}
-bool ProfileVerifierPass::runOnFunction(Function &F) {
- PI = &getAnalysis<ProfileInfo>();
+ template<class FType, class BType>
+ bool ProfileVerifierPassT<FType, BType>::runOnFunction(FType &F) {
+ PI = getAnalysisIfAvailable<ProfileInfoT<FType, BType> >();
+ if (!PI)
+ ASSERTMESSAGE("No ProfileInfo available");
+
+ // Prepare global variables.
+ PrintedDebugTree = false;
+ BBisVisited.clear();
+
+ // Fetch entry block and recurse into it.
+ const BType *entry = &F.getEntryBlock();
+ recurseBasicBlock(entry);
+
+ if (PI->getExecutionCount(&F) != PI->getExecutionCount(entry))
+ ASSERTMESSAGE("Function count and entry block count do not match");
- // Prepare global variables.
- PrintedDebugTree = false;
- BBisVisited.clear();
+ return false;
+ }
+
+ template<class FType, class BType>
+ char ProfileVerifierPassT<FType, BType>::ID = 0;
+}
- // Fetch entry block and recurse into it.
- const BasicBlock *entry = &F.getEntryBlock();
- recurseBasicBlock(entry);
+static RegisterPass<ProfileVerifierPass>
+X("profile-verifier", "Verify profiling information", false, true);
- if (!DisableAssertions)
- assert((PI->getExecutionCount(&F)==PI->getExecutionCount(entry)) &&
- "Function count and entry block count do not match");
- return false;
+namespace llvm {
+ FunctionPass *createProfileVerifierPass() {
+ return new ProfileVerifierPass(ProfileVerifierDisableAssertions);
+ }
}
+
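The rewritten verifier keeps the same flow-conservation rule as before: for every basic block the summed incoming edge weights, the block's own execution count, and the summed outgoing edge weights have to agree, with special cases for blocks that have no predecessors or no successors and escape hatches for setjmp targets and paths that can reach exit(). A minimal standalone sketch of that invariant, with hypothetical names rather than the pass itself:

    // Illustrative sketch only -- mirrors the checks in
    // ProfileVerifierPassT::recurseBasicBlock above; not LLVM API.
    struct BlockCounts {
      double BBWeight, inWeight, outWeight;
      int inCount, outCount;
    };

    static bool flowIsConsistent(const BlockCounts &C) {
      if (C.inCount > 0 && C.outCount == 0)   // sink block: in == block count
        return C.inWeight == C.BBWeight;
      if (C.inCount == 0 && C.outCount > 0)   // source block: block count == out
        return C.BBWeight == C.outWeight;
      return C.inWeight == C.outWeight;       // interior block: in == out
    }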
diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp
index d674ee8..7157d47 100644
--- a/lib/Analysis/ScalarEvolutionExpander.cpp
+++ b/lib/Analysis/ScalarEvolutionExpander.cpp
@@ -357,7 +357,7 @@ Value *SCEVExpander::expandAddToGEP(const SCEV *const *op_begin,
// without the other.
SplitAddRecs(Ops, Ty, SE);
- // Decend down the pointer's type and attempt to convert the other
+ // Descend down the pointer's type and attempt to convert the other
// operands into GEP indices, at each level. The first index in a GEP
// indexes into the array implied by the pointer operand; the rest of
// the indices index into the element or field type selected by the
@@ -628,7 +628,7 @@ Value *SCEVExpander::visitAddRecExpr(const SCEVAddRecExpr *S) {
BasicBlock *SaveInsertBB = Builder.GetInsertBlock();
BasicBlock::iterator SaveInsertPt = Builder.GetInsertPoint();
BasicBlock::iterator NewInsertPt =
- next(BasicBlock::iterator(cast<Instruction>(V)));
+ llvm::next(BasicBlock::iterator(cast<Instruction>(V)));
while (isa<PHINode>(NewInsertPt)) ++NewInsertPt;
V = expandCodeFor(SE.getTruncateExpr(SE.getUnknown(V), Ty), 0,
NewInsertPt);
@@ -844,7 +844,7 @@ Value *SCEVExpander::expand(const SCEV *S) {
if (L && S->hasComputableLoopEvolution(L))
InsertPt = L->getHeader()->getFirstNonPHI();
while (isInsertedInstruction(InsertPt))
- InsertPt = next(BasicBlock::iterator(InsertPt));
+ InsertPt = llvm::next(BasicBlock::iterator(InsertPt));
break;
}
diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp
index 31d3ccc..22c6e3b 100644
--- a/lib/Analysis/ValueTracking.cpp
+++ b/lib/Analysis/ValueTracking.cpp
@@ -659,7 +659,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const TargetData *TD,
switch (Operator::getOpcode(V)) {
default: break;
case Instruction::SExt:
- Tmp = TyBits-cast<IntegerType>(U->getOperand(0)->getType())->getBitWidth();
+ Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits();
return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp;
case Instruction::AShr:
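As a quick check of the new formula: TyBits here is the bit width of the result type, so sign-extending from i8 to i32 contributes 32 - 8 = 24 copied sign bits, and an i8 operand already known to carry 3 sign bits yields 3 + 24 = 27 for the i32 result. Using getScalarSizeInBits() instead of cast<IntegerType>(...)->getBitWidth() also keeps this working when the operand is a vector of integers rather than a plain IntegerType.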
diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp
index 1b7c9c6..cad1d3b 100644
--- a/lib/AsmParser/LLLexer.cpp
+++ b/lib/AsmParser/LLLexer.cpp
@@ -540,6 +540,7 @@ lltok::Kind LLLexer::LexIdentifier() {
KEYWORD(arm_apcscc);
KEYWORD(arm_aapcscc);
KEYWORD(arm_aapcs_vfpcc);
+ KEYWORD(msp430_intrcc);
KEYWORD(cc);
KEYWORD(c);
diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp
index a92dbf8..0333eed 100644
--- a/lib/AsmParser/LLParser.cpp
+++ b/lib/AsmParser/LLParser.cpp
@@ -581,6 +581,37 @@ bool LLParser::ParseStandaloneMetadata() {
return false;
}
+/// ParseInlineMetadata:
+/// !{type %instr}
+/// !{...} MDNode
+/// !"foo" MDString
+bool LLParser::ParseInlineMetadata(Value *&V, PerFunctionState &PFS) {
+ assert(Lex.getKind() == lltok::Metadata && "Only for Metadata");
+ V = 0;
+
+ Lex.Lex();
+ if (Lex.getKind() == lltok::lbrace) {
+ Lex.Lex();
+ if (ParseTypeAndValue(V, PFS) ||
+ ParseToken(lltok::rbrace, "expected end of metadata node"))
+ return true;
+
+ Value *Vals[] = { V };
+ V = MDNode::get(Context, Vals, 1);
+ return false;
+ }
+
+ // Standalone metadata reference
+ // !{ ..., !42, ... }
+ if (!ParseMDNode((MetadataBase *&)V))
+ return false;
+
+ // MDString:
+ // '!' STRINGCONSTANT
+ if (ParseMDString((MetadataBase *&)V)) return true;
+ return false;
+}
+
/// ParseAlias:
/// ::= GlobalVar '=' OptionalVisibility 'alias' OptionalLinkage Aliasee
/// Aliasee
@@ -1043,6 +1074,7 @@ bool LLParser::ParseOptionalVisibility(unsigned &Res) {
/// ::= 'arm_apcscc'
/// ::= 'arm_aapcscc'
/// ::= 'arm_aapcs_vfpcc'
+/// ::= 'msp430_intrcc'
/// ::= 'cc' UINT
///
bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
@@ -1056,6 +1088,7 @@ bool LLParser::ParseOptionalCallingConv(CallingConv::ID &CC) {
case lltok::kw_arm_apcscc: CC = CallingConv::ARM_APCS; break;
case lltok::kw_arm_aapcscc: CC = CallingConv::ARM_AAPCS; break;
case lltok::kw_arm_aapcs_vfpcc:CC = CallingConv::ARM_AAPCS_VFP; break;
+ case lltok::kw_msp430_intrcc: CC = CallingConv::MSP430_INTR; break;
case lltok::kw_cc: {
unsigned ArbitraryCC;
Lex.Lex();
@@ -1377,15 +1410,23 @@ bool LLParser::ParseParameterList(SmallVectorImpl<ParamInfo> &ArgList,
// Parse the argument.
LocTy ArgLoc;
PATypeHolder ArgTy(Type::getVoidTy(Context));
- unsigned ArgAttrs1, ArgAttrs2;
+ unsigned ArgAttrs1 = Attribute::None;
+ unsigned ArgAttrs2 = Attribute::None;
Value *V;
- if (ParseType(ArgTy, ArgLoc) ||
- ParseOptionalAttrs(ArgAttrs1, 0) ||
- ParseValue(ArgTy, V, PFS) ||
- // FIXME: Should not allow attributes after the argument, remove this in
- // LLVM 3.0.
- ParseOptionalAttrs(ArgAttrs2, 3))
+ if (ParseType(ArgTy, ArgLoc))
return true;
+
+ if (Lex.getKind() == lltok::Metadata) {
+ if (ParseInlineMetadata(V, PFS))
+ return true;
+ } else {
+ if (ParseOptionalAttrs(ArgAttrs1, 0) ||
+ ParseValue(ArgTy, V, PFS) ||
+ // FIXME: Should not allow attributes after the argument, remove this
+ // in LLVM 3.0.
+ ParseOptionalAttrs(ArgAttrs2, 3))
+ return true;
+ }
ArgList.push_back(ParamInfo(ArgLoc, V, ArgAttrs1|ArgAttrs2));
}
diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h
index 1112dc4..d14b1cb 100644
--- a/lib/AsmParser/LLParser.h
+++ b/lib/AsmParser/LLParser.h
@@ -279,7 +279,9 @@ namespace llvm {
LocTy Loc;
return ParseTypeAndBasicBlock(BB, Loc, PFS);
}
-
+
+ bool ParseInlineMetadata(Value *&V, PerFunctionState &PFS);
+
struct ParamInfo {
LocTy Loc;
Value *V;
diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h
index 797c32e..1165766 100644
--- a/lib/AsmParser/LLToken.h
+++ b/lib/AsmParser/LLToken.h
@@ -69,6 +69,7 @@ namespace lltok {
kw_cc, kw_ccc, kw_fastcc, kw_coldcc,
kw_x86_stdcallcc, kw_x86_fastcallcc,
kw_arm_apcscc, kw_arm_aapcscc, kw_arm_aapcs_vfpcc,
+ kw_msp430_intrcc,
kw_signext,
kw_zeroext,
diff --git a/lib/Bitcode/Reader/Deserialize.cpp b/lib/Bitcode/Reader/Deserialize.cpp
index 67607ef..b8e720a 100644
--- a/lib/Bitcode/Reader/Deserialize.cpp
+++ b/lib/Bitcode/Reader/Deserialize.cpp
@@ -413,7 +413,7 @@ uintptr_t Deserializer::ReadInternalRefPtr() {
return GetFinalPtr(E);
}
-void Deserializer::BPEntry::SetPtr(BPNode*& FreeList, void* P) {
+void BPEntry::SetPtr(BPNode*& FreeList, void* P) {
BPNode* Last = NULL;
for (BPNode* N = Head; N != NULL; N=N->Next) {
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
index 8e3f8e7..bb61682 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp
@@ -38,16 +38,19 @@ DebugMod("agg-antidep-debugmod",
cl::desc("Debug control for aggressive anti-dep breaker"),
cl::init(0), cl::Hidden);
-AggressiveAntiDepState::AggressiveAntiDepState(MachineBasicBlock *BB) :
- GroupNodes(TargetRegisterInfo::FirstVirtualRegister, 0) {
- // Initialize all registers to be in their own group. Initially we
- // assign the register to the same-indexed GroupNode.
- for (unsigned i = 0; i < TargetRegisterInfo::FirstVirtualRegister; ++i)
+AggressiveAntiDepState::AggressiveAntiDepState(const unsigned TargetRegs,
+ MachineBasicBlock *BB) :
+ NumTargetRegs(TargetRegs), GroupNodes(TargetRegs, 0) {
+
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0; i < NumTargetRegs; ++i) {
+ // Initialize all registers to be in their own group. Initially we
+ // assign the register to the same-indexed GroupNode.
GroupNodeIndices[i] = i;
-
- // Initialize the indices to indicate that no registers are live.
- std::fill(KillIndices, array_endof(KillIndices), ~0u);
- std::fill(DefIndices, array_endof(DefIndices), BB->size());
+ // Initialize the indices to indicate that no registers are live.
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
}
unsigned AggressiveAntiDepState::GetGroup(unsigned Reg)
@@ -64,7 +67,7 @@ void AggressiveAntiDepState::GetGroupRegs(
std::vector<unsigned> &Regs,
std::multimap<unsigned, AggressiveAntiDepState::RegisterReference> *RegRefs)
{
- for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) {
+ for (unsigned Reg = 0; Reg != NumTargetRegs; ++Reg) {
if ((GetGroup(Reg) == Group) && (RegRefs->count(Reg) > 0))
Regs.push_back(Reg);
}
@@ -137,7 +140,7 @@ AggressiveAntiDepBreaker::~AggressiveAntiDepBreaker() {
void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
assert(State == NULL);
- State = new AggressiveAntiDepState(BB);
+ State = new AggressiveAntiDepState(TRI->getNumRegs(), BB);
bool IsReturnBlock = (!BB->empty() && BB->back().getDesc().isReturn());
unsigned *KillIndices = State->GetKillIndices();
@@ -220,7 +223,7 @@ void AggressiveAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
DEBUG(errs() << "\tRegs:");
unsigned *DefIndices = State->GetDefIndices();
- for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg) {
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg) {
// If Reg is current live, then mark that it can't be renamed as
// we don't know the extent of its live-range anymore (now that it
// has been scheduled). If it is not live but was defined in the
diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h
index 8154d2d..d385a21 100644
--- a/lib/CodeGen/AggressiveAntiDepBreaker.h
+++ b/lib/CodeGen/AggressiveAntiDepBreaker.h
@@ -44,6 +44,10 @@ namespace llvm {
} RegisterReference;
private:
+ /// NumTargetRegs - Number of non-virtual target registers
+ /// (i.e. TRI->getNumRegs()).
+ const unsigned NumTargetRegs;
+
/// GroupNodes - Implements a disjoint-union data structure to
/// form register groups. A node is represented by an index into
/// the vector. A node can "point to" itself to indicate that it
@@ -69,7 +73,7 @@ namespace llvm {
unsigned DefIndices[TargetRegisterInfo::FirstVirtualRegister];
public:
- AggressiveAntiDepState(MachineBasicBlock *BB);
+ AggressiveAntiDepState(const unsigned TargetRegs, MachineBasicBlock *BB);
/// GetKillIndices - Return the kill indices.
unsigned *GetKillIndices() { return KillIndices; }
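GroupNodes implements the register groups as a disjoint-union (union-find style) structure over the physical registers, and the constructor change above sizes it from TRI->getNumRegs() instead of the fixed TargetRegisterInfo::FirstVirtualRegister constant, seeding every register as the leader of its own singleton group. A small sketch of that initialization step, written as a free function purely for illustration (not the LLVM class itself):

    #include <vector>

    // Every physical register starts out as its own group leader; groups are
    // formed later by pointing one node's index at another.
    std::vector<unsigned> makeSingletonGroups(unsigned NumTargetRegs) {
      std::vector<unsigned> GroupNodeIndices(NumTargetRegs);
      for (unsigned i = 0; i < NumTargetRegs; ++i)
        GroupNodeIndices[i] = i;
      return GroupNodeIndices;
    }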
diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
index 993cdbf..44fd176 100644
--- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
+++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp
@@ -1374,6 +1374,7 @@ void AsmPrinter::processDebugLoc(const MachineInstr *MI,
unsigned L = DW->RecordSourceLine(CurDLT.Line, CurDLT.Col,
CurDLT.Scope);
printLabel(L);
+ O << '\n';
DW->BeginScope(MI, L);
PrevDLT = CurDLT;
}
@@ -1837,15 +1838,16 @@ void AsmPrinter::EmitComments(const MachineInstr &MI) const {
// Print source line info.
O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString() << " SrcLine ";
- if (DLT.Scope) {
- DICompileUnit CU(DLT.Scope);
- if (!CU.isNull())
- O << CU.getFilename() << " ";
- }
- O << DLT.Line;
+ O << MAI->getCommentString() << ' ';
+ DIScope Scope(DLT.Scope);
+ // Omit the directory, because it's likely to be long and uninteresting.
+ if (!Scope.isNull())
+ O << Scope.getFilename();
+ else
+ O << "<unknown>";
+ O << ':' << DLT.Line;
if (DLT.Col != 0)
- O << ":" << DLT.Col;
+ O << ':' << DLT.Col;
Newline = true;
}
@@ -1857,35 +1859,40 @@ void AsmPrinter::EmitComments(const MachineInstr &MI) const {
// We assume a single instruction only has a spill or reload, not
// both.
+ const MachineMemOperand *MMO;
if (TM.getInstrInfo()->isLoadFromStackSlotPostFE(&MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
if (Newline) O << '\n';
O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString() << " Reload";
+ O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Reload";
Newline = true;
}
}
- else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, FI)) {
+ else if (TM.getInstrInfo()->hasLoadFromStackSlot(&MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
if (Newline) O << '\n';
O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString() << " Folded Reload";
+ O << MAI->getCommentString() << ' '
+ << MMO->getSize() << "-byte Folded Reload";
Newline = true;
}
}
else if (TM.getInstrInfo()->isStoreToStackSlotPostFE(&MI, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
+ MMO = *MI.memoperands_begin();
if (Newline) O << '\n';
O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString() << " Spill";
+ O << MAI->getCommentString() << ' ' << MMO->getSize() << "-byte Spill";
Newline = true;
}
}
- else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, FI)) {
+ else if (TM.getInstrInfo()->hasStoreToStackSlot(&MI, MMO, FI)) {
if (FrameInfo->isSpillSlotObjectIndex(FI)) {
if (Newline) O << '\n';
O.PadToColumn(MAI->getCommentColumn());
- O << MAI->getCommentString() << " Folded Spill";
+ O << MAI->getCommentString() << ' '
+ << MMO->getSize() << "-byte Folded Spill";
Newline = true;
}
}
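The practical effect of this hunk is that the spill and reload annotations in the assembly comment column now carry the access size taken from the instruction's MachineMemOperand: a reload of a 4-byte spill slot is printed along the lines of "# 4-byte Reload" (with "#" standing in for whatever comment string the target's MCAsmInfo uses) rather than the bare "# Reload", and the folded spill/reload variants gain the same size prefix.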
diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h
index dc6a70a..cad8b89 100644
--- a/lib/CodeGen/AsmPrinter/DIE.h
+++ b/lib/CodeGen/AsmPrinter/DIE.h
@@ -274,7 +274,7 @@ namespace llvm {
};
//===--------------------------------------------------------------------===//
- /// DIEString - A string value DIE.
+ /// DIEString - A string value DIE. This DIE keeps string reference only.
///
class DIEString : public DIEValue {
const StringRef Str;
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index c2e1e05..c200a46 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -330,8 +330,8 @@ void DwarfDebug::addSInt(DIE *Die, unsigned Attribute,
Die->addValue(Attribute, Form, Value);
}
-/// addString - Add a string attribute data and value.
-///
+/// addString - Add a string attribute data and value. DIEString only
+/// keeps string reference.
void DwarfDebug::addString(DIE *Die, unsigned Attribute, unsigned Form,
const StringRef String) {
DIEValue *Value = new DIEString(String);
@@ -393,7 +393,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIVariable *V) {
return;
unsigned Line = V->getLineNumber();
- unsigned FileID = findCompileUnit(V->getCompileUnit()).getID();
+ unsigned FileID = findCompileUnit(V->getCompileUnit())->getID();
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -407,7 +407,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIGlobal *G) {
return;
unsigned Line = G->getLineNumber();
- unsigned FileID = findCompileUnit(G->getCompileUnit()).getID();
+ unsigned FileID = findCompileUnit(G->getCompileUnit())->getID();
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -425,7 +425,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DISubprogram *SP) {
unsigned Line = SP->getLineNumber();
- unsigned FileID = findCompileUnit(SP->getCompileUnit()).getID();
+ unsigned FileID = findCompileUnit(SP->getCompileUnit())->getID();
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -440,7 +440,7 @@ void DwarfDebug::addSourceLine(DIE *Die, const DIType *Ty) {
return;
unsigned Line = Ty->getLineNumber();
- unsigned FileID = findCompileUnit(CU).getID();
+ unsigned FileID = findCompileUnit(CU)->getID();
assert(FileID && "Invalid file id");
addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID);
addUInt(Die, dwarf::DW_AT_decl_line, 0, Line);
@@ -738,13 +738,49 @@ void DwarfDebug::addAddress(DIE *Die, unsigned Attribute,
addBlock(Die, Attribute, 0, Block);
}
+/// addToContextOwner - Add Die into the list of its context owner's children.
+void DwarfDebug::addToContextOwner(DIE *Die, DIDescriptor Context) {
+ if (Context.isNull())
+ ModuleCU->addDie(Die);
+ else if (Context.isType()) {
+ DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context.getNode()));
+ ContextDIE->addChild(Die);
+ } else if (DIE *ContextDIE = ModuleCU->getDIE(Context.getNode()))
+ ContextDIE->addChild(Die);
+ else
+ ModuleCU->addDie(Die);
+}
+
+/// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+/// given DIType.
+DIE *DwarfDebug::getOrCreateTypeDIE(DIType Ty) {
+ DIE *TyDIE = ModuleCU->getDIE(Ty.getNode());
+ if (TyDIE)
+ return TyDIE;
+
+ // Create new type.
+ TyDIE = new DIE(dwarf::DW_TAG_base_type);
+ ModuleCU->insertDIE(Ty.getNode(), TyDIE);
+ if (Ty.isBasicType())
+ constructTypeDIE(*TyDIE, DIBasicType(Ty.getNode()));
+ else if (Ty.isCompositeType())
+ constructTypeDIE(*TyDIE, DICompositeType(Ty.getNode()));
+ else {
+ assert(Ty.isDerivedType() && "Unknown kind of DIType");
+ constructTypeDIE(*TyDIE, DIDerivedType(Ty.getNode()));
+ }
+
+ addToContextOwner(TyDIE, Ty.getContext());
+ return TyDIE;
+}
+
/// addType - Add a new type attribute to the specified entity.
-void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
+void DwarfDebug::addType(DIE *Entity, DIType Ty) {
if (Ty.isNull())
return;
// Check for pre-existence.
- DIEEntry *Entry = DW_Unit->getDIEEntry(Ty.getNode());
+ DIEEntry *Entry = ModuleCU->getDIEEntry(Ty.getNode());
// If it exists then use the existing value.
if (Entry) {
@@ -754,36 +790,17 @@ void DwarfDebug::addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty) {
// Set up proxy.
Entry = createDIEEntry();
- DW_Unit->insertDIEEntry(Ty.getNode(), Entry);
+ ModuleCU->insertDIEEntry(Ty.getNode(), Entry);
// Construct type.
- DIE *Buffer = new DIE(dwarf::DW_TAG_base_type);
- if (Ty.isBasicType())
- constructTypeDIE(DW_Unit, *Buffer, DIBasicType(Ty.getNode()));
- else if (Ty.isCompositeType())
- constructTypeDIE(DW_Unit, *Buffer, DICompositeType(Ty.getNode()));
- else {
- assert(Ty.isDerivedType() && "Unknown kind of DIType");
- constructTypeDIE(DW_Unit, *Buffer, DIDerivedType(Ty.getNode()));
- }
+ DIE *Buffer = getOrCreateTypeDIE(Ty);
- // Add debug information entry to entity and appropriate context.
- DIE *Die = NULL;
- DIDescriptor Context = Ty.getContext();
- if (!Context.isNull())
- Die = DW_Unit->getDIE(Context.getNode());
-
- if (Die)
- Die->addChild(Buffer);
- else
- DW_Unit->addDie(Buffer);
Entry->setEntry(Buffer);
Entity->addValue(dwarf::DW_AT_type, dwarf::DW_FORM_ref4, Entry);
}
/// constructTypeDIE - Construct basic type die from DIBasicType.
-void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
- DIBasicType BTy) {
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DIBasicType BTy) {
// Get core information.
StringRef Name = BTy.getName();
Buffer.setTag(dwarf::DW_TAG_base_type);
@@ -798,8 +815,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
}
/// constructTypeDIE - Construct derived type die from DIDerivedType.
-void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
- DIDerivedType DTy) {
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) {
// Get core information.
StringRef Name = DTy.getName();
uint64_t Size = DTy.getSizeInBits() >> 3;
@@ -812,7 +828,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
// Map to main type, void will not have a type.
DIType FromTy = DTy.getTypeDerivedFrom();
- addType(DW_Unit, &Buffer, FromTy);
+ addType(&Buffer, FromTy);
// Add name if not anonymous or intermediate type.
if (!Name.empty())
@@ -828,8 +844,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
}
/// constructTypeDIE - Construct type DIE from DICompositeType.
-void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
- DICompositeType CTy) {
+void DwarfDebug::constructTypeDIE(DIE &Buffer, DICompositeType CTy) {
// Get core information.
StringRef Name = CTy.getName();
@@ -840,7 +855,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
switch (Tag) {
case dwarf::DW_TAG_vector_type:
case dwarf::DW_TAG_array_type:
- constructArrayTypeDIE(DW_Unit, Buffer, &CTy);
+ constructArrayTypeDIE(Buffer, &CTy);
break;
case dwarf::DW_TAG_enumeration_type: {
DIArray Elements = CTy.getTypeArray();
@@ -850,7 +865,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
DIE *ElemDie = NULL;
DIEnumerator Enum(Elements.getElement(i).getNode());
if (!Enum.isNull()) {
- ElemDie = constructEnumTypeDIE(DW_Unit, &Enum);
+ ElemDie = constructEnumTypeDIE(&Enum);
Buffer.addChild(ElemDie);
}
}
@@ -860,7 +875,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
// Add return type.
DIArray Elements = CTy.getTypeArray();
DIDescriptor RTy = Elements.getElement(0);
- addType(DW_Unit, &Buffer, DIType(RTy.getNode()));
+ addType(&Buffer, DIType(RTy.getNode()));
// Add prototype flag.
addUInt(&Buffer, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
@@ -869,7 +884,7 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
for (unsigned i = 1, N = Elements.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
DIDescriptor Ty = Elements.getElement(i);
- addType(DW_Unit, Arg, DIType(Ty.getNode()));
+ addType(Arg, DIType(Ty.getNode()));
Buffer.addChild(Arg);
}
}
@@ -891,11 +906,9 @@ void DwarfDebug::constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
continue;
DIE *ElemDie = NULL;
if (Element.getTag() == dwarf::DW_TAG_subprogram)
- ElemDie = createSubprogramDIE(DW_Unit,
- DISubprogram(Element.getNode()));
+ ElemDie = createSubprogramDIE(DISubprogram(Element.getNode()));
else
- ElemDie = createMemberDIE(DW_Unit,
- DIDerivedType(Element.getNode()));
+ ElemDie = createMemberDIE(DIDerivedType(Element.getNode()));
Buffer.addChild(ElemDie);
}
@@ -944,33 +957,32 @@ void DwarfDebug::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy){
addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy);
if (L)
addSInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L);
- if (H)
- addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
+ addSInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H);
Buffer.addChild(DW_Subrange);
}
/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
-void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+void DwarfDebug::constructArrayTypeDIE(DIE &Buffer,
DICompositeType *CTy) {
Buffer.setTag(dwarf::DW_TAG_array_type);
if (CTy->getTag() == dwarf::DW_TAG_vector_type)
addUInt(&Buffer, dwarf::DW_AT_GNU_vector, dwarf::DW_FORM_flag, 1);
// Emit derived type.
- addType(DW_Unit, &Buffer, CTy->getTypeDerivedFrom());
+ addType(&Buffer, CTy->getTypeDerivedFrom());
DIArray Elements = CTy->getTypeArray();
// Get an anonymous type for index type.
- DIE *IdxTy = DW_Unit->getIndexTyDie();
+ DIE *IdxTy = ModuleCU->getIndexTyDie();
if (!IdxTy) {
// Construct an anonymous type for index type.
IdxTy = new DIE(dwarf::DW_TAG_base_type);
addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t));
addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1,
dwarf::DW_ATE_signed);
- DW_Unit->addDie(IdxTy);
- DW_Unit->setIndexTyDie(IdxTy);
+ ModuleCU->addDie(IdxTy);
+ ModuleCU->setIndexTyDie(IdxTy);
}
// Add subranges to array type.
@@ -982,7 +994,7 @@ void DwarfDebug::constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
}
/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
-DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
+DIE *DwarfDebug::constructEnumTypeDIE(DIEnumerator *ETy) {
DIE *Enumerator = new DIE(dwarf::DW_TAG_enumerator);
StringRef Name = ETy->getName();
addString(Enumerator, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
@@ -992,8 +1004,7 @@ DIE *DwarfDebug::constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy) {
}
/// createGlobalVariableDIE - Create new DIE using GV.
-DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit,
- const DIGlobalVariable &GV) {
+DIE *DwarfDebug::createGlobalVariableDIE(const DIGlobalVariable &GV) {
// If the global variable was optmized out then no need to create debug info
// entry.
if (!GV.getGlobal()) return NULL;
@@ -1014,7 +1025,7 @@ DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit,
addString(GVDie, dwarf::DW_AT_MIPS_linkage_name, dwarf::DW_FORM_string,
LinkageName);
}
- addType(DW_Unit, GVDie, GV.getType());
+ addType(GVDie, GV.getType());
if (!GV.isLocalToUnit())
addUInt(GVDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
addSourceLine(GVDie, &GV);
@@ -1030,13 +1041,13 @@ DIE *DwarfDebug::createGlobalVariableDIE(CompileUnit *DW_Unit,
}
/// createMemberDIE - Create new member DIE.
-DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
+DIE *DwarfDebug::createMemberDIE(const DIDerivedType &DT) {
DIE *MemberDie = new DIE(DT.getTag());
StringRef Name = DT.getName();
if (!Name.empty())
addString(MemberDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
- addType(DW_Unit, MemberDie, DT.getTypeDerivedFrom());
+ addType(MemberDie, DT.getTypeDerivedFrom());
addSourceLine(MemberDie, &DT);
@@ -1073,21 +1084,27 @@ DIE *DwarfDebug::createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT){
addBlock(MemberDie, dwarf::DW_AT_data_member_location, 0, MemLocationDie);
if (DT.isProtected())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
dwarf::DW_ACCESS_protected);
else if (DT.isPrivate())
- addUInt(MemberDie, dwarf::DW_AT_accessibility, 0,
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
dwarf::DW_ACCESS_private);
-
+ else if (DT.getTag() == dwarf::DW_TAG_inheritance)
+ addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_flag,
+ dwarf::DW_ACCESS_public);
+ if (DT.isVirtual())
+ addUInt(MemberDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag,
+ dwarf::DW_VIRTUALITY_virtual);
return MemberDie;
}
/// createSubprogramDIE - Create new DIE using SP.
-DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit,
- const DISubprogram &SP,
- bool IsConstructor,
- bool IsInlined) {
- DIE *SPDie = new DIE(dwarf::DW_TAG_subprogram);
+DIE *DwarfDebug::createSubprogramDIE(const DISubprogram &SP, bool MakeDecl) {
+ DIE *SPDie = ModuleCU->getDIE(SP.getNode());
+ if (SPDie)
+ return SPDie;
+
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
addString(SPDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, SP.getName());
StringRef LinkageName = SP.getLinkageName();
@@ -1103,9 +1120,6 @@ DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit,
}
addSourceLine(SPDie, &SP);
- DICompositeType SPTy = SP.getType();
- DIArray Args = SPTy.getTypeArray();
-
// Add prototyped tag, if C or ObjC.
unsigned Lang = SP.getCompileUnit().getLanguage();
if (Lang == dwarf::DW_LANG_C99 || Lang == dwarf::DW_LANG_C89 ||
@@ -1113,98 +1127,56 @@ DIE *DwarfDebug::createSubprogramDIE(CompileUnit *DW_Unit,
addUInt(SPDie, dwarf::DW_AT_prototyped, dwarf::DW_FORM_flag, 1);
// Add Return Type.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
unsigned SPTag = SPTy.getTag();
- if (!IsConstructor) {
- if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
- addType(DW_Unit, SPDie, SPTy);
- else
- addType(DW_Unit, SPDie, DIType(Args.getElement(0).getNode()));
+
+ if (Args.isNull() || SPTag != dwarf::DW_TAG_subroutine_type)
+ addType(SPDie, SPTy);
+ else
+ addType(SPDie, DIType(Args.getElement(0).getNode()));
+
+ unsigned VK = SP.getVirtuality();
+ if (VK) {
+ addUInt(SPDie, dwarf::DW_AT_virtuality, dwarf::DW_FORM_flag, VK);
+ DIEBlock *Block = new DIEBlock();
+ addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu);
+ addUInt(Block, 0, dwarf::DW_FORM_data1, SP.getVirtualIndex());
+ addBlock(SPDie, dwarf::DW_AT_vtable_elem_location, 0, Block);
+ ContainingTypeMap.insert(std::make_pair(SPDie, WeakVH(SP.getContainingType().getNode())));
}
- if (!SP.isDefinition()) {
+ if (MakeDecl || !SP.isDefinition()) {
addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
// Add arguments. Do not add arguments for subprogram definition. They will
- // be handled through RecordVariable.
+ // be handled while processing variables.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+
if (SPTag == dwarf::DW_TAG_subroutine_type)
for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
- addType(DW_Unit, Arg, DIType(Args.getElement(i).getNode()));
+ addType(Arg, DIType(Args.getElement(i).getNode()));
addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
SPDie->addChild(Arg);
}
}
// DW_TAG_inlined_subroutine may refer to this DIE.
- DW_Unit->insertDIE(SP.getNode(), SPDie);
+ ModuleCU->insertDIE(SP.getNode(), SPDie);
return SPDie;
}
/// findCompileUnit - Get the compile unit for the given descriptor.
///
-CompileUnit &DwarfDebug::findCompileUnit(DICompileUnit Unit) const {
+CompileUnit *DwarfDebug::findCompileUnit(DICompileUnit Unit) {
DenseMap<Value *, CompileUnit *>::const_iterator I =
CompileUnitMap.find(Unit.getNode());
- assert(I != CompileUnitMap.end() && "Missing compile unit.");
- return *I->second;
-}
-
-/// createDbgScopeVariable - Create a new scope variable.
-///
-DIE *DwarfDebug::createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit) {
- // Get the descriptor.
- const DIVariable &VD = DV->getVariable();
- StringRef Name = VD.getName();
- if (Name.empty())
- return NULL;
-
- // Translate tag to proper Dwarf tag. The result variable is dropped for
- // now.
- unsigned Tag;
- switch (VD.getTag()) {
- case dwarf::DW_TAG_return_variable:
- return NULL;
- case dwarf::DW_TAG_arg_variable:
- Tag = dwarf::DW_TAG_formal_parameter;
- break;
- case dwarf::DW_TAG_auto_variable: // fall thru
- default:
- Tag = dwarf::DW_TAG_variable;
- break;
- }
-
- // Define variable debug information entry.
- DIE *VariableDie = new DIE(Tag);
- addString(VariableDie, dwarf::DW_AT_name, dwarf::DW_FORM_string, Name);
-
- // Add source line info if available.
- addSourceLine(VariableDie, &VD);
-
- // Add variable type.
- // FIXME: isBlockByrefVariable should be reformulated in terms of complex
- // addresses instead.
- if (VD.isBlockByrefVariable())
- addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name));
- else
- addType(Unit, VariableDie, VD.getType());
-
- // Add variable address.
- // Variables for abstract instances of inlined functions don't get a
- // location.
- MachineLocation Location;
- unsigned FrameReg;
- int Offset = RI->getFrameIndexReference(*MF, DV->getFrameIndex(), FrameReg);
- Location.set(FrameReg, Offset);
-
-
- if (VD.hasComplexAddress())
- addComplexAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else if (VD.isBlockByrefVariable())
- addBlockByrefAddress(DV, VariableDie, dwarf::DW_AT_location, Location);
- else
- addAddress(VariableDie, dwarf::DW_AT_location, Location);
-
- return VariableDie;
+ if (I == CompileUnitMap.end())
+ return constructCompileUnit(Unit.getNode());
+ return I->second;
}
/// getUpdatedDbgScope - Find or create DbgScope assicated with the instruction.
@@ -1295,6 +1267,28 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
DIE *SPDie = ModuleCU->getDIE(SPNode);
assert (SPDie && "Unable to find subprogram DIE!");
+ DISubprogram SP(SPNode);
+ if (SP.isDefinition() && !SP.getContext().isCompileUnit()) {
+ addUInt(SPDie, dwarf::DW_AT_declaration, dwarf::DW_FORM_flag, 1);
+ // Add arguments.
+ DICompositeType SPTy = SP.getType();
+ DIArray Args = SPTy.getTypeArray();
+ unsigned SPTag = SPTy.getTag();
+ if (SPTag == dwarf::DW_TAG_subroutine_type)
+ for (unsigned i = 1, N = Args.getNumElements(); i < N; ++i) {
+ DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter);
+ addType(Arg, DIType(Args.getElement(i).getNode()));
+ addUInt(Arg, dwarf::DW_AT_artificial, dwarf::DW_FORM_flag, 1); // ??
+ SPDie->addChild(Arg);
+ }
+ DIE *SPDeclDie = SPDie;
+ SPDie = new DIE(dwarf::DW_TAG_subprogram);
+ addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4,
+ SPDeclDie);
+
+ ModuleCU->addDie(SPDie);
+ }
+
addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr,
DWLabel("func_begin", SubprogramCount));
addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr,
@@ -1305,19 +1299,6 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(MDNode *SPNode) {
if (!DISubprogram(SPNode).isLocalToUnit())
addUInt(SPDie, dwarf::DW_AT_external, dwarf::DW_FORM_flag, 1);
- // If there are global variables at this scope then add their dies.
- for (SmallVector<WeakVH, 4>::iterator SGI = ScopedGVs.begin(),
- SGE = ScopedGVs.end(); SGI != SGE; ++SGI) {
- MDNode *N = dyn_cast_or_null<MDNode>(*SGI);
- if (!N) continue;
- DIGlobalVariable GV(N);
- if (GV.getContext().getNode() == SPNode) {
- DIE *ScopedGVDie = createGlobalVariableDIE(ModuleCU, GV);
- if (ScopedGVDie)
- SPDie->addChild(ScopedGVDie);
- }
- }
-
return SPDie;
}
@@ -1401,8 +1382,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(DbgScope *Scope) {
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
-DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV,
- DbgScope *Scope, CompileUnit *Unit) {
+DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV, DbgScope *Scope) {
// Get the descriptor.
const DIVariable &VD = DV->getVariable();
StringRef Name = VD.getName();
@@ -1451,9 +1431,9 @@ DIE *DwarfDebug::constructVariableDIE(DbgVariable *DV,
// FIXME: isBlockByrefVariable should be reformulated in terms of complex
// addresses instead.
if (VD.isBlockByrefVariable())
- addType(Unit, VariableDie, getBlockByrefType(VD.getType(), Name));
+ addType(VariableDie, getBlockByrefType(VD.getType(), Name));
else
- addType(Unit, VariableDie, VD.getType());
+ addType(VariableDie, VD.getType());
}
// Add variable address.
@@ -1522,7 +1502,7 @@ DIE *DwarfDebug::constructScopeDIE(DbgScope *Scope) {
// Add variables to scope.
SmallVector<DbgVariable *, 8> &Variables = Scope->getVariables();
for (unsigned i = 0, N = Variables.size(); i < N; ++i) {
- DIE *VariableDIE = constructVariableDIE(Variables[i], Scope, ModuleCU);
+ DIE *VariableDIE = constructVariableDIE(Variables[i], Scope);
if (VariableDIE)
ScopeDIE->addChild(VariableDIE);
}
@@ -1579,7 +1559,7 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef DirName, StringRef FileName)
return SrcId;
}
-void DwarfDebug::constructCompileUnit(MDNode *N) {
+CompileUnit *DwarfDebug::constructCompileUnit(MDNode *N) {
DICompileUnit DIUnit(N);
StringRef FN = DIUnit.getFilename();
StringRef Dir = DIUnit.getDirectory();
@@ -1618,6 +1598,7 @@ void DwarfDebug::constructCompileUnit(MDNode *N) {
CompileUnitMap[DIUnit.getNode()] = Unit;
CompileUnits.push_back(Unit);
+ return Unit;
}
void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
@@ -1631,14 +1612,16 @@ void DwarfDebug::constructGlobalVariableDIE(MDNode *N) {
if (ModuleCU->getDIE(DI_GV.getNode()))
return;
- DIE *VariableDie = createGlobalVariableDIE(ModuleCU, DI_GV);
+ DIE *VariableDie = createGlobalVariableDIE(DI_GV);
+ if (!VariableDie)
+ return;
// Add to map.
ModuleCU->insertDIE(N, VariableDie);
// Add to context owner.
- ModuleCU->getCUDie()->addChild(VariableDie);
-
+ addToContextOwner(VariableDie, DI_GV.getContext());
+
// Expose as global. FIXME - need to check external flag.
ModuleCU->addGlobal(DI_GV.getName(), VariableDie);
@@ -1663,13 +1646,15 @@ void DwarfDebug::constructSubprogramDIE(MDNode *N) {
// class type.
return;
- DIE *SubprogramDie = createSubprogramDIE(ModuleCU, SP);
+ DIE *SubprogramDie = createSubprogramDIE(SP);
// Add to map.
ModuleCU->insertDIE(N, SubprogramDie);
// Add to context owner.
- ModuleCU->getCUDie()->addChild(SubprogramDie);
+ if (SP.getContext().getNode() == SP.getCompileUnit().getNode())
+ if (TopLevelDIEs.insert(SubprogramDie))
+ TopLevelDIEsVector.push_back(SubprogramDie);
// Expose as global.
ModuleCU->addGlobal(SP.getName(), SubprogramDie);
@@ -1709,21 +1694,16 @@ void DwarfDebug::beginModule(Module *M, MachineModuleInfo *mmi) {
if (!ModuleCU)
ModuleCU = CompileUnits[0];
- // Create DIEs for each of the externally visible global variables.
- for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
- E = DbgFinder.global_variable_end(); I != E; ++I) {
- DIGlobalVariable GV(*I);
- if (GV.getContext().getNode() != GV.getCompileUnit().getNode())
- ScopedGVs.push_back(*I);
- else
- constructGlobalVariableDIE(*I);
- }
-
// Create DIEs for each subprogram.
for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(),
E = DbgFinder.subprogram_end(); I != E; ++I)
constructSubprogramDIE(*I);
+ // Create DIEs for each global variable.
+ for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(),
+ E = DbgFinder.global_variable_end(); I != E; ++I)
+ constructGlobalVariableDIE(*I);
+
MMI = mmi;
shouldEmit = true;
MMI->setDebugInfoAvailability(true);
@@ -1770,6 +1750,22 @@ void DwarfDebug::endModule() {
addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined);
}
+ // Insert top level DIEs.
+ for (SmallVector<DIE *, 4>::iterator TI = TopLevelDIEsVector.begin(),
+ TE = TopLevelDIEsVector.end(); TI != TE; ++TI)
+ ModuleCU->getCUDie()->addChild(*TI);
+
+ for (DenseMap<DIE *, WeakVH>::iterator CI = ContainingTypeMap.begin(),
+ CE = ContainingTypeMap.end(); CI != CE; ++CI) {
+ DIE *SPDie = CI->first;
+ MDNode *N = dyn_cast_or_null<MDNode>(CI->second);
+ if (!N) continue;
+ DIE *NDie = ModuleCU->getDIE(N);
+ if (!NDie) continue;
+ addDIEEntry(SPDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+ addDIEEntry(NDie, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, NDie);
+ }
+
// Standard sections final addresses.
Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection());
EmitLabel("text_end", 0);
@@ -1898,6 +1894,7 @@ void DwarfDebug::endScope(const MachineInstr *MI) {
unsigned Label = MMI->NextLabelID();
Asm->printLabel(Label);
+ O << '\n';
SmallVector<DbgScope *, 2> &SD = I->second;
for (SmallVector<DbgScope *, 2>::iterator SDI = SD.begin(), SDE = SD.end();
@@ -2092,17 +2089,15 @@ void DwarfDebug::endFunction(MachineFunction *MF) {
MMI->getFrameMoves()));
// Clear debug info
- if (CurrentFnDbgScope) {
- CurrentFnDbgScope = NULL;
- DbgScopeMap.clear();
- DbgScopeBeginMap.clear();
- DbgScopeEndMap.clear();
- ConcreteScopes.clear();
- AbstractScopesList.clear();
- }
+ CurrentFnDbgScope = NULL;
+ DbgScopeMap.clear();
+ DbgScopeBeginMap.clear();
+ DbgScopeEndMap.clear();
+ ConcreteScopes.clear();
+ AbstractScopesList.clear();
Lines.clear();
-
+
if (TimePassesIsEnabled)
DebugTimer->stopTimer();
}
@@ -2337,13 +2332,16 @@ void DwarfDebug::emitDIE(DIE *Die) {
}
}
-/// emitDebugInfo / emitDebugInfoPerCU - Emit the debug info section.
+/// emitDebugInfo - Emit the debug info section.
///
-void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) {
- DIE *Die = Unit->getCUDie();
+void DwarfDebug::emitDebugInfo() {
+ // Start debug info section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfInfoSection());
+ DIE *Die = ModuleCU->getCUDie();
// Emit the compile units header.
- EmitLabel("info_begin", Unit->getID());
+ EmitLabel("info_begin", ModuleCU->getID());
// Emit size of content not including length itself
unsigned ContentSize = Die->getSize() +
@@ -2364,17 +2362,10 @@ void DwarfDebug::emitDebugInfoPerCU(CompileUnit *Unit) {
Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
Asm->EmitInt8(0); Asm->EOL("Extra Pad For GDB");
- EmitLabel("info_end", Unit->getID());
+ EmitLabel("info_end", ModuleCU->getID());
Asm->EOL();
-}
-
-void DwarfDebug::emitDebugInfo() {
- // Start debug info section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfInfoSection());
- emitDebugInfoPerCU(ModuleCU);
}
/// emitAbbreviations - Emit the abbreviation section.
@@ -2534,9 +2525,9 @@ void DwarfDebug::emitDebugLines() {
std::pair<unsigned, unsigned> SourceID =
getSourceDirectoryAndFileIds(LineInfo.getSourceID());
O << '\t' << MAI->getCommentString() << ' '
- << getSourceDirectoryName(SourceID.first) << ' '
+ << getSourceDirectoryName(SourceID.first) << '/'
<< getSourceFileName(SourceID.second)
- <<" :" << utostr_32(LineInfo.getLine()) << '\n';
+ << ':' << utostr_32(LineInfo.getLine()) << '\n';
}
// Define the line address.
@@ -2672,24 +2663,30 @@ DwarfDebug::emitFunctionDebugFrame(const FunctionDebugFrameInfo&DebugFrameInfo){
Asm->EOL();
}
-void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) {
- EmitDifference("pubnames_end", Unit->getID(),
- "pubnames_begin", Unit->getID(), true);
+/// emitDebugPubNames - Emit visible names into a debug pubnames section.
+///
+void DwarfDebug::emitDebugPubNames() {
+ // Start the dwarf pubnames section.
+ Asm->OutStreamer.SwitchSection(
+ Asm->getObjFileLowering().getDwarfPubNamesSection());
+
+ EmitDifference("pubnames_end", ModuleCU->getID(),
+ "pubnames_begin", ModuleCU->getID(), true);
Asm->EOL("Length of Public Names Info");
- EmitLabel("pubnames_begin", Unit->getID());
+ EmitLabel("pubnames_begin", ModuleCU->getID());
Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->EOL("DWARF Version");
EmitSectionOffset("info_begin", "section_info",
- Unit->getID(), 0, true, false);
+ ModuleCU->getID(), 0, true, false);
Asm->EOL("Offset of Compilation Unit Info");
- EmitDifference("info_end", Unit->getID(), "info_begin", Unit->getID(),
+ EmitDifference("info_end", ModuleCU->getID(), "info_begin", ModuleCU->getID(),
true);
Asm->EOL("Compilation Unit Length");
- const StringMap<DIE*> &Globals = Unit->getGlobals();
+ const StringMap<DIE*> &Globals = ModuleCU->getGlobals();
for (StringMap<DIE*>::const_iterator
GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) {
const char *Name = GI->getKeyData();
@@ -2700,21 +2697,11 @@ void DwarfDebug::emitDebugPubNamesPerCU(CompileUnit *Unit) {
}
Asm->EmitInt32(0); Asm->EOL("End Mark");
- EmitLabel("pubnames_end", Unit->getID());
+ EmitLabel("pubnames_end", ModuleCU->getID());
Asm->EOL();
}
-/// emitDebugPubNames - Emit visible names into a debug pubnames section.
-///
-void DwarfDebug::emitDebugPubNames() {
- // Start the dwarf pubnames section.
- Asm->OutStreamer.SwitchSection(
- Asm->getObjFileLowering().getDwarfPubNamesSection());
-
- emitDebugPubNamesPerCU(ModuleCU);
-}
-
void DwarfDebug::emitDebugPubTypes() {
// Start the dwarf pubnames section.
Asm->OutStreamer.SwitchSection(
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h
index 679d9b9..12ad322 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.h
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h
@@ -154,12 +154,14 @@ class DwarfDebug : public Dwarf {
/// (at the end of the module) as DW_AT_inline.
SmallPtrSet<DIE *, 4> InlinedSubprogramDIEs;
+ DenseMap<DIE *, WeakVH> ContainingTypeMap;
+
/// AbstractSubprogramDIEs - Collection of abstract subprogram DIEs.
SmallPtrSet<DIE *, 4> AbstractSubprogramDIEs;
- /// ScopedGVs - Tracks global variables that are not at file scope.
- /// For example void f() { static int b = 42; }
- SmallVector<WeakVH, 4> ScopedGVs;
+ /// TopLevelDIEs - Collection of top level DIEs.
+ SmallPtrSet<DIE *, 4> TopLevelDIEs;
+ SmallVector<DIE *, 4> TopLevelDIEsVector;
typedef SmallVector<DbgScope *, 2> ScopeVector;
typedef DenseMap<const MachineInstr *, ScopeVector>
@@ -307,53 +309,52 @@ class DwarfDebug : public Dwarf {
void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute,
const MachineLocation &Location);
+ /// addToContextOwner - Add Die into the list of its context owner's children.
+ void addToContextOwner(DIE *Die, DIDescriptor Context);
+
/// addType - Add a new type attribute to the specified entity.
- void addType(CompileUnit *DW_Unit, DIE *Entity, DIType Ty);
+ void addType(DIE *Entity, DIType Ty);
+
+ /// getOrCreateTypeDIE - Find existing DIE or create new DIE for the
+ /// given DIType.
+ DIE *getOrCreateTypeDIE(DIType Ty);
void addPubTypes(DISubprogram SP);
/// constructTypeDIE - Construct basic type die from DIBasicType.
- void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ void constructTypeDIE(DIE &Buffer,
DIBasicType BTy);
/// constructTypeDIE - Construct derived type die from DIDerivedType.
- void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ void constructTypeDIE(DIE &Buffer,
DIDerivedType DTy);
/// constructTypeDIE - Construct type DIE from DICompositeType.
- void constructTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ void constructTypeDIE(DIE &Buffer,
DICompositeType CTy);
/// constructSubrangeDIE - Construct subrange DIE from DISubrange.
void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy);
/// constructArrayTypeDIE - Construct array type DIE from DICompositeType.
- void constructArrayTypeDIE(CompileUnit *DW_Unit, DIE &Buffer,
+ void constructArrayTypeDIE(DIE &Buffer,
DICompositeType *CTy);
/// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator.
- DIE *constructEnumTypeDIE(CompileUnit *DW_Unit, DIEnumerator *ETy);
+ DIE *constructEnumTypeDIE(DIEnumerator *ETy);
/// createGlobalVariableDIE - Create new DIE using GV.
- DIE *createGlobalVariableDIE(CompileUnit *DW_Unit,
- const DIGlobalVariable &GV);
+ DIE *createGlobalVariableDIE(const DIGlobalVariable &GV);
/// createMemberDIE - Create new member DIE.
- DIE *createMemberDIE(CompileUnit *DW_Unit, const DIDerivedType &DT);
+ DIE *createMemberDIE(const DIDerivedType &DT);
/// createSubprogramDIE - Create new DIE using SP.
- DIE *createSubprogramDIE(CompileUnit *DW_Unit,
- const DISubprogram &SP,
- bool IsConstructor = false,
- bool IsInlined = false);
+ DIE *createSubprogramDIE(const DISubprogram &SP, bool MakeDecl = false);
/// findCompileUnit - Get the compile unit for the given descriptor.
///
- CompileUnit &findCompileUnit(DICompileUnit Unit) const;
-
- /// createDbgScopeVariable - Create a new scope variable.
- ///
- DIE *createDbgScopeVariable(DbgVariable *DV, CompileUnit *Unit);
+ CompileUnit *findCompileUnit(DICompileUnit Unit);
/// getUpdatedDbgScope - Find or create DbgScope assicated with
/// the instruction. Initialize scope and update scope hierarchy.
@@ -384,7 +385,7 @@ class DwarfDebug : public Dwarf {
DIE *constructInlinedScopeDIE(DbgScope *Scope);
/// constructVariableDIE - Construct a DIE for the given DbgVariable.
- DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S, CompileUnit *Unit);
+ DIE *constructVariableDIE(DbgVariable *DV, DbgScope *S);
/// constructScopeDIE - Construct a DIE for this scope.
DIE *constructScopeDIE(DbgScope *Scope);
@@ -405,10 +406,8 @@ class DwarfDebug : public Dwarf {
///
void computeSizeAndOffsets();
- /// EmitDebugInfo / emitDebugInfoPerCU - Emit the debug info section.
+ /// EmitDebugInfo - Emit the debug info section.
///
- void emitDebugInfoPerCU(CompileUnit *Unit);
-
void emitDebugInfo();
/// emitAbbreviations - Emit the abbreviation section.
@@ -432,8 +431,6 @@ class DwarfDebug : public Dwarf {
/// section.
void emitFunctionDebugFrame(const FunctionDebugFrameInfo &DebugFrameInfo);
- void emitDebugPubNamesPerCU(CompileUnit *Unit);
-
/// emitDebugPubNames - Emit visible names into a debug pubnames section.
///
void emitDebugPubNames();
@@ -488,7 +485,7 @@ class DwarfDebug : public Dwarf {
/// as well.
unsigned GetOrCreateSourceID(StringRef DirName, StringRef FileName);
- void constructCompileUnit(MDNode *N);
+ CompileUnit *constructCompileUnit(MDNode *N);
void constructGlobalVariableDIE(MDNode *N);
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp
index 1c8b8f4..3fd077f 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp
@@ -292,14 +292,13 @@ void DwarfException::EmitFDE(const FunctionEHFrameInfo &EHFrameInfo) {
Asm->EmitULEB128Bytes(is4Byte ? 4 : 8);
Asm->EOL("Augmentation size");
+ // We force 32-bits here because we've encoded our LSDA in the CIE with
+ // `dwarf::DW_EH_PE_sdata4'. And the CIE and FDE should agree.
if (EHFrameInfo.hasLandingPads)
- EmitReference("exception", EHFrameInfo.Number, true, false);
- else {
- if (is4Byte)
- Asm->EmitInt32((int)0);
- else
- Asm->EmitInt64((int)0);
- }
+ EmitReference("exception", EHFrameInfo.Number, true, true);
+ else
+ Asm->EmitInt32((int)0);
+
Asm->EOL("Language Specific Data Area");
} else {
Asm->EmitULEB128Bytes(0);
diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h
index aff1665..aa01c5b 100644
--- a/lib/CodeGen/AsmPrinter/DwarfException.h
+++ b/lib/CodeGen/AsmPrinter/DwarfException.h
@@ -119,7 +119,6 @@ class DwarfException : public Dwarf {
static inline unsigned getTombstoneKey() { return -2U; }
static unsigned getHashValue(const unsigned &Key) { return Key; }
static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
- static bool isPod() { return true; }
};
/// PadRange - Structure holding a try-range and the associated landing pad.
diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp
index 8a62eb2..3887e6d 100644
--- a/lib/CodeGen/BranchFolding.cpp
+++ b/lib/CodeGen/BranchFolding.cpp
@@ -427,7 +427,7 @@ static unsigned EstimateRuntime(MachineBasicBlock::iterator I,
static void FixTail(MachineBasicBlock *CurMBB, MachineBasicBlock *SuccBB,
const TargetInstrInfo *TII) {
MachineFunction *MF = CurMBB->getParent();
- MachineFunction::iterator I = next(MachineFunction::iterator(CurMBB));
+ MachineFunction::iterator I = llvm::next(MachineFunction::iterator(CurMBB));
MachineBasicBlock *TBB = 0, *FBB = 0;
SmallVector<MachineOperand, 4> Cond;
if (I != MF->end() &&
@@ -805,7 +805,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
// a compile-time infinite loop repeatedly doing and undoing the same
// transformations.)
- for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I) {
if (I->pred_size() >= 2 && I->pred_size() < TailMergeThreshold) {
SmallPtrSet<MachineBasicBlock *, 8> UniquePreds;
@@ -833,7 +833,7 @@ bool BranchFolder::TailMergeBlocks(MachineFunction &MF) {
continue;
// This is the QBB case described above
if (!FBB)
- FBB = next(MachineFunction::iterator(PBB));
+ FBB = llvm::next(MachineFunction::iterator(PBB));
}
// Failing case: the only way IBB can be reached from PBB is via
// exception handling. Happens for landing pads. Would be nice
@@ -1140,7 +1140,7 @@ ReoptimizeBlock:
// falls through into MBB and we can't understand the prior block's branch
// condition.
if (MBB->empty()) {
- bool PredHasNoFallThrough = TII->BlockHasNoFallThrough(PrevBB);
+ bool PredHasNoFallThrough = !PrevBB.canFallThrough();
if (PredHasNoFallThrough || !PriorUnAnalyzable ||
!PrevBB.isSuccessor(MBB)) {
// If the prior block falls through into us, turn it into an
@@ -1239,7 +1239,7 @@ ReoptimizeBlock:
// B elsewhere
// next:
if (CurFallsThru) {
- MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
CurCond.clear();
TII->InsertBranch(*MBB, NextBB, 0, CurCond);
}
diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt
index 6f86614..7a969f0 100644
--- a/lib/CodeGen/CMakeLists.txt
+++ b/lib/CodeGen/CMakeLists.txt
@@ -1,6 +1,7 @@
add_llvm_library(LLVMCodeGen
AggressiveAntiDepBreaker.cpp
BranchFolding.cpp
+ CalcSpillWeights.cpp
CodePlacementOpt.cpp
CriticalAntiDepBreaker.cpp
DeadMachineInstructionElim.cpp
@@ -35,7 +36,9 @@ add_llvm_library(LLVMCodeGen
MachinePassRegistry.cpp
MachineRegisterInfo.cpp
MachineSink.cpp
+ MachineSSAUpdater.cpp
MachineVerifier.cpp
+ MaxStackAlignment.cpp
ObjectCodeEmitter.cpp
OcamlGC.cpp
PHIElimination.cpp
diff --git a/lib/CodeGen/CalcSpillWeights.cpp b/lib/CodeGen/CalcSpillWeights.cpp
new file mode 100644
index 0000000..dcffb8a2
--- /dev/null
+++ b/lib/CodeGen/CalcSpillWeights.cpp
@@ -0,0 +1,154 @@
+//===------------------------ CalcSpillWeights.cpp ------------------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+
+#define DEBUG_TYPE "calcspillweights"
+
+#include "llvm/Function.h"
+#include "llvm/ADT/SmallSet.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
+#include "llvm/CodeGen/LiveIntervalAnalysis.h"
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineLoopInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/SlotIndexes.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/raw_ostream.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+
+using namespace llvm;
+
+char CalculateSpillWeights::ID = 0;
+static RegisterPass<CalculateSpillWeights> X("calcspillweights",
+ "Calculate spill weights");
+
+void CalculateSpillWeights::getAnalysisUsage(AnalysisUsage &au) const {
+ au.addRequired<LiveIntervals>();
+ au.addRequired<MachineLoopInfo>();
+ au.setPreservesAll();
+ MachineFunctionPass::getAnalysisUsage(au);
+}
+
+bool CalculateSpillWeights::runOnMachineFunction(MachineFunction &fn) {
+
+ DEBUG(errs() << "********** Compute Spill Weights **********\n"
+ << "********** Function: "
+ << fn.getFunction()->getName() << '\n');
+
+ LiveIntervals *lis = &getAnalysis<LiveIntervals>();
+ MachineLoopInfo *loopInfo = &getAnalysis<MachineLoopInfo>();
+ const TargetInstrInfo *tii = fn.getTarget().getInstrInfo();
+ MachineRegisterInfo *mri = &fn.getRegInfo();
+
+ SmallSet<unsigned, 4> processed;
+ for (MachineFunction::iterator mbbi = fn.begin(), mbbe = fn.end();
+ mbbi != mbbe; ++mbbi) {
+ MachineBasicBlock* mbb = mbbi;
+ SlotIndex mbbEnd = lis->getMBBEndIdx(mbb);
+ MachineLoop* loop = loopInfo->getLoopFor(mbb);
+ unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
+ bool isExiting = loop ? loop->isLoopExiting(mbb) : false;
+
+ for (MachineBasicBlock::const_iterator mii = mbb->begin(), mie = mbb->end();
+ mii != mie; ++mii) {
+ const MachineInstr *mi = mii;
+ if (tii->isIdentityCopy(*mi))
+ continue;
+
+ if (mi->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
+ continue;
+
+ for (unsigned i = 0, e = mi->getNumOperands(); i != e; ++i) {
+ const MachineOperand &mopi = mi->getOperand(i);
+ if (!mopi.isReg() || mopi.getReg() == 0)
+ continue;
+ unsigned reg = mopi.getReg();
+ if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
+ continue;
+ // Multiple uses of reg by the same instruction. It should not
+ // contribute to spill weight again.
+ if (!processed.insert(reg))
+ continue;
+
+ bool hasDef = mopi.isDef();
+ bool hasUse = !hasDef;
+ for (unsigned j = i+1; j != e; ++j) {
+ const MachineOperand &mopj = mi->getOperand(j);
+ if (!mopj.isReg() || mopj.getReg() != reg)
+ continue;
+ hasDef |= mopj.isDef();
+ hasUse |= mopj.isUse();
+ if (hasDef && hasUse)
+ break;
+ }
+
+ LiveInterval &regInt = lis->getInterval(reg);
+ float weight = lis->getSpillWeight(hasDef, hasUse, loopDepth);
+ if (hasDef && isExiting) {
+ // Looks like this is a loop count variable update.
+ SlotIndex defIdx = lis->getInstructionIndex(mi).getDefIndex();
+ const LiveRange *dlr =
+ lis->getInterval(reg).getLiveRangeContaining(defIdx);
+ if (dlr->end > mbbEnd)
+ weight *= 3.0F;
+ }
+ regInt.weight += weight;
+ }
+ processed.clear();
+ }
+ }
+
+ for (LiveIntervals::iterator I = lis->begin(), E = lis->end(); I != E; ++I) {
+ LiveInterval &li = *I->second;
+ if (TargetRegisterInfo::isVirtualRegister(li.reg)) {
+ // If the live interval length is essentially zero, i.e. in every live
+ // range the use follows def immediately, it doesn't make sense to spill
+ // it and hope it will be easier to allocate for this li.
+ if (isZeroLengthInterval(&li)) {
+ li.weight = HUGE_VALF;
+ continue;
+ }
+
+ bool isLoad = false;
+ SmallVector<LiveInterval*, 4> spillIs;
+ if (lis->isReMaterializable(li, spillIs, isLoad)) {
+ // If all of the definitions of the interval are re-materializable,
+ // it is a preferred candidate for spilling. If none of the defs are
+ // loads, then it's potentially very cheap to re-materialize.
+ // FIXME: this gets much more complicated once we support non-trivial
+ // re-materialization.
+ if (isLoad)
+ li.weight *= 0.9F;
+ else
+ li.weight *= 0.5F;
+ }
+
+ // Slightly prefer live interval that has been assigned a preferred reg.
+ std::pair<unsigned, unsigned> Hint = mri->getRegAllocationHint(li.reg);
+ if (Hint.first || Hint.second)
+ li.weight *= 1.01F;
+
+ // Divide the weight of the interval by its size. This encourages
+ // spilling of intervals that are large and have few uses, and
+ // discourages spilling of small intervals with many uses.
+ li.weight /= lis->getApproximateInstructionCount(li) * SlotIndex::NUM;
+ }
+ }
+
+ return false;
+}
+
+/// Returns true if the given live interval is zero length.
+bool CalculateSpillWeights::isZeroLengthInterval(LiveInterval *li) const {
+ for (LiveInterval::Ranges::const_iterator
+ i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
+ if (i->end.getPrevIndex() > i->start)
+ return false;
+ return true;
+}
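
Editorial note on the new pass above: it amounts to one accumulation loop plus a per-interval adjustment. Purely as an illustration (not part of the patch; the base formula and every name below are assumptions, with the first function standing in for LiveIntervals::getSpillWeight), the heuristic can be sketched as:

#include <cmath>

// Per-operand contribution, accumulated into the interval's weight.
static float perUseWeight(bool hasDef, bool hasUse, unsigned loopDepth,
                          bool defSurvivesLoopExit) {
  // Assumed base formula standing in for LiveIntervals::getSpillWeight.
  float w = (hasDef + hasUse) * std::pow(10.0f, float(loopDepth));
  if (defSurvivesLoopExit)
    w *= 3.0f;                          // likely a loop-count variable update
  return w;
}

// Per-interval adjustments applied once all operands have been accumulated.
static float finalizeWeight(float accumulated, bool isReMat, bool reMatIsLoad,
                            bool hasHint, unsigned approxSize) {
  if (isReMat)
    accumulated *= reMatIsLoad ? 0.9f : 0.5f;  // prefer spilling remat values
  if (hasHint)
    accumulated *= 1.01f;                      // slight bonus for hinted intervals
  return accumulated / approxSize;             // large, rarely used intervals spill first
}

Zero-length intervals bypass this scheme entirely; the pass pins their weight to HUGE_VALF so they are never chosen for spilling.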
diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp
index e9844d8..ff71f6b 100644
--- a/lib/CodeGen/CodePlacementOpt.cpp
+++ b/lib/CodeGen/CodePlacementOpt.cpp
@@ -182,7 +182,7 @@ bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF,
// Move it and all the blocks that can reach it via fallthrough edges
// exclusively, to keep existing fallthrough edges intact.
MachineFunction::iterator Begin = Pred;
- MachineFunction::iterator End = next(Begin);
+ MachineFunction::iterator End = llvm::next(Begin);
while (Begin != MF.begin()) {
MachineFunction::iterator Prior = prior(Begin);
if (Prior == MF.begin())
@@ -255,7 +255,8 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
// to the top of the loop to avoid losing that fallthrough. Otherwise append
// them to the bottom, even if it previously had a fallthrough, on the theory
// that it's worth an extra branch to keep the loop contiguous.
- MachineFunction::iterator InsertPt = next(MachineFunction::iterator(BotMBB));
+ MachineFunction::iterator InsertPt =
+ llvm::next(MachineFunction::iterator(BotMBB));
bool InsertAtTop = false;
if (TopMBB != MF.begin() &&
!HasFallthrough(prior(MachineFunction::iterator(TopMBB))) &&
@@ -268,7 +269,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
// with the loop header.
SmallPtrSet<MachineBasicBlock *, 8> ContiguousBlocks;
for (MachineFunction::iterator I = TopMBB,
- E = next(MachineFunction::iterator(BotMBB)); I != E; ++I)
+ E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I)
ContiguousBlocks.insert(I);
// Find non-contiguous blocks and fix them.
@@ -301,7 +302,7 @@ bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF,
// Process this block and all loop blocks contiguous with it, to keep
// them in their relative order.
MachineFunction::iterator Begin = BB;
- MachineFunction::iterator End = next(MachineFunction::iterator(BB));
+ MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB));
for (; End != MF.end(); ++End) {
if (!L->contains(End)) break;
if (!HasAnalyzableTerminator(End)) break;
diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp
index 1b39fec..3c7961c2 100644
--- a/lib/CodeGen/CriticalAntiDepBreaker.cpp
+++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp
@@ -43,8 +43,11 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) {
static_cast<const TargetRegisterClass *>(0));
// Initialize the indices to indicate that no registers are live.
- std::fill(KillIndices, array_endof(KillIndices), ~0u);
- std::fill(DefIndices, array_endof(DefIndices), BB->size());
+ const unsigned BBSize = BB->size();
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i) {
+ KillIndices[i] = ~0u;
+ DefIndices[i] = BBSize;
+ }
// Clear "do not change" set.
KeepRegs.clear();
@@ -122,7 +125,7 @@ void CriticalAntiDepBreaker::Observe(MachineInstr *MI, unsigned Count,
// may have been rescheduled and its lifetime may overlap with registers
// in ways not reflected in our current liveness state. For each such
// register, adjust the liveness state to be conservatively correct.
- for (unsigned Reg = 0; Reg != TargetRegisterInfo::FirstVirtualRegister; ++Reg)
+ for (unsigned Reg = 0; Reg != TRI->getNumRegs(); ++Reg)
if (DefIndices[Reg] < InsertPosIndex && DefIndices[Reg] >= Count) {
assert(KillIndices[Reg] == ~0u && "Clobbered register is live!");
// Mark this register to be non-renamable.
diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp
index 242cba5..297dd31 100644
--- a/lib/CodeGen/LLVMTargetMachine.cpp
+++ b/lib/CodeGen/LLVMTargetMachine.cpp
@@ -74,6 +74,9 @@ EnableFastISelOption("fast-isel", cl::Hidden,
static cl::opt<bool> EnableSplitGEPGVN("split-gep-gvn", cl::Hidden,
cl::desc("Split GEPs and run no-load GVN"));
+static cl::opt<bool> PreAllocTailDup("pre-regalloc-taildup", cl::Hidden,
+ cl::desc("Pre-register allocation tail duplication"));
+
LLVMTargetMachine::LLVMTargetMachine(const Target &T,
const std::string &TargetTriple)
: TargetMachine(T) {
@@ -302,6 +305,13 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
/* allowDoubleDefs= */ true);
}
+ // Pre-ra tail duplication.
+ if (OptLevel != CodeGenOpt::None &&
+ !DisableTailDuplicate && PreAllocTailDup) {
+ PM.add(createTailDuplicatePass(true));
+ printAndVerify(PM, "After Pre-RegAlloc TailDuplicate");
+ }
+
// Run pre-ra passes.
if (addPreRegAlloc(PM, OptLevel))
printAndVerify(PM, "After PreRegAlloc passes",
@@ -348,7 +358,7 @@ bool LLVMTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM,
// Tail duplication.
if (OptLevel != CodeGenOpt::None && !DisableTailDuplicate) {
- PM.add(createTailDuplicatePass());
+ PM.add(createTailDuplicatePass(false));
printAndVerify(PM, "After TailDuplicate");
}
diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp
index 8d632cb..cc286aa 100644
--- a/lib/CodeGen/LiveInterval.cpp
+++ b/lib/CodeGen/LiveInterval.cpp
@@ -847,7 +847,7 @@ void LiveInterval::print(raw_ostream &OS, const TargetRegisterInfo *TRI) const {
if (vni->isUnused()) {
OS << "x";
} else {
- if (!vni->isDefAccurate())
+ if (!vni->isDefAccurate() && !vni->isPHIDef())
OS << "?";
else
OS << vni->def;
diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp
index 24adf36..8806439f 100644
--- a/lib/CodeGen/LiveIntervalAnalysis.cpp
+++ b/lib/CodeGen/LiveIntervalAnalysis.cpp
@@ -149,44 +149,69 @@ void LiveIntervals::dumpInstrs() const {
printInstrs(errs());
}
-/// conflictsWithPhysRegDef - Returns true if the specified register
-/// is defined during the duration of the specified interval.
-bool LiveIntervals::conflictsWithPhysRegDef(const LiveInterval &li,
- VirtRegMap &vrm, unsigned reg) {
- for (LiveInterval::Ranges::const_iterator
- I = li.ranges.begin(), E = li.ranges.end(); I != E; ++I) {
- for (SlotIndex index = I->start.getBaseIndex(),
- end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
- index != end;
- index = index.getNextIndex()) {
- // skip deleted instructions
- while (index != end && !getInstructionFromIndex(index))
- index = index.getNextIndex();
- if (index == end) break;
+bool LiveIntervals::conflictsWithPhysReg(const LiveInterval &li,
+ VirtRegMap &vrm, unsigned reg) {
+ // We don't handle fancy stuff crossing basic block boundaries
+ if (li.ranges.size() != 1)
+ return true;
+ const LiveRange &range = li.ranges.front();
+ SlotIndex idx = range.start.getBaseIndex();
+ SlotIndex end = range.end.getPrevSlot().getBaseIndex().getNextIndex();
+
+ // Skip deleted instructions
+ MachineInstr *firstMI = getInstructionFromIndex(idx);
+ while (!firstMI && idx != end) {
+ idx = idx.getNextIndex();
+ firstMI = getInstructionFromIndex(idx);
+ }
+ if (!firstMI)
+ return false;
- MachineInstr *MI = getInstructionFromIndex(index);
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
- if (tii_->isMoveInstr(*MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
- if (SrcReg == li.reg || DstReg == li.reg)
- continue;
- for (unsigned i = 0; i != MI->getNumOperands(); ++i) {
- MachineOperand& mop = MI->getOperand(i);
- if (!mop.isReg())
- continue;
- unsigned PhysReg = mop.getReg();
- if (PhysReg == 0 || PhysReg == li.reg)
+ // Find last instruction in range
+ SlotIndex lastIdx = end.getPrevIndex();
+ MachineInstr *lastMI = getInstructionFromIndex(lastIdx);
+ while (!lastMI && lastIdx != idx) {
+ lastIdx = lastIdx.getPrevIndex();
+ lastMI = getInstructionFromIndex(lastIdx);
+ }
+ if (!lastMI)
+ return false;
+
+ // Range cannot cross basic block boundaries or terminators
+ MachineBasicBlock *MBB = firstMI->getParent();
+ if (MBB != lastMI->getParent() || lastMI->getDesc().isTerminator())
+ return true;
+
+ MachineBasicBlock::const_iterator E = lastMI;
+ ++E;
+ for (MachineBasicBlock::const_iterator I = firstMI; I != E; ++I) {
+ const MachineInstr &MI = *I;
+
+ // Allow copies to and from li.reg
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (tii_->isMoveInstr(MI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ if (SrcReg == li.reg || DstReg == li.reg)
+ continue;
+
+ // Check for operands using reg
+ for (unsigned i = 0, e = MI.getNumOperands(); i != e; ++i) {
+ const MachineOperand& mop = MI.getOperand(i);
+ if (!mop.isReg())
+ continue;
+ unsigned PhysReg = mop.getReg();
+ if (PhysReg == 0 || PhysReg == li.reg)
+ continue;
+ if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
+ if (!vrm.hasPhys(PhysReg))
continue;
- if (TargetRegisterInfo::isVirtualRegister(PhysReg)) {
- if (!vrm.hasPhys(PhysReg))
- continue;
- PhysReg = vrm.getPhys(PhysReg);
- }
- if (PhysReg && tri_->regsOverlap(PhysReg, reg))
- return true;
+ PhysReg = vrm.getPhys(PhysReg);
}
+ if (PhysReg && tri_->regsOverlap(PhysReg, reg))
+ return true;
}
}
+ // No conflicts found.
return false;
}
@@ -201,15 +226,9 @@ bool LiveIntervals::conflictsWithPhysRegRef(LiveInterval &li,
end = I->end.getPrevSlot().getBaseIndex().getNextIndex();
index != end;
index = index.getNextIndex()) {
- // Skip deleted instructions.
- MachineInstr *MI = 0;
- while (index != end) {
- MI = getInstructionFromIndex(index);
- if (MI)
- break;
- index = index.getNextIndex();
- }
- if (index == end) break;
+ MachineInstr *MI = getInstructionFromIndex(index);
+ if (!MI)
+ continue; // skip deleted instructions
if (JoinedCopies.count(MI))
continue;
@@ -374,8 +393,6 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// Value#0 is now defined by the 2-addr instruction.
OldValNo->def = RedefIndex;
OldValNo->setCopy(0);
- if (MO.isEarlyClobber())
- OldValNo->setHasRedefByEC(true);
// Add the new live interval which replaces the range for the input copy.
LiveRange LR(DefIndex, RedefIndex, ValNo);
@@ -411,7 +428,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
interval.removeRange(Start, End);
assert(interval.ranges.size() == 1 &&
"Newly discovered PHI interval has >1 ranges.");
- MachineBasicBlock *killMBB = getMBBFromIndex(interval.endIndex());
+ MachineBasicBlock *killMBB = getMBBFromIndex(VNI->def);
VNI->addKill(indexes_->getTerminatorGap(killMBB));
VNI->setHasPHIKill(true);
DEBUG({
@@ -422,7 +439,7 @@ void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb,
// Replace the interval with one of a NEW value number. Note that this
// value number isn't actually defined by an instruction, weird huh? :)
LiveRange LR(Start, End,
- interval.getNextValue(SlotIndex(getMBBStartIdx(mbb), true),
+ interval.getNextValue(SlotIndex(getMBBStartIdx(Killer->getParent()), true),
0, false, VNInfoAllocator));
LR.valno->setIsPHIDef(true);
DEBUG(errs() << " replace range with " << LR);
@@ -513,8 +530,6 @@ void LiveIntervals::handlePhysicalRegisterDef(MachineBasicBlock *MBB,
if (mi->isRegTiedToUseOperand(DefIdx)) {
// Two-address instruction.
end = baseIndex.getDefIndex();
- assert(!mi->getOperand(DefIdx).isEarlyClobber() &&
- "Two address instruction is an early clobber?");
} else {
// Another instruction redefines the register before it is ever read.
// Then the register is essentially dead at the instruction that defines
@@ -730,8 +745,16 @@ unsigned LiveIntervals::getVNInfoSourceReg(const VNInfo *VNI) const {
if (VNI->getCopy()->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
// If it's extracting out of a physical register, return the sub-register.
unsigned Reg = VNI->getCopy()->getOperand(1).getReg();
- if (TargetRegisterInfo::isPhysicalRegister(Reg))
+ if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
+ unsigned SrcSubReg = VNI->getCopy()->getOperand(2).getImm();
+ unsigned DstSubReg = VNI->getCopy()->getOperand(0).getSubReg();
+ if (SrcSubReg == DstSubReg)
+ // %reg1034:3<def> = EXTRACT_SUBREG %EDX, 3
+ // reg1034 can still be coalesced to EDX.
+ return Reg;
+ assert(DstSubReg == 0);
Reg = tri_->getSubReg(Reg, VNI->getCopy()->getOperand(2).getImm());
+ }
return Reg;
} else if (VNI->getCopy()->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
VNI->getCopy()->getOpcode() == TargetInstrInfo::SUBREG_TO_REG)
diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp
index 68f80ac..3c88e37 100644
--- a/lib/CodeGen/LiveVariables.cpp
+++ b/lib/CodeGen/LiveVariables.cpp
@@ -720,6 +720,51 @@ bool LiveVariables::VarInfo::isLiveIn(const MachineBasicBlock &MBB,
return findKill(&MBB);
}
+bool LiveVariables::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB) {
+ LiveVariables::VarInfo &VI = getVarInfo(Reg);
+
+ // Loop over all of the successors of the basic block, checking to see if
+ // the value is either live in the block, or if it is killed in the block.
+ std::vector<MachineBasicBlock*> OpSuccBlocks;
+ for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
+ E = MBB.succ_end(); SI != E; ++SI) {
+ MachineBasicBlock *SuccMBB = *SI;
+
+ // Is it alive in this successor?
+ unsigned SuccIdx = SuccMBB->getNumber();
+ if (VI.AliveBlocks.test(SuccIdx))
+ return true;
+ OpSuccBlocks.push_back(SuccMBB);
+ }
+
+ // Check to see if this value is live because there is a use in a successor
+ // that kills it.
+ switch (OpSuccBlocks.size()) {
+ case 1: {
+ MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB)
+ return true;
+ break;
+ }
+ case 2: {
+ MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (VI.Kills[i]->getParent() == SuccMBB1 ||
+ VI.Kills[i]->getParent() == SuccMBB2)
+ return true;
+ break;
+ }
+ default:
+ std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
+ for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
+ if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
+ VI.Kills[i]->getParent()))
+ return true;
+ }
+ return false;
+}
+
/// addNewBlock - Add a new basic block BB as an empty successor to DomBB. All
/// variables that are live out of DomBB will be marked as passing live through
/// BB.
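
For context, the query hoisted into LiveVariables above answers "is Reg still needed after MBB?" by testing whether any successor has Reg live-in (via AliveBlocks) or contains one of Reg's kill points. A minimal caller sketch follows, assuming a computed LiveVariables analysis; the function and flag names are illustrative and mirror how PHI elimination consults the query later in this patch:

#include "llvm/CodeGen/LiveVariables.h"
using namespace llvm;

// Hypothetical helper: decide whether a copy placed at the end of MBB is the
// last use of SrcReg, in which case it may carry a kill flag.
static bool copyKillsSource(LiveVariables &LV, unsigned SrcReg,
                            MachineBasicBlock &MBB, bool usedLaterInBlock) {
  return !usedLaterInBlock && !LV.isLiveOut(SrcReg, MBB);
}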
diff --git a/lib/CodeGen/LowerSubregs.cpp b/lib/CodeGen/LowerSubregs.cpp
index 30636a8..80eb6cd 100644
--- a/lib/CodeGen/LowerSubregs.cpp
+++ b/lib/CodeGen/LowerSubregs.cpp
@@ -312,7 +312,7 @@ bool LowerSubregsInstructionPass::runOnMachineFunction(MachineFunction &MF) {
mbbi != mbbe; ++mbbi) {
for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
mi != me;) {
- MachineBasicBlock::iterator nmi = next(mi);
+ MachineBasicBlock::iterator nmi = llvm::next(mi);
MachineInstr *MI = mi;
if (MI->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
MadeChange |= LowerExtract(MI);
diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp
index e55e369..a58286d 100644
--- a/lib/CodeGen/MachineBasicBlock.cpp
+++ b/lib/CodeGen/MachineBasicBlock.cpp
@@ -290,7 +290,7 @@ void MachineBasicBlock::updateTerminator() {
} else {
// The block has a fallthrough conditional branch.
MachineBasicBlock *MBBA = *succ_begin();
- MachineBasicBlock *MBBB = *next(succ_begin());
+ MachineBasicBlock *MBBB = *llvm::next(succ_begin());
if (MBBA == TBB) std::swap(MBBB, MBBA);
if (isLayoutSuccessor(TBB)) {
if (TII->ReverseBranchCondition(Cond)) {
@@ -359,15 +359,10 @@ bool MachineBasicBlock::isSuccessor(const MachineBasicBlock *MBB) const {
bool MachineBasicBlock::isLayoutSuccessor(const MachineBasicBlock *MBB) const {
MachineFunction::const_iterator I(this);
- return next(I) == MachineFunction::const_iterator(MBB);
+ return llvm::next(I) == MachineFunction::const_iterator(MBB);
}
bool MachineBasicBlock::canFallThrough() {
- MachineBasicBlock *TBB = 0, *FBB = 0;
- SmallVector<MachineOperand, 4> Cond;
- const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
- bool BranchUnAnalyzable = TII->AnalyzeBranch(*this, TBB, FBB, Cond, true);
-
MachineFunction::iterator Fallthrough = this;
++Fallthrough;
// If FallthroughBlock is off the end of the function, it can't fall through.
@@ -378,16 +373,21 @@ bool MachineBasicBlock::canFallThrough() {
if (!isSuccessor(Fallthrough))
return false;
- // If we couldn't analyze the branch, examine the last instruction.
- // If the block doesn't end in a known control barrier, assume fallthrough
- // is possible. The isPredicable check is needed because this code can be
- // called during IfConversion, where an instruction which is normally a
- // Barrier is predicated and thus no longer an actual control barrier. This
- // is over-conservative though, because if an instruction isn't actually
- // predicated we could still treat it like a barrier.
- if (BranchUnAnalyzable)
+ // Analyze the branches, if any, at the end of the block.
+ MachineBasicBlock *TBB = 0, *FBB = 0;
+ SmallVector<MachineOperand, 4> Cond;
+ const TargetInstrInfo *TII = getParent()->getTarget().getInstrInfo();
+ if (TII->AnalyzeBranch(*this, TBB, FBB, Cond, true)) {
+ // If we couldn't analyze the branch, examine the last instruction.
+ // If the block doesn't end in a known control barrier, assume fallthrough
+ // is possible. The isPredicable check is needed because this code can be
+ // called during IfConversion, where an instruction which is normally a
+ // Barrier is predicated and thus no longer an actual control barrier. This
+ // is over-conservative though, because if an instruction isn't actually
+ // predicated we could still treat it like a barrier.
return empty() || !back().getDesc().isBarrier() ||
back().getDesc().isPredicable();
+ }
// If there is no branch, control always falls through.
if (TBB == 0) return true;
@@ -461,18 +461,19 @@ bool MachineBasicBlock::CorrectExtraCFGEdges(MachineBasicBlock *DestA,
bool MadeChange = false;
bool AddedFallThrough = false;
- MachineFunction::iterator FallThru = next(MachineFunction::iterator(this));
+ MachineFunction::iterator FallThru =
+ llvm::next(MachineFunction::iterator(this));
- // If this block ends with a conditional branch that falls through to its
- // successor, set DestB as the successor.
if (isCond) {
+ // If this block ends with a conditional branch that falls through to its
+ // successor, set DestB as the successor.
if (DestB == 0 && FallThru != getParent()->end()) {
DestB = FallThru;
AddedFallThrough = true;
}
} else {
// If this is an unconditional branch with no explicit dest, it must just be
- // a fallthrough into DestB.
+ // a fallthrough into DestA.
if (DestA == 0 && FallThru != getParent()->end()) {
DestA = FallThru;
AddedFallThrough = true;
diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp
index d20f446..dd6fd7e 100644
--- a/lib/CodeGen/MachineFunction.cpp
+++ b/lib/CodeGen/MachineFunction.cpp
@@ -328,7 +328,7 @@ void MachineFunction::print(raw_ostream &OS) const {
if (I->second)
OS << " in reg%" << I->second;
- if (next(I) != E)
+ if (llvm::next(I) != E)
OS << ", ";
}
OS << '\n';
@@ -342,7 +342,7 @@ void MachineFunction::print(raw_ostream &OS) const {
else
OS << "%physreg" << *I;
- if (next(I) != E)
+ if (llvm::next(I) != E)
OS << " ";
}
OS << '\n';
diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp
index f73a5a3..12b974d 100644
--- a/lib/CodeGen/MachineInstr.cpp
+++ b/lib/CodeGen/MachineInstr.cpp
@@ -1058,6 +1058,22 @@ bool MachineInstr::isInvariantLoad(AliasAnalysis *AA) const {
return true;
}
+/// isConstantValuePHI - If the specified instruction is a PHI that always
+/// merges together the same virtual register, return the register, otherwise
+/// return 0.
+unsigned MachineInstr::isConstantValuePHI() const {
+ if (getOpcode() != TargetInstrInfo::PHI)
+ return 0;
+ assert(getNumOperands() >= 3 &&
+ "It's illegal to have a PHI without source operands");
+
+ unsigned Reg = getOperand(1).getReg();
+ for (unsigned i = 3, e = getNumOperands(); i < e; i += 2)
+ if (getOperand(i).getReg() != Reg)
+ return 0;
+ return Reg;
+}
+
void MachineInstr::dump() const {
errs() << " " << *this;
}
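
The helper added in the hunk above recognizes degenerate PHIs: operand 0 is the result, and operands 1, 3, 5, ... are the incoming registers, so a PHI whose incoming registers are all the same virtual register is redundant and the function returns that register (0 otherwise). A hedged sketch of the same check over a plain list of incoming values, with the pair type below an assumption standing in for MachineOperand pairs:

#include <cstddef>
#include <utility>
#include <vector>

// Illustrative only: (incoming register, predecessor block number).
typedef std::pair<unsigned, int> Pred;

// Same contract as MachineInstr::isConstantValuePHI: return the single
// incoming register if every input names it, otherwise 0.
unsigned constantValuePHI(const std::vector<Pred> &Incoming) {
  if (Incoming.empty())
    return 0;
  unsigned Reg = Incoming.front().first;
  for (std::size_t i = 1; i != Incoming.size(); ++i)
    if (Incoming[i].first != Reg)
      return 0;
  return Reg;
}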
@@ -1150,9 +1166,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const {
DebugLocTuple DLT = MF->getDebugLocTuple(debugLoc);
DIScope Scope(DLT.Scope);
OS << " dbg:";
+ // Omit the directory, since it's usually long and uninteresting.
if (!Scope.isNull())
- OS << Scope.getDirectory() << ':' << Scope.getFilename() << ':';
- OS << DLT.Line << ":" << DLT.Col;
+ OS << Scope.getFilename();
+ else
+ OS << "<unknown>";
+ OS << ':' << DLT.Line;
+ if (DLT.Col != 0)
+ OS << ':' << DLT.Col;
}
OS << "\n";
diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp
index db77d19..63f4f18 100644
--- a/lib/CodeGen/MachineLoopInfo.cpp
+++ b/lib/CodeGen/MachineLoopInfo.cpp
@@ -62,11 +62,11 @@ MachineBasicBlock *MachineLoop::getBottomBlock() {
MachineBasicBlock *BotMBB = getHeader();
MachineFunction::iterator End = BotMBB->getParent()->end();
if (BotMBB != prior(End)) {
- MachineBasicBlock *NextMBB = next(MachineFunction::iterator(BotMBB));
+ MachineBasicBlock *NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
while (contains(NextMBB)) {
BotMBB = NextMBB;
- if (BotMBB == next(MachineFunction::iterator(BotMBB))) break;
- NextMBB = next(MachineFunction::iterator(BotMBB));
+ if (BotMBB == llvm::next(MachineFunction::iterator(BotMBB))) break;
+ NextMBB = llvm::next(MachineFunction::iterator(BotMBB));
}
}
return BotMBB;
diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp
new file mode 100644
index 0000000..292096f
--- /dev/null
+++ b/lib/CodeGen/MachineSSAUpdater.cpp
@@ -0,0 +1,393 @@
+//===- MachineSSAUpdater.cpp - Unstructured SSA Update Tool ---------------===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This file implements the MachineSSAUpdater class. It's based on SSAUpdater
+// class in lib/Transforms/Utils.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineSSAUpdater.h"
+#include "llvm/CodeGen/MachineInstr.h"
+#include "llvm/CodeGen/MachineInstrBuilder.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/Target/TargetInstrInfo.h"
+#include "llvm/Target/TargetMachine.h"
+#include "llvm/Target/TargetRegisterInfo.h"
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
+#include "llvm/Support/raw_ostream.h"
+using namespace llvm;
+
+typedef DenseMap<MachineBasicBlock*, unsigned> AvailableValsTy;
+typedef std::vector<std::pair<MachineBasicBlock*, unsigned> >
+ IncomingPredInfoTy;
+
+static AvailableValsTy &getAvailableVals(void *AV) {
+ return *static_cast<AvailableValsTy*>(AV);
+}
+
+static IncomingPredInfoTy &getIncomingPredInfo(void *IPI) {
+ return *static_cast<IncomingPredInfoTy*>(IPI);
+}
+
+
+MachineSSAUpdater::MachineSSAUpdater(MachineFunction &MF,
+ SmallVectorImpl<MachineInstr*> *NewPHI)
+ : AV(0), IPI(0), InsertedPHIs(NewPHI) {
+ TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
+}
+
+MachineSSAUpdater::~MachineSSAUpdater() {
+ delete &getAvailableVals(AV);
+ delete &getIncomingPredInfo(IPI);
+}
+
+/// Initialize - Reset this object to get ready for a new set of SSA
+/// updates. ProtoValue is the value used to name PHI nodes.
+void MachineSSAUpdater::Initialize(unsigned V) {
+ if (AV == 0)
+ AV = new AvailableValsTy();
+ else
+ getAvailableVals(AV).clear();
+
+ if (IPI == 0)
+ IPI = new IncomingPredInfoTy();
+ else
+ getIncomingPredInfo(IPI).clear();
+
+ VR = V;
+ VRC = MRI->getRegClass(VR);
+}
+
+/// HasValueForBlock - Return true if the MachineSSAUpdater already has a value for
+/// the specified block.
+bool MachineSSAUpdater::HasValueForBlock(MachineBasicBlock *BB) const {
+ return getAvailableVals(AV).count(BB);
+}
+
+/// AddAvailableValue - Indicate that a rewritten value is available in the
+/// specified block with the specified value.
+void MachineSSAUpdater::AddAvailableValue(MachineBasicBlock *BB, unsigned V) {
+ getAvailableVals(AV)[BB] = V;
+}
+
+/// GetValueAtEndOfBlock - Construct SSA form, materializing a value that is
+/// live at the end of the specified block.
+unsigned MachineSSAUpdater::GetValueAtEndOfBlock(MachineBasicBlock *BB) {
+ return GetValueAtEndOfBlockInternal(BB);
+}
+
+static
+unsigned LookForIdenticalPHI(MachineBasicBlock *BB,
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> &PredValues) {
+ if (BB->empty())
+ return 0;
+
+ MachineBasicBlock::iterator I = BB->front();
+ if (I->getOpcode() != TargetInstrInfo::PHI)
+ return 0;
+
+ AvailableValsTy AVals;
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ AVals[PredValues[i].first] = PredValues[i].second;
+ while (I != BB->end() && I->getOpcode() == TargetInstrInfo::PHI) {
+ bool Same = true;
+ for (unsigned i = 1, e = I->getNumOperands(); i != e; i += 2) {
+ unsigned SrcReg = I->getOperand(i).getReg();
+ MachineBasicBlock *SrcBB = I->getOperand(i+1).getMBB();
+ if (AVals[SrcBB] != SrcReg) {
+ Same = false;
+ break;
+ }
+ }
+ if (Same)
+ return I->getOperand(0).getReg();
+ ++I;
+ }
+ return 0;
+}
+
+/// InsertNewDef - Insert an empty PHI or IMPLICIT_DEF instruction which defines
+/// a value of the given register class at the start of the specified basic
+/// block. It returns the virtual register defined by the instruction.
+static
+MachineInstr *InsertNewDef(unsigned Opcode,
+ MachineBasicBlock *BB, MachineBasicBlock::iterator I,
+ const TargetRegisterClass *RC,
+ MachineRegisterInfo *MRI, const TargetInstrInfo *TII) {
+ unsigned NewVR = MRI->createVirtualRegister(RC);
+ return BuildMI(*BB, I, DebugLoc::getUnknownLoc(), TII->get(Opcode), NewVR);
+}
+
+/// GetValueInMiddleOfBlock - Construct SSA form, materializing a value that
+/// is live in the middle of the specified block.
+///
+/// GetValueInMiddleOfBlock is the same as GetValueAtEndOfBlock except in one
+/// important case: if there is a definition of the rewritten value after the
+/// 'use' in BB. Consider code like this:
+///
+/// X1 = ...
+/// SomeBB:
+/// use(X)
+/// X2 = ...
+/// br Cond, SomeBB, OutBB
+///
+/// In this case, there are two values (X1 and X2) added to the AvailableVals
+/// set by the client of the rewriter, and those values are both live out of
+/// their respective blocks. However, the use of X happens in the *middle* of
+/// a block. Because of this, we need to insert a new PHI node in SomeBB to
+/// merge the appropriate values, and this value isn't live out of the block.
+///
+unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) {
+ // If there is no definition of the renamed variable in this block, just use
+ // GetValueAtEndOfBlock to do our work.
+ if (!getAvailableVals(AV).count(BB))
+ return GetValueAtEndOfBlockInternal(BB);
+
+ // If there are no predecessors, just return undef.
+ if (BB->pred_empty()) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ return NewDef->getOperand(0).getReg();
+ }
+
+ // Otherwise, we have the hard case. Get the live-in values for each
+ // predecessor.
+ SmallVector<std::pair<MachineBasicBlock*, unsigned>, 8> PredValues;
+ unsigned SingularValue = 0;
+
+ bool isFirstPred = true;
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ PredValues.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+
+ // Otherwise, if all the merged values are the same, just use it.
+ if (SingularValue != 0)
+ return SingularValue;
+
+ // If an identical PHI is already in BB, just reuse it.
+ unsigned DupPHI = LookForIdenticalPHI(BB, PredValues);
+ if (DupPHI)
+ return DupPHI;
+
+ // Otherwise, we do need a PHI: insert one now.
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineInstr *InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB,
+ Loc, VRC, MRI, TII);
+
+ // Fill in all the predecessors of the PHI.
+ MachineInstrBuilder MIB(InsertedPHI);
+ for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
+ MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first);
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+ InsertedPHI->eraseFromParent();
+ return ConstVal;
+ }
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+
+ DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n");
+ return InsertedPHI->getOperand(0).getReg();
+}
+
+static
+MachineBasicBlock *findCorrespondingPred(const MachineInstr *MI,
+ MachineOperand *U) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ if (&MI->getOperand(i) == U)
+ return MI->getOperand(i+1).getMBB();
+ }
+
+ llvm_unreachable("MachineOperand::getParent() failure?");
+ return 0;
+}
+
+/// RewriteUse - Rewrite a use of the symbolic value. This handles PHI nodes,
+/// which use their value in the corresponding predecessor.
+void MachineSSAUpdater::RewriteUse(MachineOperand &U) {
+ MachineInstr *UseMI = U.getParent();
+ unsigned NewVR = 0;
+ if (UseMI->getOpcode() == TargetInstrInfo::PHI) {
+ MachineBasicBlock *SourceBB = findCorrespondingPred(UseMI, &U);
+ NewVR = GetValueAtEndOfBlockInternal(SourceBB);
+ } else {
+ NewVR = GetValueInMiddleOfBlock(UseMI->getParent());
+ }
+
+ U.setReg(NewVR);
+}
+
+void MachineSSAUpdater::ReplaceRegWith(unsigned OldReg, unsigned NewReg) {
+ MRI->replaceRegWith(OldReg, NewReg);
+
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+ for (DenseMap<MachineBasicBlock*, unsigned>::iterator
+ I = AvailableVals.begin(), E = AvailableVals.end(); I != E; ++I)
+ if (I->second == OldReg)
+ I->second = NewReg;
+}
+
+/// GetValueAtEndOfBlockInternal - Check to see if AvailableVals has an entry
+/// for the specified BB and if so, return it. If not, construct SSA form by
+/// walking predecessors inserting PHI nodes as needed until we get to a block
+/// where the value is available.
+///
+unsigned MachineSSAUpdater::GetValueAtEndOfBlockInternal(MachineBasicBlock *BB){
+ AvailableValsTy &AvailableVals = getAvailableVals(AV);
+
+ // Query AvailableVals by doing an insertion of null.
+ std::pair<AvailableValsTy::iterator, bool> InsertRes =
+ AvailableVals.insert(std::make_pair(BB, 0));
+
+ // Handle the case when the insertion fails because we have already seen BB.
+ if (!InsertRes.second) {
+ // If the insertion failed, there are two cases. The first case is that the
+ // value is already available for the specified block. If we get this, just
+ // return the value.
+ if (InsertRes.first->second != 0)
+ return InsertRes.first->second;
+
+ // Otherwise, if the value we find is null, then the value is not yet
+ // known but is being computed elsewhere in our recursion. This means
+ // that we have a cycle. Handle this by inserting a PHI node and returning
+ // it. When we get back to the first instance of the recursion we will fill
+ // in the PHI node.
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ MachineInstr *NewPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc,
+ VRC, MRI,TII);
+ unsigned NewVR = NewPHI->getOperand(0).getReg();
+ InsertRes.first->second = NewVR;
+ return NewVR;
+ }
+
+ // If there are no predecessors, then we must have found an unreachable block;
+ // just return 'undef'. Since there are no predecessors, InsertRes must not
+ // be invalidated.
+ if (BB->pred_empty()) {
+ // Insert an implicit_def to represent an undef value.
+ MachineInstr *NewDef = InsertNewDef(TargetInstrInfo::IMPLICIT_DEF,
+ BB, BB->getFirstTerminator(),
+ VRC, MRI, TII);
+ return InsertRes.first->second = NewDef->getOperand(0).getReg();
+ }
+
+ // Okay, the value isn't in the map and we just inserted a null in the entry
+ // to indicate that we're processing the block. Since we have no idea what
+ // value is in this block, we have to recurse through our predecessors.
+ //
+ // While we're walking our predecessors, we keep track of them in a vector,
+ // then insert a PHI node at the end if we actually need one. We could use a
+ // smallvector here, but that would take a lot of stack space for every level
+ // of the recursion, so instead we use IncomingPredInfo as an explicit stack.
+ IncomingPredInfoTy &IncomingPredInfo = getIncomingPredInfo(IPI);
+ unsigned FirstPredInfoEntry = IncomingPredInfo.size();
+
+ // As we're walking the predecessors, keep track of whether they are all
+ // producing the same value. If so, this value will capture it, if not, it
+ // will get reset to null. We distinguish the no-predecessor case explicitly
+ // below.
+ unsigned SingularValue = 0;
+ bool isFirstPred = true;
+ for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(),
+ E = BB->pred_end(); PI != E; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ unsigned PredVal = GetValueAtEndOfBlockInternal(PredBB);
+ IncomingPredInfo.push_back(std::make_pair(PredBB, PredVal));
+
+ // Compute SingularValue.
+ if (isFirstPred) {
+ SingularValue = PredVal;
+ isFirstPred = false;
+ } else if (PredVal != SingularValue)
+ SingularValue = 0;
+ }
+
+ /// Look up BB's entry in AvailableVals. 'InsertRes' may be invalidated. If
+ /// this block is involved in a loop, a no-entry PHI node will have been
+ /// inserted as InsertedVal. Otherwise, we'll still have the null we inserted
+ /// above.
+ unsigned &InsertedVal = AvailableVals[BB];
+
+ // If all the predecessor values are the same then we don't need to insert a
+ // PHI. This is the simple and common case.
+ if (SingularValue) {
+ // If a PHI node got inserted, replace it with the singular value and delete
+ // it.
+ if (InsertedVal) {
+ MachineInstr *OldVal = MRI->getVRegDef(InsertedVal);
+ // Be careful about dead loops. These RAUW's also update InsertedVal.
+ assert(InsertedVal != SingularValue && "Dead loop?");
+ ReplaceRegWith(InsertedVal, SingularValue);
+ OldVal->eraseFromParent();
+ }
+
+ InsertedVal = SingularValue;
+
+ // Drop the entries we added in IncomingPredInfo to restore the stack.
+ IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+ IncomingPredInfo.end());
+ return InsertedVal;
+ }
+
+
+ // Otherwise, we do need a PHI: insert one now if we don't already have one.
+ MachineInstr *InsertedPHI;
+ if (InsertedVal == 0) {
+ MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->front();
+ InsertedPHI = InsertNewDef(TargetInstrInfo::PHI, BB, Loc,
+ VRC, MRI, TII);
+ InsertedVal = InsertedPHI->getOperand(0).getReg();
+ } else {
+ InsertedPHI = MRI->getVRegDef(InsertedVal);
+ }
+
+ // Fill in all the predecessors of the PHI.
+ MachineInstrBuilder MIB(InsertedPHI);
+ for (IncomingPredInfoTy::iterator I =
+ IncomingPredInfo.begin()+FirstPredInfoEntry,
+ E = IncomingPredInfo.end(); I != E; ++I)
+ MIB.addReg(I->second).addMBB(I->first);
+
+ // Drop the entries we added in IncomingPredInfo to restore the stack.
+ IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
+ IncomingPredInfo.end());
+
+ // See if the PHI node can be merged to a single value. This can happen in
+ // loop cases when we get a PHI of itself and one other value.
+ if (unsigned ConstVal = InsertedPHI->isConstantValuePHI()) {
+ MRI->replaceRegWith(InsertedVal, ConstVal);
+ InsertedPHI->eraseFromParent();
+ InsertedVal = ConstVal;
+ } else {
+ DEBUG(errs() << " Inserted PHI: " << *InsertedPHI << "\n");
+
+ // If the client wants to know about all new instructions, tell it.
+ if (InsertedPHIs) InsertedPHIs->push_back(InsertedPHI);
+ }
+
+ return InsertedVal;
+}
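
A short usage sketch of the new interface may help: a client names the register being rewritten with Initialize(), registers each block that now defines a fresh value with AddAvailableValue(), and calls RewriteUse() on every remaining use so the updater inserts PHIs only where needed. Everything below apart from those three calls and the constructor is an assumption (hypothetical function, parameter, and variable names), not code from this patch:

#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/ADT/SmallVector.h"
#include <cstddef>
#include <utility>
#include <vector>
using namespace llvm;

// Hedged sketch: rewrite uses of OldReg after a transform has introduced
// several new definitions of its value.
static void rewriteAfterDuplication(
    MachineFunction &MF, unsigned OldReg,
    const std::vector<std::pair<MachineBasicBlock*, unsigned> > &NewDefs,
    const std::vector<MachineOperand*> &UsesToRewrite) {
  SmallVector<MachineInstr*, 8> NewPHIs;   // receives any PHIs the updater inserts
  MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
  SSAUpdate.Initialize(OldReg);            // register class is taken from OldReg

  // Tell the updater in which blocks fresh values are now available.
  for (std::size_t i = 0; i != NewDefs.size(); ++i)
    SSAUpdate.AddAvailableValue(NewDefs[i].first, NewDefs[i].second);

  // Let the updater pick, or build via PHIs, the reaching value at each use.
  for (std::size_t i = 0; i != UsesToRewrite.size(); ++i)
    SSAUpdate.RewriteUse(*UsesToRewrite[i]);
}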
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index d9f4c99..917d053 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -376,15 +376,6 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) {
report("MBB doesn't fall through but is empty!", MBB);
}
}
- if (TII->BlockHasNoFallThrough(*MBB)) {
- if (MBB->empty()) {
- report("TargetInstrInfo says the block has no fall through, but the "
- "block is empty!", MBB);
- } else if (!MBB->back().getDesc().isBarrier()) {
- report("TargetInstrInfo says the block has no fall through, but the "
- "block does not end in a barrier!", MBB);
- }
- }
} else {
// Block is last in function.
if (MBB->empty()) {
diff --git a/lib/CodeGen/MaxStackAlignment.cpp b/lib/CodeGen/MaxStackAlignment.cpp
new file mode 100644
index 0000000..d327cfa
--- /dev/null
+++ b/lib/CodeGen/MaxStackAlignment.cpp
@@ -0,0 +1,70 @@
+//===-- MaxStackAlignment.cpp - Compute the required stack alignment -- ---===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//
+//===----------------------------------------------------------------------===//
+//
+// This pass looks for vector register usage and aligned local objects to
+// calculate the maximum required alignment for a function. This is used by
+// targets which support it to determine if dynamic stack realignment is
+// necessary.
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/CodeGen/MachineFunction.h"
+#include "llvm/CodeGen/MachineFrameInfo.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/Passes.h"
+
+using namespace llvm;
+
+namespace {
+ struct MaximalStackAlignmentCalculator : public MachineFunctionPass {
+ static char ID;
+ MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {}
+
+ virtual bool runOnMachineFunction(MachineFunction &MF) {
+ MachineFrameInfo *FFI = MF.getFrameInfo();
+ MachineRegisterInfo &RI = MF.getRegInfo();
+
+ // Calculate max stack alignment of all already allocated stack objects.
+ FFI->calculateMaxStackAlignment();
+ unsigned MaxAlign = FFI->getMaxAlignment();
+
+ // Be over-conservative: scan over all vreg defs and find whether vector
+ // registers are used. If so, there is a chance that vector registers
+ // will be spilled and thus the stack needs to be aligned properly.
+ // FIXME: It would be better to only do this if a spill actually
+ // happens rather than conservatively aligning the stack regardless.
+ for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
+ RegNum < RI.getLastVirtReg(); ++RegNum)
+ MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
+
+ if (FFI->getMaxAlignment() == MaxAlign)
+ return false;
+
+ FFI->setMaxAlignment(MaxAlign);
+ return true;
+ }
+
+ virtual const char *getPassName() const {
+ return "Stack Alignment Requirements Auto-Detector";
+ }
+
+ virtual void getAnalysisUsage(AnalysisUsage &AU) const {
+ AU.setPreservesCFG();
+ MachineFunctionPass::getAnalysisUsage(AU);
+ }
+ };
+
+ char MaximalStackAlignmentCalculator::ID = 0;
+}
+
+FunctionPass*
+llvm::createMaxStackAlignmentCalculatorPass() {
+ return new MaximalStackAlignmentCalculator();
+}
+
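
The pass above reduces to one max computation: start from the largest alignment required by any existing frame object, then raise it to the register-class alignment of every virtual register, on the conservative assumption that vector registers may be spilled. A self-contained sketch of that computation, with plain vectors standing in for MachineFrameInfo and MachineRegisterInfo (both assumptions):

#include <algorithm>
#include <cstddef>
#include <vector>

// Illustrative only: objectAligns holds each stack object's alignment,
// vregClassAligns each virtual register's register-class alignment.
unsigned requiredStackAlignment(const std::vector<unsigned> &objectAligns,
                                const std::vector<unsigned> &vregClassAligns) {
  unsigned MaxAlign = 0;
  for (std::size_t i = 0; i != objectAligns.size(); ++i)
    MaxAlign = std::max(MaxAlign, objectAligns[i]);     // existing frame objects
  for (std::size_t i = 0; i != vregClassAligns.size(); ++i)
    MaxAlign = std::max(MaxAlign, vregClassAligns[i]);  // any vreg might be spilled
  return MaxAlign;
}

A target then compares the result against its natural stack alignment to decide whether dynamic realignment of the frame is required.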
diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp
index 2e30cc6..c62d179 100644
--- a/lib/CodeGen/PHIElimination.cpp
+++ b/lib/CodeGen/PHIElimination.cpp
@@ -287,7 +287,7 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Okay, if we now know that the value is not live out of the block, we can
// add a kill marker in this block saying that it kills the incoming value!
- if (!ValueIsUsed && !isLiveOut(SrcReg, opBlock, *LV)) {
+ if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) {
// In our final twist, we have to decide which instruction kills the
// register. In most cases this is the copy, however, the first
// terminator instruction at the end of the block may also use the value.
@@ -301,8 +301,8 @@ void llvm::PHIElimination::LowerAtomicPHINode(
// Check that no other terminators use values.
#ifndef NDEBUG
- for (MachineBasicBlock::iterator TI = next(Term); TI != opBlock.end();
- ++TI) {
+ for (MachineBasicBlock::iterator TI = llvm::next(Term);
+ TI != opBlock.end(); ++TI) {
assert(!TI->readsRegister(SrcReg) &&
"Terminator instructions cannot use virtual registers unless"
"they are the first terminator in a block!");
@@ -353,59 +353,13 @@ bool llvm::PHIElimination::SplitPHIEdges(MachineFunction &MF,
// We break edges when registers are live out from the predecessor block
// (not considering PHI nodes). If the register is live in to this block
// anyway, we would gain nothing from splitting.
- if (!LV.isLiveIn(Reg, MBB) && isLiveOut(Reg, *PreMBB, LV))
+ if (!LV.isLiveIn(Reg, MBB) && LV.isLiveOut(Reg, *PreMBB))
SplitCriticalEdge(PreMBB, &MBB);
}
}
return true;
}
-bool llvm::PHIElimination::isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
- LiveVariables &LV) {
- LiveVariables::VarInfo &VI = LV.getVarInfo(Reg);
-
- // Loop over all of the successors of the basic block, checking to see if
- // the value is either live in the block, or if it is killed in the block.
- std::vector<MachineBasicBlock*> OpSuccBlocks;
- for (MachineBasicBlock::const_succ_iterator SI = MBB.succ_begin(),
- E = MBB.succ_end(); SI != E; ++SI) {
- MachineBasicBlock *SuccMBB = *SI;
-
- // Is it alive in this successor?
- unsigned SuccIdx = SuccMBB->getNumber();
- if (VI.AliveBlocks.test(SuccIdx))
- return true;
- OpSuccBlocks.push_back(SuccMBB);
- }
-
- // Check to see if this value is live because there is a use in a successor
- // that kills it.
- switch (OpSuccBlocks.size()) {
- case 1: {
- MachineBasicBlock *SuccMBB = OpSuccBlocks[0];
- for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
- if (VI.Kills[i]->getParent() == SuccMBB)
- return true;
- break;
- }
- case 2: {
- MachineBasicBlock *SuccMBB1 = OpSuccBlocks[0], *SuccMBB2 = OpSuccBlocks[1];
- for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
- if (VI.Kills[i]->getParent() == SuccMBB1 ||
- VI.Kills[i]->getParent() == SuccMBB2)
- return true;
- break;
- }
- default:
- std::sort(OpSuccBlocks.begin(), OpSuccBlocks.end());
- for (unsigned i = 0, e = VI.Kills.size(); i != e; ++i)
- if (std::binary_search(OpSuccBlocks.begin(), OpSuccBlocks.end(),
- VI.Kills[i]->getParent()))
- return true;
- }
- return false;
-}
-
MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
MachineBasicBlock *B) {
assert(A && B && "Missing MBB end point");
@@ -423,7 +377,7 @@ MachineBasicBlock *PHIElimination::SplitCriticalEdge(MachineBasicBlock *A,
++NumSplits;
MachineBasicBlock *NMBB = MF->CreateMachineBasicBlock();
- MF->insert(next(MachineFunction::iterator(A)), NMBB);
+ MF->insert(llvm::next(MachineFunction::iterator(A)), NMBB);
DEBUG(errs() << "PHIElimination splitting critical edge:"
" BB#" << A->getNumber()
<< " -- BB#" << NMBB->getNumber()
diff --git a/lib/CodeGen/PHIElimination.h b/lib/CodeGen/PHIElimination.h
index f5872cb..b0b71ce 100644
--- a/lib/CodeGen/PHIElimination.h
+++ b/lib/CodeGen/PHIElimination.h
@@ -93,12 +93,6 @@ namespace llvm {
bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB,
LiveVariables &LV);
- /// isLiveOut - Determine if Reg is live out from MBB, when not
- /// considering PHI nodes. This means that Reg is either killed by
- /// a successor block or passed through one.
- bool isLiveOut(unsigned Reg, const MachineBasicBlock &MBB,
- LiveVariables &LV);
-
/// SplitCriticalEdge - Split a critical edge from A to B by
/// inserting a new MBB. Update branches in A and PHI instructions
/// in B. Return the new block.
diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp
index 9101fce2..79be295 100644
--- a/lib/CodeGen/PostRASchedulerList.cpp
+++ b/lib/CodeGen/PostRASchedulerList.cpp
@@ -373,7 +373,8 @@ void SchedulePostRATDList::FinishBlock() {
///
void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) {
// Initialize the indices to indicate that no registers are live.
- std::fill(KillIndices, array_endof(KillIndices), ~0u);
+ for (unsigned i = 0; i < TRI->getNumRegs(); ++i)
+ KillIndices[i] = ~0u;
// Determine the live-out physregs for this block.
if (!BB->empty() && BB->back().getDesc().isReturn()) {
@@ -510,12 +511,9 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) {
}
if (MO.isKill() != kill) {
- bool removed = ToggleKillFlag(MI, MO);
- if (removed) {
- DEBUG(errs() << "Fixed <removed> in ");
- } else {
- DEBUG(errs() << "Fixed " << MO << " in ");
- }
+ DEBUG(errs() << "Fixing " << MO << " in ");
+ // Warning: ToggleKillFlag may invalidate MO.
+ ToggleKillFlag(MI, MO);
DEBUG(MI->dump());
}
diff --git a/lib/CodeGen/PreAllocSplitting.cpp b/lib/CodeGen/PreAllocSplitting.cpp
index 8f62345..b0d7a47 100644
--- a/lib/CodeGen/PreAllocSplitting.cpp
+++ b/lib/CodeGen/PreAllocSplitting.cpp
@@ -16,6 +16,7 @@
#define DEBUG_TYPE "pre-alloc-split"
#include "VirtRegMap.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
@@ -104,6 +105,7 @@ namespace {
AU.addRequired<LiveStacks>();
AU.addPreserved<LiveStacks>();
AU.addPreserved<RegisterCoalescer>();
+ AU.addPreserved<CalculateSpillWeights>();
if (StrongPHIElim)
AU.addPreservedID(StrongPHIEliminationID);
else
@@ -876,7 +878,7 @@ bool PreAllocSplitting::Rematerialize(unsigned VReg, VNInfo* ValNo,
if (!ValNo->isDefAccurate() || DefMI->getParent() == BarrierMBB)
KillPt = findSpillPoint(BarrierMBB, Barrier, NULL, RefsInMBB);
else
- KillPt = next(MachineBasicBlock::iterator(DefMI));
+ KillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
if (KillPt == DefMI->getParent()->end())
return false;
@@ -1118,7 +1120,7 @@ bool PreAllocSplitting::SplitRegLiveInterval(LiveInterval *LI) {
return false; // No gap to insert spill.
}
} else {
- SpillPt = next(MachineBasicBlock::iterator(DefMI));
+ SpillPt = llvm::next(MachineBasicBlock::iterator(DefMI));
if (SpillPt == DefMBB->end()) {
DEBUG(errs() << "FAILED (could not find a suitable spill point).\n");
return false; // No gap to insert spill.
diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp
index 8905f75..e94247f 100644
--- a/lib/CodeGen/PrologEpilogInserter.cpp
+++ b/lib/CodeGen/PrologEpilogInserter.cpp
@@ -136,9 +136,10 @@ void PEI::getAnalysisUsage(AnalysisUsage &AU) const {
/// pseudo instructions.
void PEI::calculateCallsInformation(MachineFunction &Fn) {
const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo();
+ MachineFrameInfo *FFI = Fn.getFrameInfo();
unsigned MaxCallFrameSize = 0;
- bool HasCalls = false;
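+  // Start from any HasCalls flag already recorded on the frame info so that an
+  // earlier setting is not lost.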
+ bool HasCalls = FFI->hasCalls();
// Get the function call frame set-up and tear-down instruction opcode
int FrameSetupOpcode = RegInfo->getCallFrameSetupOpcode();
@@ -166,7 +167,6 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) {
HasCalls = true;
}
- MachineFrameInfo *FFI = Fn.getFrameInfo();
FFI->setHasCalls(HasCalls);
FFI->setMaxCallFrameSize(MaxCallFrameSize);
@@ -674,7 +674,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) {
if (PrevI == BB->end())
I = BB->begin(); // The replaced instr was the first in the block.
else
- I = next(PrevI);
+ I = llvm::next(PrevI);
continue;
}
diff --git a/lib/CodeGen/RegAllocLinearScan.cpp b/lib/CodeGen/RegAllocLinearScan.cpp
index 4ff5129..c02d47b 100644
--- a/lib/CodeGen/RegAllocLinearScan.cpp
+++ b/lib/CodeGen/RegAllocLinearScan.cpp
@@ -16,6 +16,7 @@
#include "VirtRegRewriter.h"
#include "Spiller.h"
#include "llvm/Function.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -59,6 +60,11 @@ PreSplitIntervals("pre-alloc-split",
cl::desc("Pre-register allocation live interval splitting"),
cl::init(false), cl::Hidden);
+static cl::opt<bool>
+TrivCoalesceEnds("trivial-coalesce-ends",
+ cl::desc("Attempt trivial coalescing of interval ends"),
+ cl::init(false), cl::Hidden);
+
static RegisterRegAlloc
linearscanRegAlloc("linearscan", "linear scan register allocator",
createLinearScanRegisterAllocator);
@@ -182,6 +188,7 @@ namespace {
// Make sure PassManager knows which analyses to make available
// to coalescing and which analyses coalescing invalidates.
AU.addRequiredTransitive<RegisterCoalescer>();
+ AU.addRequired<CalculateSpillWeights>();
if (PreSplitIntervals)
AU.addRequiredID(PreAllocSplittingID);
AU.addRequired<LiveStacks>();
@@ -390,66 +397,71 @@ void RALinScan::ComputeRelatedRegClasses() {
RelatedRegClasses.unionSets(I->second, OneClassForEachPhysReg[*AS]);
}
-/// attemptTrivialCoalescing - If a simple interval is defined by a copy,
-/// try allocate the definition the same register as the source register
-/// if the register is not defined during live time of the interval. This
-/// eliminate a copy. This is used to coalesce copies which were not
-/// coalesced away before allocation either due to dest and src being in
-/// different register classes or because the coalescer was overly
-/// conservative.
+/// attemptTrivialCoalescing - If a simple interval is defined by a copy, try
+/// to allocate the definition the same register as the source register if the
+/// register is not defined during the live time of the interval. If the
+/// interval is killed by a copy, try to use the destination register. This
+/// eliminates a copy. This is used to coalesce copies which were not coalesced
+/// away before allocation either due to dest and src being in different
+/// register classes or because the coalescer was overly conservative.
unsigned RALinScan::attemptTrivialCoalescing(LiveInterval &cur, unsigned Reg) {
unsigned Preference = vrm_->getRegAllocPref(cur.reg);
if ((Preference && Preference == Reg) || !cur.containsOneValue())
return Reg;
- VNInfo *vni = cur.begin()->valno;
- if ((vni->def == SlotIndex()) ||
- vni->isUnused() || !vni->isDefAccurate())
+ // We cannot handle complicated live ranges. Simple linear stuff only.
+ if (cur.ranges.size() != 1)
return Reg;
- MachineInstr *CopyMI = li_->getInstructionFromIndex(vni->def);
- unsigned SrcReg, DstReg, SrcSubReg, DstSubReg, PhysReg;
- if (!CopyMI ||
- !tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+
+ const LiveRange &range = cur.ranges.front();
+
+ VNInfo *vni = range.valno;
+ if (vni->isUnused())
return Reg;
- PhysReg = SrcReg;
- if (TargetRegisterInfo::isVirtualRegister(SrcReg)) {
- if (!vrm_->isAssignedReg(SrcReg))
+
+ unsigned CandReg;
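+  // CandReg is the register we will try to share with cur: either the source
+  // of the copy that defines the interval or the destination of a killing copy.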
+ {
+ MachineInstr *CopyMI;
+ unsigned SrcReg, DstReg, SrcSubReg, DstSubReg;
+ if (vni->def != SlotIndex() && vni->isDefAccurate() &&
+ (CopyMI = li_->getInstructionFromIndex(vni->def)) &&
+ tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg))
+ // Defined by a copy, try to extend SrcReg forward
+ CandReg = SrcReg;
+ else if (TrivCoalesceEnds &&
+ (CopyMI =
+ li_->getInstructionFromIndex(range.end.getBaseIndex())) &&
+ tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubReg, DstSubReg) &&
+ cur.reg == SrcReg)
+ // Only used by a copy, try to extend DstReg backwards
+ CandReg = DstReg;
+ else
+ return Reg;
+ }
+
+ if (TargetRegisterInfo::isVirtualRegister(CandReg)) {
+ if (!vrm_->isAssignedReg(CandReg))
return Reg;
- PhysReg = vrm_->getPhys(SrcReg);
+ CandReg = vrm_->getPhys(CandReg);
}
- if (Reg == PhysReg)
+ if (Reg == CandReg)
return Reg;
const TargetRegisterClass *RC = mri_->getRegClass(cur.reg);
- if (!RC->contains(PhysReg))
+ if (!RC->contains(CandReg))
return Reg;
- // Try to coalesce.
- if (!li_->conflictsWithPhysRegDef(cur, *vrm_, PhysReg)) {
- DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(PhysReg)
- << '\n');
- vrm_->clearVirt(cur.reg);
- vrm_->assignVirt2Phys(cur.reg, PhysReg);
-
- // Remove unnecessary kills since a copy does not clobber the register.
- if (li_->hasInterval(SrcReg)) {
- LiveInterval &SrcLI = li_->getInterval(SrcReg);
- for (MachineRegisterInfo::use_iterator I = mri_->use_begin(cur.reg),
- E = mri_->use_end(); I != E; ++I) {
- MachineOperand &O = I.getOperand();
- if (!O.isKill())
- continue;
- MachineInstr *MI = &*I;
- if (SrcLI.liveAt(li_->getInstructionIndex(MI).getDefIndex()))
- O.setIsKill(false);
- }
- }
+ if (li_->conflictsWithPhysReg(cur, *vrm_, CandReg))
+ return Reg;
- ++NumCoalesce;
- return PhysReg;
- }
+ // Try to coalesce.
+ DEBUG(errs() << "Coalescing: " << cur << " -> " << tri_->getName(CandReg)
+ << '\n');
+ vrm_->clearVirt(cur.reg);
+ vrm_->assignVirt2Phys(cur.reg, CandReg);
- return Reg;
+ ++NumCoalesce;
+ return CandReg;
}
bool RALinScan::runOnMachineFunction(MachineFunction &fn) {
@@ -1261,9 +1273,9 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
// The earliest start of a Spilled interval indicates up to where
// in handled we need to roll back
+ assert(!spillIs.empty() && "No spill intervals?");
+ SlotIndex earliestStart = spillIs[0]->beginIndex();
- LiveInterval *earliestStartInterval = cur;
-
// Spill live intervals of virtual regs mapped to the physical register we
// want to clear (and its aliases). We only spill those that overlap with the
// current interval as the rest do not affect its allocation. we also keep
@@ -1274,19 +1286,16 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
LiveInterval *sli = spillIs.back();
spillIs.pop_back();
DEBUG(errs() << "\t\t\tspilling(a): " << *sli << '\n');
- earliestStartInterval =
- (earliestStartInterval->beginIndex() < sli->beginIndex()) ?
- earliestStartInterval : sli;
+ if (sli->beginIndex() < earliestStart)
+ earliestStart = sli->beginIndex();
std::vector<LiveInterval*> newIs;
- newIs = spiller_->spill(sli, spillIs);
+ newIs = spiller_->spill(sli, spillIs, &earliestStart);
addStackInterval(sli, ls_, li_, mri_, *vrm_);
std::copy(newIs.begin(), newIs.end(), std::back_inserter(added));
spilled.insert(sli->reg);
}
- SlotIndex earliestStart = earliestStartInterval->beginIndex();
-
DEBUG(errs() << "\t\trolling back to: " << earliestStart << '\n');
// Scan handled in reverse order up to the earliest start of a
@@ -1295,7 +1304,7 @@ void RALinScan::assignRegOrStackSlotAtInterval(LiveInterval* cur) {
while (!handled_.empty()) {
LiveInterval* i = handled_.back();
// If this interval starts before t we are done.
- if (i->beginIndex() < earliestStart)
+ if (!i->empty() && i->beginIndex() < earliestStart)
break;
DEBUG(errs() << "\t\t\tundo changes for: " << *i << '\n');
handled_.pop_back();
diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp
index c677d34..c2014a7 100644
--- a/lib/CodeGen/RegAllocPBQP.cpp
+++ b/lib/CodeGen/RegAllocPBQP.cpp
@@ -36,6 +36,7 @@
#include "PBQP/Heuristics/Briggs.h"
#include "VirtRegMap.h"
#include "VirtRegRewriter.h"
+#include "llvm/CodeGen/CalcSpillWeights.h"
#include "llvm/CodeGen/LiveIntervalAnalysis.h"
#include "llvm/CodeGen/LiveStackAnalysis.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
@@ -90,6 +91,7 @@ namespace {
au.addRequired<LiveIntervals>();
//au.addRequiredID(SplitCriticalEdgesID);
au.addRequired<RegisterCoalescer>();
+ au.addRequired<CalculateSpillWeights>();
au.addRequired<LiveStacks>();
au.addPreserved<LiveStacks>();
au.addRequired<MachineLoopInfo>();
diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp
index 94680ed..67bf209 100644
--- a/lib/CodeGen/RegisterScavenging.cpp
+++ b/lib/CodeGen/RegisterScavenging.cpp
@@ -125,7 +125,7 @@ void RegScavenger::forward() {
Tracking = true;
} else {
assert(MBBI != MBB->end() && "Already at the end of the basic block!");
- MBBI = next(MBBI);
+ MBBI = llvm::next(MBBI);
}
MachineInstr *MI = MBBI;
diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
index 06ffdd6..2b52187 100644
--- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
+++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp
@@ -119,7 +119,8 @@ namespace {
/// it can be simplified or if things it uses can be simplified by bit
/// propagation. If so, return true.
bool SimplifyDemandedBits(SDValue Op) {
- APInt Demanded = APInt::getAllOnesValue(Op.getValueSizeInBits());
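+      // Use the scalar (element) width so vector values are handled correctly.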
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Demanded = APInt::getAllOnesValue(BitWidth);
return SimplifyDemandedBits(Op, Demanded);
}
@@ -546,7 +547,8 @@ SDValue DAGCombiner::CombineTo(SDNode *N, const SDValue *To, unsigned NumTo,
To[0].getNode()->dump(&DAG);
errs() << " and " << NumTo-1 << " other values\n";
for (unsigned i = 0, e = NumTo; i != e; ++i)
- assert(N->getValueType(i) == To[i].getValueType() &&
+ assert((!To[i].getNode() ||
+ N->getValueType(i) == To[i].getValueType()) &&
"Cannot combine value to value of different type!"));
WorkListRemover DeadNodes(*this);
DAG.ReplaceAllUsesWith(N, To, &DeadNodes);
@@ -1687,10 +1689,14 @@ SDValue DAGCombiner::SimplifyBinOpWithSameOpcodeHands(SDNode *N) {
// fold (OP (sext x), (sext y)) -> (sext (OP x, y))
// fold (OP (aext x), (aext y)) -> (aext (OP x, y))
// fold (OP (trunc x), (trunc y)) -> (trunc (OP x, y)) (if trunc isn't free)
+ //
+  // Do not sink a logical op inside of a vector extend, since it may combine
+ // into a vsetcc.
if ((N0.getOpcode() == ISD::ZERO_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND||
N0.getOpcode() == ISD::SIGN_EXTEND ||
(N0.getOpcode() == ISD::TRUNCATE &&
!TLI.isTruncateFree(N0.getOperand(0).getValueType(), VT))) &&
+ !VT.isVector() &&
N0.getOperand(0).getValueType() == N1.getOperand(0).getValueType() &&
(!LegalOperations ||
TLI.isOperationLegal(N->getOpcode(), N0.getOperand(0).getValueType()))) {
@@ -1943,8 +1949,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) {
}
// fold (or x, undef) -> -1
- if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF)
- return DAG.getConstant(APInt::getAllOnesValue(VT.getSizeInBits()), VT);
+ if (N0.getOpcode() == ISD::UNDEF || N1.getOpcode() == ISD::UNDEF) {
+ EVT EltVT = VT.isVector() ? VT.getVectorElementType() : VT;
+ return DAG.getConstant(APInt::getAllOnesValue(EltVT.getSizeInBits()), VT);
+ }
// fold (or c1, c2) -> c1|c2
if (N0C && N1C)
return DAG.FoldConstantArithmetic(ISD::OR, VT, N0C, N1C);
@@ -2434,7 +2442,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold (shl c1, c2) -> c1<<c2
if (N0C && N1C)
@@ -2450,7 +2458,7 @@ SDValue DAGCombiner::visitSHL(SDNode *N) {
return N0;
// if (shl x, c) is known to be zero, return 0
if (DAG.MaskedValueIsZero(SDValue(N, 0),
- APInt::getAllOnesValue(VT.getSizeInBits())))
+ APInt::getAllOnesValue(OpSizeInBits)))
return DAG.getConstant(0, VT);
// fold (shl x, (trunc (and y, c))) -> (shl x, (and (trunc y), (trunc c))).
if (N1.getOpcode() == ISD::TRUNCATE &&
@@ -2526,6 +2534,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold (sra c1, c2) -> (sra c1, c2)
if (N0C && N1C)
@@ -2537,7 +2546,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N0C && N0C->isAllOnesValue())
return N0;
// fold (sra x, (setge c, size(x))) -> undef
- if (N1C && N1C->getZExtValue() >= VT.getSizeInBits())
+ if (N1C && N1C->getZExtValue() >= OpSizeInBits)
return DAG.getUNDEF(VT);
// fold (sra x, 0) -> x
if (N1C && N1C->isNullValue())
@@ -2545,7 +2554,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
// fold (sra (shl x, c1), c1) -> sext_inreg for some c1 and target supports
// sext_inreg.
if (N1C && N0.getOpcode() == ISD::SHL && N1 == N0.getOperand(1)) {
- unsigned LowBits = VT.getSizeInBits() - (unsigned)N1C->getZExtValue();
+ unsigned LowBits = OpSizeInBits - (unsigned)N1C->getZExtValue();
EVT EVT = EVT::getIntegerVT(*DAG.getContext(), LowBits);
if ((!LegalOperations || TLI.isOperationLegal(ISD::SIGN_EXTEND_INREG, EVT)))
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT,
@@ -2556,7 +2565,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
if (N1C && N0.getOpcode() == ISD::SRA) {
if (ConstantSDNode *C1 = dyn_cast<ConstantSDNode>(N0.getOperand(1))) {
unsigned Sum = N1C->getZExtValue() + C1->getZExtValue();
- if (Sum >= VT.getSizeInBits()) Sum = VT.getSizeInBits()-1;
+ if (Sum >= OpSizeInBits) Sum = OpSizeInBits-1;
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0.getOperand(0),
DAG.getConstant(Sum, N1C->getValueType(0)));
}
@@ -2572,9 +2581,8 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
const ConstantSDNode *N01C = dyn_cast<ConstantSDNode>(N0.getOperand(1));
if (N01C && N1C) {
// Determine what the truncate's result bitsize and type would be.
- unsigned VTValSize = VT.getSizeInBits();
EVT TruncVT =
- EVT::getIntegerVT(*DAG.getContext(), VTValSize - N1C->getZExtValue());
+ EVT::getIntegerVT(*DAG.getContext(), OpSizeInBits - N1C->getZExtValue());
// Determine the residual right-shift amount.
signed ShiftAmt = N1C->getZExtValue() - N01C->getZExtValue();
@@ -2607,7 +2615,7 @@ SDValue DAGCombiner::visitSRA(SDNode *N) {
EVT TruncVT = N1.getValueType();
SDValue N100 = N1.getOperand(0).getOperand(0);
APInt TruncC = N101C->getAPIntValue();
- TruncC.trunc(TruncVT.getSizeInBits());
+ TruncC.trunc(TruncVT.getScalarType().getSizeInBits());
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT, N0,
DAG.getNode(ISD::AND, N->getDebugLoc(),
TruncVT,
@@ -2636,7 +2644,7 @@ SDValue DAGCombiner::visitSRL(SDNode *N) {
ConstantSDNode *N0C = dyn_cast<ConstantSDNode>(N0);
ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
EVT VT = N0.getValueType();
- unsigned OpSizeInBits = VT.getSizeInBits();
+ unsigned OpSizeInBits = VT.getScalarType().getSizeInBits();
// fold (srl c1, c2) -> c1 >>u c2
if (N0C && N1C)
@@ -3029,7 +3037,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) {
else if (Op.getValueType().bitsGT(VT))
Op = DAG.getNode(ISD::TRUNCATE, N0.getDebugLoc(), VT, Op);
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, Op,
- DAG.getValueType(N0.getValueType()));
+ DAG.getValueType(N0.getValueType().getScalarType()));
}
}
@@ -3170,7 +3178,8 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
} else if (Op.getValueType().bitsGT(VT)) {
Op = DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), VT, Op);
}
- return DAG.getZeroExtendInReg(Op, N->getDebugLoc(), N0.getValueType());
+ return DAG.getZeroExtendInReg(Op, N->getDebugLoc(),
+ N0.getValueType().getScalarType());
}
// Fold (zext (and (trunc x), cst)) -> (and x, cst),
@@ -3193,6 +3202,19 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
X, DAG.getConstant(Mask, VT));
}
+ // Fold (zext (and x, cst)) -> (and (zext x), cst)
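+  // (The (zext (and (trunc x), cst)) form is handled by the fold above.)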
+ if (N0.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant &&
+ N0.getOperand(0).getOpcode() != ISD::TRUNCATE &&
+ N0.getOperand(0).hasOneUse()) {
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ Mask.zext(VT.getSizeInBits());
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ DAG.getNode(ISD::ZERO_EXTEND, N->getDebugLoc(), VT,
+ N0.getOperand(0)),
+ DAG.getConstant(Mask, VT));
+ }
+
// fold (zext (load x)) -> (zext (truncate (zextload x)))
if (ISD::isNON_EXTLoad(N0.getNode()) &&
((!LegalOperations && !cast<LoadSDNode>(N0)->isVolatile()) ||
@@ -3269,6 +3291,26 @@ SDValue DAGCombiner::visitZERO_EXTEND(SDNode *N) {
if (SCC.getNode()) return SCC;
}
+  // (zext (shl/srl (zext x), cst)) -> (shl/srl (zext x), cst)
+ if ((N0.getOpcode() == ISD::SHL || N0.getOpcode() == ISD::SRL) &&
+ isa<ConstantSDNode>(N0.getOperand(1)) &&
+ N0.getOperand(0).getOpcode() == ISD::ZERO_EXTEND &&
+ N0.hasOneUse()) {
+ if (N0.getOpcode() == ISD::SHL) {
+ // If the original shl may be shifting out bits, do not perform this
+ // transformation.
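+      // The inner zext keeps the top bits of its result zero, so the narrow
+      // shl only loses non-zero bits when the shift amount exceeds that zero
+      // count, which is checked below.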
+ unsigned ShAmt = cast<ConstantSDNode>(N0.getOperand(1))->getZExtValue();
+ unsigned KnownZeroBits = N0.getOperand(0).getValueType().getSizeInBits() -
+ N0.getOperand(0).getOperand(0).getValueType().getSizeInBits();
+ if (ShAmt > KnownZeroBits)
+ return SDValue();
+ }
+ DebugLoc dl = N->getDebugLoc();
+ return DAG.getNode(N0.getOpcode(), dl, VT,
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(0)),
+ DAG.getNode(ISD::ZERO_EXTEND, dl, VT, N0.getOperand(1)));
+ }
+
return SDValue();
}
@@ -3529,7 +3571,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
SDValue N1 = N->getOperand(1);
EVT VT = N->getValueType(0);
EVT EVT = cast<VTSDNode>(N1)->getVT();
- unsigned VTBits = VT.getSizeInBits();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
unsigned EVTBits = EVT.getSizeInBits();
// fold (sext_in_reg c1) -> c1
@@ -3537,7 +3579,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), VT, N0, N1);
// If the input is already sign extended, just drop the extension.
- if (DAG.ComputeNumSignBits(N0) >= VT.getSizeInBits()-EVTBits+1)
+ if (DAG.ComputeNumSignBits(N0) >= VTBits-EVTBits+1)
return N0;
// fold (sext_in_reg (sext_in_reg x, VT2), VT1) -> (sext_in_reg x, minVT) pt2
@@ -3552,7 +3594,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// if x is small enough.
if (N0.getOpcode() == ISD::SIGN_EXTEND || N0.getOpcode() == ISD::ANY_EXTEND) {
SDValue N00 = N0.getOperand(0);
- if (N00.getValueType().getSizeInBits() < EVTBits)
+ if (N00.getValueType().getScalarType().getSizeInBits() < EVTBits)
return DAG.getNode(ISD::SIGN_EXTEND, N->getDebugLoc(), VT, N00, N1);
}
@@ -3576,11 +3618,11 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) {
// We already fold "(sext_in_reg (srl X, 25), i8) -> srl X, 25" above.
if (N0.getOpcode() == ISD::SRL) {
if (ConstantSDNode *ShAmt = dyn_cast<ConstantSDNode>(N0.getOperand(1)))
- if (ShAmt->getZExtValue()+EVTBits <= VT.getSizeInBits()) {
+ if (ShAmt->getZExtValue()+EVTBits <= VTBits) {
// We can turn this into an SRA iff the input to the SRL is already sign
// extended enough.
unsigned InSignBits = DAG.ComputeNumSignBits(N0.getOperand(0));
- if (VT.getSizeInBits()-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
+ if (VTBits-(ShAmt->getZExtValue()+EVTBits) < InSignBits)
return DAG.getNode(ISD::SRA, N->getDebugLoc(), VT,
N0.getOperand(0), N0.getOperand(1));
}
@@ -3681,7 +3723,6 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
if (!LD1 || !LD2 || !ISD::isNON_EXTLoad(LD1) || !LD1->hasOneUse())
return SDValue();
EVT LD1VT = LD1->getValueType(0);
- const MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
if (ISD::isNON_EXTLoad(LD2) &&
LD2->hasOneUse() &&
@@ -3689,7 +3730,7 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) {
// If one is volatile it might be ok, but play conservative and bail out.
!LD1->isVolatile() &&
!LD2->isVolatile() &&
- TLI.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1, MFI)) {
+ DAG.isConsecutiveLoad(LD2, LD1, LD1VT.getSizeInBits()/8, 1)) {
unsigned Align = LD1->getAlignment();
unsigned NewAlign = TLI.getTargetData()->
getABITypeAlignment(VT.getTypeForEVT(*DAG.getContext()));
@@ -4804,49 +4845,6 @@ bool DAGCombiner::CombineToPostIndexedLoadStore(SDNode *N) {
return false;
}
-/// InferAlignment - If we can infer some alignment information from this
-/// pointer, return it.
-static unsigned InferAlignment(SDValue Ptr, SelectionDAG &DAG) {
- // If this is a direct reference to a stack slot, use information about the
- // stack slot's alignment.
- int FrameIdx = 1 << 31;
- int64_t FrameOffset = 0;
- if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
- FrameIdx = FI->getIndex();
- } else if (Ptr.getOpcode() == ISD::ADD &&
- isa<ConstantSDNode>(Ptr.getOperand(1)) &&
- isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
- FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
- FrameOffset = Ptr.getConstantOperandVal(1);
- }
-
- if (FrameIdx != (1 << 31)) {
- // FIXME: Handle FI+CST.
- const MachineFrameInfo &MFI = *DAG.getMachineFunction().getFrameInfo();
- if (MFI.isFixedObjectIndex(FrameIdx)) {
- int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
-
- // The alignment of the frame index can be determined from its offset from
- // the incoming frame position. If the frame object is at offset 32 and
- // the stack is guaranteed to be 16-byte aligned, then we know that the
- // object is 16-byte aligned.
- unsigned StackAlign = DAG.getTarget().getFrameInfo()->getStackAlignment();
- unsigned Align = MinAlign(ObjectOffset, StackAlign);
-
- // Finally, the frame object itself may have a known alignment. Factor
- // the alignment + offset into a new alignment. For example, if we know
- // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
- // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte
- // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
- unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
- FrameOffset);
- return std::max(Align, FIInfoAlign);
- }
- }
-
- return 0;
-}
-
SDValue DAGCombiner::visitLOAD(SDNode *N) {
LoadSDNode *LD = cast<LoadSDNode>(N);
SDValue Chain = LD->getChain();
@@ -4854,7 +4852,7 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) {
// Try to infer better alignment information than the load already has.
if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) {
- if (unsigned Align = InferAlignment(Ptr, DAG)) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > LD->getAlignment())
return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(),
LD->getValueType(0),
@@ -5079,7 +5077,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// Try to infer better alignment information than the store already has.
if (OptLevel != CodeGenOpt::None && ST->isUnindexed()) {
- if (unsigned Align = InferAlignment(Ptr, DAG)) {
+ if (unsigned Align = DAG.InferPtrAlignment(Ptr)) {
if (Align > ST->getAlignment())
return DAG.getTruncStore(Chain, N->getDebugLoc(), Value,
Ptr, ST->getSrcValue(),
@@ -5231,7 +5229,7 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) {
// SimplifyDemandedBits, which only works if the value has a single use.
if (SimplifyDemandedBits(Value,
APInt::getLowBitsSet(
- Value.getValueSizeInBits(),
+ Value.getValueType().getScalarType().getSizeInBits(),
ST->getMemoryVT().getSizeInBits())))
return SDValue(N, 0);
}
diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp
index 5eb9ca1..4ead9c9 100644
--- a/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -532,7 +532,15 @@ bool FastISel::SelectBitCast(User *I) {
bool
FastISel::SelectInstruction(Instruction *I) {
- return SelectOperator(I, I->getOpcode());
+ // First, try doing target-independent selection.
+ if (SelectOperator(I, I->getOpcode()))
+ return true;
+
+ // Next, try calling the target to attempt to handle the instruction.
+ if (TargetSelectInstruction(I))
+ return true;
+
+ return false;
}
/// FastEmitBranch - Emit an unconditional branch to the given block,
@@ -541,7 +549,7 @@ FastISel::SelectInstruction(Instruction *I) {
void
FastISel::FastEmitBranch(MachineBasicBlock *MSucc) {
MachineFunction::iterator NextMBB =
- next(MachineFunction::iterator(MBB));
+ llvm::next(MachineFunction::iterator(MBB));
if (MBB->isLayoutSuccessor(MSucc)) {
// The unconditional fall-through case, which needs no instructions.
diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
index 273dbf0..f9c05d0 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp
@@ -232,7 +232,7 @@ void SelectionDAGLegalize::LegalizeDAG() {
// node is only legalized after all of its operands are legalized.
DAG.AssignTopologicalOrder();
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = prior(DAG.allnodes_end()); I != next(E); ++I)
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
LegalizeOp(SDValue(I, 0));
// Finally, it's possible the root changed. Get the new root.
@@ -2294,9 +2294,15 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node,
// NOTE: we could fall back on load/store here too for targets without
// SAR. However, it is doubtful that any exist.
EVT ExtraVT = cast<VTSDNode>(Node->getOperand(1))->getVT();
- unsigned BitsDiff = Node->getValueType(0).getSizeInBits() -
+ EVT VT = Node->getValueType(0);
+ EVT ShiftAmountTy = TLI.getShiftAmountTy();
+ if (VT.isVector()) {
+ ShiftAmountTy = VT;
+ VT = VT.getVectorElementType();
+ }
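+      // For vectors, ShiftAmountTy is the vector type itself (so the shift
+      // constant below is a splat), while BitsDiff is based on the element width.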
+ unsigned BitsDiff = VT.getSizeInBits() -
ExtraVT.getSizeInBits();
- SDValue ShiftCst = DAG.getConstant(BitsDiff, TLI.getShiftAmountTy());
+ SDValue ShiftCst = DAG.getConstant(BitsDiff, ShiftAmountTy);
Tmp1 = DAG.getNode(ISD::SHL, dl, Node->getValueType(0),
Node->getOperand(0), ShiftCst);
Tmp1 = DAG.getNode(ISD::SRA, dl, Node->getValueType(0), Tmp1, ShiftCst);
@@ -3059,8 +3065,7 @@ void SelectionDAGLegalize::PromoteNode(SDNode *Node,
// SelectionDAG::Legalize - This is the entry point for the file.
//
-void SelectionDAG::Legalize(bool TypesNeedLegalizing,
- CodeGenOpt::Level OptLevel) {
+void SelectionDAG::Legalize(CodeGenOpt::Level OptLevel) {
/// run - This is the main entry point to this class.
///
SelectionDAGLegalize(*this, OptLevel).LegalizeDAG();
diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
index 8ac8063..2f4457e 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp
@@ -1167,55 +1167,62 @@ ExpandShiftWithUnknownAmountBit(SDNode *N, SDValue &Lo, SDValue &Hi) {
GetExpandedInteger(N->getOperand(0), InL, InH);
SDValue NVBitsNode = DAG.getConstant(NVTBits, ShTy);
- SDValue Amt2 = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
- SDValue Cmp = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
- Amt, NVBitsNode, ISD::SETULT);
+ SDValue AmtExcess = DAG.getNode(ISD::SUB, dl, ShTy, Amt, NVBitsNode);
+ SDValue AmtLack = DAG.getNode(ISD::SUB, dl, ShTy, NVBitsNode, Amt);
+ SDValue isShort = DAG.getSetCC(dl, TLI.getSetCCResultType(ShTy),
+ Amt, NVBitsNode, ISD::SETULT);
- SDValue Lo1, Hi1, Lo2, Hi2;
+ SDValue LoS, HiS, LoL, HiL;
switch (N->getOpcode()) {
default: llvm_unreachable("Unknown shift");
case ISD::SHL:
- // ShAmt < NVTBits
- Lo1 = DAG.getConstant(0, NVT); // Low part is zero.
- Hi1 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt); // High part from Lo part.
-
- // ShAmt >= NVTBits
- Lo2 = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
- Hi2 = DAG.getNode(ISD::OR, dl, NVT,
+ // Short: ShAmt < NVTBits
+ LoS = DAG.getNode(ISD::SHL, dl, NVT, InL, Amt);
+ HiS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SHL, dl, NVT, InH, Amt),
- DAG.getNode(ISD::SRL, dl, NVT, InL, Amt2));
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SRL, dl, NVT, InL, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ LoL = DAG.getConstant(0, NVT); // Lo part is zero.
+ HiL = DAG.getNode(ISD::SHL, dl, NVT, InL, AmtExcess); // Hi from Lo part.
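+    // E.g. with NVTBits == 32 and Amt < 32: Hi = (InH << Amt) | (InL >> (32-Amt)),
+    // Lo = InL << Amt;  with Amt >= 32: Hi = InL << (Amt-32), Lo = 0.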
- Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
- Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
return true;
case ISD::SRL:
- // ShAmt < NVTBits
- Hi1 = DAG.getConstant(0, NVT); // Hi part is zero.
- Lo1 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt); // Lo part from Hi part.
-
- // ShAmt >= NVTBits
- Hi2 = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
- Lo2 = DAG.getNode(ISD::OR, dl, NVT,
- DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
- DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2));
-
- Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
- Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRL, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
+ DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getConstant(0, NVT); // Hi part is zero.
+ LoL = DAG.getNode(ISD::SRL, dl, NVT, InH, AmtExcess); // Lo from Hi part.
+
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
return true;
case ISD::SRA:
- // ShAmt < NVTBits
- Hi1 = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign extend high part.
- DAG.getConstant(NVTBits-1, ShTy));
- Lo1 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt); // Lo part from Hi part.
-
- // ShAmt >= NVTBits
- Hi2 = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
- Lo2 = DAG.getNode(ISD::OR, dl, NVT,
+ // Short: ShAmt < NVTBits
+ HiS = DAG.getNode(ISD::SRA, dl, NVT, InH, Amt);
+ LoS = DAG.getNode(ISD::OR, dl, NVT,
DAG.getNode(ISD::SRL, dl, NVT, InL, Amt),
- DAG.getNode(ISD::SHL, dl, NVT, InH, Amt2));
+ // FIXME: If Amt is zero, the following shift generates an undefined result
+ // on some architectures.
+ DAG.getNode(ISD::SHL, dl, NVT, InH, AmtLack));
+
+ // Long: ShAmt >= NVTBits
+ HiL = DAG.getNode(ISD::SRA, dl, NVT, InH, // Sign of Hi part.
+ DAG.getConstant(NVTBits-1, ShTy));
+ LoL = DAG.getNode(ISD::SRA, dl, NVT, InH, AmtExcess); // Lo from Hi part.
- Lo = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Lo1, Lo2);
- Hi = DAG.getNode(ISD::SELECT, dl, NVT, Cmp, Hi1, Hi2);
+ Lo = DAG.getNode(ISD::SELECT, dl, NVT, isShort, LoS, LoL);
+ Hi = DAG.getNode(ISD::SELECT, dl, NVT, isShort, HiS, HiL);
return true;
}
@@ -1989,7 +1996,9 @@ bool DAGTypeLegalizer::ExpandIntegerOperand(SDNode *N, unsigned OpNo) {
case ISD::SRA:
case ISD::SRL:
case ISD::ROTL:
- case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ case ISD::ROTR: Res = ExpandIntOp_Shift(N); break;
+ case ISD::RETURNADDR:
+ case ISD::FRAMEADDR: Res = ExpandIntOp_RETURNADDR(N); break;
}
// If the result is null, the sub-method took care of registering results etc.
@@ -2173,6 +2182,15 @@ SDValue DAGTypeLegalizer::ExpandIntOp_Shift(SDNode *N) {
return DAG.UpdateNodeOperands(SDValue(N, 0), N->getOperand(0), Lo);
}
+SDValue DAGTypeLegalizer::ExpandIntOp_RETURNADDR(SDNode *N) {
+  // The argument of the RETURNADDR / FRAMEADDR builtin is a 32-bit constant,
+  // which causes problems on 8/16-bit targets. Just truncate the constant to a
+  // valid type.
+ SDValue Lo, Hi;
+ GetExpandedInteger(N->getOperand(0), Lo, Hi);
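+  // Keep only the low half of the expanded constant; the high half is dropped.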
+ return DAG.UpdateNodeOperands(SDValue(N, 0), Lo);
+}
+
SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) {
SDValue Op = N->getOperand(0);
EVT DstVT = N->getValueType(0);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
index 2ee9f8a..c35f7ad 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h
+++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h
@@ -362,6 +362,7 @@ private:
SDValue ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo);
SDValue ExpandIntOp_TRUNCATE(SDNode *N);
SDValue ExpandIntOp_UINT_TO_FP(SDNode *N);
+ SDValue ExpandIntOp_RETURNADDR(SDNode *N);
void IntegerExpandSetCCOperands(SDValue &NewLHS, SDValue &NewRHS,
ISD::CondCode &CCCode, DebugLoc dl);
@@ -516,6 +517,7 @@ private:
SDValue ScalarizeVecRes_INSERT_VECTOR_ELT(SDNode *N);
SDValue ScalarizeVecRes_LOAD(LoadSDNode *N);
SDValue ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N);
+ SDValue ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N);
SDValue ScalarizeVecRes_SELECT(SDNode *N);
SDValue ScalarizeVecRes_SELECT_CC(SDNode *N);
SDValue ScalarizeVecRes_SETCC(SDNode *N);
@@ -559,6 +561,7 @@ private:
void SplitVecRes_INSERT_VECTOR_ELT(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_LOAD(LoadSDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo, SDValue &Hi);
+ void SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_SETCC(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_UNDEF(SDNode *N, SDValue &Lo, SDValue &Hi);
void SplitVecRes_VECTOR_SHUFFLE(ShuffleVectorSDNode *N, SDValue &Lo,
@@ -601,6 +604,7 @@ private:
SDValue WidenVecRes_INSERT_VECTOR_ELT(SDNode* N);
SDValue WidenVecRes_LOAD(SDNode* N);
SDValue WidenVecRes_SCALAR_TO_VECTOR(SDNode* N);
+ SDValue WidenVecRes_SIGN_EXTEND_INREG(SDNode* N);
SDValue WidenVecRes_SELECT(SDNode* N);
SDValue WidenVecRes_SELECT_CC(SDNode* N);
SDValue WidenVecRes_UNDEF(SDNode *N);
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
index 785c2ad..2625245 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp
@@ -79,7 +79,7 @@ bool VectorLegalizer::Run() {
// node is only legalized after all of its operands are legalized.
DAG.AssignTopologicalOrder();
for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(),
- E = prior(DAG.allnodes_end()); I != next(E); ++I)
+ E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I)
LegalizeOp(SDValue(I, 0));
// Finally, it's possible the root changed. Get the new root.
@@ -179,6 +179,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) {
case ISD::FRINT:
case ISD::FNEARBYINT:
case ISD::FFLOOR:
+ case ISD::SIGN_EXTEND_INREG:
QueryType = Node->getValueType(0);
break;
case ISD::SINT_TO_FP:
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
index 023324b..cf67ab9 100644
--- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
+++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp
@@ -54,6 +54,7 @@ void DAGTypeLegalizer::ScalarizeVectorResult(SDNode *N, unsigned ResNo) {
case ISD::INSERT_VECTOR_ELT: R = ScalarizeVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: R = ScalarizeVecRes_LOAD(cast<LoadSDNode>(N));break;
case ISD::SCALAR_TO_VECTOR: R = ScalarizeVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: R = ScalarizeVecRes_SIGN_EXTEND_INREG(N); break;
case ISD::SELECT: R = ScalarizeVecRes_SELECT(N); break;
case ISD::SELECT_CC: R = ScalarizeVecRes_SELECT_CC(N); break;
case ISD::SETCC: R = ScalarizeVecRes_SETCC(N); break;
@@ -195,6 +196,13 @@ SDValue DAGTypeLegalizer::ScalarizeVecRes_SCALAR_TO_VECTOR(SDNode *N) {
return InOp;
}
+SDValue DAGTypeLegalizer::ScalarizeVecRes_SIGN_EXTEND_INREG(SDNode *N) {
+ EVT EltVT = N->getValueType(0).getVectorElementType();
+ SDValue LHS = GetScalarizedVector(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(), EltVT,
+ LHS, N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::ScalarizeVecRes_SELECT(SDNode *N) {
SDValue LHS = GetScalarizedVector(N->getOperand(1));
return DAG.getNode(ISD::SELECT, N->getDebugLoc(),
@@ -401,6 +409,7 @@ void DAGTypeLegalizer::SplitVectorResult(SDNode *N, unsigned ResNo) {
case ISD::FPOWI: SplitVecRes_FPOWI(N, Lo, Hi); break;
case ISD::INSERT_VECTOR_ELT: SplitVecRes_INSERT_VECTOR_ELT(N, Lo, Hi); break;
case ISD::SCALAR_TO_VECTOR: SplitVecRes_SCALAR_TO_VECTOR(N, Lo, Hi); break;
+ case ISD::SIGN_EXTEND_INREG: SplitVecRes_SIGN_EXTEND_INREG(N, Lo, Hi); break;
case ISD::LOAD:
SplitVecRes_LOAD(cast<LoadSDNode>(N), Lo, Hi);
break;
@@ -700,6 +709,18 @@ void DAGTypeLegalizer::SplitVecRes_SCALAR_TO_VECTOR(SDNode *N, SDValue &Lo,
Hi = DAG.getUNDEF(HiVT);
}
+void DAGTypeLegalizer::SplitVecRes_SIGN_EXTEND_INREG(SDNode *N, SDValue &Lo,
+ SDValue &Hi) {
+ SDValue LHSLo, LHSHi;
+ GetSplitVector(N->getOperand(0), LHSLo, LHSHi);
+ DebugLoc dl = N->getDebugLoc();
+
+ Lo = DAG.getNode(N->getOpcode(), dl, LHSLo.getValueType(), LHSLo,
+ N->getOperand(1));
+ Hi = DAG.getNode(N->getOpcode(), dl, LHSHi.getValueType(), LHSHi,
+ N->getOperand(1));
+}
+
void DAGTypeLegalizer::SplitVecRes_LOAD(LoadSDNode *LD, SDValue &Lo,
SDValue &Hi) {
assert(ISD::isUNINDEXEDLoad(LD) && "Indexed load during type legalization!");
@@ -1141,6 +1162,7 @@ void DAGTypeLegalizer::WidenVectorResult(SDNode *N, unsigned ResNo) {
case ISD::INSERT_VECTOR_ELT: Res = WidenVecRes_INSERT_VECTOR_ELT(N); break;
case ISD::LOAD: Res = WidenVecRes_LOAD(N); break;
case ISD::SCALAR_TO_VECTOR: Res = WidenVecRes_SCALAR_TO_VECTOR(N); break;
+ case ISD::SIGN_EXTEND_INREG: Res = WidenVecRes_SIGN_EXTEND_INREG(N); break;
case ISD::SELECT: Res = WidenVecRes_SELECT(N); break;
case ISD::SELECT_CC: Res = WidenVecRes_SELECT_CC(N); break;
case ISD::UNDEF: Res = WidenVecRes_UNDEF(N); break;
@@ -1691,6 +1713,13 @@ SDValue DAGTypeLegalizer::WidenVecRes_SCALAR_TO_VECTOR(SDNode *N) {
WidenVT, N->getOperand(0));
}
+SDValue DAGTypeLegalizer::WidenVecRes_SIGN_EXTEND_INREG(SDNode *N) {
+ EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
+ SDValue WidenLHS = GetWidenedVector(N->getOperand(0));
+ return DAG.getNode(ISD::SIGN_EXTEND_INREG, N->getDebugLoc(),
+ WidenVT, WidenLHS, N->getOperand(1));
+}
+
SDValue DAGTypeLegalizer::WidenVecRes_SELECT(SDNode *N) {
EVT WidenVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0));
unsigned WidenNumElts = WidenVT.getVectorNumElements();
diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
index d53de34..b2ee8bb 100644
--- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
+++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp
@@ -20,10 +20,16 @@
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtarget.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
using namespace llvm;
+cl::opt<bool>
+DisableInstScheduling("disable-inst-scheduling",
+ cl::init(false),
+ cl::desc("Disable instruction scheduling"));
+
ScheduleDAGSDNodes::ScheduleDAGSDNodes(MachineFunction &mf)
: ScheduleDAG(mf) {
}
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index c38c79b..da55e6b 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -27,6 +27,7 @@
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetData.h"
+#include "llvm/Target/TargetFrameInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetOptions.h"
#include "llvm/Target/TargetInstrInfo.h"
@@ -47,6 +48,8 @@
#include <cmath>
using namespace llvm;
+extern cl::opt<bool> DisableInstScheduling;
+
/// makeVTList - Return an instance of the SDVTList struct initialized with the
/// specified members.
static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) {
@@ -551,6 +554,9 @@ void SelectionDAG::RemoveDeadNodes(SmallVectorImpl<SDNode *> &DeadNodes,
}
DeallocateNode(N);
+
+ // Remove the ordering of this node.
+ if (Ordering) Ordering->remove(N);
}
}
@@ -576,6 +582,9 @@ void SelectionDAG::DeleteNodeNotInCSEMaps(SDNode *N) {
N->DropOperands();
DeallocateNode(N);
+
+ // Remove the ordering of this node.
+ if (Ordering) Ordering->remove(N);
}
void SelectionDAG::DeallocateNode(SDNode *N) {
@@ -587,6 +596,9 @@ void SelectionDAG::DeallocateNode(SDNode *N) {
N->NodeType = ISD::DELETED_NODE;
NodeAllocator.Deallocate(AllNodes.remove(N));
+
+ // Remove the ordering of this node.
+ if (Ordering) Ordering->remove(N);
}
/// RemoveNodeFromCSEMaps - Take the specified node out of the CSE map that
@@ -690,7 +702,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N, SDValue Op,
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 1);
AddNodeIDCustom(ID, N);
- return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (Ordering) Ordering->remove(Node);
+ return Node;
}
/// FindModifiedNodeSlot - Find a slot for the specified node if its operands
@@ -707,7 +721,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, 2);
AddNodeIDCustom(ID, N);
- return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (Ordering) Ordering->remove(Node);
+ return Node;
}
@@ -724,7 +740,9 @@ SDNode *SelectionDAG::FindModifiedNodeSlot(SDNode *N,
FoldingSetNodeID ID;
AddNodeIDNode(ID, N->getOpcode(), N->getVTList(), Ops, NumOps);
AddNodeIDCustom(ID, N);
- return CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ SDNode *Node = CSEMap.FindNodeOrInsertPos(ID, InsertPos);
+ if (Ordering) Ordering->remove(Node);
+ return Node;
}
/// VerifyNode - Sanity check the given node. Aborts if it is invalid.
@@ -777,8 +795,13 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const {
SelectionDAG::SelectionDAG(TargetLowering &tli, FunctionLoweringInfo &fli)
: TLI(tli), FLI(fli), DW(0),
EntryNode(ISD::EntryToken, DebugLoc::getUnknownLoc(),
- getVTList(MVT::Other)), Root(getEntryNode()) {
+ getVTList(MVT::Other)),
+ Root(getEntryNode()), Ordering(0) {
AllNodes.push_back(&EntryNode);
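+  // When instruction scheduling is disabled, record the order in which nodes
+  // are created.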
+ if (DisableInstScheduling) {
+ Ordering = new NodeOrdering();
+ Ordering->add(&EntryNode);
+ }
}
void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
@@ -791,6 +814,7 @@ void SelectionDAG::init(MachineFunction &mf, MachineModuleInfo *mmi,
SelectionDAG::~SelectionDAG() {
allnodes_clear();
+ delete Ordering;
}
void SelectionDAG::allnodes_clear() {
@@ -816,6 +840,10 @@ void SelectionDAG::clear() {
EntryNode.UseList = 0;
AllNodes.push_back(&EntryNode);
Root = getEntryNode();
+ if (DisableInstScheduling) {
+ Ordering = new NodeOrdering();
+ Ordering->add(&EntryNode);
+ }
}
SDValue SelectionDAG::getSExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
@@ -831,8 +859,12 @@ SDValue SelectionDAG::getZExtOrTrunc(SDValue Op, DebugLoc DL, EVT VT) {
}
SDValue SelectionDAG::getZeroExtendInReg(SDValue Op, DebugLoc DL, EVT VT) {
+ assert(!VT.isVector() &&
+ "getZeroExtendInReg should use the vector element type instead of "
+ "the vector type!");
if (Op.getValueType() == VT) return Op;
- APInt Imm = APInt::getLowBitsSet(Op.getValueSizeInBits(),
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
+ APInt Imm = APInt::getLowBitsSet(BitWidth,
VT.getSizeInBits());
return getNode(ISD::AND, DL, Op.getValueType(), Op,
getConstant(Imm, Op.getValueType()));
@@ -872,14 +904,17 @@ SDValue SelectionDAG::getConstant(const ConstantInt &Val, EVT VT, bool isT) {
ID.AddPointer(&Val);
void *IP = 0;
SDNode *N = NULL;
- if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) {
+ if (Ordering) Ordering->add(N);
if (!VT.isVector())
return SDValue(N, 0);
+ }
if (!N) {
N = NodeAllocator.Allocate<ConstantSDNode>();
new (N) ConstantSDNode(isT, &Val, EltVT);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
}
SDValue Result(N, 0);
@@ -916,14 +951,17 @@ SDValue SelectionDAG::getConstantFP(const ConstantFP& V, EVT VT, bool isTarget){
ID.AddPointer(&V);
void *IP = 0;
SDNode *N = NULL;
- if ((N = CSEMap.FindNodeOrInsertPos(ID, IP)))
+ if ((N = CSEMap.FindNodeOrInsertPos(ID, IP))) {
+ if (Ordering) Ordering->add(N);
if (!VT.isVector())
return SDValue(N, 0);
+ }
if (!N) {
N = NodeAllocator.Allocate<ConstantFPSDNode>();
new (N) ConstantFPSDNode(isTarget, &V, EltVT);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
}
SDValue Result(N, 0);
@@ -978,12 +1016,15 @@ SDValue SelectionDAG::getGlobalAddress(const GlobalValue *GV,
ID.AddInteger(Offset);
ID.AddInteger(TargetFlags);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<GlobalAddressSDNode>();
new (N) GlobalAddressSDNode(Opc, GV, VT, Offset, TargetFlags);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -993,12 +1034,15 @@ SDValue SelectionDAG::getFrameIndex(int FI, EVT VT, bool isTarget) {
AddNodeIDNode(ID, Opc, getVTList(VT), 0, 0);
ID.AddInteger(FI);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<FrameIndexSDNode>();
new (N) FrameIndexSDNode(FI, VT, isTarget);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1012,12 +1056,15 @@ SDValue SelectionDAG::getJumpTable(int JTI, EVT VT, bool isTarget,
ID.AddInteger(JTI);
ID.AddInteger(TargetFlags);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<JumpTableSDNode>();
new (N) JumpTableSDNode(JTI, VT, isTarget, TargetFlags);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1037,12 +1084,15 @@ SDValue SelectionDAG::getConstantPool(Constant *C, EVT VT,
ID.AddPointer(C);
ID.AddInteger(TargetFlags);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1063,12 +1113,15 @@ SDValue SelectionDAG::getConstantPool(MachineConstantPoolValue *C, EVT VT,
C->AddSelectionDAGCSEId(ID);
ID.AddInteger(TargetFlags);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<ConstantPoolSDNode>();
new (N) ConstantPoolSDNode(isTarget, C, VT, Offset, Alignment, TargetFlags);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1077,12 +1130,15 @@ SDValue SelectionDAG::getBasicBlock(MachineBasicBlock *MBB) {
AddNodeIDNode(ID, ISD::BasicBlock, getVTList(MVT::Other), 0, 0);
ID.AddPointer(MBB);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<BasicBlockSDNode>();
new (N) BasicBlockSDNode(MBB);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1098,6 +1154,7 @@ SDValue SelectionDAG::getValueType(EVT VT) {
N = NodeAllocator.Allocate<VTSDNode>();
new (N) VTSDNode(VT);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1107,6 +1164,7 @@ SDValue SelectionDAG::getExternalSymbol(const char *Sym, EVT VT) {
N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
new (N) ExternalSymbolSDNode(false, Sym, 0, VT);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1119,6 +1177,7 @@ SDValue SelectionDAG::getTargetExternalSymbol(const char *Sym, EVT VT,
N = NodeAllocator.Allocate<ExternalSymbolSDNode>();
new (N) ExternalSymbolSDNode(true, Sym, TargetFlags, VT);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1131,6 +1190,7 @@ SDValue SelectionDAG::getCondCode(ISD::CondCode Cond) {
new (N) CondCodeSDNode(Cond);
CondCodeNodes[Cond] = N;
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
}
return SDValue(CondCodeNodes[Cond], 0);
}
@@ -1223,8 +1283,10 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
ID.AddInteger(MaskVec[i]);
void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
// Allocate the mask array for the node out of the BumpPtrAllocator, since
// SDNode doesn't have access to it. This memory will be "leaked" when
@@ -1236,6 +1298,7 @@ SDValue SelectionDAG::getVectorShuffle(EVT VT, DebugLoc dl, SDValue N1,
new (N) ShuffleVectorSDNode(VT, dl, N1, N2, MaskAlloc);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1253,12 +1316,15 @@ SDValue SelectionDAG::getConvertRndSat(EVT VT, DebugLoc dl,
SDValue Ops[] = { Val, DTy, STy, Rnd, Sat };
AddNodeIDNode(ID, ISD::CONVERT_RNDSAT, getVTList(VT), &Ops[0], 5);
void* IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
CvtRndSatSDNode *N = NodeAllocator.Allocate<CvtRndSatSDNode>();
new (N) CvtRndSatSDNode(VT, dl, Ops, 5, Code);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1267,12 +1333,15 @@ SDValue SelectionDAG::getRegister(unsigned RegNo, EVT VT) {
AddNodeIDNode(ID, ISD::Register, getVTList(VT), 0, 0);
ID.AddInteger(RegNo);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<RegisterSDNode>();
new (N) RegisterSDNode(RegNo, VT);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1284,12 +1353,15 @@ SDValue SelectionDAG::getLabel(unsigned Opcode, DebugLoc dl,
AddNodeIDNode(ID, Opcode, getVTList(MVT::Other), &Ops[0], 1);
ID.AddInteger(LabelID);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<LabelSDNode>();
new (N) LabelSDNode(Opcode, dl, Root, LabelID);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1303,12 +1375,15 @@ SDValue SelectionDAG::getBlockAddress(BlockAddress *BA, EVT VT,
ID.AddPointer(BA);
ID.AddInteger(TargetFlags);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<BlockAddressSDNode>();
new (N) BlockAddressSDNode(Opc, VT, BA, TargetFlags);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1321,13 +1396,16 @@ SDValue SelectionDAG::getSrcValue(const Value *V) {
ID.AddPointer(V);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<SrcValueSDNode>();
new (N) SrcValueSDNode(V);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -1480,7 +1558,7 @@ bool SelectionDAG::SignBitIsZero(SDValue Op, unsigned Depth) const {
if (Op.getValueType().isVector())
return false;
- unsigned BitWidth = Op.getValueSizeInBits();
+ unsigned BitWidth = Op.getValueType().getScalarType().getSizeInBits();
return MaskedValueIsZero(Op, APInt::getSignBit(BitWidth), Depth);
}
@@ -1503,7 +1581,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
APInt &KnownZero, APInt &KnownOne,
unsigned Depth) const {
unsigned BitWidth = Mask.getBitWidth();
- assert(BitWidth == Op.getValueType().getSizeInBits() &&
+ assert(BitWidth == Op.getValueType().getScalarType().getSizeInBits() &&
"Mask size mismatches value type size!");
KnownZero = KnownOne = APInt(BitWidth, 0); // Don't know anything.
@@ -1760,7 +1838,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
}
case ISD::ZERO_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getSizeInBits();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
APInt InMask = Mask;
InMask.trunc(InBits);
@@ -1774,7 +1852,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
}
case ISD::SIGN_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getSizeInBits();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InSignBit = APInt::getSignBit(InBits);
APInt NewBits = APInt::getHighBitsSet(BitWidth, BitWidth - InBits) & Mask;
APInt InMask = Mask;
@@ -1815,7 +1893,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
}
case ISD::ANY_EXTEND: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getSizeInBits();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InMask = Mask;
InMask.trunc(InBits);
KnownZero.trunc(InBits);
@@ -1827,7 +1905,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
}
case ISD::TRUNCATE: {
EVT InVT = Op.getOperand(0).getValueType();
- unsigned InBits = InVT.getSizeInBits();
+ unsigned InBits = InVT.getScalarType().getSizeInBits();
APInt InMask = Mask;
InMask.zext(InBits);
KnownZero.zext(InBits);
@@ -1960,7 +2038,7 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, const APInt &Mask,
unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
EVT VT = Op.getValueType();
assert(VT.isInteger() && "Invalid VT!");
- unsigned VTBits = VT.getSizeInBits();
+ unsigned VTBits = VT.getScalarType().getSizeInBits();
unsigned Tmp, Tmp2;
unsigned FirstAnswer = 1;
@@ -1987,7 +2065,7 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{
}
case ISD::SIGN_EXTEND:
- Tmp = VTBits-Op.getOperand(0).getValueType().getSizeInBits();
+ Tmp = VTBits-Op.getOperand(0).getValueType().getScalarType().getSizeInBits();
return ComputeNumSignBits(Op.getOperand(0), Depth+1) + Tmp;
case ISD::SIGN_EXTEND_INREG:
@@ -2238,13 +2316,16 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, getVTList(VT), 0, 0);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<SDNode>();
new (N) SDNode(Opcode, DL, getVTList(VT));
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
#ifndef NDEBUG
VerifyNode(N);
#endif
@@ -2349,6 +2430,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
assert(VT.isFloatingPoint() &&
Operand.getValueType().isFloatingPoint() && "Invalid FP cast!");
if (Operand.getValueType() == VT) return Operand; // noop conversion.
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
if (Operand.getOpcode() == ISD::UNDEF)
return getUNDEF(VT);
break;
@@ -2356,8 +2441,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid SIGN_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().bitsLT(VT)
- && "Invalid sext node, dst < src!");
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid sext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
if (OpOpcode == ISD::SIGN_EXTEND || OpOpcode == ISD::ZERO_EXTEND)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
break;
@@ -2365,8 +2454,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ZERO_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().bitsLT(VT)
- && "Invalid zext node, dst < src!");
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid zext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
if (OpOpcode == ISD::ZERO_EXTEND) // (zext (zext x)) -> (zext x)
return getNode(ISD::ZERO_EXTEND, DL, VT,
Operand.getNode()->getOperand(0));
@@ -2375,8 +2468,12 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid ANY_EXTEND!");
if (Operand.getValueType() == VT) return Operand; // noop extension
- assert(Operand.getValueType().bitsLT(VT)
- && "Invalid anyext node, dst < src!");
+ assert(Operand.getValueType().getScalarType().bitsLT(VT.getScalarType()) &&
+ "Invalid anyext node, dst < src!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND)
// (ext (zext x)) -> (zext x) and (ext (sext x)) -> (sext x)
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
@@ -2385,14 +2482,19 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
assert(VT.isInteger() && Operand.getValueType().isInteger() &&
"Invalid TRUNCATE!");
if (Operand.getValueType() == VT) return Operand; // noop truncate
- assert(Operand.getValueType().bitsGT(VT)
- && "Invalid truncate node, src < dst!");
+ assert(Operand.getValueType().getScalarType().bitsGT(VT.getScalarType()) &&
+ "Invalid truncate node, src < dst!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() ==
+ Operand.getValueType().getVectorNumElements()) &&
+ "Vector element count mismatch!");
if (OpOpcode == ISD::TRUNCATE)
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
else if (OpOpcode == ISD::ZERO_EXTEND || OpOpcode == ISD::SIGN_EXTEND ||
OpOpcode == ISD::ANY_EXTEND) {
// If the source is smaller than the dest, we still need an extend.
- if (Operand.getNode()->getOperand(0).getValueType().bitsLT(VT))
+ if (Operand.getNode()->getOperand(0).getValueType().getScalarType()
+ .bitsLT(VT.getScalarType()))
return getNode(OpOpcode, DL, VT, Operand.getNode()->getOperand(0));
else if (Operand.getNode()->getOperand(0).getValueType().bitsGT(VT))
return getNode(ISD::TRUNCATE, DL, VT, Operand.getNode()->getOperand(0));
@@ -2447,8 +2549,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
SDValue Ops[1] = { Operand };
AddNodeIDNode(ID, Opcode, VTs, Ops, 1);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
N = NodeAllocator.Allocate<UnarySDNode>();
new (N) UnarySDNode(Opcode, DL, VTs, Operand);
CSEMap.InsertNode(N, IP);
@@ -2458,6 +2562,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL,
}
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
#ifndef NDEBUG
VerifyNode(N);
#endif
@@ -2623,6 +2728,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
assert(VT == N1.getValueType() && "Not an inreg extend!");
assert(VT.isInteger() && EVT.isInteger() &&
"Cannot *_EXTEND_INREG FP types");
+ assert(!EVT.isVector() &&
+ "AssertSExt/AssertZExt type should be the vector element type "
+ "rather than the vector type!");
assert(EVT.bitsLE(VT) && "Not extending!");
if (VT == EVT) return N1; // noop assertion.
break;
@@ -2632,12 +2740,15 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
assert(VT == N1.getValueType() && "Not an inreg extend!");
assert(VT.isInteger() && EVT.isInteger() &&
"Cannot *_EXTEND_INREG FP types");
- assert(EVT.bitsLE(VT) && "Not extending!");
+ assert(!EVT.isVector() &&
+ "SIGN_EXTEND_INREG type should be the vector element type rather "
+ "than the vector type!");
+ assert(EVT.bitsLE(VT.getScalarType()) && "Not extending!");
if (EVT == VT) return N1; // Not actually extending
if (N1C) {
APInt Val = N1C->getAPIntValue();
- unsigned FromBits = cast<VTSDNode>(N2)->getVT().getSizeInBits();
+ unsigned FromBits = EVT.getSizeInBits();
Val <<= Val.getBitWidth()-FromBits;
Val = Val.ashr(Val.getBitWidth()-FromBits);
return getConstant(Val, VT);
@@ -2859,8 +2970,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, 2);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
N = NodeAllocator.Allocate<BinarySDNode>();
new (N) BinarySDNode(Opcode, DL, VTs, N1, N2);
CSEMap.InsertNode(N, IP);
@@ -2870,6 +2983,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
#ifndef NDEBUG
VerifyNode(N);
#endif
@@ -2936,8 +3050,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTs, Ops, 3);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
N = NodeAllocator.Allocate<TernarySDNode>();
new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
CSEMap.InsertNode(N, IP);
@@ -2945,7 +3061,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
N = NodeAllocator.Allocate<TernarySDNode>();
new (N) TernarySDNode(Opcode, DL, VTs, N1, N2, N3);
}
+
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
#ifndef NDEBUG
VerifyNode(N);
#endif
@@ -3541,12 +3659,14 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
}
SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Cmp, Swp, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -3604,12 +3724,14 @@ SDValue SelectionDAG::getAtomic(unsigned Opcode, DebugLoc dl, EVT MemVT,
void* IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<AtomicSDNode>(E)->refineAlignment(MMO);
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
}
SDNode* N = NodeAllocator.Allocate<AtomicSDNode>();
new (N) AtomicSDNode(Opcode, dl, VTs, MemVT, Chain, Ptr, Val, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -3682,6 +3804,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<MemIntrinsicSDNode>(E)->refineAlignment(MMO);
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
}
@@ -3693,6 +3816,7 @@ SelectionDAG::getMemIntrinsicNode(unsigned Opcode, DebugLoc dl, SDVTList VTList,
new (N) MemIntrinsicSDNode(Opcode, dl, VTList, Ops, NumOps, MemVT, MMO);
}
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -3732,16 +3856,15 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
assert(VT == MemVT && "Non-extending load from different memory type!");
} else {
// Extending load.
- if (VT.isVector())
- assert(MemVT.getVectorNumElements() == VT.getVectorNumElements() &&
- "Invalid vector extload!");
- else
- assert(MemVT.bitsLT(VT) &&
- "Should only be an extending load, not truncating!");
- assert((ExtType == ISD::EXTLOAD || VT.isInteger()) &&
- "Cannot sign/zero extend a FP/Vector load!");
+ assert(MemVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be an extending load, not truncating!");
assert(VT.isInteger() == MemVT.isInteger() &&
"Cannot convert from FP to Int or Int -> FP!");
+ assert(VT.isVector() == MemVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == MemVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
}
bool Indexed = AM != ISD::UNINDEXED;
@@ -3758,12 +3881,14 @@ SelectionDAG::getLoad(ISD::MemIndexedMode AM, DebugLoc dl,
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<LoadSDNode>(E)->refineAlignment(MMO);
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
}
SDNode *N = NodeAllocator.Allocate<LoadSDNode>();
new (N) LoadSDNode(Ops, dl, VTs, AM, ExtType, MemVT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -3834,12 +3959,14 @@ SDValue SelectionDAG::getStore(SDValue Chain, DebugLoc dl, SDValue Val,
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
}
SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, false, VT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -3874,10 +4001,15 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
if (VT == SVT)
return getStore(Chain, dl, Val, Ptr, MMO);
- assert(VT.bitsGT(SVT) && "Not a truncation?");
+ assert(SVT.getScalarType().bitsLT(VT.getScalarType()) &&
+ "Should only be a truncating store, not extending!");
assert(VT.isInteger() == SVT.isInteger() &&
"Can't do FP-INT conversion!");
-
+ assert(VT.isVector() == SVT.isVector() &&
+ "Cannot use trunc store to convert to or from a vector!");
+ assert((!VT.isVector() ||
+ VT.getVectorNumElements() == SVT.getVectorNumElements()) &&
+ "Cannot use trunc store to change the number of vector elements!");
SDVTList VTs = getVTList(MVT::Other);
SDValue Undef = getUNDEF(Ptr.getValueType());
@@ -3889,12 +4021,14 @@ SDValue SelectionDAG::getTruncStore(SDValue Chain, DebugLoc dl, SDValue Val,
void *IP = 0;
if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
cast<StoreSDNode>(E)->refineAlignment(MMO);
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
}
SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
new (N) StoreSDNode(Ops, dl, VTs, ISD::UNINDEXED, true, SVT, MMO);
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -3911,14 +4045,17 @@ SelectionDAG::getIndexedStore(SDValue OrigStore, DebugLoc dl, SDValue Base,
ID.AddInteger(ST->getMemoryVT().getRawBits());
ID.AddInteger(ST->getRawSubclassData());
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
SDNode *N = NodeAllocator.Allocate<StoreSDNode>();
new (N) StoreSDNode(Ops, dl, VTs, AM,
ST->isTruncatingStore(), ST->getMemoryVT(),
ST->getMemOperand());
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
return SDValue(N, 0);
}
@@ -3984,8 +4121,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
AddNodeIDNode(ID, Opcode, VTs, Ops, NumOps);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
N = NodeAllocator.Allocate<SDNode>();
new (N) SDNode(Opcode, DL, VTs, Ops, NumOps);
@@ -3996,6 +4135,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT,
}
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
#ifndef NDEBUG
VerifyNode(N);
#endif
@@ -4051,8 +4191,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return SDValue(E, 0);
+ }
if (NumOps == 1) {
N = NodeAllocator.Allocate<UnarySDNode>();
new (N) UnarySDNode(Opcode, DL, VTList, Ops[0]);
@@ -4083,6 +4225,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, SDVTList VTList,
}
}
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
#ifndef NDEBUG
VerifyNode(N);
#endif
@@ -4166,7 +4309,7 @@ SDVTList SelectionDAG::getVTList(EVT VT1, EVT VT2, EVT VT3, EVT VT4) {
I->VTs[2] == VT3 && I->VTs[3] == VT4)
return *I;
- EVT *Array = Allocator.Allocate<EVT>(3);
+ EVT *Array = Allocator.Allocate<EVT>(4);
Array[0] = VT1;
Array[1] = VT2;
Array[2] = VT3;
@@ -4545,8 +4688,10 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
if (VTs.VTs[VTs.NumVTs-1] != MVT::Flag) {
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opc, VTs, Ops, NumOps);
- if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *ON = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(ON);
return ON;
+ }
}
if (!RemoveNodeFromCSEMaps(N))
@@ -4610,6 +4755,7 @@ SDNode *SelectionDAG::MorphNodeTo(SDNode *N, unsigned Opc,
if (IP)
CSEMap.InsertNode(N, IP); // Memoize the new node.
+ if (Ordering) Ordering->add(N);
return N;
}
@@ -4748,8 +4894,10 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
FoldingSetNodeID ID;
AddNodeIDNode(ID, ~Opcode, VTs, Ops, NumOps);
IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return cast<MachineSDNode>(E);
+ }
}
// Allocate a new MachineSDNode.
@@ -4771,6 +4919,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc DL, SDVTList VTs,
CSEMap.InsertNode(N, IP);
AllNodes.push_back(N);
+ if (Ordering) Ordering->add(N);
#ifndef NDEBUG
VerifyNode(N);
#endif
@@ -4807,8 +4956,10 @@ SDNode *SelectionDAG::getNodeIfExists(unsigned Opcode, SDVTList VTList,
FoldingSetNodeID ID;
AddNodeIDNode(ID, Opcode, VTList, Ops, NumOps);
void *IP = 0;
- if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP))
+ if (SDNode *E = CSEMap.FindNodeOrInsertPos(ID, IP)) {
+ if (Ordering) Ordering->add(E);
return E;
+ }
}
return NULL;
}
@@ -5867,6 +6018,99 @@ SDValue SelectionDAG::UnrollVectorOp(SDNode *N, unsigned ResNE) {
&Scalars[0], Scalars.size());
}
+
+/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
+/// location that is 'Dist' units away from the location that the 'Base' load
+/// is loading from.
+bool SelectionDAG::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
+ unsigned Bytes, int Dist) const {
+ if (LD->getChain() != Base->getChain())
+ return false;
+ EVT VT = LD->getValueType(0);
+ if (VT.getSizeInBits() / 8 != Bytes)
+ return false;
+
+ SDValue Loc = LD->getOperand(1);
+ SDValue BaseLoc = Base->getOperand(1);
+ if (Loc.getOpcode() == ISD::FrameIndex) {
+ if (BaseLoc.getOpcode() != ISD::FrameIndex)
+ return false;
+ const MachineFrameInfo *MFI = getMachineFunction().getFrameInfo();
+ int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
+ int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
+ int FS = MFI->getObjectSize(FI);
+ int BFS = MFI->getObjectSize(BFI);
+ if (FS != BFS || FS != (int)Bytes) return false;
+ return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
+ }
+ if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
+ ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
+ if (V && (V->getSExtValue() == Dist*Bytes))
+ return true;
+ }
+
+ GlobalValue *GV1 = NULL;
+ GlobalValue *GV2 = NULL;
+ int64_t Offset1 = 0;
+ int64_t Offset2 = 0;
+ bool isGA1 = TLI.isGAPlusOffset(Loc.getNode(), GV1, Offset1);
+ bool isGA2 = TLI.isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
+ if (isGA1 && isGA2 && GV1 == GV2)
+ return Offset1 == (Offset2 + Dist*Bytes);
+ return false;
+}
+
+
+/// InferPtrAlignment - Infer alignment of a load / store address. Return 0 if
+/// it cannot be inferred.
+unsigned SelectionDAG::InferPtrAlignment(SDValue Ptr) const {
+ // If this is a GlobalAddress + cst, return the alignment.
+ GlobalValue *GV;
+ int64_t GVOffset = 0;
+ if (TLI.isGAPlusOffset(Ptr.getNode(), GV, GVOffset))
+ return MinAlign(GV->getAlignment(), GVOffset);
+
+ // If this is a direct reference to a stack slot, use information about the
+ // stack slot's alignment.
+ int FrameIdx = 1 << 31;
+ int64_t FrameOffset = 0;
+ if (FrameIndexSDNode *FI = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FrameIdx = FI->getIndex();
+ } else if (Ptr.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FrameIdx = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ FrameOffset = Ptr.getConstantOperandVal(1);
+ }
+
+ if (FrameIdx != (1 << 31)) {
+ // FIXME: Handle FI+CST.
+ const MachineFrameInfo &MFI = *getMachineFunction().getFrameInfo();
+ unsigned FIInfoAlign = MinAlign(MFI.getObjectAlignment(FrameIdx),
+ FrameOffset);
+ if (MFI.isFixedObjectIndex(FrameIdx)) {
+ int64_t ObjectOffset = MFI.getObjectOffset(FrameIdx) + FrameOffset;
+
+ // The alignment of the frame index can be determined from its offset from
+ // the incoming frame position. If the frame object is at offset 32 and
+ // the stack is guaranteed to be 16-byte aligned, then we know that the
+ // object is 16-byte aligned.
+ unsigned StackAlign = getTarget().getFrameInfo()->getStackAlignment();
+ unsigned Align = MinAlign(ObjectOffset, StackAlign);
+
+ // Finally, the frame object itself may have a known alignment. Factor
+ // the alignment + offset into a new alignment. For example, if we know
+ // the FI is 8 byte aligned, but the pointer is 4 off, we really have a
+ // 4-byte alignment of the resultant pointer. Likewise align 4 + 4-byte
+ // offset = 4-byte alignment, align 4 + 1-byte offset = align 1, etc.
+ return std::max(Align, FIInfoAlign);
+ }
+ return FIInfoAlign;
+ }
+
+ return 0;
+}
+
void SelectionDAG::dump() const {
errs() << "SelectionDAG has " << AllNodes.size() << " nodes:";
@@ -5882,6 +6126,9 @@ void SelectionDAG::dump() const {
errs() << "\n\n";
}
+void SelectionDAG::NodeOrdering::dump() const {
+}
+
void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const {
print_types(OS, G);
print_details(OS, G);
@@ -6022,4 +6269,3 @@ bool ShuffleVectorSDNode::isSplatMask(const int *Mask, EVT VT) {
return false;
return true;
}
-
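
(Illustration, not part of the patch.) Nearly every hunk in this file threads the same bookkeeping through the node-creation paths: Ordering->add() is called both when CSEMap returns an existing node and when a fresh one is allocated, and SelectionDAGBuilder::visit() below gains a DAG.NewInst() call per IR instruction. The NodeOrdering class itself is not shown in this commit (only its empty dump() appears above), so the sketch below is a plausible guess at that interface, a map from each SDNode to the index of the IR instruction being lowered when the node was produced, and not the actual LLVM implementation.

    #include <map>

    class SDNode; // opaque in this sketch

    // Hypothetical bookkeeping matching the calls visible in the hunks above.
    class NodeOrdering {
      std::map<const SDNode *, unsigned> OrderMap; // node -> IR instruction index
      unsigned CurInst;                            // instruction currently being lowered

    public:
      NodeOrdering() : CurInst(0) {}

      // SelectionDAG::NewInst() would forward here; SelectionDAGBuilder::visit()
      // calls it once per IR instruction (see the SelectionDAGBuilder hunk below).
      void NewInst() { ++CurInst; }

      // Called from every node-creation path, for both CSE hits and new nodes.
      void add(const SDNode *N) { OrderMap[N] = CurInst; }

      unsigned getOrder(const SDNode *N) const {
        std::map<const SDNode *, unsigned>::const_iterator I = OrderMap.find(N);
        return I == OrderMap.end() ? 0 : I->second;
      }
    };
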
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 57d8903..7568384 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -583,6 +583,9 @@ void SelectionDAGBuilder::visit(Instruction &I) {
}
void SelectionDAGBuilder::visit(unsigned Opcode, User &I) {
+ // Tell the DAG that we're processing a new instruction.
+ DAG.NewInst();
+
// Note: this doesn't use InstVisitor, because it has to work with
// ConstantExpr's in addition to instructions.
switch (Opcode) {
@@ -2108,7 +2111,7 @@ void SelectionDAGBuilder::visitSelect(User &I) {
for (unsigned i = 0; i != NumValues; ++i)
Values[i] = DAG.getNode(ISD::SELECT, getCurDebugLoc(),
- TrueVal.getValueType(), Cond,
+ TrueVal.getNode()->getValueType(i), Cond,
SDValue(TrueVal.getNode(), TrueVal.getResNo() + i),
SDValue(FalseVal.getNode(), FalseVal.getResNo() + i));
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
index c39437f..a640c7d 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp
@@ -60,8 +60,6 @@
using namespace llvm;
static cl::opt<bool>
-DisableLegalizeTypes("disable-legalize-types", cl::Hidden);
-static cl::opt<bool>
EnableFastISelVerbose("fast-isel-verbose", cl::Hidden,
cl::desc("Enable verbose messages in the \"fast\" "
"instruction selector"));
@@ -362,6 +360,39 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) {
return true;
}
+/// SetDebugLoc - Update MF's and SDB's DebugLocs if debug information is
+/// attached with this instruction.
+static void SetDebugLoc(unsigned MDDbgKind,
+ MetadataContext &TheMetadata,
+ Instruction *I,
+ SelectionDAGBuilder *SDB,
+ FastISel *FastIS,
+ MachineFunction *MF) {
+ if (!isa<DbgInfoIntrinsic>(I))
+ if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
+ DILocation DILoc(Dbg);
+ DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
+
+ SDB->setCurDebugLoc(Loc);
+
+ if (FastIS)
+ FastIS->setCurDebugLoc(Loc);
+
+ // If the function doesn't have a default debug location yet, set
+ // it. This is kind of a hack.
+ if (MF->getDefaultDebugLoc().isUnknown())
+ MF->setDefaultDebugLoc(Loc);
+ }
+}
+
+/// ResetDebugLoc - Set MF's and SDB's DebugLocs to Unknown.
+static void ResetDebugLoc(SelectionDAGBuilder *SDB,
+ FastISel *FastIS) {
+ SDB->setCurDebugLoc(DebugLoc::getUnknownLoc());
+ if (FastIS)
+ FastIS->setCurDebugLoc(DebugLoc::getUnknownLoc());
+}
+
void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
BasicBlock::iterator Begin,
BasicBlock::iterator End,
@@ -373,20 +404,16 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
// Lower all of the non-terminator instructions. If a call is emitted
// as a tail call, cease emitting nodes for this block.
for (BasicBlock::iterator I = Begin; I != End && !SDB->HasTailCall; ++I) {
- if (MDDbgKind) {
- // Update DebugLoc if debug information is attached with this
- // instruction.
- if (!isa<DbgInfoIntrinsic>(I))
- if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, I)) {
- DILocation DILoc(Dbg);
- DebugLoc Loc = ExtractDebugLocation(DILoc, MF->getDebugLocInfo());
- SDB->setCurDebugLoc(Loc);
- if (MF->getDefaultDebugLoc().isUnknown())
- MF->setDefaultDebugLoc(Loc);
- }
- }
- if (!isa<TerminatorInst>(I))
+ if (MDDbgKind)
+ SetDebugLoc(MDDbgKind, TheMetadata, I, SDB, 0, MF);
+
+ if (!isa<TerminatorInst>(I)) {
SDB->visit(*I);
+
+ // Set the current debug location back to "unknown" so that it doesn't
+ // spuriously apply to subsequent instructions.
+ ResetDebugLoc(SDB, 0);
+ }
}
if (!SDB->HasTailCall) {
@@ -401,7 +428,9 @@ void SelectionDAGISel::SelectBasicBlock(BasicBlock *LLVMBB,
HandlePHINodesInSuccessorBlocks(LLVMBB);
// Lower the terminator after the copies are emitted.
+ SetDebugLoc(MDDbgKind, TheMetadata, LLVMBB->getTerminator(), SDB, 0, MF);
SDB->visit(*LLVMBB->getTerminator());
+ ResetDebugLoc(SDB, 0);
}
}
@@ -498,75 +527,73 @@ void SelectionDAGISel::CodeGenAndEmitDAG() {
// Second step, hack on the DAG until it only uses operations and types that
// the target supports.
- if (!DisableLegalizeTypes) {
- if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
- BlockName);
+ if (ViewLegalizeTypesDAGs) CurDAG->viewGraph("legalize-types input for " +
+ BlockName);
+
+ bool Changed;
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Type Legalization", GroupName);
+ Changed = CurDAG->LegalizeTypes();
+ } else {
+ Changed = CurDAG->LegalizeTypes();
+ }
+
+ DEBUG(errs() << "Type-legalized selection DAG:\n");
+ DEBUG(CurDAG->dump());
- bool Changed;
+ if (Changed) {
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
+
+ // Run the DAG combiner in post-type-legalize mode.
if (TimePassesIsEnabled) {
- NamedRegionTimer T("Type Legalization", GroupName);
- Changed = CurDAG->LegalizeTypes();
+ NamedRegionTimer T("DAG Combining after legalize types", GroupName);
+ CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
} else {
- Changed = CurDAG->LegalizeTypes();
+ CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
}
- DEBUG(errs() << "Type-legalized selection DAG:\n");
+ DEBUG(errs() << "Optimized type-legalized selection DAG:\n");
DEBUG(CurDAG->dump());
+ }
- if (Changed) {
- if (ViewDAGCombineLT)
- CurDAG->viewGraph("dag-combine-lt input for " + BlockName);
-
- // Run the DAG combiner in post-type-legalize mode.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Combining after legalize types", GroupName);
- CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
- } else {
- CurDAG->Combine(NoIllegalTypes, *AA, OptLevel);
- }
-
- DEBUG(errs() << "Optimized type-legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
- }
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("Vector Legalization", GroupName);
+ Changed = CurDAG->LegalizeVectors();
+ } else {
+ Changed = CurDAG->LegalizeVectors();
+ }
+ if (Changed) {
if (TimePassesIsEnabled) {
- NamedRegionTimer T("Vector Legalization", GroupName);
- Changed = CurDAG->LegalizeVectors();
+ NamedRegionTimer T("Type Legalization 2", GroupName);
+ Changed = CurDAG->LegalizeTypes();
} else {
- Changed = CurDAG->LegalizeVectors();
+ Changed = CurDAG->LegalizeTypes();
}
- if (Changed) {
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("Type Legalization 2", GroupName);
- Changed = CurDAG->LegalizeTypes();
- } else {
- Changed = CurDAG->LegalizeTypes();
- }
-
- if (ViewDAGCombineLT)
- CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
+ if (ViewDAGCombineLT)
+ CurDAG->viewGraph("dag-combine-lv input for " + BlockName);
- // Run the DAG combiner in post-type-legalize mode.
- if (TimePassesIsEnabled) {
- NamedRegionTimer T("DAG Combining after legalize vectors", GroupName);
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
- } else {
- CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
- }
-
- DEBUG(errs() << "Optimized vector-legalized selection DAG:\n");
- DEBUG(CurDAG->dump());
+ // Run the DAG combiner in post-type-legalize mode.
+ if (TimePassesIsEnabled) {
+ NamedRegionTimer T("DAG Combining after legalize vectors", GroupName);
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
+ } else {
+ CurDAG->Combine(NoIllegalOperations, *AA, OptLevel);
}
+
+ DEBUG(errs() << "Optimized vector-legalized selection DAG:\n");
+ DEBUG(CurDAG->dump());
}
if (ViewLegalizeDAGs) CurDAG->viewGraph("legalize input for " + BlockName);
if (TimePassesIsEnabled) {
NamedRegionTimer T("DAG Legalization", GroupName);
- CurDAG->Legalize(DisableLegalizeTypes, OptLevel);
+ CurDAG->Legalize(OptLevel);
} else {
- CurDAG->Legalize(DisableLegalizeTypes, OptLevel);
+ CurDAG->Legalize(OptLevel);
}
DEBUG(errs() << "Legalized selection DAG:\n");
@@ -738,24 +765,11 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
FastIS->startNewBlock(BB);
// Do FastISel on as many instructions as possible.
for (; BI != End; ++BI) {
- if (MDDbgKind) {
- // Update DebugLoc if debug information is attached with this
- // instruction.
- if (!isa<DbgInfoIntrinsic>(BI))
- if (MDNode *Dbg = TheMetadata.getMD(MDDbgKind, BI)) {
- DILocation DILoc(Dbg);
- DebugLoc Loc = ExtractDebugLocation(DILoc,
- MF.getDebugLocInfo());
- FastIS->setCurDebugLoc(Loc);
- if (MF.getDefaultDebugLoc().isUnknown())
- MF.setDefaultDebugLoc(Loc);
- }
- }
-
// Just before the terminator instruction, insert instructions to
// feed PHI nodes in successor blocks.
if (isa<TerminatorInst>(BI))
if (!HandlePHINodesInSuccessorBlocksFast(LLVMBB, FastIS)) {
+ ResetDebugLoc(SDB, FastIS);
if (EnableFastISelVerbose || EnableFastISelAbort) {
errs() << "FastISel miss: ";
BI->dump();
@@ -765,13 +779,18 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
break;
}
+ if (MDDbgKind)
+ SetDebugLoc(MDDbgKind, TheMetadata, BI, SDB, FastIS, &MF);
+
// First try normal tablegen-generated "fast" selection.
- if (FastIS->SelectInstruction(BI))
+ if (FastIS->SelectInstruction(BI)) {
+ ResetDebugLoc(SDB, FastIS);
continue;
+ }
- // Next, try calling the target to attempt to handle the instruction.
- if (FastIS->TargetSelectInstruction(BI))
- continue;
+ // Clear out the debug location so that it doesn't carry over to
+ // unrelated instructions.
+ ResetDebugLoc(SDB, FastIS);
// Then handle certain instructions as single-LLVM-Instruction blocks.
if (isa<CallInst>(BI)) {
@@ -786,10 +805,8 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
R = FuncInfo->CreateRegForValue(BI);
}
- SDB->setCurDebugLoc(FastIS->getCurDebugLoc());
-
bool HadTailCall = false;
- SelectBasicBlock(LLVMBB, BI, next(BI), HadTailCall);
+ SelectBasicBlock(LLVMBB, BI, llvm::next(BI), HadTailCall);
// If the call was emitted as a tail call, we're done with the block.
if (HadTailCall) {
@@ -823,9 +840,6 @@ void SelectionDAGISel::SelectAllBasicBlocks(Function &Fn,
// not handled by FastISel. If FastISel is not run, this is the entire
// block.
if (BI != End) {
- // If FastISel is run and it has known DebugLoc then use it.
- if (FastIS && !FastIS->getCurDebugLoc().isUnknown())
- SDB->setCurDebugLoc(FastIS->getCurDebugLoc());
bool HadTailCall;
SelectBasicBlock(LLVMBB, BI, End, HadTailCall);
}
@@ -1313,14 +1327,6 @@ SDNode *SelectionDAGISel::Select_UNDEF(const SDValue &N) {
N.getValueType());
}
-SDNode *SelectionDAGISel::Select_DBG_LABEL(const SDValue &N) {
- SDValue Chain = N.getOperand(0);
- unsigned C = cast<LabelSDNode>(N)->getLabelID();
- SDValue Tmp = CurDAG->getTargetConstant(C, MVT::i32);
- return CurDAG->SelectNodeTo(N.getNode(), TargetInstrInfo::DBG_LABEL,
- MVT::Other, Tmp, Chain);
-}
-
SDNode *SelectionDAGISel::Select_EH_LABEL(const SDValue &N) {
SDValue Chain = N.getOperand(0);
unsigned C = cast<LabelSDNode>(N)->getLabelID();
diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
index c5adc50..83fa5a8 100644
--- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
+++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp
@@ -29,14 +29,14 @@
#include "llvm/ADT/DenseSet.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Config/config.h"
-#include <fstream>
using namespace llvm;
namespace llvm {
template<>
struct DOTGraphTraits<SelectionDAG*> : public DefaultDOTGraphTraits {
- DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {}
+ explicit DOTGraphTraits(bool isSimple=false) :
+ DefaultDOTGraphTraits(isSimple) {}
static bool hasEdgeDestLabels() {
return true;
@@ -50,6 +50,11 @@ namespace llvm {
return ((const SDNode *) Node)->getValueType(i).getEVTString();
}
+ template<typename EdgeIter>
+ static std::string getEdgeSourceLabel(const void *Node, EdgeIter I) {
+ return itostr(I - SDNodeIterator::begin((SDNode *) Node));
+ }
+
/// edgeTargetsEdgeSource - This method returns true if this outgoing edge
/// should actually target another edge source, not a node. If this method
/// is implemented, getEdgeTarget should be implemented.
@@ -76,12 +81,12 @@ namespace llvm {
static bool renderGraphFromBottomUp() {
return true;
}
-
+
static bool hasNodeAddressLabel(const SDNode *Node,
const SelectionDAG *Graph) {
return true;
}
-
+
/// If you want to override the dot attributes printed for a particular
/// edge, override this method.
template<typename EdgeIter>
@@ -94,7 +99,7 @@ namespace llvm {
return "color=blue,style=dashed";
return "";
}
-
+
static std::string getSimpleNodeLabel(const SDNode *Node,
const SelectionDAG *G) {
@@ -132,7 +137,7 @@ namespace llvm {
std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
const SelectionDAG *G) {
- return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel (Node, G);
+ return DOTGraphTraits<SelectionDAG*>::getSimpleNodeLabel(Node, G);
}
@@ -142,7 +147,7 @@ std::string DOTGraphTraits<SelectionDAG*>::getNodeLabel(const SDNode *Node,
void SelectionDAG::viewGraph(const std::string &Title) {
// This code is only for debugging!
#ifndef NDEBUG
- ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
+ ViewGraph(this, "dag." + getMachineFunction().getFunction()->getNameStr(),
false, Title);
#else
errs() << "SelectionDAG::viewGraph is only available in debug builds on "
@@ -186,7 +191,7 @@ const std::string SelectionDAG::getGraphAttrs(const SDNode *N) const {
#ifndef NDEBUG
std::map<const SDNode *, std::string>::const_iterator I =
NodeGraphAttrs.find(N);
-
+
if (I != NodeGraphAttrs.end())
return I->second;
else
@@ -252,8 +257,7 @@ void SelectionDAG::setSubgraphColor(SDNode *N, const char *Color) {
// Visually mark that we hit the limit
if (strcmp(Color, "red") == 0) {
setSubgraphColorHelper(N, "blue", visited, 0, printed);
- }
- else if (strcmp(Color, "yellow") == 0) {
+ } else if (strcmp(Color, "yellow") == 0) {
setSubgraphColorHelper(N, "green", visited, 0, printed);
}
}
diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index 68bc2d6..1026169 100644
--- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -911,7 +911,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
TargetLoweringOpt &TLO,
unsigned Depth) const {
unsigned BitWidth = DemandedMask.getBitWidth();
- assert(Op.getValueSizeInBits() == BitWidth &&
+ assert(Op.getValueType().getScalarType().getSizeInBits() == BitWidth &&
"Mask size mismatches value type size!");
APInt NewMask = DemandedMask;
DebugLoc dl = Op.getDebugLoc();
@@ -1240,7 +1240,7 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op,
// demand the input sign bit.
APInt HighBits = APInt::getHighBitsSet(BitWidth, ShAmt);
if (HighBits.intersects(NewMask))
- InDemandedMask |= APInt::getSignBit(VT.getSizeInBits());
+ InDemandedMask |= APInt::getSignBit(VT.getScalarType().getSizeInBits());
if (SimplifyDemandedBits(Op.getOperand(0), InDemandedMask,
KnownZero, KnownOne, TLO, Depth+1))
@@ -2184,48 +2184,6 @@ bool TargetLowering::isGAPlusOffset(SDNode *N, GlobalValue* &GA,
}
-/// isConsecutiveLoad - Return true if LD is loading 'Bytes' bytes from a
-/// location that is 'Dist' units away from the location that the 'Base' load
-/// is loading from.
-bool TargetLowering::isConsecutiveLoad(LoadSDNode *LD, LoadSDNode *Base,
- unsigned Bytes, int Dist,
- const MachineFrameInfo *MFI) const {
- if (LD->getChain() != Base->getChain())
- return false;
- EVT VT = LD->getValueType(0);
- if (VT.getSizeInBits() / 8 != Bytes)
- return false;
-
- SDValue Loc = LD->getOperand(1);
- SDValue BaseLoc = Base->getOperand(1);
- if (Loc.getOpcode() == ISD::FrameIndex) {
- if (BaseLoc.getOpcode() != ISD::FrameIndex)
- return false;
- int FI = cast<FrameIndexSDNode>(Loc)->getIndex();
- int BFI = cast<FrameIndexSDNode>(BaseLoc)->getIndex();
- int FS = MFI->getObjectSize(FI);
- int BFS = MFI->getObjectSize(BFI);
- if (FS != BFS || FS != (int)Bytes) return false;
- return MFI->getObjectOffset(FI) == (MFI->getObjectOffset(BFI) + Dist*Bytes);
- }
- if (Loc.getOpcode() == ISD::ADD && Loc.getOperand(0) == BaseLoc) {
- ConstantSDNode *V = dyn_cast<ConstantSDNode>(Loc.getOperand(1));
- if (V && (V->getSExtValue() == Dist*Bytes))
- return true;
- }
-
- GlobalValue *GV1 = NULL;
- GlobalValue *GV2 = NULL;
- int64_t Offset1 = 0;
- int64_t Offset2 = 0;
- bool isGA1 = isGAPlusOffset(Loc.getNode(), GV1, Offset1);
- bool isGA2 = isGAPlusOffset(BaseLoc.getNode(), GV2, Offset2);
- if (isGA1 && isGA2 && GV1 == GV2)
- return Offset1 == (Offset2 + Dist*Bytes);
- return false;
-}
-
-
SDValue TargetLowering::
PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const {
// Default implementation: no optimization.
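
(Illustration, not part of the patch.) The SimplifyDemandedBits changes here, like the ComputeMaskedBits and SignBitIsZero changes in SelectionDAG.cpp above, switch from getSizeInBits() to getScalarType().getSizeInBits() because known-bits and demanded-bits masks describe each element of a vector rather than the vector as a whole: for v4i32 the relevant width is 32, not 128. A small standalone example of that per-element reasoning (plain C++, not the LLVM EVT/APInt API):

    #include <cassert>
    #include <cstdint>

    int main() {
      const unsigned ScalarBits = 32;             // element width of v4i32
      const unsigned VectorBits = 4 * ScalarBits; // 128: width of the whole vector

      // Lanes produced by zero-extending i16 values to i32: the high 16 bits of
      // every lane are known zero, a fact expressed with a 32-bit per-lane mask.
      uint32_t Lanes[4] = {0x1234u, 0xBEEFu, 0x0001u, 0xFFFFu};
      const uint32_t KnownZero = 0xFFFF0000u;     // mask is ScalarBits wide

      for (unsigned i = 0; i != 4; ++i)
        assert((Lanes[i] & KnownZero) == 0);

      // A 128-bit (VectorBits) mask would mismatch this per-element bookkeeping,
      // which is exactly what the tightened width asserts now require.
      (void)VectorBits;
      return 0;
    }
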
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.cpp b/lib/CodeGen/SimpleRegisterCoalescing.cpp
index 5876371..ed407eb 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.cpp
+++ b/lib/CodeGen/SimpleRegisterCoalescing.cpp
@@ -130,7 +130,8 @@ bool SimpleRegisterCoalescing::AdjustCopiesBackFrom(LiveInterval &IntA,
// See PR3149:
// 172 %ECX<def> = MOV32rr %reg1039<kill>
// 180 INLINEASM <es:subl $5,$1
- // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9, %EAX<kill>,
+ // sbbl $3,$0>, 10, %EAX<def>, 14, %ECX<earlyclobber,def>, 9,
+ // %EAX<kill>,
// 36, <fi#0>, 1, %reg0, 0, 9, %ECX<kill>, 36, <fi#1>, 1, %reg0, 0
// 188 %EAX<def> = MOV32rr %EAX<kill>
// 196 %ECX<def> = MOV32rr %ECX<kill>
@@ -281,12 +282,12 @@ TransferImplicitOps(MachineInstr *MI, MachineInstr *NewMI) {
}
}
-/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with IntA
-/// being the source and IntB being the dest, thus this defines a value number
-/// in IntB. If the source value number (in IntA) is defined by a commutable
-/// instruction and its other operand is coalesced to the copy dest register,
-/// see if we can transform the copy into a noop by commuting the definition. For
-/// example,
+/// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with
+/// IntA being the source and IntB being the dest, thus this defines a value
+/// number in IntB. If the source value number (in IntA) is defined by a
+/// commutable instruction and its other operand is coalesced to the copy dest
+/// register, see if we can transform the copy into a noop by commuting the
+/// definition. For example,
///
/// A3 = op A2 B0<kill>
/// ...
@@ -508,7 +509,8 @@ bool SimpleRegisterCoalescing::RemoveCopyByCommutingDef(LiveInterval &IntA,
if (BHasSubRegs) {
for (const unsigned *SR = tri_->getSubRegisters(IntB.reg); *SR; ++SR) {
LiveInterval &SRLI = li_->getInterval(*SR);
- SRLI.MergeInClobberRange(*li_, AI->start, End, li_->getVNInfoAllocator());
+ SRLI.MergeInClobberRange(*li_, AI->start, End,
+ li_->getVNInfoAllocator());
}
}
}
@@ -708,7 +710,8 @@ bool SimpleRegisterCoalescing::ReMaterializeTrivialDef(LiveInterval &SrcInt,
checkForDeadDef = true;
}
- MachineBasicBlock::iterator MII = next(MachineBasicBlock::iterator(CopyMI));
+ MachineBasicBlock::iterator MII =
+ llvm::next(MachineBasicBlock::iterator(CopyMI));
tii_->reMaterialize(*MBB, MII, DstReg, DstSubIdx, DefMI, tri_);
MachineInstr *NewMI = prior(MII);
@@ -1314,7 +1317,13 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
"coalesced to another register.\n");
return false; // Not coalescable.
}
- } else if (!tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)){
+ } else if (tii_->isMoveInstr(*CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx)) {
+ if (SrcSubIdx && DstSubIdx && SrcSubIdx != DstSubIdx) {
+ // e.g. %reg16404:1<def> = MOV8rr %reg16412:2<kill>
+ Again = true;
+ return false; // Not coalescable.
+ }
+ } else {
llvm_unreachable("Unrecognized copy instruction!");
}
@@ -1611,9 +1620,9 @@ bool SimpleRegisterCoalescing::JoinCopy(CopyRec &TheCopy, bool &Again) {
}
}
} else {
- // If the virtual register live interval is long but it has low use desity,
- // do not join them, instead mark the physical register as its allocation
- // preference.
+ // If the virtual register live interval is long but it has low use
+ // density, do not join them, instead mark the physical register as its
+ // allocation preference.
LiveInterval &JoinVInt = SrcIsPhys ? DstInt : SrcInt;
unsigned JoinVReg = SrcIsPhys ? DstReg : SrcReg;
unsigned JoinPReg = SrcIsPhys ? SrcReg : DstReg;
@@ -1938,6 +1947,10 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
if (Overlaps) {
// If we haven't already recorded that this value # is safe, check it.
if (!InVector(LHSIt->valno, EliminatedLHSVals)) {
+ // If it's re-defined by an early clobber somewhere in the live range,
+ // then conservatively abort coalescing.
+ if (LHSIt->valno->hasRedefByEC())
+ return false;
// Copy from the RHS?
if (!RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg))
return false; // Nope, bail out.
@@ -1977,6 +1990,10 @@ bool SimpleRegisterCoalescing::SimpleJoin(LiveInterval &LHS, LiveInterval &RHS){
// if coalescing succeeds. Just skip the liverange.
if (++LHSIt == LHSEnd) break;
} else {
+ // If it's re-defined by an early clobber somewhere in the live range,
+ // then conservatively abort coalescing.
+ if (LHSIt->valno->hasRedefByEC())
+ return false;
// Otherwise, if this is a copy from the RHS, mark it as being merged
// in.
if (RangeIsDefinedByCopyFromReg(LHS, LHSIt, RHS.reg)) {
@@ -2316,6 +2333,10 @@ SimpleRegisterCoalescing::JoinIntervals(LiveInterval &LHS, LiveInterval &RHS,
if (LHSValNoAssignments[I->valno->id] !=
RHSValNoAssignments[J->valno->id])
return false;
+ // If it's re-defined by an early clobber somewhere in the live range,
+ // then conservatively abort coalescing.
+ if (NewVNInfo[LHSValNoAssignments[I->valno->id]]->hasRedefByEC())
+ return false;
}
if (I->end < J->end) {
@@ -2401,9 +2422,15 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
// If this isn't a copy nor a extract_subreg, we can't join intervals.
unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
+ bool isInsUndef = false;
if (Inst->getOpcode() == TargetInstrInfo::EXTRACT_SUBREG) {
DstReg = Inst->getOperand(0).getReg();
SrcReg = Inst->getOperand(1).getReg();
+ } else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG) {
+ DstReg = Inst->getOperand(0).getReg();
+ SrcReg = Inst->getOperand(2).getReg();
+ if (Inst->getOperand(1).isUndef())
+ isInsUndef = true;
} else if (Inst->getOpcode() == TargetInstrInfo::INSERT_SUBREG ||
Inst->getOpcode() == TargetInstrInfo::SUBREG_TO_REG) {
DstReg = Inst->getOperand(0).getReg();
@@ -2413,7 +2440,8 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg);
bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
- if (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty())
+ if (isInsUndef ||
+ (li_->hasInterval(SrcReg) && li_->getInterval(SrcReg).empty()))
ImpDefCopies.push_back(CopyRec(Inst, 0));
else if (SrcIsPhys || DstIsPhys)
PhysCopies.push_back(CopyRec(Inst, 0));
@@ -2421,9 +2449,9 @@ void SimpleRegisterCoalescing::CopyCoalesceInMBB(MachineBasicBlock *MBB,
VirtCopies.push_back(CopyRec(Inst, 0));
}
- // Try coalescing implicit copies first, followed by copies to / from
- // physical registers, then finally copies from virtual registers to
- // virtual registers.
+ // Try coalescing implicit copies and insert_subreg <undef> first,
+ // followed by copies to / from physical registers, then finally copies
+ // from virtual registers to virtual registers.
for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) {
CopyRec &TheCopy = ImpDefCopies[i];
bool Again = false;
@@ -2594,114 +2622,6 @@ void SimpleRegisterCoalescing::releaseMemory() {
ReMatDefs.clear();
}
-/// Returns true if the given live interval is zero length.
-static bool isZeroLengthInterval(LiveInterval *li, LiveIntervals *li_) {
- for (LiveInterval::Ranges::const_iterator
- i = li->ranges.begin(), e = li->ranges.end(); i != e; ++i)
- if (i->end.getPrevIndex() > i->start)
- return false;
- return true;
-}
-
-
-void SimpleRegisterCoalescing::CalculateSpillWeights() {
- SmallSet<unsigned, 4> Processed;
- for (MachineFunction::iterator mbbi = mf_->begin(), mbbe = mf_->end();
- mbbi != mbbe; ++mbbi) {
- MachineBasicBlock* MBB = mbbi;
- SlotIndex MBBEnd = li_->getMBBEndIdx(MBB);
- MachineLoop* loop = loopInfo->getLoopFor(MBB);
- unsigned loopDepth = loop ? loop->getLoopDepth() : 0;
- bool isExiting = loop ? loop->isLoopExiting(MBB) : false;
-
- for (MachineBasicBlock::const_iterator mii = MBB->begin(), mie = MBB->end();
- mii != mie; ++mii) {
- const MachineInstr *MI = mii;
- if (tii_->isIdentityCopy(*MI))
- continue;
-
- if (MI->getOpcode() == TargetInstrInfo::IMPLICIT_DEF)
- continue;
-
- for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
- const MachineOperand &mopi = MI->getOperand(i);
- if (!mopi.isReg() || mopi.getReg() == 0)
- continue;
- unsigned Reg = mopi.getReg();
- if (!TargetRegisterInfo::isVirtualRegister(mopi.getReg()))
- continue;
- // Multiple uses of reg by the same instruction. It should not
- // contribute to spill weight again.
- if (!Processed.insert(Reg))
- continue;
-
- bool HasDef = mopi.isDef();
- bool HasUse = !HasDef;
- for (unsigned j = i+1; j != e; ++j) {
- const MachineOperand &mopj = MI->getOperand(j);
- if (!mopj.isReg() || mopj.getReg() != Reg)
- continue;
- HasDef |= mopj.isDef();
- HasUse |= mopj.isUse();
- if (HasDef && HasUse)
- break;
- }
-
- LiveInterval &RegInt = li_->getInterval(Reg);
- float Weight = li_->getSpillWeight(HasDef, HasUse, loopDepth);
- if (HasDef && isExiting) {
- // Looks like this is a loop count variable update.
- SlotIndex DefIdx = li_->getInstructionIndex(MI).getDefIndex();
- const LiveRange *DLR =
- li_->getInterval(Reg).getLiveRangeContaining(DefIdx);
- if (DLR->end > MBBEnd)
- Weight *= 3.0F;
- }
- RegInt.weight += Weight;
- }
- Processed.clear();
- }
- }
-
- for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I) {
- LiveInterval &LI = *I->second;
- if (TargetRegisterInfo::isVirtualRegister(LI.reg)) {
- // If the live interval length is essentially zero, i.e. in every live
- // range the use follows def immediately, it doesn't make sense to spill
- // it and hope it will be easier to allocate for this li.
- if (isZeroLengthInterval(&LI, li_)) {
- LI.weight = HUGE_VALF;
- continue;
- }
-
- bool isLoad = false;
- SmallVector<LiveInterval*, 4> SpillIs;
- if (li_->isReMaterializable(LI, SpillIs, isLoad)) {
- // If all of the definitions of the interval are re-materializable,
- // it is a preferred candidate for spilling. If non of the defs are
- // loads, then it's potentially very cheap to re-materialize.
- // FIXME: this gets much more complicated once we support non-trivial
- // re-materialization.
- if (isLoad)
- LI.weight *= 0.9F;
- else
- LI.weight *= 0.5F;
- }
-
- // Slightly prefer live interval that has been assigned a preferred reg.
- std::pair<unsigned, unsigned> Hint = mri_->getRegAllocationHint(LI.reg);
- if (Hint.first || Hint.second)
- LI.weight *= 1.01F;
-
- // Divide the weight of the interval by its size. This encourages
- // spilling of intervals that are large and have few uses, and
- // discourages spilling of small intervals with many uses.
- LI.weight /= li_->getApproximateInstructionCount(LI) * InstrSlots::NUM;
- }
- }
-}
-
-
bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
mf_ = &fn;
mri_ = &fn.getRegInfo();
@@ -2727,7 +2647,8 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
joinIntervals();
DEBUG({
errs() << "********** INTERVALS POST JOINING **********\n";
- for (LiveIntervals::iterator I = li_->begin(), E = li_->end(); I != E; ++I){
+ for (LiveIntervals::iterator I = li_->begin(), E = li_->end();
+ I != E; ++I){
I->second->print(errs(), tri_);
errs() << "\n";
}
@@ -2768,7 +2689,7 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
DoDelete = true;
}
if (!DoDelete)
- mii = next(mii);
+ mii = llvm::next(mii);
else {
li_->RemoveMachineInstrFromMaps(MI);
mii = mbbi->erase(mii);
@@ -2831,8 +2752,6 @@ bool SimpleRegisterCoalescing::runOnMachineFunction(MachineFunction &fn) {
}
}
- CalculateSpillWeights();
-
DEBUG(dump());
return true;
}
diff --git a/lib/CodeGen/SimpleRegisterCoalescing.h b/lib/CodeGen/SimpleRegisterCoalescing.h
index 78f8a9a..605a740 100644
--- a/lib/CodeGen/SimpleRegisterCoalescing.h
+++ b/lib/CodeGen/SimpleRegisterCoalescing.h
@@ -244,10 +244,6 @@ namespace llvm {
MachineOperand *lastRegisterUse(SlotIndex Start, SlotIndex End,
unsigned Reg, SlotIndex &LastUseIdx) const;
- /// CalculateSpillWeights - Compute spill weights for all virtual register
- /// live intervals.
- void CalculateSpillWeights();
-
void printRegName(unsigned reg) const;
};
diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp
index 237d0b5..bc246c1 100644
--- a/lib/CodeGen/Spiller.cpp
+++ b/lib/CodeGen/Spiller.cpp
@@ -20,22 +20,25 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
+#include <set>
using namespace llvm;
namespace {
- enum SpillerName { trivial, standard };
+ enum SpillerName { trivial, standard, splitting };
}
static cl::opt<SpillerName>
spillerOpt("spiller",
cl::desc("Spiller to use: (default: standard)"),
cl::Prefix,
- cl::values(clEnumVal(trivial, "trivial spiller"),
- clEnumVal(standard, "default spiller"),
+ cl::values(clEnumVal(trivial, "trivial spiller"),
+ clEnumVal(standard, "default spiller"),
+ clEnumVal(splitting, "splitting spiller"),
clEnumValEnd),
cl::init(standard));
+// Spiller virtual destructor implementation.
Spiller::~Spiller() {}
namespace {
@@ -140,9 +143,9 @@ protected:
// Insert store if necessary.
if (hasDef) {
- tii->storeRegToStackSlot(*mi->getParent(), next(miItr), newVReg, true,
+ tii->storeRegToStackSlot(*mi->getParent(), llvm::next(miItr), newVReg, true,
ss, trc);
- MachineInstr *storeInstr(next(miItr));
+ MachineInstr *storeInstr(llvm::next(miItr));
SlotIndex storeIndex =
lis->InsertMachineInstrInMaps(storeInstr).getDefIndex();
SlotIndex beginIndex = storeIndex.getPrevIndex();
@@ -170,7 +173,8 @@ public:
: SpillerBase(mf, lis, vrm) {}
std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs) {
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex*) {
// Ignore spillIs - we don't use it.
return trivialSpillEverywhere(li);
}
@@ -179,23 +183,336 @@ public:
/// Falls back on LiveIntervals::addIntervalsForSpills.
class StandardSpiller : public Spiller {
-private:
+protected:
LiveIntervals *lis;
const MachineLoopInfo *loopInfo;
VirtRegMap *vrm;
public:
- StandardSpiller(MachineFunction *mf, LiveIntervals *lis,
- const MachineLoopInfo *loopInfo, VirtRegMap *vrm)
+ StandardSpiller(LiveIntervals *lis, const MachineLoopInfo *loopInfo,
+ VirtRegMap *vrm)
: lis(lis), loopInfo(loopInfo), vrm(vrm) {}
/// Falls back on LiveIntervals::addIntervalsForSpills.
std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs) {
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex*) {
return lis->addIntervalsForSpills(*li, spillIs, loopInfo, *vrm);
}
};
+/// When a call to spill is placed this spiller will first try to break the
+/// interval up into its component values (one new interval per value).
+/// If this fails, or if a call is placed to spill a previously split interval
+/// then the spiller falls back on the standard spilling mechanism.
+class SplittingSpiller : public StandardSpiller {
+public:
+ SplittingSpiller(MachineFunction *mf, LiveIntervals *lis,
+ const MachineLoopInfo *loopInfo, VirtRegMap *vrm)
+ : StandardSpiller(lis, loopInfo, vrm) {
+
+ mri = &mf->getRegInfo();
+ tii = mf->getTarget().getInstrInfo();
+ tri = mf->getTarget().getRegisterInfo();
+ }
+
+ std::vector<LiveInterval*> spill(LiveInterval *li,
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestStart) {
+
+ if (worthTryingToSplit(li)) {
+ return tryVNISplit(li, earliestStart);
+ }
+ // else
+ return StandardSpiller::spill(li, spillIs, earliestStart);
+ }
+
+private:
+
+ MachineRegisterInfo *mri;
+ const TargetInstrInfo *tii;
+ const TargetRegisterInfo *tri;
+ DenseSet<LiveInterval*> alreadySplit;
+
+ bool worthTryingToSplit(LiveInterval *li) const {
+ return (!alreadySplit.count(li) && li->getNumValNums() > 1);
+ }
+
+ /// Try to break a LiveInterval into its component values.
+ std::vector<LiveInterval*> tryVNISplit(LiveInterval *li,
+ SlotIndex *earliestStart) {
+
+ DEBUG(errs() << "Trying VNI split of %reg" << *li << "\n");
+
+ std::vector<LiveInterval*> added;
+ SmallVector<VNInfo*, 4> vnis;
+
+ std::copy(li->vni_begin(), li->vni_end(), std::back_inserter(vnis));
+
+ for (SmallVectorImpl<VNInfo*>::iterator vniItr = vnis.begin(),
+ vniEnd = vnis.end(); vniItr != vniEnd; ++vniItr) {
+ VNInfo *vni = *vniItr;
+
+ // Skip unused VNIs, or VNIs with no kills.
+ if (vni->isUnused() || vni->kills.empty())
+ continue;
+
+ DEBUG(errs() << " Extracted Val #" << vni->id << " as ");
+ LiveInterval *splitInterval = extractVNI(li, vni);
+
+ if (splitInterval != 0) {
+ DEBUG(errs() << *splitInterval << "\n");
+ added.push_back(splitInterval);
+ alreadySplit.insert(splitInterval);
+ if (earliestStart != 0) {
+ if (splitInterval->beginIndex() < *earliestStart)
+ *earliestStart = splitInterval->beginIndex();
+ }
+ } else {
+ DEBUG(errs() << "0\n");
+ }
+ }
+
+ DEBUG(errs() << "Original LI: " << *li << "\n");
+
+ // If the original interval still contains some live ranges,
+ // add it to added and alreadySplit.
+ if (!li->empty()) {
+ added.push_back(li);
+ alreadySplit.insert(li);
+ if (earliestStart != 0) {
+ if (li->beginIndex() < *earliestStart)
+ *earliestStart = li->beginIndex();
+ }
+ }
+
+ return added;
+ }
+
+ /// Extract the given value number from the interval.
+ LiveInterval* extractVNI(LiveInterval *li, VNInfo *vni) const {
+ assert(vni->isDefAccurate() || vni->isPHIDef());
+ assert(!vni->kills.empty());
+
+ // Create a new vreg and live interval, copy VNI kills & ranges over.
+ const TargetRegisterClass *trc = mri->getRegClass(li->reg);
+ unsigned newVReg = mri->createVirtualRegister(trc);
+ vrm->grow();
+ LiveInterval *newLI = &lis->getOrCreateInterval(newVReg);
+ VNInfo *newVNI = newLI->createValueCopy(vni, lis->getVNInfoAllocator());
+
+ // Start by copying all live ranges in the VN to the new interval.
+ for (LiveInterval::iterator rItr = li->begin(), rEnd = li->end();
+ rItr != rEnd; ++rItr) {
+ if (rItr->valno == vni) {
+ newLI->addRange(LiveRange(rItr->start, rItr->end, newVNI));
+ }
+ }
+
+ // Erase the old VNI & ranges.
+ li->removeValNo(vni);
+
+ // Collect all current uses of the register belonging to the given VNI.
+ // We'll use this to rename the register after we've dealt with the def.
+ std::set<MachineInstr*> uses;
+ for (MachineRegisterInfo::use_iterator
+ useItr = mri->use_begin(li->reg), useEnd = mri->use_end();
+ useItr != useEnd; ++useItr) {
+ uses.insert(&*useItr);
+ }
+
+ // Process the def instruction for this VNI.
+ if (newVNI->isPHIDef()) {
+ // Insert a copy at the start of the MBB. The range preceding the
+ // copy will be attached to the original LiveInterval.
+ MachineBasicBlock *defMBB = lis->getMBBFromIndex(newVNI->def);
+ tii->copyRegToReg(*defMBB, defMBB->begin(), newVReg, li->reg, trc, trc);
+ MachineInstr *copyMI = defMBB->begin();
+ copyMI->addRegisterKilled(li->reg, tri);
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+ VNInfo *phiDefVNI = li->getNextValue(lis->getMBBStartIdx(defMBB),
+ 0, false, lis->getVNInfoAllocator());
+ phiDefVNI->setIsPHIDef(true);
+ phiDefVNI->addKill(copyIdx.getDefIndex());
+ li->addRange(LiveRange(phiDefVNI->def, copyIdx.getDefIndex(), phiDefVNI));
+ LiveRange *oldPHIDefRange =
+ newLI->getLiveRangeContaining(lis->getMBBStartIdx(defMBB));
+
+ // If the old phi def starts in the middle of the range chop it up.
+ if (oldPHIDefRange->start < lis->getMBBStartIdx(defMBB)) {
+ LiveRange oldPHIDefRange2(copyIdx.getDefIndex(), oldPHIDefRange->end,
+ oldPHIDefRange->valno);
+ oldPHIDefRange->end = lis->getMBBStartIdx(defMBB);
+ newLI->addRange(oldPHIDefRange2);
+ } else if (oldPHIDefRange->start == lis->getMBBStartIdx(defMBB)) {
+ // Otherwise if it's at the start of the range just trim it.
+ oldPHIDefRange->start = copyIdx.getDefIndex();
+ } else {
+ assert(false && "PHI def range doesn't cover PHI def?");
+ }
+
+ newVNI->def = copyIdx.getDefIndex();
+ newVNI->setCopy(copyMI);
+ newVNI->setIsPHIDef(false); // not a PHI def anymore.
+ newVNI->setIsDefAccurate(true);
+ } else {
+ // non-PHI def. Rename the def. If it's two-addr that means renaming the use
+ // and inserting a new copy too.
+ MachineInstr *defInst = lis->getInstructionFromIndex(newVNI->def);
+ // We'll rename this now, so we can remove it from uses.
+ uses.erase(defInst);
+ unsigned defOpIdx = defInst->findRegisterDefOperandIdx(li->reg);
+ bool isTwoAddr = defInst->isRegTiedToUseOperand(defOpIdx),
+ twoAddrUseIsUndef = false;
+
+ for (unsigned i = 0; i < defInst->getNumOperands(); ++i) {
+ MachineOperand &mo = defInst->getOperand(i);
+ if (mo.isReg() && (mo.isDef() || isTwoAddr) && (mo.getReg()==li->reg)) {
+ mo.setReg(newVReg);
+ if (isTwoAddr && mo.isUse() && mo.isUndef())
+ twoAddrUseIsUndef = true;
+ }
+ }
+
+ SlotIndex defIdx = lis->getInstructionIndex(defInst);
+ newVNI->def = defIdx.getDefIndex();
+
+ if (isTwoAddr && !twoAddrUseIsUndef) {
+ MachineBasicBlock *defMBB = defInst->getParent();
+ tii->copyRegToReg(*defMBB, defInst, newVReg, li->reg, trc, trc);
+ MachineInstr *copyMI = prior(MachineBasicBlock::iterator(defInst));
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+ copyMI->addRegisterKilled(li->reg, tri);
+ LiveRange *origUseRange =
+ li->getLiveRangeContaining(newVNI->def.getUseIndex());
+ VNInfo *origUseVNI = origUseRange->valno;
+ origUseRange->end = copyIdx.getDefIndex();
+ bool updatedKills = false;
+ for (unsigned k = 0; k < origUseVNI->kills.size(); ++k) {
+ if (origUseVNI->kills[k] == defIdx.getDefIndex()) {
+ origUseVNI->kills[k] = copyIdx.getDefIndex();
+ updatedKills = true;
+ break;
+ }
+ }
+ assert(updatedKills && "Failed to update VNI kill list.");
+ VNInfo *copyVNI = newLI->getNextValue(copyIdx.getDefIndex(), copyMI,
+ true, lis->getVNInfoAllocator());
+ copyVNI->addKill(defIdx.getDefIndex());
+ LiveRange copyRange(copyIdx.getDefIndex(),defIdx.getDefIndex(),copyVNI);
+ newLI->addRange(copyRange);
+ }
+ }
+
+ for (std::set<MachineInstr*>::iterator
+ usesItr = uses.begin(), usesEnd = uses.end();
+ usesItr != usesEnd; ++usesItr) {
+ MachineInstr *useInst = *usesItr;
+ SlotIndex useIdx = lis->getInstructionIndex(useInst);
+ LiveRange *useRange =
+ newLI->getLiveRangeContaining(useIdx.getUseIndex());
+
+ // If this use doesn't belong to the new interval skip it.
+ if (useRange == 0)
+ continue;
+
+ // This use doesn't belong to the VNI, skip it.
+ if (useRange->valno != newVNI)
+ continue;
+
+ // Check if this instr is two address.
+ unsigned useOpIdx = useInst->findRegisterUseOperandIdx(li->reg);
+ bool isTwoAddress = useInst->isRegTiedToDefOperand(useOpIdx);
+
+ // Rename uses (and defs for two-address instrs).
+ for (unsigned i = 0; i < useInst->getNumOperands(); ++i) {
+ MachineOperand &mo = useInst->getOperand(i);
+ if (mo.isReg() && (mo.isUse() || isTwoAddress) &&
+ (mo.getReg() == li->reg)) {
+ mo.setReg(newVReg);
+ }
+ }
+
+ // If this is a two address instruction we've got some extra work to do.
+ if (isTwoAddress) {
+ // We modified the def operand, so we need to copy back to the original
+ // reg.
+ MachineBasicBlock *useMBB = useInst->getParent();
+ MachineBasicBlock::iterator useItr(useInst);
+ tii->copyRegToReg(*useMBB, next(useItr), li->reg, newVReg, trc, trc);
+ MachineInstr *copyMI = next(useItr);
+ copyMI->addRegisterKilled(newVReg, tri);
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+
+ // Change the old two-address defined range & vni to start at
+ // (and be defined by) the copy.
+ LiveRange *origDefRange =
+ li->getLiveRangeContaining(useIdx.getDefIndex());
+ origDefRange->start = copyIdx.getDefIndex();
+ origDefRange->valno->def = copyIdx.getDefIndex();
+ origDefRange->valno->setCopy(copyMI);
+
+ // Insert a new range & vni for the two-address-to-copy value. This
+ // will be attached to the new live interval.
+ VNInfo *copyVNI =
+ newLI->getNextValue(useIdx.getDefIndex(), 0, true,
+ lis->getVNInfoAllocator());
+ copyVNI->addKill(copyIdx.getDefIndex());
+ LiveRange copyRange(useIdx.getDefIndex(),copyIdx.getDefIndex(),copyVNI);
+ newLI->addRange(copyRange);
+ }
+ }
+
+ // Iterate over any PHI kills - we'll need to insert new copies for them.
+ for (VNInfo::KillSet::iterator
+ killItr = newVNI->kills.begin(), killEnd = newVNI->kills.end();
+ killItr != killEnd; ++killItr) {
+ SlotIndex killIdx(*killItr);
+ if (killItr->isPHI()) {
+ MachineBasicBlock *killMBB = lis->getMBBFromIndex(killIdx);
+ LiveRange *oldKillRange =
+ newLI->getLiveRangeContaining(killIdx);
+
+ assert(oldKillRange != 0 && "No kill range?");
+
+ tii->copyRegToReg(*killMBB, killMBB->getFirstTerminator(),
+ li->reg, newVReg, trc, trc);
+ MachineInstr *copyMI = prior(killMBB->getFirstTerminator());
+ copyMI->addRegisterKilled(newVReg, tri);
+ SlotIndex copyIdx = lis->InsertMachineInstrInMaps(copyMI);
+
+ // Save the current end. We may need it to add a new range if the
+ // current range runs off the end of the MBB.
+ SlotIndex newKillRangeEnd = oldKillRange->end;
+ oldKillRange->end = copyIdx.getDefIndex();
+
+ if (newKillRangeEnd != lis->getMBBEndIdx(killMBB).getNextSlot()) {
+ assert(newKillRangeEnd > lis->getMBBEndIdx(killMBB).getNextSlot() &&
+ "PHI kill range doesn't reach kill-block end. Not sane.");
+ newLI->addRange(LiveRange(lis->getMBBEndIdx(killMBB).getNextSlot(),
+ newKillRangeEnd, newVNI));
+ }
+
+ *killItr = oldKillRange->end;
+ VNInfo *newKillVNI = li->getNextValue(copyIdx.getDefIndex(),
+ copyMI, true,
+ lis->getVNInfoAllocator());
+ newKillVNI->addKill(lis->getMBBTerminatorGap(killMBB));
+ newKillVNI->setHasPHIKill(true);
+ li->addRange(LiveRange(copyIdx.getDefIndex(),
+ lis->getMBBEndIdx(killMBB).getNextSlot(),
+ newKillVNI));
+ }
+
+ }
+
+ newVNI->setHasPHIKill(false);
+
+ return newLI;
+ }
+
+};
+
}
llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
@@ -203,7 +520,8 @@ llvm::Spiller* llvm::createSpiller(MachineFunction *mf, LiveIntervals *lis,
VirtRegMap *vrm) {
switch (spillerOpt) {
case trivial: return new TrivialSpiller(mf, lis, vrm); break;
- case standard: return new StandardSpiller(mf, lis, loopInfo, vrm); break;
+ case standard: return new StandardSpiller(lis, loopInfo, vrm); break;
+ case splitting: return new SplittingSpiller(mf, lis, loopInfo, vrm); break;
default: llvm_unreachable("Unreachable!"); break;
}
}
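As a reading aid (not part of the patch), here is a schematic of the per-value split that SplittingSpiller::tryVNISplit performs; the interval notation is illustrative, not LiveInterval's exact printing format.

// Suppose %reg1024 carries two value numbers whose live ranges never overlap:
//   %reg1024 = [0,40):val#0  [80,120):val#1
// tryVNISplit extracts each value into its own vreg and interval, so the
// allocator can assign or spill them independently:
//   %reg1025 = [0,40)     ; extracted val#0
//   %reg1026 = [80,120)   ; extracted val#1
// extractVNI inserts copies at PHI defs, PHI kills and two-address defs so
// the original register and the new vregs stay consistent wherever a value
// crosses those points.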
diff --git a/lib/CodeGen/Spiller.h b/lib/CodeGen/Spiller.h
index c6bd985..dda52e8 100644
--- a/lib/CodeGen/Spiller.h
+++ b/lib/CodeGen/Spiller.h
@@ -21,6 +21,7 @@ namespace llvm {
class MachineFunction;
class MachineInstr;
class MachineLoopInfo;
+ class SlotIndex;
class VirtRegMap;
class VNInfo;
@@ -35,7 +36,8 @@ namespace llvm {
/// Spill the given live range. The method used will depend on the Spiller
/// implementation selected.
virtual std::vector<LiveInterval*> spill(LiveInterval *li,
- SmallVectorImpl<LiveInterval*> &spillIs) = 0;
+ SmallVectorImpl<LiveInterval*> &spillIs,
+ SlotIndex *earliestIndex = 0) = 0;
};
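A minimal sketch of how a register allocator might use the new out-parameter; the names (theSpiller, cur) are illustrative and not taken from the patch.

// Hypothetical caller inside a register allocator:
SlotIndex earliestStart = cur->beginIndex();
SmallVector<LiveInterval*, 8> spillIs;
std::vector<LiveInterval*> added =
  theSpiller->spill(cur, spillIs, &earliestStart);
// TrivialSpiller and StandardSpiller ignore the pointer; SplittingSpiller
// lowers earliestStart to the smallest start index among the intervals it
// creates, so the caller knows how far back it has to rescan.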
diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp
index c299192..fd25a37 100644
--- a/lib/CodeGen/StackSlotColoring.cpp
+++ b/lib/CodeGen/StackSlotColoring.cpp
@@ -668,7 +668,7 @@ bool StackSlotColoring::RemoveDeadStores(MachineBasicBlock* MBB) {
if (DCELimit != -1 && (int)NumDead >= DCELimit)
break;
- MachineBasicBlock::iterator NextMI = next(I);
+ MachineBasicBlock::iterator NextMI = llvm::next(I);
if (NextMI == MBB->end()) continue;
int FirstSS, SecondSS;
diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp
index 9c0b596..bf58902 100644
--- a/lib/CodeGen/TailDuplication.cpp
+++ b/lib/CodeGen/TailDuplication.cpp
@@ -17,15 +17,19 @@
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
+#include "llvm/CodeGen/MachineRegisterInfo.h"
+#include "llvm/CodeGen/MachineSSAUpdater.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
+#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/Statistic.h"
using namespace llvm;
+STATISTIC(NumTails , "Number of tails duplicated");
STATISTIC(NumTailDups , "Number of tail duplicated blocks");
STATISTIC(NumInstrDups , "Additional instructions due to tail duplication");
STATISTIC(NumDeadBlocks, "Number of dead blocks removed");
@@ -36,34 +40,71 @@ TailDuplicateSize("tail-dup-size",
cl::desc("Maximum instructions to consider tail duplicating"),
cl::init(2), cl::Hidden);
+static cl::opt<bool>
+TailDupVerify("tail-dup-verify",
+ cl::desc("Verify sanity of PHI instructions during taildup"),
+ cl::init(false), cl::Hidden);
+
+static cl::opt<unsigned>
+TailDupLimit("tail-dup-limit", cl::init(~0U), cl::Hidden);
+
+typedef std::vector<std::pair<MachineBasicBlock*,unsigned> > AvailableValsTy;
+
namespace {
/// TailDuplicatePass - Perform tail duplication.
class TailDuplicatePass : public MachineFunctionPass {
+ bool PreRegAlloc;
const TargetInstrInfo *TII;
MachineModuleInfo *MMI;
+ MachineRegisterInfo *MRI;
+
+ // SSAUpdateVRs - A list of virtual registers for which to update SSA form.
+ SmallVector<unsigned, 16> SSAUpdateVRs;
+
+ // SSAUpdateVals - For each virtual register in SSAUpdateVals keep a list of
+ // source virtual registers.
+ DenseMap<unsigned, AvailableValsTy> SSAUpdateVals;
public:
static char ID;
- explicit TailDuplicatePass() : MachineFunctionPass(&ID) {}
+ explicit TailDuplicatePass(bool PreRA) :
+ MachineFunctionPass(&ID), PreRegAlloc(PreRA) {}
virtual bool runOnMachineFunction(MachineFunction &MF);
virtual const char *getPassName() const { return "Tail Duplication"; }
private:
+ void AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB);
+ void ProcessPHI(MachineInstr *MI, MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies);
+ void DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap);
+ void UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*, 8> &Succs);
bool TailDuplicateBlocks(MachineFunction &MF);
- bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF);
+ bool TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies);
void RemoveDeadBlock(MachineBasicBlock *MBB);
};
char TailDuplicatePass::ID = 0;
}
-FunctionPass *llvm::createTailDuplicatePass() {
- return new TailDuplicatePass();
+FunctionPass *llvm::createTailDuplicatePass(bool PreRegAlloc) {
+ return new TailDuplicatePass(PreRegAlloc);
}
bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
TII = MF.getTarget().getInstrInfo();
+ MRI = &MF.getRegInfo();
MMI = getAnalysisIfAvailable<MachineModuleInfo>();
bool MadeChange = false;
@@ -77,36 +118,325 @@ bool TailDuplicatePass::runOnMachineFunction(MachineFunction &MF) {
return MadeChange;
}
+static void VerifyPHIs(MachineFunction &MF, bool CheckExtra) {
+ for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ++I) {
+ MachineBasicBlock *MBB = I;
+ SmallSetVector<MachineBasicBlock*, 8> Preds(MBB->pred_begin(),
+ MBB->pred_end());
+ MachineBasicBlock::iterator MI = MBB->begin();
+ while (MI != MBB->end()) {
+ if (MI->getOpcode() != TargetInstrInfo::PHI)
+ break;
+ for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
+ PE = Preds.end(); PI != PE; ++PI) {
+ MachineBasicBlock *PredBB = *PI;
+ bool Found = false;
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (PHIBB == PredBB) {
+ Found = true;
+ break;
+ }
+ }
+ if (!Found) {
+ errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ errs() << " missing input from predecessor BB#"
+ << PredBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2) {
+ MachineBasicBlock *PHIBB = MI->getOperand(i+1).getMBB();
+ if (CheckExtra && !Preds.count(PHIBB)) {
+ // This is not a hard error.
+ errs() << "Warning: malformed PHI in BB#" << MBB->getNumber()
+ << ": " << *MI;
+ errs() << " extra input from predecessor BB#"
+ << PHIBB->getNumber() << '\n';
+ }
+ if (PHIBB->getNumber() < 0) {
+ errs() << "Malformed PHI in BB#" << MBB->getNumber() << ": " << *MI;
+ errs() << " non-existing BB#" << PHIBB->getNumber() << '\n';
+ llvm_unreachable(0);
+ }
+ }
+ ++MI;
+ }
+ }
+}
+
/// TailDuplicateBlocks - Look for small blocks that are unconditionally
/// branched to and do not fall through. Tail-duplicate their instructions
/// into their predecessors to eliminate (dynamic) branches.
bool TailDuplicatePass::TailDuplicateBlocks(MachineFunction &MF) {
bool MadeChange = false;
+ if (PreRegAlloc && TailDupVerify) {
+ DEBUG(errs() << "\n*** Before tail-duplicating\n");
+ VerifyPHIs(MF, true);
+ }
+
+ SmallVector<MachineInstr*, 8> NewPHIs;
+ MachineSSAUpdater SSAUpdate(MF, &NewPHIs);
+
for (MachineFunction::iterator I = ++MF.begin(), E = MF.end(); I != E; ) {
MachineBasicBlock *MBB = I++;
+ if (NumTails == TailDupLimit)
+ break;
+
// Only duplicate blocks that end with unconditional branches.
if (MBB->canFallThrough())
continue;
- MadeChange |= TailDuplicate(MBB, MF);
-
- // If it is dead, remove it.
- if (MBB->pred_empty()) {
- NumInstrDups -= MBB->size();
- RemoveDeadBlock(MBB);
+ // Save the successors list.
+ SmallSetVector<MachineBasicBlock*, 8> Succs(MBB->succ_begin(),
+ MBB->succ_end());
+
+ SmallVector<MachineBasicBlock*, 8> TDBBs;
+ SmallVector<MachineInstr*, 16> Copies;
+ if (TailDuplicate(MBB, MF, TDBBs, Copies)) {
+ ++NumTails;
+
+ // TailBB's immediate successors are now successors of those predecessors
+ // which duplicated TailBB. Add the predecessors as sources to the PHI
+ // instructions.
+ bool isDead = MBB->pred_empty();
+ if (PreRegAlloc)
+ UpdateSuccessorsPHIs(MBB, isDead, TDBBs, Succs);
+
+ // If it is dead, remove it.
+ if (isDead) {
+ NumInstrDups -= MBB->size();
+ RemoveDeadBlock(MBB);
+ ++NumDeadBlocks;
+ }
+
+ // Update SSA form.
+ if (!SSAUpdateVRs.empty()) {
+ for (unsigned i = 0, e = SSAUpdateVRs.size(); i != e; ++i) {
+ unsigned VReg = SSAUpdateVRs[i];
+ SSAUpdate.Initialize(VReg);
+
+ // If the original definition is still around, add it as an available
+ // value.
+ MachineInstr *DefMI = MRI->getVRegDef(VReg);
+ MachineBasicBlock *DefBB = 0;
+ if (DefMI) {
+ DefBB = DefMI->getParent();
+ SSAUpdate.AddAvailableValue(DefBB, VReg);
+ }
+
+ // Add the new vregs as available values.
+ DenseMap<unsigned, AvailableValsTy>::iterator LI =
+ SSAUpdateVals.find(VReg);
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ SSAUpdate.AddAvailableValue(SrcBB, SrcReg);
+ }
+
+ // Rewrite uses that are outside of the original def's block.
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(VReg);
+ while (UI != MRI->use_end()) {
+ MachineOperand &UseMO = UI.getOperand();
+ MachineInstr *UseMI = &*UI;
+ ++UI;
+ if (UseMI->getParent() == DefBB)
+ continue;
+ SSAUpdate.RewriteUse(UseMO);
+ }
+ }
+
+ SSAUpdateVRs.clear();
+ SSAUpdateVals.clear();
+ }
+
+ // Eliminate some of the copies inserted by tail duplication to maintain
+ // SSA form.
+ for (unsigned i = 0, e = Copies.size(); i != e; ++i) {
+ MachineInstr *Copy = Copies[i];
+ unsigned Src, Dst, SrcSR, DstSR;
+ if (TII->isMoveInstr(*Copy, Src, Dst, SrcSR, DstSR)) {
+ MachineRegisterInfo::use_iterator UI = MRI->use_begin(Src);
+ if (++UI == MRI->use_end()) {
+ // Copy is the only use. Do trivial copy propagation here.
+ MRI->replaceRegWith(Dst, Src);
+ Copy->eraseFromParent();
+ }
+ }
+ }
+
+ if (PreRegAlloc && TailDupVerify)
+ VerifyPHIs(MF, false);
MadeChange = true;
- ++NumDeadBlocks;
}
}
+
return MadeChange;
}
+static bool isDefLiveOut(unsigned Reg, MachineBasicBlock *BB,
+ const MachineRegisterInfo *MRI) {
+ for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg),
+ UE = MRI->use_end(); UI != UE; ++UI) {
+ MachineInstr *UseMI = &*UI;
+ if (UseMI->getParent() != BB)
+ return true;
+ }
+ return false;
+}
+
+static unsigned getPHISrcRegOpIdx(MachineInstr *MI, MachineBasicBlock *SrcBB) {
+ for (unsigned i = 1, e = MI->getNumOperands(); i != e; i += 2)
+ if (MI->getOperand(i+1).getMBB() == SrcBB)
+ return i;
+ return 0;
+}
+
+/// AddSSAUpdateEntry - Add a definition and source virtual registers pair for
+/// SSA update.
+void TailDuplicatePass::AddSSAUpdateEntry(unsigned OrigReg, unsigned NewReg,
+ MachineBasicBlock *BB) {
+ DenseMap<unsigned, AvailableValsTy>::iterator LI= SSAUpdateVals.find(OrigReg);
+ if (LI != SSAUpdateVals.end())
+ LI->second.push_back(std::make_pair(BB, NewReg));
+ else {
+ AvailableValsTy Vals;
+ Vals.push_back(std::make_pair(BB, NewReg));
+ SSAUpdateVals.insert(std::make_pair(OrigReg, Vals));
+ SSAUpdateVRs.push_back(OrigReg);
+ }
+}
+
+/// ProcessPHI - Process PHI node in TailBB by turning it into a copy in PredBB.
+/// Remember the source register that's contributed by PredBB and update the
+/// SSA update map.
+void TailDuplicatePass::ProcessPHI(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ DenseMap<unsigned, unsigned> &LocalVRMap,
+ SmallVector<std::pair<unsigned,unsigned>, 4> &Copies) {
+ unsigned DefReg = MI->getOperand(0).getReg();
+ unsigned SrcOpIdx = getPHISrcRegOpIdx(MI, PredBB);
+ assert(SrcOpIdx && "Unable to find matching PHI source?");
+ unsigned SrcReg = MI->getOperand(SrcOpIdx).getReg();
+ const TargetRegisterClass *RC = MRI->getRegClass(DefReg);
+ LocalVRMap.insert(std::make_pair(DefReg, SrcReg));
+
+ // Insert a copy from source to the end of the block. The def register is the
+ // available value liveout of the block.
+ unsigned NewDef = MRI->createVirtualRegister(RC);
+ Copies.push_back(std::make_pair(NewDef, SrcReg));
+ if (isDefLiveOut(DefReg, TailBB, MRI))
+ AddSSAUpdateEntry(DefReg, NewDef, PredBB);
+
+ // Remove PredBB from the PHI node.
+ MI->RemoveOperand(SrcOpIdx+1);
+ MI->RemoveOperand(SrcOpIdx);
+ if (MI->getNumOperands() == 1)
+ MI->eraseFromParent();
+}
+
+/// DuplicateInstruction - Duplicate a TailBB instruction to PredBB and update
+/// the source operands due to earlier PHI translation.
+void TailDuplicatePass::DuplicateInstruction(MachineInstr *MI,
+ MachineBasicBlock *TailBB,
+ MachineBasicBlock *PredBB,
+ MachineFunction &MF,
+ DenseMap<unsigned, unsigned> &LocalVRMap) {
+ MachineInstr *NewMI = MF.CloneMachineInstr(MI);
+ for (unsigned i = 0, e = NewMI->getNumOperands(); i != e; ++i) {
+ MachineOperand &MO = NewMI->getOperand(i);
+ if (!MO.isReg())
+ continue;
+ unsigned Reg = MO.getReg();
+ if (!Reg || TargetRegisterInfo::isPhysicalRegister(Reg))
+ continue;
+ if (MO.isDef()) {
+ const TargetRegisterClass *RC = MRI->getRegClass(Reg);
+ unsigned NewReg = MRI->createVirtualRegister(RC);
+ MO.setReg(NewReg);
+ LocalVRMap.insert(std::make_pair(Reg, NewReg));
+ if (isDefLiveOut(Reg, TailBB, MRI))
+ AddSSAUpdateEntry(Reg, NewReg, PredBB);
+ } else {
+ DenseMap<unsigned, unsigned>::iterator VI = LocalVRMap.find(Reg);
+ if (VI != LocalVRMap.end())
+ MO.setReg(VI->second);
+ }
+ }
+ PredBB->insert(PredBB->end(), NewMI);
+}
+
+/// UpdateSuccessorsPHIs - After FromBB is tail duplicated into its predecessor
+/// blocks, the successors have gained new predecessors. Update the PHI
+/// instructions in them accordingly.
+void
+TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallSetVector<MachineBasicBlock*,8> &Succs) {
+ for (SmallSetVector<MachineBasicBlock*, 8>::iterator SI = Succs.begin(),
+ SE = Succs.end(); SI != SE; ++SI) {
+ MachineBasicBlock *SuccBB = *SI;
+ for (MachineBasicBlock::iterator II = SuccBB->begin(), EE = SuccBB->end();
+ II != EE; ++II) {
+ if (II->getOpcode() != TargetInstrInfo::PHI)
+ break;
+ unsigned Idx = 0;
+ for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ Idx = i;
+ break;
+ }
+ }
+
+ assert(Idx != 0);
+ MachineOperand &MO0 = II->getOperand(Idx);
+ unsigned Reg = MO0.getReg();
+ if (isDead) {
+ // Folded into the previous BB.
+ // There could be duplicate phi source entries. FIXME: Should sdisel
+ // or an earlier pass have fixed this?
+ for (unsigned i = II->getNumOperands()-2; i != Idx; i -= 2) {
+ MachineOperand &MO = II->getOperand(i+1);
+ if (MO.getMBB() == FromBB) {
+ II->RemoveOperand(i+1);
+ II->RemoveOperand(i);
+ }
+ }
+ II->RemoveOperand(Idx+1);
+ II->RemoveOperand(Idx);
+ }
+ DenseMap<unsigned,AvailableValsTy>::iterator LI=SSAUpdateVals.find(Reg);
+ if (LI != SSAUpdateVals.end()) {
+ // This register is defined in the tail block.
+ for (unsigned j = 0, ee = LI->second.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = LI->second[j].first;
+ unsigned SrcReg = LI->second[j].second;
+ II->addOperand(MachineOperand::CreateReg(SrcReg, false));
+ II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ }
+ } else {
+ // Live in tail block, must also be live in predecessors.
+ for (unsigned j = 0, ee = TDBBs.size(); j != ee; ++j) {
+ MachineBasicBlock *SrcBB = TDBBs[j];
+ II->addOperand(MachineOperand::CreateReg(Reg, false));
+ II->addOperand(MachineOperand::CreateMBB(SrcBB));
+ }
+ }
+ }
+ }
+}
+
/// TailDuplicate - If it is profitable, duplicate TailBB's contents in each
/// of its predecessors.
-bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
- MachineFunction &MF) {
+bool
+TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB, MachineFunction &MF,
+ SmallVector<MachineBasicBlock*, 8> &TDBBs,
+ SmallVector<MachineInstr*, 16> &Copies) {
// Don't try to tail-duplicate single-block loops.
if (TailBB->isSuccessor(TailBB))
return false;
@@ -129,28 +459,36 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// Check the instructions in the block to determine whether tail-duplication
// is invalid or unlikely to be profitable.
- unsigned i = 0;
+ unsigned InstrCount = 0;
bool HasCall = false;
for (MachineBasicBlock::iterator I = TailBB->begin();
- I != TailBB->end(); ++I, ++i) {
+ I != TailBB->end(); ++I) {
// Non-duplicable things shouldn't be tail-duplicated.
if (I->getDesc().isNotDuplicable()) return false;
+ // Do not duplicate 'return' instructions if this is a pre-regalloc run.
+ // A return may expand into a lot more instructions (e.g. reload of callee
+ // saved registers) after PEI.
+ if (PreRegAlloc && I->getDesc().isReturn()) return false;
// Don't duplicate more than the threshold.
- if (i == MaxDuplicateCount) return false;
+ if (InstrCount == MaxDuplicateCount) return false;
// Remember if we saw a call.
if (I->getDesc().isCall()) HasCall = true;
+ if (I->getOpcode() != TargetInstrInfo::PHI)
+ InstrCount += 1;
}
// Heuristically, don't tail-duplicate calls if it would expand code size,
// as it's less likely to be worth the extra cost.
- if (i > 1 && HasCall)
+ if (InstrCount > 1 && HasCall)
return false;
+ DEBUG(errs() << "\n*** Tail-duplicating BB#" << TailBB->getNumber() << '\n');
+
// Iterate through all the unique predecessors and tail-duplicate this
// block into them, if possible. Copying the list ahead of time also
// avoids trouble with the predecessor list reallocating.
bool Changed = false;
- SmallSetVector<MachineBasicBlock *, 8> Preds(TailBB->pred_begin(),
- TailBB->pred_end());
+ SmallSetVector<MachineBasicBlock*, 8> Preds(TailBB->pred_begin(),
+ TailBB->pred_end());
for (SmallSetVector<MachineBasicBlock *, 8>::iterator PI = Preds.begin(),
PE = Preds.end(); PI != PE; ++PI) {
MachineBasicBlock *PredBB = *PI;
@@ -175,13 +513,35 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
DEBUG(errs() << "\nTail-duplicating into PredBB: " << *PredBB
<< "From Succ: " << *TailBB);
+ TDBBs.push_back(PredBB);
+
// Remove PredBB's unconditional branch.
TII->RemoveBranch(*PredBB);
+
// Clone the contents of TailBB into PredBB.
- for (MachineBasicBlock::iterator I = TailBB->begin(), E = TailBB->end();
- I != E; ++I) {
- MachineInstr *NewMI = MF.CloneMachineInstr(I);
- PredBB->insert(PredBB->end(), NewMI);
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ while (I != TailBB->end()) {
+ MachineInstr *MI = &*I;
+ ++I;
+ if (MI->getOpcode() == TargetInstrInfo::PHI) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ ProcessPHI(MI, TailBB, PredBB, LocalVRMap, CopyInfos);
+ } else {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ DuplicateInstruction(MI, TailBB, PredBB, MF, LocalVRMap);
+ }
+ }
+ MachineBasicBlock::iterator Loc = PredBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
+ TII->copyRegToReg(*PredBB, Loc, CopyInfos[i].first,
+ CopyInfos[i].second, RC,RC);
+ MachineInstr *CopyMI = prior(Loc);
+ Copies.push_back(CopyMI);
}
NumInstrDups += TailBB->size() - 1; // subtract one for removed branch
@@ -190,8 +550,8 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
assert(PredBB->succ_empty() &&
"TailDuplicate called on block with multiple successors!");
for (MachineBasicBlock::succ_iterator I = TailBB->succ_begin(),
- E = TailBB->succ_end(); I != E; ++I)
- PredBB->addSuccessor(*I);
+ E = TailBB->succ_end(); I != E; ++I)
+ PredBB->addSuccessor(*I);
Changed = true;
++NumTailDups;
@@ -200,22 +560,56 @@ bool TailDuplicatePass::TailDuplicate(MachineBasicBlock *TailBB,
// If TailBB was duplicated into all its predecessors except for the prior
// block, which falls through unconditionally, move the contents of this
// block into the prior block.
- MachineBasicBlock &PrevBB = *prior(MachineFunction::iterator(TailBB));
+ MachineBasicBlock *PrevBB = prior(MachineFunction::iterator(TailBB));
MachineBasicBlock *PriorTBB = 0, *PriorFBB = 0;
SmallVector<MachineOperand, 4> PriorCond;
bool PriorUnAnalyzable =
- TII->AnalyzeBranch(PrevBB, PriorTBB, PriorFBB, PriorCond, true);
+ TII->AnalyzeBranch(*PrevBB, PriorTBB, PriorFBB, PriorCond, true);
// This has to check PrevBB->succ_size() because EH edges are ignored by
// AnalyzeBranch.
if (!PriorUnAnalyzable && PriorCond.empty() && !PriorTBB &&
- TailBB->pred_size() == 1 && PrevBB.succ_size() == 1 &&
+ TailBB->pred_size() == 1 && PrevBB->succ_size() == 1 &&
!TailBB->hasAddressTaken()) {
- DEBUG(errs() << "\nMerging into block: " << PrevBB
+ DEBUG(errs() << "\nMerging into block: " << *PrevBB
<< "From MBB: " << *TailBB);
- PrevBB.splice(PrevBB.end(), TailBB, TailBB->begin(), TailBB->end());
- PrevBB.removeSuccessor(PrevBB.succ_begin());;
- assert(PrevBB.succ_empty());
- PrevBB.transferSuccessors(TailBB);
+ if (PreRegAlloc) {
+ DenseMap<unsigned, unsigned> LocalVRMap;
+ SmallVector<std::pair<unsigned,unsigned>, 4> CopyInfos;
+ MachineBasicBlock::iterator I = TailBB->begin();
+ // Process PHI instructions first.
+ while (I != TailBB->end() && I->getOpcode() == TargetInstrInfo::PHI) {
+ // Replace the uses of the def of the PHI with the register coming
+ // from PredBB.
+ MachineInstr *MI = &*I++;
+ ProcessPHI(MI, TailBB, PrevBB, LocalVRMap, CopyInfos);
+ if (MI->getParent())
+ MI->eraseFromParent();
+ }
+
+ // Now copy the non-PHI instructions.
+ while (I != TailBB->end()) {
+ // Replace def of virtual registers with new registers, and update
+ // uses with PHI source register or the new registers.
+ MachineInstr *MI = &*I++;
+ DuplicateInstruction(MI, TailBB, PrevBB, MF, LocalVRMap);
+ MI->eraseFromParent();
+ }
+ MachineBasicBlock::iterator Loc = PrevBB->getFirstTerminator();
+ for (unsigned i = 0, e = CopyInfos.size(); i != e; ++i) {
+ const TargetRegisterClass *RC = MRI->getRegClass(CopyInfos[i].first);
+ TII->copyRegToReg(*PrevBB, Loc, CopyInfos[i].first,
+ CopyInfos[i].second, RC, RC);
+ MachineInstr *CopyMI = prior(Loc);
+ Copies.push_back(CopyMI);
+ }
+ } else {
+ // No PHIs to worry about, just splice the instructions over.
+ PrevBB->splice(PrevBB->end(), TailBB, TailBB->begin(), TailBB->end());
+ }
+ PrevBB->removeSuccessor(PrevBB->succ_begin());
+ assert(PrevBB->succ_empty());
+ PrevBB->transferSuccessors(TailBB);
+ TDBBs.push_back(PrevBB);
Changed = true;
}
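To make the PHI handling in this pass easier to follow, here is a schematic before/after sketch at the machine-IR level. Block names, vreg numbers and the instruction syntax are made up for illustration; they are not taken from the patch.

// Before: TailBB is small, ends in an unconditional branch, and is reached
// from PredBB (among others).
//   TailBB:
//     %vA = PHI [%v1, PredBB], [%v2, OtherBB]
//     %vB = ADD %vA, 1
//     B SuccBB
//
// After duplicating TailBB into PredBB (pre-regalloc path):
//   PredBB:
//     ...                  ; original contents; the branch to TailBB is gone
//     %vB2 = ADD %v1, 1    ; DuplicateInstruction: the PHI value %vA is
//                          ; replaced by PredBB's incoming value %v1, and
//                          ; the def gets a fresh vreg (%vB -> %vB2)
//     %vN = COPY %v1       ; ProcessPHI: a copy that stands in for %vA as the
//                          ; value available out of PredBB (for SSA repair)
//     B SuccBB
//
// UpdateSuccessorsPHIs then adds a [%vB2, PredBB] entry to SuccBB's PHIs, and
// MachineSSAUpdater rewrites any remaining uses of %vA / %vB outside TailBB.
// TailBB itself is deleted once it has no predecessors left.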
diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp
index 102e2a3..393e315 100644
--- a/lib/CodeGen/TargetInstrInfoImpl.cpp
+++ b/lib/CodeGen/TargetInstrInfoImpl.cpp
@@ -329,7 +329,7 @@ TargetInstrInfo::isReallyTriviallyReMaterializableGeneric(const MachineInstr *
return false;
// For the def, it should be the only def of that register.
- if (MO.isDef() && (next(MRI.def_begin(Reg)) != MRI.def_end() ||
+ if (MO.isDef() && (llvm::next(MRI.def_begin(Reg)) != MRI.def_end() ||
MRI.isLiveIn(Reg)))
return false;
diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp
index 5fa690b..98b95ac 100644
--- a/lib/CodeGen/TwoAddressInstructionPass.cpp
+++ b/lib/CodeGen/TwoAddressInstructionPass.cpp
@@ -211,7 +211,7 @@ bool TwoAddressInstructionPass::Sink3AddrInstruction(MachineBasicBlock *MBB,
++KillPos;
unsigned NumVisited = 0;
- for (MachineBasicBlock::iterator I = next(OldPos); I != KillPos; ++I) {
+ for (MachineBasicBlock::iterator I = llvm::next(OldPos); I != KillPos; ++I) {
MachineInstr *OtherMI = I;
if (NumVisited > 30) // FIXME: Arbitrary limit to reduce compile time cost.
return false;
@@ -412,7 +412,7 @@ static bool isKilled(MachineInstr &MI, unsigned Reg,
MachineRegisterInfo::def_iterator Begin = MRI->def_begin(Reg);
// If there are multiple defs, we can't do a simple analysis, so just
// go with what the kill flag says.
- if (next(Begin) != MRI->def_end())
+ if (llvm::next(Begin) != MRI->def_end())
return true;
DefMI = &*Begin;
bool IsSrcPhys, IsDstPhys;
@@ -643,7 +643,7 @@ TwoAddressInstructionPass::ConvertInstTo3Addr(MachineBasicBlock::iterator &mi,
if (!Sunk) {
DistanceMap.insert(std::make_pair(NewMI, Dist));
mi = NewMI;
- nmi = next(mi);
+ nmi = llvm::next(mi);
}
return true;
}
@@ -923,7 +923,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &MF) {
Processed.clear();
for (MachineBasicBlock::iterator mi = mbbi->begin(), me = mbbi->end();
mi != me; ) {
- MachineBasicBlock::iterator nmi = next(mi);
+ MachineBasicBlock::iterator nmi = llvm::next(mi);
const TargetInstrDesc &TID = mi->getDesc();
bool FirstTied = true;
diff --git a/lib/CodeGen/VirtRegRewriter.cpp b/lib/CodeGen/VirtRegRewriter.cpp
index 10c8066..054c3b6 100644
--- a/lib/CodeGen/VirtRegRewriter.cpp
+++ b/lib/CodeGen/VirtRegRewriter.cpp
@@ -754,7 +754,7 @@ void AvailableSpills::AddAvailableRegsToLiveIn(MachineBasicBlock &MBB,
}
// Skip over the same register.
- std::multimap<unsigned, int>::iterator NI = next(I);
+ std::multimap<unsigned, int>::iterator NI = llvm::next(I);
while (NI != E && NI->first == Reg) {
++I;
++NI;
@@ -1133,7 +1133,7 @@ private:
std::vector<MachineOperand*> &KillOps,
VirtRegMap &VRM) {
- MachineBasicBlock::iterator NextMII = next(MII);
+ MachineBasicBlock::iterator NextMII = llvm::next(MII);
if (NextMII == MBB.end())
return false;
@@ -1186,7 +1186,7 @@ private:
// Unfold next instructions that fold the same SS.
do {
MachineInstr &NextMI = *NextMII;
- NextMII = next(NextMII);
+ NextMII = llvm::next(NextMII);
NewMIs.clear();
if (!TII->unfoldMemoryOperand(MF, &NextMI, VirtReg, false, false, NewMIs))
llvm_unreachable("Unable unfold the load / store folding instruction!");
@@ -1463,8 +1463,8 @@ private:
std::vector<MachineOperand*> &KillOps,
VirtRegMap &VRM) {
- MachineBasicBlock::iterator oldNextMII = next(MII);
- TII->storeRegToStackSlot(MBB, next(MII), PhysReg, true, StackSlot, RC);
+ MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+ TII->storeRegToStackSlot(MBB, llvm::next(MII), PhysReg, true, StackSlot, RC);
MachineInstr *StoreMI = prior(oldNextMII);
VRM.addSpillSlotUse(StackSlot, StoreMI);
DEBUG(errs() << "Store:\t" << *StoreMI);
@@ -1626,14 +1626,14 @@ private:
DistanceMap.clear();
for (MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MII != E; ) {
- MachineBasicBlock::iterator NextMII = next(MII);
+ MachineBasicBlock::iterator NextMII = llvm::next(MII);
VirtRegMap::MI2VirtMapTy::const_iterator I, End;
bool Erased = false;
bool BackTracked = false;
if (OptimizeByUnfold(MBB, MII,
MaybeDeadStores, Spills, RegKills, KillOps, VRM))
- NextMII = next(MII);
+ NextMII = llvm::next(MII);
MachineInstr &MI = *MII;
@@ -1657,7 +1657,7 @@ private:
// Back-schedule reloads and remats.
MachineBasicBlock::iterator InsertLoc =
- ComputeReloadLoc(next(MII), MBB.begin(), PhysReg, TRI, false,
+ ComputeReloadLoc(llvm::next(MII), MBB.begin(), PhysReg, TRI, false,
SS, TII, MF);
TII->loadRegFromStackSlot(MBB, InsertLoc, PhysReg, SS, RC);
@@ -1667,7 +1667,7 @@ private:
++NumPSpills;
DistanceMap.insert(std::make_pair(LoadMI, Dist++));
}
- NextMII = next(MII);
+ NextMII = llvm::next(MII);
}
// Insert restores here if asked to.
@@ -1785,14 +1785,14 @@ private:
const TargetRegisterClass *RC = RegInfo->getRegClass(VirtReg);
unsigned Phys = VRM.getPhys(VirtReg);
int StackSlot = VRM.getStackSlot(VirtReg);
- MachineBasicBlock::iterator oldNextMII = next(MII);
- TII->storeRegToStackSlot(MBB, next(MII), Phys, isKill, StackSlot, RC);
+ MachineBasicBlock::iterator oldNextMII = llvm::next(MII);
+ TII->storeRegToStackSlot(MBB, llvm::next(MII), Phys, isKill, StackSlot, RC);
MachineInstr *StoreMI = prior(oldNextMII);
VRM.addSpillSlotUse(StackSlot, StoreMI);
DEBUG(errs() << "Store:\t" << *StoreMI);
VRM.virtFolded(VirtReg, StoreMI, VirtRegMap::isMod);
}
- NextMII = next(MII);
+ NextMII = llvm::next(MII);
}
/// ReusedOperands - Keep track of operand reuse in case we need to undo
@@ -2265,7 +2265,7 @@ private:
if (CommuteToFoldReload(MBB, MII, VirtReg, SrcReg, StackSlot,
Spills, RegKills, KillOps, TRI, VRM)) {
- NextMII = next(MII);
+ NextMII = llvm::next(MII);
BackTracked = true;
goto ProcessNextInst;
}
@@ -2381,7 +2381,7 @@ private:
MachineInstr *&LastStore = MaybeDeadStores[StackSlot];
SpillRegToStackSlot(MBB, MII, -1, PhysReg, StackSlot, RC, true,
LastStore, Spills, ReMatDefs, RegKills, KillOps, VRM);
- NextMII = next(MII);
+ NextMII = llvm::next(MII);
// Check to see if this is a noop copy. If so, eliminate the
// instruction before considering the dest reg to be changed.
diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp
index 6d781c7..26afa54 100644
--- a/lib/ExecutionEngine/JIT/JIT.cpp
+++ b/lib/ExecutionEngine/JIT/JIT.cpp
@@ -208,7 +208,7 @@ ExecutionEngine *JIT::createJIT(ModuleProvider *MP,
JITMemoryManager *JMM,
CodeGenOpt::Level OptLevel,
bool GVsWithCode,
- CodeModel::Model CMM) {
+ CodeModel::Model CMM) {
// Make sure we can resolve symbols in the program as well. The zero arg
// to the function tells DynamicLibrary to load the program, not a library.
if (sys::DynamicLibrary::LoadLibraryPermanently(0, ErrorStr))
@@ -681,7 +681,7 @@ void *JIT::getOrEmitGlobalVariable(const GlobalVariable *GV) {
if (Ptr) return Ptr;
// If the global is external, just remember the address.
- if (GV->isDeclaration()) {
+ if (GV->isDeclaration() || GV->hasAvailableExternallyLinkage()) {
#if HAVE___DSO_HANDLE
if (GV->getName() == "__dso_handle")
return (void*)&__dso_handle;
diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
index f2b28ad..0193486 100644
--- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
+++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp
@@ -175,7 +175,6 @@ struct KeyInfo {
static inline unsigned getTombstoneKey() { return -2U; }
static unsigned getHashValue(const unsigned &Key) { return Key; }
static bool isEqual(unsigned LHS, unsigned RHS) { return LHS == RHS; }
- static bool isPod() { return true; }
};
/// ActionEntry - Structure describing an entry in the actions table.
diff --git a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
index b45c71f..52a8f71 100644
--- a/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
+++ b/lib/ExecutionEngine/JIT/OProfileJITEventListener.cpp
@@ -69,24 +69,18 @@ OProfileJITEventListener::~OProfileJITEventListener() {
}
class FilenameCache {
- // Holds the filename of each Scope, so that we can pass the
- // pointer into oprofile. These char*s are freed in the destructor.
- DenseMap<MDNode*, char*> Filenames;
+ // Holds the filename of each Scope, so that we can pass a null-terminated
+ // string into oprofile.
+ DenseMap<MDNode*, std::string> Filenames;
public:
const char *getFilename(MDNode *Scope) {
- char *&Filename = Filenames[Scope];
- if (Filename == NULL) {
+ std::string &Filename = Filenames[Scope];
+ if (Filename.empty()) {
DIScope S(Scope);
- Filename = strdup(S.getFilename());
- }
- return Filename;
- }
- ~FilenameCache() {
- for (DenseMap<MDNode*, char*>::iterator
- I = Filenames.begin(), E = Filenames.end(); I != E; ++I) {
- free(I->second);
+ Filename = S.getFilename();
}
+ return Filename.c_str();
}
};
diff --git a/lib/Support/CMakeLists.txt b/lib/Support/CMakeLists.txt
index cd355ff..ac736dc 100644
--- a/lib/Support/CMakeLists.txt
+++ b/lib/Support/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_library(LLVMSupport
CommandLine.cpp
ConstantRange.cpp
Debug.cpp
+ DeltaAlgorithm.cpp
Dwarf.cpp
ErrorHandling.cpp
FileUtilities.cpp
diff --git a/lib/Support/CommandLine.cpp b/lib/Support/CommandLine.cpp
index 9cf9c89..b6c0e08 100644
--- a/lib/Support/CommandLine.cpp
+++ b/lib/Support/CommandLine.cpp
@@ -778,9 +778,10 @@ void cl::ParseCommandLineOptions(int argc, char **argv,
free(*i);
}
- DEBUG(errs() << "\nArgs: ";
+ DEBUG(errs() << "Args: ";
for (int i = 0; i < argc; ++i)
errs() << argv[i] << ' ';
+ errs() << '\n';
);
// If we had an error processing our arguments, don't let the program execute
diff --git a/lib/Support/DeltaAlgorithm.cpp b/lib/Support/DeltaAlgorithm.cpp
new file mode 100644
index 0000000..d176548
--- /dev/null
+++ b/lib/Support/DeltaAlgorithm.cpp
@@ -0,0 +1,114 @@
+//===--- DeltaAlgorithm.cpp - A Set Minimization Algorithm -----*- C++ -*--===//
+//
+// The LLVM Compiler Infrastructure
+//
+// This file is distributed under the University of Illinois Open Source
+// License. See LICENSE.TXT for details.
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/DeltaAlgorithm.h"
+#include <algorithm>
+#include <iterator>
+using namespace llvm;
+
+DeltaAlgorithm::~DeltaAlgorithm() {
+}
+
+bool DeltaAlgorithm::GetTestResult(const changeset_ty &Changes) {
+ if (FailedTestsCache.count(Changes))
+ return false;
+
+ bool Result = ExecuteOneTest(Changes);
+ if (!Result)
+ FailedTestsCache.insert(Changes);
+
+ return Result;
+}
+
+void DeltaAlgorithm::Split(const changeset_ty &S, changesetlist_ty &Res) {
+ // FIXME: Allow clients to provide heuristics for improved splitting.
+
+ // FIXME: This is really slow.
+ changeset_ty LHS, RHS;
+ unsigned idx = 0;
+ for (changeset_ty::const_iterator it = S.begin(),
+ ie = S.end(); it != ie; ++it, ++idx)
+ ((idx & 1) ? LHS : RHS).insert(*it);
+ if (!LHS.empty())
+ Res.push_back(LHS);
+ if (!RHS.empty())
+ Res.push_back(RHS);
+}
+
+DeltaAlgorithm::changeset_ty
+DeltaAlgorithm::Delta(const changeset_ty &Changes,
+ const changesetlist_ty &Sets) {
+ // Invariant: union(Res) == Changes
+ UpdatedSearchState(Changes, Sets);
+
+ // If there is nothing left we can remove, we are done.
+ if (Sets.size() <= 1)
+ return Changes;
+
+ // Look for a passing subset.
+ changeset_ty Res;
+ if (Search(Changes, Sets, Res))
+ return Res;
+
+ // Otherwise, partition the sets if possible; if not we are done.
+ changesetlist_ty SplitSets;
+ for (changesetlist_ty::const_iterator it = Sets.begin(),
+ ie = Sets.end(); it != ie; ++it)
+ Split(*it, SplitSets);
+ if (SplitSets.size() == Sets.size())
+ return Changes;
+
+ return Delta(Changes, SplitSets);
+}
+
+bool DeltaAlgorithm::Search(const changeset_ty &Changes,
+ const changesetlist_ty &Sets,
+ changeset_ty &Res) {
+ // FIXME: Parallelize.
+ for (changesetlist_ty::const_iterator it = Sets.begin(),
+ ie = Sets.end(); it != ie; ++it) {
+ // If the test passes on this subset alone, recurse.
+ if (GetTestResult(*it)) {
+ changesetlist_ty Sets;
+ Split(*it, Sets);
+ Res = Delta(*it, Sets);
+ return true;
+ }
+
+ // Otherwise, if we have more than two sets, see if test passes on the
+ // complement.
+ if (Sets.size() > 2) {
+ // FIXME: This is really slow.
+ changeset_ty Complement;
+ std::set_difference(
+ Changes.begin(), Changes.end(), it->begin(), it->end(),
+ std::insert_iterator<changeset_ty>(Complement, Complement.begin()));
+ if (GetTestResult(Complement)) {
+ changesetlist_ty ComplementSets;
+ ComplementSets.insert(ComplementSets.end(), Sets.begin(), it);
+ ComplementSets.insert(ComplementSets.end(), it + 1, Sets.end());
+ Res = Delta(Complement, ComplementSets);
+ return true;
+ }
+ }
+ }
+
+ return false;
+}
+
+DeltaAlgorithm::changeset_ty DeltaAlgorithm::Run(const changeset_ty &Changes) {
+ // Check empty set first to quickly find poor test functions.
+ if (GetTestResult(changeset_ty()))
+ return changeset_ty();
+
+ // Otherwise run the real delta algorithm.
+ changesetlist_ty Sets;
+ Split(Changes, Sets);
+
+ return Delta(Changes, Sets);
+}
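A small usage sketch for the new DeltaAlgorithm. It assumes the accompanying llvm/ADT/DeltaAlgorithm.h (not shown in this diff) declares changeset_ty as std::set of unsigned and ExecuteOneTest as the virtual test hook, matching the .cpp above.

#include "llvm/ADT/DeltaAlgorithm.h"
#include <set>

// Sketch only: the "test" passes iff changes 3 and 7 are both still present.
class PairMinimizer : public llvm::DeltaAlgorithm {
protected:
  virtual bool ExecuteOneTest(const changeset_ty &Changes) {
    return Changes.count(3) && Changes.count(7);
  }
};

// Run() then searches for a small subset of the changes that still passes:
//   PairMinimizer DD;
//   llvm::DeltaAlgorithm::changeset_ty All;
//   for (unsigned i = 0; i != 10; ++i) All.insert(i);
//   llvm::DeltaAlgorithm::changeset_ty Minimal = DD.Run(All);  // expect {3, 7}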
diff --git a/lib/Support/MemoryBuffer.cpp b/lib/Support/MemoryBuffer.cpp
index b04864a..df1aa6a 100644
--- a/lib/Support/MemoryBuffer.cpp
+++ b/lib/Support/MemoryBuffer.cpp
@@ -176,7 +176,7 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
#endif
int FD = ::open(Filename.str().c_str(), O_RDONLY|OpenFlags);
if (FD == -1) {
- if (ErrStr) *ErrStr = "could not open file";
+ if (ErrStr) *ErrStr = strerror(errno);
return 0;
}
@@ -186,7 +186,7 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
struct stat FileInfo;
// TODO: This should use fstat64 when available.
if (fstat(FD, &FileInfo) == -1) {
- if (ErrStr) *ErrStr = "could not get file length";
+ if (ErrStr) *ErrStr = strerror(errno);
::close(FD);
return 0;
}
@@ -230,8 +230,8 @@ MemoryBuffer *MemoryBuffer::getFile(StringRef Filename, std::string *ErrStr,
// try again
} else {
// error reading.
+ if (ErrStr) *ErrStr = strerror(errno);
close(FD);
- if (ErrStr) *ErrStr = "error reading file data";
return 0;
}
}
diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp
index 31451cc..0c90e77 100644
--- a/lib/Support/raw_ostream.cpp
+++ b/lib/Support/raw_ostream.cpp
@@ -209,8 +209,7 @@ raw_ostream &raw_ostream::operator<<(const void *P) {
}
raw_ostream &raw_ostream::operator<<(double N) {
- this->operator<<(ftostr(N));
- return *this;
+ return this->operator<<(ftostr(N));
}
diff --git a/lib/System/Atomic.cpp b/lib/System/Atomic.cpp
index f9b55a1..7ba8b77 100644
--- a/lib/System/Atomic.cpp
+++ b/lib/System/Atomic.cpp
@@ -85,7 +85,7 @@ sys::cas_flag sys::AtomicAdd(volatile sys::cas_flag* ptr, sys::cas_flag val) {
#elif defined(__GNUC__)
return __sync_add_and_fetch(ptr, val);
#elif defined(_MSC_VER)
- return InterlockedAdd(ptr, val);
+ return InterlockedExchangeAdd(ptr, val) + val;
#else
# error No atomic add implementation for your platform!
#endif
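A note on the MSVC branch above: InterlockedExchangeAdd returns the value the variable held before the addition, whereas sys::AtomicAdd (like GCC's __sync_add_and_fetch) must return the value after it, hence the added + val. A single-threaded model of the difference, for illustration only:

// Models the return-value semantics only, not the atomicity.
long exchange_add(volatile long *p, long v) {   // InterlockedExchangeAdd-style
  long old = *p;
  *p = old + v;
  return old;                                   // value *before* the addition
}
long add_and_fetch(volatile long *p, long v) {  // what AtomicAdd must return
  return exchange_add(p, v) + v;                // value *after* the addition
}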
diff --git a/lib/System/Host.cpp b/lib/System/Host.cpp
index e112698..79897e4 100644
--- a/lib/System/Host.cpp
+++ b/lib/System/Host.cpp
@@ -103,11 +103,8 @@ static void DetectX86FamilyModel(unsigned EAX, unsigned &Family, unsigned &Model
Model += ((EAX >> 16) & 0xf) << 4; // Bits 16 - 19
}
}
-#endif
-
std::string sys::getHostCPUName() {
-#if defined(__x86_64__) || defined(__i386__)
unsigned EAX = 0, EBX = 0, ECX = 0, EDX = 0;
if (GetX86CpuIDAndInfo(0x1, &EAX, &EBX, &ECX, &EDX))
return "generic";
@@ -295,7 +292,10 @@ std::string sys::getHostCPUName() {
return "generic";
}
}
-#endif
-
return "generic";
}
+#else
+std::string sys::getHostCPUName() {
+ return "generic";
+}
+#endif
diff --git a/lib/System/Unix/Path.inc b/lib/System/Unix/Path.inc
index ff1497a..33b26f7 100644
--- a/lib/System/Unix/Path.inc
+++ b/lib/System/Unix/Path.inc
@@ -77,7 +77,7 @@ inline bool lastIsSlash(const std::string& path) {
namespace llvm {
using namespace sys;
-extern const char sys::PathSeparator = ':';
+const char sys::PathSeparator = ':';
Path::Path(const std::string& p)
: path(p) {}
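For the one-line change above, my reading (not stated in the patch) is that PathSeparator is already declared extern in a header, so repeating extern on the defining declaration, which has an initializer, is redundant and draws an "initialized and declared 'extern'" warning from some GCC versions; dropping it keeps the external linkage. Illustrative header/impl split, not the exact LLVM sources:

// header:  namespace sys { extern const char PathSeparator; }
// impl:    extern const char sys::PathSeparator = ':';  // warns: initialized and declared 'extern'
// impl:    const char sys::PathSeparator = ':';         // still external linkage via the header declaration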
diff --git a/lib/Target/ARM/ARM.h b/lib/Target/ARM/ARM.h
index ff1980d..21445ad 100644
--- a/lib/Target/ARM/ARM.h
+++ b/lib/Target/ARM/ARM.h
@@ -109,7 +109,6 @@ FunctionPass *createNEONPreAllocPass();
FunctionPass *createNEONMoveFixPass();
FunctionPass *createThumb2ITBlockPass();
FunctionPass *createThumb2SizeReductionPass();
-FunctionPass *createARMMaxStackAlignmentCalculatorPass();
extern Target TheARMTarget, TheThumbTarget;
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp
index c95d4c8..1aae369 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.cpp
+++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp
@@ -418,11 +418,13 @@ bool ARMBaseInstrInfo::isPredicable(MachineInstr *MI) const {
return true;
}
-/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing
+/// FIXME: Works around a gcc miscompilation with -fstrict-aliasing.
+DISABLE_INLINE
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
- unsigned JTI) DISABLE_INLINE;
+ unsigned JTI);
static unsigned getNumJTEntries(const std::vector<MachineJumpTableEntry> &JT,
unsigned JTI) {
+ assert(JTI < JT.size());
return JT[JTI].MBBs.size();
}
@@ -467,6 +469,8 @@ unsigned ARMBaseInstrInfo::GetInstSizeInBytes(const MachineInstr *MI) const {
return MI->getOperand(2).getImm();
case ARM::Int_eh_sjlj_setjmp:
return 24;
+ case ARM::tInt_eh_sjlj_setjmp:
+ return 22;
case ARM::t2Int_eh_sjlj_setjmp:
return 22;
case ARM::BR_JTr:
@@ -755,7 +759,6 @@ loadRegFromStackSlot(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
assert((RC == ARM::QPRRegisterClass ||
RC == ARM::QPR_VFP2RegisterClass ||
RC == ARM::QPR_8RegisterClass) && "Unknown regclass!");
- // FIXME: Neon instructions should support predicates
if (Align >= 16
&& (getRegisterInfo().needsStackRealignment(MF))) {
AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::VLD1q64), DestReg)
diff --git a/lib/Target/ARM/ARMBaseInstrInfo.h b/lib/Target/ARM/ARMBaseInstrInfo.h
index 282e30c..78d9135 100644
--- a/lib/Target/ARM/ARMBaseInstrInfo.h
+++ b/lib/Target/ARM/ARMBaseInstrInfo.h
@@ -92,6 +92,8 @@ namespace ARMII {
StMiscFrm = 9 << FormShift,
LdStMulFrm = 10 << FormShift,
+ LdStExFrm = 28 << FormShift,
+
// Miscellaneous arithmetic instructions
ArithMiscFrm = 11 << FormShift,
@@ -190,9 +192,6 @@ public:
// if there is not such an opcode.
virtual unsigned getUnindexedOpcode(unsigned Opc) const =0;
- // Return true if the block does not fall through.
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const =0;
-
virtual MachineInstr *convertToThreeAddress(MachineFunction::iterator &MFI,
MachineBasicBlock::iterator &MBBI,
LiveVariables *LV) const;
diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
index 653328d..9b5f79f 100644
--- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp
+++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp
@@ -471,21 +471,6 @@ ARMBaseRegisterInfo::UpdateRegAllocHint(unsigned Reg, unsigned NewReg,
}
}
-static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
- unsigned MaxAlign = 0;
-
- for (int i = FFI->getObjectIndexBegin(),
- e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
-
- unsigned Align = FFI->getObjectAlignment(i);
- MaxAlign = std::max(MaxAlign, Align);
- }
-
- return MaxAlign;
-}
-
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -585,16 +570,21 @@ ARMBaseRegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
SmallVector<unsigned, 4> UnspilledCS2GPRs;
ARMFunctionInfo *AFI = MF.getInfo<ARMFunctionInfo>();
- MachineFrameInfo *MFI = MF.getFrameInfo();
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
if (RealignStack) {
- unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
- calculateMaxStackAlignment(MFI));
- MFI->setMaxAlignment(MaxAlign);
+ MachineFrameInfo *MFI = MF.getFrameInfo();
+ MFI->calculateMaxStackAlignment();
}
+ // Spill R4 if a Thumb2 function requires stack realignment - it will be used
+ // as a scratch register.
+ // FIXME: It would be better just to find a spare register here.
+ if (needsStackRealignment(MF) &&
+ AFI->isThumb2Function())
+ MF.getRegInfo().setPhysRegUsed(ARM::R4);
+
// Don't spill FP if the frame can be eliminated. This is determined
// by scanning the callee-save registers to see if any is used.
const unsigned *CSRegs = getCalleeSavedRegs();
@@ -1368,14 +1358,30 @@ emitPrologue(MachineFunction &MF) const {
// If we need dynamic stack realignment, do it here.
if (needsStackRealignment(MF)) {
- unsigned Opc;
unsigned MaxAlign = MFI->getMaxAlignment();
assert (!AFI->isThumb1OnlyFunction());
- Opc = AFI->isThumbFunction() ? ARM::t2BICri : ARM::BICri;
-
- AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), ARM::SP)
+ if (!AFI->isThumbFunction()) {
+ // Emit bic sp, sp, MaxAlign
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::BICri), ARM::SP)
.addReg(ARM::SP, RegState::Kill)
.addImm(MaxAlign-1)));
+ } else {
+ // We cannot use sp as source/dest register here, thus we're emitting the
+ // following sequence:
+ // mov r4, sp
+ // bic r4, r4, MaxAlign
+ // mov sp, r4
+ // FIXME: It would be better just to find a spare register here.
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::R4)
+ .addReg(ARM::SP, RegState::Kill);
+ AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl,
+ TII.get(ARM::t2BICri), ARM::R4)
+ .addReg(ARM::R4, RegState::Kill)
+ .addImm(MaxAlign-1)));
+ BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVtgpr2gpr), ARM::SP)
+ .addReg(ARM::R4, RegState::Kill);
+ }
}
}
@@ -1479,48 +1485,4 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const {
emitSPUpdate(isARM, MBB, MBBI, dl, TII, VARegSaveSize);
}
-namespace {
- struct MaximalStackAlignmentCalculator : public MachineFunctionPass {
- static char ID;
- MaximalStackAlignmentCalculator() : MachineFunctionPass(&ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- MachineFrameInfo *FFI = MF.getFrameInfo();
- MachineRegisterInfo &RI = MF.getRegInfo();
-
- // Calculate max stack alignment of all already allocated stack objects.
- unsigned MaxAlign = calculateMaxStackAlignment(FFI);
-
- // Be over-conservative: scan over all vreg defs and find, whether vector
- // registers are used. If yes - there is probability, that vector register
- // will be spilled and thus stack needs to be aligned properly.
- for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
- RegNum < RI.getLastVirtReg(); ++RegNum)
- MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
-
- if (FFI->getMaxAlignment() == MaxAlign)
- return false;
-
- FFI->setMaxAlignment(MaxAlign);
- return true;
- }
-
- virtual const char *getPassName() const {
- return "ARM Stack Required Alignment Auto-Detector";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-
- char MaximalStackAlignmentCalculator::ID = 0;
-}
-
-FunctionPass*
-llvm::createARMMaxStackAlignmentCalculatorPass() {
- return new MaximalStackAlignmentCalculator();
-}
-
#include "ARMGenRegisterInfo.inc"
diff --git a/lib/Target/ARM/ARMConstantIslandPass.cpp b/lib/Target/ARM/ARMConstantIslandPass.cpp
index e59a315..acd30d2 100644
--- a/lib/Target/ARM/ARMConstantIslandPass.cpp
+++ b/lib/Target/ARM/ARMConstantIslandPass.cpp
@@ -418,10 +418,10 @@ void ARMConstantIslands::DoInitialPlacement(MachineFunction &MF,
static bool BBHasFallthrough(MachineBasicBlock *MBB) {
// Get the next machine basic block in the function.
MachineFunction::iterator MBBI = MBB;
- if (next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
+ if (llvm::next(MBBI) == MBB->getParent()->end()) // Can't fall off end of function.
return false;
- MachineBasicBlock *NextBB = next(MBBI);
+ MachineBasicBlock *NextBB = llvm::next(MBBI);
for (MachineBasicBlock::succ_iterator I = MBB->succ_begin(),
E = MBB->succ_end(); I != E; ++I)
if (*I == NextBB)
@@ -760,7 +760,7 @@ MachineBasicBlock *ARMConstantIslands::SplitBlockBeforeInstr(MachineInstr *MI) {
CompareMBBNumbers);
MachineBasicBlock* WaterBB = *IP;
if (WaterBB == OrigBB)
- WaterList.insert(next(IP), NewBB);
+ WaterList.insert(llvm::next(IP), NewBB);
else
WaterList.insert(IP, OrigBB);
NewWaterList.insert(OrigBB);
@@ -887,7 +887,7 @@ static bool BBIsJumpedOver(MachineBasicBlock *MBB) {
void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
int delta) {
- MachineFunction::iterator MBBI = BB; MBBI = next(MBBI);
+ MachineFunction::iterator MBBI = BB; MBBI = llvm::next(MBBI);
for(unsigned i = BB->getNumber()+1, e = BB->getParent()->getNumBlockIDs();
i < e; ++i) {
BBOffsets[i] += delta;
@@ -929,7 +929,7 @@ void ARMConstantIslands::AdjustBBOffsetsAfter(MachineBasicBlock *BB,
if (delta==0)
return;
}
- MBBI = next(MBBI);
+ MBBI = llvm::next(MBBI);
}
}
@@ -1096,7 +1096,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
DEBUG(errs() << "Split at end of block\n");
if (&UserMBB->back() == UserMI)
assert(BBHasFallthrough(UserMBB) && "Expected a fallthrough BB!");
- NewMBB = next(MachineFunction::iterator(UserMBB));
+ NewMBB = llvm::next(MachineFunction::iterator(UserMBB));
// Add an unconditional branch from UserMBB to fallthrough block.
// Record it for branch lengthening; this new branch will not get out of
// range, but if the preceding conditional branch is out of range, the
@@ -1144,7 +1144,7 @@ void ARMConstantIslands::CreateNewWater(unsigned CPUserIndex,
for (unsigned Offset = UserOffset+TII->GetInstSizeInBytes(UserMI);
Offset < BaseInsertOffset;
Offset += TII->GetInstSizeInBytes(MI),
- MI = next(MI)) {
+ MI = llvm::next(MI)) {
if (CPUIndex < CPUsers.size() && CPUsers[CPUIndex].MI == MI) {
CPUser &U = CPUsers[CPUIndex];
if (!OffsetIsInRange(Offset, EndInsertOffset,
@@ -1204,7 +1204,7 @@ bool ARMConstantIslands::HandleConstantPoolUser(MachineFunction &MF,
NewWaterList.insert(NewIsland);
}
// The new CPE goes before the following block (NewMBB).
- NewMBB = next(MachineFunction::iterator(WaterBB));
+ NewMBB = llvm::next(MachineFunction::iterator(WaterBB));
} else {
// No water found.
@@ -1406,7 +1406,7 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
NumCBrFixed++;
if (BMI != MI) {
- if (next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
+ if (llvm::next(MachineBasicBlock::iterator(MI)) == prior(MBB->end()) &&
BMI->getOpcode() == Br.UncondBr) {
// Last MI in the BB is an unconditional branch. Can we simply invert the
// condition and swap destinations:
@@ -1433,12 +1433,12 @@ ARMConstantIslands::FixUpConditionalBr(MachineFunction &MF, ImmBranch &Br) {
// branch to the destination.
int delta = TII->GetInstSizeInBytes(&MBB->back());
BBSizes[MBB->getNumber()] -= delta;
- MachineBasicBlock* SplitBB = next(MachineFunction::iterator(MBB));
+ MachineBasicBlock* SplitBB = llvm::next(MachineFunction::iterator(MBB));
AdjustBBOffsetsAfter(SplitBB, -delta);
MBB->back().eraseFromParent();
// BBOffsets[SplitBB] is wrong temporarily, fixed below
}
- MachineBasicBlock *NextBB = next(MachineFunction::iterator(MBB));
+ MachineBasicBlock *NextBB = llvm::next(MachineFunction::iterator(MBB));
DEBUG(errs() << " Insert B to BB#" << DestBB->getNumber()
<< " also invert condition and change dest. to BB#"
diff --git a/lib/Target/ARM/ARMExpandPseudoInsts.cpp b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
index c929c54..1b8727d 100644
--- a/lib/Target/ARM/ARMExpandPseudoInsts.cpp
+++ b/lib/Target/ARM/ARMExpandPseudoInsts.cpp
@@ -48,7 +48,7 @@ bool ARMExpandPseudo::ExpandMBB(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MBBI = MBB.begin(), E = MBB.end();
while (MBBI != E) {
MachineInstr &MI = *MBBI;
- MachineBasicBlock::iterator NMBBI = next(MBBI);
+ MachineBasicBlock::iterator NMBBI = llvm::next(MBBI);
unsigned Opcode = MI.getOpcode();
switch (Opcode) {
diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp
index c839fc6..655c762 100644
--- a/lib/Target/ARM/ARMISelLowering.cpp
+++ b/lib/Target/ARM/ARMISelLowering.cpp
@@ -42,6 +42,7 @@
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MathExtras.h"
+#include "llvm/Support/raw_ostream.h"
#include <sstream>
using namespace llvm;
@@ -377,7 +378,7 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM)
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom);
else
setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand);
- setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand);
+ setOperationAction(ISD::MEMBARRIER, MVT::Other, Custom);
if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) {
setOperationAction(ISD::SIGN_EXTEND_INREG, MVT::i16, Expand);
@@ -500,6 +501,9 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const {
case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC";
+ case ARMISD::MEMBARRIER: return "ARMISD::MEMBARRIER";
+ case ARMISD::SYNCBARRIER: return "ARMISD::SYNCBARRIER";
+
case ARMISD::VCEQ: return "ARMISD::VCEQ";
case ARMISD::VCGE: return "ARMISD::VCGE";
case ARMISD::VCGEU: return "ARMISD::VCGEU";
@@ -1470,6 +1474,28 @@ ARMTargetLowering::LowerINTRINSIC_WO_CHAIN(SDValue Op, SelectionDAG &DAG) {
}
}
+static SDValue LowerMEMBARRIER(SDValue Op, SelectionDAG &DAG,
+ const ARMSubtarget *Subtarget) {
+ DebugLoc dl = Op.getDebugLoc();
+ SDValue Op5 = Op.getOperand(5);
+ SDValue Res;
+ unsigned isDeviceBarrier = cast<ConstantSDNode>(Op5)->getZExtValue();
+ if (isDeviceBarrier) {
+ if (Subtarget->hasV7Ops())
+ Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0));
+ else
+ Res = DAG.getNode(ARMISD::SYNCBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ } else {
+ if (Subtarget->hasV7Ops())
+ Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0));
+ else
+ Res = DAG.getNode(ARMISD::MEMBARRIER, dl, MVT::Other, Op.getOperand(0),
+ DAG.getConstant(0, MVT::i32));
+ }
+ return Res;
+}
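For reference, the two barrier forms this lowering ultimately selects correspond to familiar ARM idioms: a plain dmb/dsb on ARMv7, and the CP15 mcr encoding with a zero register operand on ARMv6 (the Int_MemBarrierV6/Int_SyncBarrierV6 patterns added to ARMInstrInfo.td further below). A minimal sketch, assuming a GCC-style inline-assembly illustration rather than anything in the patch:

// Illustration only: what the selected instructions look like at source level.
static inline void full_memory_barrier_v7(void) {
  __asm__ __volatile__("dmb" ::: "memory");                 // Int_MemBarrierV7
}
static inline void full_memory_barrier_v6(void) {
  unsigned zero = 0;                                         // the GPR:$zero operand
  __asm__ __volatile__("mcr p15, 0, %0, c7, c10, 5"          // Int_MemBarrierV6
                       :: "r"(zero) : "memory");
}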
+
static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG,
unsigned VarArgsFrameIndex) {
// vastart just stores the address of the VarArgsFrameIndex slot into the
@@ -2528,6 +2554,25 @@ static bool isVTRNMask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
+/// isVTRN_v_undef_Mask - Special case of isVTRNMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 2, 2> instead of <0, 4, 2, 6>.
+static bool isVTRN_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned i = 0; i < NumElts; i += 2) {
+ if ((unsigned) M[i] != i + WhichResult ||
+ (unsigned) M[i+1] != i + WhichResult)
+ return false;
+ }
+ return true;
+}
+
static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ -2548,6 +2593,33 @@ static bool isVUZPMask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
+/// isVUZP_v_undef_Mask - Special case of isVUZPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 2, 0, 2> instead of <0, 2, 4, 6>.
+static bool isVUZP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned Half = VT.getVectorNumElements() / 2;
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ for (unsigned j = 0; j != 2; ++j) {
+ unsigned Idx = WhichResult;
+ for (unsigned i = 0; i != Half; ++i) {
+ if ((unsigned) M[i + j * Half] != Idx)
+ return false;
+ Idx += 2;
+ }
+ }
+
+ // VUZP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
unsigned &WhichResult) {
unsigned EltSz = VT.getVectorElementType().getSizeInBits();
@@ -2571,6 +2643,33 @@ static bool isVZIPMask(const SmallVectorImpl<int> &M, EVT VT,
return true;
}
+/// isVZIP_v_undef_Mask - Special case of isVZIPMask for canonical form of
+/// "vector_shuffle v, v", i.e., "vector_shuffle v, undef".
+/// Mask is e.g., <0, 0, 1, 1> instead of <0, 4, 1, 5>.
+static bool isVZIP_v_undef_Mask(const SmallVectorImpl<int> &M, EVT VT,
+ unsigned &WhichResult) {
+ unsigned EltSz = VT.getVectorElementType().getSizeInBits();
+ if (EltSz == 64)
+ return false;
+
+ unsigned NumElts = VT.getVectorNumElements();
+ WhichResult = (M[0] == 0 ? 0 : 1);
+ unsigned Idx = WhichResult * NumElts / 2;
+ for (unsigned i = 0; i != NumElts; i += 2) {
+ if ((unsigned) M[i] != Idx ||
+ (unsigned) M[i+1] != Idx)
+ return false;
+ Idx += 1;
+ }
+
+ // VZIP.32 for 64-bit vectors is a pseudo-instruction alias for VTRN.32.
+ if (VT.is64BitVector() && EltSz == 32)
+ return false;
+
+ return true;
+}
+
+
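These three helpers differ from their two-input counterparts only in the index pattern they accept, because both shuffle operands are the same register. A standalone sketch of the VTRN variant (plain C++ written for illustration, not LLVM code) with the masks named in the comments above:

#include <cassert>
#include <vector>

// Same index test as isVTRN_v_undef_Mask, stripped of the EVT handling.
static bool isVTRNvUndefMask(const std::vector<int> &M, unsigned &WhichResult) {
  WhichResult = (M[0] == 0 ? 0 : 1);
  for (unsigned i = 0; i < M.size(); i += 2)
    if ((unsigned)M[i] != i + WhichResult ||
        (unsigned)M[i + 1] != i + WhichResult)
      return false;
  return true;
}

int main() {
  unsigned Which = 0;
  assert(isVTRNvUndefMask({0, 0, 2, 2}, Which) && Which == 0); // first VTRN result
  assert(isVTRNvUndefMask({1, 1, 3, 3}, Which) && Which == 1); // second VTRN result
  assert(!isVTRNvUndefMask({0, 4, 2, 6}, Which));              // two-distinct-input mask
  return 0;
}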
static SDValue BuildSplat(SDValue Val, EVT VT, SelectionDAG &DAG, DebugLoc dl) {
// Canonicalize all-zeros and all-ones vectors.
ConstantSDNode *ConstVal = cast<ConstantSDNode>(Val.getNode());
@@ -2683,7 +2782,10 @@ ARMTargetLowering::isShuffleMaskLegal(const SmallVectorImpl<int> &M,
isVEXTMask(M, VT, ReverseVEXT, Imm) ||
isVTRNMask(M, VT, WhichResult) ||
isVUZPMask(M, VT, WhichResult) ||
- isVZIPMask(M, VT, WhichResult));
+ isVZIPMask(M, VT, WhichResult) ||
+ isVTRN_v_undef_Mask(M, VT, WhichResult) ||
+ isVUZP_v_undef_Mask(M, VT, WhichResult) ||
+ isVZIP_v_undef_Mask(M, VT, WhichResult));
}
/// GeneratePerfectShuffle - Given an entry in the perfect-shuffle table, emit
@@ -2815,6 +2917,16 @@ static SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
V1, V2).getValue(WhichResult);
+ if (isVTRN_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VTRN, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVUZP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VUZP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+ if (isVZIP_v_undef_Mask(ShuffleMask, VT, WhichResult))
+ return DAG.getNode(ARMISD::VZIP, dl, DAG.getVTList(VT, VT),
+ V1, V1).getValue(WhichResult);
+
// If the shuffle is not directly supported and it has 4 elements, use
// the PerfectShuffle-generated table to synthesize it from other shuffles.
if (VT.getVectorNumElements() == 4 &&
@@ -2886,6 +2998,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::BR_JT: return LowerBR_JT(Op, DAG);
case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG);
case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex);
+ case ISD::MEMBARRIER: return LowerMEMBARRIER(Op, DAG, Subtarget);
case ISD::SINT_TO_FP:
case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG);
case ISD::FP_TO_SINT:
@@ -2938,14 +3051,233 @@ void ARMTargetLowering::ReplaceNodeResults(SDNode *N,
//===----------------------------------------------------------------------===//
MachineBasicBlock *
+ARMTargetLowering::EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const {
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned oldval = MI->getOperand(2).getReg();
+ unsigned newval = MI->getOperand(3).getReg();
+ unsigned scratch = BB->getParent()->getRegInfo()
+ .createVirtualRegister(ARM::GPRRegisterClass);
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineFunction *MF = BB->getParent();
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator It = BB;
+ ++It; // insert the new blocks after the current block
+
+ MachineBasicBlock *loop1MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *loop2MBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = MF->CreateMachineBasicBlock(LLVM_BB);
+ MF->insert(It, loop1MBB);
+ MF->insert(It, loop2MBB);
+ MF->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loop1MBB
+ BB->addSuccessor(loop1MBB);
+
+ // loop1MBB:
+ // ldrex dest, [ptr]
+ // cmp dest, oldval
+ // bne exitMBB
+ BB = loop1MBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPrr : ARM::CMPrr))
+ .addReg(dest).addReg(oldval));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(exitMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop2MBB);
+ BB->addSuccessor(exitMBB);
+
+ // loop2MBB:
+ // strex scratch, newval, [ptr]
+ // cmp scratch, #0
+ // bne loop1MBB
+ BB = loop2MBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(newval)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loop1MBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+ BB->addSuccessor(loop1MBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
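The three new blocks implement the usual load-exclusive/store-exclusive retry loop. A compact sketch of the same control flow, assuming C++ with GCC-style inline assembly for the word-sized ARM case (illustration only; the patch builds this out of MachineBasicBlocks):

static unsigned atomic_cmp_swap_word(unsigned *ptr, unsigned oldval,
                                     unsigned newval) {
  unsigned dest, scratch = 0;
  do {
    // loop1MBB: ldrex dest, [ptr]; cmp dest, oldval; bne exitMBB
    __asm__ __volatile__("ldrex %0, [%1]" : "=r"(dest) : "r"(ptr));
    if (dest != oldval)
      break;
    // loop2MBB: strex scratch, newval, [ptr]; cmp scratch, #0; bne loop1MBB
    __asm__ __volatile__("strex %0, %2, [%1]"
                         : "=&r"(scratch)
                         : "r"(ptr), "r"(newval)
                         : "memory");
  } while (scratch != 0);
  return dest;  // exitMBB: the value that was observed at [ptr]
}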
+
+MachineBasicBlock *
+ARMTargetLowering::EmitAtomicBinary(MachineInstr *MI, MachineBasicBlock *BB,
+ unsigned Size, unsigned BinOpcode) const {
+ // This also handles ATOMIC_SWAP, indicated by BinOpcode==0.
+ const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction *F = BB->getParent();
+ MachineFunction::iterator It = BB;
+ ++It;
+
+ unsigned dest = MI->getOperand(0).getReg();
+ unsigned ptr = MI->getOperand(1).getReg();
+ unsigned incr = MI->getOperand(2).getReg();
+ DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
+ unsigned ldrOpc, strOpc;
+ switch (Size) {
+ default: llvm_unreachable("unsupported size for AtomicCmpSwap!");
+ case 1:
+ ldrOpc = isThumb2 ? ARM::t2LDREXB : ARM::LDREXB;
+ strOpc = isThumb2 ? ARM::t2STREXB : ARM::STREXB;
+ break;
+ case 2:
+ ldrOpc = isThumb2 ? ARM::t2LDREXH : ARM::LDREXH;
+ strOpc = isThumb2 ? ARM::t2STREXH : ARM::STREXH;
+ break;
+ case 4:
+ ldrOpc = isThumb2 ? ARM::t2LDREX : ARM::LDREX;
+ strOpc = isThumb2 ? ARM::t2STREX : ARM::STREX;
+ break;
+ }
+
+ MachineBasicBlock *loopMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *exitMBB = F->CreateMachineBasicBlock(LLVM_BB);
+ F->insert(It, loopMBB);
+ F->insert(It, exitMBB);
+ exitMBB->transferSuccessors(BB);
+
+ MachineRegisterInfo &RegInfo = F->getRegInfo();
+ unsigned scratch = RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+ unsigned scratch2 = (!BinOpcode) ? incr :
+ RegInfo.createVirtualRegister(ARM::GPRRegisterClass);
+
+ // thisMBB:
+ // ...
+ // fallthrough --> loopMBB
+ BB->addSuccessor(loopMBB);
+
+ // loopMBB:
+ // ldrex dest, ptr
+ // <binop> scratch2, dest, incr
+ // strex scratch, scratch2, ptr
+ // cmp scratch, #0
+ // bne- loopMBB
+ // fallthrough --> exitMBB
+ BB = loopMBB;
+ AddDefaultPred(BuildMI(BB, dl, TII->get(ldrOpc), dest).addReg(ptr));
+ if (BinOpcode) {
+ // operand order needs to go the other way for NAND
+ if (BinOpcode == ARM::BICrr || BinOpcode == ARM::t2BICrr)
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(incr).addReg(dest)).addReg(0);
+ else
+ AddDefaultPred(BuildMI(BB, dl, TII->get(BinOpcode), scratch2).
+ addReg(dest).addReg(incr)).addReg(0);
+ }
+
+ AddDefaultPred(BuildMI(BB, dl, TII->get(strOpc), scratch).addReg(scratch2)
+ .addReg(ptr));
+ AddDefaultPred(BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2CMPri : ARM::CMPri))
+ .addReg(scratch).addImm(0));
+ BuildMI(BB, dl, TII->get(isThumb2 ? ARM::t2Bcc : ARM::Bcc))
+ .addMBB(loopMBB).addImm(ARMCC::NE).addReg(ARM::CPSR);
+
+ BB->addSuccessor(loopMBB);
+ BB->addSuccessor(exitMBB);
+
+ // exitMBB:
+ // ...
+ BB = exitMBB;
+ return BB;
+}
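EmitAtomicBinary reuses the same retry skeleton with one ALU step between the ldrex and the strex; for ATOMIC_SWAP (BinOpcode == 0) that step is skipped and the incoming value is stored as-is. Under the same illustrative assumptions as the sketch above, the add case looks like:

static unsigned atomic_fetch_add_word(unsigned *ptr, unsigned incr) {
  unsigned dest, scratch2, scratch = 0;
  do {
    // loopMBB: ldrex dest, ptr
    __asm__ __volatile__("ldrex %0, [%1]" : "=r"(dest) : "r"(ptr));
    scratch2 = dest + incr;  // <binop> scratch2, dest, incr (ADDrr here)
    // strex scratch, scratch2, ptr; cmp scratch, #0; bne loopMBB
    __asm__ __volatile__("strex %0, %2, [%1]"
                         : "=&r"(scratch)
                         : "r"(ptr), "r"(scratch2)
                         : "memory");
  } while (scratch != 0);
  return dest;  // the pseudo's $dst: the value seen before the update
}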
+
+MachineBasicBlock *
ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
const TargetInstrInfo *TII = getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
+ bool isThumb2 = Subtarget->isThumb2();
switch (MI->getOpcode()) {
default:
+ MI->dump();
llvm_unreachable("Unexpected instr type to insert");
+
+ case ARM::ATOMIC_LOAD_ADD_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+ case ARM::ATOMIC_LOAD_ADD_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ADDrr : ARM::ADDrr);
+
+ case ARM::ATOMIC_LOAD_AND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+ case ARM::ATOMIC_LOAD_AND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ANDrr : ARM::ANDrr);
+
+ case ARM::ATOMIC_LOAD_OR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+ case ARM::ATOMIC_LOAD_OR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2ORRrr : ARM::ORRrr);
+
+ case ARM::ATOMIC_LOAD_XOR_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+ case ARM::ATOMIC_LOAD_XOR_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2EORrr : ARM::EORrr);
+
+ case ARM::ATOMIC_LOAD_NAND_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+ case ARM::ATOMIC_LOAD_NAND_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2BICrr : ARM::BICrr);
+
+ case ARM::ATOMIC_LOAD_SUB_I8:
+ return EmitAtomicBinary(MI, BB, 1, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I16:
+ return EmitAtomicBinary(MI, BB, 2, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+ case ARM::ATOMIC_LOAD_SUB_I32:
+ return EmitAtomicBinary(MI, BB, 4, isThumb2 ? ARM::t2SUBrr : ARM::SUBrr);
+
+ case ARM::ATOMIC_SWAP_I8: return EmitAtomicBinary(MI, BB, 1, 0);
+ case ARM::ATOMIC_SWAP_I16: return EmitAtomicBinary(MI, BB, 2, 0);
+ case ARM::ATOMIC_SWAP_I32: return EmitAtomicBinary(MI, BB, 4, 0);
+
+ case ARM::ATOMIC_CMP_SWAP_I8: return EmitAtomicCmpSwap(MI, BB, 1);
+ case ARM::ATOMIC_CMP_SWAP_I16: return EmitAtomicCmpSwap(MI, BB, 2);
+ case ARM::ATOMIC_CMP_SWAP_I32: return EmitAtomicCmpSwap(MI, BB, 4);
+
case ARM::tMOVCCr_pseudo: {
// To "insert" a SELECT_CC instruction, we actually have to insert the
// diamond control-flow pattern. The incoming instruction knows the
@@ -3935,6 +4267,8 @@ ARMTargetLowering::getRegForInlineAsmConstraint(const std::string &Constraint,
return std::make_pair(0U, ARM::SPRRegisterClass);
if (VT == MVT::f64)
return std::make_pair(0U, ARM::DPRRegisterClass);
+ if (VT.getSizeInBits() == 128)
+ return std::make_pair(0U, ARM::QPRRegisterClass);
break;
}
}
@@ -3973,6 +4307,9 @@ getRegClassForInlineAsmConstraint(const std::string &Constraint,
ARM::D4, ARM::D5, ARM::D6, ARM::D7,
ARM::D8, ARM::D9, ARM::D10,ARM::D11,
ARM::D12,ARM::D13,ARM::D14,ARM::D15, 0);
+ if (VT.getSizeInBits() == 128)
+ return make_vector<unsigned>(ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3,
+ ARM::Q4, ARM::Q5, ARM::Q6, ARM::Q7, 0);
break;
}
diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h
index 4f31f8a..e1b3348 100644
--- a/lib/Target/ARM/ARMISelLowering.h
+++ b/lib/Target/ARM/ARMISelLowering.h
@@ -72,6 +72,9 @@ namespace llvm {
DYN_ALLOC, // Dynamic allocation on the stack.
+ MEMBARRIER, // Memory barrier
+ SYNCBARRIER, // Memory sync barrier
+
VCEQ, // Vector compare equal.
VCGE, // Vector compare greater than or equal.
VCGEU, // Vector compare unsigned greater than or equal.
@@ -328,6 +331,15 @@ namespace llvm {
SDValue getARMCmp(SDValue LHS, SDValue RHS, ISD::CondCode CC,
SDValue &ARMCC, SelectionDAG &DAG, DebugLoc dl);
+
+ MachineBasicBlock *EmitAtomicCmpSwap(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size) const;
+ MachineBasicBlock *EmitAtomicBinary(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ unsigned Size,
+ unsigned BinOpcode) const;
+
};
}
diff --git a/lib/Target/ARM/ARMInstrFormats.td b/lib/Target/ARM/ARMInstrFormats.td
index e76e93c..cf0edff 100644
--- a/lib/Target/ARM/ARMInstrFormats.td
+++ b/lib/Target/ARM/ARMInstrFormats.td
@@ -33,6 +33,8 @@ def LdMiscFrm : Format<8>;
def StMiscFrm : Format<9>;
def LdStMulFrm : Format<10>;
+def LdStExFrm : Format<28>;
+
def ArithMiscFrm : Format<11>;
def ExtFrm : Format<12>;
@@ -199,6 +201,19 @@ class I<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
let Pattern = pattern;
list<Predicate> Predicates = [IsARM];
}
+// A few are not predicable
+class InoP<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ IndexMode im, Format f, InstrItinClass itin,
+ string opc, string asm, string cstr,
+ list<dag> pattern>
+ : InstARM<am, sz, im, f, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = !strconcat(opc, asm);
+ let Pattern = pattern;
+ let isPredicable = 0;
+ list<Predicate> Predicates = [IsARM];
+}
// Same as I except it can optionally modify CPSR. Note it's modeled as
// an input operand since by default it's a zero register. It will
@@ -239,6 +254,10 @@ class AXI<dag oops, dag iops, Format f, InstrItinClass itin,
string asm, list<dag> pattern>
: XI<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
asm, "", pattern>;
+class AInoP<dag oops, dag iops, Format f, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : InoP<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, f, itin,
+ opc, asm, "", pattern>;
// Ctrl flow instructions
class ABI<bits<4> opcod, dag oops, dag iops, InstrItinClass itin,
@@ -264,6 +283,28 @@ class JTI<dag oops, dag iops, InstrItinClass itin,
: XI<oops, iops, AddrModeNone, SizeSpecial, IndexModeNone, BrMiscFrm, itin,
asm, "", pattern>;
+
+// Atomic load/store instructions
+
+class AIldrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 1;
+ let Inst{11-0} = 0b111110011111;
+}
+class AIstrex<bits<2> opcod, dag oops, dag iops, InstrItinClass itin,
+ string opc, string asm, list<dag> pattern>
+ : I<oops, iops, AddrModeNone, Size4Bytes, IndexModeNone, LdStExFrm, itin,
+ opc, asm, "", pattern> {
+ let Inst{27-23} = 0b00011;
+ let Inst{22-21} = opcod;
+ let Inst{20} = 0;
+ let Inst{11-4} = 0b11111001;
+}
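The fixed fields of these two format classes can be checked with a few shifts. The snippet below (plain C++, illustration only) assembles the non-register bits for the byte-sized pair, leaving the Rn/Rd fields at zero since those are filled in per instruction:

#include <cstdint>
#include <cstdio>

int main() {
  const uint32_t opcod = 0x2;  // 0b10: the LDREXB/STREXB class parameter
  // AIldrex: Inst{27-23}=0b00011, Inst{22-21}=opcod, Inst{20}=1, Inst{11-0}=0b111110011111
  const uint32_t ldrexb = (0x03u << 23) | (opcod << 21) | (1u << 20) | 0xF9Fu;
  // AIstrex: same top bits, Inst{20}=0, Inst{11-4}=0b11111001
  const uint32_t strexb = (0x03u << 23) | (opcod << 21) | (0u << 20) | (0xF9u << 4);
  std::printf("LDREXB fixed bits: 0x%08x\n", ldrexb);
  std::printf("STREXB fixed bits: 0x%08x\n", strexb);
  return 0;
}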
+
// addrmode1 instructions
class AI1<bits<4> opcod, dag oops, dag iops, Format f, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
@@ -967,6 +1008,17 @@ class Thumb2XI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
list<Predicate> Predicates = [IsThumb2];
}
+class ThumbXI<dag oops, dag iops, AddrMode am, SizeFlagVal sz,
+ InstrItinClass itin,
+ string asm, string cstr, list<dag> pattern>
+ : InstARM<am, sz, IndexModeNone, ThumbFrm, GenericDomain, cstr, itin> {
+ let OutOperandList = oops;
+ let InOperandList = iops;
+ let AsmString = asm;
+ let Pattern = pattern;
+ list<Predicate> Predicates = [IsThumb1Only];
+}
+
class T2I<dag oops, dag iops, InstrItinClass itin,
string opc, string asm, list<dag> pattern>
: Thumb2I<oops, iops, AddrModeNone, Size4Bytes, itin, opc, asm, "", pattern>;
diff --git a/lib/Target/ARM/ARMInstrInfo.cpp b/lib/Target/ARM/ARMInstrInfo.cpp
index 87bb12b..85f6b40 100644
--- a/lib/Target/ARM/ARMInstrInfo.cpp
+++ b/lib/Target/ARM/ARMInstrInfo.cpp
@@ -60,25 +60,6 @@ unsigned ARMInstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool ARMInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case ARM::BX_RET: // Return.
- case ARM::LDM_RET:
- case ARM::B:
- case ARM::BRIND:
- case ARM::BR_JTr: // Jumptable branch.
- case ARM::BR_JTm: // Jumptable branch through mem.
- case ARM::BR_JTadd: // Jumptable branch add to pc.
- return true;
- default:
- break;
- }
-
- return false;
-}
-
void ARMInstrInfo::
reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SubIdx, const MachineInstr *Orig,
diff --git a/lib/Target/ARM/ARMInstrInfo.h b/lib/Target/ARM/ARMInstrInfo.h
index 4319577..d4199d1 100644
--- a/lib/Target/ARM/ARMInstrInfo.h
+++ b/lib/Target/ARM/ARMInstrInfo.h
@@ -32,9 +32,6 @@ public:
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
- // Return true if the block does not fall through.
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
void reMaterialize(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI,
unsigned DestReg, unsigned SubIdx,
const MachineInstr *Orig,
diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td
index 7516d3c..e14696a 100644
--- a/lib/Target/ARM/ARMInstrInfo.td
+++ b/lib/Target/ARM/ARMInstrInfo.td
@@ -46,6 +46,11 @@ def SDT_ARMPICAdd : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>,
def SDT_ARMThreadPointer : SDTypeProfile<1, 0, [SDTCisPtrTy<0>]>;
def SDT_ARMEH_SJLJ_Setjmp : SDTypeProfile<1, 1, [SDTCisInt<0>, SDTCisPtrTy<1>]>;
+def SDT_ARMMEMBARRIERV7 : SDTypeProfile<0, 0, []>;
+def SDT_ARMSYNCBARRIERV7 : SDTypeProfile<0, 0, []>;
+def SDT_ARMMEMBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+def SDT_ARMSYNCBARRIERV6 : SDTypeProfile<0, 1, [SDTCisInt<0>]>;
+
// Node definitions.
def ARMWrapper : SDNode<"ARMISD::Wrapper", SDTIntUnaryOp>;
def ARMWrapperJT : SDNode<"ARMISD::WrapperJT", SDTIntBinOp>;
@@ -93,6 +98,15 @@ def ARMrrx : SDNode<"ARMISD::RRX" , SDTIntUnaryOp, [SDNPInFlag ]>;
def ARMthread_pointer: SDNode<"ARMISD::THREAD_POINTER", SDT_ARMThreadPointer>;
def ARMeh_sjlj_setjmp: SDNode<"ARMISD::EH_SJLJ_SETJMP", SDT_ARMEH_SJLJ_Setjmp>;
+def ARMMemBarrierV7 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV7,
+ [SDNPHasChain]>;
+def ARMSyncBarrierV7 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV7,
+ [SDNPHasChain]>;
+def ARMMemBarrierV6 : SDNode<"ARMISD::MEMBARRIER", SDT_ARMMEMBARRIERV6,
+ [SDNPHasChain]>;
+def ARMSyncBarrierV6 : SDNode<"ARMISD::SYNCBARRIER", SDT_ARMMEMBARRIERV6,
+ [SDNPHasChain]>;
+
//===----------------------------------------------------------------------===//
// ARM Instruction Predicate Definitions.
//
@@ -772,6 +786,7 @@ let isBranch = 1, isTerminator = 1 in {
def BR_JTr : JTI<(outs), (ins GPR:$target, jtblock_operand:$jt, i32imm:$id),
IIC_Br, "mov\tpc, $target \n$jt",
[(ARMbrjt GPR:$target, tjumptable:$jt, imm:$id)]> {
+ let Inst{11-4} = 0b00000000;
let Inst{15-12} = 0b1111;
let Inst{20} = 0; // S Bit
let Inst{24-21} = 0b1101;
@@ -1561,6 +1576,189 @@ def MOVCCi : AI1<0b1101, (outs GPR:$dst),
let Inst{25} = 1;
}
+//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def Int_MemBarrierV7 : AInoP<(outs), (ins),
+ Pseudo, NoItinerary,
+ "dmb", "",
+ [(ARMMemBarrierV7)]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-4} = 0xf57ff05;
+ // FIXME: add support for options other than a full system DMB
+ let Inst{3-0} = 0b1111;
+}
+
+def Int_SyncBarrierV7 : AInoP<(outs), (ins),
+ Pseudo, NoItinerary,
+ "dsb", "",
+ [(ARMSyncBarrierV7)]>,
+ Requires<[IsARM, HasV7]> {
+ let Inst{31-4} = 0xf57ff04;
+ // FIXME: add support for options other than a full system DSB
+ let Inst{3-0} = 0b1111;
+}
+
+def Int_MemBarrierV6 : AInoP<(outs), (ins GPR:$zero),
+ Pseudo, NoItinerary,
+ "mcr", "\tp15, 0, $zero, c7, c10, 5",
+ [(ARMMemBarrierV6 GPR:$zero)]>,
+ Requires<[IsARM, HasV6]> {
+ // FIXME: add support for options other than a full system DMB
+ // FIXME: add encoding
+}
+
+def Int_SyncBarrierV6 : AInoP<(outs), (ins GPR:$zero),
+ Pseudo, NoItinerary,
+ "mcr", "\tp15, 0, $zero, c7, c10, 4",
+ [(ARMSyncBarrierV6 GPR:$zero)]>,
+ Requires<[IsARM, HasV6]> {
+ // FIXME: add support for options other than a full system DSB
+ // FIXME: add encoding
+}
+}
+
+let usesCustomInserter = 1 in {
+ let Uses = [CPSR] in {
+ def ATOMIC_LOAD_ADD_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_ADD_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_add_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_SUB_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_sub_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_AND_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_and_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_OR_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_or_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_XOR_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_xor_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_NAND_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_nand_8 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_ADD_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_ADD_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_add_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_SUB_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_sub_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_AND_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_and_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_OR_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_or_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_XOR_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_xor_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_NAND_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_nand_16 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_ADD_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_ADD_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_add_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_SUB_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_SUB_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_sub_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_AND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_AND_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_and_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_OR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_OR_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_or_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_XOR_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_XOR_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_xor_32 GPR:$ptr, GPR:$incr))]>;
+ def ATOMIC_LOAD_NAND_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$incr), NoItinerary,
+ "${:comment} ATOMIC_LOAD_NAND_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_load_nand_32 GPR:$ptr, GPR:$incr))]>;
+
+ def ATOMIC_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_SWAP_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_swap_8 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_SWAP_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_swap_16 GPR:$ptr, GPR:$new))]>;
+ def ATOMIC_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_SWAP_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_swap_32 GPR:$ptr, GPR:$new))]>;
+
+ def ATOMIC_CMP_SWAP_I8 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_CMP_SWAP_I8 PSEUDO!",
+ [(set GPR:$dst, (atomic_cmp_swap_8 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I16 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_CMP_SWAP_I16 PSEUDO!",
+ [(set GPR:$dst, (atomic_cmp_swap_16 GPR:$ptr, GPR:$old, GPR:$new))]>;
+ def ATOMIC_CMP_SWAP_I32 : PseudoInst<
+ (outs GPR:$dst), (ins GPR:$ptr, GPR:$old, GPR:$new), NoItinerary,
+ "${:comment} ATOMIC_CMP_SWAP_I32 PSEUDO!",
+ [(set GPR:$dst, (atomic_cmp_swap_32 GPR:$ptr, GPR:$old, GPR:$new))]>;
+}
+}
+
+let mayLoad = 1 in {
+def LDREXB : AIldrex<0b10, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
+ "ldrexb", "\t$dest, [$ptr]",
+ []>;
+def LDREXH : AIldrex<0b11, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
+ "ldrexh", "\t$dest, [$ptr]",
+ []>;
+def LDREX : AIldrex<0b00, (outs GPR:$dest), (ins GPR:$ptr), NoItinerary,
+ "ldrex", "\t$dest, [$ptr]",
+ []>;
+def LDREXD : AIldrex<0b01, (outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+ NoItinerary,
+ "ldrexd", "\t$dest, $dest2, [$ptr]",
+ []>;
+}
+
+let mayStore = 1 in {
+def STREXB : AIstrex<0b10, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ NoItinerary,
+ "strexb", "\t$success, $src, [$ptr]",
+ []>;
+def STREXH : AIstrex<0b11, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ NoItinerary,
+ "strexh", "\t$success, $src, [$ptr]",
+ []>;
+def STREX : AIstrex<0b00, (outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ NoItinerary,
+ "strex", "\t$success, $src, [$ptr]",
+ []>;
+def STREXD : AIstrex<0b01, (outs GPR:$success),
+ (ins GPR:$src, GPR:$src2, GPR:$ptr),
+ NoItinerary,
+ "strexd", "\t$success, $src, $src2, [$ptr]",
+ []>;
+}
//===----------------------------------------------------------------------===//
// TLS Instructions
diff --git a/lib/Target/ARM/ARMInstrNEON.td b/lib/Target/ARM/ARMInstrNEON.td
index 3166931..61b7705 100644
--- a/lib/Target/ARM/ARMInstrNEON.td
+++ b/lib/Target/ARM/ARMInstrNEON.td
@@ -152,7 +152,7 @@ def VLDRQ : NI4<(outs QPR:$dst), (ins addrmode4:$addr),
let Inst{24} = 0; // P bit
let Inst{23} = 1; // U bit
let Inst{20} = 1;
- let Inst{11-9} = 0b101;
+ let Inst{11-8} = 0b1011;
}
// Use vstmia to store a Q register as a D register pair.
@@ -164,7 +164,7 @@ def VSTRQ : NI4<(outs), (ins QPR:$src, addrmode4:$addr),
let Inst{24} = 0; // P bit
let Inst{23} = 1; // U bit
let Inst{20} = 0;
- let Inst{11-9} = 0b101;
+ let Inst{11-8} = 0b1011;
}
// VLD1 : Vector Load (multiple single elements)
diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td
index b5956a3..9306bdb 100644
--- a/lib/Target/ARM/ARMInstrThumb.td
+++ b/lib/Target/ARM/ARMInstrThumb.td
@@ -669,6 +669,35 @@ let isCall = 1,
[(set R0, ARMthread_pointer)]>;
}
+// SJLJ Exception handling intrinsics
+// eh_sjlj_setjmp() is an instruction sequence to store the return
+// address and save #0 in R0 for the non-longjmp case.
+// Since by its nature we may be coming from some other function to get
+// here, and we're using the stack frame for the containing function to
+// save/restore registers, we can't keep anything live in regs across
+// the eh_sjlj_setjmp(), else it will almost certainly have been tromped upon
+// when we get here from a longjmp(). We force everything out of registers
+// except for our own input by listing the relevant registers in Defs. By
+// doing so, we also cause the prologue/epilogue code to actively preserve
+// all of the callee-saved registers, which is exactly what we want.
+let Defs =
+ [ R0, R1, R2, R3, R4, R5, R6, R7, R12 ] in {
+ def tInt_eh_sjlj_setjmp : ThumbXI<(outs), (ins GPR:$src),
+ AddrModeNone, SizeSpecial, NoItinerary,
+ "mov\tr12, r1\t@ begin eh.setjmp\n"
+ "\tmov\tr1, sp\n"
+ "\tstr\tr1, [$src, #8]\n"
+ "\tadr\tr1, 0f\n"
+ "\tadds\tr1, #1\n"
+ "\tstr\tr1, [$src, #4]\n"
+ "\tmov\tr1, r12\n"
+ "\tmovs\tr0, #0\n"
+ "\tb\t1f\n"
+ ".align 2\n"
+ "0:\tmovs\tr0, #1\t@ end eh.setjmp\n"
+ "1:", "",
+ [(set R0, (ARMeh_sjlj_setjmp GPR:$src))]>;
+}
//===----------------------------------------------------------------------===//
// Non-Instruction Patterns
//
diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td
index 9489815..949ce73 100644
--- a/lib/Target/ARM/ARMInstrThumb2.td
+++ b/lib/Target/ARM/ARMInstrThumb2.td
@@ -1065,6 +1065,68 @@ def t2MOVCCror : T2I<(outs GPR:$dst), (ins GPR:$false, GPR:$true, i32imm:$rhs),
RegConstraint<"$false = $dst">;
//===----------------------------------------------------------------------===//
+// Atomic operations intrinsics
+//
+
+// memory barriers protect the atomic sequences
+let hasSideEffects = 1 in {
+def t2Int_MemBarrierV7 : AInoP<(outs), (ins),
+ Pseudo, NoItinerary,
+ "dmb", "",
+ [(ARMMemBarrierV7)]>,
+ Requires<[IsThumb2]> {
+ // FIXME: add support for options other than a full system DMB
+}
+
+def t2Int_SyncBarrierV7 : AInoP<(outs), (ins),
+ Pseudo, NoItinerary,
+ "dsb", "",
+ [(ARMSyncBarrierV7)]>,
+ Requires<[IsThumb2]> {
+ // FIXME: add support for options other than a full system DSB
+}
+}
+
+let mayLoad = 1 in {
+def t2LDREXB : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+ Size4Bytes, NoItinerary,
+ "ldrexb", "\t$dest, [$ptr]", "",
+ []>;
+def t2LDREXH : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+ Size4Bytes, NoItinerary,
+ "ldrexh", "\t$dest, [$ptr]", "",
+ []>;
+def t2LDREX : Thumb2I<(outs GPR:$dest), (ins GPR:$ptr), AddrModeNone,
+ Size4Bytes, NoItinerary,
+ "ldrex", "\t$dest, [$ptr]", "",
+ []>;
+def t2LDREXD : Thumb2I<(outs GPR:$dest, GPR:$dest2), (ins GPR:$ptr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "ldrexd", "\t$dest, $dest2, [$ptr]", "",
+ []>;
+}
+
+let mayStore = 1 in {
+def t2STREXB : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexb", "\t$success, $src, [$ptr]", "",
+ []>;
+def t2STREXH : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexh", "\t$success, $src, [$ptr]", "",
+ []>;
+def t2STREX : Thumb2I<(outs GPR:$success), (ins GPR:$src, GPR:$ptr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strex", "\t$success, $src, [$ptr]", "",
+ []>;
+def t2STREXD : Thumb2I<(outs GPR:$success),
+ (ins GPR:$src, GPR:$src2, GPR:$ptr),
+ AddrModeNone, Size4Bytes, NoItinerary,
+ "strexd", "\t$success, $src, $src2, [$ptr]", "",
+ []>;
+}
+
+//===----------------------------------------------------------------------===//
// TLS Instructions
//
diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
index 304d0ef..22bd80e 100644
--- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
+++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp
@@ -449,7 +449,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
if (MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
if ((Mode == ARM_AM::ia || Mode == ARM_AM::ib) &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM4ModeImm(Mode, true));
@@ -494,7 +494,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLSMultiple(MachineBasicBlock &MBB,
}
if (MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
if (Mode == ARM_AM::ia &&
isMatchingIncrement(NextMBBI, Base, Bytes, 0, Pred, PredReg)) {
MI->getOperand(1).setImm(ARM_AM::getAM5Opc(ARM_AM::ia, true, Offset));
@@ -604,7 +604,7 @@ bool ARMLoadStoreOpt::MergeBaseUpdateLoadStore(MachineBasicBlock &MBB,
}
if (!DoMerge && MBBI != MBB.end()) {
- MachineBasicBlock::iterator NextMBBI = next(MBBI);
+ MachineBasicBlock::iterator NextMBBI = llvm::next(MBBI);
if (!isAM5 &&
isMatchingDecrement(NextMBBI, Base, Bytes, Limit, Pred, PredReg)) {
DoMerge = true;
diff --git a/lib/Target/ARM/ARMTargetMachine.cpp b/lib/Target/ARM/ARMTargetMachine.cpp
index 2564ed9..1c6fca7 100644
--- a/lib/Target/ARM/ARMTargetMachine.cpp
+++ b/lib/Target/ARM/ARMTargetMachine.cpp
@@ -95,7 +95,7 @@ bool ARMBaseTargetMachine::addPreRegAlloc(PassManagerBase &PM,
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- PM.add(createARMMaxStackAlignmentCalculatorPass());
+ PM.add(createMaxStackAlignmentCalculatorPass());
// FIXME: temporarily disabling load / store optimization pass for Thumb1.
if (OptLevel != CodeGenOpt::None && !Subtarget.isThumb1Only())
diff --git a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
index 692bb19..362bbf1 100644
--- a/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
+++ b/lib/Target/ARM/AsmPrinter/ARMAsmPrinter.cpp
@@ -1045,6 +1045,7 @@ bool ARMAsmPrinter::PrintAsmOperand(const MachineInstr *MI, unsigned OpNum,
printNoHashImmediate(MI, OpNum);
return false;
case 'P': // Print a VFP double precision register.
+ case 'q': // Print a NEON quad precision register.
printOperand(MI, OpNum);
return false;
case 'Q':
diff --git a/lib/Target/ARM/NEONMoveFix.cpp b/lib/Target/ARM/NEONMoveFix.cpp
index 50abcf4..3c0414d 100644
--- a/lib/Target/ARM/NEONMoveFix.cpp
+++ b/lib/Target/ARM/NEONMoveFix.cpp
@@ -51,7 +51,7 @@ bool NEONMoveFixPass::InsertMoves(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator NextMII;
for (; MII != E; MII = NextMII) {
- NextMII = next(MII);
+ NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
if (MI->getOpcode() == ARM::VMOVD &&
diff --git a/lib/Target/ARM/NEONPreAllocPass.cpp b/lib/Target/ARM/NEONPreAllocPass.cpp
index 206677b..d9942c8 100644
--- a/lib/Target/ARM/NEONPreAllocPass.cpp
+++ b/lib/Target/ARM/NEONPreAllocPass.cpp
@@ -338,7 +338,7 @@ bool NEONPreAllocPass::PreAllocNEONRegisters(MachineBasicBlock &MBB) {
if (!isNEONMultiRegOp(MI->getOpcode(), FirstOpnd, NumRegs, Offset, Stride))
continue;
- MachineBasicBlock::iterator NextI = next(MBBI);
+ MachineBasicBlock::iterator NextI = llvm::next(MBBI);
for (unsigned R = 0; R < NumRegs; ++R) {
MachineOperand &MO = MI->getOperand(FirstOpnd + R);
assert(MO.isReg() && MO.getSubReg() == 0 && "unexpected operand");
diff --git a/lib/Target/ARM/Thumb1InstrInfo.cpp b/lib/Target/ARM/Thumb1InstrInfo.cpp
index 7602b6d..66d3b83 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb1InstrInfo.cpp
@@ -32,25 +32,6 @@ unsigned Thumb1InstrInfo::getUnindexedOpcode(unsigned Opc) const {
return 0;
}
-bool
-Thumb1InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case ARM::tBX_RET:
- case ARM::tBX_RET_vararg:
- case ARM::tPOP_RET:
- case ARM::tB:
- case ARM::tBRIND:
- case ARM::tBR_JTr:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
bool Thumb1InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/ARM/Thumb1InstrInfo.h b/lib/Target/ARM/Thumb1InstrInfo.h
index b28229d..516ddf1 100644
--- a/lib/Target/ARM/Thumb1InstrInfo.h
+++ b/lib/Target/ARM/Thumb1InstrInfo.h
@@ -31,9 +31,6 @@ public:
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
- // Return true if the block does not fall through.
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
/// getRegisterInfo - TargetInstrInfo is a superset of MRegister info. As
/// such, whenever a client has an instance of instruction info, it should
/// always be able to get register info as well (through this method).
diff --git a/lib/Target/ARM/Thumb1RegisterInfo.cpp b/lib/Target/ARM/Thumb1RegisterInfo.cpp
index 37adf37..9f3816a 100644
--- a/lib/Target/ARM/Thumb1RegisterInfo.cpp
+++ b/lib/Target/ARM/Thumb1RegisterInfo.cpp
@@ -528,7 +528,7 @@ Thumb1RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
MI.getOperand(i+1).ChangeToImmediate(Mask);
}
Offset = (Offset - Mask * Scale);
- MachineBasicBlock::iterator NII = next(II);
+ MachineBasicBlock::iterator NII = llvm::next(II);
emitThumbRegPlusImmediate(MBB, NII, DestReg, DestReg, Offset, TII,
*this, dl);
} else {
diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp
index 16c1e6f..f4a8c27 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.cpp
+++ b/lib/Target/ARM/Thumb2InstrInfo.cpp
@@ -36,30 +36,6 @@ unsigned Thumb2InstrInfo::getUnindexedOpcode(unsigned Opc) const {
}
bool
-Thumb2InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case ARM::t2LDM_RET:
- case ARM::t2B: // Uncond branch.
- case ARM::t2BR_JT: // Jumptable branch.
- case ARM::t2TBB: // Table branch byte.
- case ARM::t2TBH: // Table branch halfword.
- case ARM::tBR_JTr: // Jumptable branch (16-bit version).
- case ARM::tBX_RET:
- case ARM::tBX_RET_vararg:
- case ARM::tPOP_RET:
- case ARM::tB:
- case ARM::tBRIND:
- return true;
- default:
- break;
- }
-
- return false;
-}
-
-bool
Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/ARM/Thumb2InstrInfo.h b/lib/Target/ARM/Thumb2InstrInfo.h
index 663a60b..a0f89a6 100644
--- a/lib/Target/ARM/Thumb2InstrInfo.h
+++ b/lib/Target/ARM/Thumb2InstrInfo.h
@@ -31,9 +31,6 @@ public:
// if there is not such an opcode.
unsigned getUnindexedOpcode(unsigned Opc) const;
- // Return true if the block does not fall through.
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
bool copyRegToReg(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I,
unsigned DestReg, unsigned SrcReg,
diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp
index b2fd7b3..35359aa 100644
--- a/lib/Target/ARM/Thumb2SizeReduction.cpp
+++ b/lib/Target/ARM/Thumb2SizeReduction.cpp
@@ -649,7 +649,7 @@ bool Thumb2SizeReduce::ReduceMBB(MachineBasicBlock &MBB) {
MachineBasicBlock::iterator MII = MBB.begin(), E = MBB.end();
MachineBasicBlock::iterator NextMII;
for (; MII != E; MII = NextMII) {
- NextMII = next(MII);
+ NextMII = llvm::next(MII);
MachineInstr *MI = &*MII;
LiveCPSR = UpdateCPSRUse(*MI, LiveCPSR);
diff --git a/lib/Target/Alpha/AlphaInstrInfo.cpp b/lib/Target/Alpha/AlphaInstrInfo.cpp
index 86173ff..39f0749 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.cpp
+++ b/lib/Target/Alpha/AlphaInstrInfo.cpp
@@ -392,18 +392,6 @@ void AlphaInstrInfo::insertNoop(MachineBasicBlock &MBB,
.addReg(Alpha::R31);
}
-bool AlphaInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case Alpha::RETDAG: // Return.
- case Alpha::RETDAGp:
- case Alpha::BR: // Uncond branch.
- case Alpha::JMP: // Indirect branch.
- return true;
- default: return false;
- }
-}
bool AlphaInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid Alpha branch opcode!");
diff --git a/lib/Target/Alpha/AlphaInstrInfo.h b/lib/Target/Alpha/AlphaInstrInfo.h
index 274f452..c3b6044 100644
--- a/lib/Target/Alpha/AlphaInstrInfo.h
+++ b/lib/Target/Alpha/AlphaInstrInfo.h
@@ -78,7 +78,6 @@ public:
unsigned RemoveBranch(MachineBasicBlock &MBB) const;
void insertNoop(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MI) const;
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
/// getGlobalBaseReg - Return a virtual register initialized with the
diff --git a/lib/Target/CBackend/CBackend.cpp b/lib/Target/CBackend/CBackend.cpp
index 9e4fe27..a52ca79 100644
--- a/lib/Target/CBackend/CBackend.cpp
+++ b/lib/Target/CBackend/CBackend.cpp
@@ -2573,7 +2573,7 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
BasicBlock *Succ = cast<BasicBlock>(SI.getOperand(i+1));
printPHICopiesForSuccessor (SI.getParent(), Succ, 2);
printBranchToBlock(SI.getParent(), Succ, 2);
- if (Function::iterator(Succ) == next(Function::iterator(SI.getParent())))
+ if (Function::iterator(Succ) == llvm::next(Function::iterator(SI.getParent())))
Out << " break;\n";
}
Out << " }\n";
@@ -2593,7 +2593,7 @@ bool CWriter::isGotoCodeNecessary(BasicBlock *From, BasicBlock *To) {
/// FIXME: This should be reenabled, but loop reordering safe!!
return true;
- if (next(Function::iterator(From)) != Function::iterator(To))
+ if (llvm::next(Function::iterator(From)) != Function::iterator(To))
return true; // Not the direct successor, we need a goto.
//isa<SwitchInst>(From->getTerminator())
diff --git a/lib/Target/CellSPU/SPUInstrInfo.cpp b/lib/Target/CellSPU/SPUInstrInfo.cpp
index ecce8e3..2306665 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.cpp
+++ b/lib/Target/CellSPU/SPUInstrInfo.cpp
@@ -580,10 +580,6 @@ SPUInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *TBB,
}
}
-bool
-SPUInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- return (!MBB.empty() && isUncondBranch(&MBB.back()));
-}
//! Reverses a branch's condition, returning false on success.
bool
SPUInstrInfo::ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond)
diff --git a/lib/Target/CellSPU/SPUInstrInfo.h b/lib/Target/CellSPU/SPUInstrInfo.h
index c644a11..42677fc 100644
--- a/lib/Target/CellSPU/SPUInstrInfo.h
+++ b/lib/Target/CellSPU/SPUInstrInfo.h
@@ -79,9 +79,6 @@ namespace llvm {
bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
- //! Return true if the specified block does not fall through
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
//! Reverses a branch's condition, returning false on success.
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/CppBackend/CPPBackend.cpp b/lib/Target/CppBackend/CPPBackend.cpp
index 4bae6c7..a872fbd 100644
--- a/lib/Target/CppBackend/CPPBackend.cpp
+++ b/lib/Target/CppBackend/CPPBackend.cpp
@@ -976,21 +976,20 @@ namespace {
nl(Out);
printType(GV->getType());
if (GV->hasInitializer()) {
- Constant* Init = GV->getInitializer();
+ Constant *Init = GV->getInitializer();
printType(Init->getType());
- if (Function* F = dyn_cast<Function>(Init)) {
+ if (Function *F = dyn_cast<Function>(Init)) {
nl(Out)<< "/ Function Declarations"; nl(Out);
printFunctionHead(F);
} else if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
nl(Out) << "// Global Variable Declarations"; nl(Out);
printVariableHead(gv);
- } else {
- nl(Out) << "// Constant Definitions"; nl(Out);
- printConstant(gv);
- }
- if (GlobalVariable* gv = dyn_cast<GlobalVariable>(Init)) {
+
nl(Out) << "// Global Variable Definitions"; nl(Out);
printVariableBody(gv);
+ } else {
+ nl(Out) << "// Constant Definitions"; nl(Out);
+ printConstant(Init);
}
}
}
diff --git a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
index beccb2c..4edf422 100644
--- a/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
+++ b/lib/Target/MSP430/MSP430ISelDAGToDAG.cpp
@@ -86,10 +86,10 @@ namespace {
void dump() {
errs() << "MSP430ISelAddressMode " << this << '\n';
- if (Base.Reg.getNode() != 0) {
+ if (BaseType == RegBase && Base.Reg.getNode() != 0) {
errs() << "Base.Reg ";
Base.Reg.getNode()->dump();
- } else {
+ } else if (BaseType == FrameIndexBase) {
errs() << " Base.FrameIndex " << Base.FrameIndex << '\n';
}
errs() << " Disp " << Disp << '\n';
diff --git a/lib/Target/MSP430/MSP430ISelLowering.cpp b/lib/Target/MSP430/MSP430ISelLowering.cpp
index 29cc370..5fe9b20 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.cpp
+++ b/lib/Target/MSP430/MSP430ISelLowering.cpp
@@ -15,6 +15,7 @@
#include "MSP430ISelLowering.h"
#include "MSP430.h"
+#include "MSP430MachineFunctionInfo.h"
#include "MSP430TargetMachine.h"
#include "MSP430Subtarget.h"
#include "llvm/DerivedTypes.h"
@@ -32,16 +33,38 @@
#include "llvm/CodeGen/SelectionDAGISel.h"
#include "llvm/CodeGen/ValueTypes.h"
#include "llvm/Target/TargetLoweringObjectFile.h"
+#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/ADT/VectorExtras.h"
using namespace llvm;
+typedef enum {
+ NoHWMult,
+ HWMultIntr,
+ HWMultNoIntr
+} HWMultUseMode;
+
+static cl::opt<HWMultUseMode>
+HWMultMode("msp430-hwmult-mode",
+ cl::desc("Hardware multiplier use mode"),
+ cl::init(HWMultNoIntr),
+ cl::values(
+ clEnumValN(NoHWMult, "no",
+ "Do not use hardware multiplier"),
+ clEnumValN(HWMultIntr, "interrupts",
+ "Assume hardware multiplier can be used inside interrupts"),
+ clEnumValN(HWMultNoIntr, "use",
+ "Assume hardware multiplier cannot be used inside interrupts"),
+ clEnumValEnd));
+
MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
TargetLowering(tm, new TargetLoweringObjectFileELF()),
Subtarget(*tm.getSubtargetImpl()), TM(tm) {
+ TD = getTargetData();
+
// Set up the register classes.
addRegisterClass(MVT::i8, MSP430::GR8RegisterClass);
addRegisterClass(MVT::i16, MSP430::GR16RegisterClass);
@@ -92,8 +115,8 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::BR_CC, MVT::i8, Custom);
setOperationAction(ISD::BR_CC, MVT::i16, Custom);
setOperationAction(ISD::BRCOND, MVT::Other, Expand);
- setOperationAction(ISD::SETCC, MVT::i8, Expand);
- setOperationAction(ISD::SETCC, MVT::i16, Expand);
+ setOperationAction(ISD::SETCC, MVT::i8, Custom);
+ setOperationAction(ISD::SETCC, MVT::i16, Custom);
setOperationAction(ISD::SELECT, MVT::i8, Expand);
setOperationAction(ISD::SELECT, MVT::i16, Expand);
setOperationAction(ISD::SELECT_CC, MVT::i8, Custom);
@@ -142,6 +165,15 @@ MSP430TargetLowering::MSP430TargetLowering(MSP430TargetMachine &tm) :
setOperationAction(ISD::SDIV, MVT::i16, Expand);
setOperationAction(ISD::SDIVREM, MVT::i16, Expand);
setOperationAction(ISD::SREM, MVT::i16, Expand);
+
+ // Libcalls names.
+ if (HWMultMode == HWMultIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw");
+ } else if (HWMultMode == HWMultNoIntr) {
+ setLibcallName(RTLIB::MUL_I8, "__mulqi3hw_noint");
+ setLibcallName(RTLIB::MUL_I16, "__mulhi3hw_noint");
+ }
}
SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
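The practical effect of the new option is only a change of libcall names: in the default HWMultNoIntr mode a 16-bit multiply becomes a call to __mulhi3hw_noint, with -msp430-hwmult-mode=interrupts it becomes __mulhi3hw, and with =no the generic soft-multiply libcall name is left untouched. A hypothetical source-level example (the C++ function itself is an assumption for illustration, not part of the patch):

#include <cstdint>

// MSP430 lowers i16 multiplication to the RTLIB::MUL_I16 libcall, whose name
// the constructor code above selects.
std::int16_t mul16(std::int16_t a, std::int16_t b) {
  return static_cast<std::int16_t>(a * b);
}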
@@ -151,9 +183,12 @@ SDValue MSP430TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) {
case ISD::SRA: return LowerShifts(Op, DAG);
case ISD::GlobalAddress: return LowerGlobalAddress(Op, DAG);
case ISD::ExternalSymbol: return LowerExternalSymbol(Op, DAG);
+ case ISD::SETCC: return LowerSETCC(Op, DAG);
case ISD::BR_CC: return LowerBR_CC(Op, DAG);
case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG);
case ISD::SIGN_EXTEND: return LowerSIGN_EXTEND(Op, DAG);
+ case ISD::RETURNADDR: return LowerRETURNADDR(Op, DAG);
+ case ISD::FRAMEADDR: return LowerFRAMEADDR(Op, DAG);
default:
llvm_unreachable("unimplemented operand");
return SDValue();
@@ -225,6 +260,13 @@ MSP430TargetLowering::LowerFormalArguments(SDValue Chain,
case CallingConv::C:
case CallingConv::Fast:
return LowerCCCArguments(Chain, CallConv, isVarArg, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ if (Ins.empty())
+ return Chain;
+ else {
+ llvm_report_error("ISRs cannot have arguments");
+ return SDValue();
+ }
}
}
@@ -244,6 +286,9 @@ MSP430TargetLowering::LowerCall(SDValue Chain, SDValue Callee,
case CallingConv::C:
return LowerCCCCallTo(Chain, Callee, CallConv, isVarArg, isTailCall,
Outs, Ins, dl, DAG, InVals);
+ case CallingConv::MSP430_INTR:
+ llvm_report_error("ISRs cannot be called directly");
+ return SDValue();
}
}
@@ -340,6 +385,12 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
// CCValAssign - represent the assignment of the return value to a location
SmallVector<CCValAssign, 16> RVLocs;
+ // ISRs cannot return any value.
+ if (CallConv == CallingConv::MSP430_INTR && !Outs.empty()) {
+ llvm_report_error("ISRs cannot return any value");
+ return SDValue();
+ }
+
// CCState - Info about the registers and stack slot.
CCState CCInfo(CallConv, isVarArg, getTargetMachine(),
RVLocs, *DAG.getContext());
@@ -370,11 +421,14 @@ MSP430TargetLowering::LowerReturn(SDValue Chain,
Flag = Chain.getValue(1);
}
+ unsigned Opc = (CallConv == CallingConv::MSP430_INTR ?
+ MSP430ISD::RETI_FLAG : MSP430ISD::RET_FLAG);
+
if (Flag.getNode())
- return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain, Flag);
+ return DAG.getNode(Opc, dl, MVT::Other, Chain, Flag);
// Return Void
- return DAG.getNode(MSP430ISD::RET_FLAG, dl, MVT::Other, Chain);
+ return DAG.getNode(Opc, dl, MVT::Other, Chain);
}
/// LowerCCCCallTo - functions arguments are copied from virtual regs to
@@ -538,9 +592,21 @@ SDValue MSP430TargetLowering::LowerShifts(SDValue Op,
EVT VT = Op.getValueType();
DebugLoc dl = N->getDebugLoc();
- // We currently only lower shifts of constant argument.
+ // Expand non-constant shifts to loops:
if (!isa<ConstantSDNode>(N->getOperand(1)))
- return SDValue();
+ switch (Opc) {
+ default:
+ assert(0 && "Invalid shift opcode!");
+ case ISD::SHL:
+ return DAG.getNode(MSP430ISD::SHL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRA:
+ return DAG.getNode(MSP430ISD::SRA, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ case ISD::SRL:
+ return DAG.getNode(MSP430ISD::SRL, dl,
+ VT, N->getOperand(0), N->getOperand(1));
+ }
uint64_t ShiftAmount = cast<ConstantSDNode>(N->getOperand(1))->getZExtValue();
@@ -648,6 +714,88 @@ SDValue MSP430TargetLowering::LowerBR_CC(SDValue Op, SelectionDAG &DAG) {
Chain, Dest, TargetCC, Flag);
}
+
+SDValue MSP430TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
+ SDValue LHS = Op.getOperand(0);
+ SDValue RHS = Op.getOperand(1);
+ DebugLoc dl = Op.getDebugLoc();
+
+ // If we are doing an AND and testing against zero, then the CMP
+ // will not be generated. The AND (or BIT) will generate the condition codes,
+ // but they are different from CMP.
+ bool andCC = false;
+ if (ConstantSDNode *RHSC = dyn_cast<ConstantSDNode>(RHS)) {
+ if (RHSC->isNullValue() && LHS.hasOneUse() &&
+ (LHS.getOpcode() == ISD::AND ||
+ (LHS.getOpcode() == ISD::TRUNCATE &&
+ LHS.getOperand(0).getOpcode() == ISD::AND))) {
+ andCC = true;
+ }
+ }
+ ISD::CondCode CC = cast<CondCodeSDNode>(Op.getOperand(2))->get();
+ SDValue TargetCC;
+ SDValue Flag = EmitCMP(LHS, RHS, TargetCC, CC, dl, DAG);
+
+  // Get the condition codes directly from the status register, if it's easy.
+  // Otherwise a branch will be generated. Note that the AND and BIT
+  // instructions generate different flags than CMP; the carry bit can be used
+ // for NE/EQ.
+ bool Invert = false;
+ bool Shift = false;
+ bool Convert = true;
+ switch (cast<ConstantSDNode>(TargetCC)->getZExtValue()) {
+ default:
+ Convert = false;
+ break;
+ case MSP430CC::COND_HS:
+ // Res = SRW & 1, no processing is required
+ break;
+ case MSP430CC::COND_LO:
+ // Res = ~(SRW & 1)
+ Invert = true;
+ break;
+ case MSP430CC::COND_NE:
+ if (andCC) {
+ // C = ~Z, thus Res = SRW & 1, no processing is required
+ } else {
+ // Res = (SRW >> 1) & 1
+ Shift = true;
+ }
+ break;
+ case MSP430CC::COND_E:
+ if (andCC) {
+ // C = ~Z, thus Res = ~(SRW & 1)
+ } else {
+ // Res = ~((SRW >> 1) & 1)
+ Shift = true;
+ }
+ Invert = true;
+ break;
+ }
+ EVT VT = Op.getValueType();
+ SDValue One = DAG.getConstant(1, VT);
+ if (Convert) {
+ SDValue SR = DAG.getCopyFromReg(DAG.getEntryNode(), dl, MSP430::SRW,
+ MVT::i16, Flag);
+ if (Shift)
+ // FIXME: somewhere this is turned into a SRL, lower it MSP specific?
+ SR = DAG.getNode(ISD::SRA, dl, MVT::i16, SR, One);
+ SR = DAG.getNode(ISD::AND, dl, MVT::i16, SR, One);
+ if (Invert)
+ SR = DAG.getNode(ISD::XOR, dl, MVT::i16, SR, One);
+ return SR;
+ } else {
+ SDValue Zero = DAG.getConstant(0, VT);
+ SDVTList VTs = DAG.getVTList(Op.getValueType(), MVT::Flag);
+ SmallVector<SDValue, 4> Ops;
+ Ops.push_back(One);
+ Ops.push_back(Zero);
+ Ops.push_back(TargetCC);
+ Ops.push_back(Flag);
+ return DAG.getNode(MSP430ISD::SELECT_CC, dl, VTs, &Ops[0], Ops.size());
+ }
+}
+
SDValue MSP430TargetLowering::LowerSELECT_CC(SDValue Op, SelectionDAG &DAG) {
SDValue LHS = Op.getOperand(0);
SDValue RHS = Op.getOperand(1);
@@ -682,6 +830,55 @@ SDValue MSP430TargetLowering::LowerSIGN_EXTEND(SDValue Op,
DAG.getValueType(Val.getValueType()));
}
+SDValue MSP430TargetLowering::getReturnAddressFrameIndex(SelectionDAG &DAG) {
+ MachineFunction &MF = DAG.getMachineFunction();
+ MSP430MachineFunctionInfo *FuncInfo = MF.getInfo<MSP430MachineFunctionInfo>();
+ int ReturnAddrIndex = FuncInfo->getRAIndex();
+
+ if (ReturnAddrIndex == 0) {
+ // Set up a frame object for the return address.
+ uint64_t SlotSize = TD->getPointerSize();
+ ReturnAddrIndex = MF.getFrameInfo()->CreateFixedObject(SlotSize, -SlotSize,
+ true, false);
+ FuncInfo->setRAIndex(ReturnAddrIndex);
+ }
+
+ return DAG.getFrameIndex(ReturnAddrIndex, getPointerTy());
+}
+
+SDValue MSP430TargetLowering::LowerRETURNADDR(SDValue Op, SelectionDAG &DAG) {
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ DebugLoc dl = Op.getDebugLoc();
+
+ if (Depth > 0) {
+ SDValue FrameAddr = LowerFRAMEADDR(Op, DAG);
+ SDValue Offset =
+ DAG.getConstant(TD->getPointerSize(), MVT::i16);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ DAG.getNode(ISD::ADD, dl, getPointerTy(),
+ FrameAddr, Offset),
+ NULL, 0);
+ }
+
+ // Just load the return address.
+ SDValue RetAddrFI = getReturnAddressFrameIndex(DAG);
+ return DAG.getLoad(getPointerTy(), dl, DAG.getEntryNode(),
+ RetAddrFI, NULL, 0);
+}
+
+SDValue MSP430TargetLowering::LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ MFI->setFrameAddressIsTaken(true);
+ EVT VT = Op.getValueType();
+ DebugLoc dl = Op.getDebugLoc(); // FIXME probably not meaningful
+ unsigned Depth = cast<ConstantSDNode>(Op.getOperand(0))->getZExtValue();
+ SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), dl,
+ MSP430::FPW, VT);
+ while (Depth--)
+ FrameAddr = DAG.getLoad(VT, dl, DAG.getEntryNode(), FrameAddr, NULL, 0);
+ return FrameAddr;
+}
+
/// getPostIndexedAddressParts - returns true by value, base pointer and
/// offset pointer and addressing mode by reference if this node can be
/// combined with a load / store to form a post-indexed load / store.
@@ -722,6 +919,7 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
switch (Opcode) {
default: return NULL;
case MSP430ISD::RET_FLAG: return "MSP430ISD::RET_FLAG";
+ case MSP430ISD::RETI_FLAG: return "MSP430ISD::RETI_FLAG";
case MSP430ISD::RRA: return "MSP430ISD::RRA";
case MSP430ISD::RLA: return "MSP430ISD::RLA";
case MSP430ISD::RRC: return "MSP430ISD::RRC";
@@ -730,6 +928,8 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
case MSP430ISD::BR_CC: return "MSP430ISD::BR_CC";
case MSP430ISD::CMP: return "MSP430ISD::CMP";
case MSP430ISD::SELECT_CC: return "MSP430ISD::SELECT_CC";
+ case MSP430ISD::SHL: return "MSP430ISD::SHL";
+ case MSP430ISD::SRA: return "MSP430ISD::SRA";
}
}
@@ -738,13 +938,131 @@ const char *MSP430TargetLowering::getTargetNodeName(unsigned Opcode) const {
//===----------------------------------------------------------------------===//
MachineBasicBlock*
+MSP430TargetLowering::EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ MachineFunction *F = BB->getParent();
+ MachineRegisterInfo &RI = F->getRegInfo();
+ DebugLoc dl = MI->getDebugLoc();
+ const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
+
+ unsigned Opc;
+ const TargetRegisterClass * RC;
+ switch (MI->getOpcode()) {
+ default:
+ assert(0 && "Invalid shift opcode!");
+ case MSP430::Shl8:
+ Opc = MSP430::SHL8r1;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Shl16:
+ Opc = MSP430::SHL16r1;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ case MSP430::Sra8:
+ Opc = MSP430::SAR8r1;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Sra16:
+ Opc = MSP430::SAR16r1;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ case MSP430::Srl8:
+ Opc = MSP430::SAR8r1c;
+ RC = MSP430::GR8RegisterClass;
+ break;
+ case MSP430::Srl16:
+ Opc = MSP430::SAR16r1c;
+ RC = MSP430::GR16RegisterClass;
+ break;
+ }
+
+ const BasicBlock *LLVM_BB = BB->getBasicBlock();
+ MachineFunction::iterator I = BB;
+ ++I;
+
+  // Create loop and remainder blocks
+ MachineBasicBlock *LoopBB = F->CreateMachineBasicBlock(LLVM_BB);
+ MachineBasicBlock *RemBB = F->CreateMachineBasicBlock(LLVM_BB);
+
+ F->insert(I, LoopBB);
+ F->insert(I, RemBB);
+
+ // Update machine-CFG edges by transferring all successors of the current
+ // block to the block containing instructions after shift.
+ RemBB->transferSuccessors(BB);
+
+ // Inform sdisel of the edge changes.
+ for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(),
+ SE = BB->succ_end(); SI != SE; ++SI)
+ EM->insert(std::make_pair(*SI, RemBB));
+
+  // Add edges BB => LoopBB => RemBB, BB => RemBB, LoopBB => LoopBB
+ BB->addSuccessor(LoopBB);
+ BB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(RemBB);
+ LoopBB->addSuccessor(LoopBB);
+
+ unsigned ShiftAmtReg = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftAmtReg2 = RI.createVirtualRegister(MSP430::GR8RegisterClass);
+ unsigned ShiftReg = RI.createVirtualRegister(RC);
+ unsigned ShiftReg2 = RI.createVirtualRegister(RC);
+ unsigned ShiftAmtSrcReg = MI->getOperand(2).getReg();
+ unsigned SrcReg = MI->getOperand(1).getReg();
+ unsigned DstReg = MI->getOperand(0).getReg();
+
+ // BB:
+ // cmp 0, N
+ // je RemBB
+ BuildMI(BB, dl, TII.get(MSP430::CMP8ir))
+ .addImm(0).addReg(ShiftAmtSrcReg);
+ BuildMI(BB, dl, TII.get(MSP430::JCC))
+ .addMBB(RemBB)
+ .addImm(MSP430CC::COND_E);
+
+ // LoopBB:
+ // ShiftReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ // ShiftAmt = phi [%N, BB], [%ShiftAmt2, LoopBB]
+ // ShiftReg2 = shift ShiftReg
+ // ShiftAmt2 = ShiftAmt - 1;
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(MSP430::PHI), ShiftAmtReg)
+ .addReg(ShiftAmtSrcReg).addMBB(BB)
+ .addReg(ShiftAmtReg2).addMBB(LoopBB);
+ BuildMI(LoopBB, dl, TII.get(Opc), ShiftReg2)
+ .addReg(ShiftReg);
+ BuildMI(LoopBB, dl, TII.get(MSP430::SUB8ri), ShiftAmtReg2)
+ .addReg(ShiftAmtReg).addImm(1);
+ BuildMI(LoopBB, dl, TII.get(MSP430::JCC))
+ .addMBB(LoopBB)
+ .addImm(MSP430CC::COND_NE);
+
+ // RemBB:
+  //   DstReg = phi [%SrcReg, BB], [%ShiftReg2, LoopBB]
+ BuildMI(RemBB, dl, TII.get(MSP430::PHI), DstReg)
+ .addReg(SrcReg).addMBB(BB)
+ .addReg(ShiftReg2).addMBB(LoopBB);
+
+ return RemBB;
+}
+
+MachineBasicBlock*
MSP430TargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const {
+ unsigned Opc = MI->getOpcode();
+
+ if (Opc == MSP430::Shl8 || Opc == MSP430::Shl16 ||
+ Opc == MSP430::Sra8 || Opc == MSP430::Sra16 ||
+ Opc == MSP430::Srl8 || Opc == MSP430::Srl16)
+ return EmitShiftInstr(MI, BB, EM);
+
const TargetInstrInfo &TII = *getTargetMachine().getInstrInfo();
DebugLoc dl = MI->getDebugLoc();
- assert((MI->getOpcode() == MSP430::Select16 ||
- MI->getOpcode() == MSP430::Select8) &&
+
+ assert((Opc == MSP430::Select16 || Opc == MSP430::Select8) &&
"Unexpected instr type to insert");
// To "insert" a SELECT instruction, we actually have to insert the diamond
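At the source level, the MSP430_INTR handling added in this file means an interrupt handler must take no arguments and return no value, and its epilogue is emitted with reti. A hedged sketch of such a handler follows; the interrupt attribute spelling and vector number are assumptions about the frontend, not something this patch defines:

/* Illustrative only: a handler using the MSP430_INTR calling convention.
   It has no parameters and a void return type; the return is emitted as
   'reti' rather than 'ret', and R12W-R15W are additionally treated as
   callee-saved for such functions. */
void __attribute__((interrupt(8))) timer_isr(void) {
  /* handler body */
}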
diff --git a/lib/Target/MSP430/MSP430ISelLowering.h b/lib/Target/MSP430/MSP430ISelLowering.h
index d413ccb..4921500 100644
--- a/lib/Target/MSP430/MSP430ISelLowering.h
+++ b/lib/Target/MSP430/MSP430ISelLowering.h
@@ -27,6 +27,9 @@ namespace llvm {
/// Return with a flag operand. Operand 0 is the chain operand.
RET_FLAG,
+ /// Same as RET_FLAG, but used for returning from ISRs.
+ RETI_FLAG,
+
/// Y = R{R,L}A X, rotate right (left) arithmetically
RRA, RLA,
@@ -44,7 +47,7 @@ namespace llvm {
/// CMP - Compare instruction.
CMP,
- /// SetCC. Operand 0 is condition code, and operand 1 is the flag
+ /// SetCC - Operand 0 is condition code, and operand 1 is the flag
/// operand produced by a CMP instruction.
SETCC,
@@ -54,9 +57,12 @@ namespace llvm {
/// instruction.
BR_CC,
- /// SELECT_CC. Operand 0 and operand 1 are selection variable, operand 3
+ /// SELECT_CC - Operand 0 and operand 1 are selection variable, operand 3
/// is condition code and operand 4 is flag operand.
- SELECT_CC
+ SELECT_CC,
+
+ /// SHL, SRA, SRL - Non-constant shifts.
+ SHL, SRA, SRL
};
}
@@ -81,8 +87,12 @@ namespace llvm {
SDValue LowerGlobalAddress(SDValue Op, SelectionDAG &DAG);
SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG);
SDValue LowerBR_CC(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerSETCC(SDValue Op, SelectionDAG &DAG);
SDValue LowerSELECT_CC(SDValue Op, SelectionDAG &DAG);
SDValue LowerSIGN_EXTEND(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerRETURNADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG);
+ SDValue getReturnAddressFrameIndex(SelectionDAG &DAG);
TargetLowering::ConstraintType
getConstraintType(const std::string &Constraint) const;
@@ -92,6 +102,9 @@ namespace llvm {
MachineBasicBlock* EmitInstrWithCustomInserter(MachineInstr *MI,
MachineBasicBlock *BB,
DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
+ MachineBasicBlock* EmitShiftInstr(MachineInstr *MI,
+ MachineBasicBlock *BB,
+ DenseMap<MachineBasicBlock*, MachineBasicBlock*> *EM) const;
private:
SDValue LowerCCCCallTo(SDValue Chain, SDValue Callee,
@@ -144,6 +157,7 @@ namespace llvm {
const MSP430Subtarget &Subtarget;
const MSP430TargetMachine &TM;
+ const TargetData *TD;
};
} // namespace llvm
diff --git a/lib/Target/MSP430/MSP430InstrInfo.cpp b/lib/Target/MSP430/MSP430InstrInfo.cpp
index b2f09c7..2ae6759 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.cpp
+++ b/lib/Target/MSP430/MSP430InstrInfo.cpp
@@ -219,17 +219,6 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
-bool MSP430InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case MSP430::RET: // Return.
- case MSP430::JMP: // Uncond branch.
- return true;
- default: return false;
- }
-}
-
bool MSP430InstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isTerminator()) return false;
@@ -270,8 +259,8 @@ bool MSP430InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a JMP, delete them.
- while (next(I) != MBB.end())
- next(I)->eraseFromParent();
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
Cond.clear();
FBB = 0;
diff --git a/lib/Target/MSP430/MSP430InstrInfo.h b/lib/Target/MSP430/MSP430InstrInfo.h
index 35e35db..e4ceeb9 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.h
+++ b/lib/Target/MSP430/MSP430InstrInfo.h
@@ -61,7 +61,6 @@ public:
// Branch folding goodness
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
- bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
bool isUnpredicatedTerminator(const MachineInstr *MI) const;
bool AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB, MachineBasicBlock *&FBB,
diff --git a/lib/Target/MSP430/MSP430InstrInfo.td b/lib/Target/MSP430/MSP430InstrInfo.td
index 7a26f6c..d67ba90 100644
--- a/lib/Target/MSP430/MSP430InstrInfo.td
+++ b/lib/Target/MSP430/MSP430InstrInfo.td
@@ -31,12 +31,15 @@ def SDT_MSP430BrCC : SDTypeProfile<0, 2, [SDTCisVT<0, OtherVT>,
SDTCisVT<1, i8>]>;
def SDT_MSP430SelectCC : SDTypeProfile<1, 3, [SDTCisSameAs<0, 1>, SDTCisSameAs<1, 2>,
SDTCisVT<3, i8>]>;
+def SDT_MSP430Shift : SDTypeProfile<1, 2, [SDTCisSameAs<0, 1>, SDTCisI8<2>]>;
//===----------------------------------------------------------------------===//
// MSP430 Specific Node Definitions.
//===----------------------------------------------------------------------===//
-def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
- [SDNPHasChain, SDNPOptInFlag]>;
+def MSP430retflag : SDNode<"MSP430ISD::RET_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
+def MSP430retiflag : SDNode<"MSP430ISD::RETI_FLAG", SDTNone,
+ [SDNPHasChain, SDNPOptInFlag]>;
def MSP430rra : SDNode<"MSP430ISD::RRA", SDTIntUnaryOp, []>;
def MSP430rla : SDNode<"MSP430ISD::RLA", SDTIntUnaryOp, []>;
@@ -54,6 +57,9 @@ def MSP430Wrapper : SDNode<"MSP430ISD::Wrapper", SDT_MSP430Wrapper>;
def MSP430cmp : SDNode<"MSP430ISD::CMP", SDT_MSP430Cmp, [SDNPOutFlag]>;
def MSP430brcc : SDNode<"MSP430ISD::BR_CC", SDT_MSP430BrCC, [SDNPHasChain, SDNPInFlag]>;
def MSP430selectcc: SDNode<"MSP430ISD::SELECT_CC", SDT_MSP430SelectCC, [SDNPInFlag]>;
+def MSP430shl : SDNode<"MSP430ISD::SHL", SDT_MSP430Shift, []>;
+def MSP430sra : SDNode<"MSP430ISD::SRA", SDT_MSP430Shift, []>;
+def MSP430srl : SDNode<"MSP430ISD::SRL", SDT_MSP430Shift, []>;
//===----------------------------------------------------------------------===//
// MSP430 Operand Definitions.
@@ -90,7 +96,9 @@ def addr : ComplexPattern<iPTR, 2, "SelectAddr", [], []>;
// Pattern Fragments
def zextloadi16i8 : PatFrag<(ops node:$ptr), (i16 (zextloadi8 node:$ptr))>;
def extloadi16i8 : PatFrag<(ops node:$ptr), (i16 ( extloadi8 node:$ptr))>;
-
+def and_su : PatFrag<(ops node:$lhs, node:$rhs), (and node:$lhs, node:$rhs), [{
+ return N->hasOneUse();
+}]>;
//===----------------------------------------------------------------------===//
// Instruction list..
@@ -117,6 +125,27 @@ let usesCustomInserter = 1 in {
"# Select16 PSEUDO",
[(set GR16:$dst,
(MSP430selectcc GR16:$src1, GR16:$src2, imm:$cc))]>;
+ let Defs = [SRW] in {
+ def Shl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Shl8 PSEUDO",
+ [(set GR8:$dst, (MSP430shl GR8:$src, GR8:$cnt))]>;
+ def Shl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Shl16 PSEUDO",
+ [(set GR16:$dst, (MSP430shl GR16:$src, GR8:$cnt))]>;
+ def Sra8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Sra8 PSEUDO",
+ [(set GR8:$dst, (MSP430sra GR8:$src, GR8:$cnt))]>;
+ def Sra16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Sra16 PSEUDO",
+ [(set GR16:$dst, (MSP430sra GR16:$src, GR8:$cnt))]>;
+ def Srl8 : Pseudo<(outs GR8:$dst), (ins GR8:$src, GR8:$cnt),
+ "# Srl8 PSEUDO",
+ [(set GR8:$dst, (MSP430srl GR8:$src, GR8:$cnt))]>;
+ def Srl16 : Pseudo<(outs GR16:$dst), (ins GR16:$src, GR8:$cnt),
+ "# Srl16 PSEUDO",
+ [(set GR16:$dst, (MSP430srl GR16:$src, GR8:$cnt))]>;
+
+ }
}
let neverHasSideEffects = 1 in
@@ -128,7 +157,8 @@ def NOP : Pseudo<(outs), (ins), "nop", []>;
// FIXME: Provide proper encoding!
let isReturn = 1, isTerminator = 1, isBarrier = 1 in {
- def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>;
+ def RET : Pseudo<(outs), (ins), "ret", [(MSP430retflag)]>;
+ def RETI : Pseudo<(outs), (ins), "reti", [(MSP430retiflag)]>;
}
let isBranch = 1, isTerminator = 1 in {
@@ -823,6 +853,65 @@ def CMP16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
"cmp.w\t{$src1, $src2}",
[(MSP430cmp (load addr:$src1), GR16:$src2), (implicit SRW)]>;
+
+// BIT TESTS, just set condition codes
+// Note that the C condition is set differently than when using CMP.
+let isCommutable = 1 in {
+def BIT8rr : Pseudo<(outs), (ins GR8:$src1, GR8:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR8:$src1, GR8:$src2)),
+ (implicit SRW)]>;
+def BIT16rr : Pseudo<(outs), (ins GR16:$src1, GR16:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR16:$src1, GR16:$src2)),
+ (implicit SRW)]>;
+}
+def BIT8ri : Pseudo<(outs), (ins GR8:$src1, i8imm:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR8:$src1, imm:$src2)),
+ (implicit SRW)]>;
+def BIT16ri : Pseudo<(outs), (ins GR16:$src1, i16imm:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR16:$src1, imm:$src2)),
+ (implicit SRW)]>;
+
+def BIT8rm : Pseudo<(outs), (ins GR8:$src1, memdst:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR8:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+def BIT16rm : Pseudo<(outs), (ins GR16:$src1, memdst:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su GR16:$src1, (load addr:$src2))),
+ (implicit SRW)]>;
+
+def BIT8mr : Pseudo<(outs), (ins memsrc:$src1, GR8:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (load addr:$src1), GR8:$src2)),
+ (implicit SRW)]>;
+def BIT16mr : Pseudo<(outs), (ins memsrc:$src1, GR16:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (load addr:$src1), GR16:$src2)),
+ (implicit SRW)]>;
+
+def BIT8mi : Pseudo<(outs), (ins memsrc:$src1, i8imm:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (load addr:$src1), (i8 imm:$src2))),
+ (implicit SRW)]>;
+def BIT16mi : Pseudo<(outs), (ins memsrc:$src1, i16imm:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (load addr:$src1), (i16 imm:$src2))),
+ (implicit SRW)]>;
+
+def BIT8mm : Pseudo<(outs), (ins memsrc:$src1, memsrc:$src2),
+ "bit.b\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (i8 (load addr:$src1)),
+ (load addr:$src2))),
+ (implicit SRW)]>;
+def BIT16mm : Pseudo<(outs), (ins memsrc:$src1, memsrc:$src2),
+ "bit.w\t{$src2, $src1}",
+ [(MSP430cmp 0, (and_su (i16 (load addr:$src1)),
+ (load addr:$src2))),
+ (implicit SRW)]>;
} // Defs = [SRW]
//===----------------------------------------------------------------------===//
@@ -905,3 +994,6 @@ def : Pat<(store (subc (load addr:$dst), (i8 (load addr:$src))), addr:$dst),
// peephole patterns
def : Pat<(and GR16:$src, 255), (ZEXT16r GR16:$src)>;
+def : Pat<(MSP430cmp 0, (trunc (and_su GR16:$src1, GR16:$src2))),
+ (BIT8rr (EXTRACT_SUBREG GR16:$src1, subreg_8bit),
+ (EXTRACT_SUBREG GR16:$src2, subreg_8bit))>;
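For context, the BIT patterns above match a compare of zero against an AND whose result has a single use (the and_su fragment). A hypothetical source snippet of the shape they target, with an illustrative function name and mask:

/* The result of 'x & 0x20' feeds only the comparison against zero, so the
   single-use AND can be selected as bit.b/bit.w instead of an and
   followed by a cmp. */
int flag_is_set(unsigned x) {
  return (x & 0x20) != 0;
}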
diff --git a/lib/Target/MSP430/MSP430MachineFunctionInfo.h b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
index 1d26ae3..383fd2e 100644
--- a/lib/Target/MSP430/MSP430MachineFunctionInfo.h
+++ b/lib/Target/MSP430/MSP430MachineFunctionInfo.h
@@ -25,14 +25,20 @@ class MSP430MachineFunctionInfo : public MachineFunctionInfo {
/// stack frame in bytes.
unsigned CalleeSavedFrameSize;
+ /// ReturnAddrIndex - FrameIndex for return slot.
+ int ReturnAddrIndex;
+
public:
MSP430MachineFunctionInfo() : CalleeSavedFrameSize(0) {}
explicit MSP430MachineFunctionInfo(MachineFunction &MF)
- : CalleeSavedFrameSize(0) {}
+ : CalleeSavedFrameSize(0), ReturnAddrIndex(0) {}
unsigned getCalleeSavedFrameSize() const { return CalleeSavedFrameSize; }
void setCalleeSavedFrameSize(unsigned bytes) { CalleeSavedFrameSize = bytes; }
+
+ int getRAIndex() const { return ReturnAddrIndex; }
+ void setRAIndex(int Index) { ReturnAddrIndex = Index; }
};
} // End llvm namespace
diff --git a/lib/Target/MSP430/MSP430RegisterInfo.cpp b/lib/Target/MSP430/MSP430RegisterInfo.cpp
index 92baad9..e85c7a2 100644
--- a/lib/Target/MSP430/MSP430RegisterInfo.cpp
+++ b/lib/Target/MSP430/MSP430RegisterInfo.cpp
@@ -17,6 +17,7 @@
#include "MSP430MachineFunctionInfo.h"
#include "MSP430RegisterInfo.h"
#include "MSP430TargetMachine.h"
+#include "llvm/Function.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
@@ -37,17 +38,26 @@ MSP430RegisterInfo::MSP430RegisterInfo(MSP430TargetMachine &tm,
const unsigned*
MSP430RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const {
+ const Function* F = MF->getFunction();
static const unsigned CalleeSavedRegs[] = {
MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
0
};
+ static const unsigned CalleeSavedRegsIntr[] = {
+ MSP430::FPW, MSP430::R5W, MSP430::R6W, MSP430::R7W,
+ MSP430::R8W, MSP430::R9W, MSP430::R10W, MSP430::R11W,
+ MSP430::R12W, MSP430::R13W, MSP430::R14W, MSP430::R15W,
+ 0
+ };
- return CalleeSavedRegs;
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegsIntr : CalleeSavedRegs);
}
const TargetRegisterClass *const *
MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
+ const Function* F = MF->getFunction();
static const TargetRegisterClass * const CalleeSavedRegClasses[] = {
&MSP430::GR16RegClass, &MSP430::GR16RegClass,
&MSP430::GR16RegClass, &MSP430::GR16RegClass,
@@ -55,8 +65,18 @@ MSP430RegisterInfo::getCalleeSavedRegClasses(const MachineFunction *MF) const {
&MSP430::GR16RegClass, &MSP430::GR16RegClass,
0
};
+ static const TargetRegisterClass * const CalleeSavedRegClassesIntr[] = {
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ &MSP430::GR16RegClass, &MSP430::GR16RegClass,
+ 0
+ };
- return CalleeSavedRegClasses;
+ return (F->getCallingConv() == CallingConv::MSP430_INTR ?
+ CalleeSavedRegClassesIntr : CalleeSavedRegClasses);
}
BitVector MSP430RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
@@ -193,10 +213,10 @@ MSP430RegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator II,
// We need to materialize the offset via add instruction.
unsigned DstReg = MI.getOperand(0).getReg();
if (Offset < 0)
- BuildMI(MBB, next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::SUB16ri), DstReg)
.addReg(DstReg).addImm(-Offset);
else
- BuildMI(MBB, next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
+ BuildMI(MBB, llvm::next(II), dl, TII.get(MSP430::ADD16ri), DstReg)
.addReg(DstReg).addImm(Offset);
return 0;
@@ -251,7 +271,7 @@ void MSP430RegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(MSP430::SPW);
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I)
I->addLiveIn(MSP430::FPW);
@@ -292,7 +312,8 @@ void MSP430RegisterInfo::emitEpilogue(MachineFunction &MF,
DebugLoc DL = MBBI->getDebugLoc();
switch (RetOpcode) {
- case MSP430::RET: break; // These are ok
+ case MSP430::RET:
+ case MSP430::RETI: break; // These are ok
default:
llvm_unreachable("Can only insert epilog into returning blocks");
}
diff --git a/lib/Target/Mips/MipsInstrInfo.cpp b/lib/Target/Mips/MipsInstrInfo.cpp
index 6d8e160..48b9bdf 100644
--- a/lib/Target/Mips/MipsInstrInfo.cpp
+++ b/lib/Target/Mips/MipsInstrInfo.cpp
@@ -590,22 +590,6 @@ RemoveBranch(MachineBasicBlock &MBB) const
return 2;
}
-/// BlockHasNoFallThrough - Analyze if MachineBasicBlock does not
-/// fall-through into its successor block.
-bool MipsInstrInfo::
-BlockHasNoFallThrough(const MachineBasicBlock &MBB) const
-{
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case Mips::RET: // Return.
- case Mips::JR: // Indirect branch.
- case Mips::J: // Uncond branch.
- return true;
- default: return false;
- }
-}
-
/// ReverseBranchCondition - Return the inverse opcode of the
/// specified Branch instruction.
bool MipsInstrInfo::
diff --git a/lib/Target/Mips/MipsInstrInfo.h b/lib/Target/Mips/MipsInstrInfo.h
index 249d3de..ab8dc59 100644
--- a/lib/Target/Mips/MipsInstrInfo.h
+++ b/lib/Target/Mips/MipsInstrInfo.h
@@ -232,7 +232,6 @@ public:
return 0;
}
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp
index 0083598..af7d812 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.cpp
+++ b/lib/Target/PowerPC/PPCInstrInfo.cpp
@@ -740,18 +740,6 @@ bool PPCInstrInfo::canFoldMemoryOperand(const MachineInstr *MI,
}
-bool PPCInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case PPC::BLR: // Return.
- case PPC::B: // Uncond branch.
- case PPC::BCTR: // Indirect branch.
- return true;
- default: return false;
- }
-}
-
bool PPCInstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 2 && "Invalid PPC branch opcode!");
diff --git a/lib/Target/PowerPC/PPCInstrInfo.h b/lib/Target/PowerPC/PPCInstrInfo.h
index bb0dc15a..57facac 100644
--- a/lib/Target/PowerPC/PPCInstrInfo.h
+++ b/lib/Target/PowerPC/PPCInstrInfo.h
@@ -143,7 +143,6 @@ public:
virtual bool canFoldMemoryOperand(const MachineInstr *MI,
const SmallVectorImpl<unsigned> &Ops) const;
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
diff --git a/lib/Target/README.txt b/lib/Target/README.txt
index 2d8a687..e1772c2 100644
--- a/lib/Target/README.txt
+++ b/lib/Target/README.txt
@@ -801,8 +801,21 @@ void bar(unsigned n) {
true();
}
-I think this basically amounts to a dag combine to simplify comparisons against
-multiply hi's into a comparison against the mullo.
+This is equivalent to the following, where 2863311531 is the multiplicative
+inverse of 3, and 1431655766 is ((2^32)-1)/3+1:
+void bar(unsigned n) {
+ if (n * 2863311531U < 1431655766U)
+ true();
+}
+
+The same transformation can work with an even modulo with the addition of a
+rotate: rotate the result of the multiply to the right by the number of bits
+which need to be zero for the condition to be true, and shrink the compare RHS
+by the same amount. Unless the target supports rotates, though, that
+transformation probably isn't worthwhile.
+
+The transformation can also easily be made to work with non-zero equality
+comparisons: just transform, for example, "n % 3 == 1" to "(n-1) % 3 == 0".
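The equivalence above is easy to sanity-check directly; the following minimal standalone program (assuming 32-bit unsigned arithmetic) compares both forms over a range of inputs:

#include <stdio.h>

/* Sketch: for 32-bit unsigned n, (n % 3 == 0) should agree with
   (n * 2863311531U < 1431655766U), since 2863311531 is the inverse of 3
   modulo 2^32 and 1431655766 is ((2^32)-1)/3 + 1. */
int main(void) {
  unsigned n;
  for (n = 0; n < 1000000U; ++n) {
    if ((n % 3U == 0) != (n * 2863311531U < 1431655766U)) {
      printf("mismatch at %u\n", n);
      return 1;
    }
  }
  printf("ok\n");
  return 0;
}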
//===---------------------------------------------------------------------===//
@@ -823,20 +836,6 @@ int main() {
//===---------------------------------------------------------------------===//
-Instcombine will merge comparisons like (x >= 10) && (x < 20) by producing (x -
-10) u< 10, but only when the comparisons have matching sign.
-
-This could be converted with a similiar technique. (PR1941)
-
-define i1 @test(i8 %x) {
- %A = icmp uge i8 %x, 5
- %B = icmp slt i8 %x, 20
- %C = and i1 %A, %B
- ret i1 %C
-}
-
-//===---------------------------------------------------------------------===//
-
These functions perform the same computation, but produce different assembly.
define i8 @select(i8 %x) readnone nounwind {
@@ -884,18 +883,6 @@ The expression should optimize to something like
//===---------------------------------------------------------------------===//
-From GCC Bug 15241:
-unsigned int
-foo (unsigned int a, unsigned int b)
-{
- if (a <= 7 && b <= 7)
- baz ();
-}
-Should combine to "(a|b) <= 7". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
From GCC Bug 3756:
int
pn (int n)
@@ -907,19 +894,6 @@ Should combine to (n >> 31) | 1. Currently not optimized with "clang
//===---------------------------------------------------------------------===//
-From GCC Bug 28685:
-int test(int a, int b)
-{
- int lt = a < b;
- int eq = a == b;
-
- return (lt || eq);
-}
-Should combine to "a <= b". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts | llc".
-
-//===---------------------------------------------------------------------===//
-
void a(int variable)
{
if (variable == 4 || variable == 6)
@@ -993,12 +967,6 @@ Should combine to 0. Currently not optimized with "clang
//===---------------------------------------------------------------------===//
-int a(unsigned char* b) {return *b > 99;}
-There's an unnecessary zext in the generated code with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
int a(unsigned b) {return ((b << 31) | (b << 30)) >> 31;}
Should be combined to "((b >> 1) | b) & 1". Currently not optimized
with "clang -emit-llvm-bc | opt -std-compile-opts".
@@ -1011,12 +979,6 @@ Should combine to "x | (y & 3)". Currently not optimized with "clang
//===---------------------------------------------------------------------===//
-unsigned a(unsigned a) {return ((a | 1) & 3) | (a & -4);}
-Should combine to "a | 1". Currently not optimized with "clang
--emit-llvm-bc | opt -std-compile-opts".
-
-//===---------------------------------------------------------------------===//
-
int a(int a, int b, int c) {return (~a & c) | ((c|a) & b);}
Should fold to "(~a & c) | (a & b)". Currently not optimized with
"clang -emit-llvm-bc | opt -std-compile-opts".
@@ -1704,3 +1666,50 @@ need all but the bottom two bits from %A, and if we gave that mask to SDB it
would delete the or instruction for us.
//===---------------------------------------------------------------------===//
+
+FunctionAttrs is not marking this function as readnone (just readonly):
+$ clang t.c -emit-llvm -S -o - -O0 | opt -mem2reg -S -functionattrs
+
+int t(int a, int b, int c) {
+ int *p;
+ if (a)
+ p = &a;
+ else
+ p = &c;
+ return *p;
+}
+
+This is because we codegen this to:
+
+define i32 @t(i32 %a, i32 %b, i32 %c) nounwind readonly ssp {
+entry:
+ %a.addr = alloca i32 ; <i32*> [#uses=3]
+ %c.addr = alloca i32 ; <i32*> [#uses=2]
+...
+
+if.end:
+ %p.0 = phi i32* [ %a.addr, %if.then ], [ %c.addr, %if.else ]
+ %tmp2 = load i32* %p.0 ; <i32> [#uses=1]
+ ret i32 %tmp2
+}
+
+And functionattrs doesn't realize that the p.0 load points to function local
+memory.
+
+Also, functionattrs doesn't know about memcpy/memset. This function should be
+marked readnone, since it only twiddles local memory, but functionattrs doesn't
+handle memset/memcpy/memmove aggressively:
+
+struct X { int *p; int *q; };
+int foo() {
+ int i = 0, j = 1;
+ struct X x, y;
+ int **p;
+ y.p = &i;
+ x.q = &j;
+ p = __builtin_memcpy (&x, &y, sizeof (int *));
+ return **p;
+}
+
+//===---------------------------------------------------------------------===//
+
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp
index d82d928..5fa7e8c 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.cpp
+++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp
@@ -402,18 +402,6 @@ ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
return false;
}
-bool SystemZInstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB)const{
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case SystemZ::RET: // Return.
- case SystemZ::JMP: // Uncond branch.
- case SystemZ::JMPr: // Indirect branch.
- return true;
- default: return false;
- }
-}
-
bool SystemZInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const {
const TargetInstrDesc &TID = MI->getDesc();
if (!TID.isTerminator()) return false;
@@ -454,8 +442,8 @@ bool SystemZInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a JMP, delete them.
- while (next(I) != MBB.end())
- next(I)->eraseFromParent();
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
Cond.clear();
FBB = 0;
diff --git a/lib/Target/SystemZ/SystemZInstrInfo.h b/lib/Target/SystemZ/SystemZInstrInfo.h
index e16d704..ef3b39e 100644
--- a/lib/Target/SystemZ/SystemZInstrInfo.h
+++ b/lib/Target/SystemZ/SystemZInstrInfo.h
@@ -89,7 +89,6 @@ public:
const std::vector<CalleeSavedInfo> &CSI) const;
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
virtual bool isUnpredicatedTerminator(const MachineInstr *MI) const;
virtual bool AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock *&TBB,
diff --git a/lib/Target/SystemZ/SystemZRegisterInfo.cpp b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
index 4d1c01f..1318195 100644
--- a/lib/Target/SystemZ/SystemZRegisterInfo.cpp
+++ b/lib/Target/SystemZ/SystemZRegisterInfo.cpp
@@ -247,7 +247,7 @@ void SystemZRegisterInfo::emitPrologue(MachineFunction &MF) const {
.addReg(SystemZ::R15D);
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I)
I->addLiveIn(SystemZ::R11D);
diff --git a/lib/Target/TargetData.cpp b/lib/Target/TargetData.cpp
index fc71bc3..9434a19 100644
--- a/lib/Target/TargetData.cpp
+++ b/lib/Target/TargetData.cpp
@@ -117,14 +117,6 @@ TargetAlignElem::operator==(const TargetAlignElem &rhs) const {
&& TypeBitWidth == rhs.TypeBitWidth);
}
-std::ostream &
-TargetAlignElem::dump(std::ostream &os) const {
- return os << AlignType
- << TypeBitWidth
- << ":" << (int) (ABIAlign * 8)
- << ":" << (int) (PrefAlign * 8);
-}
-
const TargetAlignElem TargetData::InvalidAlignmentElem =
TargetAlignElem::get((AlignTypeEnum) -1, 0, 0, 0);
@@ -323,11 +315,10 @@ unsigned TargetData::getAlignmentInfo(AlignTypeEnum AlignType,
: Alignments[BestMatchIdx].PrefAlign;
}
-typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
-
-namespace llvm {
+namespace {
class StructLayoutMap : public AbstractTypeUser {
+ typedef DenseMap<const StructType*, StructLayout*> LayoutInfoTy;
LayoutInfoTy LayoutInfo;
/// refineAbstractType - The callback method invoked when an abstract type is
@@ -336,19 +327,11 @@ class StructLayoutMap : public AbstractTypeUser {
///
virtual void refineAbstractType(const DerivedType *OldTy,
const Type *) {
- const StructType *STy = dyn_cast<const StructType>(OldTy);
- if (!STy) {
- OldTy->removeAbstractTypeUser(this);
- return;
- }
-
- StructLayout *SL = LayoutInfo[STy];
- if (SL) {
- SL->~StructLayout();
- free(SL);
- LayoutInfo[STy] = NULL;
- }
-
+ const StructType *STy = cast<const StructType>(OldTy);
+ LayoutInfoTy::iterator Iter = LayoutInfo.find(STy);
+ Iter->second->~StructLayout();
+ free(Iter->second);
+ LayoutInfo.erase(Iter);
OldTy->removeAbstractTypeUser(this);
}
@@ -358,70 +341,43 @@ class StructLayoutMap : public AbstractTypeUser {
/// This method notifies ATU's when this occurs for a type.
///
virtual void typeBecameConcrete(const DerivedType *AbsTy) {
- const StructType *STy = dyn_cast<const StructType>(AbsTy);
- if (!STy) {
- AbsTy->removeAbstractTypeUser(this);
- return;
- }
-
- StructLayout *SL = LayoutInfo[STy];
- if (SL) {
- SL->~StructLayout();
- free(SL);
- LayoutInfo[STy] = NULL;
- }
-
+ const StructType *STy = cast<const StructType>(AbsTy);
+ LayoutInfoTy::iterator Iter = LayoutInfo.find(STy);
+ Iter->second->~StructLayout();
+ free(Iter->second);
+ LayoutInfo.erase(Iter);
AbsTy->removeAbstractTypeUser(this);
}
- bool insert(const Type *Ty) {
- if (Ty->isAbstract())
- Ty->addAbstractTypeUser(this);
- return true;
- }
-
public:
virtual ~StructLayoutMap() {
// Remove any layouts.
for (LayoutInfoTy::iterator
- I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I)
- if (StructLayout *SL = I->second) {
- SL->~StructLayout();
- free(SL);
- }
- }
+ I = LayoutInfo.begin(), E = LayoutInfo.end(); I != E; ++I) {
+ const Type *Key = I->first;
+ StructLayout *Value = I->second;
- inline LayoutInfoTy::iterator begin() {
- return LayoutInfo.begin();
- }
- inline LayoutInfoTy::iterator end() {
- return LayoutInfo.end();
- }
- inline LayoutInfoTy::const_iterator begin() const {
- return LayoutInfo.begin();
- }
- inline LayoutInfoTy::const_iterator end() const {
- return LayoutInfo.end();
- }
+ if (Key->isAbstract())
+ Key->removeAbstractTypeUser(this);
- LayoutInfoTy::iterator find(const StructType *&Val) {
- return LayoutInfo.find(Val);
- }
- LayoutInfoTy::const_iterator find(const StructType *&Val) const {
- return LayoutInfo.find(Val);
+ Value->~StructLayout();
+ free(Value);
+ }
}
- bool erase(const StructType *&Val) {
- return LayoutInfo.erase(Val);
- }
- bool erase(LayoutInfoTy::iterator I) {
- return LayoutInfo.erase(I);
+ void InvalidateEntry(const StructType *Ty) {
+ LayoutInfoTy::iterator I = LayoutInfo.find(Ty);
+ if (I == LayoutInfo.end()) return;
+
+ I->second->~StructLayout();
+ free(I->second);
+ LayoutInfo.erase(I);
+
+ if (Ty->isAbstract())
+ Ty->removeAbstractTypeUser(this);
}
- StructLayout *&operator[](const Type *Key) {
- const StructType *STy = dyn_cast<const StructType>(Key);
- assert(STy && "Trying to access the struct layout map with a non-struct!");
- insert(STy);
+ StructLayout *&operator[](const StructType *STy) {
return LayoutInfo[STy];
}
@@ -429,17 +385,18 @@ public:
virtual void dump() const {}
};
-} // end namespace llvm
+} // end anonymous namespace
TargetData::~TargetData() {
- delete LayoutMap;
+ delete static_cast<StructLayoutMap*>(LayoutMap);
}
const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
if (!LayoutMap)
LayoutMap = new StructLayoutMap();
- StructLayout *&SL = (*LayoutMap)[Ty];
+ StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+ StructLayout *&SL = (*STM)[Ty];
if (SL) return SL;
// Otherwise, create the struct layout. Because it is variable length, we
@@ -453,6 +410,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
SL = L;
new (L) StructLayout(Ty, *this);
+
+ if (Ty->isAbstract())
+ Ty->addAbstractTypeUser(STM);
+
return L;
}
@@ -463,15 +424,10 @@ const StructLayout *TargetData::getStructLayout(const StructType *Ty) const {
void TargetData::InvalidateStructLayoutInfo(const StructType *Ty) const {
if (!LayoutMap) return; // No cache.
- DenseMap<const StructType*, StructLayout*>::iterator I = LayoutMap->find(Ty);
- if (I == LayoutMap->end()) return;
-
- I->second->~StructLayout();
- free(I->second);
- LayoutMap->erase(I);
+ StructLayoutMap *STM = static_cast<StructLayoutMap*>(LayoutMap);
+ STM->InvalidateEntry(Ty);
}
-
std::string TargetData::getStringRepresentation() const {
std::string Result;
raw_string_ostream OS(Result);
diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h
index a167118..684c61f 100644
--- a/lib/Target/X86/X86.h
+++ b/lib/Target/X86/X86.h
@@ -62,11 +62,6 @@ MCCodeEmitter *createX86MCCodeEmitter(const Target &, TargetMachine &TM);
///
FunctionPass *createEmitX86CodeToMemory();
-/// createX86MaxStackAlignmentCalculatorPass - This function returns a pass
-/// which calculates maximal stack alignment required for function
-///
-FunctionPass *createX86MaxStackAlignmentCalculatorPass();
-
extern Target TheX86_32Target, TheX86_64Target;
} // End llvm namespace
diff --git a/lib/Target/X86/X86COFFMachineModuleInfo.h b/lib/Target/X86/X86COFFMachineModuleInfo.h
index afd5525..5017af2 100644
--- a/lib/Target/X86/X86COFFMachineModuleInfo.h
+++ b/lib/Target/X86/X86COFFMachineModuleInfo.h
@@ -16,6 +16,7 @@
#include "llvm/CodeGen/MachineModuleInfo.h"
#include "llvm/ADT/StringSet.h"
+#include "X86MachineFunctionInfo.h"
namespace llvm {
class X86MachineFunctionInfo;
diff --git a/lib/Target/X86/X86CallingConv.td b/lib/Target/X86/X86CallingConv.td
index d77f039..12d3d04 100644
--- a/lib/Target/X86/X86CallingConv.td
+++ b/lib/Target/X86/X86CallingConv.td
@@ -64,11 +64,18 @@ def RetCC_X86_32_C : CallingConv<[
// X86-32 FastCC return-value convention.
def RetCC_X86_32_Fast : CallingConv<[
// The X86-32 fastcc returns 1, 2, or 3 FP values in XMM0-2 if the target has
- // SSE2, otherwise it is the the C calling conventions.
+ // SSE2.
// This can happen when a float, 2 x float, or 3 x float vector is split by
// target lowering, and is returned in 1-3 sse regs.
CCIfType<[f32], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
CCIfType<[f64], CCIfSubtarget<"hasSSE2()", CCAssignToReg<[XMM0,XMM1,XMM2]>>>,
+
+ // For integers, ECX can be used as an extra return register
+ CCIfType<[i8], CCAssignToReg<[AL, DL, CL]>>,
+ CCIfType<[i16], CCAssignToReg<[AX, DX, CX]>>,
+ CCIfType<[i32], CCAssignToReg<[EAX, EDX, ECX]>>,
+
+ // Otherwise, it is the same as the common X86 calling convention.
CCDelegateTo<RetCC_X86Common>
]>;
diff --git a/lib/Target/X86/X86FloatingPoint.cpp b/lib/Target/X86/X86FloatingPoint.cpp
index a2fe9b0..044bd4b 100644
--- a/lib/Target/X86/X86FloatingPoint.cpp
+++ b/lib/Target/X86/X86FloatingPoint.cpp
@@ -289,7 +289,7 @@ bool FPS::processBasicBlock(MachineFunction &MF, MachineBasicBlock &BB) {
while (Start != BB.begin() && prior(Start) != PrevI) --Start;
errs() << "Inserted instructions:\n\t";
Start->print(errs(), &MF.getTarget());
- while (++Start != next(I)) {}
+ while (++Start != llvm::next(I)) {}
}
dumpStack();
);
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index d80b8ec..0517b56 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -595,6 +595,18 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::FP_TO_SINT, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::UINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
setOperationAction(ISD::SINT_TO_FP, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND_INREG, (MVT::SimpleValueType)VT,Expand);
+ setOperationAction(ISD::TRUNCATE, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::SIGN_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ZERO_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ setOperationAction(ISD::ANY_EXTEND, (MVT::SimpleValueType)VT, Expand);
+ for (unsigned InnerVT = (unsigned)MVT::FIRST_VECTOR_VALUETYPE;
+ InnerVT <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++InnerVT)
+ setTruncStoreAction((MVT::SimpleValueType)VT,
+ (MVT::SimpleValueType)InnerVT, Expand);
+ setLoadExtAction(ISD::SEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::ZEXTLOAD, (MVT::SimpleValueType)VT, Expand);
+ setLoadExtAction(ISD::EXTLOAD, (MVT::SimpleValueType)VT, Expand);
}
// FIXME: In order to prevent SSE instructions being expanded to MMX ones
@@ -671,8 +683,6 @@ X86TargetLowering::X86TargetLowering(X86TargetMachine &TM)
setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v4i16, Custom);
- setTruncStoreAction(MVT::v8i16, MVT::v8i8, Expand);
- setOperationAction(ISD::TRUNCATE, MVT::v8i8, Expand);
setOperationAction(ISD::SELECT, MVT::v8i8, Promote);
setOperationAction(ISD::SELECT, MVT::v4i16, Promote);
setOperationAction(ISD::SELECT, MVT::v2i32, Promote);
@@ -3344,6 +3354,82 @@ static SDValue getVShift(bool isLeft, EVT VT, SDValue SrcOp,
}
SDValue
+X86TargetLowering::LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG) {
+
+ // Check if the scalar load can be widened into a vector load. And if
+  // the address is "base + cst", see if the cst can be "absorbed" into
+ // the shuffle mask.
+ if (LoadSDNode *LD = dyn_cast<LoadSDNode>(SrcOp)) {
+ SDValue Ptr = LD->getBasePtr();
+ if (!ISD::isNormalLoad(LD) || LD->isVolatile())
+ return SDValue();
+ EVT PVT = LD->getValueType(0);
+ if (PVT != MVT::i32 && PVT != MVT::f32)
+ return SDValue();
+
+ int FI = -1;
+ int64_t Offset = 0;
+ if (FrameIndexSDNode *FINode = dyn_cast<FrameIndexSDNode>(Ptr)) {
+ FI = FINode->getIndex();
+ Offset = 0;
+ } else if (Ptr.getOpcode() == ISD::ADD &&
+ isa<ConstantSDNode>(Ptr.getOperand(1)) &&
+ isa<FrameIndexSDNode>(Ptr.getOperand(0))) {
+ FI = cast<FrameIndexSDNode>(Ptr.getOperand(0))->getIndex();
+ Offset = Ptr.getConstantOperandVal(1);
+ Ptr = Ptr.getOperand(0);
+ } else {
+ return SDValue();
+ }
+
+ SDValue Chain = LD->getChain();
+ // Make sure the stack object alignment is at least 16.
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
+ if (DAG.InferPtrAlignment(Ptr) < 16) {
+ if (MFI->isFixedObjectIndex(FI)) {
+ // Can't change the alignment. Reference stack + offset explicitly
+ // if stack pointer is at least 16-byte aligned.
+ unsigned StackAlign = Subtarget->getStackAlignment();
+ if (StackAlign < 16)
+ return SDValue();
+ Offset = MFI->getObjectOffset(FI) + Offset;
+ SDValue StackPtr = DAG.getCopyFromReg(Chain, dl, X86StackPtr,
+ getPointerTy());
+ Ptr = DAG.getNode(ISD::ADD, dl, getPointerTy(), StackPtr,
+ DAG.getConstant(Offset & ~15, getPointerTy()));
+ Offset %= 16;
+ } else {
+ MFI->setObjectAlignment(FI, 16);
+ }
+ }
+
+    // (Offset % 16) must be a multiple of 4. The address is then
+ // Ptr + (Offset & ~15).
+ if (Offset < 0)
+ return SDValue();
+ if ((Offset % 16) & 3)
+ return SDValue();
+ int64_t StartOffset = Offset & ~15;
+ if (StartOffset)
+ Ptr = DAG.getNode(ISD::ADD, Ptr.getDebugLoc(), Ptr.getValueType(),
+ Ptr,DAG.getConstant(StartOffset, Ptr.getValueType()));
+
+ int EltNo = (Offset - StartOffset) >> 2;
+ int Mask[4] = { EltNo, EltNo, EltNo, EltNo };
+ EVT VT = (PVT == MVT::i32) ? MVT::v4i32 : MVT::v4f32;
+ SDValue V1 = DAG.getLoad(VT, dl, Chain, Ptr,LD->getSrcValue(),0);
+ // Canonicalize it to a v4i32 shuffle.
+ V1 = DAG.getNode(ISD::BIT_CONVERT, dl, MVT::v4i32, V1);
+ return DAG.getNode(ISD::BIT_CONVERT, dl, VT,
+ DAG.getVectorShuffle(MVT::v4i32, dl, V1,
+ DAG.getUNDEF(MVT::v4i32), &Mask[0]));
+ }
+
+ return SDValue();
+}
+
+SDValue
X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
DebugLoc dl = Op.getDebugLoc();
// All zero's are handled with pxor, all one's are handled with pcmpeqd.
@@ -3486,8 +3572,19 @@ X86TargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) {
}
// Splat is obviously ok. Let legalizer expand it to a shuffle.
- if (Values.size() == 1)
+ if (Values.size() == 1) {
+ if (EVTBits == 32) {
+ // Instead of a shuffle like this:
+ // shuffle (scalar_to_vector (load (ptr + 4))), undef, <0, 0, 0, 0>
+ // Check if it's possible to issue this instead.
+      // shuffle (vload ptr), undef, <1, 1, 1, 1>
+ unsigned Idx = CountTrailingZeros_32(NonZeros);
+ SDValue Item = Op.getOperand(Idx);
+ if (Op.getNode()->isOnlyUserOf(Item.getNode()))
+ return LowerAsSplatVectorLoad(Item, VT, dl, DAG);
+ }
return SDValue();
+ }
// A vector full of immediates; various special cases are already
// handled, so this is best done with a single constant-pool load.
@@ -4278,7 +4375,7 @@ X86TargetLowering::LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG) {
unsigned ShAmt = 0;
SDValue ShVal;
bool isShift = getSubtarget()->hasSSE2() &&
- isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
+ isVectorShift(SVOp, DAG, isLeft, ShVal, ShAmt);
if (isShift && ShVal.hasOneUse()) {
// If the shifted value has multiple uses, it may be cheaper to use
// v_set0 + movlhps or movhlps, etc.
@@ -4815,6 +4912,7 @@ static SDValue
GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue *InFlag, const EVT PtrVT, unsigned ReturnReg,
unsigned char OperandFlags) {
+ MachineFrameInfo *MFI = DAG.getMachineFunction().getFrameInfo();
SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Flag);
DebugLoc dl = GA->getDebugLoc();
SDValue TGA = DAG.getTargetGlobalAddress(GA->getGlobal(),
@@ -4828,6 +4926,10 @@ GetTLSADDR(SelectionDAG &DAG, SDValue Chain, GlobalAddressSDNode *GA,
SDValue Ops[] = { Chain, TGA };
Chain = DAG.getNode(X86ISD::TLSADDR, dl, NodeTys, Ops, 2);
}
+
+  // TLSADDR will be codegen'ed as a call. Inform MFI that the function has calls.
+ MFI->setHasCalls(true);
+
SDValue Flag = Chain.getValue(1);
return DAG.getCopyFromReg(Chain, dl, ReturnReg, PtrVT, Flag);
}
@@ -5648,6 +5750,17 @@ SDValue X86TargetLowering::LowerSETCC(SDValue Op, SelectionDAG &DAG) {
return SDValue();
SDValue Cond = EmitCmp(Op0, Op1, X86CC, DAG);
+
+ // Use sbb x, x to materialize carry bit into a GPR.
+ // FIXME: Temporarily disabled since it breaks self-hosting. It's apparently
+ // miscompiling ARMISelDAGToDAG.cpp.
+ if (0 && !isFP && X86CC == X86::COND_B) {
+ return DAG.getNode(ISD::AND, dl, MVT::i8,
+ DAG.getNode(X86ISD::SETCC_CARRY, dl, MVT::i8,
+ DAG.getConstant(X86CC, MVT::i8), Cond),
+ DAG.getConstant(1, MVT::i8));
+ }
+
return DAG.getNode(X86ISD::SETCC, dl, MVT::i8,
DAG.getConstant(X86CC, MVT::i8), Cond);
}
@@ -5800,9 +5913,18 @@ SDValue X86TargetLowering::LowerSELECT(SDValue Op, SelectionDAG &DAG) {
Cond = NewCond;
}
+  // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC) {
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
@@ -5885,9 +6007,18 @@ SDValue X86TargetLowering::LowerBRCOND(SDValue Op, SelectionDAG &DAG) {
Cond = LowerXALUO(Cond, DAG);
#endif
+  // Look past (and (setcc_carry (cmp ...)), 1).
+ if (Cond.getOpcode() == ISD::AND &&
+ Cond.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY) {
+ ConstantSDNode *C = dyn_cast<ConstantSDNode>(Cond.getOperand(1));
+ if (C && C->getAPIntValue() == 1)
+ Cond = Cond.getOperand(0);
+ }
+
// If condition flag is set by a X86ISD::CMP, then use it as the condition
// setting operand in place of the X86ISD::SETCC.
- if (Cond.getOpcode() == X86ISD::SETCC) {
+ if (Cond.getOpcode() == X86ISD::SETCC ||
+ Cond.getOpcode() == X86ISD::SETCC_CARRY) {
CC = Cond.getOperand(0);
SDValue Cmp = Cond.getOperand(1);
@@ -7274,6 +7405,7 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
case X86ISD::COMI: return "X86ISD::COMI";
case X86ISD::UCOMI: return "X86ISD::UCOMI";
case X86ISD::SETCC: return "X86ISD::SETCC";
+ case X86ISD::SETCC_CARRY: return "X86ISD::SETCC_CARRY";
case X86ISD::CMOV: return "X86ISD::CMOV";
case X86ISD::BRCOND: return "X86ISD::BRCOND";
case X86ISD::RET_FLAG: return "X86ISD::RET_FLAG";
@@ -8327,16 +8459,6 @@ bool X86TargetLowering::isGAPlusOffset(SDNode *N,
return TargetLowering::isGAPlusOffset(N, GA, Offset);
}
-static bool isBaseAlignmentOfN(unsigned N, SDNode *Base,
- const TargetLowering &TLI) {
- GlobalValue *GV;
- int64_t Offset = 0;
- if (TLI.isGAPlusOffset(Base, GV, Offset))
- return (GV->getAlignment() >= N && (Offset % N) == 0);
- // DAG combine handles the stack object case.
- return false;
-}
-
static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
EVT EltVT, LoadSDNode *&LDBase,
unsigned &LastLoadedElt,
@@ -8366,7 +8488,7 @@ static bool EltsFromConsecutiveLoads(ShuffleVectorSDNode *N, unsigned NumElems,
continue;
LoadSDNode *LD = cast<LoadSDNode>(Elt);
- if (!TLI.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i, MFI))
+ if (!DAG.isConsecutiveLoad(LD, LDBase, EltVT.getSizeInBits()/8, i))
return false;
LastLoadedElt = i;
}
@@ -8399,7 +8521,7 @@ static SDValue PerformShuffleCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
if (LastLoadedElt == NumElems - 1) {
- if (isBaseAlignmentOfN(16, LD->getBasePtr().getNode(), TLI))
+ if (DAG.InferPtrAlignment(LD->getBasePtr()) >= 16)
return DAG.getLoad(VT, dl, LD->getChain(), LD->getBasePtr(),
LD->getSrcValue(), LD->getSrcValueOffset(),
LD->isVolatile());
@@ -8858,11 +8980,42 @@ static SDValue PerformMulCombine(SDNode *N, SelectionDAG &DAG,
return SDValue();
}
+static SDValue PerformSHLCombine(SDNode *N, SelectionDAG &DAG) {
+ SDValue N0 = N->getOperand(0);
+ SDValue N1 = N->getOperand(1);
+ ConstantSDNode *N1C = dyn_cast<ConstantSDNode>(N1);
+ EVT VT = N0.getValueType();
+
+ // fold (shl (and (setcc_c), c1), c2) -> (and setcc_c, (c1 << c2))
+  // since the result of setcc_c is all zeros or all ones.
+ if (N1C && N0.getOpcode() == ISD::AND &&
+ N0.getOperand(1).getOpcode() == ISD::Constant) {
+ SDValue N00 = N0.getOperand(0);
+ if (N00.getOpcode() == X86ISD::SETCC_CARRY ||
+ ((N00.getOpcode() == ISD::ANY_EXTEND ||
+ N00.getOpcode() == ISD::ZERO_EXTEND) &&
+ N00.getOperand(0).getOpcode() == X86ISD::SETCC_CARRY)) {
+ APInt Mask = cast<ConstantSDNode>(N0.getOperand(1))->getAPIntValue();
+ APInt ShAmt = N1C->getAPIntValue();
+ Mask = Mask.shl(ShAmt);
+ if (Mask != 0)
+ return DAG.getNode(ISD::AND, N->getDebugLoc(), VT,
+ N00, DAG.getConstant(Mask, VT));
+ }
+ }
+
+ return SDValue();
+}
/// PerformShiftCombine - Transforms vector shift nodes to use vector shifts
/// when possible.
static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
const X86Subtarget *Subtarget) {
+ EVT VT = N->getValueType(0);
+ if (!VT.isVector() && VT.isInteger() &&
+ N->getOpcode() == ISD::SHL)
+ return PerformSHLCombine(N, DAG);
+
// On X86 with SSE2 support, we can transform this to a vector shift if
// all elements are shifted by the same amount. We can't do this in legalize
// because a constant vector is typically transformed to a constant pool
@@ -8870,7 +9023,6 @@ static SDValue PerformShiftCombine(SDNode* N, SelectionDAG &DAG,
if (!Subtarget->hasSSE2())
return SDValue();
- EVT VT = N->getValueType(0);
if (VT != MVT::v2i64 && VT != MVT::v4i32 && VT != MVT::v8i16)
return SDValue();
diff --git a/lib/Target/X86/X86ISelLowering.h b/lib/Target/X86/X86ISelLowering.h
index 7b4ab62..64bc70c 100644
--- a/lib/Target/X86/X86ISelLowering.h
+++ b/lib/Target/X86/X86ISelLowering.h
@@ -118,6 +118,10 @@ namespace llvm {
/// operand produced by a CMP instruction.
SETCC,
+    // Same as SETCC except it's materialized with an sbb and the value is all
+    // ones or all zeros.
+ SETCC_CARRY,
+
/// X86 conditional moves. Operand 0 and operand 1 are the two values
/// to select from. Operand 2 is the condition code, and operand 3 is the
/// flag operand produced by a CMP or TEST instruction. It also writes a
@@ -626,7 +630,9 @@ namespace llvm {
std::pair<SDValue,SDValue> FP_TO_INTHelper(SDValue Op, SelectionDAG &DAG,
bool isSigned);
-
+
+ SDValue LowerAsSplatVectorLoad(SDValue SrcOp, EVT VT, DebugLoc dl,
+ SelectionDAG &DAG);
SDValue LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG);
SDValue LowerVECTOR_SHUFFLE(SDValue Op, SelectionDAG &DAG);
SDValue LowerEXTRACT_VECTOR_ELT(SDValue Op, SelectionDAG &DAG);
diff --git a/lib/Target/X86/X86Instr64bit.td b/lib/Target/X86/X86Instr64bit.td
index b5fa862..b6a2c05 100644
--- a/lib/Target/X86/X86Instr64bit.td
+++ b/lib/Target/X86/X86Instr64bit.td
@@ -1333,6 +1333,15 @@ def CMOVNO64rm : RI<0x41, MRMSrcMem, // if !overflow, GR64 = [mem64]
X86_COND_NO, EFLAGS))]>, TB;
} // isTwoAddress
+// Use sbb to materialize carry flag into a GPR.
+let Defs = [EFLAGS], Uses = [EFLAGS], isCodeGenOnly = 1 in
+def SETB_C64r : RI<0x19, MRMInitReg, (outs GR64:$dst), (ins),
+ "sbb{q}\t$dst, $dst",
+ [(set GR64:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>;
+
+def : Pat<(i64 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+ (SETB_C64r)>;
+
//===----------------------------------------------------------------------===//
// Conversion Instructions...
//
diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp
index a37013d..1947d35 100644
--- a/lib/Target/X86/X86InstrInfo.cpp
+++ b/lib/Target/X86/X86InstrInfo.cpp
@@ -783,12 +783,14 @@ unsigned X86InstrInfo::isLoadFromStackSlotPostFE(const MachineInstr *MI,
if ((Reg = isLoadFromStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- return hasLoadFromStackSlot(MI, FrameIndex);
+ const MachineMemOperand *Dummy;
+ return hasLoadFromStackSlot(MI, Dummy, FrameIndex);
}
return 0;
}
bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
int &FrameIndex) const {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end();
@@ -798,6 +800,7 @@ bool X86InstrInfo::hasLoadFromStackSlot(const MachineInstr *MI,
if (const FixedStackPseudoSourceValue *Value =
dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
FrameIndex = Value->getFrameIndex();
+ MMO = *o;
return true;
}
}
@@ -819,12 +822,14 @@ unsigned X86InstrInfo::isStoreToStackSlotPostFE(const MachineInstr *MI,
if ((Reg = isStoreToStackSlot(MI, FrameIndex)))
return Reg;
// Check for post-frame index elimination operations
- return hasStoreToStackSlot(MI, FrameIndex);
+ const MachineMemOperand *Dummy;
+ return hasStoreToStackSlot(MI, Dummy, FrameIndex);
}
return 0;
}
bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
int &FrameIndex) const {
for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
oe = MI->memoperands_end();
@@ -834,6 +839,7 @@ bool X86InstrInfo::hasStoreToStackSlot(const MachineInstr *MI,
if (const FixedStackPseudoSourceValue *Value =
dyn_cast<const FixedStackPseudoSourceValue>((*o)->getValue())) {
FrameIndex = Value->getFrameIndex();
+ MMO = *o;
return true;
}
}
@@ -1052,6 +1058,112 @@ static bool hasLiveCondCodeDef(MachineInstr *MI) {
return false;
}
+/// convertToThreeAddressWithLEA - Helper for convertToThreeAddress when
+/// 16-bit LEA is disabled. It forms 3-address code using a 32-bit LEA by
+/// promoting to a 32-bit superregister and then truncating back down to a
+/// 16-bit subregister.
+MachineInstr *
+X86InstrInfo::convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const {
+ MachineInstr *MI = MBBI;
+ unsigned Dest = MI->getOperand(0).getReg();
+ unsigned Src = MI->getOperand(1).getReg();
+ bool isDead = MI->getOperand(0).isDead();
+ bool isKill = MI->getOperand(1).isKill();
+
+ unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
+ ? X86::LEA64_32r : X86::LEA32r;
+ MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
+ unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ // This has the potential to cause a partial register stall, e.g.:
+ // movw (%rbp,%rcx,2), %dx
+ // leal -65(%rdx), %esi
+ // But testing has shown this *does* help performance in 64-bit mode (at
+ // least on modern x86 machines).
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
+ MachineInstr *InsMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
+ .addReg(leaInReg)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(X86::SUBREG_16BIT);
+
+ MachineInstrBuilder MIB = BuildMI(*MFI, MBBI, MI->getDebugLoc(),
+ get(Opc), leaOutReg);
+ switch (MIOpc) {
+ default:
+ llvm_unreachable(0);
+ break;
+ case X86::SHL16ri: {
+ unsigned ShAmt = MI->getOperand(2).getImm();
+ MIB.addReg(0).addImm(1 << ShAmt)
+ .addReg(leaInReg, RegState::Kill).addImm(0);
+ break;
+ }
+ case X86::INC16r:
+ case X86::INC64_16r:
+ addLeaRegOffset(MIB, leaInReg, true, 1);
+ break;
+ case X86::DEC16r:
+ case X86::DEC64_16r:
+ addLeaRegOffset(MIB, leaInReg, true, -1);
+ break;
+ case X86::ADD16ri:
+ case X86::ADD16ri8:
+ addLeaRegOffset(MIB, leaInReg, true, MI->getOperand(2).getImm());
+ break;
+ case X86::ADD16rr: {
+ unsigned Src2 = MI->getOperand(2).getReg();
+ bool isKill2 = MI->getOperand(2).isKill();
+ unsigned leaInReg2 = 0;
+ MachineInstr *InsMI2 = 0;
+ if (Src == Src2) {
+ // ADD16rr %reg1028<kill>, %reg1028 requires
+ // just a single insert_subreg.
+ addRegReg(MIB, leaInReg, true, leaInReg, false);
+ } else {
+ leaInReg2 = RegInfo.createVirtualRegister(&X86::GR32RegClass);
+ // Build and insert into an implicit UNDEF value. This is OK because
+ // we'll be shifting and then extracting the lower 16 bits.
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg2);
+ InsMI2 =
+ BuildMI(*MFI, MIB, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg2)
+ .addReg(leaInReg2)
+ .addReg(Src2, getKillRegState(isKill2))
+ .addImm(X86::SUBREG_16BIT);
+ addRegReg(MIB, leaInReg, true, leaInReg2, true);
+ }
+ if (LV && isKill2 && InsMI2)
+ LV->replaceKillInstruction(Src2, MI, InsMI2);
+ break;
+ }
+ }
+
+ MachineInstr *NewMI = MIB;
+ MachineInstr *ExtMI =
+ BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(leaOutReg, RegState::Kill)
+ .addImm(X86::SUBREG_16BIT);
+
+ if (LV) {
+ // Update live variables
+ LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
+ LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
+ if (isKill)
+ LV->replaceKillInstruction(Src, MI, InsMI);
+ if (isDead)
+ LV->replaceKillInstruction(Dest, MI, ExtMI);
+ }
+
+ return ExtMI;
+}
+
/// convertToThreeAddress - This method must be implemented by targets that
/// set the M_CONVERTIBLE_TO_3_ADDR flag. When this flag is set, the target
/// may be able to convert a two-address instruction into a true
@@ -1077,7 +1189,9 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
MachineInstr *NewMI = NULL;
// FIXME: 16-bit LEA's are really slow on Athlons, but not bad on P4's. When
// we have better subtarget support, enable the 16-bit LEA generation here.
+ // 16-bit LEA is also slow on Core2.
bool DisableLEA16 = true;
+ bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
unsigned MIOpc = MI->getOpcode();
switch (MIOpc) {
@@ -1116,8 +1230,7 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
- unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit() ?
- X86::LEA64_32r : X86::LEA32r;
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
NewMI = BuildMI(MF, MI->getDebugLoc(), get(Opc))
.addReg(Dest, RegState::Define | getDeadRegState(isDead))
.addReg(0).addImm(1 << ShAmt)
@@ -1131,51 +1244,13 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
unsigned ShAmt = MI->getOperand(2).getImm();
if (ShAmt == 0 || ShAmt >= 4) return 0;
- if (DisableLEA16) {
- // If 16-bit LEA is disabled, use 32-bit LEA via subregisters.
- MachineRegisterInfo &RegInfo = MFI->getParent()->getRegInfo();
- unsigned Opc = TM.getSubtarget<X86Subtarget>().is64Bit()
- ? X86::LEA64_32r : X86::LEA32r;
- unsigned leaInReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
- unsigned leaOutReg = RegInfo.createVirtualRegister(&X86::GR32RegClass);
-
- // Build and insert into an implicit UNDEF value. This is OK because
- // well be shifting and then extracting the lower 16-bits.
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::IMPLICIT_DEF), leaInReg);
- MachineInstr *InsMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::INSERT_SUBREG),leaInReg)
- .addReg(leaInReg)
- .addReg(Src, getKillRegState(isKill))
- .addImm(X86::SUBREG_16BIT);
-
- NewMI = BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(Opc), leaOutReg)
- .addReg(0).addImm(1 << ShAmt)
- .addReg(leaInReg, RegState::Kill)
- .addImm(0);
-
- MachineInstr *ExtMI =
- BuildMI(*MFI, MBBI, MI->getDebugLoc(), get(X86::EXTRACT_SUBREG))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(leaOutReg, RegState::Kill)
- .addImm(X86::SUBREG_16BIT);
-
- if (LV) {
- // Update live variables
- LV->getVarInfo(leaInReg).Kills.push_back(NewMI);
- LV->getVarInfo(leaOutReg).Kills.push_back(ExtMI);
- if (isKill)
- LV->replaceKillInstruction(Src, MI, InsMI);
- if (isDead)
- LV->replaceKillInstruction(Dest, MI, ExtMI);
- }
- return ExtMI;
- } else {
- NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define | getDeadRegState(isDead))
- .addReg(0).addImm(1 << ShAmt)
- .addReg(Src, getKillRegState(isKill))
- .addImm(0);
- }
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
+ NewMI = BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define | getDeadRegState(isDead))
+ .addReg(0).addImm(1 << ShAmt)
+ .addReg(Src, getKillRegState(isKill))
+ .addImm(0);
break;
}
default: {
@@ -1185,7 +1260,6 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
if (hasLiveCondCodeDef(MI))
return 0;
- bool is64Bit = TM.getSubtarget<X86Subtarget>().is64Bit();
switch (MIOpc) {
default: return 0;
case X86::INC64r:
@@ -1202,7 +1276,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::INC16r:
case X86::INC64_16r:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown inc instruction!");
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
@@ -1223,7 +1298,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
}
case X86::DEC16r:
case X86::DEC64_16r:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 2 && "Unknown dec instruction!");
NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
.addReg(Dest, RegState::Define |
@@ -1246,7 +1322,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
break;
}
case X86::ADD16rr: {
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
unsigned Src2 = MI->getOperand(2).getReg();
bool isKill2 = MI->getOperand(2).isKill();
@@ -1261,56 +1338,32 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI,
case X86::ADD64ri32:
case X86::ADD64ri8:
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm())
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA64r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
break;
case X86::ADD32ri:
- case X86::ADD32ri8:
+ case X86::ADD32ri8: {
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm()) {
- unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
- NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
+ unsigned Opc = is64Bit ? X86::LEA64_32r : X86::LEA32r;
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(Opc))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
Src, isKill, MI->getOperand(2).getImm());
- }
break;
+ }
case X86::ADD16ri:
case X86::ADD16ri8:
- if (DisableLEA16) return 0;
+ if (DisableLEA16)
+ return is64Bit ? convertToThreeAddressWithLEA(MIOpc, MFI, MBBI, LV) : 0;
assert(MI->getNumOperands() >= 3 && "Unknown add instruction!");
- if (MI->getOperand(2).isImm())
- NewMI = addRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)),
- Src, isKill, MI->getOperand(2).getImm());
- break;
- case X86::SHL16ri:
- if (DisableLEA16) return 0;
- case X86::SHL32ri:
- case X86::SHL64ri: {
- assert(MI->getNumOperands() >= 3 && MI->getOperand(2).isImm() &&
- "Unknown shl instruction!");
- unsigned ShAmt = MI->getOperand(2).getImm();
- if (ShAmt == 1 || ShAmt == 2 || ShAmt == 3) {
- X86AddressMode AM;
- AM.Scale = 1 << ShAmt;
- AM.IndexReg = Src;
- unsigned Opc = MIOpc == X86::SHL64ri ? X86::LEA64r
- : (MIOpc == X86::SHL32ri
- ? (is64Bit ? X86::LEA64_32r : X86::LEA32r) : X86::LEA16r);
- NewMI = addFullAddress(BuildMI(MF, MI->getDebugLoc(), get(Opc))
- .addReg(Dest, RegState::Define |
- getDeadRegState(isDead)), AM);
- if (isKill)
- NewMI->getOperand(3).setIsKill(true);
- }
+ NewMI = addLeaRegOffset(BuildMI(MF, MI->getDebugLoc(), get(X86::LEA16r))
+ .addReg(Dest, RegState::Define |
+ getDeadRegState(isDead)),
+ Src, isKill, MI->getOperand(2).getImm());
break;
}
- }
}
}
@@ -1571,14 +1624,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
MachineBasicBlock::iterator I = MBB.end();
while (I != MBB.begin()) {
--I;
- // Working from the bottom, when we see a non-terminator
- // instruction, we're done.
+
+ // Working from the bottom, when we see a non-terminator instruction, we're
+ // done.
if (!isBrAnalysisUnpredicatedTerminator(I, *this))
break;
- // A terminator that isn't a branch can't easily be handled
- // by this analysis.
+
+ // A terminator that isn't a branch can't easily be handled by this
+ // analysis.
if (!I->getDesc().isBranch())
return true;
+
// Handle unconditional branches.
if (I->getOpcode() == X86::JMP) {
if (!AllowModify) {
@@ -1587,10 +1643,12 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
}
// If the block has any instructions after a JMP, delete them.
- while (next(I) != MBB.end())
- next(I)->eraseFromParent();
+ while (llvm::next(I) != MBB.end())
+ llvm::next(I)->eraseFromParent();
+
Cond.clear();
FBB = 0;
+
// Delete the JMP if it's equivalent to a fall-through.
if (MBB.isLayoutSuccessor(I->getOperand(0).getMBB())) {
TBB = 0;
@@ -1598,14 +1656,17 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
I = MBB.end();
continue;
}
+
// TBB is used to indicate the unconditional destination.
TBB = I->getOperand(0).getMBB();
continue;
}
+
// Handle conditional branches.
X86::CondCode BranchCode = GetCondFromBranchOpc(I->getOpcode());
if (BranchCode == X86::COND_INVALID)
return true; // Can't handle indirect branch.
+
// Working from the bottom, handle the first conditional branch.
if (Cond.empty()) {
FBB = TBB;
@@ -1613,24 +1674,26 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
Cond.push_back(MachineOperand::CreateImm(BranchCode));
continue;
}
- // Handle subsequent conditional branches. Only handle the case
- // where all conditional branches branch to the same destination
- // and their condition opcodes fit one of the special
- // multi-branch idioms.
+
+ // Handle subsequent conditional branches. Only handle the case where all
+ // conditional branches branch to the same destination and their condition
+ // opcodes fit one of the special multi-branch idioms.
assert(Cond.size() == 1);
assert(TBB);
- // Only handle the case where all conditional branches branch to
- // the same destination.
+
+ // Only handle the case where all conditional branches branch to the same
+ // destination.
if (TBB != I->getOperand(0).getMBB())
return true;
- X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
+
// If the conditions are the same, we can leave them alone.
+ X86::CondCode OldBranchCode = (X86::CondCode)Cond[0].getImm();
if (OldBranchCode == BranchCode)
continue;
- // If they differ, see if they fit one of the known patterns.
- // Theoretically we could handle more patterns here, but
- // we shouldn't expect to see them if instruction selection
- // has done a reasonable job.
+
+ // If they differ, see if they fit one of the known patterns. Theoretically,
+ // we could handle more patterns here, but we shouldn't expect to see them
+ // if instruction selection has done a reasonable job.
if ((OldBranchCode == X86::COND_NP &&
BranchCode == X86::COND_E) ||
(OldBranchCode == X86::COND_E &&
@@ -1643,6 +1706,7 @@ bool X86InstrInfo::AnalyzeBranch(MachineBasicBlock &MBB,
BranchCode = X86::COND_NE_OR_P;
else
return true;
+
// Update the MachineOperand.
Cond[0].setImm(BranchCode);
}
@@ -2713,27 +2777,6 @@ unsigned X86InstrInfo::getOpcodeAfterMemoryUnfold(unsigned Opc,
return I->second.first;
}
-bool X86InstrInfo::BlockHasNoFallThrough(const MachineBasicBlock &MBB) const {
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case X86::TCRETURNri:
- case X86::TCRETURNdi:
- case X86::RET: // Return.
- case X86::RETI:
- case X86::TAILJMPd:
- case X86::TAILJMPr:
- case X86::TAILJMPm:
- case X86::JMP: // Uncond branch.
- case X86::JMP32r: // Indirect branch.
- case X86::JMP64r: // Indirect branch (64-bit).
- case X86::JMP32m: // Indirect branch through mem.
- case X86::JMP64m: // Indirect branch through mem (64-bit).
- return true;
- default: return false;
- }
-}
-
bool X86InstrInfo::
ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const {
assert(Cond.size() == 1 && "Invalid X86 branch condition!");
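
A standalone sketch of why convertToThreeAddressWithLEA can legally promote a 16-bit shift to a 32-bit LEA and truncate the result (illustration only, not part of this commit; the helper name is made up):

    // Doing the shift in a 32-bit register and extracting the low 16 bits
    // gives the same result as the original 16-bit shift.
    #include <cassert>
    #include <cstdint>

    static uint16_t ShiftVia32(uint16_t Src, unsigned ShAmt) {
      uint32_t Wide = Src;                 // insert_subreg into a 32-bit vreg
      Wide <<= ShAmt;                      // LEA32r with scale 1 << ShAmt
      return static_cast<uint16_t>(Wide);  // extract_subreg back to 16 bits
    }

    int main() {
      for (unsigned ShAmt = 1; ShAmt <= 3; ++ShAmt)
        for (uint32_t V = 0; V <= 0xFFFF; ++V)
          assert(ShiftVia32(static_cast<uint16_t>(V), ShAmt) ==
                 static_cast<uint16_t>(static_cast<uint16_t>(V) << ShAmt));
      return 0;
    }
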
diff --git a/lib/Target/X86/X86InstrInfo.h b/lib/Target/X86/X86InstrInfo.h
index c6daa25..b83441d 100644
--- a/lib/Target/X86/X86InstrInfo.h
+++ b/lib/Target/X86/X86InstrInfo.h
@@ -457,11 +457,14 @@ public:
/// hasLoadFromStackSlot - If the specified machine instruction has
/// a load from a stack slot, return true along with the FrameIndex
- /// of the loaded stack slot. If not, return false. Unlike
+ /// of the loaded stack slot and the machine mem operand containing
+ /// the reference. If not, return false. Unlike
/// isLoadFromStackSlot, this returns true for any instructions that
/// loads from the stack. This is a hint only and may not catch all
/// cases.
- bool hasLoadFromStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ bool hasLoadFromStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
unsigned isStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
/// isStoreToStackSlotPostFE - Check for post-frame ptr elimination
@@ -472,11 +475,13 @@ public:
/// hasStoreToStackSlot - If the specified machine instruction has a
/// store to a stack slot, return true along with the FrameIndex of
- /// the loaded stack slot. If not, return false. Unlike
- /// isStoreToStackSlot, this returns true for any instructions that
- /// loads from the stack. This is a hint only and may not catch all
- /// cases.
- bool hasStoreToStackSlot(const MachineInstr *MI, int &FrameIndex) const;
+ /// the stored stack slot and the machine mem operand containing the
+ /// reference. If not, return false. Unlike isStoreToStackSlot,
+ /// this returns true for any instruction that stores to the
+ /// stack. This is a hint only and may not catch all cases.
+ bool hasStoreToStackSlot(const MachineInstr *MI,
+ const MachineMemOperand *&MMO,
+ int &FrameIndex) const;
bool isReallyTriviallyReMaterializable(const MachineInstr *MI,
AliasAnalysis *AA) const;
@@ -595,7 +600,6 @@ public:
bool UnfoldLoad, bool UnfoldStore,
unsigned *LoadRegIndex = 0) const;
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
virtual
bool ReverseBranchCondition(SmallVectorImpl<MachineOperand> &Cond) const;
@@ -633,6 +637,11 @@ public:
unsigned getGlobalBaseReg(MachineFunction *MF) const;
private:
+ MachineInstr * convertToThreeAddressWithLEA(unsigned MIOpc,
+ MachineFunction::iterator &MFI,
+ MachineBasicBlock::iterator &MBBI,
+ LiveVariables *LV) const;
+
MachineInstr* foldMemoryOperandImpl(MachineFunction &MF,
MachineInstr* MI,
unsigned OpNum,
diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td
index 90ef1f4..3cc1853 100644
--- a/lib/Target/X86/X86InstrInfo.td
+++ b/lib/Target/X86/X86InstrInfo.td
@@ -87,6 +87,7 @@ def X86cmov : SDNode<"X86ISD::CMOV", SDTX86Cmov>;
def X86brcond : SDNode<"X86ISD::BRCOND", SDTX86BrCond,
[SDNPHasChain]>;
def X86setcc : SDNode<"X86ISD::SETCC", SDTX86SetCC>;
+def X86setcc_c : SDNode<"X86ISD::SETCC_CARRY", SDTX86SetCC>;
def X86cas : SDNode<"X86ISD::LCMPXCHG_DAG", SDTX86cas,
[SDNPHasChain, SDNPInFlag, SDNPOutFlag, SDNPMayStore,
@@ -816,7 +817,7 @@ def BSR32rm : I<0xBD, MRMSrcMem, (outs GR32:$dst), (ins i32mem:$src),
let neverHasSideEffects = 1 in
def LEA16r : I<0x8D, MRMSrcMem,
- (outs GR16:$dst), (ins i32mem:$src),
+ (outs GR16:$dst), (ins lea32mem:$src),
"lea{w}\t{$src|$dst}, {$dst|$src}", []>, OpSize;
let isReMaterializable = 1 in
def LEA32r : I<0x8D, MRMSrcMem,
@@ -3059,6 +3060,21 @@ let Defs = [AH], Uses = [EFLAGS], neverHasSideEffects = 1 in
def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", []>; // AH = flags
let Uses = [EFLAGS] in {
+// Use sbb to materialize carry bit.
+
+let Defs = [EFLAGS], isCodeGenOnly = 1 in {
+def SETB_C8r : I<0x18, MRMInitReg, (outs GR8:$dst), (ins),
+ "sbb{b}\t$dst, $dst",
+ [(set GR8:$dst, (X86setcc_c X86_COND_B, EFLAGS))]>;
+def SETB_C16r : I<0x19, MRMInitReg, (outs GR16:$dst), (ins),
+ "sbb{w}\t$dst, $dst",
+ [(set GR16:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>,
+ OpSize;
+def SETB_C32r : I<0x19, MRMInitReg, (outs GR32:$dst), (ins),
+ "sbb{l}\t$dst, $dst",
+ [(set GR32:$dst, (zext (X86setcc_c X86_COND_B, EFLAGS)))]>;
+} // isCodeGenOnly
+
def SETEr : I<0x94, MRM0r,
(outs GR8 :$dst), (ins),
"sete\t$dst",
@@ -4169,6 +4185,12 @@ def : Pat<(store (shld (loadi16 addr:$dst), (i8 imm:$amt1),
GR16:$src2, (i8 imm:$amt2)), addr:$dst),
(SHLD16mri8 addr:$dst, GR16:$src2, (i8 imm:$amt1))>;
+// (anyext (setcc_carry)) -> (zext (setcc_carry))
+def : Pat<(i16 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+ (SETB_C16r)>;
+def : Pat<(i32 (anyext (X86setcc_c X86_COND_B, EFLAGS))),
+ (SETB_C32r)>;
+
//===----------------------------------------------------------------------===//
// EFLAGS-defining Patterns
//===----------------------------------------------------------------------===//
diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td
index dfdd4ce..62841f8 100644
--- a/lib/Target/X86/X86InstrSSE.td
+++ b/lib/Target/X86/X86InstrSSE.td
@@ -2083,7 +2083,7 @@ def PSHUFDmi : PDIi8<0x70, MRMSrcMem,
(outs VR128:$dst), (ins i128mem:$src1, i8imm:$src2),
"pshufd\t{$src2, $src1, $dst|$dst, $src1, $src2}",
[(set VR128:$dst, (v4i32 (pshufd:$src2
- (bc_v4i32(memopv2i64 addr:$src1)),
+ (bc_v4i32 (memopv2i64 addr:$src1)),
(undef))))]>;
}
diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp
index 33852bd..d96aafd 100644
--- a/lib/Target/X86/X86RegisterInfo.cpp
+++ b/lib/Target/X86/X86RegisterInfo.cpp
@@ -423,21 +423,6 @@ BitVector X86RegisterInfo::getReservedRegs(const MachineFunction &MF) const {
// Stack Frame Processing methods
//===----------------------------------------------------------------------===//
-static unsigned calculateMaxStackAlignment(const MachineFrameInfo *FFI) {
- unsigned MaxAlign = 0;
-
- for (int i = FFI->getObjectIndexBegin(),
- e = FFI->getObjectIndexEnd(); i != e; ++i) {
- if (FFI->isDeadObjectIndex(i))
- continue;
-
- unsigned Align = FFI->getObjectAlignment(i);
- MaxAlign = std::max(MaxAlign, Align);
- }
-
- return MaxAlign;
-}
-
/// hasFP - Return true if the specified function should have a dedicated frame
/// pointer register. This is true if the function has variable sized allocas
/// or if frame pointer elimination is disabled.
@@ -638,10 +623,7 @@ X86RegisterInfo::processFunctionBeforeCalleeSavedScan(MachineFunction &MF,
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- unsigned MaxAlign = std::max(MFI->getMaxAlignment(),
- calculateMaxStackAlignment(MFI));
-
- MFI->setMaxAlignment(MaxAlign);
+ MFI->calculateMaxStackAlignment();
X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
int32_t TailCallReturnAddrDelta = X86FI->getTCReturnAddrDelta();
@@ -741,7 +723,7 @@ void mergeSPUpdatesDown(MachineBasicBlock &MBB,
if (MBBI == MBB.end()) return;
- MachineBasicBlock::iterator NI = next(MBBI);
+ MachineBasicBlock::iterator NI = llvm::next(MBBI);
if (NI == MBB.end()) return;
unsigned Opc = NI->getOpcode();
@@ -775,7 +757,7 @@ static int mergeSPUpdates(MachineBasicBlock &MBB,
return 0;
MachineBasicBlock::iterator PI = doMergeWithPrevious ? prior(MBBI) : MBBI;
- MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : next(MBBI);
+ MachineBasicBlock::iterator NI = doMergeWithPrevious ? 0 : llvm::next(MBBI);
unsigned Opc = PI->getOpcode();
int Offset = 0;
@@ -1001,7 +983,7 @@ void X86RegisterInfo::emitPrologue(MachineFunction &MF) const {
}
// Mark the FramePtr as live-in in every block except the entry.
- for (MachineFunction::iterator I = next(MF.begin()), E = MF.end();
+ for (MachineFunction::iterator I = llvm::next(MF.begin()), E = MF.end();
I != E; ++I)
I->addLiveIn(FramePtr);
@@ -1482,45 +1464,3 @@ unsigned getX86SubSuperRegister(unsigned Reg, EVT VT, bool High) {
}
#include "X86GenRegisterInfo.inc"
-
-namespace {
- struct MSAC : public MachineFunctionPass {
- static char ID;
- MSAC() : MachineFunctionPass(&ID) {}
-
- virtual bool runOnMachineFunction(MachineFunction &MF) {
- MachineFrameInfo *FFI = MF.getFrameInfo();
- MachineRegisterInfo &RI = MF.getRegInfo();
-
- // Calculate max stack alignment of all already allocated stack objects.
- unsigned MaxAlign = calculateMaxStackAlignment(FFI);
-
- // Be over-conservative: scan over all vreg defs and find, whether vector
- // registers are used. If yes - there is probability, that vector register
- // will be spilled and thus stack needs to be aligned properly.
- for (unsigned RegNum = TargetRegisterInfo::FirstVirtualRegister;
- RegNum < RI.getLastVirtReg(); ++RegNum)
- MaxAlign = std::max(MaxAlign, RI.getRegClass(RegNum)->getAlignment());
-
- if (FFI->getMaxAlignment() == MaxAlign)
- return false;
-
- FFI->setMaxAlignment(MaxAlign);
- return true;
- }
-
- virtual const char *getPassName() const {
- return "X86 Maximal Stack Alignment Calculator";
- }
-
- virtual void getAnalysisUsage(AnalysisUsage &AU) const {
- AU.setPreservesCFG();
- MachineFunctionPass::getAnalysisUsage(AU);
- }
- };
-
- char MSAC::ID = 0;
-}
-
-FunctionPass*
-llvm::createX86MaxStackAlignmentCalculatorPass() { return new MSAC(); }
diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp
index 661f560..75cdbad 100644
--- a/lib/Target/X86/X86Subtarget.cpp
+++ b/lib/Target/X86/X86Subtarget.cpp
@@ -367,5 +367,5 @@ bool X86Subtarget::enablePostRAScheduler(
RegClassVector& CriticalPathRCs) const {
Mode = TargetSubtarget::ANTIDEP_CRITICAL;
CriticalPathRCs.clear();
- return OptLevel >= CodeGenOpt::Default;
+ return OptLevel >= CodeGenOpt::Aggressive;
}
diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp
index 0cda8bc..0152121 100644
--- a/lib/Target/X86/X86TargetMachine.cpp
+++ b/lib/Target/X86/X86TargetMachine.cpp
@@ -163,7 +163,7 @@ bool X86TargetMachine::addPreRegAlloc(PassManagerBase &PM,
CodeGenOpt::Level OptLevel) {
// Calculate and set max stack object alignment early, so we can decide
// whether we will need stack realignment (and thus FP).
- PM.add(createX86MaxStackAlignmentCalculatorPass());
+ PM.add(createMaxStackAlignmentCalculatorPass());
return false; // -print-machineinstr shouldn't print after this.
}
diff --git a/lib/Target/XCore/XCoreInstrInfo.cpp b/lib/Target/XCore/XCoreInstrInfo.cpp
index e616fe6..5a54844 100644
--- a/lib/Target/XCore/XCoreInstrInfo.cpp
+++ b/lib/Target/XCore/XCoreInstrInfo.cpp
@@ -453,26 +453,6 @@ bool XCoreInstrInfo::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
return true;
}
-/// BlockHasNoFallThrough - Analyse if MachineBasicBlock does not
-/// fall-through into its successor block.
-bool XCoreInstrInfo::
-BlockHasNoFallThrough(const MachineBasicBlock &MBB) const
-{
- if (MBB.empty()) return false;
-
- switch (MBB.back().getOpcode()) {
- case XCore::RETSP_u6: // Return.
- case XCore::RETSP_lu6:
- case XCore::BAU_1r: // Indirect branch.
- case XCore::BRFU_u6: // Uncond branch.
- case XCore::BRFU_lu6:
- case XCore::BRBU_u6:
- case XCore::BRBU_lu6:
- return true;
- default: return false;
- }
-}
-
/// ReverseBranchCondition - Return the inverse opcode of the
/// specified Branch instruction.
bool XCoreInstrInfo::
diff --git a/lib/Target/XCore/XCoreInstrInfo.h b/lib/Target/XCore/XCoreInstrInfo.h
index 24230ac..3e0a765 100644
--- a/lib/Target/XCore/XCoreInstrInfo.h
+++ b/lib/Target/XCore/XCoreInstrInfo.h
@@ -87,8 +87,6 @@ public:
MachineBasicBlock::iterator MI,
const std::vector<CalleeSavedInfo> &CSI) const;
- virtual bool BlockHasNoFallThrough(const MachineBasicBlock &MBB) const;
-
virtual bool ReverseBranchCondition(
SmallVectorImpl<MachineOperand> &Cond) const;
};
diff --git a/lib/Transforms/IPO/GlobalOpt.cpp b/lib/Transforms/IPO/GlobalOpt.cpp
index 4635d0e..1793bbf 100644
--- a/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2493,29 +2493,28 @@ bool GlobalOpt::OptimizeGlobalAliases(Module &M) {
Changed = true;
}
- // If the aliasee has internal linkage, give it the name and linkage
- // of the alias, and delete the alias. This turns:
- // define internal ... @f(...)
- // @a = alias ... @f
- // into:
- // define ... @a(...)
- if (!Target->hasLocalLinkage())
- continue;
-
- // The transform is only useful if the alias does not have internal linkage.
- if (J->hasLocalLinkage())
- continue;
+ // If the alias is externally visible, we may still be able to simplify it.
+ if (!J->hasLocalLinkage()) {
+ // If the aliasee has internal linkage, give it the name and linkage
+ // of the alias, and delete the alias. This turns:
+ // define internal ... @f(...)
+ // @a = alias ... @f
+ // into:
+ // define ... @a(...)
+ if (!Target->hasLocalLinkage())
+ continue;
- // Do not perform the transform if multiple aliases potentially target the
- // aliasee. This check also ensures that it is safe to replace the section
- // and other attributes of the aliasee with those of the alias.
- if (!hasOneUse)
- continue;
+ // Do not perform the transform if multiple aliases potentially target the
+ // aliasee. This check also ensures that it is safe to replace the section
+ // and other attributes of the aliasee with those of the alias.
+ if (!hasOneUse)
+ continue;
- // Give the aliasee the name, linkage and other attributes of the alias.
- Target->takeName(J);
- Target->setLinkage(J->getLinkage());
- Target->GlobalValue::copyAttributesFrom(J);
+ // Give the aliasee the name, linkage and other attributes of the alias.
+ Target->takeName(J);
+ Target->setLinkage(J->getLinkage());
+ Target->GlobalValue::copyAttributesFrom(J);
+ }
// Delete the alias.
M.getAliasList().erase(J);
diff --git a/lib/Transforms/Instrumentation/MaximumSpanningTree.h b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
index 2951dbc..829da6b 100644
--- a/lib/Transforms/Instrumentation/MaximumSpanningTree.h
+++ b/lib/Transforms/Instrumentation/MaximumSpanningTree.h
@@ -15,6 +15,7 @@
#ifndef LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
#define LLVM_ANALYSIS_MAXIMUMSPANNINGTREE_H
+#include "llvm/BasicBlock.h"
#include "llvm/ADT/EquivalenceClasses.h"
#include <vector>
#include <algorithm>
@@ -33,6 +34,18 @@ namespace llvm {
typename MaximumSpanningTree<CT>::EdgeWeight Y) const {
if (X.second > Y.second) return true;
if (X.second < Y.second) return false;
+ if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.first)) {
+ if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.first)) {
+ if (BBX->size() > BBY->size()) return true;
+ if (BBX->size() < BBY->size()) return false;
+ }
+ }
+ if (const BasicBlock *BBX = dyn_cast<BasicBlock>(X.first.second)) {
+ if (const BasicBlock *BBY = dyn_cast<BasicBlock>(Y.first.second)) {
+ if (BBX->size() > BBY->size()) return true;
+ if (BBX->size() < BBY->size()) return false;
+ }
+ }
return false;
}
};
diff --git a/lib/Transforms/Scalar/CodeGenPrepare.cpp b/lib/Transforms/Scalar/CodeGenPrepare.cpp
index 9ca90c3..e4c4ae5 100644
--- a/lib/Transforms/Scalar/CodeGenPrepare.cpp
+++ b/lib/Transforms/Scalar/CodeGenPrepare.cpp
@@ -21,7 +21,6 @@
#include "llvm/InlineAsm.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Pass.h"
#include "llvm/Analysis/ProfileInfo.h"
#include "llvm/Target/TargetData.h"
@@ -563,7 +562,7 @@ static bool IsNonLocalValue(Value *V, BasicBlock *BB) {
return false;
}
-/// OptimizeMemoryInst - Load and Store Instructions have often have
+/// OptimizeMemoryInst - Load and Store Instructions often have
/// addressing modes that can do significant amounts of computation. As such,
/// instruction selection will try to get the load or store to do as much
/// computation as possible for the program. The problem is that isel can only
diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp
index b0988b5..1cfde8f 100644
--- a/lib/Transforms/Scalar/DeadStoreElimination.cpp
+++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp
@@ -85,9 +85,14 @@ static bool doesClobberMemory(Instruction *I) {
return true;
if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(I)) {
switch (II->getIntrinsicID()) {
- default: return false;
- case Intrinsic::memset: case Intrinsic::memmove: case Intrinsic::memcpy:
- case Intrinsic::init_trampoline: case Intrinsic::lifetime_end: return true;
+ default:
+ return false;
+ case Intrinsic::memset:
+ case Intrinsic::memmove:
+ case Intrinsic::memcpy:
+ case Intrinsic::init_trampoline:
+ case Intrinsic::lifetime_end:
+ return true;
}
}
return false;
@@ -111,14 +116,13 @@ static Value *getPointerOperand(Instruction *I) {
return SI->getPointerOperand();
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I))
return MI->getOperand(1);
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default:
- assert(false && "Unexpected intrinsic!");
- case Intrinsic::init_trampoline:
- return II->getOperand(1);
- case Intrinsic::lifetime_end:
- return II->getOperand(2);
+
+ switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ default: assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return I->getOperand(1);
+ case Intrinsic::lifetime_end:
+ return I->getOperand(2);
}
}
@@ -135,15 +139,13 @@ static unsigned getStoreSize(Instruction *I, const TargetData *TD) {
if (MemIntrinsic *MI = dyn_cast<MemIntrinsic>(I)) {
Len = MI->getLength();
} else {
- IntrinsicInst *II = cast<IntrinsicInst>(I);
- switch (II->getIntrinsicID()) {
- default:
- assert(false && "Unexpected intrinsic!");
- case Intrinsic::init_trampoline:
- return -1u;
- case Intrinsic::lifetime_end:
- Len = II->getOperand(1);
- break;
+ switch (cast<IntrinsicInst>(I)->getIntrinsicID()) {
+ default: assert(false && "Unexpected intrinsic!");
+ case Intrinsic::init_trampoline:
+ return -1u;
+ case Intrinsic::lifetime_end:
+ Len = I->getOperand(1);
+ break;
}
}
if (ConstantInt *LenCI = dyn_cast<ConstantInt>(Len))
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 6f1c32c..222792b 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -31,15 +31,18 @@
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/MemoryBuiltins.h"
#include "llvm/Analysis/MemoryDependenceAnalysis.h"
+#include "llvm/Analysis/PHITransAddr.h"
#include "llvm/Support/CFG.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/GetElementPtrTypeIterator.h"
+#include "llvm/Support/IRBuilder.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetData.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
@@ -187,8 +190,11 @@ template <> struct DenseMapInfo<Expression> {
static bool isEqual(const Expression &LHS, const Expression &RHS) {
return LHS == RHS;
}
- static bool isPod() { return true; }
};
+
+template <>
+struct isPodLike<Expression> { static const bool value = true; };
+
}
//===----------------------------------------------------------------------===//
@@ -488,21 +494,21 @@ uint32_t ValueTable::lookup_or_add_call(CallInst* C) {
// Check to see if we have a single dominating call instruction that is
// identical to C.
for (unsigned i = 0, e = deps.size(); i != e; ++i) {
- const MemoryDependenceAnalysis::NonLocalDepEntry *I = &deps[i];
+ const NonLocalDepEntry *I = &deps[i];
// Ignore non-local dependencies.
- if (I->second.isNonLocal())
+ if (I->getResult().isNonLocal())
continue;
// We don't handle non-dependencies. If we already have a call, reject
// instruction dependencies.
- if (I->second.isClobber() || cdep != 0) {
+ if (I->getResult().isClobber() || cdep != 0) {
cdep = 0;
break;
}
- CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->second.getInst());
+ CallInst *NonLocalDepCall = dyn_cast<CallInst>(I->getResult().getInst());
// FIXME: All duplicated with non-local case.
- if (NonLocalDepCall && DT->properlyDominates(I->first, C->getParent())){
+ if (NonLocalDepCall && DT->properlyDominates(I->getBB(), C->getParent())){
cdep = NonLocalDepCall;
continue;
}
@@ -987,27 +993,27 @@ static Value *GetBaseWithConstantOffset(Value *Ptr, int64_t &Offset,
}
-/// AnalyzeLoadFromClobberingStore - This function is called when we have a
-/// memdep query of a load that ends up being a clobbering store. This means
-/// that the store *may* provide bits used by the load but we can't be sure
-/// because the pointers don't mustalias. Check this case to see if there is
-/// anything more we can do before we give up. This returns -1 if we have to
-/// give up, or a byte number in the stored value of the piece that feeds the
-/// load.
-static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
+/// AnalyzeLoadFromClobberingWrite - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering memory write (store,
+/// memset, memcpy, memmove). This means that the write *may* provide bits used
+/// by the load but we can't be sure because the pointers don't mustalias.
+///
+/// Check this case to see if there is anything more we can do before we give
+/// up. This returns -1 if we have to give up, or a byte number in the stored
+/// value of the piece that feeds the load.
+static int AnalyzeLoadFromClobberingWrite(const Type *LoadTy, Value *LoadPtr,
+ Value *WritePtr,
+ uint64_t WriteSizeInBits,
const TargetData &TD) {
// If the loaded or stored value is a first-class array or struct, don't try
// to transform them. We need to be able to bitcast to integer.
- if (isa<StructType>(L->getType()) || isa<ArrayType>(L->getType()) ||
- isa<StructType>(DepSI->getOperand(0)->getType()) ||
- isa<ArrayType>(DepSI->getOperand(0)->getType()))
+ if (isa<StructType>(LoadTy) || isa<ArrayType>(LoadTy))
return -1;
int64_t StoreOffset = 0, LoadOffset = 0;
- Value *StoreBase =
- GetBaseWithConstantOffset(DepSI->getPointerOperand(), StoreOffset, TD);
+ Value *StoreBase = GetBaseWithConstantOffset(WritePtr, StoreOffset, TD);
Value *LoadBase =
- GetBaseWithConstantOffset(L->getPointerOperand(), LoadOffset, TD);
+ GetBaseWithConstantOffset(LoadPtr, LoadOffset, TD);
if (StoreBase != LoadBase)
return -1;
@@ -1018,12 +1024,10 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
#if 0
errs() << "STORE/LOAD DEP WITH COMMON POINTER MISSED:\n"
<< "Base = " << *StoreBase << "\n"
- << "Store Ptr = " << *DepSI->getPointerOperand() << "\n"
- << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
- << "Load Ptr = " << *L->getPointerOperand() << "\n"
- << "Load Offs = " << LoadOffset << " - " << *L << "\n\n";
- errs() << "'" << L->getParent()->getParent()->getName() << "'"
- << *L->getParent();
+ << "Store Ptr = " << *WritePtr << "\n"
+ << "Store Offs = " << StoreOffset << "\n"
+ << "Load Ptr = " << *LoadPtr << "\n";
+ abort();
#endif
return -1;
}
@@ -1033,12 +1037,11 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
// must have gotten confused.
// FIXME: Investigate cases where this bails out, e.g. rdar://7238614. Then
// remove this check, as it is duplicated with what we have below.
- uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
- uint64_t LoadSize = TD.getTypeSizeInBits(L->getType());
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy);
- if ((StoreSize & 7) | (LoadSize & 7))
+ if ((WriteSizeInBits & 7) | (LoadSize & 7))
return -1;
- StoreSize >>= 3; // Convert to bytes.
+ uint64_t StoreSize = WriteSizeInBits >> 3; // Convert to bytes.
LoadSize >>= 3;
@@ -1052,12 +1055,10 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
#if 0
errs() << "STORE LOAD DEP WITH COMMON BASE:\n"
<< "Base = " << *StoreBase << "\n"
- << "Store Ptr = " << *DepSI->getPointerOperand() << "\n"
- << "Store Offs = " << StoreOffset << " - " << *DepSI << "\n"
- << "Load Ptr = " << *L->getPointerOperand() << "\n"
- << "Load Offs = " << LoadOffset << " - " << *L << "\n\n";
- errs() << "'" << L->getParent()->getParent()->getName() << "'"
- << *L->getParent();
+ << "Store Ptr = " << *WritePtr << "\n"
+ << "Store Offs = " << StoreOffset << "\n"
+ << "Load Ptr = " << *LoadPtr << "\n";
+ abort();
#endif
return -1;
}
@@ -1075,6 +1076,66 @@ static int AnalyzeLoadFromClobberingStore(LoadInst *L, StoreInst *DepSI,
return LoadOffset-StoreOffset;
}
+/// AnalyzeLoadFromClobberingStore - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering store.
+static int AnalyzeLoadFromClobberingStore(const Type *LoadTy, Value *LoadPtr,
+ StoreInst *DepSI,
+ const TargetData &TD) {
+ // Cannot handle reading from store of first-class aggregate yet.
+ if (isa<StructType>(DepSI->getOperand(0)->getType()) ||
+ isa<ArrayType>(DepSI->getOperand(0)->getType()))
+ return -1;
+
+ Value *StorePtr = DepSI->getPointerOperand();
+ uint64_t StoreSize = TD.getTypeSizeInBits(DepSI->getOperand(0)->getType());
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+ StorePtr, StoreSize, TD);
+}
+
+static int AnalyzeLoadFromClobberingMemInst(const Type *LoadTy, Value *LoadPtr,
+ MemIntrinsic *MI,
+ const TargetData &TD) {
+ // If the mem operation is a non-constant size, we can't handle it.
+ ConstantInt *SizeCst = dyn_cast<ConstantInt>(MI->getLength());
+ if (SizeCst == 0) return -1;
+ uint64_t MemSizeInBits = SizeCst->getZExtValue()*8;
+
+ // If this is a memset, we just need to see if the offset is valid within
+ // the size of the memset.
+ if (MI->getIntrinsicID() == Intrinsic::memset)
+ return AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr, MI->getDest(),
+ MemSizeInBits, TD);
+
+ // If we have a memcpy/memmove, the only case we can handle is if this is a
+ // copy from constant memory. In that case, we can read directly from the
+ // constant memory.
+ MemTransferInst *MTI = cast<MemTransferInst>(MI);
+
+ Constant *Src = dyn_cast<Constant>(MTI->getSource());
+ if (Src == 0) return -1;
+
+ GlobalVariable *GV = dyn_cast<GlobalVariable>(Src->getUnderlyingObject());
+ if (GV == 0 || !GV->isConstant()) return -1;
+
+ // See if the access is within the bounds of the transfer.
+ int Offset = AnalyzeLoadFromClobberingWrite(LoadTy, LoadPtr,
+ MI->getDest(), MemSizeInBits, TD);
+ if (Offset == -1)
+ return Offset;
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src = ConstantExpr::getBitCast(Src,
+ llvm::Type::getInt8PtrTy(Src->getContext()));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ if (ConstantFoldLoadFromConstPtr(Src, &TD))
+ return Offset;
+ return -1;
+}
+
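+
The generalized AnalyzeLoadFromClobberingWrite above boils down to a range check over a shared base pointer plus constant byte offsets. A standalone sketch of that check (illustration only, not part of this commit; the helper name is made up):

    // Returns the byte offset of the load inside the stored bytes, or -1 if
    // the load is not fully covered by the write.
    #include <cassert>
    #include <cstdint>

    static int OffsetOfLoadInWrite(int64_t StoreOffset, uint64_t StoreSize,
                                   int64_t LoadOffset, uint64_t LoadSize) {
      if (LoadOffset < StoreOffset)                          // starts before
        return -1;
      if (LoadOffset + static_cast<int64_t>(LoadSize) >
          StoreOffset + static_cast<int64_t>(StoreSize))     // runs past end
        return -1;
      return static_cast<int>(LoadOffset - StoreOffset);
    }

    int main() {
      // An i32 store at base+0 fully covers an i8 load at base+2 ...
      assert(OffsetOfLoadInWrite(0, 4, 2, 1) == 2);
      // ... but not an i32 load at base+2.
      assert(OffsetOfLoadInWrite(0, 4, 2, 4) == -1);
      return 0;
    }
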
/// GetStoreValueForLoad - This function is called when we have a
/// memdep query of a load that ends up being a clobbering store. This means
@@ -1089,50 +1150,134 @@ static Value *GetStoreValueForLoad(Value *SrcVal, unsigned Offset,
uint64_t StoreSize = TD.getTypeSizeInBits(SrcVal->getType())/8;
uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
// Compute which bits of the stored value are being used by the load. Convert
// to an integer type to start with.
if (isa<PointerType>(SrcVal->getType()))
- SrcVal = new PtrToIntInst(SrcVal, TD.getIntPtrType(Ctx), "tmp", InsertPt);
+ SrcVal = Builder.CreatePtrToInt(SrcVal, TD.getIntPtrType(Ctx), "tmp");
if (!isa<IntegerType>(SrcVal->getType()))
- SrcVal = new BitCastInst(SrcVal, IntegerType::get(Ctx, StoreSize*8),
- "tmp", InsertPt);
+ SrcVal = Builder.CreateBitCast(SrcVal, IntegerType::get(Ctx, StoreSize*8),
+ "tmp");
// Shift the bits to the least significant depending on endianness.
unsigned ShiftAmt;
- if (TD.isLittleEndian()) {
+ if (TD.isLittleEndian())
ShiftAmt = Offset*8;
- } else {
+ else
ShiftAmt = (StoreSize-LoadSize-Offset)*8;
- }
if (ShiftAmt)
- SrcVal = BinaryOperator::CreateLShr(SrcVal,
- ConstantInt::get(SrcVal->getType(), ShiftAmt), "tmp", InsertPt);
+ SrcVal = Builder.CreateLShr(SrcVal, ShiftAmt, "tmp");
if (LoadSize != StoreSize)
- SrcVal = new TruncInst(SrcVal, IntegerType::get(Ctx, LoadSize*8),
- "tmp", InsertPt);
+ SrcVal = Builder.CreateTrunc(SrcVal, IntegerType::get(Ctx, LoadSize*8),
+ "tmp");
return CoerceAvailableValueToLoadType(SrcVal, LoadTy, InsertPt, TD);
}
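
GetStoreValueForLoad picks its shift amount from the byte offset and the target's endianness. A standalone sketch of that calculation for a 1-byte load out of a 4-byte store (illustration only, not part of this commit; the helper name is made up):

    // On a little-endian target byte offset N lives in bits [8*N, 8*N+8); on
    // a big-endian target it lives at the top, hence the mirrored shift.
    #include <cassert>
    #include <cstdint>

    static uint8_t ExtractByte(uint32_t StoredVal, unsigned Offset,
                               bool LittleEndian) {
      const unsigned StoreSize = 4, LoadSize = 1;          // bytes
      unsigned ShiftAmt = LittleEndian
                              ? Offset * 8
                              : (StoreSize - LoadSize - Offset) * 8;
      return static_cast<uint8_t>(StoredVal >> ShiftAmt);  // lshr + trunc
    }

    int main() {
      // Stored i32 0x11223344: its memory bytes are 44 33 22 11 on a
      // little-endian target and 11 22 33 44 on a big-endian one, so a
      // 1-byte load at offset 1 sees 0x33 and 0x22 respectively.
      assert(ExtractByte(0x11223344u, 1, /*LittleEndian=*/true)  == 0x33);
      assert(ExtractByte(0x11223344u, 1, /*LittleEndian=*/false) == 0x22);
      return 0;
    }
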
+/// GetMemInstValueForLoad - This function is called when we have a
+/// memdep query of a load that ends up being a clobbering mem intrinsic.
+static Value *GetMemInstValueForLoad(MemIntrinsic *SrcInst, unsigned Offset,
+ const Type *LoadTy, Instruction *InsertPt,
+ const TargetData &TD){
+ LLVMContext &Ctx = LoadTy->getContext();
+ uint64_t LoadSize = TD.getTypeSizeInBits(LoadTy)/8;
+
+ IRBuilder<> Builder(InsertPt->getParent(), InsertPt);
+
+ // We know that this method is only called when the mem transfer fully
+ // provides the bits for the load.
+ if (MemSetInst *MSI = dyn_cast<MemSetInst>(SrcInst)) {
+ // memset(P, 'x', 1234) -> splat('x'), even if x is a variable, and
+ // independently of what the offset is.
+ Value *Val = MSI->getValue();
+ if (LoadSize != 1)
+ Val = Builder.CreateZExt(Val, IntegerType::get(Ctx, LoadSize*8));
+
+ Value *OneElt = Val;
+
+ // Splat the value out to the right number of bits.
+ for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
+ // If we can double the number of bytes set, do it.
+ if (NumBytesSet*2 <= LoadSize) {
+ Value *ShVal = Builder.CreateShl(Val, NumBytesSet*8);
+ Val = Builder.CreateOr(Val, ShVal);
+ NumBytesSet <<= 1;
+ continue;
+ }
+
+ // Otherwise insert one byte at a time.
+ Value *ShVal = Builder.CreateShl(Val, 1*8);
+ Val = Builder.CreateOr(OneElt, ShVal);
+ ++NumBytesSet;
+ }
+
+ return CoerceAvailableValueToLoadType(Val, LoadTy, InsertPt, TD);
+ }
+
+ // Otherwise, this is a memcpy/memmove from a constant global.
+ MemTransferInst *MTI = cast<MemTransferInst>(SrcInst);
+ Constant *Src = cast<Constant>(MTI->getSource());
+
+ // Otherwise, see if we can constant fold a load from the constant with the
+ // offset applied as appropriate.
+ Src = ConstantExpr::getBitCast(Src,
+ llvm::Type::getInt8PtrTy(Src->getContext()));
+ Constant *OffsetCst =
+ ConstantInt::get(Type::getInt64Ty(Src->getContext()), (unsigned)Offset);
+ Src = ConstantExpr::getGetElementPtr(Src, &OffsetCst, 1);
+ Src = ConstantExpr::getBitCast(Src, PointerType::getUnqual(LoadTy));
+ return ConstantFoldLoadFromConstPtr(Src, &TD);
+}
+
+
+
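+
The memset path of GetMemInstValueForLoad splats a single byte out to the load width by repeated doubling, then one byte at a time for the remainder. The same loop on plain integers (illustration only, not part of this commit; the helper name is made up):

    // Splat a byte to LoadSize bytes the same way the IR-building loop does:
    // double the number of bytes set while that still fits, then grow by one
    // byte at a time.
    #include <cassert>
    #include <cstdint>

    static uint64_t SplatByte(uint8_t Byte, unsigned LoadSize) {
      uint64_t Val = Byte, OneElt = Byte;
      for (unsigned NumBytesSet = 1; NumBytesSet != LoadSize; ) {
        if (NumBytesSet * 2 <= LoadSize) {     // double the bytes set so far
          Val |= Val << (NumBytesSet * 8);
          NumBytesSet <<= 1;
          continue;
        }
        Val = OneElt | (Val << 8);             // otherwise add one byte
        ++NumBytesSet;
      }
      return Val;
    }

    int main() {
      assert(SplatByte(0xAB, 1) == 0xABull);
      assert(SplatByte(0xAB, 4) == 0xABABABABull);
      assert(SplatByte(0xAB, 3) == 0xABABABull);  // non-power-of-two width
      return 0;
    }
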
struct AvailableValueInBlock {
/// BB - The basic block in question.
BasicBlock *BB;
+ enum ValType {
+ SimpleVal, // A simple offsetted value that is accessed.
+ MemIntrin // A memory intrinsic which is loaded from.
+ };
+
/// V - The value that is live out of the block.
- Value *V;
- /// Offset - The byte offset in V that is interesting for the load query.
+ PointerIntPair<Value *, 1, ValType> Val;
+
+ /// Offset - The byte offset in Val that is interesting for the load query.
unsigned Offset;
static AvailableValueInBlock get(BasicBlock *BB, Value *V,
unsigned Offset = 0) {
AvailableValueInBlock Res;
Res.BB = BB;
- Res.V = V;
+ Res.Val.setPointer(V);
+ Res.Val.setInt(SimpleVal);
+ Res.Offset = Offset;
+ return Res;
+ }
+
+ static AvailableValueInBlock getMI(BasicBlock *BB, MemIntrinsic *MI,
+ unsigned Offset = 0) {
+ AvailableValueInBlock Res;
+ Res.BB = BB;
+ Res.Val.setPointer(MI);
+ Res.Val.setInt(MemIntrin);
Res.Offset = Offset;
return Res;
}
+
+ bool isSimpleValue() const { return Val.getInt() == SimpleVal; }
+ Value *getSimpleValue() const {
+ assert(isSimpleValue() && "Wrong accessor");
+ return Val.getPointer();
+ }
+
+ MemIntrinsic *getMemIntrinValue() const {
+ assert(!isSimpleValue() && "Wrong accessor");
+ return cast<MemIntrinsic>(Val.getPointer());
+ }
};
/// ConstructSSAForLoadSet - Given a set of loads specified by ValuesPerBlock,
@@ -1149,30 +1294,33 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
const Type *LoadTy = LI->getType();
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
- BasicBlock *BB = ValuesPerBlock[i].BB;
- Value *AvailableVal = ValuesPerBlock[i].V;
- unsigned Offset = ValuesPerBlock[i].Offset;
+ const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ BasicBlock *BB = AV.BB;
if (SSAUpdate.HasValueForBlock(BB))
continue;
-
- if (AvailableVal->getType() != LoadTy) {
- assert(TD && "Need target data to handle type mismatch case");
- AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
- BB->getTerminator(), *TD);
-
- if (Offset) {
- DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
- << *ValuesPerBlock[i].V << '\n'
+
+ unsigned Offset = AV.Offset;
+
+ Value *AvailableVal;
+ if (AV.isSimpleValue()) {
+ AvailableVal = AV.getSimpleValue();
+ if (AvailableVal->getType() != LoadTy) {
+ assert(TD && "Need target data to handle type mismatch case");
+ AvailableVal = GetStoreValueForLoad(AvailableVal, Offset, LoadTy,
+ BB->getTerminator(), *TD);
+
+ DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\nOffset: " << Offset << " "
+ << *AV.getSimpleValue() << '\n'
<< *AvailableVal << '\n' << "\n\n\n");
}
-
-
- DEBUG(errs() << "GVN COERCED NONLOCAL VAL:\n"
- << *ValuesPerBlock[i].V << '\n'
+ } else {
+ AvailableVal = GetMemInstValueForLoad(AV.getMemIntrinValue(), Offset,
+ LoadTy, BB->getTerminator(), *TD);
+ DEBUG(errs() << "GVN COERCED NONLOCAL MEM INTRIN:\nOffset: " << Offset
+ << " " << *AV.getMemIntrinValue() << '\n'
<< *AvailableVal << '\n' << "\n\n\n");
}
-
SSAUpdate.AddAvailableValue(BB, AvailableVal);
}
@@ -1187,12 +1335,18 @@ static Value *ConstructSSAForLoadSet(LoadInst *LI,
return V;
}
+static bool isLifetimeStart(Instruction *Inst) {
+ if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(Inst))
+ return II->getIntrinsicID() == Intrinsic::lifetime_start;
+ return false;
+}
+
/// processNonLocalLoad - Attempt to eliminate a load whose dependencies are
/// non-local by performing PHI construction.
bool GVN::processNonLocalLoad(LoadInst *LI,
SmallVectorImpl<Instruction*> &toErase) {
// Find the non-local dependencies of the load.
- SmallVector<MemoryDependenceAnalysis::NonLocalDepEntry, 64> Deps;
+ SmallVector<NonLocalDepEntry, 64> Deps;
MD->getNonLocalPointerDependency(LI->getOperand(0), true, LI->getParent(),
Deps);
//DEBUG(errs() << "INVESTIGATING NONLOCAL LOAD: "
@@ -1206,11 +1360,11 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// If we had a phi translation failure, we'll have a single entry which is a
// clobber in the current block. Reject this early.
- if (Deps.size() == 1 && Deps[0].second.isClobber()) {
+ if (Deps.size() == 1 && Deps[0].getResult().isClobber()) {
DEBUG(
errs() << "GVN: non-local load ";
WriteAsOperand(errs(), LI);
- errs() << " is clobbered by " << *Deps[0].second.getInst() << '\n';
+ errs() << " is clobbered by " << *Deps[0].getResult().getInst() << '\n';
);
return false;
}
@@ -1225,18 +1379,24 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
const TargetData *TD = 0;
for (unsigned i = 0, e = Deps.size(); i != e; ++i) {
- BasicBlock *DepBB = Deps[i].first;
- MemDepResult DepInfo = Deps[i].second;
+ BasicBlock *DepBB = Deps[i].getBB();
+ MemDepResult DepInfo = Deps[i].getResult();
if (DepInfo.isClobber()) {
+ // The address being loaded in this non-local block may not be the same as
+ // the pointer operand of the load if PHI translation occurs. Make sure
+ // to consider the right address.
+ Value *Address = Deps[i].getAddress();
+
// If the dependence is to a store that writes to a superset of the bits
// read by the load, we can extract the bits we need for the load from the
// stored value.
if (StoreInst *DepSI = dyn_cast<StoreInst>(DepInfo.getInst())) {
if (TD == 0)
TD = getAnalysisIfAvailable<TargetData>();
- if (TD) {
- int Offset = AnalyzeLoadFromClobberingStore(LI, DepSI, *TD);
+ if (TD && Address) {
+ int Offset = AnalyzeLoadFromClobberingStore(LI->getType(), Address,
+ DepSI, *TD);
if (Offset != -1) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
DepSI->getOperand(0),
@@ -1245,8 +1405,23 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
}
}
}
+
+ // If the clobbering value is a memset/memcpy/memmove, see if we can
+ // forward a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(DepInfo.getInst())) {
+ if (TD == 0)
+ TD = getAnalysisIfAvailable<TargetData>();
+ if (TD && Address) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(LI->getType(), Address,
+ DepMI, *TD);
+ if (Offset != -1) {
+ ValuesPerBlock.push_back(AvailableValueInBlock::getMI(DepBB, DepMI,
+ Offset));
+ continue;
+ }
+ }
+ }
- // FIXME: Handle memset/memcpy.
UnavailableBlocks.push_back(DepBB);
continue;
}
@@ -1254,21 +1429,14 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
Instruction *DepInst = DepInfo.getInst();
// Loading the allocation -> undef.
- if (isa<AllocaInst>(DepInst) || isMalloc(DepInst)) {
+ if (isa<AllocaInst>(DepInst) || isMalloc(DepInst) ||
+ // Loading immediately after lifetime begin -> undef.
+ isLifetimeStart(DepInst)) {
ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
UndefValue::get(LI->getType())));
continue;
}
- // Loading immediately after lifetime begin or end -> undef.
- if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
- ValuesPerBlock.push_back(AvailableValueInBlock::get(DepBB,
- UndefValue::get(LI->getType())));
- }
- }
-
if (StoreInst *S = dyn_cast<StoreInst>(DepInst)) {
// Reject loads and stores that are to the same address but are of
// different types if we have to.
@@ -1378,19 +1546,25 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// to eliminate LI even if we insert uses in the other predecessors, we will
// end up increasing code size. Reject this by scanning for LI.
for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
- if (ValuesPerBlock[i].V == LI)
+ if (ValuesPerBlock[i].isSimpleValue() &&
+ ValuesPerBlock[i].getSimpleValue() == LI)
return false;
+ // FIXME: It is extremely unclear what this loop is doing, other than
+ // artificially restricting loadpre.
if (isSinglePred) {
bool isHot = false;
- for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i)
- if (Instruction *I = dyn_cast<Instruction>(ValuesPerBlock[i].V))
+ for (unsigned i = 0, e = ValuesPerBlock.size(); i != e; ++i) {
+ const AvailableValueInBlock &AV = ValuesPerBlock[i];
+ if (AV.isSimpleValue())
// "Hot" Instruction is in some loop (because it dominates its dep.
// instruction).
- if (DT->dominates(LI, I)) {
- isHot = true;
- break;
- }
+ if (Instruction *I = dyn_cast<Instruction>(AV.getSimpleValue()))
+ if (DT->dominates(LI, I)) {
+ isHot = true;
+ break;
+ }
+ }
// We are interested only in "hot" instructions. We don't want to do any
// mis-optimizations here.
@@ -1435,29 +1609,43 @@ bool GVN::processNonLocalLoad(LoadInst *LI,
// Do PHI translation to get its value in the predecessor if necessary. The
// returned pointer (if non-null) is guaranteed to dominate UnavailablePred.
//
- // FIXME: This may insert a computation, but we don't tell scalar GVN
- // optimization stuff about it. How do we do this?
SmallVector<Instruction*, 8> NewInsts;
- Value *LoadPtr = 0;
// If all preds have a single successor, then we know it is safe to insert the
// load on the pred (?!?), so we can insert code to materialize the pointer if
// it is not available.
+ PHITransAddr Address(LI->getOperand(0), TD);
+ Value *LoadPtr = 0;
if (allSingleSucc) {
- LoadPtr = MD->InsertPHITranslatedPointer(LI->getOperand(0), LoadBB,
- UnavailablePred, TD, *DT,NewInsts);
+ LoadPtr = Address.PHITranslateWithInsertion(LoadBB, UnavailablePred,
+ *DT, NewInsts);
} else {
- LoadPtr = MD->GetAvailablePHITranslatedValue(LI->getOperand(0), LoadBB,
- UnavailablePred, TD, *DT);
- }
+ Address.PHITranslateValue(LoadBB, UnavailablePred);
+ LoadPtr = Address.getAddr();
+ // Make sure the value is live in the predecessor.
+ if (Instruction *Inst = dyn_cast_or_null<Instruction>(LoadPtr))
+ if (!DT->dominates(Inst->getParent(), UnavailablePred))
+ LoadPtr = 0;
+ }
+
// If we couldn't find or insert a computation of this phi translated value,
// we fail PRE.
if (LoadPtr == 0) {
+ assert(NewInsts.empty() && "Shouldn't insert insts on failure");
DEBUG(errs() << "COULDN'T INSERT PHI TRANSLATED VALUE OF: "
<< *LI->getOperand(0) << "\n");
return false;
}
+
+ // Assign value numbers to these new instructions.
+ for (unsigned i = 0, e = NewInsts.size(); i != e; ++i) {
+ // FIXME: We really _ought_ to insert these value numbers into their
+ // parent's availability map. However, in doing so, we risk getting into
+ // ordering issues. If a block hasn't been processed yet, we would be
+ // marking a value as AVAIL-IN, which isn't what we intend.
+ VN.lookup_or_add(NewInsts[i]);
+ }
// Make sure it is valid to move this load here. We have to watch out for:
// @1 = getelementptr (i8* p, ...
@@ -1517,11 +1705,6 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// If the value isn't available, don't do anything!
if (Dep.isClobber()) {
- // FIXME: We should handle memset/memcpy/memmove as dependent instructions
- // to forward the value if available.
- //if (isa<MemIntrinsic>(Dep.getInst()))
- //errs() << "LOAD DEPENDS ON MEM: " << *L << "\n" << *Dep.getInst()<<"\n\n";
-
// Check to see if we have something like this:
// store i32 123, i32* %P
// %A = bitcast i32* %P to i8*
@@ -1532,25 +1715,42 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
// a common base + constant offset, and if the previous store (or memset)
// completely covers this load. This sort of thing can happen in bitfield
// access code.
+ Value *AvailVal = 0;
if (StoreInst *DepSI = dyn_cast<StoreInst>(Dep.getInst()))
if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
- int Offset = AnalyzeLoadFromClobberingStore(L, DepSI, *TD);
- if (Offset != -1) {
- Value *AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
- L->getType(), L, *TD);
- DEBUG(errs() << "GVN COERCED STORE BITS:\n" << *DepSI << '\n'
- << *AvailVal << '\n' << *L << "\n\n\n");
-
- // Replace the load!
- L->replaceAllUsesWith(AvailVal);
- if (isa<PointerType>(AvailVal->getType()))
- MD->invalidateCachedPointerInfo(AvailVal);
- toErase.push_back(L);
- NumGVNLoad++;
- return true;
- }
+ int Offset = AnalyzeLoadFromClobberingStore(L->getType(),
+ L->getPointerOperand(),
+ DepSI, *TD);
+ if (Offset != -1)
+ AvailVal = GetStoreValueForLoad(DepSI->getOperand(0), Offset,
+ L->getType(), L, *TD);
}
+ // If the clobbering value is a memset/memcpy/memmove, see if we can forward
+ // a value on from it.
+ if (MemIntrinsic *DepMI = dyn_cast<MemIntrinsic>(Dep.getInst())) {
+ if (const TargetData *TD = getAnalysisIfAvailable<TargetData>()) {
+ int Offset = AnalyzeLoadFromClobberingMemInst(L->getType(),
+ L->getPointerOperand(),
+ DepMI, *TD);
+ if (Offset != -1)
+ AvailVal = GetMemInstValueForLoad(DepMI, Offset, L->getType(), L,*TD);
+ }
+ }
+
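+ // If either the store analysis or the mem intrinsic analysis produced a
+ // value, it can be substituted for the load.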
+ if (AvailVal) {
+ DEBUG(errs() << "GVN COERCED INST:\n" << *Dep.getInst() << '\n'
+ << *AvailVal << '\n' << *L << "\n\n\n");
+
+ // Replace the load!
+ L->replaceAllUsesWith(AvailVal);
+ if (isa<PointerType>(AvailVal->getType()))
+ MD->invalidateCachedPointerInfo(AvailVal);
+ toErase.push_back(L);
+ NumGVNLoad++;
+ return true;
+ }
+
DEBUG(
// fast print dep, using operator<< on instruction would be too slow
errs() << "GVN: load ";
@@ -1635,11 +1835,10 @@ bool GVN::processLoad(LoadInst *L, SmallVectorImpl<Instruction*> &toErase) {
return true;
}
- // If this load occurs either right after a lifetime begin or a lifetime end,
+ // If this load occurs right after a lifetime begin,
// then the loaded value is undefined.
if (IntrinsicInst* II = dyn_cast<IntrinsicInst>(DepInst)) {
- if (II->getIntrinsicID() == Intrinsic::lifetime_start ||
- II->getIntrinsicID() == Intrinsic::lifetime_end) {
+ if (II->getIntrinsicID() == Intrinsic::lifetime_start) {
L->replaceAllUsesWith(UndefValue::get(L->getType()));
toErase.push_back(L);
NumGVNLoad++;
diff --git a/lib/Transforms/Scalar/InstructionCombining.cpp b/lib/Transforms/Scalar/InstructionCombining.cpp
index d12ad81..b9c536f 100644
--- a/lib/Transforms/Scalar/InstructionCombining.cpp
+++ b/lib/Transforms/Scalar/InstructionCombining.cpp
@@ -8585,25 +8585,36 @@ Instruction *InstCombiner::transformZExtICmp(ICmpInst *ICI, Instruction &CI,
if (ICI->isEquality() && CI.getType() == ICI->getOperand(0)->getType()) {
if (const IntegerType *ITy = dyn_cast<IntegerType>(CI.getType())) {
uint32_t BitWidth = ITy->getBitWidth();
- if (BitWidth > 1) {
- Value *LHS = ICI->getOperand(0);
- Value *RHS = ICI->getOperand(1);
-
- APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
- APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
- APInt TypeMask(APInt::getHighBitsSet(BitWidth, BitWidth-1));
- ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
- ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
-
- if (KnownZeroLHS.countLeadingOnes() == BitWidth-1 &&
- KnownZeroRHS.countLeadingOnes() == BitWidth-1) {
+ Value *LHS = ICI->getOperand(0);
+ Value *RHS = ICI->getOperand(1);
+
+ APInt KnownZeroLHS(BitWidth, 0), KnownOneLHS(BitWidth, 0);
+ APInt KnownZeroRHS(BitWidth, 0), KnownOneRHS(BitWidth, 0);
+ APInt TypeMask(APInt::getAllOnesValue(BitWidth));
+ ComputeMaskedBits(LHS, TypeMask, KnownZeroLHS, KnownOneLHS);
+ ComputeMaskedBits(RHS, TypeMask, KnownZeroRHS, KnownOneRHS);
+
+ if (KnownZeroLHS == KnownZeroRHS && KnownOneLHS == KnownOneRHS) {
+ APInt KnownBits = KnownZeroLHS | KnownOneLHS;
+ APInt UnknownBit = ~KnownBits;
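+ // If at most one bit can differ between LHS and RHS, the equality test
+ // reduces to examining that single bit of LHS ^ RHS.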
+ if (UnknownBit.countPopulation() == 1) {
if (!DoXform) return ICI;
- Value *Xor = Builder->CreateXor(LHS, RHS);
+ Value *Result = Builder->CreateXor(LHS, RHS);
+
+ // Mask off any bits that are set and won't be shifted away.
+ if (KnownOneLHS.uge(UnknownBit))
+ Result = Builder->CreateAnd(Result,
+ ConstantInt::get(ITy, UnknownBit));
+
+ // Shift the bit we're testing down to the lsb.
+ Result = Builder->CreateLShr(
+ Result, ConstantInt::get(ITy, UnknownBit.countTrailingZeros()));
+
if (ICI->getPredicate() == ICmpInst::ICMP_EQ)
- Xor = Builder->CreateXor(Xor, ConstantInt::get(ITy, 1));
- Xor->takeName(ICI);
- return ReplaceInstUsesWith(CI, Xor);
+ Result = Builder->CreateXor(Result, ConstantInt::get(ITy, 1));
+ Result->takeName(ICI);
+ return ReplaceInstUsesWith(CI, Result);
}
}
}
@@ -11189,8 +11200,9 @@ namespace llvm {
return LHS.PN == RHS.PN && LHS.Shift == RHS.Shift &&
LHS.Width == RHS.Width;
}
- static bool isPod() { return true; }
};
+ template <>
+ struct isPodLike<LoweredPHIRecord> { static const bool value = true; };
}
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 1b93f34..d58b9c9 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -718,6 +718,11 @@ bool JumpThreading::ProcessSwitchOnDuplicateCond(BasicBlock *PredBB,
if (PredSI->getSuccessor(PredCase) != DestBB &&
DestSI->getSuccessor(i) != DestBB)
continue;
+
+ // Do not forward this if it already goes to this destination; this would
+ // be an infinite loop.
+ if (PredSI->getSuccessor(PredCase) == DestSucc)
+ continue;
// Otherwise, we're safe to make the change. Make sure that the edge from
// DestSI to DestSucc is not critical and has no PHI nodes.
diff --git a/lib/Transforms/Scalar/LICM.cpp b/lib/Transforms/Scalar/LICM.cpp
index 5511387..42a8fdc 100644
--- a/lib/Transforms/Scalar/LICM.cpp
+++ b/lib/Transforms/Scalar/LICM.cpp
@@ -160,16 +160,17 @@ namespace {
// Because the exit block is not in the loop, we know we have to get _at
// least_ its immediate dominator.
- do {
- // Get next Immediate Dominator.
- IDom = IDom->getIDom();
-
+ IDom = IDom->getIDom();
+
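+ // Walk up the dominator tree until we reach the node for the block in the
+ // loop (or run out of dominators).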
+ while (IDom && IDom != BlockInLoopNode) {
// If we have got to the header of the loop, then the instruction's block
// did not dominate the exit node, so we can't hoist it.
if (IDom->getBlock() == LoopHeader)
return false;
- } while (IDom != BlockInLoopNode);
+ // Get next Immediate Dominator.
+ IDom = IDom->getIDom();
+ }
return true;
}
diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
index 564c7ac..85cc712 100644
--- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp
+++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp
@@ -24,18 +24,14 @@
#include "llvm/Constants.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/Type.h"
#include "llvm/DerivedTypes.h"
-#include "llvm/Analysis/Dominators.h"
#include "llvm/Analysis/IVUsers.h"
-#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/LoopPass.h"
#include "llvm/Analysis/ScalarEvolutionExpander.h"
#include "llvm/Transforms/Utils/AddrModeMatcher.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/ADT/Statistic.h"
-#include "llvm/Support/CFG.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ValueHandle.h"
@@ -85,8 +81,6 @@ namespace {
class LoopStrengthReduce : public LoopPass {
IVUsers *IU;
- LoopInfo *LI;
- DominatorTree *DT;
ScalarEvolution *SE;
bool Changed;
@@ -94,10 +88,6 @@ namespace {
/// particular stride.
std::map<const SCEV *, IVsOfOneStride> IVsByStride;
- /// StrideNoReuse - Keep track of all the strides whose ivs cannot be
- /// reused (nor should they be rewritten to reuse other strides).
- SmallSet<const SCEV *, 4> StrideNoReuse;
-
/// DeadInsts - Keep track of instructions we may have made dead, so that
/// we can remove them after we are done working.
SmallVector<WeakVH, 16> DeadInsts;
@@ -109,8 +99,7 @@ namespace {
public:
static char ID; // Pass ID, replacement for typeid
explicit LoopStrengthReduce(const TargetLowering *tli = NULL) :
- LoopPass(&ID), TLI(tli) {
- }
+ LoopPass(&ID), TLI(tli) {}
bool runOnLoop(Loop *L, LPPassManager &LPM);
@@ -118,13 +107,11 @@ namespace {
// We split critical edges, so we change the CFG. However, we do update
// many analyses if they are around.
AU.addPreservedID(LoopSimplifyID);
- AU.addPreserved<LoopInfo>();
- AU.addPreserved<DominanceFrontier>();
- AU.addPreserved<DominatorTree>();
+ AU.addPreserved("loops");
+ AU.addPreserved("domfrontier");
+ AU.addPreserved("domtree");
AU.addRequiredID(LoopSimplifyID);
- AU.addRequired<LoopInfo>();
- AU.addRequired<DominatorTree>();
AU.addRequired<ScalarEvolution>();
AU.addPreserved<ScalarEvolution>();
AU.addRequired<IVUsers>();
@@ -228,19 +215,17 @@ void LoopStrengthReduce::DeleteTriviallyDeadInstructions() {
if (DeadInsts.empty()) return;
while (!DeadInsts.empty()) {
- Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.back());
- DeadInsts.pop_back();
+ Instruction *I = dyn_cast_or_null<Instruction>(DeadInsts.pop_back_val());
if (I == 0 || !isInstructionTriviallyDead(I))
continue;
- for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) {
+ for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI)
if (Instruction *U = dyn_cast<Instruction>(*OI)) {
*OI = 0;
if (U->use_empty())
DeadInsts.push_back(U);
}
- }
I->eraseFromParent();
Changed = true;
@@ -265,7 +250,7 @@ static bool containsAddRecFromDifferentLoop(const SCEV *S, Loop *L) {
if (newLoop == L)
return false;
// if newLoop is an outer loop of L, this is OK.
- if (!LoopInfo::isNotAlreadyContainedIn(L, newLoop))
+ if (newLoop->contains(L->getHeader()))
return false;
}
return true;
@@ -338,9 +323,6 @@ namespace {
/// BasedUser - For a particular base value, keep information about how we've
/// partitioned the expression so far.
struct BasedUser {
- /// SE - The current ScalarEvolution object.
- ScalarEvolution *SE;
-
/// Base - The Base value for the PHI node that needs to be inserted for
/// this use. As the use is processed, information gets moved from this
/// field to the Imm field (below). BasedUser values are sorted by this
@@ -372,9 +354,9 @@ namespace {
bool isUseOfPostIncrementedValue;
BasedUser(IVStrideUse &IVSU, ScalarEvolution *se)
- : SE(se), Base(IVSU.getOffset()), Inst(IVSU.getUser()),
+ : Base(IVSU.getOffset()), Inst(IVSU.getUser()),
OperandValToReplace(IVSU.getOperandValToReplace()),
- Imm(SE->getIntegerSCEV(0, Base->getType())),
+ Imm(se->getIntegerSCEV(0, Base->getType())),
isUseOfPostIncrementedValue(IVSU.isUseOfPostIncrementedValue()) {}
// Once we rewrite the code to insert the new IVs we want, update the
@@ -383,14 +365,14 @@ namespace {
void RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
Instruction *InsertPt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
- LoopInfo &LI,
- SmallVectorImpl<WeakVH> &DeadInsts);
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ ScalarEvolution *SE);
Value *InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
- Instruction *IP, Loop *L,
- LoopInfo &LI);
+ Instruction *IP,
+ ScalarEvolution *SE);
void dump() const;
};
}
@@ -404,27 +386,12 @@ void BasedUser::dump() const {
Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
const Type *Ty,
SCEVExpander &Rewriter,
- Instruction *IP, Loop *L,
- LoopInfo &LI) {
- // Figure out where we *really* want to insert this code. In particular, if
- // the user is inside of a loop that is nested inside of L, we really don't
- // want to insert this expression before the user, we'd rather pull it out as
- // many loops as possible.
- Instruction *BaseInsertPt = IP;
-
- // Figure out the most-nested loop that IP is in.
- Loop *InsertLoop = LI.getLoopFor(IP->getParent());
-
- // If InsertLoop is not L, and InsertLoop is nested inside of L, figure out
- // the preheader of the outer-most loop where NewBase is not loop invariant.
- if (L->contains(IP->getParent()))
- while (InsertLoop && NewBase->isLoopInvariant(InsertLoop)) {
- BaseInsertPt = InsertLoop->getLoopPreheader()->getTerminator();
- InsertLoop = InsertLoop->getParentLoop();
- }
-
- Value *Base = Rewriter.expandCodeFor(NewBase, 0, BaseInsertPt);
+ Instruction *IP,
+ ScalarEvolution *SE) {
+ Value *Base = Rewriter.expandCodeFor(NewBase, 0, IP);
+ // Wrap the base in a SCEVUnknown so that ScalarEvolution doesn't try to
+ // re-analyze it.
const SCEV *NewValSCEV = SE->getUnknown(Base);
// Always emit the immediate into the same block as the user.
@@ -443,8 +410,8 @@ Value *BasedUser::InsertCodeForBaseAtPosition(const SCEV *const &NewBase,
void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
Instruction *NewBasePt,
SCEVExpander &Rewriter, Loop *L, Pass *P,
- LoopInfo &LI,
- SmallVectorImpl<WeakVH> &DeadInsts) {
+ SmallVectorImpl<WeakVH> &DeadInsts,
+ ScalarEvolution *SE) {
if (!isa<PHINode>(Inst)) {
// By default, insert code at the user instruction.
BasicBlock::iterator InsertPt = Inst;
@@ -473,7 +440,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
}
Value *NewVal = InsertCodeForBaseAtPosition(NewBase,
OperandValToReplace->getType(),
- Rewriter, InsertPt, L, LI);
+ Rewriter, InsertPt, SE);
// Replace the use of the operand Value with the new Phi we just created.
Inst->replaceUsesOfWith(OperandValToReplace, NewVal);
@@ -535,7 +502,7 @@ void BasedUser::RewriteInstructionToUseNewBase(const SCEV *const &NewBase,
PHIPred->getTerminator() :
OldLoc->getParent()->getTerminator();
Code = InsertCodeForBaseAtPosition(NewBase, PN->getType(),
- Rewriter, InsertPt, L, LI);
+ Rewriter, InsertPt, SE);
DEBUG(errs() << " Changing PHI use to ");
DEBUG(WriteAsOperand(errs(), Code, /*PrintType=*/false));
@@ -1011,17 +978,13 @@ const SCEV *LoopStrengthReduce::CheckForIVReuse(bool HasBaseReg,
const SCEV *const &Stride,
IVExpr &IV, const Type *Ty,
const std::vector<BasedUser>& UsersToProcess) {
- if (StrideNoReuse.count(Stride))
- return SE->getIntegerSCEV(0, Stride->getType());
-
if (const SCEVConstant *SC = dyn_cast<SCEVConstant>(Stride)) {
int64_t SInt = SC->getValue()->getSExtValue();
for (unsigned NewStride = 0, e = IU->StrideOrder.size();
NewStride != e; ++NewStride) {
std::map<const SCEV *, IVsOfOneStride>::iterator SI =
IVsByStride.find(IU->StrideOrder[NewStride]);
- if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first) ||
- StrideNoReuse.count(SI->first))
+ if (SI == IVsByStride.end() || !isa<SCEVConstant>(SI->first))
continue;
// The other stride has no uses, don't reuse it.
std::map<const SCEV *, IVUsersOfOneStride *>::iterator UI =
@@ -1780,8 +1743,8 @@ LoopStrengthReduce::StrengthReduceIVUsersOfStride(const SCEV *const &Stride,
RewriteExpr = SE->getAddExpr(RewriteExpr, SE->getUnknown(BaseV));
User.RewriteInstructionToUseNewBase(RewriteExpr, NewBasePt,
- Rewriter, L, this, *LI,
- DeadInsts);
+ Rewriter, L, this,
+ DeadInsts, SE);
// Mark old value we replaced as possibly dead, so that it is eliminated
// if we just replaced the last use of that value.
@@ -2745,8 +2708,6 @@ bool LoopStrengthReduce::OptimizeLoopCountIV(Loop *L) {
bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
IU = &getAnalysis<IVUsers>();
- LI = &getAnalysis<LoopInfo>();
- DT = &getAnalysis<DominatorTree>();
SE = &getAnalysis<ScalarEvolution>();
Changed = false;
@@ -2792,15 +2753,14 @@ bool LoopStrengthReduce::runOnLoop(Loop *L, LPPassManager &LPM) {
// After all sharing is done, see if we can adjust the loop to test against
// zero instead of counting up to a maximum. This is usually faster.
OptimizeLoopCountIV(L);
- }
- // We're done analyzing this loop; release all the state we built up for it.
- IVsByStride.clear();
- StrideNoReuse.clear();
+ // We're done analyzing this loop; release all the state we built up for it.
+ IVsByStride.clear();
- // Clean up after ourselves
- if (!DeadInsts.empty())
- DeleteTriviallyDeadInstructions();
+ // Clean up after ourselves
+ if (!DeadInsts.empty())
+ DeleteTriviallyDeadInstructions();
+ }
// At this point, it is worth checking to see if any recurrence PHIs are also
// dead, so that we can remove them as well.
diff --git a/lib/Transforms/Scalar/LoopUnswitch.cpp b/lib/Transforms/Scalar/LoopUnswitch.cpp
index 38d267a..b7adfdc 100644
--- a/lib/Transforms/Scalar/LoopUnswitch.cpp
+++ b/lib/Transforms/Scalar/LoopUnswitch.cpp
@@ -404,12 +404,13 @@ bool LoopUnswitch::IsTrivialUnswitchCondition(Value *Cond, Constant **Val,
bool LoopUnswitch::UnswitchIfProfitable(Value *LoopCond, Constant *Val){
initLoopData();
- Function *F = loopHeader->getParent();
// If LoopSimplify was unable to form a preheader, don't do any unswitching.
if (!loopPreheader)
return false;
+ Function *F = loopHeader->getParent();
+
// If the condition is trivial, always unswitch. There is no code growth for
// this case.
if (!IsTrivialUnswitchCondition(LoopCond)) {
diff --git a/lib/Transforms/Scalar/SCCVN.cpp b/lib/Transforms/Scalar/SCCVN.cpp
index 001267a..dbc82e1 100644
--- a/lib/Transforms/Scalar/SCCVN.cpp
+++ b/lib/Transforms/Scalar/SCCVN.cpp
@@ -19,7 +19,6 @@
#include "llvm/Constants.h"
#include "llvm/DerivedTypes.h"
#include "llvm/Function.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Operator.h"
#include "llvm/Value.h"
#include "llvm/ADT/DenseMap.h"
@@ -155,8 +154,10 @@ template <> struct DenseMapInfo<Expression> {
static bool isEqual(const Expression &LHS, const Expression &RHS) {
return LHS == RHS;
}
- static bool isPod() { return true; }
};
+template <>
+struct isPodLike<Expression> { static const bool value = true; };
+
}
//===----------------------------------------------------------------------===//
diff --git a/lib/Transforms/Scalar/ScalarReplAggregates.cpp b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
index ae6ad74..b040a27 100644
--- a/lib/Transforms/Scalar/ScalarReplAggregates.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregates.cpp
@@ -105,7 +105,7 @@ namespace {
void isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
AllocaInfo &Info);
void isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
- AllocaInfo &Info);
+ AllocaInfo &Info);
void isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
unsigned OpNo, AllocaInfo &Info);
void isSafeUseOfBitCastedAllocation(BitCastInst *User, AllocaInst *AI,
@@ -362,7 +362,6 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
// Now that we have created the alloca instructions that we want to use,
// expand the getelementptr instructions to use them.
- //
while (!AI->use_empty()) {
Instruction *User = cast<Instruction>(AI->use_back());
if (BitCastInst *BCInst = dyn_cast<BitCastInst>(User)) {
@@ -450,11 +449,9 @@ void SROA::DoScalarReplacement(AllocaInst *AI,
NumReplaced++;
}
-
/// isSafeElementUse - Check to see if this use is an allowed use for a
/// getelementptr instruction of an array aggregate allocation. isFirstElt
/// indicates whether Ptr is known to the start of the aggregate.
-///
void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
AllocaInfo &Info) {
for (Value::use_iterator I = Ptr->use_begin(), E = Ptr->use_end();
@@ -503,7 +500,6 @@ void SROA::isSafeElementUse(Value *Ptr, bool isFirstElt, AllocaInst *AI,
}
}
-
isSafeElementUse(GEP, AreAllZeroIndices, AI, Info);
if (Info.isUnsafe) return;
break;
@@ -543,9 +539,8 @@ static bool AllUsersAreLoads(Value *Ptr) {
return true;
}
-/// isSafeUseOfAllocation - Check to see if this user is an allowed use for an
+/// isSafeUseOfAllocation - Check if this user is an allowed use for an
/// aggregate allocation.
-///
void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
AllocaInfo &Info) {
if (BitCastInst *C = dyn_cast<BitCastInst>(User))
@@ -614,7 +609,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
// integer. Specifically, consider A[0][i]. We cannot know that the user
// isn't doing invalid things like allowing i to index an out-of-range
// subscript that accesses A[1]. Because of this, we have to reject SROA
- // of any accesses into structs where any of the components are variables.
+ // of any accesses into structs where any of the components are variables.
if (IdxVal->getZExtValue() >= AT->getNumElements())
return MarkUnsafe(Info);
} else if (const VectorType *VT = dyn_cast<VectorType>(*I)) {
@@ -628,7 +623,7 @@ void SROA::isSafeUseOfAllocation(Instruction *User, AllocaInst *AI,
return isSafeElementUse(GEPI, IsAllZeroIndices, AI, Info);
}
-/// isSafeMemIntrinsicOnAllocation - Return true if the specified memory
+/// isSafeMemIntrinsicOnAllocation - Check if the specified memory
/// intrinsic can be promoted by SROA. At this point, we know that the operand
/// of the memintrinsic is a pointer to the beginning of the allocation.
void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
@@ -656,8 +651,8 @@ void SROA::isSafeMemIntrinsicOnAllocation(MemIntrinsic *MI, AllocaInst *AI,
}
}
-/// isSafeUseOfBitCastedAllocation - Return true if all users of this bitcast
-/// are
+/// isSafeUseOfBitCastedAllocation - Check if all users of this bitcast
+/// from an alloca are safe for SROA of that alloca.
void SROA::isSafeUseOfBitCastedAllocation(BitCastInst *BC, AllocaInst *AI,
AllocaInfo &Info) {
for (Value::use_iterator UI = BC->use_begin(), E = BC->use_end();
@@ -773,6 +768,10 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
OtherPtr = MTI->getRawDest();
}
}
+
+ // Keep track of the other intrinsic argument, so it can be removed if it
+ // is dead when the intrinsic is replaced.
+ Value *PossiblyDead = OtherPtr;
// If there is an other pointer, we want to convert it to the same pointer
// type as AI has, so we can GEP through it safely.
@@ -926,9 +925,11 @@ void SROA::RewriteMemIntrinUserOfAlloca(MemIntrinsic *MI, Instruction *BCInst,
}
}
MI->eraseFromParent();
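+ // If rewriting the intrinsic left its other pointer operand trivially
+ // dead, remove that too.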
+ if (PossiblyDead)
+ RecursivelyDeleteTriviallyDeadInstructions(PossiblyDead);
}
-/// RewriteStoreUserOfWholeAlloca - We found an store of an integer that
+/// RewriteStoreUserOfWholeAlloca - We found a store of an integer that
/// overwrites the entire allocation. Extract out the pieces of the stored
/// integer and store them individually.
void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
@@ -1052,7 +1053,7 @@ void SROA::RewriteStoreUserOfWholeAlloca(StoreInst *SI, AllocaInst *AI,
SI->eraseFromParent();
}
-/// RewriteLoadUserOfWholeAlloca - We found an load of the entire allocation to
+/// RewriteLoadUserOfWholeAlloca - We found a load of the entire allocation to
/// an integer. Load the individual pieces to form the aggregate value.
void SROA::RewriteLoadUserOfWholeAlloca(LoadInst *LI, AllocaInst *AI,
SmallVector<AllocaInst*, 32> &NewElts) {
@@ -1186,7 +1187,6 @@ static bool HasPadding(const Type *Ty, const TargetData &TD) {
/// isSafeStructAllocaToScalarRepl - Check to see if the specified allocation of
/// an aggregate can be broken down into elements. Return 0 if not, 3 if safe,
/// or 1 if safe after canonicalization has been performed.
-///
int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
// Loop over the use list of the alloca. We can only transform it if all of
// the users are safe to transform.
@@ -1215,7 +1215,7 @@ int SROA::isSafeAllocaToScalarRepl(AllocaInst *AI) {
return Info.needsCleanup ? 1 : 3;
}
-/// CleanupGEP - GEP is used by an Alloca, which can be prompted after the GEP
+/// CleanupGEP - GEP is used by an Alloca, which can be promoted after the GEP
/// is canonicalized here.
void SROA::CleanupGEP(GetElementPtrInst *GEPI) {
gep_type_iterator I = gep_type_begin(GEPI);
@@ -1347,7 +1347,7 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
}
/// CanConvertToScalar - V is a pointer. If we can convert the pointee and all
-/// its accesses to use a to single vector type, return true, and set VecTy to
+/// its accesses to a single vector type, return true and set VecTy to
/// the new type. If we could convert the alloca into a single promotable
/// integer, return true but set VecTy to VoidTy. Further, if the use is not a
/// completely trivial use that mem2reg could promote, set IsNotTrivial. Offset
@@ -1355,7 +1355,6 @@ static void MergeInType(const Type *In, uint64_t Offset, const Type *&VecTy,
///
/// If we see at least one access to the value that is as a vector type, set the
/// SawVec flag.
-///
bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
bool &SawVec, uint64_t Offset,
unsigned AllocaSize) {
@@ -1438,7 +1437,6 @@ bool SROA::CanConvertToScalar(Value *V, bool &IsNotTrivial, const Type *&VecTy,
return true;
}
-
/// ConvertUsesToScalar - Convert all of the users of Ptr to use the new alloca
/// directly. This happens when we are converting an "integer union" to a
/// single integer scalar, or when we are converting a "vector union" to a
@@ -1481,7 +1479,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
if (StoreInst *SI = dyn_cast<StoreInst>(User)) {
assert(SI->getOperand(0) != Ptr && "Consistency error!");
// FIXME: Remove once builder has Twine API.
- Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
+ Value *Old = Builder.CreateLoad(NewAI,
+ (NewAI->getName()+".in").str().c_str());
Value *New = ConvertScalar_InsertValue(SI->getOperand(0), Old, Offset,
Builder);
Builder.CreateStore(New, NewAI);
@@ -1506,7 +1505,8 @@ void SROA::ConvertUsesToScalar(Value *Ptr, AllocaInst *NewAI, uint64_t Offset) {
APVal |= APVal << 8;
// FIXME: Remove once builder has Twine API.
- Value *Old = Builder.CreateLoad(NewAI, (NewAI->getName()+".in").str().c_str());
+ Value *Old = Builder.CreateLoad(NewAI,
+ (NewAI->getName()+".in").str().c_str());
Value *New = ConvertScalar_InsertValue(
ConstantInt::get(User->getContext(), APVal),
Old, Offset, Builder);
@@ -1679,7 +1679,6 @@ Value *SROA::ConvertScalar_ExtractValue(Value *FromVal, const Type *ToType,
return FromVal;
}
-
/// ConvertScalar_InsertValue - Insert the value "SV" into the existing integer
/// or vector value "Old" at the offset specified by Offset.
///
diff --git a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
index 13077fe..5acd6aa 100644
--- a/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyHalfPowrLibCalls.cpp
@@ -88,7 +88,7 @@ InlineHalfPowrs(const std::vector<Instruction *> &HalfPowrs,
if (!isa<ReturnInst>(Body->getTerminator()))
break;
- Instruction *NextInst = next(BasicBlock::iterator(Call));
+ Instruction *NextInst = llvm::next(BasicBlock::iterator(Call));
// Inline the call, taking care of what code ends up where.
NewBlock = SplitBlock(NextInst->getParent(), NextInst, this);
diff --git a/lib/Transforms/Scalar/SimplifyLibCalls.cpp b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
index f9b929c..6fd884b 100644
--- a/lib/Transforms/Scalar/SimplifyLibCalls.cpp
+++ b/lib/Transforms/Scalar/SimplifyLibCalls.cpp
@@ -128,8 +128,7 @@ public:
/// CastToCStr - Return V if it is an i8*, otherwise cast it to i8*.
Value *LibCallOptimization::CastToCStr(Value *V, IRBuilder<> &B) {
- return
- B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr");
+ return B.CreateBitCast(V, Type::getInt8PtrTy(*Context), "cstr");
}
/// EmitStrLen - Emit a call to the strlen function to the builder, for the
@@ -157,27 +156,25 @@ Value *LibCallOptimization::EmitStrLen(Value *Ptr, IRBuilder<> &B) {
Value *LibCallOptimization::EmitMemCpy(Value *Dst, Value *Src, Value *Len,
unsigned Align, IRBuilder<> &B) {
Module *M = Caller->getParent();
- Intrinsic::ID IID = Intrinsic::memcpy;
- const Type *Tys[1];
- Tys[0] = Len->getType();
- Value *MemCpy = Intrinsic::getDeclaration(M, IID, Tys, 1);
- return B.CreateCall4(MemCpy, CastToCStr(Dst, B), CastToCStr(Src, B), Len,
+ const Type *Ty = Len->getType();
+ Value *MemCpy = Intrinsic::getDeclaration(M, Intrinsic::memcpy, &Ty, 1);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
+ return B.CreateCall4(MemCpy, Dst, Src, Len,
ConstantInt::get(Type::getInt32Ty(*Context), Align));
}
-/// EmitMemMOve - Emit a call to the memmove function to the builder. This
+/// EmitMemMove - Emit a call to the memmove function to the builder. This
/// always expects that the size has type 'intptr_t' and Dst/Src are pointers.
Value *LibCallOptimization::EmitMemMove(Value *Dst, Value *Src, Value *Len,
unsigned Align, IRBuilder<> &B) {
Module *M = Caller->getParent();
- Intrinsic::ID IID = Intrinsic::memmove;
- const Type *Tys[1];
- Tys[0] = TD->getIntPtrType(*Context);
- Value *MemMove = Intrinsic::getDeclaration(M, IID, Tys, 1);
- Value *D = CastToCStr(Dst, B);
- Value *S = CastToCStr(Src, B);
+ const Type *Ty = TD->getIntPtrType(*Context);
+ Value *MemMove = Intrinsic::getDeclaration(M, Intrinsic::memmove, &Ty, 1);
+ Dst = CastToCStr(Dst, B);
+ Src = CastToCStr(Src, B);
Value *A = ConstantInt::get(Type::getInt32Ty(*Context), Align);
- return B.CreateCall4(MemMove, D, S, Len, A);
+ return B.CreateCall4(MemMove, Dst, Src, Len, A);
}
/// EmitMemChr - Emit a call to the memchr function. This assumes that Ptr is
@@ -2647,10 +2644,11 @@ bool SimplifyLibCalls::doInitialization(Module &M) {
// * strcspn("",a) -> 0
// * strcspn(s,"") -> strlen(a)
//
-// strstr:
+// strstr: (PR5783)
// * strstr(x,x) -> x
-// * strstr(s1,s2) -> offset_of_s2_in(s1)
-// (if s1 and s2 are constant strings)
+// * strstr(x, "") -> x
+// * strstr(x, "a") -> strchr(x, 'a')
+// * strstr(s1,s2) -> result (if s1 and s2 are constant strings)
//
// tan, tanf, tanl:
// * tan(atan(x)) -> x
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 2974592..2962e84 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -16,7 +16,6 @@
#include "llvm/Function.h"
#include "llvm/Instructions.h"
#include "llvm/IntrinsicInst.h"
-#include "llvm/LLVMContext.h"
#include "llvm/Constant.h"
#include "llvm/Type.h"
#include "llvm/Analysis/AliasAnalysis.h"
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index aef0f5f..7a37aa3 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -603,3 +603,65 @@ bool llvm::OnlyUsedByDbgInfoIntrinsics(Instruction *I,
return true;
}
+/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
+/// nodes in this block. This doesn't try to be clever about PHI nodes
+/// which differ only in the order of the incoming values, but instcombine
+/// orders them so it usually won't matter.
+///
+bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
+ bool Changed = false;
+
+ // This implementation doesn't currently consider undef operands
+ // specially. Theoretically, two phis which are identical except for
+ // one having an undef where the other doesn't could be collapsed.
+
+ // Map from PHI hash values to PHI nodes. If multiple PHIs have
+ // the same hash value, the element is the first PHI in the
+ // linked list in CollisionMap.
+ DenseMap<uintptr_t, PHINode *> HashMap;
+
+ // Maintain linked lists of PHI nodes with common hash values.
+ DenseMap<PHINode *, PHINode *> CollisionMap;
+
+ // Examine each PHI.
+ for (BasicBlock::iterator I = BB->begin();
+ PHINode *PN = dyn_cast<PHINode>(I++); ) {
+ // Compute a hash value on the operands. Instcombine will likely have sorted
+ // them, which helps expose duplicates, but we have to check all the
+ // operands to be safe in case instcombine hasn't run.
+ uintptr_t Hash = 0;
+ for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
+ // This hash algorithm is quite weak as hash functions go, but it seems
+ // to do a good enough job for this particular purpose, and is very quick.
+ Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
+ Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
+ }
+ // If we've never seen this hash value before, it's a unique PHI.
+ std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
+ HashMap.insert(std::make_pair(Hash, PN));
+ if (Pair.second) continue;
+ // Otherwise it's either a duplicate or a hash collision.
+ for (PHINode *OtherPN = Pair.first->second; ; ) {
+ if (OtherPN->isIdenticalTo(PN)) {
+ // A duplicate. Replace this PHI with its duplicate.
+ PN->replaceAllUsesWith(OtherPN);
+ PN->eraseFromParent();
+ Changed = true;
+ break;
+ }
+ // A non-duplicate hash collision.
+ DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
+ if (I == CollisionMap.end()) {
+ // Set this PHI to be the head of the linked list of colliding PHIs.
+ PHINode *Old = Pair.first->second;
+ Pair.first->second = PN;
+ CollisionMap[PN] = Old;
+ break;
+ }
+ // Proceed to the next PHI in the list.
+ OtherPN = I->second;
+ }
+ }
+
+ return Changed;
+}
diff --git a/lib/Transforms/Utils/LowerSwitch.cpp b/lib/Transforms/Utils/LowerSwitch.cpp
index 8c18b59..743bb6e 100644
--- a/lib/Transforms/Utils/LowerSwitch.cpp
+++ b/lib/Transforms/Utils/LowerSwitch.cpp
@@ -244,7 +244,7 @@ unsigned LowerSwitch::Clusterify(CaseVector& Cases, SwitchInst *SI) {
// Merge case into clusters
if (Cases.size()>=2)
- for (CaseItr I=Cases.begin(), J=next(Cases.begin()); J!=Cases.end(); ) {
+ for (CaseItr I=Cases.begin(), J=llvm::next(Cases.begin()); J!=Cases.end(); ) {
int64_t nextValue = cast<ConstantInt>(J->Low)->getSExtValue();
int64_t currentValue = cast<ConstantInt>(I->High)->getSExtValue();
BasicBlock* nextBB = J->BB;
diff --git a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
index e25f9e2..846e432 100644
--- a/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
+++ b/lib/Transforms/Utils/PromoteMemoryToRegister.cpp
@@ -55,7 +55,6 @@ struct DenseMapInfo<std::pair<BasicBlock*, unsigned> > {
static bool isEqual(const EltTy &LHS, const EltTy &RHS) {
return LHS == RHS;
}
- static bool isPod() { return true; }
};
}
@@ -102,7 +101,7 @@ namespace {
public:
typedef std::vector<Value *> ValVector;
- RenamePassData() {}
+ RenamePassData() : BB(NULL), Pred(NULL), Values() {}
RenamePassData(BasicBlock *B, BasicBlock *P,
const ValVector &V) : BB(B), Pred(P), Values(V) {}
BasicBlock *BB;
diff --git a/lib/Transforms/Utils/SSAUpdater.cpp b/lib/Transforms/Utils/SSAUpdater.cpp
index 8a07c35..ba41bf9 100644
--- a/lib/Transforms/Utils/SSAUpdater.cpp
+++ b/lib/Transforms/Utils/SSAUpdater.cpp
@@ -295,10 +295,14 @@ Value *SSAUpdater::GetValueAtEndOfBlockInternal(BasicBlock *BB) {
InsertedVal = SingularValue;
}
+ // Either path through the 'if' should have set InsertedVal -> SingularValue.
+ assert((InsertedVal == SingularValue || isa<UndefValue>(InsertedVal)) &&
+ "RAUW didn't change InsertedVal to be SingularValue");
+
// Drop the entries we added in IncomingPredInfo to restore the stack.
IncomingPredInfo.erase(IncomingPredInfo.begin()+FirstPredInfoEntry,
IncomingPredInfo.end());
- return InsertedVal;
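+ // InsertedVal has either been set to SingularValue or RAUW'd to it above,
+ // so return the singular value as the canonical result.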
+ return SingularValue;
}
// Otherwise, we do need a PHI: insert one now if we don't already have one.
diff --git a/lib/Transforms/Utils/SimplifyCFG.cpp b/lib/Transforms/Utils/SimplifyCFG.cpp
index 89b0bd9..d7ca45e 100644
--- a/lib/Transforms/Utils/SimplifyCFG.cpp
+++ b/lib/Transforms/Utils/SimplifyCFG.cpp
@@ -1589,69 +1589,6 @@ static bool SimplifyCondBranchToCondBranch(BranchInst *PBI, BranchInst *BI) {
return true;
}
-/// EliminateDuplicatePHINodes - Check for and eliminate duplicate PHI
-/// nodes in this block. This doesn't try to be clever about PHI nodes
-/// which differ only in the order of the incoming values, but instcombine
-/// orders them so it usually won't matter.
-///
-bool llvm::EliminateDuplicatePHINodes(BasicBlock *BB) {
- bool Changed = false;
-
- // This implementation doesn't currently consider undef operands
- // specially. Theoretically, two phis which are identical except for
- // one having an undef where the other doesn't could be collapsed.
-
- // Map from PHI hash values to PHI nodes. If multiple PHIs have
- // the same hash value, the element is the first PHI in the
- // linked list in CollisionMap.
- DenseMap<uintptr_t, PHINode *> HashMap;
-
- // Maintain linked lists of PHI nodes with common hash values.
- DenseMap<PHINode *, PHINode *> CollisionMap;
-
- // Examine each PHI.
- for (BasicBlock::iterator I = BB->begin();
- PHINode *PN = dyn_cast<PHINode>(I++); ) {
- // Compute a hash value on the operands. Instcombine will likely have sorted
- // them, which helps expose duplicates, but we have to check all the
- // operands to be safe in case instcombine hasn't run.
- uintptr_t Hash = 0;
- for (User::op_iterator I = PN->op_begin(), E = PN->op_end(); I != E; ++I) {
- // This hash algorithm is quite weak as hash functions go, but it seems
- // to do a good enough job for this particular purpose, and is very quick.
- Hash ^= reinterpret_cast<uintptr_t>(static_cast<Value *>(*I));
- Hash = (Hash << 7) | (Hash >> (sizeof(uintptr_t) * CHAR_BIT - 7));
- }
- // If we've never seen this hash value before, it's a unique PHI.
- std::pair<DenseMap<uintptr_t, PHINode *>::iterator, bool> Pair =
- HashMap.insert(std::make_pair(Hash, PN));
- if (Pair.second) continue;
- // Otherwise it's either a duplicate or a hash collision.
- for (PHINode *OtherPN = Pair.first->second; ; ) {
- if (OtherPN->isIdenticalTo(PN)) {
- // A duplicate. Replace this PHI with its duplicate.
- PN->replaceAllUsesWith(OtherPN);
- PN->eraseFromParent();
- Changed = true;
- break;
- }
- // A non-duplicate hash collision.
- DenseMap<PHINode *, PHINode *>::iterator I = CollisionMap.find(OtherPN);
- if (I == CollisionMap.end()) {
- // Set this PHI to be the head of the linked list of colliding PHIs.
- PHINode *Old = Pair.first->second;
- Pair.first->second = PN;
- CollisionMap[PN] = Old;
- break;
- }
- // Proceed to the next PHI in the list.
- OtherPN = I->second;
- }
- }
-
- return Changed;
-}
-
/// SimplifyCFG - This function is used to do simplification of a CFG. For
/// example, it adjusts branches to branches to eliminate the extra hop, it
/// eliminates unreachable basic blocks, and does other "peephole" optimization
diff --git a/lib/VMCore/AsmWriter.cpp b/lib/VMCore/AsmWriter.cpp
index 82d7914..c765d96 100644
--- a/lib/VMCore/AsmWriter.cpp
+++ b/lib/VMCore/AsmWriter.cpp
@@ -813,6 +813,11 @@ void SlotTracker::CreateFunctionSlot(const Value *V) {
void SlotTracker::CreateMetadataSlot(const MDNode *N) {
assert(N && "Can't insert a null Value into SlotTracker!");
+ // Don't insert if N contains an instruction.
+ for (unsigned i = 0, e = N->getNumElements(); i != e; ++i)
+ if (N->getElement(i) && isa<Instruction>(N->getElement(i)))
+ return;
+
ValueMap::iterator I = mdnMap.find(N);
if (I != mdnMap.end())
return;
@@ -1227,6 +1232,25 @@ static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
}
if (const MDNode *N = dyn_cast<MDNode>(V)) {
+ if (Machine->getMetadataSlot(N) == -1) {
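+ // A slot of -1 means the node was never registered (e.g. because it
+ // references an instruction, see CreateMetadataSlot above), so print its
+ // elements inline.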
+ // Print metadata inline, not via slot reference number.
+ Out << "!{";
+ for (unsigned mi = 0, me = N->getNumElements(); mi != me; ++mi) {
+ const Value *Val = N->getElement(mi);
+ if (!Val)
+ Out << "null";
+ else {
+ TypePrinter->print(Val->getType(), Out);
+ Out << ' ';
+ WriteAsOperandInternal(Out, Val, TypePrinter, Machine);
+ }
+ if (mi + 1 != me)
+ Out << ", ";
+ }
+ Out << '}';
+ return;
+ }
+
Out << '!' << Machine->getMetadataSlot(N);
return;
}
@@ -1636,6 +1660,7 @@ void AssemblyWriter::printFunction(const Function *F) {
case CallingConv::ARM_APCS: Out << "arm_apcscc "; break;
case CallingConv::ARM_AAPCS: Out << "arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << "arm_aapcs_vfpcc "; break;
+ case CallingConv::MSP430_INTR: Out << "msp430_intrcc "; break;
default: Out << "cc" << F->getCallingConv() << " "; break;
}
@@ -1903,6 +1928,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
case CallingConv::ARM_APCS: Out << " arm_apcscc "; break;
case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
+ case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break;
default: Out << " cc" << CI->getCallingConv(); break;
}
@@ -1953,6 +1979,7 @@ void AssemblyWriter::printInstruction(const Instruction &I) {
case CallingConv::ARM_APCS: Out << " arm_apcscc "; break;
case CallingConv::ARM_AAPCS: Out << " arm_aapcscc "; break;
case CallingConv::ARM_AAPCS_VFP:Out << " arm_aapcs_vfpcc "; break;
+ case CallingConv::MSP430_INTR: Out << " msp430_intrcc "; break;
default: Out << " cc" << II->getCallingConv(); break;
}
diff --git a/lib/VMCore/BasicBlock.cpp b/lib/VMCore/BasicBlock.cpp
index 23d0557..c7f7f53 100644
--- a/lib/VMCore/BasicBlock.cpp
+++ b/lib/VMCore/BasicBlock.cpp
@@ -262,7 +262,7 @@ BasicBlock *BasicBlock::splitBasicBlock(iterator I, const Twine &BBName) {
assert(I != InstList.end() &&
"Trying to get me to create degenerate basic block!");
- BasicBlock *InsertBefore = next(Function::iterator(this))
+ BasicBlock *InsertBefore = llvm::next(Function::iterator(this))
.getNodePtrUnchecked();
BasicBlock *New = BasicBlock::Create(getContext(), BBName,
getParent(), InsertBefore);
diff --git a/lib/VMCore/Constants.cpp b/lib/VMCore/Constants.cpp
index c622558..a62f75b 100644
--- a/lib/VMCore/Constants.cpp
+++ b/lib/VMCore/Constants.cpp
@@ -1560,7 +1560,7 @@ Constant *ConstantExpr::getGetElementPtrTy(const Type *ReqTy, Constant *C,
Constant *ConstantExpr::getInBoundsGetElementPtrTy(const Type *ReqTy,
Constant *C,
- Value* const *Idxs,
+ Value *const *Idxs,
unsigned NumIdx) {
assert(GetElementPtrInst::getIndexedType(C->getType(), Idxs,
Idxs+NumIdx) ==
diff --git a/lib/VMCore/Function.cpp b/lib/VMCore/Function.cpp
index 6cf2c81..88e1fe8 100644
--- a/lib/VMCore/Function.cpp
+++ b/lib/VMCore/Function.cpp
@@ -77,6 +77,13 @@ bool Argument::hasByValAttr() const {
return getParent()->paramHasAttr(getArgNo()+1, Attribute::ByVal);
}
+/// hasNestAttr - Return true if this argument has the nest attribute on
+/// it in its containing function.
+bool Argument::hasNestAttr() const {
+ if (!isa<PointerType>(getType())) return false;
+ return getParent()->paramHasAttr(getArgNo()+1, Attribute::Nest);
+}
+
/// hasNoAliasAttr - Return true if this argument has the noalias attribute on
/// it in its containing function.
bool Argument::hasNoAliasAttr() const {
diff --git a/lib/VMCore/LLVMContextImpl.h b/lib/VMCore/LLVMContextImpl.h
index 1c3244b..8a2378e 100644
--- a/lib/VMCore/LLVMContextImpl.h
+++ b/lib/VMCore/LLVMContextImpl.h
@@ -62,7 +62,6 @@ struct DenseMapAPIntKeyInfo {
static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
return LHS == RHS;
}
- static bool isPod() { return false; }
};
struct DenseMapAPFloatKeyInfo {
@@ -89,7 +88,6 @@ struct DenseMapAPFloatKeyInfo {
static bool isEqual(const KeyTy &LHS, const KeyTy &RHS) {
return LHS == RHS;
}
- static bool isPod() { return false; }
};
class LLVMContextImpl {
diff --git a/lib/VMCore/Pass.cpp b/lib/VMCore/Pass.cpp
index 1232fe2..6bea7a8 100644
--- a/lib/VMCore/Pass.cpp
+++ b/lib/VMCore/Pass.cpp
@@ -41,6 +41,10 @@ Pass::~Pass() {
// Force out-of-line virtual method.
ModulePass::~ModulePass() { }
+PassManagerType ModulePass::getPotentialPassManagerType() const {
+ return PMT_ModulePassManager;
+}
+
bool Pass::mustPreserveAnalysisID(const PassInfo *AnalysisID) const {
return Resolver->getAnalysisIfAvailable(AnalysisID, true) != 0;
}
@@ -60,6 +64,27 @@ const char *Pass::getPassName() const {
return "Unnamed pass: implement Pass::getPassName()";
}
+void Pass::preparePassManager(PMStack &) {
+ // By default, don't do anything.
+}
+
+PassManagerType Pass::getPotentialPassManagerType() const {
+ // Default implementation.
+ return PMT_Unknown;
+}
+
+void Pass::getAnalysisUsage(AnalysisUsage &) const {
+ // By default, no analysis results are used, all are invalidated.
+}
+
+void Pass::releaseMemory() {
+ // By default, don't do anything.
+}
+
+void Pass::verifyAnalysis() const {
+ // By default, don't do anything.
+}
+
// print - Print out the internal state of the pass. This is called by Analyze
// to print out the contents of an analysis. Otherwise it is not necessary to
// implement this method.
@@ -79,6 +104,10 @@ void Pass::dump() const {
// Force out-of-line virtual method.
ImmutablePass::~ImmutablePass() { }
+void ImmutablePass::initializePass() {
+ // By default, don't do anything.
+}
+
//===----------------------------------------------------------------------===//
// FunctionPass Implementation
//
@@ -107,6 +136,20 @@ bool FunctionPass::run(Function &F) {
return Changed | doFinalization(*F.getParent());
}
+bool FunctionPass::doInitialization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool FunctionPass::doFinalization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+PassManagerType FunctionPass::getPotentialPassManagerType() const {
+ return PMT_FunctionPassManager;
+}
+
//===----------------------------------------------------------------------===//
// BasicBlockPass Implementation
//
@@ -121,6 +164,30 @@ bool BasicBlockPass::runOnFunction(Function &F) {
return Changed | doFinalization(F);
}
+bool BasicBlockPass::doInitialization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doInitialization(Function &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doFinalization(Function &) {
+ // By default, don't do anything.
+ return false;
+}
+
+bool BasicBlockPass::doFinalization(Module &) {
+ // By default, don't do anything.
+ return false;
+}
+
+PassManagerType BasicBlockPass::getPotentialPassManagerType() const {
+ return PMT_BasicBlockPassManager;
+}
+
//===----------------------------------------------------------------------===//
// Pass Registration mechanism
//
diff --git a/lib/VMCore/PassManager.cpp b/lib/VMCore/PassManager.cpp
index ae418a0..52e8a82 100644
--- a/lib/VMCore/PassManager.cpp
+++ b/lib/VMCore/PassManager.cpp
@@ -738,9 +738,15 @@ void PMDataManager::removeNotPreservedAnalysis(Pass *P) {
std::map<AnalysisID, Pass *>::iterator Info = I++;
if (!dynamic_cast<ImmutablePass*>(Info->second) &&
std::find(PreservedSet.begin(), PreservedSet.end(), Info->first) ==
- PreservedSet.end())
+ PreservedSet.end()) {
// Remove this analysis
+ if (PassDebugging >= Details) {
+ Pass *S = Info->second;
+ errs() << " -- '" << P->getPassName() << "' is not preserving '";
+ errs() << S->getPassName() << "'\n";
+ }
InheritedAnalysis[Index]->erase(Info);
+ }
}
}
}
@@ -1391,8 +1397,7 @@ MPPassManager::runOnModule(Module &M) {
for (unsigned Index = 0; Index < getNumContainedPasses(); ++Index) {
ModulePass *MP = getContainedPass(Index);
- dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG,
- M.getModuleIdentifier().c_str());
+ dumpPassInfo(MP, EXECUTION_MSG, ON_MODULE_MSG, M.getModuleIdentifier());
dumpRequiredSet(MP);
initializeAnalysisImpl(MP);
@@ -1406,13 +1411,13 @@ MPPassManager::runOnModule(Module &M) {
if (Changed)
dumpPassInfo(MP, MODIFICATION_MSG, ON_MODULE_MSG,
- M.getModuleIdentifier().c_str());
+ M.getModuleIdentifier());
dumpPreservedSet(MP);
verifyPreservedAnalysis(MP);
removeNotPreservedAnalysis(MP);
recordAvailableAnalysis(MP);
- removeDeadPasses(MP, M.getModuleIdentifier().c_str(), ON_MODULE_MSG);
+ removeDeadPasses(MP, M.getModuleIdentifier(), ON_MODULE_MSG);
}
// Finalize on-the-fly passes